프로그램

[파이썬] 문제 : pandas 학생 성적 처리(index, groupby, pivot, mean, median, count)

오디세이99 2023. 4. 8. 20:31
728x90
반응형

import numpy as np
import pandas as  pd
import random, string

# Notebook-style walkthrough: build a random 100-student table, attach the
# homeroom teacher of each class, then derive per-class / per-gender summaries.
# Bare expressions such as `student` are notebook display cells and are no-ops
# when the file is run as a plain script.

# Seeded NumPy generator: classno / height / weight are reproducible.
rng = np.random.RandomState(123)

# NOTE: sname, sid and gender use the unseeded stdlib `random` module, so they
# differ on every run.
sname = [''.join(random.choices(string.ascii_uppercase + string.digits, k=6)) for _ in range(100)]
sid = [''.join(random.choices(string.digits, k=8)) for _ in range(100)]
classno = rng.randint(1, 6, 100)        # class numbers 1..5 (upper bound exclusive)
gender = random.choices(['female', 'male'], k=100)
height = rng.randint(160, 190, 100)     # integers in [160, 190)
weight = rng.randint(50, 80, 100)       # integers in [50, 80)

# Use a plain named Index; `index=[sid]` (a list of lists) would create a
# single-level MultiIndex by accident.
student = pd.DataFrame(
    {'sname': sname, 'classno': classno, 'gender': gender, 'height': height, 'weight': weight},
    index=pd.Index(sid, name='sid'),
)
student

# One teacher per class number, plus the number of students in that class.
tname = ['Kim', 'Lee', 'Park', 'Choi', 'Yun']
classno_series = pd.Series(classno)
# Reindex onto 1..len(tname) so a class that happens to have no students gets
# size 0 instead of causing a length mismatch with `tname`.
classno_count = (
    classno_series.value_counts()
    .sort_index()
    .reindex(range(1, len(tname) + 1), fill_value=0)
)
teacher = pd.DataFrame({'tname': tname, 'size': classno_count})
teacher.index.names = ['classno']

# Left-join teacher rows (indexed by classno) onto the student's classno
# column. `join` keeps the student index, so `sid` never has to be re-attached
# by position afterwards.
info = student.join(teacher, on='classno')
info = info.sort_index(axis=0)

# NOTE(review): 'MBI' is weight squared divided by height. If this is meant to
# be the body-mass index (weight[kg] / height[m]**2), both the formula and the
# column name need correcting -- confirm against the assignment text.
info['MBI'] = info['weight'] ** 2 / info['height']
info

# Same data re-keyed by student name; `sid` becomes an ordinary column.
info_sname = info.reset_index().set_index('sname')
info_sname

# Per-class averages. numeric_only=True skips the string columns (sid, gender,
# tname), which would otherwise raise TypeError on pandas >= 2.0.
info_avg = info_sname.groupby('classno').mean(numeric_only=True)
info_avg = info_avg.drop(columns=['size'])
info_avg

# Mean MBI per (class, gender), reshaped to gender rows x class columns.
pivot_table = info_sname.groupby(['classno', 'gender']).mean(numeric_only=True)
pivot_table = pivot_table.reset_index()
info_pivot = pivot_table.pivot(index='gender', columns='classno', values='MBI')
info_pivot

# Students whose MBI is at or above the median, ordered by sid (the index).
# The median is taken on the single numeric column, not the whole frame.
info_high = info[info['MBI'] >= info['MBI'].median()].sort_index()
info_high.head()

# Deviation of each student's height from the mean height of their own gender.
# A groupby-transform yields a float column in one step, avoiding the
# int -> float upcast of a pre-filled 0 column.
info['height_dev'] = info['height'] - info.groupby('gender')['height'].transform('mean')
info

# Number of students taller than their gender's mean, per gender.
is_tall = info['height_dev'] > 0
ct_tall_female = int((is_tall & (info['gender'] == 'female')).sum())
ct_tall_male = int((is_tall & (info['gender'] == 'male')).sum())
ct_tall_female, ct_tall_male

728x90
반응형