본문 바로가기
프로그램

[파이썬] 문제 : datascience Table 처리(nba데이터)

by 오디세이99 2023. 5. 22.
728x90
반응형

Welcome to datascience’s documentation! — datascience 0.17.6 documentation (data8.org)

 

Welcome to datascience’s documentation! — datascience 0.17.6 documentation

 

www.data8.org

Welcome to prob140’s documentation! — prob140 0.2.5.0 documentation (pythonhosted.org)

 

Welcome to prob140’s documentation! — prob140 0.2.5.0 documentation

 

pythonhosted.org

from datascience import *
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
import numpy as np
# Base URL of the directory that holds the CSV data files.
fpath = 'https://raw.githubusercontent.com/data-8/textbook/878363106db4313de925a709b1617d6f328c56a1/assets/data/'

# Read the salary table, rename the "'15-'16 SALARY" column to "SALARY",
# and order the rows by the first column (index 0).
nba = (
    Table.read_table(fpath + 'nba_salaries.csv')
    .relabeled("'15-'16 SALARY", 'SALARY')
    .sort(0)
)
nba

# Problem 1
def get_team_top(name, N):
    """Return the N highest-paid rows of one team.

    Args:
        name: Value to match in the "TEAM" column of the global ``nba`` table.
        N: Maximum number of rows to return.

    Returns:
        A table with the team's rows ordered by "SALARY", highest first,
        cut off after at most N rows. Fewer rows are returned when the team
        has fewer than N rows.
    """
    ranked = nba.where("TEAM", name).sort("SALARY", descending=True)
    # Clamp to the available row count so that asking for more rows than the
    # team has does not index past the end of the table.
    return ranked.take(np.arange(min(N, ranked.num_rows)))


get_team_top('Chicago Bulls', 7)

# Problem 2
def get_team_maxmin(name):
    """Return the highest and lowest salary of one team.

    Args:
        name: Value to match in the "TEAM" column of the global ``nba`` table.

    Returns:
        A ``(max_salary, min_salary)`` tuple taken from the "SALARY" column
        of the matching rows.

    Raises:
        ValueError: If no row of ``nba`` matches ``name``.
    """
    # Filter once and read the extremes directly instead of sorting the
    # filtered table twice just to pick its first row.
    salaries = nba.where("TEAM", name).column('SALARY')
    if salaries.size == 0:
        raise ValueError(f"no rows found for team {name!r}")
    return (salaries.max(), salaries.min())


get_team_maxmin('Chicago Bulls')

# Problem 3
# Per-team difference between the largest and the smallest salary.
# Grouping with max/min yields columns labelled "<column> max" / "<column> min".
team_diff = nba.groups(["TEAM"], max).join('TEAM', nba.groups(["TEAM"], min))
team_diff.append_column('Difference', team_diff.column('SALARY max') - team_diff.column('SALARY min'))
# Keep only the team name and the computed difference.
team_diff = team_diff.drop(['PLAYER max','POSITION max','SALARY max','PLAYER min','POSITION min','SALARY min'])
# sort() returns a new table instead of reordering in place, so the result
# has to be assigned back for the ordering to take effect.
team_diff = team_diff.sort("TEAM", descending=False)
team_diff

# Problem 4
def position_salary(S):
    """Return the mean salary per position for rows above or below a threshold.

    The sign of ``S`` selects the direction of the filter applied to the
    "SALARY" column of the global ``nba`` table:

    * ``S > 0``: keep rows whose salary is above ``S``.
    * ``S < 0``: keep rows whose salary is below ``-S``.

    Args:
        S: Signed, non-zero salary threshold.

    Returns:
        A table with one row per "POSITION" among the kept rows and a
        "SALARY mean" column holding the mean salary of that position.

    Raises:
        ValueError: If ``S`` is neither positive nor negative (for example 0),
            because the filter direction is then undefined.
    """
    if S < 0:
        predicate = are.below(-S)
    elif S > 0:
        predicate = are.above(S)
    else:
        raise ValueError("S must be a non-zero number; its sign selects the filter direction")

    # Filter once and reuse the result for both the sums and the counts.
    selected = nba.where("SALARY", predicate)
    Tb = selected.group('POSITION', sum)
    Tb.append_column('count', selected.group('POSITION').column('count'))

    # mean = sum / count, computed per position.
    Tb.append_column('SALARY mean', Tb.column('SALARY sum') / Tb.column('count'))
    return Tb.drop(['PLAYER sum', 'TEAM sum', 'SALARY sum', 'count'])


position_salary(-2.5)

# Problem 5
# Mean salary per position for salaries above 10 (top) and below 10 (bot).
top, bot = position_salary(10), position_salary(-10)
(top, bot)

# Problem 6
# Combine both tables on "POSITION". The duplicated "SALARY mean" label from
# the second table arrives as "SALARY mean_2"; rename both columns.
topbot = top.join('POSITION', bot, 'POSITION')
topbot.relabel(['SALARY mean', 'SALARY mean_2'], ['TOP', 'BOT'])
topbot

# Problem 7
# Express each position's value as a percentage of its column total, rounded
# to one decimal place. The columns are read from `topbot`, the table being
# extended.
topbot.append_column('TOP(%)', np.round(topbot.column('TOP') * 100 / topbot.column('TOP').sum(), 1))
topbot.append_column('BOT(%)', np.round(topbot.column('BOT') * 100 / topbot.column('BOT').sum(), 1))
topbot

# Problem 8
# Horizontal bar chart of the two percentage columns, one group per position.
topbot.barh('POSITION', select=['TOP(%)', 'BOT(%)'])

# Problem 9
# Histogram of the SALARY column with bin edges 0, 1, ..., 24.
nba.hist('SALARY', bins=np.arange(25))

# Problem 10
# (Assumption: s is a value that does not occur in the nba table, i.e. there
# are no tied ranks.)
def position_ranking(p, s):          # p: POSITION, s: SALARY
    """Return the rank salary ``s`` would take among players of position ``p``.

    The rank is one more than the number of rows of the global ``nba`` table
    with "POSITION" equal to ``p`` whose "SALARY" is strictly greater than
    ``s``. Rank 1 therefore means ``s`` exceeds every salary at that position,
    and a value below every salary ranks one past the last row.

    Args:
        p: Position value to match in the "POSITION" column.
        s: Salary to rank.

    Returns:
        The 1-based rank as an int. A position with no rows yields 1.
    """
    salaries = nba.where("POSITION", are.equal_to(p)).column('SALARY')
    # Counting the salaries above s gives the rank directly, with no need to
    # sort or to walk neighbouring pairs of rows.
    return int(np.count_nonzero(salaries > s)) + 1


position_ranking('C', 13.1)

10

728x90
반응형

댓글