본문 바로가기
프로그램

[파이썬] 문제 : 네이버 KBO 리그 순위 웹크롤링

by 오디세이99 2023. 6. 1.
728x90
반응형

기록/순위, 야구 : 네이버 스포츠 (naver.com)

import requests
import pandas as pd
import bs4
from pandas.io.json import json_normalize
import json

# Scrape the KBO league standings table from Naver Sports into `headers`
# (header labels) and `data` (one list of stripped cell strings per row).
url = 'https://sports.news.naver.com/kbaseball/record/index?category=kbo'

# Without a timeout, requests.get() can block indefinitely on a stalled
# connection; raise_for_status() keeps us from parsing an HTTP error page
# as if it were the standings table.
response = requests.get(url, timeout=10)
response.raise_for_status()

# Parse the HTML with BeautifulSoup.
soup = bs4.BeautifulSoup(response.text, 'html.parser')

# Extract the header labels from the first header row of the table.
header_rows = soup.select('table thead tr')
if not header_rows:
    # Fail with a descriptive message instead of a bare IndexError when the
    # page has no matching table.
    raise RuntimeError(f'No table header row found at {url}')
header_row = header_rows[0]
headers = [header.text.strip() for header in header_row.find_all('strong')]

# Extract the body data: the first 10 rows, taking only their <td> cells.
# NOTE(review): only <td> cells are collected, so any <th> cell in a body row
# is skipped -- confirm `headers` and each row in `data` line up before
# combining them (e.g. into a DataFrame).
body_rows = soup.select('table tbody tr')
data = [
    [cell.text.strip() for cell in row.find_all('td')]
    for row in body_rows[:10]
]

`data` 변수 출력 결과:

[['LG',
  '49',
  '31',
  '17',
  '1',
  '0.646',
  '0.0',
  '1패',
  '0.375',
  '0.400',
  '6승-3패-1무'],
 ['SSG',
  '47',
  '29',
  '17',
  '1',
  '0.630',
  '1.0',
  '1패',
  '0.335',
  '0.376',
  '6승-4패-0무'],
 ['롯데',
  '44',
  '27',
  '17',
  '0',
  '0.614',
  '2.0',
  '1승',
  '0.334',
  '0.350',
  '5승-5패-0무'],
 ['두산',
  '46',
  '23',
  '22',
  '1',
  '0.511',
  '6.5',
  '1승',
  '0.326',
  '0.361',
  '4승-6패-0무'],
 ['KIA',
  '44',
  '22',
  '22',
  '0',
  '0.500',
  '7.0',
  '2승',
  '0.333',
  '0.361',
  '5승-5패-0무'],
 ['NC',
  '46',
  '23',
  '23',
  '0',
  '0.500',
  '7.0',
  '1패',
  '0.348',
  '0.375',
  '4승-6패-0무'],
 ['삼성',
  '46',
  '20',
  '26',
  '0',
  '0.435',
  '10.0',
  '1승',
  '0.315',
  '0.355',
  '4승-6패-0무'],
 ['키움',
  '50',
  '21',
  '29',
  '0',
  '0.420',
  '11.0',
  '1승',
  '0.321',
  '0.343',
  '4승-6패-0무'],
 ['한화',
  '47',
  '17',
  '27',
  '3',
  '0.386',
  '12.0',
  '1패',
  '0.309',
  '0.308',
  '4승-5패-1무'],
 ['KT',
  '47',
  '16',
  '29',
  '2',
  '0.356',
  '13.5',
  '3패',
  '0.325',
  '0.349',
  '5승-5패-0무']]
728x90
반응형

댓글