# Naver Finance KOSDAQ top-gainers scraper with a Gradio front-end.
import requests
from bs4 import BeautifulSoup
import pandas as pd
import gradio as gr
def scrape_kosdaq_info():
    """Scrape the KOSDAQ top-gainers table from Naver Finance.

    Returns:
        tuple[pandas.DataFrame, str]: a 12-column DataFrame
        (rank, stock name, price, change, change rate, volume,
        bid/ask prices, total bid/ask volumes, PER, ROE) and a
        newline-joined debug log. On any failure an empty DataFrame
        is returned together with the log explaining what went wrong.
    """
    # Human-readable trace that is surfaced in the UI next to the table.
    debug_info = []
    debug_info.append("데이터 스크래핑을 시작합니다...")

    # KOSDAQ (sosok=1) top-gainers page.
    url = "https://finance.naver.com/sise/sise_rise.naver?sosok=1"
    # Naver tends to reject requests without a browser-like User-Agent.
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        # timeout so a stalled connection cannot hang the UI forever
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            debug_info.append(f"HTTP 요청 오류 발생 - 상태 코드: {response.status_code}")
            return pd.DataFrame(), "\n".join(debug_info)
        debug_info.append("HTTP 요청 성공!")
    except Exception as e:
        debug_info.append(f"HTTP 요청 중 예외 발생: {e}")
        return pd.DataFrame(), "\n".join(debug_info)

    # Parse from the raw bytes so BeautifulSoup can honour the page's own
    # charset declaration (Naver Finance serves EUC-KR) instead of
    # requests' header-based guess.
    soup = BeautifulSoup(response.content, "html.parser")

    # The gainers list lives in the page's <table class="type_2">.
    table = soup.find("table", class_="type_2")
    if not table:
        debug_info.append("테이블을 찾지 못했습니다.")
        return pd.DataFrame(), "\n".join(debug_info)

    rows = table.find_all("tr")
    debug_info.append(f"테이블 내 tr 태그 개수: {len(rows)}")

    data = []
    for row in rows:
        # Only rows carrying <td class="no"> (the rank cell) are real
        # stock rows; the rest are headers and separator rows.
        if not row.find("td", class_="no"):
            continue
        cols = row.find_all("td")
        # Log each row for debugging.
        debug_info.append(f"행 정보: {[col.get_text(strip=True) for col in cols]}")
        if len(cols) >= 12:
            # Keep the first 12 cells in page order (rank .. ROE).
            data.append([col.get_text(strip=True) for col in cols[:12]])

    df = pd.DataFrame(
        data,
        columns=["N", "종목명", "현재가", "전일비", "등락률", "거래량",
                 "매수호가", "매도호가", "매수총잔량", "매도총잔량",
                 "PER", "ROE"],
    )
    debug_info.append(f"스크래핑 완료. 총 {len(df)}건의 데이터가 수집되었습니다.")
    return df, "\n".join(debug_info)
def main():
    """Build the Gradio UI: a single button that runs the scraper and
    shows the resulting table plus the debug log.

    Returns:
        gr.Blocks: the assembled (not yet launched) Gradio app.
    """
    with gr.Blocks() as demo:
        gr.Markdown("## 네이버 코스닥 종목 정보 스크래핑")
        gr.Markdown("버튼을 클릭하면 네이버 증권(코스닥) 사이트에서 종목 정보를 가져옵니다.")

        # Output components for the scraped table and the debug trace.
        result_table = gr.Dataframe(label="스크래핑 결과")
        debug_output = gr.Textbox(label="디버깅 로그")

        scrape_button = gr.Button("데이터 가져오기")
        # Wire the click straight to the scraper: it already returns
        # (DataFrame, log) in the order the outputs expect, so no
        # pass-through wrapper function is needed.
        scrape_button.click(
            fn=scrape_kosdaq_info,
            inputs=[],
            outputs=[result_table, debug_output],
        )
    return demo
if __name__ == "__main__":
    # Build the UI and start the local Gradio server.
    app = main()
    app.launch()
# end of file