import requests
from bs4 import BeautifulSoup
import pandas as pd
import gradio as gr

def scrape_kosdaq_info():
    """Scrape the stock table from the Naver Finance KOSDAQ page.

    Fetches the page, locates the ``<table class="type_2">`` element and
    collects every row that contains a ``<td class="no">`` cell. Cell
    values are kept as the stripped cell text; no numeric conversion is done.

    Returns:
        tuple[pandas.DataFrame, str]: The collected rows (12 columns) and a
        newline-joined debug log. If the request fails, the status code is
        not 200, or the table is missing, the DataFrame is empty (no
        columns) and the log describes the failure.
    """
    columns = [
        "N", "종목명", "현재가", "전일비", "등락률", "거래량",
        "매수호가", "매도호가", "매수총잔량", "매도총잔량", "PER", "ROE",
    ]
    expected_cols = len(columns)

    # Debug messages are accumulated here and returned to the caller.
    debug_info = ["데이터 스크래핑을 시작합니다..."]

    url = "https://finance.naver.com/sise/sise_rise.naver?sosok=1"

    # Without a timeout, requests can block forever on a stalled connection,
    # which would freeze the UI handler that calls this function.
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException as e:
        debug_info.append(f"HTTP 요청 중 예외 발생: {e}")
        return pd.DataFrame(), "\n".join(debug_info)

    if response.status_code != 200:
        debug_info.append(f"HTTP 요청 오류 발생 - 상태 코드: {response.status_code}")
        return pd.DataFrame(), "\n".join(debug_info)
    debug_info.append("HTTP 요청 성공!")

    # Parse the HTML and locate the table that holds the stock rows.
    soup = BeautifulSoup(response.text, "html.parser")
    table = soup.find("table", class_="type_2")
    if table is None:
        debug_info.append("테이블을 찾지 못했습니다.")
        return pd.DataFrame(), "\n".join(debug_info)

    rows = table.find_all("tr")
    debug_info.append(f"테이블 내 tr 태그 개수: {len(rows)}")

    data = []
    for row in rows:
        # Only rows carrying a rank cell (<td class="no">) are stock rows;
        # header and spacer rows are skipped.
        if row.find("td", class_="no") is None:
            continue

        cells = [col.get_text(strip=True) for col in row.find_all("td")]
        debug_info.append(f"행 정보: {cells}")

        if len(cells) < expected_cols:
            # Record malformed rows instead of dropping them silently.
            debug_info.append(
                f"컬럼 수 부족으로 건너뜀: {len(cells)}개 (필요: {expected_cols}개)"
            )
            continue

        # Keep exactly the first 12 cells, in table order.
        data.append(cells[:expected_cols])

    df = pd.DataFrame(data, columns=columns)

    debug_info.append(f"스크래핑 완료. 총 {len(df)}건의 데이터가 수집되었습니다.")
    return df, "\n".join(debug_info)

def main():
    """Build and return the Gradio Blocks app for the scraper.

    The layout has two Markdown headers, a Dataframe for the scraped rows,
    a Textbox for the debug log, and a button that triggers the scrape.
    The app is only constructed here; launching is left to the caller.
    """

    def run_scraper():
        # Click handler: forwards the (DataFrame, debug log) pair unchanged.
        return scrape_kosdaq_info()

    with gr.Blocks() as app:
        gr.Markdown("## 네이버 코스닥 종목 정보 스크래핑")
        gr.Markdown("버튼을 클릭하면 네이버 증권(코스닥) 사이트에서 종목 정보를 가져옵니다.")

        # Output components, created before the button so they render above it.
        result_table = gr.Dataframe(label="스크래핑 결과")
        debug_output = gr.Textbox(label="디버깅 로그")

        scrape_button = gr.Button("데이터 가져오기")
        # Route the handler's two return values to the table and the log box.
        scrape_button.click(
            fn=run_scraper,
            inputs=[],
            outputs=[result_table, debug_output],
        )

    return app

# Build the Gradio app and launch it only when this file is run directly,
# so importing the module has no side effects beyond the definitions above.
if __name__ == "__main__":
    demo = main()
    demo.launch()