import gradio as gr
import requests
import os
from openai import OpenAI

# Upstage API key read from the environment; None if the variable is unset.
UPSTAGE_API_KEY = os.getenv("UPSTAGE_API_KEY")

def parse_document(filename):
    """Parse a PDF document with the Upstage Document Parse API.

    Args:
        filename: Path to the PDF file to upload.

    Returns:
        The extracted HTML content as a string, or "" if the response
        contains no HTML.

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
        OSError: If the file cannot be opened.
    """
    # API endpoint and authorization header.
    url = "https://api.upstage.ai/v1/document-ai/document-parse"
    headers = {'Authorization': f'Bearer {UPSTAGE_API_KEY}'}

    # Additional request parameters.
    data = {
        "base64_encoding": "['table']",  # Request base64 encoding of table elements
        "model": "document-parse"        # Specify the model to use
    }

    # Open the file with a context manager so the handle is closed even if
    # the request raises (the original leaked the open file object).
    with open(filename, "rb") as pdf_file:
        files = {"document": pdf_file}
        response = requests.post(url, headers=headers, files=files, data=data)

    # Fail loudly on HTTP errors instead of silently returning "" later.
    response.raise_for_status()

    # Extract the HTML content from the JSON response.
    result = response.json()
    return result.get("content", {}).get("html", "")
    
def chat_with_document(history, html_text, user_question):
    """Answer a question about the parsed HTML document with Solar Pro.

    Args:
        history: List of (user, assistant) message pairs, or None/empty on
            the first turn.
        html_text: HTML text extracted from the uploaded document.
        user_question: The question to answer in this turn.

    Returns:
        A 3-tuple: updated history for the chatbot display, the same
        history for the state, and "" to clear the input textbox.
    """
    # OpenAI-compatible client pointed at the Upstage Solar endpoint.
    llm = OpenAI(api_key=UPSTAGE_API_KEY, base_url="https://api.upstage.ai/v1")

    # First turn: start from an empty conversation.
    turns = history or []

    # System prompt carries the instructions plus the document itself.
    system_prompt = f"""The following is a financial statement document extracted in HTML format.
                        Please answer user questions accurately and concisely in Korean, based on the text within HTML tags.
                        
                        Document:
                        {html_text}
                    """

    # Replay every prior turn so the model sees the full conversation,
    # then append the current question.
    messages = [{"role": "system", "content": system_prompt}]
    for asked, answered in turns:
        messages += [
            {"role": "user", "content": asked},
            {"role": "assistant", "content": answered},
        ]
    messages.append({"role": "user", "content": user_question})

    # Deterministic generation (temperature 0) with a bounded reply length.
    completion = llm.chat.completions.create(
        model="solar-pro",
        messages=messages,
        temperature=0,
        max_tokens=1024
    )
    reply = completion.choices[0].message.content

    # Record this turn and return the updated display/state plus a cleared box.
    turns.append((user_question, reply))
    return turns, turns, ""

    
def set_example_question(example_text):
    """Return *example_text* unchanged, for filling the question textbox."""
    return example_text

def toggle_html_view(current_html, is_visible):
    """Swap which view of the document is shown: raw text vs rendered HTML.

    Args:
        current_html: The document HTML currently held by the textbox.
        is_visible: Current value of the visibility toggle state.

    Returns:
        Updates for (html_output textbox, html_display panel, toggle state):
        the two components get opposite visibility and the state is flipped.
    """
    flipped = not is_visible
    textbox_update = gr.update(value=current_html, visible=flipped)   # html_output toggles
    rendered_update = gr.update(value=current_html, visible=is_visible)  # html_display toggles the other way
    return textbox_update, rendered_update, flipped


# Gradio UI: upload a PDF, convert it to HTML, then run document-grounded Q&A.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 재무제표 분석 챗봇")
    gr.Markdown("1. Document Parse API로 PDF 문서를 HTML로 변환합니다.\n"
                "2. Solar LLM을 통해 문서 기반 질문에 답변합니다.")

    with gr.Row():
        file_input = gr.File(label="📎 재무제표 업로드")
        parse_btn = gr.Button("문서 HTML 변환")

    # Raw-text view is shown by default; the rendered-HTML view starts hidden.
    html_output = gr.Textbox(label="📘 문서 내용", lines=10, visible=True)
    html_display = gr.HTML(visible=False)
    toggle_html_btn = gr.Button("🔁 HTML 보기 전환")
    html_visible_state = gr.State(False)

    parse_btn.click(fn=parse_document, inputs=file_input, outputs=html_output)
    toggle_html_btn.click(
        fn=toggle_html_view,
        inputs=[html_output, html_visible_state],
        outputs=[html_output, html_display, html_visible_state]
    )

    chatbot = gr.Chatbot(label="💬 문서 기반 Q&A", height=400)
    user_question = gr.Textbox(label="❓ 질문을 입력하세요", lines=2)
    answer_btn = gr.Button("답변 생성")

    # Conversation history lives in session state as (user, bot) pairs.
    chat_state = gr.State([])

    with gr.Row():
        gr.Markdown("💡 예제 질문:")
        ex1 = gr.Button("어떤 기업의 재무제표인가요?")
        ex2 = gr.Button("Q3 분기의 총 매출액은 얼마인가요?")

    # Bind each button's question text explicitly: the original wiring called
    # set_example_question with inputs=[] (zero arguments), which raised a
    # TypeError on click and never filled the textbox.
    ex1.click(lambda: set_example_question("어떤 기업의 재무제표인가요?"),
              inputs=[], outputs=user_question)
    ex2.click(lambda: set_example_question("Q3 분기의 총 매출액은 얼마인가요?"),
              inputs=[], outputs=user_question)

    answer_btn.click(
        fn=chat_with_document,
        inputs=[chat_state, html_output, user_question],
        outputs=[chatbot, chat_state, user_question],
        show_progress=True
    )

demo.launch()