File size: 5,308 Bytes
a9ad3d8
d51ad5a
 
 
 
 
 
 
 
a9ad3d8
d51ad5a
 
a9ad3d8
d51ad5a
6ef497e
d51ad5a
 
6ef497e
d51ad5a
 
 
 
 
 
 
 
 
 
 
 
a9ad3d8
3e327a8
d51ad5a
 
 
baf3f32
d51ad5a
 
 
 
 
 
a9ad3d8
d51ad5a
 
 
 
 
3e327a8
d51ad5a
 
 
 
 
3e327a8
d51ad5a
 
3e327a8
d51ad5a
 
 
 
 
 
 
 
 
 
3e327a8
 
d51ad5a
 
3e327a8
d51ad5a
 
 
 
3e327a8
d51ad5a
 
 
6ef497e
d51ad5a
 
6ef497e
d51ad5a
6ef497e
d51ad5a
 
 
 
 
 
6ef497e
d51ad5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9ad3d8
 
d51ad5a
 
 
 
6ef497e
a9ad3d8
d51ad5a
 
 
6ef497e
d51ad5a
 
 
 
 
6ef497e
 
d51ad5a
 
 
 
6ef497e
 
a9ad3d8
d51ad5a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import gradio as gr
import os
import pandas as pd
import tempfile
import nest_asyncio
from llama_index.llms.openai import OpenAI
from llama_index.core import VectorStoreIndex
from llama_parse import LlamaParse
from llama_index.core.node_parser import MarkdownElementNodeParser

# Apply nest_asyncio to handle async operations in Gradio
nest_asyncio.apply()

def get_sheet_names(file):
    """
    Reads an uploaded Excel file and returns its sheet names.
    This function is triggered when a file is uploaded.
    """
    if file is None:
        # No file, so return an empty, non-interactive dropdown
        return gr.Dropdown(choices=[], interactive=False)
    try:
        # Use pandas to quickly get sheet names without loading all data
        xls = pd.ExcelFile(file.name)
        sheet_names = xls.sheet_names
        # Return an updated, interactive dropdown with the sheet names
        return gr.Dropdown(choices=sheet_names, value=sheet_names[0], interactive=True)
    except Exception as e:
        gr.Warning(f"Could not read Excel file: {e}")
        return gr.Dropdown(choices=[], interactive=False)


def process_and_query(openai_api_key, llama_parse_api_key, uploaded_file, selected_sheet, query_text):
    """
    Processes a selected sheet from an uploaded Excel file and answers a user's query.

    Args:
        openai_api_key (str): The user's OpenAI API key.
        llama_parse_api_key (str): The user's LlamaParse API key.
        uploaded_file (gradio.File): The uploaded Excel file object.
        selected_sheet (str): The name of the sheet to process.
        query_text (str): The question to ask about the document.

    Returns:
        str: The answer to the query or an error message.
    """
    if not all([openai_api_key, llama_parse_api_key, uploaded_file, selected_sheet, query_text]):
        return "Error: Please provide all inputs - both API keys, a file, a selected sheet, and a query."

    # Create a temporary file to store the selected sheet's data
    temp_file = None
    try:
        os.environ["OPENAI_API_KEY"] = openai_api_key
        llm = OpenAI(model="gpt-4o-mini", api_key=openai_api_key)

        # Read the selected sheet using pandas
        df = pd.read_excel(uploaded_file.name, sheet_name=selected_sheet)
        
        # Save the sheet's data to a temporary CSV file for LlamaParse
        with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix=".csv", encoding='utf-8') as temp_file:
            df.to_csv(temp_file.name, index=False)
            temp_file_path = temp_file.name

        # Initialize LlamaParse
        parser = LlamaParse(
            api_key=llama_parse_api_key,
            result_type="markdown",
            verbose=True
        )
        
        # Load data from the temporary file containing only the selected sheet
        documents = parser.load_data(temp_file_path)
        
        # Initialize the parser and process the documents
        node_parser = MarkdownElementNodeParser(llm=llm, num_workers=4)
        nodes = node_parser.get_nodes_from_documents(documents)
        base_nodes, objects = node_parser.get_nodes_and_objects(nodes)
        
        # Create the index and query engine
        recursive_index = VectorStoreIndex(nodes=base_nodes + objects, llm=llm)
        query_engine = recursive_index.as_query_engine(similarity_top_k=5, llm=llm)
        
        # Execute the query
        response = query_engine.query(query_text)
        
        return str(response)

    except Exception as e:
        return f"An error occurred: {e}"
    finally:
        # Clean up the temporary file
        if temp_file and os.path.exists(temp_file.name):
            os.unlink(temp_file.name)

# --- Gradio Interface ---
with gr.Blocks(
    title="Excel Sheet Q&A",
    theme=gr.themes.Soft(),
    css=".gradio-container {background: linear-gradient(to right, #C9D6FF, #E2E2E2)}"
) as iface:
    gr.Markdown(
        """
        # 📊 Excel Sheet Q&A
        
        1.  Enter your API keys.
        2.  Upload an Excel file.
        3.  Choose a specific sheet from the dropdown.
        4.  Ask a question about the data in that sheet.
        """
    )
    
    with gr.Row():
        with gr.Column(scale=1):
            openai_key_input = gr.Textbox(label="OpenAI API Key", type="password")
            llamaparse_key_input = gr.Textbox(label="LlamaParse API Key", type="password")
            file_uploader = gr.File(label="Upload Excel Databook", file_types=[".xlsx", ".xls"])
            sheet_selector = gr.Dropdown(label="Choose a Sheet", interactive=False)
        
        with gr.Column(scale=2):
            query_input = gr.Textbox(label="Your Question", placeholder="e.g., What were the total revenues in 2022?", lines=8)
            submit_button = gr.Button("Ask Question", variant="primary")
            output_display = gr.Markdown(label="Answer")
    
    # Define the interactive workflow
    file_uploader.upload(
        fn=get_sheet_names,
        inputs=[file_uploader],
        outputs=[sheet_selector]
    )
    
    submit_button.click(
        fn=process_and_query,
        inputs=[openai_key_input, llamaparse_key_input, file_uploader, sheet_selector, query_input],
        outputs=output_display
    )

if __name__ == "__main__":
    iface.launch(share=True)