Spaces:
Paused
Paused
import gradio as gr | |
import os | |
import pandas as pd | |
import tempfile | |
import nest_asyncio | |
from llama_index.llms.openai import OpenAI | |
from llama_index.core import VectorStoreIndex | |
from llama_parse import LlamaParse | |
from llama_index.core.node_parser import MarkdownElementNodeParser | |
# Apply nest_asyncio to handle async operations in Gradio | |
nest_asyncio.apply() | |
def get_sheet_names(file): | |
""" | |
Reads an uploaded Excel file and returns its sheet names. | |
This function is triggered when a file is uploaded. | |
""" | |
if file is None: | |
# No file, so return an empty, non-interactive dropdown | |
return gr.Dropdown(choices=[], interactive=False) | |
try: | |
# Use pandas to quickly get sheet names without loading all data | |
xls = pd.ExcelFile(file.name) | |
sheet_names = xls.sheet_names | |
# Return an updated, interactive dropdown with the sheet names | |
return gr.Dropdown(choices=sheet_names, value=sheet_names[0], interactive=True) | |
except Exception as e: | |
gr.Warning(f"Could not read Excel file: {e}") | |
return gr.Dropdown(choices=[], interactive=False) | |
def process_and_query(openai_api_key, llama_parse_api_key, uploaded_file, selected_sheet, query_text): | |
""" | |
Processes a selected sheet from an uploaded Excel file and answers a user's query. | |
Args: | |
openai_api_key (str): The user's OpenAI API key. | |
llama_parse_api_key (str): The user's LlamaParse API key. | |
uploaded_file (gradio.File): The uploaded Excel file object. | |
selected_sheet (str): The name of the sheet to process. | |
query_text (str): The question to ask about the document. | |
Returns: | |
str: The answer to the query or an error message. | |
""" | |
if not all([openai_api_key, llama_parse_api_key, uploaded_file, selected_sheet, query_text]): | |
return "Error: Please provide all inputs - both API keys, a file, a selected sheet, and a query." | |
# Create a temporary file to store the selected sheet's data | |
temp_file = None | |
try: | |
os.environ["OPENAI_API_KEY"] = openai_api_key | |
llm = OpenAI(model="gpt-4o-mini", api_key=openai_api_key) | |
# Read the selected sheet using pandas | |
df = pd.read_excel(uploaded_file.name, sheet_name=selected_sheet) | |
# Save the sheet's data to a temporary CSV file for LlamaParse | |
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix=".csv", encoding='utf-8') as temp_file: | |
df.to_csv(temp_file.name, index=False) | |
temp_file_path = temp_file.name | |
# Initialize LlamaParse | |
parser = LlamaParse( | |
api_key=llama_parse_api_key, | |
result_type="markdown", | |
verbose=True | |
) | |
# Load data from the temporary file containing only the selected sheet | |
documents = parser.load_data(temp_file_path) | |
# Initialize the parser and process the documents | |
node_parser = MarkdownElementNodeParser(llm=llm, num_workers=4) | |
nodes = node_parser.get_nodes_from_documents(documents) | |
base_nodes, objects = node_parser.get_nodes_and_objects(nodes) | |
# Create the index and query engine | |
recursive_index = VectorStoreIndex(nodes=base_nodes + objects, llm=llm) | |
query_engine = recursive_index.as_query_engine(similarity_top_k=5, llm=llm) | |
# Execute the query | |
response = query_engine.query(query_text) | |
return str(response) | |
except Exception as e: | |
return f"An error occurred: {e}" | |
finally: | |
# Clean up the temporary file | |
if temp_file and os.path.exists(temp_file.name): | |
os.unlink(temp_file.name) | |
# --- Gradio Interface --- | |
with gr.Blocks( | |
title="Excel Sheet Q&A", | |
theme=gr.themes.Soft(), | |
css=".gradio-container {background: linear-gradient(to right, #C9D6FF, #E2E2E2)}" | |
) as iface: | |
gr.Markdown( | |
""" | |
# π Excel Sheet Q&A | |
1. Enter your API keys. | |
2. Upload an Excel file. | |
3. Choose a specific sheet from the dropdown. | |
4. Ask a question about the data in that sheet. | |
""" | |
) | |
with gr.Row(): | |
with gr.Column(scale=1): | |
openai_key_input = gr.Textbox(label="OpenAI API Key", type="password") | |
llamaparse_key_input = gr.Textbox(label="LlamaParse API Key", type="password") | |
file_uploader = gr.File(label="Upload Excel Databook", file_types=[".xlsx", ".xls"]) | |
sheet_selector = gr.Dropdown(label="Choose a Sheet", interactive=False) | |
with gr.Column(scale=2): | |
query_input = gr.Textbox(label="Your Question", placeholder="e.g., What were the total revenues in 2022?", lines=8) | |
submit_button = gr.Button("Ask Question", variant="primary") | |
output_display = gr.Markdown(label="Answer") | |
# Define the interactive workflow | |
file_uploader.upload( | |
fn=get_sheet_names, | |
inputs=[file_uploader], | |
outputs=[sheet_selector] | |
) | |
submit_button.click( | |
fn=process_and_query, | |
inputs=[openai_key_input, llamaparse_key_input, file_uploader, sheet_selector, query_input], | |
outputs=output_display | |
) | |
if __name__ == "__main__": | |
iface.launch(share=True) | |