stealth / app.py
Adityabhaskar's picture
Update app.py
d51ad5a verified
raw
history blame
5.31 kB
import gradio as gr
import os
import pandas as pd
import tempfile
import nest_asyncio
from llama_index.llms.openai import OpenAI
from llama_index.core import VectorStoreIndex
from llama_parse import LlamaParse
from llama_index.core.node_parser import MarkdownElementNodeParser
# Apply nest_asyncio to handle async operations in Gradio
nest_asyncio.apply()
def get_sheet_names(file):
"""
Reads an uploaded Excel file and returns its sheet names.
This function is triggered when a file is uploaded.
"""
if file is None:
# No file, so return an empty, non-interactive dropdown
return gr.Dropdown(choices=[], interactive=False)
try:
# Use pandas to quickly get sheet names without loading all data
xls = pd.ExcelFile(file.name)
sheet_names = xls.sheet_names
# Return an updated, interactive dropdown with the sheet names
return gr.Dropdown(choices=sheet_names, value=sheet_names[0], interactive=True)
except Exception as e:
gr.Warning(f"Could not read Excel file: {e}")
return gr.Dropdown(choices=[], interactive=False)
def process_and_query(openai_api_key, llama_parse_api_key, uploaded_file, selected_sheet, query_text):
"""
Processes a selected sheet from an uploaded Excel file and answers a user's query.
Args:
openai_api_key (str): The user's OpenAI API key.
llama_parse_api_key (str): The user's LlamaParse API key.
uploaded_file (gradio.File): The uploaded Excel file object.
selected_sheet (str): The name of the sheet to process.
query_text (str): The question to ask about the document.
Returns:
str: The answer to the query or an error message.
"""
if not all([openai_api_key, llama_parse_api_key, uploaded_file, selected_sheet, query_text]):
return "Error: Please provide all inputs - both API keys, a file, a selected sheet, and a query."
# Create a temporary file to store the selected sheet's data
temp_file = None
try:
os.environ["OPENAI_API_KEY"] = openai_api_key
llm = OpenAI(model="gpt-4o-mini", api_key=openai_api_key)
# Read the selected sheet using pandas
df = pd.read_excel(uploaded_file.name, sheet_name=selected_sheet)
# Save the sheet's data to a temporary CSV file for LlamaParse
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix=".csv", encoding='utf-8') as temp_file:
df.to_csv(temp_file.name, index=False)
temp_file_path = temp_file.name
# Initialize LlamaParse
parser = LlamaParse(
api_key=llama_parse_api_key,
result_type="markdown",
verbose=True
)
# Load data from the temporary file containing only the selected sheet
documents = parser.load_data(temp_file_path)
# Initialize the parser and process the documents
node_parser = MarkdownElementNodeParser(llm=llm, num_workers=4)
nodes = node_parser.get_nodes_from_documents(documents)
base_nodes, objects = node_parser.get_nodes_and_objects(nodes)
# Create the index and query engine
recursive_index = VectorStoreIndex(nodes=base_nodes + objects, llm=llm)
query_engine = recursive_index.as_query_engine(similarity_top_k=5, llm=llm)
# Execute the query
response = query_engine.query(query_text)
return str(response)
except Exception as e:
return f"An error occurred: {e}"
finally:
# Clean up the temporary file
if temp_file and os.path.exists(temp_file.name):
os.unlink(temp_file.name)
# --- Gradio Interface ---
with gr.Blocks(
title="Excel Sheet Q&A",
theme=gr.themes.Soft(),
css=".gradio-container {background: linear-gradient(to right, #C9D6FF, #E2E2E2)}"
) as iface:
gr.Markdown(
"""
# πŸ“Š Excel Sheet Q&A
1. Enter your API keys.
2. Upload an Excel file.
3. Choose a specific sheet from the dropdown.
4. Ask a question about the data in that sheet.
"""
)
with gr.Row():
with gr.Column(scale=1):
openai_key_input = gr.Textbox(label="OpenAI API Key", type="password")
llamaparse_key_input = gr.Textbox(label="LlamaParse API Key", type="password")
file_uploader = gr.File(label="Upload Excel Databook", file_types=[".xlsx", ".xls"])
sheet_selector = gr.Dropdown(label="Choose a Sheet", interactive=False)
with gr.Column(scale=2):
query_input = gr.Textbox(label="Your Question", placeholder="e.g., What were the total revenues in 2022?", lines=8)
submit_button = gr.Button("Ask Question", variant="primary")
output_display = gr.Markdown(label="Answer")
# Define the interactive workflow
file_uploader.upload(
fn=get_sheet_names,
inputs=[file_uploader],
outputs=[sheet_selector]
)
submit_button.click(
fn=process_and_query,
inputs=[openai_key_input, llamaparse_key_input, file_uploader, sheet_selector, query_input],
outputs=output_display
)
if __name__ == "__main__":
iface.launch(share=True)