Spaces:
Paused
Paused
File size: 5,308 Bytes
a9ad3d8 d51ad5a a9ad3d8 d51ad5a a9ad3d8 d51ad5a 6ef497e d51ad5a 6ef497e d51ad5a a9ad3d8 3e327a8 d51ad5a baf3f32 d51ad5a a9ad3d8 d51ad5a 3e327a8 d51ad5a 3e327a8 d51ad5a 3e327a8 d51ad5a 3e327a8 d51ad5a 3e327a8 d51ad5a 3e327a8 d51ad5a 6ef497e d51ad5a 6ef497e d51ad5a 6ef497e d51ad5a 6ef497e d51ad5a a9ad3d8 d51ad5a 6ef497e a9ad3d8 d51ad5a 6ef497e d51ad5a 6ef497e d51ad5a 6ef497e a9ad3d8 d51ad5a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import gradio as gr
import os
import pandas as pd
import tempfile
import nest_asyncio
from llama_index.llms.openai import OpenAI
from llama_index.core import VectorStoreIndex
from llama_parse import LlamaParse
from llama_index.core.node_parser import MarkdownElementNodeParser
# Apply nest_asyncio to handle async operations in Gradio
nest_asyncio.apply()
def get_sheet_names(file):
"""
Reads an uploaded Excel file and returns its sheet names.
This function is triggered when a file is uploaded.
"""
if file is None:
# No file, so return an empty, non-interactive dropdown
return gr.Dropdown(choices=[], interactive=False)
try:
# Use pandas to quickly get sheet names without loading all data
xls = pd.ExcelFile(file.name)
sheet_names = xls.sheet_names
# Return an updated, interactive dropdown with the sheet names
return gr.Dropdown(choices=sheet_names, value=sheet_names[0], interactive=True)
except Exception as e:
gr.Warning(f"Could not read Excel file: {e}")
return gr.Dropdown(choices=[], interactive=False)
def process_and_query(openai_api_key, llama_parse_api_key, uploaded_file, selected_sheet, query_text):
"""
Processes a selected sheet from an uploaded Excel file and answers a user's query.
Args:
openai_api_key (str): The user's OpenAI API key.
llama_parse_api_key (str): The user's LlamaParse API key.
uploaded_file (gradio.File): The uploaded Excel file object.
selected_sheet (str): The name of the sheet to process.
query_text (str): The question to ask about the document.
Returns:
str: The answer to the query or an error message.
"""
if not all([openai_api_key, llama_parse_api_key, uploaded_file, selected_sheet, query_text]):
return "Error: Please provide all inputs - both API keys, a file, a selected sheet, and a query."
# Create a temporary file to store the selected sheet's data
temp_file = None
try:
os.environ["OPENAI_API_KEY"] = openai_api_key
llm = OpenAI(model="gpt-4o-mini", api_key=openai_api_key)
# Read the selected sheet using pandas
df = pd.read_excel(uploaded_file.name, sheet_name=selected_sheet)
# Save the sheet's data to a temporary CSV file for LlamaParse
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix=".csv", encoding='utf-8') as temp_file:
df.to_csv(temp_file.name, index=False)
temp_file_path = temp_file.name
# Initialize LlamaParse
parser = LlamaParse(
api_key=llama_parse_api_key,
result_type="markdown",
verbose=True
)
# Load data from the temporary file containing only the selected sheet
documents = parser.load_data(temp_file_path)
# Initialize the parser and process the documents
node_parser = MarkdownElementNodeParser(llm=llm, num_workers=4)
nodes = node_parser.get_nodes_from_documents(documents)
base_nodes, objects = node_parser.get_nodes_and_objects(nodes)
# Create the index and query engine
recursive_index = VectorStoreIndex(nodes=base_nodes + objects, llm=llm)
query_engine = recursive_index.as_query_engine(similarity_top_k=5, llm=llm)
# Execute the query
response = query_engine.query(query_text)
return str(response)
except Exception as e:
return f"An error occurred: {e}"
finally:
# Clean up the temporary file
if temp_file and os.path.exists(temp_file.name):
os.unlink(temp_file.name)
# --- Gradio Interface ---
with gr.Blocks(
title="Excel Sheet Q&A",
theme=gr.themes.Soft(),
css=".gradio-container {background: linear-gradient(to right, #C9D6FF, #E2E2E2)}"
) as iface:
gr.Markdown(
"""
# 📊 Excel Sheet Q&A
1. Enter your API keys.
2. Upload an Excel file.
3. Choose a specific sheet from the dropdown.
4. Ask a question about the data in that sheet.
"""
)
with gr.Row():
with gr.Column(scale=1):
openai_key_input = gr.Textbox(label="OpenAI API Key", type="password")
llamaparse_key_input = gr.Textbox(label="LlamaParse API Key", type="password")
file_uploader = gr.File(label="Upload Excel Databook", file_types=[".xlsx", ".xls"])
sheet_selector = gr.Dropdown(label="Choose a Sheet", interactive=False)
with gr.Column(scale=2):
query_input = gr.Textbox(label="Your Question", placeholder="e.g., What were the total revenues in 2022?", lines=8)
submit_button = gr.Button("Ask Question", variant="primary")
output_display = gr.Markdown(label="Answer")
# Define the interactive workflow
file_uploader.upload(
fn=get_sheet_names,
inputs=[file_uploader],
outputs=[sheet_selector]
)
submit_button.click(
fn=process_and_query,
inputs=[openai_key_input, llamaparse_key_input, file_uploader, sheet_selector, query_input],
outputs=output_display
)
if __name__ == "__main__":
iface.launch(share=True)
|