import gradio as gr import os import pandas as pd import tempfile import nest_asyncio from llama_index.llms.openai import OpenAI from llama_index.core import VectorStoreIndex from llama_parse import LlamaParse from llama_index.core.node_parser import MarkdownElementNodeParser # Apply nest_asyncio to handle async operations in Gradio nest_asyncio.apply() def get_sheet_names(file): """ Reads an uploaded Excel file and returns its sheet names. This function is triggered when a file is uploaded. """ if file is None: # No file, so return an empty, non-interactive dropdown return gr.Dropdown(choices=[], interactive=False) try: # Use pandas to quickly get sheet names without loading all data xls = pd.ExcelFile(file.name) sheet_names = xls.sheet_names # Return an updated, interactive dropdown with the sheet names return gr.Dropdown(choices=sheet_names, value=sheet_names[0], interactive=True) except Exception as e: gr.Warning(f"Could not read Excel file: {e}") return gr.Dropdown(choices=[], interactive=False) def process_and_query(openai_api_key, llama_parse_api_key, uploaded_file, selected_sheet, query_text): """ Processes a selected sheet from an uploaded Excel file and answers a user's query. Args: openai_api_key (str): The user's OpenAI API key. llama_parse_api_key (str): The user's LlamaParse API key. uploaded_file (gradio.File): The uploaded Excel file object. selected_sheet (str): The name of the sheet to process. query_text (str): The question to ask about the document. Returns: str: The answer to the query or an error message. """ if not all([openai_api_key, llama_parse_api_key, uploaded_file, selected_sheet, query_text]): return "Error: Please provide all inputs - both API keys, a file, a selected sheet, and a query." # Create a temporary file to store the selected sheet's data temp_file = None try: os.environ["OPENAI_API_KEY"] = openai_api_key llm = OpenAI(model="gpt-4o-mini", api_key=openai_api_key) # Read the selected sheet using pandas df = pd.read_excel(uploaded_file.name, sheet_name=selected_sheet) # Save the sheet's data to a temporary CSV file for LlamaParse with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix=".csv", encoding='utf-8') as temp_file: df.to_csv(temp_file.name, index=False) temp_file_path = temp_file.name # Initialize LlamaParse parser = LlamaParse( api_key=llama_parse_api_key, result_type="markdown", verbose=True ) # Load data from the temporary file containing only the selected sheet documents = parser.load_data(temp_file_path) # Initialize the parser and process the documents node_parser = MarkdownElementNodeParser(llm=llm, num_workers=4) nodes = node_parser.get_nodes_from_documents(documents) base_nodes, objects = node_parser.get_nodes_and_objects(nodes) # Create the index and query engine recursive_index = VectorStoreIndex(nodes=base_nodes + objects, llm=llm) query_engine = recursive_index.as_query_engine(similarity_top_k=5, llm=llm) # Execute the query response = query_engine.query(query_text) return str(response) except Exception as e: return f"An error occurred: {e}" finally: # Clean up the temporary file if temp_file and os.path.exists(temp_file.name): os.unlink(temp_file.name) # --- Gradio Interface --- with gr.Blocks( title="Excel Sheet Q&A", theme=gr.themes.Soft(), # Keeping the Soft theme as a base for component structure css=""" .gradio-container { background: linear-gradient(to right, #232526, #414345); /* Dark gradient */ color: #E0E0E0; /* Light text for contrast */ } .gr-textbox, .gr-dropdown, .gr-file { background-color: #333333; /* Dark background for input fields */ border: 1px solid #555555; /* Subtle lighter border */ color: #E0E0E0; /* Light text inside inputs */ } .gr-button.primary { background-color: #5e72e4; /* A pleasant, vibrant blue for the primary button */ color: white; border: none; } .gr-button.primary:hover { background-color: #4a5cc2; /* Slightly darker on hover */ } .gradio-container h1, .gradio-container h2, .gradio-container h3 { color: #FFFFFF; /* White headings */ } .gradio-container label { color: #AAAAAA; /* Slightly dimmer label text */ } """ ) as iface: gr.Markdown( """ # 📊 Excel Sheet Q&A 1. Enter your API keys. 2. Upload an Excel file. 3. Choose a specific sheet from the dropdown. 4. Ask a question about the data in that sheet. """ ) with gr.Row(): with gr.Column(scale=1): openai_key_input = gr.Textbox(label="OpenAI API Key", type="password") llamaparse_key_input = gr.Textbox(label="LlamaParse API Key", type="password") file_uploader = gr.File(label="Upload Excel Databook", file_types=[".xlsx", ".xls"]) sheet_selector = gr.Dropdown(label="Choose a Sheet", interactive=False) with gr.Column(scale=2): query_input = gr.Textbox(label="Your Question", placeholder="e.g., What were the total revenues in 2022?", lines=8) submit_button = gr.Button("Ask Question", variant="primary") output_display = gr.Markdown(label="Answer") # Define the interactive workflow file_uploader.upload( fn=get_sheet_names, inputs=[file_uploader], outputs=[sheet_selector] ) submit_button.click( fn=process_and_query, inputs=[openai_key_input, llamaparse_key_input, file_uploader, sheet_selector, query_input], outputs=output_display ) if __name__ == "__main__": iface.launch(share=True)