import os
import subprocess
import sys
import traceback
from pathlib import Path
from typing import Any, Dict, Optional

from llama_index.core import SimpleDirectoryReader
from llama_index.core.tools import FunctionTool
from llama_index.readers.file import (
    CSVReader,
    PandasCSVReader,
)


def execute_python_file(file_path: str) -> Dict[str, Any]:
    """
    Execute a Python file in a subprocess and return its output.

    Args:
        file_path: Path to the Python file to execute

    Returns:
        Dictionary with the keys:
          - "success": True when the process exited with status 0
          - "error": None on success, otherwise a human-readable message
          - "output": stripped stdout on success, otherwise None
          - "stderr": captured stderr (only present when the process
            exited with a non-zero status)
    """
    # Check if file exists
    if not os.path.exists(file_path):
        return {
            "success": False,
            "error": f"File not found at {file_path}",
            "output": None
        }

    # Run the script with the interpreter that is running this module so it
    # sees the same environment/packages; a bare "python3" may resolve to a
    # different interpreter or be missing from PATH altogether.
    interpreter = sys.executable or "python3"

    try:
        # Execute the Python file and capture output; check=True turns a
        # non-zero exit status into CalledProcessError.
        result = subprocess.run(
            [interpreter, file_path],
            capture_output=True,
            text=True,
            check=True
        )
    except subprocess.CalledProcessError as e:
        return {
            "success": False,
            "error": f"Execution error: {e}",
            "stderr": e.stderr,
            "output": None
        }
    except Exception as e:
        # Boundary handler: the tool reports failures as data instead of raising.
        return {
            "success": False,
            "error": f"Error: {str(e)}",
            "output": None
        }

    # Return the stdout output (trimmed of whitespace)
    return {
        "success": True,
        "error": None,
        "output": result.stdout.strip()
    }


# Create a function tool for executing Python files
execute_python_file_tool = FunctionTool.from_defaults(
    name="execute_python_file",
    description="Execute a Python file and return its output.",
    fn=execute_python_file
)


# File extensions handled by csv_excel_reader.
_EXCEL_EXTENSIONS = (".xlsx", ".xls")
_CSV_EXTENSION = ".csv"


def _load_csv_with(parser: Any, file_path: str) -> list:
    """Load a single CSV file via SimpleDirectoryReader, parsing it with *parser*."""
    # Pass the full path: SimpleDirectoryReader resolves input_files on their
    # own (not relative to input_dir), so a bare file name would only work
    # when the file happens to live in the current working directory.
    return SimpleDirectoryReader(
        input_files=[file_path],
        file_extractor={_CSV_EXTENSION: parser},
    ).load_data()


def csv_excel_reader(file_path: str) -> list:
    """
    Read and parse CSV or Excel files using LlamaIndex document readers.

    This function determines the file type by extension and uses the appropriate loader:
    - For Excel files (.xlsx, .xls): Uses PandasExcelReader
    - For CSV files (.csv): Uses PandasCSVReader with fallback to CSVReader

    Args:
        file_path (str): Path to the CSV or Excel file to be read

    Returns:
        list: Document objects containing the parsed data from the file

    Raises:
        FileNotFoundError: If the specified file doesn't exist
        ValueError: If the file cannot be parsed or has an unsupported extension

    Examples:
        >>> documents = csv_excel_reader("data/financial_report.csv")
        >>> print(f"Loaded {len(documents)} documents")
        >>>
        >>> # Or with Excel files
        >>> documents = csv_excel_reader("data/quarterly_reports.xlsx")
        >>> print(f"Loaded {len(documents)} documents from Excel file")
    """
    # Check if file exists
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found at {file_path}")

    # Get file extension
    file_ext = os.path.splitext(file_path)[1].lower()

    # Reject unsupported types up front so this error is not re-wrapped by the
    # generic "Error processing file" handler below.
    if file_ext not in _EXCEL_EXTENSIONS and file_ext != _CSV_EXTENSION:
        raise ValueError(
            f"Unsupported file extension: {file_ext}. Supported extensions are .csv, .xlsx, and .xls"
        )

    try:
        if file_ext in _EXCEL_EXTENSIONS:
            # Imported lazily so the module (and the CSV path) still works if
            # the installed readers package or its Excel dependencies are
            # unavailable; any such failure is reported as ValueError below.
            from llama_index.readers.file import PandasExcelReader

            return PandasExcelReader().load_data(Path(file_path))

        try:
            return _load_csv_with(PandasCSVReader(), file_path)
        except Exception:
            # Deliberate best-effort: fall back to the basic CSVReader when
            # the pandas-based parser cannot handle the file.
            return _load_csv_with(CSVReader(), file_path)
    except Exception as e:
        # Surface the full traceback in the message: callers (e.g. an agent
        # invoking the tool) typically only see the exception text.
        error_details = traceback.format_exc()
        raise ValueError(
            f"Error processing file {file_path}: {str(e)}\nDetails: {error_details}"
        ) from e


# Create a function tool for CSV/Excel reading
csv_excel_reader_tool = FunctionTool.from_defaults(
    name="csv_excel_reader",
    description="Reads CSV or Excel files and returns them as Document objects. Uses PandasExcelReader for Excel files and PandasCSVReader for CSV files.",
    fn=csv_excel_reader
)