Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import pandas as pd
|
|
|
2 |
import os
|
3 |
import warnings
|
4 |
import gradio as gr
|
@@ -42,9 +43,10 @@ class ExcelPandasAgent:
|
|
42 |
for sheet_name in sheet_names:
|
43 |
try:
|
44 |
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
|
|
45 |
df = self._clean_dataframe(df)
|
46 |
self.excel_data[sheet_name] = df
|
47 |
-
self.logs.append(f" - Indexed sheet '{sheet_name}' ({df.shape[0]} rows × {df.shape[1]} columns)")
|
48 |
except Exception as e:
|
49 |
self.logs.append(f"⚠️ Error loading sheet '{sheet_name}': {str(e)}")
|
50 |
continue
|
@@ -55,19 +57,22 @@ class ExcelPandasAgent:
|
|
55 |
raise Exception(f"Error loading Excel file: {str(e)}")
|
56 |
|
57 |
def _clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
|
58 |
-
"""
|
|
|
|
|
59 |
df = df.dropna(how='all').dropna(axis=1, how='all').reset_index(drop=True)
|
60 |
-
# Attempt to convert object columns to numeric or datetime where possible
|
61 |
for col in df.columns:
|
|
|
62 |
if df[col].dtype == 'object':
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
71 |
return df
|
72 |
|
73 |
def query_sheet(self, query: str, sheet_name: str) -> str:
|
@@ -85,8 +90,8 @@ class ExcelPandasAgent:
|
|
85 |
self.llm,
|
86 |
df,
|
87 |
verbose=True,
|
88 |
-
max_iterations=50,
|
89 |
-
max_execution_time=300,
|
90 |
agent_executor_kwargs={"handle_parsing_errors": True},
|
91 |
allow_dangerous_code=True
|
92 |
)
|
|
|
1 |
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
import os
|
4 |
import warnings
|
5 |
import gradio as gr
|
|
|
43 |
for sheet_name in sheet_names:
|
44 |
try:
|
45 |
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
46 |
+
# The cleaning function is called here for each sheet
|
47 |
df = self._clean_dataframe(df)
|
48 |
self.excel_data[sheet_name] = df
|
49 |
+
self.logs.append(f" - Indexed and cleaned sheet '{sheet_name}' ({df.shape[0]} rows × {df.shape[1]} columns)")
|
50 |
except Exception as e:
|
51 |
self.logs.append(f"⚠️ Error loading sheet '{sheet_name}': {str(e)}")
|
52 |
continue
|
|
|
57 |
raise Exception(f"Error loading Excel file: {str(e)}")
|
58 |
|
59 |
def _clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
|
60 |
+
"""
|
61 |
+
Cleans a DataFrame by removing empty rows/columns and robustly converting types.
|
62 |
+
"""
|
63 |
df = df.dropna(how='all').dropna(axis=1, how='all').reset_index(drop=True)
|
|
|
64 |
for col in df.columns:
|
65 |
+
# Apply to object columns that might contain mixed numeric/text data
|
66 |
if df[col].dtype == 'object':
|
67 |
+
# This is the key change. It attempts to convert the column
|
68 |
+
# to numbers. Any value that fails (like 'Apr') becomes NaN (Not a Number).
|
69 |
+
df[col] = pd.to_numeric(df[col], errors='coerce')
|
70 |
+
|
71 |
+
# Now, find all numeric columns (including those just converted)
|
72 |
+
# and fill any resulting NaN values with 0. This prepares them for calculations.
|
73 |
+
for col in df.select_dtypes(include=np.number).columns:
|
74 |
+
df[col] = df[col].fillna(0)
|
75 |
+
|
76 |
return df
|
77 |
|
78 |
def query_sheet(self, query: str, sheet_name: str) -> str:
|
|
|
90 |
self.llm,
|
91 |
df,
|
92 |
verbose=True,
|
93 |
+
max_iterations=50,
|
94 |
+
max_execution_time=300,
|
95 |
agent_executor_kwargs={"handle_parsing_errors": True},
|
96 |
allow_dangerous_code=True
|
97 |
)
|