Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -24,26 +24,25 @@ class HybridExcelQuerySystem:
|
|
24 |
self.sheet_names = []
|
25 |
|
26 |
def _pivot_numerical_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
27 |
-
"""Pivots the special 'Numerical Data' sheet into a clean, usable format."""
|
28 |
-
# Find the row with month names, which will become our header
|
29 |
header_row_index = 0
|
30 |
-
df = df.T
|
31 |
-
df.columns = df.iloc[header_row_index]
|
32 |
-
df = df.drop(header_row_index) # Drop the header row from data
|
33 |
-
df = df.reset_index().rename(columns={'index': 'Month'}) # Make months a column
|
34 |
|
35 |
-
#
|
36 |
-
df
|
|
|
|
|
|
|
37 |
|
38 |
-
#
|
39 |
for col in df.columns:
|
40 |
if col != 'Month':
|
|
|
|
|
41 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
42 |
|
43 |
-
# Drop columns that are entirely empty after conversion
|
44 |
df = df.dropna(axis=1, how='all')
|
45 |
-
df = df.rename(columns={'Period': 'Month'})
|
46 |
-
# Filter out any non-month rows that might have slipped through
|
47 |
months = ['Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar']
|
48 |
df = df[df['Month'].isin(months)]
|
49 |
return df
|
@@ -58,14 +57,12 @@ class HybridExcelQuerySystem:
|
|
58 |
for sheet_name in self.sheet_names:
|
59 |
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
60 |
|
61 |
-
# --- Prepare for Agent ---
|
62 |
if sheet_name == "Numerical Data":
|
63 |
agent_df = self._pivot_numerical_data(df.copy())
|
64 |
else:
|
65 |
agent_df = self._clean_dataframe_for_agent(df.copy())
|
66 |
self.dataframes[sheet_name] = agent_df
|
67 |
|
68 |
-
# --- Prepare for RAG ---
|
69 |
rag_df = self._clean_dataframe_for_rag(df.copy())
|
70 |
markdown_text = rag_df.to_markdown(index=False)
|
71 |
doc = Document(text=markdown_text, metadata={"source": sheet_name})
|
@@ -127,7 +124,7 @@ class HybridExcelQuerySystem:
|
|
127 |
verbose=True,
|
128 |
allow_dangerous_code=True,
|
129 |
max_iterations=15,
|
130 |
-
handle_parsing_errors=True
|
131 |
)
|
132 |
response = agent.invoke(query)
|
133 |
return {"answer": response['output'], "tool_used": "Calculation (Pandas Agent)"}
|
|
|
24 |
self.sheet_names = []
|
25 |
|
26 |
def _pivot_numerical_data(self, df: pd.DataFrame) -> pd.DataFrame:
    """Pivot the 'Numerical Data' sheet so that each month becomes one row.

    The frame is transposed, the first transposed row is promoted to the
    column header, and the former column labels are exposed as a 'Month'
    column. Column names are then sanitised, every non-'Month' column is
    converted to numbers, and only rows whose 'Month' is a known month
    abbreviation are kept.

    Args:
        df: The sheet as loaded by the caller. It is not modified in place;
            every step rebinds a new frame.

    Returns:
        A DataFrame with a 'Month' column plus one numeric column per
        remaining header entry. Columns that contain no numeric value at
        all are dropped.
    """
    header_row_index = 0
    months = ['Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar']

    df = df.T
    df.columns = df.iloc[header_row_index]
    # Keep everything after the header row. Slicing by position (rather than
    # dropping by label) works because the transposed index holds the old
    # column labels, not integers.
    df = df.iloc[header_row_index + 1:]
    df = df.reset_index().rename(columns={'index': 'Month'})

    # Sanitise header names: anything outside [a-zA-Z0-9_%] becomes '_', then
    # every run of underscores is collapsed to one. A plain
    # replace('__', '_') is a single pass and would leave '__' behind for
    # runs of three or more.
    df.columns = (
        df.columns.str.strip()
        .str.replace(r'[^a-zA-Z0-9_%]', '_', regex=True)
        .str.replace(r'_{2,}', '_', regex=True)
    )

    # Make column names unique. With duplicate labels df[col] returns a
    # DataFrame, which has no .str accessor, so the conversion loop below
    # would raise. Later duplicates get a numeric suffix (_2, _3, ...).
    seen = {}
    unique_names = []
    for name in df.columns:
        seen[name] = seen.get(name, 0) + 1
        if seen[name] == 1:
            unique_names.append(name)
        else:
            unique_names.append(f"{str(name).rstrip('_')}_{seen[name]}")
    df.columns = unique_names

    # Convert every value column to numbers.
    for col in df.columns:
        if col == 'Month':
            continue
        # Strip '%' so values such as '12%' parse; anything that is still
        # not a number is coerced to NaN.
        without_percent = df[col].astype(str).str.replace('%', '', regex=False)
        df[col] = pd.to_numeric(without_percent, errors='coerce')

    # Columns with no numeric value at all carry nothing useful.
    df = df.dropna(axis=1, how='all')
    df = df.rename(columns={'Period': 'Month'})

    # Keep only rows labelled with a month abbreviation.
    df = df[df['Month'].isin(months)]
    return df
|
|
|
57 |
for sheet_name in self.sheet_names:
|
58 |
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
59 |
|
|
|
60 |
if sheet_name == "Numerical Data":
|
61 |
agent_df = self._pivot_numerical_data(df.copy())
|
62 |
else:
|
63 |
agent_df = self._clean_dataframe_for_agent(df.copy())
|
64 |
self.dataframes[sheet_name] = agent_df
|
65 |
|
|
|
66 |
rag_df = self._clean_dataframe_for_rag(df.copy())
|
67 |
markdown_text = rag_df.to_markdown(index=False)
|
68 |
doc = Document(text=markdown_text, metadata={"source": sheet_name})
|
|
|
124 |
verbose=True,
|
125 |
allow_dangerous_code=True,
|
126 |
max_iterations=15,
|
127 |
+
handle_parsing_errors=True
|
128 |
)
|
129 |
response = agent.invoke(query)
|
130 |
return {"answer": response['output'], "tool_used": "Calculation (Pandas Agent)"}
|