Adityabhaskar committed on
Commit
0f4aab5
·
verified ·
1 Parent(s): 106e6b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -15
app.py CHANGED
@@ -24,26 +24,25 @@ class HybridExcelQuerySystem:
24
  self.sheet_names = []
25
 
26
  def _pivot_numerical_data(self, df: pd.DataFrame) -> pd.DataFrame:
27
- """Pivots the special 'Numerical Data' sheet into a clean, usable format."""
28
- # Find the row with month names, which will become our header
29
  header_row_index = 0
30
- df = df.T # Transpose the dataframe
31
- df.columns = df.iloc[header_row_index] # Set the first row as header
32
- df = df.drop(header_row_index) # Drop the header row from data
33
- df = df.reset_index().rename(columns={'index': 'Month'}) # Make months a column
34
 
35
- # Clean up column names (e.g., "Profit & Loss Account" -> "Profit_Loss_Account")
36
- df.columns = df.columns.str.strip().str.replace(r'[^a-zA-Z0-9_]', '_', regex=True)
 
 
 
37
 
38
- # Convert numeric columns to numbers, coercing errors
39
  for col in df.columns:
40
  if col != 'Month':
 
 
41
  df[col] = pd.to_numeric(df[col], errors='coerce')
42
 
43
- # Drop columns that are entirely empty after conversion
44
  df = df.dropna(axis=1, how='all')
45
- df = df.rename(columns={'Period': 'Month'}) # A final cleanup
46
- # Filter out any non-month rows that might have slipped through
47
  months = ['Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar']
48
  df = df[df['Month'].isin(months)]
49
  return df
@@ -58,14 +57,12 @@ class HybridExcelQuerySystem:
58
  for sheet_name in self.sheet_names:
59
  df = pd.read_excel(file_path, sheet_name=sheet_name)
60
 
61
- # --- Prepare for Agent ---
62
  if sheet_name == "Numerical Data":
63
  agent_df = self._pivot_numerical_data(df.copy())
64
  else:
65
  agent_df = self._clean_dataframe_for_agent(df.copy())
66
  self.dataframes[sheet_name] = agent_df
67
 
68
- # --- Prepare for RAG ---
69
  rag_df = self._clean_dataframe_for_rag(df.copy())
70
  markdown_text = rag_df.to_markdown(index=False)
71
  doc = Document(text=markdown_text, metadata={"source": sheet_name})
@@ -127,7 +124,7 @@ class HybridExcelQuerySystem:
127
  verbose=True,
128
  allow_dangerous_code=True,
129
  max_iterations=15,
130
- handle_parsing_errors=True # The direct fix for the error
131
  )
132
  response = agent.invoke(query)
133
  return {"answer": response['output'], "tool_used": "Calculation (Pandas Agent)"}
 
24
  self.sheet_names = []
25
 
26
  def _pivot_numerical_data(self, df: pd.DataFrame) -> pd.DataFrame:
 
 
27
  header_row_index = 0
28
+ df = df.T
29
+ df.columns = df.iloc[header_row_index]
 
 
30
 
31
+ # --- THIS IS THE FIXED LINE ---
32
+ df = df.iloc[1:] # Keep all rows from the second row onwards
33
+
34
+ df = df.reset_index().rename(columns={'index': 'Month'})
35
+ df.columns = df.columns.str.strip().str.replace(r'[^a-zA-Z0-9_%]', '_', regex=True).str.replace('__', '_')
36
 
37
+ # Identify and convert numeric columns
38
  for col in df.columns:
39
  if col != 'Month':
40
+ # Remove '%' and convert to numeric, coercing errors
41
+ df[col] = df[col].astype(str).str.replace('%', '', regex=False)
42
  df[col] = pd.to_numeric(df[col], errors='coerce')
43
 
 
44
  df = df.dropna(axis=1, how='all')
45
+ df = df.rename(columns={'Period': 'Month'})
 
46
  months = ['Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar']
47
  df = df[df['Month'].isin(months)]
48
  return df
 
57
  for sheet_name in self.sheet_names:
58
  df = pd.read_excel(file_path, sheet_name=sheet_name)
59
 
 
60
  if sheet_name == "Numerical Data":
61
  agent_df = self._pivot_numerical_data(df.copy())
62
  else:
63
  agent_df = self._clean_dataframe_for_agent(df.copy())
64
  self.dataframes[sheet_name] = agent_df
65
 
 
66
  rag_df = self._clean_dataframe_for_rag(df.copy())
67
  markdown_text = rag_df.to_markdown(index=False)
68
  doc = Document(text=markdown_text, metadata={"source": sheet_name})
 
124
  verbose=True,
125
  allow_dangerous_code=True,
126
  max_iterations=15,
127
+ handle_parsing_errors=True
128
  )
129
  response = agent.invoke(query)
130
  return {"answer": response['output'], "tool_used": "Calculation (Pandas Agent)"}