Adityabhaskar committed on
Commit
baf3f32
·
verified ·
1 Parent(s): 995d24d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -13
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import pandas as pd
 
2
  import os
3
  import warnings
4
  import gradio as gr
@@ -42,9 +43,10 @@ class ExcelPandasAgent:
42
  for sheet_name in sheet_names:
43
  try:
44
  df = pd.read_excel(file_path, sheet_name=sheet_name)
 
45
  df = self._clean_dataframe(df)
46
  self.excel_data[sheet_name] = df
47
- self.logs.append(f" - Indexed sheet '{sheet_name}' ({df.shape[0]} rows × {df.shape[1]} columns)")
48
  except Exception as e:
49
  self.logs.append(f"⚠️ Error loading sheet '{sheet_name}': {str(e)}")
50
  continue
@@ -55,19 +57,22 @@ class ExcelPandasAgent:
55
  raise Exception(f"Error loading Excel file: {str(e)}")
56
 
57
  def _clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
58
- """Cleans a DataFrame by removing empty rows/columns."""
 
 
59
  df = df.dropna(how='all').dropna(axis=1, how='all').reset_index(drop=True)
60
- # Attempt to convert object columns to numeric or datetime where possible
61
  for col in df.columns:
 
62
  if df[col].dtype == 'object':
63
- try:
64
- df[col] = pd.to_numeric(df[col], errors='ignore')
65
- except:
66
- pass
67
- try:
68
- df[col] = pd.to_datetime(df[col], errors='ignore')
69
- except:
70
- pass
 
71
  return df
72
 
73
  def query_sheet(self, query: str, sheet_name: str) -> str:
@@ -85,8 +90,8 @@ class ExcelPandasAgent:
85
  self.llm,
86
  df,
87
  verbose=True,
88
- max_iterations=50, # <-- Correct placement
89
- max_execution_time=300, # <-- Correct placement
90
  agent_executor_kwargs={"handle_parsing_errors": True},
91
  allow_dangerous_code=True
92
  )
 
1
  import pandas as pd
2
+ import numpy as np
3
  import os
4
  import warnings
5
  import gradio as gr
 
43
  for sheet_name in sheet_names:
44
  try:
45
  df = pd.read_excel(file_path, sheet_name=sheet_name)
46
+ # The cleaning function is called here for each sheet
47
  df = self._clean_dataframe(df)
48
  self.excel_data[sheet_name] = df
49
+ self.logs.append(f" - Indexed and cleaned sheet '{sheet_name}' ({df.shape[0]} rows × {df.shape[1]} columns)")
50
  except Exception as e:
51
  self.logs.append(f"⚠️ Error loading sheet '{sheet_name}': {str(e)}")
52
  continue
 
57
  raise Exception(f"Error loading Excel file: {str(e)}")
58
 
59
  def _clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
60
+ """
61
+ Cleans a DataFrame by removing empty rows/columns and robustly converting types.
62
+ """
63
  df = df.dropna(how='all').dropna(axis=1, how='all').reset_index(drop=True)
 
64
  for col in df.columns:
65
+ # Apply to object columns that might contain mixed numeric/text data
66
  if df[col].dtype == 'object':
67
+ # This is the key change. It attempts to convert the column
68
+ # to numbers. Any value that fails (like 'Apr') becomes NaN (Not a Number).
69
+ df[col] = pd.to_numeric(df[col], errors='coerce')
70
+
71
+ # Now, find all numeric columns (including those just converted)
72
+ # and fill any resulting NaN values with 0. This prepares them for calculations.
73
+ for col in df.select_dtypes(include=np.number).columns:
74
+ df[col] = df[col].fillna(0)
75
+
76
  return df
77
 
78
  def query_sheet(self, query: str, sheet_name: str) -> str:
 
90
  self.llm,
91
  df,
92
  verbose=True,
93
+ max_iterations=50,
94
+ max_execution_time=300,
95
  agent_executor_kwargs={"handle_parsing_errors": True},
96
  allow_dangerous_code=True
97
  )