Spaces:

Adityabhaskar
/

stealth

Paused

App Files Community

Adityabhaskar committed on Sep 3

Commit

253800a

verified ·

1 Parent(s): c4c4e2a

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -14

app.py CHANGED Viewed

@@ -28,24 +28,30 @@ class HybridExcelQuerySystem:
         --- NEW, MORE ROBUST VERSION ---
         Intelligently reconstructs the 'Numerical Data' sheet into a clean, tidy format.
         """
-        # Find the row with month names (it's the first non-empty row)
-        header_row_index = df.dropna(how='all').index[0]
-        month_headers = df.iloc[header_row_index].dropna().tolist()
-        # Find the first column with financial metrics
-        metric_col_name = df.columns[0]
-        df_metrics = df[[metric_col_name]].dropna()
-        # Find the starting row and column for the actual data
-        start_row = df_metrics.index[0]
-        start_col = df.columns.get_loc(month_headers[0])
-        # Extract the core data, metrics, and headers
-        data = df.iloc[start_row:, start_col:start_col+len(month_headers)]
-        metrics = df.iloc[start_row:, 0]
         # Create a new, clean DataFrame
-        clean_df = pd.DataFrame(data.values, index=metrics, columns=month_headers)
         # Transpose so months are rows
         clean_df = clean_df.T
@@ -63,6 +69,7 @@ class HybridExcelQuerySystem:
         return clean_df
     def load_excel_file(self, file_path: str) -> str:
         self.logs.clear()
         try:
@@ -71,7 +78,7 @@ class HybridExcelQuerySystem:
             self.logs.append(f"✅ Found {len(self.sheet_names)} sheets: {', '.join(self.sheet_names)}")
             for sheet_name in self.sheet_names:
-                df = pd.read_excel(file_path, sheet_name=sheet_name, header=None) # Read without a header
                 if sheet_name == "Numerical Data":
                     agent_df = self._pivot_numerical_data(df.copy())

         --- NEW, MORE ROBUST VERSION ---
         Intelligently reconstructs the 'Numerical Data' sheet into a clean, tidy format.
         """
+        # Define expected months to correctly identify the data block
+        months = ['Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar']
+        # Find the first row that contains at least one month name
+        header_row_index = df[df.isin(months)].dropna(how='all').index[0]
+        header_series = df.iloc[header_row_index]
+        # Filter this row to get only the actual month names
+        actual_month_headers = [h for h in header_series if h in months]
+        # Find the start and end column positions of the month data
+        start_col_pos = header_series.tolist().index(actual_month_headers[0])
+        end_col_pos = header_series.tolist().index(actual_month_headers[-1])
+        # Find the start row of the financial metrics
+        metric_col = df.iloc[:, 0].dropna()
+        start_row_pos = metric_col.index[1] # The metrics start after "Profit & Loss Account"
+        # Slice the core data block
+        data = df.iloc[start_row_pos:, start_col_pos:end_col_pos+1]
+        metrics = df.iloc[start_row_pos:, 0]
         # Create a new, clean DataFrame
+        clean_df = pd.DataFrame(data.values, index=metrics, columns=actual_month_headers)
         # Transpose so months are rows
         clean_df = clean_df.T
         return clean_df
     def load_excel_file(self, file_path: str) -> str:
         self.logs.clear()
         try:
             self.logs.append(f"✅ Found {len(self.sheet_names)} sheets: {', '.join(self.sheet_names)}")
             for sheet_name in self.sheet_names:
+                df = pd.read_excel(file_path, sheet_name=sheet_name, header=None)
                 if sheet_name == "Numerical Data":
                     agent_df = self._pivot_numerical_data(df.copy())