Spaces:

Ahmedik95316
/

Fake-News-Detection-with-MLOps

Sleeping

App Files Files Community

Ahmedik95316 committed on Aug 19

Commit

3a989cc

1 Parent(s): ce7aca5

Update model/train.py

Browse files

Files changed (1) hide show

model/train.py +41 -1

model/train.py CHANGED Viewed

@@ -111,7 +111,7 @@ class ProgressTracker:
         filled_length = int(bar_length * self.current_step // self.total_steps)
         bar = '█' * filled_length + '░' * (bar_length - filled_length)
-        # Print progress
         status_msg = f"\r{self.description}: [{bar}] {progress_pct:.1f}% | Step {self.current_step}/{self.total_steps}"
         if step_name:
             status_msg += f" | {step_name}"
@@ -120,6 +120,18 @@ class ProgressTracker:
         print(status_msg, end='', flush=True)
         # Store step time for better estimation
         if len(self.step_times) >= 3:  # Keep last 3 step times for moving average
             self.step_times.pop(0)
@@ -773,9 +785,37 @@ def main():
     # Parse command line arguments
     parser = argparse.ArgumentParser(description='Train fake news detection model')
     parser.add_argument('--data_path', type=str, help='Path to training data CSV file')
     args = parser.parse_args()
     trainer = RobustModelTrainer()
     success, message = trainer.train_model(data_path=args.data_path)
     if success:

         filled_length = int(bar_length * self.current_step // self.total_steps)
         bar = '█' * filled_length + '░' * (bar_length - filled_length)
+        # Print progress (this will be visible in Streamlit logs)
         status_msg = f"\r{self.description}: [{bar}] {progress_pct:.1f}% | Step {self.current_step}/{self.total_steps}"
         if step_name:
             status_msg += f" | {step_name}"
         print(status_msg, end='', flush=True)
+        # Also output JSON for Streamlit parsing (if needed)
+        progress_json = {
+            "type": "progress",
+            "step": self.current_step,
+            "total": self.total_steps,
+            "percentage": progress_pct,
+            "eta": str(eta) if eta != "calculating..." else None,
+            "step_name": step_name,
+            "elapsed": elapsed
+        }
+        print(f"\nPROGRESS_JSON: {json.dumps(progress_json)}")
         # Store step time for better estimation
         if len(self.step_times) >= 3:  # Keep last 3 step times for moving average
             self.step_times.pop(0)
     # Parse command line arguments
     parser = argparse.ArgumentParser(description='Train fake news detection model')
     parser.add_argument('--data_path', type=str, help='Path to training data CSV file')
+    parser.add_argument('--config_path', type=str, help='Path to training configuration JSON file')
     args = parser.parse_args()
     trainer = RobustModelTrainer()
+    # Load custom configuration if provided
+    if args.config_path and Path(args.config_path).exists():
+        try:
+            with open(args.config_path, 'r') as f:
+                config = json.load(f)
+            # Apply configuration
+            trainer.test_size = config.get('test_size', trainer.test_size)
+            trainer.cv_folds = config.get('cv_folds', trainer.cv_folds)
+            trainer.max_features = config.get('max_features', trainer.max_features)
+            trainer.ngram_range = tuple(config.get('ngram_range', trainer.ngram_range))
+            # Filter models if specified
+            selected_models = config.get('selected_models')
+            if selected_models and len(selected_models) < len(trainer.models):
+                all_models = trainer.models.copy()
+                trainer.models = {k: v for k, v in all_models.items() if k in selected_models}
+            # Update feature selection based on max_features
+            trainer.feature_selection_k = min(trainer.feature_selection_k, trainer.max_features)
+            logger.info(f"Applied custom configuration: {config}")
+        except Exception as e:
+            logger.warning(f"Failed to load configuration: {e}, using defaults")
     success, message = trainer.train_model(data_path=args.data_path)
     if success: