Spaces:

Ahmedik95316
/

Fake-News-Detection-with-MLOps

Running

App Files Community

Ahmedik95316 committed on Aug 28

Commit

9440fd6

verified ·

1 Parent(s): add90c3

Update initialize_system.py

Browse files

Files changed (1) hide show

initialize_system.py +69 -122

initialize_system.py CHANGED Viewed

@@ -548,130 +548,77 @@ def verify_system():
 def main():
-    """Main initialization function with ensemble support"""
-    import argparse
-    # Parse command line arguments
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--force-ensemble", action="store_true",
-                       help="Force ensemble training regardless of existing models")
-    args = parser.parse_args()
-    try:
-        log_step("Starting system initialization...")
-        log_step(f"Environment: {path_manager.environment}")
-        log_step(f"Base directory: {path_manager.base_paths['base']}")
-        log_step(f"Data directory: {path_manager.base_paths['data']}")
-        log_step(f"Model directory: {path_manager.base_paths['model']}")
-        # Handle force ensemble request
-        if args.force_ensemble:
-            log_step("Force ensemble training requested via web interface")
-            # Remove existing models to force retraining
-            model_files = [
-                path_manager.get_model_path("pipeline.pkl"),
-                path_manager.get_model_path("model.pkl"),
-                path_manager.get_model_path("vectorizer.pkl"),
-                path_manager.get_metadata_path()
-            ]
-            for file in model_files:
-                if file.exists():
-                    file.unlink()
-                    log_step(f"Removed existing model file: {file}")
-            return run_comprehensive_ensemble_training()
-        # Normal initialization steps
-        steps = [
-            ("Directory Creation", create_directories),
-            ("Existing Dataset Copy", copy_existing_datasets),
-            ("Minimal Dataset Creation", create_minimal_dataset),
-            ("Model Training", run_basic_training_fallback),
-            ("Log File Creation", create_initial_logs),
-            ("System Verification", verify_system)
-        ]
-        results = {}
-        total_steps = len(steps)
-        completed_steps = 0
-        failed_steps = 0
-        for step_name, step_function in steps:
-            log_step(f"Starting: {step_name}")
-            try:
-                success = step_function()
-                results[step_name] = success
-                if success:
-                    log_step(f"{step_name} completed")
-                    completed_steps += 1
-                else:
-                    log_step(f"{step_name} failed")
-                    failed_steps += 1
-            except Exception as e:
-                log_step(f"{step_name} failed with error: {e}")
-                results[step_name] = False
-                failed_steps += 1
-        # Summary
-        log_step("")
-        log_step("Initialization Summary:")
-        log_step(f"   Completed: {completed_steps}/{total_steps} steps")
-        log_step(f"   Failed: {failed_steps}/{total_steps} steps")
-        completed_step_names = [name for name, success in results.items() if success]
-        failed_step_names = [name for name, success in results.items() if not success]
-        if completed_step_names:
-            log_step(f"   Completed steps: {', '.join(completed_step_names)}")
-        if failed_step_names:
-            log_step(f"   Failed steps: {', '.join(failed_step_names)}")
-        # Determine overall success
-        critical_steps = ["Model Training", "System Verification"]
-        critical_success = all(results.get(step, False) for step in critical_steps)
-        if critical_success:
-            log_step("Initialization completed successfully")
-            # Print environment info
-            log_step("")
-            log_step("Environment Information:")
-            log_step(f"   Environment: {path_manager.environment}")
-            log_step(f"   Available datasets: {len([f for f in path_manager.get_available_datasets() if f])}")
-            log_step(f"   Available models: {len([f for f in path_manager.get_available_models() if f])}")
-            # Final pipeline check
-            pipeline_path = path_manager.get_model_path("pipeline.pkl")
-            log_step("")
-            log_step("Final Pipeline Check:")
-            log_step(f"   Pipeline path: {pipeline_path}")
-            log_step(f"   Pipeline exists: {pipeline_path.exists()}")
-            if pipeline_path.exists():
-                try:
-                    import joblib
-                    pipeline = joblib.load(pipeline_path)
-                    log_step(f"   Pipeline loadable: True")
-                    log_step(f"   Pipeline steps: {[step[0] for step in pipeline.steps]}")
-                except Exception as e:
-                    log_step(f"   Pipeline loadable: False ({e})")
-            log_step("")
-            log_step("System ready for use!")
-            return True
-        else:
-            if failed_steps <= 1:
-                log_step("Initialization completed with minor issues")
-                return True
-            else:
-                log_step("Initialization completed with significant issues")
-                return False
-    except Exception as e:
-        log_step(f"Initialization failed with critical error: {e}")
-        import traceback
-        log_step(f"Traceback: {traceback.format_exc()}")
-        return False
 if __name__ == "__main__":

 def main():
+    """Main initialization function"""
+    log_step("🚀 Starting system initialization...")
+    log_step(f"🌍 Environment: {path_manager.environment}")
+    log_step(f"📁 Base directory: {path_manager.base_paths['base']}")
+    log_step(f"📊 Data directory: {path_manager.base_paths['data']}")
+    log_step(f"🤖 Model directory: {path_manager.base_paths['model']}")
+    steps = [
+        ("Directory Creation", create_directories),
+        ("Existing Dataset Copy", copy_existing_datasets),
+        ("Minimal Dataset Creation", create_minimal_dataset),
+        ("Model Training", run_initial_training),
+        ("Log File Creation", create_initial_logs),
+        ("System Verification", verify_system)
+    ]
+    failed_steps = []
+    completed_steps = []
+    for step_name, step_function in steps:
+        try:
+            log_step(f"🔄 Starting: {step_name}")
+            if step_function():
+                log_step(f"✅ {step_name} completed")
+                completed_steps.append(step_name)
+            else:
+                log_step(f"❌ {step_name} failed")
+                failed_steps.append(step_name)
+        except Exception as e:
+            log_step(f"❌ {step_name} failed: {str(e)}")
+            failed_steps.append(step_name)
+    # Summary
+    log_step(f"\n📊 Initialization Summary:")
+    log_step(f"   ✅ Completed: {len(completed_steps)}/{len(steps)} steps")
+    log_step(f"   ❌ Failed: {len(failed_steps)}/{len(steps)} steps")
+    if completed_steps:
+        log_step(f"   Completed steps: {', '.join(completed_steps)}")
+    if failed_steps:
+        log_step(f"   Failed steps: {', '.join(failed_steps)}")
+        log_step(f"⚠️ Initialization completed with {len(failed_steps)} failed steps")
+    else:
+        log_step("🎉 System initialization completed successfully!")
+    # Environment info
+    log_step(f"\n🔍 Environment Information:")
+    env_info = path_manager.get_environment_info()
+    log_step(f"   Environment: {env_info['environment']}")
+    log_step(f"   Available datasets: {sum(env_info['available_datasets'].values())}")
+    log_step(f"   Available models: {sum(env_info['available_models'].values())}")
+    # Final pipeline check
+    pipeline_path = path_manager.get_pipeline_path()
+    log_step(f"\n🎯 Final Pipeline Check:")
+    log_step(f"   Pipeline path: {pipeline_path}")
+    log_step(f"   Pipeline exists: {pipeline_path.exists()}")
+    if pipeline_path.exists():
+        try:
+            import joblib
+            pipeline = joblib.load(pipeline_path)
+            log_step(f"   Pipeline loadable: ✅")
+            log_step(f"   Pipeline steps: {list(pipeline.named_steps.keys())}")
+        except Exception as e:
+            log_step(f"   Pipeline load error: {e}")
+    log_step("\n🎯 System ready for use!")
+    return len(failed_steps) == 0
 if __name__ == "__main__":