Ahmedik95316 committed on
Commit
9440fd6
·
verified ·
1 Parent(s): add90c3

Update initialize_system.py

Browse files
Files changed (1) hide show
  1. initialize_system.py +69 -122
initialize_system.py CHANGED
@@ -548,130 +548,77 @@ def verify_system():
548
 
549
 
550
  def main():
551
- """Main initialization function with ensemble support"""
552
- import argparse
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
553
 
554
- # Parse command line arguments
555
- parser = argparse.ArgumentParser()
556
- parser.add_argument("--force-ensemble", action="store_true",
557
- help="Force ensemble training regardless of existing models")
558
- args = parser.parse_args()
559
 
560
- try:
561
- log_step("Starting system initialization...")
562
- log_step(f"Environment: {path_manager.environment}")
563
- log_step(f"Base directory: {path_manager.base_paths['base']}")
564
- log_step(f"Data directory: {path_manager.base_paths['data']}")
565
- log_step(f"Model directory: {path_manager.base_paths['model']}")
566
-
567
- # Handle force ensemble request
568
- if args.force_ensemble:
569
- log_step("Force ensemble training requested via web interface")
570
- # Remove existing models to force retraining
571
- model_files = [
572
- path_manager.get_model_path("pipeline.pkl"),
573
- path_manager.get_model_path("model.pkl"),
574
- path_manager.get_model_path("vectorizer.pkl"),
575
- path_manager.get_metadata_path()
576
- ]
577
- for file in model_files:
578
- if file.exists():
579
- file.unlink()
580
- log_step(f"Removed existing model file: {file}")
581
-
582
- return run_comprehensive_ensemble_training()
583
-
584
- # Normal initialization steps
585
- steps = [
586
- ("Directory Creation", create_directories),
587
- ("Existing Dataset Copy", copy_existing_datasets),
588
- ("Minimal Dataset Creation", create_minimal_dataset),
589
- ("Model Training", run_basic_training_fallback),
590
- ("Log File Creation", create_initial_logs),
591
- ("System Verification", verify_system)
592
- ]
593
-
594
- results = {}
595
- total_steps = len(steps)
596
- completed_steps = 0
597
- failed_steps = 0
598
-
599
- for step_name, step_function in steps:
600
- log_step(f"Starting: {step_name}")
601
- try:
602
- success = step_function()
603
- results[step_name] = success
604
- if success:
605
- log_step(f"{step_name} completed")
606
- completed_steps += 1
607
- else:
608
- log_step(f"{step_name} failed")
609
- failed_steps += 1
610
- except Exception as e:
611
- log_step(f"{step_name} failed with error: {e}")
612
- results[step_name] = False
613
- failed_steps += 1
614
-
615
- # Summary
616
- log_step("")
617
- log_step("Initialization Summary:")
618
- log_step(f" Completed: {completed_steps}/{total_steps} steps")
619
- log_step(f" Failed: {failed_steps}/{total_steps} steps")
620
-
621
- completed_step_names = [name for name, success in results.items() if success]
622
- failed_step_names = [name for name, success in results.items() if not success]
623
-
624
- if completed_step_names:
625
- log_step(f" Completed steps: {', '.join(completed_step_names)}")
626
- if failed_step_names:
627
- log_step(f" Failed steps: {', '.join(failed_step_names)}")
628
-
629
- # Determine overall success
630
- critical_steps = ["Model Training", "System Verification"]
631
- critical_success = all(results.get(step, False) for step in critical_steps)
632
-
633
- if critical_success:
634
- log_step("Initialization completed successfully")
635
-
636
- # Print environment info
637
- log_step("")
638
- log_step("Environment Information:")
639
- log_step(f" Environment: {path_manager.environment}")
640
- log_step(f" Available datasets: {len([f for f in path_manager.get_available_datasets() if f])}")
641
- log_step(f" Available models: {len([f for f in path_manager.get_available_models() if f])}")
642
-
643
- # Final pipeline check
644
- pipeline_path = path_manager.get_model_path("pipeline.pkl")
645
- log_step("")
646
- log_step("Final Pipeline Check:")
647
- log_step(f" Pipeline path: {pipeline_path}")
648
- log_step(f" Pipeline exists: {pipeline_path.exists()}")
649
-
650
- if pipeline_path.exists():
651
- try:
652
- import joblib
653
- pipeline = joblib.load(pipeline_path)
654
- log_step(f" Pipeline loadable: True")
655
- log_step(f" Pipeline steps: {[step[0] for step in pipeline.steps]}")
656
- except Exception as e:
657
- log_step(f" Pipeline loadable: False ({e})")
658
-
659
- log_step("")
660
- log_step("System ready for use!")
661
- return True
662
- else:
663
- if failed_steps <= 1:
664
- log_step("Initialization completed with minor issues")
665
- return True
666
- else:
667
- log_step("Initialization completed with significant issues")
668
- return False
669
-
670
- except Exception as e:
671
- log_step(f"Initialization failed with critical error: {e}")
672
- import traceback
673
- log_step(f"Traceback: {traceback.format_exc()}")
674
- return False
675
 
676
 
677
  if __name__ == "__main__":
 
548
 
549
 
550
  def main():
551
+ """Main initialization function"""
552
+ log_step("🚀 Starting system initialization...")
553
+ log_step(f"🌍 Environment: {path_manager.environment}")
554
+ log_step(f"📁 Base directory: {path_manager.base_paths['base']}")
555
+ log_step(f"📊 Data directory: {path_manager.base_paths['data']}")
556
+ log_step(f"🤖 Model directory: {path_manager.base_paths['model']}")
557
+
558
+ steps = [
559
+ ("Directory Creation", create_directories),
560
+ ("Existing Dataset Copy", copy_existing_datasets),
561
+ ("Minimal Dataset Creation", create_minimal_dataset),
562
+ ("Model Training", run_initial_training),
563
+ ("Log File Creation", create_initial_logs),
564
+ ("System Verification", verify_system)
565
+ ]
566
+
567
+ failed_steps = []
568
+ completed_steps = []
569
+
570
+ for step_name, step_function in steps:
571
+ try:
572
+ log_step(f"🔄 Starting: {step_name}")
573
+ if step_function():
574
+ log_step(f"✅ {step_name} completed")
575
+ completed_steps.append(step_name)
576
+ else:
577
+ log_step(f"❌ {step_name} failed")
578
+ failed_steps.append(step_name)
579
+ except Exception as e:
580
+ log_step(f"❌ {step_name} failed: {str(e)}")
581
+ failed_steps.append(step_name)
582
+
583
+ # Summary
584
+ log_step(f"\n📊 Initialization Summary:")
585
+ log_step(f" ✅ Completed: {len(completed_steps)}/{len(steps)} steps")
586
+ log_step(f" ❌ Failed: {len(failed_steps)}/{len(steps)} steps")
587
 
588
+ if completed_steps:
589
+ log_step(f" Completed steps: {', '.join(completed_steps)}")
 
 
 
590
 
591
+ if failed_steps:
592
+ log_step(f" Failed steps: {', '.join(failed_steps)}")
593
+ log_step(f"⚠️ Initialization completed with {len(failed_steps)} failed steps")
594
+ else:
595
+ log_step("🎉 System initialization completed successfully!")
596
+
597
+ # Environment info
598
+ log_step(f"\n🔍 Environment Information:")
599
+ env_info = path_manager.get_environment_info()
600
+ log_step(f" Environment: {env_info['environment']}")
601
+ log_step(f" Available datasets: {sum(env_info['available_datasets'].values())}")
602
+ log_step(f" Available models: {sum(env_info['available_models'].values())}")
603
+
604
+ # Final pipeline check
605
+ pipeline_path = path_manager.get_pipeline_path()
606
+ log_step(f"\n🎯 Final Pipeline Check:")
607
+ log_step(f" Pipeline path: {pipeline_path}")
608
+ log_step(f" Pipeline exists: {pipeline_path.exists()}")
609
+ if pipeline_path.exists():
610
+ try:
611
+ import joblib
612
+ pipeline = joblib.load(pipeline_path)
613
+ log_step(f" Pipeline loadable: ✅")
614
+ log_step(f" Pipeline steps: {list(pipeline.named_steps.keys())}")
615
+ except Exception as e:
616
+ log_step(f" Pipeline load error: {e}")
617
+
618
+ log_step("\n🎯 System ready for use!")
619
+
620
+ return len(failed_steps) == 0
621
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
622
 
623
 
624
  if __name__ == "__main__":