Spaces:

AlvaroMros
/

ufc-predictor

Running

AlvaroMros committed on Jul 5

Commit

1e04613

1 Parent(s): b7da0df

Integrate model saving into prediction pipeline

Moved model training and saving logic from save_model.py into the PredictionPipeline class. Updated config to define MODELS_DIR and refactored predict_new.py to use the new model directory. Removed the now-redundant save_model.py script.

Files changed (4) hide show

src/config.py +2 -6
src/predict/pipeline.py +33 -2
src/predict/predict_new.py +2 -2
src/predict/save_model.py +0 -53

src/config.py CHANGED Viewed

@@ -1,11 +1,7 @@
 import os
 OUTPUT_DIR = 'output'
 FIGHTS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.csv')
 FIGHTERS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.csv')
-MODEL_RESULTS_PATH = os.path.join(OUTPUT_DIR, 'model_results.json')
-# JSON files (temporary)
-EVENTS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.json')
-FIGHTERS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.json')

 import os
 OUTPUT_DIR = 'output'
+MODELS_DIR = os.path.join(OUTPUT_DIR, 'models')
+MODEL_RESULTS_PATH = os.path.join(OUTPUT_DIR, 'model_results.json')
 FIGHTS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.csv')
 FIGHTERS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.csv')

src/predict/pipeline.py CHANGED Viewed

@@ -4,8 +4,9 @@ import sys
 from datetime import datetime
 from collections import OrderedDict
 import json
-from ..config import FIGHTS_CSV_PATH, MODEL_RESULTS_PATH
 from .models import BaseModel
 class PredictionPipeline:
@@ -43,7 +44,7 @@ class PredictionPipeline:
         print(f"Testing on the last {num_test_events} events.")
     def run(self, detailed_report=True):
-        """Executes the full pipeline: load, train, evaluate, and report."""
         self._load_and_split_data()
         eval_fights = [f for f in self.test_fights if f['winner'] not in ["Draw", "NC", ""]]
@@ -91,6 +92,36 @@ class PredictionPipeline:
         else:
             self._report_summary()
     def _report_summary(self):
         """Prints a concise summary of model performance."""
         print("\n\n--- Prediction Pipeline Summary ---")

 from datetime import datetime
 from collections import OrderedDict
 import json
+import joblib
+from ..config import FIGHTS_CSV_PATH, MODEL_RESULTS_PATH, MODELS_DIR
 from .models import BaseModel
 class PredictionPipeline:
         print(f"Testing on the last {num_test_events} events.")
     def run(self, detailed_report=True):
+        """Executes the full pipeline: load, train, evaluate, report and save models."""
         self._load_and_split_data()
         eval_fights = [f for f in self.test_fights if f['winner'] not in ["Draw", "NC", ""]]
         else:
             self._report_summary()
+        self._train_and_save_models()
+    def _train_and_save_models(self):
+        """Trains all models on the full dataset and saves them."""
+        print("\n\n--- Training and Saving All Models on Full Dataset ---")
+        if not os.path.exists(FIGHTS_CSV_PATH):
+            print(f"Error: Fights data not found at '{FIGHTS_CSV_PATH}'. Cannot save models.")
+            return
+        with open(FIGHTS_CSV_PATH, 'r', encoding='utf-8') as f:
+            all_fights = list(csv.DictReader(f))
+        print(f"Training models on all {len(all_fights)} available fights...")
+        if not os.path.exists(MODELS_DIR):
+            os.makedirs(MODELS_DIR)
+            print(f"Created directory: {MODELS_DIR}")
+        for model in self.models:
+            model_name = model.__class__.__name__
+            print(f"\n--- Training: {model_name} ---")
+            model.train(all_fights)
+            # Sanitize and save the model
+            file_name = f"{model_name}.joblib"
+            save_path = os.path.join(MODELS_DIR, file_name)
+            joblib.dump(model, save_path)
+            print(f"Model saved successfully to {save_path}")
     def _report_summary(self):
         """Prints a concise summary of model performance."""
         print("\n\n--- Prediction Pipeline Summary ---")

src/predict/predict_new.py CHANGED Viewed

@@ -3,7 +3,7 @@ import os
 import joblib
 from datetime import datetime
-from ..config import OUTPUT_DIR
 def predict_new_fight(fighter1_name, fighter2_name, model_path):
     """
@@ -45,7 +45,7 @@ if __name__ == '__main__':
     parser.add_argument(
         '--model_path',
         type=str,
-        default=os.path.join(OUTPUT_DIR, 'XGBoostModel.joblib'),
         help="Path to the saved model file."
     )
     args = parser.parse_args()

 import joblib
 from datetime import datetime
+from ..config import MODELS_DIR
 def predict_new_fight(fighter1_name, fighter2_name, model_path):
     """
     parser.add_argument(
         '--model_path',
         type=str,
+        default=os.path.join(MODELS_DIR, 'XGBoostModel.joblib'),
         help="Path to the saved model file."
     )
     args = parser.parse_args()

src/predict/save_model.py DELETED Viewed

@@ -1,53 +0,0 @@
-import argparse
-import os
-import joblib
-import pandas as pd
-from ..config import FIGHTS_CSV_PATH, OUTPUT_DIR
-import src.predict.models as models
-def save_model(model_name):
-    """
-    Trains a specified model on the entire dataset and saves it to a file.
-    :param model_name: The name of the model class to train (e.g., 'XGBoostModel').
-    """
-    print(f"--- Training and Saving Model: {model_name} ---")
-    # 1. Get the model class from the models module
-    try:
-        ModelClass = getattr(models, model_name)
-    except AttributeError:
-        print(f"Error: Model '{model_name}' not found in src/predict/models.py")
-        return
-    model = ModelClass()
-    # 2. Load all available fights for training
-    if not os.path.exists(FIGHTS_CSV_PATH):
-        raise FileNotFoundError(f"Fights data not found at '{FIGHTS_CSV_PATH}'.")
-    all_fights = pd.read_csv(FIGHTS_CSV_PATH).to_dict('records')
-    print(f"Training model on all {len(all_fights)} available fights...")
-    # 3. Train the model
-    model.train(all_fights)
-    # 4. Save the entire trained model object
-    model_name_to_save=f"{model_name}.joblib"
-    save_path = os.path.join(OUTPUT_DIR, model_name_to_save)
-    joblib.dump(model, save_path)
-    print(f"\nModel saved successfully to {save_path}")
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description="Train and save a prediction model.")
-    parser.add_argument(
-        '--model',
-        type=str,
-        default='XGBoostModel',
-        help="The name of the model class to train and save."
-    )
-    args = parser.parse_args()
-    save_model(args.model)