# ufc-predictor/src/predict/pipeline.py
import csv
import json
import os
from collections import OrderedDict
from datetime import datetime

from ..config import FIGHTS_CSV_PATH, MODEL_RESULTS_PATH
from .models import BaseModel


class PredictionPipeline:
    """
    Orchestrates the model training, evaluation, and reporting pipeline.
    """

    def __init__(self, models):
        if not all(isinstance(m, BaseModel) for m in models):
            raise TypeError("All models must be instances of BaseModel.")
        self.models = models
        self.train_fights = []
        self.test_fights = []
        self.results = {}

    def _load_and_split_data(self, num_test_events=10):
        """Loads and splits the data into chronological training and testing sets."""
        print("\n--- Loading and Splitting Data ---")
        if not os.path.exists(FIGHTS_CSV_PATH):
            raise FileNotFoundError(f"Fights data not found at '{FIGHTS_CSV_PATH}'.")
        with open(FIGHTS_CSV_PATH, 'r', encoding='utf-8') as f:
            fights = list(csv.DictReader(f))
        fights.sort(key=lambda x: datetime.strptime(x['event_date'], '%B %d, %Y'))
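        # Split on whole events rather than individual fights so no fight from
        # a held-out card leaks into training. OrderedDict.fromkeys de-duplicates
        # the event names while preserving their chronological order.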
        all_events = list(OrderedDict.fromkeys(f['event_name'] for f in fights))
        if len(all_events) < num_test_events:
            print(f"Warning: Fewer than {num_test_events} events found. Adjusting test set size.")
            num_test_events = len(all_events)
        test_event_names = all_events[-num_test_events:]
        self.train_fights = [f for f in fights if f['event_name'] not in test_event_names]
        self.test_fights = [f for f in fights if f['event_name'] in test_event_names]
        print(f"Data loaded. {len(self.train_fights)} training fights, {len(self.test_fights)} testing fights.")
        print(f"Testing on the last {num_test_events} events.")

    def run(self, detailed_report=True):
        """Executes the full pipeline: load, train, evaluate, and report."""
        self._load_and_split_data()
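        # Draws, no-contests, and fights with a missing result have no single
        # correct answer, so they are excluded from the accuracy calculation.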
        eval_fights = [f for f in self.test_fights if f['winner'] not in ["Draw", "NC", ""]]
        if not eval_fights:
            print("No fights with definitive outcomes in the test set. Aborting.")
            return
        for model in self.models:
            model_name = model.__class__.__name__
            print(f"\n--- Evaluating Model: {model_name} ---")
            model.train(self.train_fights)
            correct_predictions = 0
            predictions = []
            for fight in eval_fights:
                f1_name, f2_name = fight['fighter_1'], fight['fighter_2']
                actual_winner = fight['winner']
                event_name = fight.get('event_name', 'Unknown Event')
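                # predict() is expected to return the winner's name as a string
                # matching the 'winner' column (inferred from the comparison below).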
                predicted_winner = model.predict(f1_name, f2_name)
                is_correct = (predicted_winner == actual_winner)
                if is_correct:
                    correct_predictions += 1
                predictions.append({
                    'fight': f"{f1_name} vs. {f2_name}",
                    'event': event_name,
                    'predicted_winner': predicted_winner,
                    'actual_winner': actual_winner,
                    'is_correct': is_correct
                })
            accuracy = (correct_predictions / len(eval_fights)) * 100
            self.results[model_name] = {
                'accuracy': accuracy,
                'predictions': predictions,
                'total_fights': len(eval_fights)
            }
        if detailed_report:
            self._report_detailed_results()
        else:
            self._report_summary()

    def _report_summary(self):
        """Prints a concise summary of model performance."""
        print("\n\n--- Prediction Pipeline Summary ---")
        print(f"{'Model':<25} | {'Accuracy':<10} | {'Fights Evaluated':<20}")
        print("-" * 65)
        for model_name, result in self.results.items():
            print(f"{model_name:<25} | {result['accuracy']:<9.2f}% | {result['total_fights']:<20}")
        print("-" * 65)

    def _save_report_to_json(self, file_path=MODEL_RESULTS_PATH):
        """Saves the detailed prediction results to a JSON file."""
        print(f"\nSaving detailed report to {file_path}...")
        try:
            # Create a report structure that is clean and JSON-friendly.
            report = {}
            for model_name, result in self.results.items():
                # Group predictions by event for a more organized report.
                predictions_by_event = {}
                for p in result['predictions']:
                    # Pop 'event' from a copy so the entries cached in
                    # self.results stay intact if the report is generated again.
                    entry = dict(p)
                    event_name = entry.pop('event')
                    predictions_by_event.setdefault(event_name, []).append(entry)
                report[model_name] = {
                    "overall_accuracy": f"{result['accuracy']:.2f}%",
                    "total_fights_evaluated": result['total_fights'],
                    "predictions_by_event": predictions_by_event
                }
            with open(file_path, 'w', encoding='utf-8') as f:
                json.dump(report, f, indent=4)
            print("Report saved successfully.")
        except (IOError, TypeError) as e:
            print(f"Error saving report to JSON file: {e}")

    def _report_detailed_results(self):
        """Prints a summary and saves the detailed report to a file."""
        print("\n\n--- Prediction Pipeline Finished: Detailed Report ---")
        # A summary is printed to the console for convenience.
        self._report_summary()
        # The detailed report is saved to a JSON file.
        self._save_report_to_json()
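

# Usage sketch (EloModel and WinRateModel are hypothetical names; substitute
# whichever BaseModel subclasses .models actually defines):
#
#     from .models import EloModel, WinRateModel
#
#     pipeline = PredictionPipeline([EloModel(), WinRateModel()])
#     pipeline.run(detailed_report=True)  # prints a summary and writes the
#                                         # JSON report to MODEL_RESULTS_PATH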