import csv
import json
import os
from collections import OrderedDict
from datetime import datetime

from ..config import FIGHTS_CSV_PATH, MODEL_RESULTS_PATH
from .models import BaseModel


class PredictionPipeline:
    """
    Orchestrates the model training, evaluation, and reporting pipeline.
    """

    def __init__(self, models):
        if not all(isinstance(m, BaseModel) for m in models):
            raise TypeError("All models must be instances of BaseModel.")
        self.models = models
        self.train_fights = []
        self.test_fights = []
        self.results = {}

    def _load_and_split_data(self, num_test_events=10):
        """Loads the fight data and splits it chronologically into training and testing sets."""
        print("\n--- Loading and Splitting Data ---")
        if not os.path.exists(FIGHTS_CSV_PATH):
            raise FileNotFoundError(f"Fights data not found at '{FIGHTS_CSV_PATH}'.")
        with open(FIGHTS_CSV_PATH, 'r', encoding='utf-8') as f:
            fights = list(csv.DictReader(f))
        # Sort chronologically so the test set is strictly later than the training set.
        fights.sort(key=lambda x: datetime.strptime(x['event_date'], '%B %d, %Y'))
        # De-duplicate event names while preserving chronological order.
        all_events = list(OrderedDict.fromkeys(f['event_name'] for f in fights))
        if len(all_events) < num_test_events:
            print(f"Warning: Fewer than {num_test_events} events found. Adjusting test set size.")
            num_test_events = len(all_events)
        test_event_names = set(all_events[-num_test_events:])
        self.train_fights = [f for f in fights if f['event_name'] not in test_event_names]
        self.test_fights = [f for f in fights if f['event_name'] in test_event_names]
        print(f"Data loaded. {len(self.train_fights)} training fights, {len(self.test_fights)} testing fights.")
        print(f"Testing on the last {num_test_events} events.")

    def run(self, detailed_report=True):
        """Executes the full pipeline: load, train, evaluate, and report."""
        self._load_and_split_data()
        # Only score fights with a definitive winner; draws and no-contests are excluded.
        eval_fights = [f for f in self.test_fights if f['winner'] not in ("Draw", "NC", "")]
        if not eval_fights:
            print("No fights with definitive outcomes in the test set. Aborting.")
            return
        for model in self.models:
            model_name = model.__class__.__name__
            print(f"\n--- Evaluating Model: {model_name} ---")
            model.train(self.train_fights)
            correct_predictions = 0
            predictions = []
            for fight in eval_fights:
                f1_name, f2_name = fight['fighter_1'], fight['fighter_2']
                actual_winner = fight['winner']
                event_name = fight.get('event_name', 'Unknown Event')
                predicted_winner = model.predict(f1_name, f2_name)
                is_correct = (predicted_winner == actual_winner)
                if is_correct:
                    correct_predictions += 1
                predictions.append({
                    'fight': f"{f1_name} vs. {f2_name}",
                    'event': event_name,
                    'predicted_winner': predicted_winner,
                    'actual_winner': actual_winner,
                    'is_correct': is_correct
                })
            accuracy = (correct_predictions / len(eval_fights)) * 100
            self.results[model_name] = {
                'accuracy': accuracy,
                'predictions': predictions,
                'total_fights': len(eval_fights)
            }
        if detailed_report:
            self._report_detailed_results()
        else:
            self._report_summary()

    def _report_summary(self):
        """Prints a concise summary of model performance."""
        print("\n\n--- Prediction Pipeline Summary ---")
        print(f"{'Model':<25} | {'Accuracy':<10} | {'Fights Evaluated':<20}")
        print("-" * 65)
        for model_name, result in self.results.items():
            print(f"{model_name:<25} | {result['accuracy']:<9.2f}% | {result['total_fights']:<20}")
        print("-" * 65)

    def _save_report_to_json(self, file_path=MODEL_RESULTS_PATH):
        """Saves the detailed prediction results to a JSON file."""
        print(f"\nSaving detailed report to {file_path}...")
        try:
            # Create a report structure that is clean and JSON-friendly.
            report = {}
            for model_name, result in self.results.items():
                # Group predictions by event for a more organized report.
                predictions_by_event = {}
                for p in result['predictions']:
                    # Copy before popping so the entries stored in self.results
                    # are not mutated as a side effect of report generation.
                    p = dict(p)
                    event_name = p.pop('event')
                    predictions_by_event.setdefault(event_name, []).append(p)
                report[model_name] = {
                    "overall_accuracy": f"{result['accuracy']:.2f}%",
                    "total_fights_evaluated": result['total_fights'],
                    "predictions_by_event": predictions_by_event
                }
            with open(file_path, 'w', encoding='utf-8') as f:
                json.dump(report, f, indent=4)
            print("Report saved successfully.")
        except (IOError, TypeError) as e:
            print(f"Error saving report to JSON file: {e}")

    def _report_detailed_results(self):
        """Prints a summary and saves the detailed report to a file."""
        print("\n\n--- Prediction Pipeline Finished: Detailed Report ---")
        # A summary is printed to the console for convenience.
        self._report_summary()
        # The full per-fight breakdown is saved to a JSON file.
        self._save_report_to_json()
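

if __name__ == "__main__":
    # A minimal usage sketch, not part of the pipeline itself. "EloModel" is
    # hypothetical: substitute any concrete BaseModel subclass actually defined
    # in .models. Because this module uses relative imports, run it as a
    # module, e.g. `python -m <package>.pipeline`, rather than as a script.
    from .models import EloModel  # hypothetical model class (assumption)

    pipeline = PredictionPipeline(models=[EloModel()])
    pipeline.run(detailed_report=True)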