import csv
import json
import os
import sys
from collections import OrderedDict
from datetime import datetime

from ..config import FIGHTS_CSV_PATH, MODEL_RESULTS_PATH
from .models import BaseModel
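
# NOTE: The fights CSV at FIGHTS_CSV_PATH is assumed to contain at least the
# columns referenced in this module: 'event_date' (formatted like 'January 1, 2024'),
# 'event_name', 'fighter_1', 'fighter_2', and 'winner'. This schema is inferred
# from the field accesses below rather than from a documented contract.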


class PredictionPipeline:
    """
    Orchestrates the model training, evaluation, and reporting pipeline.
    """

    def __init__(self, models):
        if not all(isinstance(m, BaseModel) for m in models):
            raise TypeError("All models must be instances of BaseModel.")
        self.models = models
        self.train_fights = []
        self.test_fights = []
        self.results = {}

    def _load_and_split_data(self, num_test_events=10):
        """Loads and splits the data into chronological training and testing sets."""
        print("\n--- Loading and Splitting Data ---")
        if not os.path.exists(FIGHTS_CSV_PATH):
            raise FileNotFoundError(f"Fights data not found at '{FIGHTS_CSV_PATH}'.")

        with open(FIGHTS_CSV_PATH, 'r', encoding='utf-8') as f:
            fights = list(csv.DictReader(f))

        fights.sort(key=lambda x: datetime.strptime(x['event_date'], '%B %d, %Y'))

        all_events = list(OrderedDict.fromkeys(f['event_name'] for f in fights))
        if len(all_events) < num_test_events:
            print(f"Warning: Fewer than {num_test_events} events found. Adjusting test set size.")
            num_test_events = len(all_events)

        test_event_names = all_events[-num_test_events:]
        self.train_fights = [f for f in fights if f['event_name'] not in test_event_names]
        self.test_fights = [f for f in fights if f['event_name'] in test_event_names]

        print(f"Data loaded. {len(self.train_fights)} training fights, {len(self.test_fights)} testing fights.")
        print(f"Testing on the last {num_test_events} events.")

    def run(self, detailed_report=True):
        """Executes the full pipeline: load, train, evaluate, and report."""
        self._load_and_split_data()

        # Only evaluate fights with a definitive outcome (skip draws, no contests,
        # and rows with no recorded winner).
        eval_fights = [f for f in self.test_fights if f['winner'] not in ["Draw", "NC", ""]]
        if not eval_fights:
            print("No fights with definitive outcomes in the test set. Aborting.")
            return

        for model in self.models:
            model_name = model.__class__.__name__
            print(f"\n--- Evaluating Model: {model_name} ---")
            model.train(self.train_fights)

            correct_predictions = 0
            predictions = []
            for fight in eval_fights:
                f1_name, f2_name = fight['fighter_1'], fight['fighter_2']
                actual_winner = fight['winner']
                event_name = fight.get('event_name', 'Unknown Event')

                predicted_winner = model.predict(f1_name, f2_name)
                is_correct = (predicted_winner == actual_winner)
                if is_correct:
                    correct_predictions += 1

                predictions.append({
                    'fight': f"{f1_name} vs. {f2_name}",
                    'event': event_name,
                    'predicted_winner': predicted_winner,
                    'actual_winner': actual_winner,
                    'is_correct': is_correct
                })

            accuracy = (correct_predictions / len(eval_fights)) * 100
            self.results[model_name] = {
                'accuracy': accuracy,
                'predictions': predictions,
                'total_fights': len(eval_fights)
            }

        if detailed_report:
            self._report_detailed_results()
        else:
            self._report_summary()

    def _report_summary(self):
        """Prints a concise summary of model performance."""
        print("\n\n--- Prediction Pipeline Summary ---")
        print(f"{'Model':<25} | {'Accuracy':<10} | {'Fights Evaluated':<20}")
        print("-" * 65)
        for model_name, result in self.results.items():
            print(f"{model_name:<25} | {result['accuracy']:<9.2f}% | {result['total_fights']:<20}")
        print("-" * 65)

    def _save_report_to_json(self, file_path=MODEL_RESULTS_PATH):
        """Saves the detailed prediction results to a JSON file."""
        print(f"\nSaving detailed report to {file_path}...")
        try:
            # Build a clean, JSON-friendly report structure.
            report = {}
            for model_name, result in self.results.items():
                # Group predictions by event for a more organized report. Work on
                # copies so the prediction dicts stored in self.results stay intact.
                predictions_by_event = {}
                for p in result['predictions']:
                    entry = dict(p)
                    event_name = entry.pop('event')
                    predictions_by_event.setdefault(event_name, []).append(entry)
                report[model_name] = {
                    "overall_accuracy": f"{result['accuracy']:.2f}%",
                    "total_fights_evaluated": result['total_fights'],
                    "predictions_by_event": predictions_by_event
                }
            with open(file_path, 'w', encoding='utf-8') as f:
                json.dump(report, f, indent=4)
            print("Report saved successfully.")
        except (IOError, TypeError) as e:
            print(f"Error saving report to JSON file: {e}")

    def _report_detailed_results(self):
        """Prints a summary and saves the detailed report to a file."""
        print("\n\n--- Prediction Pipeline Finished: Detailed Report ---")
        # A summary is printed to the console for convenience.
        self._report_summary()
        # The full per-fight breakdown is saved to a JSON file.
        self._save_report_to_json()
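

# --- Usage sketch (illustrative only) -----------------------------------------
# `EloModel` and `RandomBaselineModel` are hypothetical BaseModel subclasses;
# substitute the concrete models actually defined in .models. Because this module
# uses relative imports, it has to be run as part of its package
# (e.g. `python -m <package>.<this_module>`).
if __name__ == "__main__":
    from .models import EloModel, RandomBaselineModel  # hypothetical names

    # Train each model on all but the last 10 events, evaluate on those held-out
    # events, print the summary table, and write the detailed JSON report.
    pipeline = PredictionPipeline([EloModel(), RandomBaselineModel()])
    pipeline.run(detailed_report=True)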