Spaces:

AlvaroMros
/

ufc-predictor

Sleeping

App Files Files

xet

Community

ufc-predictor / src /predict /predict.py

Alvaro

Baseline

d48eef6 4 months ago

raw

history blame

3.24 kB

	import csv
	import os
	import sys
	from datetime import datetime
	from ..scrape.config import FIGHTS_CSV_PATH, FIGHTERS_CSV_PATH

	def load_fighters_data():
	"""Loads fighter data, including ELO scores, into a dictionary."""
	if not os.path.exists(FIGHTERS_CSV_PATH):
	print(f"Error: Fighter data not found at '{FIGHTERS_CSV_PATH}'.")
	print("Please run the ELO analysis first ('python -m src.analysis.elo').")
	return None

	fighters = {}
	with open(FIGHTERS_CSV_PATH, 'r', encoding='utf-8') as f:
	reader = csv.DictReader(f)
	for row in reader:
	full_name = f"{row['first_name']} {row['last_name']}".strip()
	fighters[full_name] = {'elo': float(row.get('elo', 1500))} # Default ELO if missing
	return fighters

	def load_fights_data():
	"""Loads fight data and sorts it chronologically."""
	if not os.path.exists(FIGHTS_CSV_PATH):
	print(f"Error: Fights data not found at '{FIGHTS_CSV_PATH}'.")
	return None

	with open(FIGHTS_CSV_PATH, 'r', encoding='utf-8') as f:
	fights = list(csv.DictReader(f))

	# Sort fights chronologically to ensure a proper train/test split later
	fights.sort(key=lambda x: datetime.strptime(x['event_date'], '%B %d, %Y'))
	return fights

	def run_elo_baseline_model(fights, fighters):
	"""
	Runs a simple baseline prediction model where the fighter with the higher ELO is predicted to win.
	"""
	correct_predictions = 0
	total_predictions = 0

	for fight in fights:
	fighter1_name = fight['fighter_1']
	fighter2_name = fight['fighter_2']
	actual_winner = fight['winner']

	# Skip fights that are draws or no contests
	if actual_winner in ["Draw", "NC", ""]:
	continue

	fighter1 = fighters.get(fighter1_name)
	fighter2 = fighters.get(fighter2_name)

	if not fighter1 or not fighter2:
	continue # Skip if fighter data is missing

	elo1 = fighter1.get('elo', 1500)
	elo2 = fighter2.get('elo', 1500)

	# Predict winner based on higher ELO
	predicted_winner = fighter1_name if elo1 > elo2 else fighter2_name

	if predicted_winner == actual_winner:
	correct_predictions += 1

	total_predictions += 1

	accuracy = (correct_predictions / total_predictions) * 100 if total_predictions > 0 else 0
	return accuracy, total_predictions

	def main():
	"""
	Main function to run the prediction pipeline.
	"""
	print("--- Starting ML Prediction Pipeline ---")

	# Load data
	fighters_data = load_fighters_data()
	fights_data = load_fights_data()

	if not fighters_data or not fights_data:
	print("Aborting pipeline due to missing data.")
	return

	# Run baseline model
	print("\nRunning Baseline Model (Predicting winner by highest ELO)...")
	accuracy, total_fights = run_elo_baseline_model(fights_data, fighters_data)

	print("\n--- Baseline Model Evaluation ---")
	print(f"Total Fights Evaluated: {total_fights}")
	print(f"Model Accuracy: {accuracy:.2f}%")
	print("---------------------------------")

	if __name__ == '__main__':
	main()