Alvaro
Refactor config imports and add JSON report for predictions
38c6a34
raw
history blame
2.17 kB
import os
import json
from .scrape_fights import scrape_all_events
from .scrape_fighters import scrape_all_fighters
from .to_csv import json_to_csv, fighters_json_to_csv
from .preprocess import preprocess_fighters_csv
from .. import config
def main():
    """
    Run the UFC data pipeline: scrape, convert to CSV, preprocess, clean up.

    Steps, in order:
      1. Scrape events/fights and dump the result to ``config.EVENTS_JSON_PATH``.
      2. Scrape fighters and dump the result to ``config.FIGHTERS_JSON_PATH``.
      3. Convert both JSON files to CSV (``config.FIGHTS_CSV_PATH`` and
         ``config.FIGHTERS_CSV_PATH``).
      4. Preprocess the fighters CSV (the progress message describes this as
         converting height to cm).
      5. Delete the intermediate JSON files.

    Returns:
        None. Progress is reported on stdout.

    Raises:
        Whatever the scraping, JSON-dump, conversion, or preprocessing steps
        raise; those errors are not handled here. Failures while deleting the
        temporary JSON files are only printed, never raised.
    """
    # Ensure the output directory exists. exist_ok=True keeps this safe if
    # another process creates the directory between the check and the call.
    if not os.path.exists(config.OUTPUT_DIR):
        os.makedirs(config.OUTPUT_DIR, exist_ok=True)
        print(f"Created directory: {config.OUTPUT_DIR}")

    # --- Step 1: Scrape Events and Fights ---
    print("\n--- Starting Events and Fights Scraping ---")
    all_events_data = scrape_all_events()
    with open(config.EVENTS_JSON_PATH, 'w') as f:
        json.dump(all_events_data, f, indent=4)
    print(f"Scraping for events complete. Data saved to {config.EVENTS_JSON_PATH}")

    # --- Step 2: Scrape Fighters ---
    print("\n--- Starting Fighters Scraping ---")
    all_fighters_data = scrape_all_fighters()
    with open(config.FIGHTERS_JSON_PATH, 'w') as f:
        json.dump(all_fighters_data, f, indent=4)
    print(f"Scraping for fighters complete. Data saved to {config.FIGHTERS_JSON_PATH}")

    # --- Step 3: Convert JSON to CSV ---
    print("\n--- Converting all JSON files to CSV ---")
    json_to_csv(config.EVENTS_JSON_PATH, config.FIGHTS_CSV_PATH)
    fighters_json_to_csv(config.FIGHTERS_JSON_PATH, config.FIGHTERS_CSV_PATH)

    # --- Step 4: Preprocess CSV data ---
    print("\n--- Preprocessing fighter data (converting height to cm) ---")
    preprocess_fighters_csv(config.FIGHTERS_CSV_PATH)

    # --- Step 5: Clean up temporary JSON files ---
    print("\n--- Deleting temporary JSON files ---")
    # Each file is handled on its own so that a failure to delete the first
    # one does not prevent the second one from being cleaned up.
    for json_path in (config.EVENTS_JSON_PATH, config.FIGHTERS_JSON_PATH):
        try:
            os.remove(json_path)
        except FileNotFoundError:
            # Nothing to delete; stay silent, as the old exists() check did.
            pass
        except OSError as e:
            print(f"Error deleting JSON files: {e}")
        else:
            print(f"Deleted: {json_path}")

    print("\n--- Pipeline Finished ---")
# Script entry point. This module uses package-relative imports, so it has to
# be launched as part of its package (``python -m <package>.<module>``) rather
# than as a standalone file.
if __name__ == "__main__":
    main()