"""Shiny app that draws Baseball Savant-style batting percentile cards.

The app reads pre-computed batter summaries from ``data/statcast``, ranks the
selected batter against the qualified population of the chosen level, and
renders a single matplotlib figure containing the player's bio, headshot,
team logo, season line, Fangraphs scouting grades and percentile bars.
"""

from io import BytesIO

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL
import polars as pl
import requests
from matplotlib.colors import LinearSegmentedColormap, Normalize
from matplotlib.gridspec import GridSpec
from PIL import Image
from shiny import App, reactive, ui, render
from shiny.ui import h2, tags

import api_scraper
import df_update

scrape = api_scraper.MLB_Scrape()
update = df_update.df_update()

# Seconds to wait on any outbound HTTP request before giving up, so that a
# stalled remote host cannot hang a user's session indefinitely.
REQUEST_TIMEOUT = 30


def player_bio(pitcher_id: str, ax: plt.Axes, sport_id: int, year_input: int):
    """
    Display the player's bio information on the given axis.

    Parameters
    ----------
    pitcher_id : str
        The player's ID. It is only interpolated into the request URL, so any
        value whose string form is the ID works (the caller in this file
        passes an integer).
    ax : plt.Axes
        The axis to display the bio information on.
    sport_id : int
        The sport ID used to look up the level abbreviation from the MLB
        Stats API ``sports`` endpoint.
    year_input : int
        The season year.

    Raises
    ------
    requests.RequestException
        If either Stats API request fails or times out.
    """
    # Fetch the player's record from the MLB Stats API
    url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"
    person = requests.get(url, timeout=REQUEST_TIMEOUT).json()['people'][0]

    player_name = person['fullName']
    position = person['primaryPosition']['abbreviation']
    age = person['currentAge']
    height = person['height']
    weight = person['weight']

    # Name, position/age/size and the card subtitle, stacked top to bottom
    ax.text(0.5, 1, f'{player_name}', va='top', ha='center', fontsize=30)
    ax.text(0.5, 0.65, f'{position}, Age:{age}, {height}/{weight}', va='top', ha='center', fontsize=20)
    ax.text(0.5, 0.4, 'Season Batting Percentiles', va='top', ha='center', fontsize=16)

    # Look up the abbreviation of the selected level
    response = requests.get(url='https://statsapi.mlb.com/api/v1/sports', timeout=REQUEST_TIMEOUT).json()
    df_sport_id = pl.DataFrame(response['sports'])
    abb = df_sport_id.filter(pl.col('id') == sport_id)['abbreviation'][0]

    # Display the season and sport abbreviation
    ax.text(0.5, 0.20, f'{year_input} {abb} Season', va='top', ha='center', fontsize=14, fontstyle='italic')

    # Turn off the axis
    ax.axis('off')


# Map every team ID to the abbreviation of its parent MLB organisation.
df_teams = scrape.get_teams()
team_dict = dict(zip(df_teams['team_id'], df_teams['parent_org_abbreviation']))

# Team abbreviation -> file stem of the team's logo on the ESPN CDN.
_ESPN_LOGO_CODES = {
    "AZ": "ari",
    "ATH": "oak",
    "ATL": "atl",
    "BAL": "bal",
    "BOS": "bos",
    "CHC": "chc",
    "CWS": "chw",
    "CIN": "cin",
    "CLE": "cle",
    "COL": "col",
    "DET": "det",
    "HOU": "hou",
    "KC": "kc",
    "LAA": "laa",
    "LAD": "lad",
    "MIA": "mia",
    "MIL": "mil",
    "MIN": "min",
    "NYM": "nym",
    "NYY": "nyy",
    "PHI": "phi",
    "PIT": "pit",
    "SD": "sd",
    "SF": "sf",
    "SEA": "sea",
    "STL": "stl",
    "TB": "tb",
    "TEX": "tex",
    "TOR": "tor",
    "WSH": "wsh",
}

# List of MLB teams and their corresponding ESPN logo URLs. The final "ZZZ"
# entry is the league-wide MLB logo rather than a team logo.
mlb_teams = [
    {"team": team, "logo_url": f"https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/{code}.png&h=500&w=500"}
    for team, code in _ESPN_LOGO_CODES.items()
]
mlb_teams.append(
    {"team": "ZZZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/leagues/500/mlb.png&w=500&h=500"}
)

df_image = pd.DataFrame(mlb_teams)
image_dict = df_image.set_index('team')['logo_url'].to_dict()
# URL -> abbreviation; used as the choices of the custom-logo select, so the
# selected input value is the logo URL itself.
image_dict_flip = df_image.set_index('logo_url')['team'].to_dict()

# One entry per percentile bar, in display order. "format" is the format spec
# applied to the raw value, "percentile_flip" marks stats whose percentile is
# inverted (1 - rank) before display, and "stat_title" is the row label.
merged_dict = {
    "woba_percent": {"format": '.3f', "percentile_flip": False, "stat_title": "wOBA"},
    "xwoba_percent": {"format": '.3f', "percentile_flip": False, "stat_title": "xwOBA"},
    "launch_speed": {"format": '.1f', "percentile_flip": False, "stat_title": "Average EV"},
    "launch_speed_90": {"format": '.1f', "percentile_flip": False, "stat_title": "90th% EV"},
    "max_launch_speed": {"format": '.1f', "percentile_flip": False, "stat_title": "Max EV"},
    "barrel_percent": {"format": '.1%', "percentile_flip": False, "stat_title": "Barrel%"},
    "hard_hit_percent": {"format": '.1%', "percentile_flip": False, "stat_title": "Hard-Hit%"},
    "sweet_spot_percent": {"format": '.1%', "percentile_flip": False, "stat_title": "LA Sweet-Spot%"},
    "zone_percent": {"format": '.1%', "percentile_flip": False, "stat_title": "Zone%"},
    "zone_swing_percent": {"format": '.1%', "percentile_flip": False, "stat_title": "Z-Swing%"},
    "chase_percent": {"format": '.1%', "percentile_flip": True, "stat_title": "O-Swing%"},
    "whiff_rate": {"format": '.1%', "percentile_flip": True, "stat_title": "Whiff%"},
    "k_percent": {"format": '.1%', "percentile_flip": True, "stat_title": "K%"},
    "bb_percent": {"format": '.1%', "percentile_flip": False, "stat_title": "BB%"},
    "pull_percent": {"format": '.1%', "percentile_flip": False, "stat_title": "Pull%"},
    "pulled_fly_ball_percent": {"format": '.1%', "percentile_flip": False, "stat_title": "Pull FB%"},
}

# Sport ID (as a string) -> label shown in the level select.
level_dict = {
    '11': 'AAA',
    '14': 'A (FSL)',
}

# Sport ID (as a string) -> level token used in the summary CSV file names.
level_dict_file = {
    '11': 'aaa',
    '14': 'a',
}

year_list = [2024]

# Define the UI layout for the app
app_ui = ui.page_fluid(
    ui.tags.div(
        {"style": "width:90%;margin: 0 auto;max-width: 1600px;"},
        ui.tags.style(
            """
            h4 {
                margin-top: 1em;font-size:35px;
            }
            h2{
                font-size:25px;
            }
            """
        ),
        ui.tags.h4("TJStats"),
        ui.tags.i("Baseball Analytics and Visualizations"),
        ui.markdown("""Follow me on Twitter1"""),
        ui.markdown("""Support me on Patreon for Access to 2024 Apps1"""),
        ui.tags.h5("Statcast Batting Summaries"),
        ui.layout_sidebar(
            ui.panel_sidebar(
                # Row for selecting season and level
                ui.row(
                    ui.column(6, ui.input_select('year_input', 'Select Season', year_list, selected=2024)),
                    ui.column(6, ui.input_select('level_input', 'Select Level', level_dict)),
                ),
                # Row for the action button to get player list
                ui.row(ui.input_action_button("player_button", "Get Player List", class_="btn-primary")),
                # Row for selecting the player
                # NOTE(review): the second positional argument presumably binds
                # to output_ui's `inline` parameter rather than acting as a
                # label - confirm the intent against the installed Shiny version.
                ui.row(ui.column(12, ui.output_ui('player_select_ui', 'Select Player'))),
                ui.row(
                    ui.column(6, ui.input_switch("switch", "Custom Team?", False)),
                    ui.column(6, ui.input_select('logo_select', 'Select Custom Logo', image_dict_flip, multiple=False)),
                ),
                # Row for the action button to generate plot
                ui.row(ui.input_action_button("generate_plot", "Generate Plot", class_="btn-primary")),
                width=3,
            ),
            ui.panel_main(
                ui.navset_tab(
                    # Tab for the batter summary plot
                    ui.nav(
                        "Batter Summary",
                        ui.output_text("status"),
                        ui.output_plot('plot', width='1200px', height='1200px'),
                    ),
                )
            ),
        ),
    )
)


def server(input, output, session):
    """Register the app's reactive outputs: the batter select and the plot."""

    @render.ui
    @reactive.event(input.player_button, ignore_none=False)
    def player_select_ui():
        """Build the batter select input for the chosen level and season."""
        year = int(input.year_input())
        sport_id = int(input.level_input())

        # Positions of every non-pitcher returned for the level and season
        df_position_players = scrape.get_players(sport_id=sport_id, season=year).filter(
            ~pl.col("position").is_in(['P', 'TWP'])).sort("name")
        batter_dict_pos = dict(zip(df_position_players['player_id'], df_position_players['position']))

        batter_summary = pl.read_csv(
            f'data/statcast/batter_summary_{level_dict_file[str(sport_id)]}_{year}.csv'
        ).sort('batter_name', descending=False)

        # Attach each batter's position. Batters missing from the position
        # lookup fall back to their ID; the result is always converted to a
        # string so the new column has a single, explicit dtype.
        batter_summary = batter_summary.with_columns(
            pl.col("batter_id").map_elements(
                lambda x: str(batter_dict_pos.get(x, x)),
                return_dtype=pl.Utf8,
            ).alias("position")
        )

        # Batter ID -> "Name - Position" label shown in the select input
        batter_dict = dict(zip(batter_summary['batter_id'],
                               batter_summary['batter_name'] + ' - ' + batter_summary['position']))

        return ui.input_select("batter_id", "Select Batter", batter_dict, selectize=True)

    @output
    @render.plot
    @reactive.event(input.generate_plot, ignore_none=False)
    def plot():
        """Render the percentile card for the currently selected batter."""

        def draw_baseball_savant_percentiles(new_player_metrics, new_player_percentiles, colors=None, sport_id=None, year_input=None):
            """
            Draw Baseball Savant-style percentile bars with proper alignment and scaling.

            The figure is created with ``plt.figure`` and is not returned.

            :param new_player_metrics: DataFrame containing new player metrics.
            :param new_player_percentiles: DataFrame containing new player percentiles.
            :param colors: List of colors for bars (optional; derived from the
                percentiles with a blue-to-red colormap when omitted).
            :param sport_id: Sport ID forwarded to ``player_bio``.
            :param year_input: Season year forwarded to ``player_bio``.
            """
            batter_id = new_player_metrics['batter_id'][0]
            stat_keys = list(merged_dict)
            stats = [merged_dict[key]['stat_title'] for key in stat_keys]

            # Percentile shown on each bar (inverted for "flip" stats), clamped to 1-100
            percentile_row = new_player_percentiles.select(stat_keys).to_numpy()[0]
            percentiles = [int((1 - x) * 100) if merged_dict[stat]["percentile_flip"] else int(x * 100)
                           for x, stat in zip(percentile_row, stat_keys)]
            percentiles = np.clip(percentiles, 1, 100)

            # Raw values formatted per stat, with the percent sign dropped
            metric_row = new_player_metrics.select(stat_keys).to_numpy()[0]
            values = [str(f'{x:{merged_dict[stat]["format"]}}').strip('%')
                      for x, stat in zip(metric_row, stat_keys)]

            # Bar colours: blue -> grey -> red, saturating below 10% and above 90%
            color_list = ['#3661AD', '#B4CFD1', '#D82129']
            if colors is None:
                cmap = LinearSegmentedColormap.from_list("custom_cmap", color_list)
                norm = Normalize(vmin=0.1, vmax=0.9)
                colors = [cmap(p) for p in norm(percentiles / 100)]

            # Figure setup
            bar_height = 4.5
            spacing = 1
            fig_height = (bar_height + spacing) * len(stats)
            fig = plt.figure(figsize=(12, 12))
            gs = GridSpec(6, 5, height_ratios=[0.1, 1.5, 0.9, 0.9, 7.6, 0.1], width_ratios=[0.2, 1.5, 7, 1.5, 0.2])

            # Define subplots
            ax_title = fig.add_subplot(gs[1, 2])
            ax_table = fig.add_subplot(gs[2, :])
            ax_fv_table = fig.add_subplot(gs[3, :])
            ax = fig.add_subplot(gs[4, :])
            ax_logo = fig.add_subplot(gs[1, 3])

            ax.set_xlim(-1, 99)
            ax.set_ylim(-1, 99)
            ax.set_aspect("equal")
            ax.axis("off")

            # Draw each bar, top to bottom
            label_fontsize = 14
            for i, (stat, percentile, value, color) in enumerate(zip(stats, percentiles, values, colors)):
                y = fig_height - (i + 1) * (bar_height + spacing)
                bar_mid = y + bar_height / 2

                # Thin background track, filled bar, then the percentile bubble on top
                ax.add_patch(patches.Rectangle((0, y + bar_height / 4), 100, bar_height / 2, color="#C7DCDC", lw=0))
                ax.add_patch(patches.Rectangle((0, y), percentile, bar_height, color=color, lw=0))
                circle = plt.Circle((percentile, bar_mid), bar_height / 2, color=color, ec='white', lw=1.5, zorder=10)
                ax.add_patch(circle)

                ax.text(percentile, bar_mid, f"{percentile}", ha="center", va="center", fontsize=10, color='white', zorder=10, fontweight='bold')
                ax.text(-5, bar_mid, stat, ha="right", va="center", fontsize=label_fontsize)
                ax.text(115, bar_mid, str(value), ha="right", va="center", fontsize=label_fontsize, zorder=5)

                # Dashed separators between rows (none above the first row)
                if i > 0:
                    separator_y = y + bar_height + spacing / 2
                    ax.hlines(y=separator_y, color='#399098', linestyle=(0, (5, 5)), linewidth=1, xmin=-33, xmax=0)
                    ax.hlines(y=separator_y, color='#399098', linestyle=(0, (5, 5)), linewidth=1, xmin=100, xmax=115)

            # Draw vertical lines for 10%, 50%, and 90% with labels
            for x, label, align, color in zip([10, 50, 90], ["Poor", "Average", "Great"], ['center', 'center', 'center'], color_list):
                ax.axvline(x=x, ymin=0, ymax=1, color='#FFF', linestyle='-', lw=1, zorder=1, alpha=0.5)
                ax.text(x, fig_height + 4, label, ha=align, va='center', fontsize=12, fontweight='bold', color=color)
                triangle = patches.RegularPolygon((x, fig_height + 1), 3, radius=1, orientation=0, color=color, zorder=2)
                ax.add_patch(triangle)

            player_bio(batter_id, ax=ax_title, sport_id=sport_id, year_input=year_input)

            # Add the logo. The player's own team is only resolved when no
            # custom logo is requested, and an unmapped player or team falls
            # back to the league logo instead of raising KeyError.
            if input.switch():
                logo_url = input.logo_select()
            else:
                team_abbreviation = team_dict.get(player_team_dict.get(batter_id))
                logo_url = image_dict.get(team_abbreviation, image_dict['ZZZ'])
            response = requests.get(logo_url, timeout=REQUEST_TIMEOUT)
            img = Image.open(BytesIO(response.content))
            ax_logo.imshow(img)
            ax_logo.axis("off")

            ax.axis('equal')

            # Metrics data table
            metrics_data = {
                "Pitches": new_player_metrics['pitches'][0],
                "PA": new_player_metrics['pa'][0],
                "BIP": new_player_metrics['bip'][0],
                "HR": f"{new_player_metrics['home_run'][0]:.0f}",
                "AVG": f"{new_player_metrics['avg'][0]:.3f}",
                "OBP": f"{new_player_metrics['obp'][0]:.3f}",
                "SLG": f"{new_player_metrics['slg'][0]:.3f}",
                "OPS": f"{new_player_metrics['obp'][0] + new_player_metrics['slg'][0]:.3f}",
            }
            df_table = pd.DataFrame(metrics_data, index=[0])
            ax_table.axis('off')
            table = ax_table.table(cellText=df_table.values, colLabels=df_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
            for key, cell in table.get_celld().items():
                if key[0] == 0:  # header row
                    cell.set_text_props(fontweight='bold')
            table.auto_set_font_size(False)
            table.set_fontsize(12)
            table.scale(1, 1.5)

            # Additional subplots for spacing
            ax_top = fig.add_subplot(gs[0, :])
            ax_bot = fig.add_subplot(gs[-1, :])
            ax_top.axis('off')
            ax_bot.axis('off')
            ax_bot.text(0.05, 2, "By: Thomas Nestico (@TJStats)", ha="left", va="center", fontsize=14)
            ax_bot.text(0.95, 2, "Data: MLB, Fangraphs", ha="right", va="center", fontsize=14)
            fig.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.01)

            # Player headshot
            ax_headshot = fig.add_subplot(gs[1, 1])
            try:
                url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_640/v1/people/{batter_id}/headshot/milb/current.png'
                response = requests.get(url, timeout=REQUEST_TIMEOUT)
                img = Image.open(BytesIO(response.content))
                ax_headshot.set_xlim(0, 1)
                ax_headshot.set_ylim(0, 1)
                ax_headshot.imshow(img, extent=[1/6, 5/6, 0, 1], origin='upper')
            except PIL.UnidentifiedImageError:
                # The response was not a readable image: leave the headshot
                # panel blank and carry on, so the rest of the card (table
                # title and scouting grades) is still drawn.
                pass
            ax_headshot.axis('off')

            ax_table.set_title('Season Summary', style='italic')

            # Fangraphs scouting grades table
            ax_fv_table.axis('off')
            fangraphs_id = dict_mlb_fg.get(batter_id)
            df_fv_table = None
            if fangraphs_id is not None:
                df_fv_table = df_prospects[df_prospects['minorMasterId'] == fangraphs_id][
                    ['cFV', 'Hit', 'Game', 'Raw', 'Spd', 'Fld']].reset_index(drop=True)

            # No roster mapping or no prospect row: show a placeholder instead
            if df_fv_table is None or df_fv_table.empty:
                ax_fv_table.text(x=0.5, y=0.5, s='No Scouting Data', style='italic', ha='center', va='center', fontsize=20, bbox=dict(facecolor='white', alpha=1, pad=10))
                return

            df_fv_table.columns = ['FV', 'Hit', 'Game', 'Raw', 'Spd', 'Fld']
            table_fv = ax_fv_table.table(cellText=df_fv_table.values, colLabels=df_fv_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
            for key, cell in table_fv.get_celld().items():
                if key[0] == 0:  # header row
                    cell.set_text_props(fontweight='bold')
            table_fv.auto_set_font_size(False)
            table_fv.set_fontsize(12)
            table_fv.scale(1, 1.5)
            ax_fv_table.set_title('Fangraphs Scouting Grades', style='italic')

        def calculate_new_player_percentiles(player_id, new_player_metrics, player_summary_filtered):
            """
            Calculate percentiles for a new player's metrics.

            The player's row is appended to the filtered population (with any
            existing row for the same player removed first), every stat column
            is percentile-ranked, and the player's row of ranks is returned.

            :param player_id: ID of the player.
            :param new_player_metrics: DataFrame containing new player metrics.
            :param player_summary_filtered: Filtered player summary DataFrame.
            :return: DataFrame containing new player percentiles.
            """
            population = player_summary_filtered[['batter_id'] + stat_list].filter(pl.col('batter_id') != player_id).clone()
            combined_data = pl.concat([population, new_player_metrics], how="vertical").to_pandas()
            combined_percentiles = pl.DataFrame(
                pd.concat([combined_data['batter_id'], combined_data[stat_list].rank(pct=True)], axis=1))
            return combined_percentiles.filter(pl.col('batter_id') == player_id)

        # Show progress/loading notification
        with ui.Progress(min=0, max=1) as p:
            p.set(message="Generating plot", detail="This may take a while...")
            p.set(0.3, "Gathering data...")

            year = int(input.year_input())
            sport_id = int(input.level_input())
            batter_id = int(input.batter_id())

            df_player = scrape.get_players(sport_id=sport_id, season=year)
            player_team_dict = dict(zip(df_player['player_id'], df_player['team']))

            batter_summary = pl.read_csv(f'data/statcast/batter_summary_{level_dict_file[str(sport_id)]}_{year}.csv')
            df_prospects = pd.read_csv(f'data/prospects/prospects_{year}.csv')
            df_rosters = pd.read_csv(f'data/rosters/fangraphs_rosters_{year}.csv')

            # MLB player ID -> Fangraphs minor-league master ID
            df_small = df_rosters[['minorbamid', 'minormasterid']].dropna()
            dict_mlb_fg = dict(zip(df_small['minorbamid'].astype(int), df_small['minormasterid']))

            # Population the player is ranked against: at least 300 PA and a
            # non-negative average exit velocity
            batter_summary_filter = batter_summary.filter((pl.col('pa') >= 300) & (pl.col('launch_speed') >= 0))
            # Every column after the first two is treated as a stat
            stat_list = batter_summary.columns[2:]

            new_player_metrics = batter_summary.filter(pl.col('batter_id') == batter_id)[['batter_id'] + stat_list]

            # Get percentiles for the new player
            new_player_percentiles = calculate_new_player_percentiles(batter_id, new_player_metrics, batter_summary_filter)

            p.set(0.6, "Creating plot...")

            # Draw Baseball Savant-style percentile bars
            draw_baseball_savant_percentiles(new_player_metrics=new_player_metrics,
                                             new_player_percentiles=new_player_percentiles,
                                             sport_id=sport_id,
                                             year_input=year)


app = App(app_ui, server)