import polars as pl
import api_scraper
import pandas as pd
# Shared scraper instance, created at import time; used further down for the
# team lookup table and for the per-level player lists.
scrape = api_scraper.MLB_Scrape()
import df_update
# NOTE(review): `update` is not referenced anywhere in the visible part of
# this file — confirm whether it is still needed.
update = df_update.df_update()
from matplotlib.colors import LinearSegmentedColormap, Normalize
import numpy as np
import requests
from io import BytesIO
from PIL import Image
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import PIL
# Sport ID (as a string) -> display label for the level.
# This name is bound again further down in the file, before the UI reads it.
level_dict = {"11": "AAA", "14": "A"}
def player_bio(pitcher_id: str, ax: plt.Axes, sport_id: int, year_input: int):
    """
    Display the player's bio information on the given axis.

    Two requests are made to the MLB Stats API: one for the player record and
    one for the list of sports (to resolve the sport abbreviation).

    Parameters
    ----------
    pitcher_id : str
        The player's ID; it is interpolated into the request URL as-is.
    ax : plt.Axes
        The axis to display the bio information on. Its axis frame is turned off.
    sport_id : int
        The sport ID (1 for MLB, other for minor leagues).
    year_input : int
        The season year.

    Raises
    ------
    requests.HTTPError
        If either Stats API request returns an error status.
    requests.Timeout
        If either Stats API request does not answer within the timeout.
    """
    # Fail fast instead of hanging the app when the Stats API is unresponsive.
    timeout_seconds = 10

    # Fetch the player record
    url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"
    player_response = requests.get(url, timeout=timeout_seconds)
    player_response.raise_for_status()
    person = player_response.json()['people'][0]

    # Extract only the fields that are actually rendered
    player_name = person['fullName']
    position = person['primaryPosition']['abbreviation']
    age = person['currentAge']
    height = person['height']
    weight = person['weight']

    # Display the player's name, position, age, height, and weight on the axis
    ax.text(0.5, 1, f'{player_name}', va='top', ha='center', fontsize=30)
    ax.text(0.5, 0.65, f'{position}, Age:{age}, {height}/{weight}', va='top', ha='center', fontsize=20)
    ax.text(0.5, 0.4, 'Season Batting Percentiles', va='top', ha='center', fontsize=16)

    # Retrieve the sports list to translate the sport ID into its abbreviation
    sports_response = requests.get(url='https://statsapi.mlb.com/api/v1/sports', timeout=timeout_seconds)
    sports_response.raise_for_status()
    df_sport_id = pl.DataFrame(sports_response.json()['sports'])
    abb = df_sport_id.filter(pl.col('id') == sport_id)['abbreviation'][0]

    # Display the season and sport abbreviation
    ax.text(0.5, 0.20, f'{year_input} {abb} Season', va='top', ha='center', fontsize=14, fontstyle='italic')

    # Turn off the axis
    ax.axis('off')
# Team table from the project scraper, and a lookup from team ID to the
# abbreviation of its parent organisation.
df_teams = scrape.get_teams()
team_dict = {
    team_id: parent_abbreviation
    for team_id, parent_abbreviation in zip(df_teams['team_id'], df_teams['parent_org_abbreviation'])
}
# MLB teams and their ESPN logo URLs.
# Every club logo shares one URL shape and differs only in ESPN's short team
# code, so the table is generated from (abbreviation -> ESPN code) pairs.
_ESPN_TEAM_LOGO_URL = "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/{code}.png&h=500&w=500"
_ESPN_TEAM_CODES = {
    "AZ": "ari",
    "ATH": "oak",
    "ATL": "atl",
    "BAL": "bal",
    "BOS": "bos",
    "CHC": "chc",
    "CWS": "chw",
    "CIN": "cin",
    "CLE": "cle",
    "COL": "col",
    "DET": "det",
    "HOU": "hou",
    "KC": "kc",
    "LAA": "laa",
    "LAD": "lad",
    "MIA": "mia",
    "MIL": "mil",
    "MIN": "min",
    "NYM": "nym",
    "NYY": "nyy",
    "PHI": "phi",
    "PIT": "pit",
    "SD": "sd",
    "SF": "sf",
    "SEA": "sea",
    "STL": "stl",
    "TB": "tb",
    "TEX": "tex",
    "TOR": "tor",
    "WSH": "wsh",
}
mlb_teams = [
    {"team": abbreviation, "logo_url": _ESPN_TEAM_LOGO_URL.format(code=code)}
    for abbreviation, code in _ESPN_TEAM_CODES.items()
]
# Final entry: the league logo under the placeholder abbreviation "ZZZ".
mlb_teams.append(
    {"team": "ZZZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/leagues/500/mlb.png&w=500&h=500"}
)

# Same data as a frame, plus lookups in both directions
# (abbreviation -> URL and URL -> abbreviation).
df_image = pd.DataFrame(mlb_teams)
image_dict = dict(zip(df_image['team'], df_image['logo_url']))
image_dict_flip = dict(zip(df_image['logo_url'], df_image['team']))
# Per-stat display settings, keyed by the stat's column name:
#   format          - format spec applied to the raw value
#   percentile_flip - True when the percentile is inverted (1 - pct) for display
#   stat_title      - label drawn next to the bar
# Insertion order is the order in which the bars are drawn.
merged_dict = {
    column: {"format": fmt, "percentile_flip": flip, "stat_title": title}
    for column, fmt, flip, title in (
        ("woba_percent", '.3f', False, "wOBA"),
        ("xwoba_percent", '.3f', False, "xwOBA"),
        ("launch_speed", '.1f', False, "Average EV"),
        ("launch_speed_90", '.1f', False, "90th% EV"),
        ("max_launch_speed", '.1f', False, "Max EV"),
        ("barrel_percent", '.1%', False, "Barrel%"),
        ("hard_hit_percent", '.1%', False, "Hard-Hit%"),
        ("sweet_spot_percent", '.1%', False, "LA Sweet-Spot%"),
        ("zone_percent", '.1%', False, "Zone%"),
        ("zone_swing_percent", '.1%', False, "Z-Swing%"),
        ("chase_percent", '.1%', True, "O-Swing%"),
        ("whiff_rate", '.1%', True, "Whiff%"),
        ("k_percent", '.1%', True, "K%"),
        ("bb_percent", '.1%', False, "BB%"),
        ("pull_percent", '.1%', False, "Pull%"),
        ("pulled_fly_ball_percent", '.1%', False, "Pull FB%"),
    )
}
# Levels offered by the app, one row per level:
#   (sport ID as a string, label shown in the level selector, slug used in data file names)
_levels = (
    ('11', 'AAA', 'aaa'),
    ('14', 'A (FSL)', 'a'),
)
level_dict = {sport_id: label for sport_id, label, _ in _levels}
level_dict_file = {sport_id: slug for sport_id, _, slug in _levels}

# Seasons offered in the season selector.
year_list = [2024]
from shiny import App, reactive, ui, render
from shiny.ui import h2, tags
# UI layout for the app, assembled from named pieces: page styling, the
# sidebar with its controls, the main panel, and finally the page itself.

# Inline CSS for the page headings.
_heading_css = ui.tags.style(
    """
h4 {
margin-top: 1em;font-size:35px;
}
h2{
font-size:25px;
}
"""
)

# Season and level selectors, side by side.
_season_level_row = ui.row(
    ui.column(6, ui.input_select('year_input', 'Select Season', year_list, selected=2024)),
    ui.column(6, ui.input_select('level_input', 'Select Level', level_dict)),
)

# Button that triggers building the player list.
_player_list_row = ui.row(
    ui.input_action_button("player_button", "Get Player List", class_="btn-primary")
)

# Placeholder filled by the server-side `player_select_ui` output.
_player_select_row = ui.row(
    ui.column(12, ui.output_ui('player_select_ui', 'Select Player'))
)

# Optional custom logo: a switch plus the logo selector.
_custom_logo_row = ui.row(
    ui.column(6, ui.input_switch("switch", "Custom Team?", False)),
    ui.column(6, ui.input_select('logo_select', 'Select Custom Logo', image_dict_flip, multiple=False)),
)

# Button that triggers plot generation.
_generate_plot_row = ui.row(
    ui.input_action_button("generate_plot", "Generate Plot", class_="btn-primary")
)

_sidebar = ui.panel_sidebar(
    _season_level_row,
    _player_list_row,
    _player_select_row,
    _custom_logo_row,
    _generate_plot_row,
    width=3,
)

# Single tab holding the status text and the percentile plot.
_main_panel = ui.panel_main(
    ui.navset_tab(
        ui.nav(
            "Batter Summary",
            ui.output_text("status"),
            ui.output_plot('plot', width='1200px', height='1200px'),
        ),
    )
)

app_ui = ui.page_fluid(
    ui.tags.div(
        {"style": "width:90%;margin: 0 auto;max-width: 1600px;"},
        _heading_css,
        ui.tags.h4("TJStats"),
        ui.tags.i("Baseball Analytics and Visualizations"),
        ui.markdown("""Follow me on Twitter1"""),
        ui.markdown("""Support me on Patreon for Access to 2024 Apps1"""),
        ui.tags.h5("Statcast Batting Summaries"),
        ui.layout_sidebar(_sidebar, _main_panel),
    )
)
def server(input, output, session):
    """
    Shiny server function: registers the player selector and the percentile plot.

    :param input: Shiny inputs (season, level, selected batter, logo options, buttons).
    :param output: Shiny outputs object, used to register the plot.
    :param session: Shiny session (not used directly here).
    """

    @render.ui
    @reactive.event(input.player_button, ignore_none=False)
    def player_select_ui():
        """Build the batter drop-down for the selected level and season."""
        year = int(input.year_input())
        sport_id = int(input.level_input())

        # Players for the selected level and season, excluding pitchers and two-way players
        df_batter_info = scrape.get_players(sport_id=sport_id, season=year).filter(
            ~pl.col("position").is_in(['P', 'TWP'])).sort("name")

        # Player ID -> position
        batter_dict_pos = dict(zip(df_batter_info['player_id'], df_batter_info['position']))

        batter_summary = pl.read_csv(
            f'data/statcast/batter_summary_{level_dict_file[str(sport_id)]}_{year}.csv'
        ).sort('batter_name', descending=False)

        # Attach each batter's position to the summary.
        # NOTE(review): IDs missing from the player list fall back to the ID itself,
        # which is not a string like the other positions — confirm this is intended.
        batter_summary = batter_summary.with_columns(
            pl.col("batter_id").map_elements(lambda x: batter_dict_pos.get(x, x)).alias("position")
        )

        # Batter ID -> "Name - Position" label shown in the drop-down
        batter_dict = dict(zip(batter_summary['batter_id'],
                               batter_summary['batter_name'] + ' - ' + batter_summary['position']))

        return ui.input_select("batter_id", "Select Batter", batter_dict, selectize=True)

    @output
    @render.plot
    @reactive.event(input.generate_plot, ignore_none=False)
    def plot():
        """
        Build the percentile card for the selected batter.

        Nothing is returned; the figure created by the drawing helper is left
        as the current matplotlib figure.
        """

        def draw_baseball_savant_percentiles(new_player_metrics, new_player_percentiles, colors=None,
                                             sport_id=None,
                                             year_input=None):
            """
            Draw Baseball Savant-style percentile bars with proper alignment and scaling.

            Besides the bars, the figure holds the bio header, a headshot, a team
            logo, a season summary table and a Fangraphs scouting grades table.
            Reads ``player_team_dict``, ``dict_mlb_fg`` and ``df_prospects`` from
            the enclosing ``plot`` scope.

            :param new_player_metrics: DataFrame containing new player metrics.
            :param new_player_percentiles: DataFrame containing new player percentiles.
            :param colors: Ignored; bar colours are always derived from the
                percentiles. Kept so existing call signatures stay valid.
            :param sport_id: Sport ID forwarded to ``player_bio``.
            :param year_input: Season year forwarded to ``player_bio``.
            """
            # Fail fast instead of hanging the render when an image host is unresponsive.
            request_timeout = 10

            # Extract player information
            batter_id = new_player_metrics['batter_id'][0]
            stat_keys = list(merged_dict)
            stats = [merged_dict[key]['stat_title'] for key in stat_keys]

            # Calculate percentiles (flipped for stats flagged with percentile_flip)
            percentile_row = new_player_percentiles.select(stat_keys).to_numpy()[0]
            percentiles = [
                int((1 - pct) * 100) if merged_dict[key]["percentile_flip"] else int(pct * 100)
                for pct, key in zip(percentile_row, stat_keys)
            ]
            percentiles = np.clip(percentiles, 1, 100)

            # Format the raw values; the percent sign is dropped from the label
            metric_row = new_player_metrics.select(stat_keys).to_numpy()[0]
            values = [
                f'{metric:{merged_dict[key]["format"]}}'.strip('%')
                for metric, key in zip(metric_row, stat_keys)
            ]

            # Get the logo URL. The team lookup is only needed (and only allowed
            # to fail) when no custom logo has been selected.
            if input.switch():
                logo_url = input.logo_select()
            else:
                logo_url = image_dict[team_dict[player_team_dict[batter_id]]]

            # Create a custom colormap
            color_list = ['#3661AD', '#B4CFD1', '#D82129']
            cmap = LinearSegmentedColormap.from_list("custom_cmap", color_list)
            norm = Normalize(vmin=0.1, vmax=0.9)
            norm_percentiles = norm(percentiles / 100)
            colors = [cmap(p) for p in norm_percentiles]

            # Figure setup
            num_stats = len(stats)
            bar_height = 4.5
            spacing = 1
            fig_height = (bar_height + spacing) * num_stats
            fig = plt.figure(figsize=(12, 12))
            gs = GridSpec(6, 5, height_ratios=[0.1, 1.5, 0.9, 0.9, 7.6, 0.1], width_ratios=[0.2, 1.5, 7, 1.5, 0.2])

            # Define subplots
            ax_title = fig.add_subplot(gs[1, 2])
            ax_table = fig.add_subplot(gs[2, :])
            ax_fv_table = fig.add_subplot(gs[3, :])
            ax = fig.add_subplot(gs[4, :])
            ax_logo = fig.add_subplot(gs[1, 3])

            ax.set_xlim(-1, 99)
            ax.set_ylim(-1, 99)
            ax.set_aspect("equal")
            ax.axis("off")

            # Draw each bar
            label_fontsize = 14
            for i, (stat, percentile, value, color) in enumerate(zip(stats, percentiles, values, colors)):
                y = fig_height - (i + 1) * (bar_height + spacing)
                ax.add_patch(patches.Rectangle((0, y + bar_height / 4), 100, bar_height / 2, color="#C7DCDC", lw=0))
                ax.add_patch(patches.Rectangle((0, y), percentile, bar_height, color=color, lw=0))
                circle_y = y + bar_height - bar_height / 2
                circle = plt.Circle((percentile, circle_y), bar_height / 2, color=color, ec='white', lw=1.5, zorder=10)
                ax.add_patch(circle)
                ax.text(percentile, circle_y, f"{percentile}", ha="center", va="center", fontsize=10, color='white', zorder=10, fontweight='bold')
                ax.text(-5, y + bar_height / 2, stat, ha="right", va="center", fontsize=label_fontsize)
                ax.text(115, y + bar_height / 2, str(value), ha="right", va="center", fontsize=label_fontsize, zorder=5)
                # Dashed separators go between rows, so the first row gets none above it
                if i > 0:
                    ax.hlines(y=y + bar_height + spacing / 2, color='#399098', linestyle=(0, (5, 5)), linewidth=1, xmin=-33, xmax=0)
                    ax.hlines(y=y + bar_height + spacing / 2, color='#399098', linestyle=(0, (5, 5)), linewidth=1, xmin=100, xmax=115)

            # Draw vertical lines for 10%, 50%, and 90% with labels
            for x, label, color in zip([10, 50, 90], ["Poor", "Average", "Great"], color_list):
                ax.axvline(x=x, ymin=0, ymax=1, color='#FFF', linestyle='-', lw=1, zorder=1, alpha=0.5)
                ax.text(x, fig_height + 4, label, ha='center', va='center', fontsize=12, fontweight='bold', color=color)
                triangle = patches.RegularPolygon((x, fig_height + 1), 3, radius=1, orientation=0, color=color, zorder=2)
                ax.add_patch(triangle)

            # Title / bio header
            player_bio(batter_id, ax=ax_title, sport_id=sport_id, year_input=year_input)

            # Add team (or custom) logo
            response = requests.get(logo_url, timeout=request_timeout)
            img = Image.open(BytesIO(response.content))
            ax_logo.imshow(img)
            ax_logo.axis("off")
            ax.axis('equal')

            # Metrics data table
            metrics_data = {
                "Pitches": new_player_metrics['pitches'][0],
                "PA": new_player_metrics['pa'][0],
                "BIP": new_player_metrics['bip'][0],
                "HR": f"{new_player_metrics['home_run'][0]:.0f}",
                "AVG": f"{new_player_metrics['avg'][0]:.3f}",
                "OBP": f"{new_player_metrics['obp'][0]:.3f}",
                "SLG": f"{new_player_metrics['slg'][0]:.3f}",
                "OPS": f"{new_player_metrics['obp'][0] + new_player_metrics['slg'][0]:.3f}",
            }
            df_table = pd.DataFrame(metrics_data, index=[0])
            ax_table.axis('off')
            table = ax_table.table(cellText=df_table.values, colLabels=df_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
            for key, cell in table.get_celld().items():
                if key[0] == 0:
                    cell.set_text_props(fontweight='bold')
            table.auto_set_font_size(False)
            table.set_fontsize(12)
            table.scale(1, 1.5)

            # Additional subplots for spacing
            ax_top = fig.add_subplot(gs[0, :])
            ax_bot = fig.add_subplot(gs[-1, :])
            ax_top.axis('off')
            ax_bot.axis('off')
            ax_bot.text(0.05, 2, "By: Thomas Nestico (@TJStats)", ha="left", va="center", fontsize=14)
            ax_bot.text(0.95, 2, "Data: MLB, Fangraphs", ha="right", va="center", fontsize=14)
            fig.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.01)

            # Player headshot. An unreadable image only leaves this panel blank;
            # it must not stop the rest of the card from being drawn.
            ax_headshot = fig.add_subplot(gs[1, 1])
            headshot = None
            try:
                url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_640/v1/people/{batter_id}/headshot/milb/current.png'
                response = requests.get(url, timeout=request_timeout)
                headshot = Image.open(BytesIO(response.content))
            except PIL.UnidentifiedImageError:
                pass
            if headshot is not None:
                # Limits are fixed before imshow so the image keeps its 1/6..5/6 placement
                ax_headshot.set_xlim(0, 1)
                ax_headshot.set_ylim(0, 1)
                ax_headshot.imshow(headshot, extent=[1/6, 5/6, 0, 1], origin='upper')
            ax_headshot.axis('off')

            ax_table.set_title('Season Summary', style='italic')

            # Fangraphs scouting grades table
            ax_fv_table.axis('off')
            scouting_columns = ['cFV', 'Hit', 'Game', 'Raw', 'Spd', 'Fld']
            df_fv_table = None
            if batter_id in dict_mlb_fg:
                df_fv_table = df_prospects[
                    df_prospects['minorMasterId'] == dict_mlb_fg[batter_id]
                ][scouting_columns].reset_index(drop=True)

            # No ID mapping or no prospect row: show a placeholder instead of a table
            if df_fv_table is None or df_fv_table.empty:
                ax_fv_table.text(x=0.5, y=0.5, s='No Scouting Data', style='italic', ha='center', va='center', fontsize=20, bbox=dict(facecolor='white', alpha=1, pad=10))
                return

            df_fv_table.columns = ['FV', 'Hit', 'Game', 'Raw', 'Spd', 'Fld']
            table_fv = ax_fv_table.table(cellText=df_fv_table.values, colLabels=df_fv_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
            for key, cell in table_fv.get_celld().items():
                if key[0] == 0:
                    cell.set_text_props(fontweight='bold')
            table_fv.auto_set_font_size(False)
            table_fv.set_fontsize(12)
            table_fv.scale(1, 1.5)
            ax_fv_table.set_title('Fangraphs Scouting Grades', style='italic')

        def calculate_new_player_percentiles(player_id, new_player_metrics, player_summary_filtered):
            """
            Calculate percentiles for a new player's metrics.

            The player's row is ranked together with the reference population
            (from which any existing row for the same player is removed first).
            Reads ``stat_list`` from the enclosing ``plot`` scope.

            :param player_id: ID of the player.
            :param new_player_metrics: DataFrame containing new player metrics.
            :param player_summary_filtered: Filtered player summary DataFrame.
            :return: DataFrame containing new player percentiles.
            """
            reference = player_summary_filtered[['batter_id'] + stat_list].filter(pl.col('batter_id') != player_id)
            combined_data = pl.concat([reference, new_player_metrics], how="vertical").to_pandas()
            combined_percentiles = pl.DataFrame(
                pd.concat([combined_data['batter_id'], combined_data[stat_list].rank(pct=True)], axis=1)
            )
            return combined_percentiles.filter(pl.col('batter_id') == player_id)

        # Show progress/loading notification
        with ui.Progress(min=0, max=1) as p:
            p.set(message="Generating plot", detail="This may take a while...")
            p.set(0.3, "Gathering data...")

            year = int(input.year_input())
            sport_id = int(input.level_input())
            batter_id = int(input.batter_id())

            # Player ID -> team, used to pick the team logo
            df_player = scrape.get_players(sport_id=sport_id, season=year)
            player_team_dict = dict(zip(df_player['player_id'], df_player['team']))

            batter_summary = pl.read_csv(f'data/statcast/batter_summary_{level_dict_file[str(sport_id)]}_{year}.csv')
            df_prospects = pd.read_csv(f'data/prospects/prospects_{year}.csv')
            df_rosters = pd.read_csv(f'data/rosters/fangraphs_rosters_{year}.csv')

            # Roster 'minorbamid' -> 'minormasterid', used to find the prospect row
            df_small = df_rosters[['minorbamid', 'minormasterid']].dropna()
            dict_mlb_fg = dict(zip(df_small['minorbamid'].astype(int), df_small['minormasterid']))

            # Reference population for the percentile ranks
            batter_summary_filter = batter_summary.filter((pl.col('pa') >= 300) & (pl.col('launch_speed') >= 0))

            # Every column after the first two is treated as a stat
            stat_list = batter_summary.columns[2:]
            new_player_metrics = batter_summary.filter(pl.col('batter_id') == batter_id)[['batter_id'] + stat_list]

            # Get percentiles for the new player
            new_player_percentiles = calculate_new_player_percentiles(batter_id, new_player_metrics, batter_summary_filter)

            p.set(0.6, "Creating plot...")

            # Draw Baseball Savant-style percentile bars
            draw_baseball_savant_percentiles(new_player_metrics=new_player_metrics,
                                             new_player_percentiles=new_player_percentiles,
                                             sport_id=sport_id,
                                             year_input=year)
# Combine the UI definition and the server function into the Shiny app object.
app = App(app_ui, server)