"""Shiny app: 2025 Spring Training pitch summary with deltas vs. the 2024 aggregate.

On "Refresh Data" the app loads the hosted 2025 spring pitch data, appends
pitches scraped for today's spring games, applies the stuff model, aggregates
per pitcher / pitch type and renders the result in a Tabulator table.
"""

# NOTE: import order (including the interleaved assignment) is kept as in the
# original file because the wildcard import below makes ordering potentially
# significant for name resolution.
import polars as pl
import api_scraper

mlb_scrape = api_scraper.MLB_Scrape()

from stuff_model import *
from shiny import App, reactive, ui, render
from shiny.ui import h2, tags
from api_scraper import MLB_Scrape
import datetime
from stuff_model import feature_engineering as fe
from stuff_model import stuff_apply
from pytabulator import TableOptions, Tabulator, output_tabulator, render_tabulator, theme

theme.tabulator_site()

scraper = MLB_Scrape()

# Prior-season per-pitcher / per-pitch-type aggregate used as the baseline.
df_year_old_group = pl.read_parquet('pitch_data_agg_2024.parquet')
pitcher_old_dict = dict(zip(df_year_old_group['pitcher_id'],df_year_old_group['pitcher_name']))

_SPRING_DATA_URL = "hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet"

# Metrics that are averaged per pitch type and compared against "<metric>_old".
_METRIC_COLUMNS = (
    "start_speed",
    "ivb",
    "hb",
    "release_pos_z",
    "release_pos_x",
    "extension",
    "tj_stuff_plus",
)

# Usage shares that are rendered as "12.3%" strings.
_PERCENT_COLUMNS = ("pitch_percent", "rhh_percent", "lhh_percent")

# Built with explicit newlines so every markdown bullet starts its own line.
# NOTE(review): the text mentions a 2023 fallback, but no 2023 data is loaded
# anywhere in this file - confirm whether the copy or the code is out of date.
_APP_DESCRIPTION = "\n".join(
    [
        "This app generates a table which shows the 2025 Spring Training data.",
        "* Differences are calculated based on 2024 regular season data",
        "* If 2024 data does not exist for pitcher, 2023 Data is used",
        "* If no difference exists, the pitch is labelled as a new pitch",
    ]
)

# Tabulator column definitions; "textarea" lets the two-line "value\n(diff)"
# cells wrap onto separate lines.
_TABLE_COLUMNS = [
    {"title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter": "input", "frozen": True},
    {"title": "Team", "field": "pitcher_team", "width": 100, "headerFilter": "input", "frozen": True},
    {"title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter": "input", "frozen": True},
    {"title": "New Pitch?", "field": "new_pitch", "width": 125, "headerFilter": "input", "frozen": False},
    {"title": "Pitches", "field": "count", "width": 100, "headerFilter": "input"},
    {"title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter": "input"},
    {"title": "RHH%", "field": "rhh_percent_formatted", "width": 100, "headerFilter": "input"},
    {"title": "LHH%", "field": "lhh_percent_formatted", "width": 100, "headerFilter": "input"},
    {"title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
    {"title": "iVB", "field": "ivb_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
    {"title": "HB", "field": "hb_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
    {"title": "RelH", "field": "release_pos_z_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
    {"title": "RelS", "field": "release_pos_x_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
    {"title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter": "input", "formatter": "textarea"},
    {"title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
]

app_ui = ui.page_fluid(
    ui.card(
        ui.card_header("2025 Spring Training Pitch Data App"),
        ui.row(
            ui.column(
                4,
                ui.markdown(_APP_DESCRIPTION),
                ui.input_action_button(
                    "refresh",
                    "Refresh Data",
                    class_="btn-primary",
                    width="100%",
                ),
            ),
            ui.column(
                3,
                ui.div(
                    "By: ",
                    ui.tags.a(
                        "@TJStats",
                        href="https://x.com/TJStats",
                        target="_blank",
                    ),
                ),
                ui.tags.p("Data: MLB"),
                ui.tags.p(
                    ui.tags.a(
                        "Support me on Patreon for more baseball content",
                        href="https://www.patreon.com/TJ_Stats",
                        target="_blank",
                    )
                ),
            ),
        ),
        ui.navset_tab(
            ui.nav(
                "All Pitches",
                output_tabulator("table_all"),
            ),
        ),
    )
)


def _load_spring_pitches() -> pl.DataFrame:
    """Return hosted spring pitch data plus today's scraped games, newest first."""
    df_spring = pl.read_parquet(_SPRING_DATA_URL)

    today = datetime.datetime.now().date()
    game_ids = (
        scraper.get_schedule(year_input=[today.year], sport_id=[1], game_type=['S'])
        .filter(pl.col('date') == today)['game_id']
    )

    # Skip the scrape entirely when nothing is scheduled today instead of
    # handing the scraper an empty game list.
    if not game_ids.is_empty():
        df_today = scraper.get_data_df(scraper.get_data(game_ids))
        # NOTE(review): if the hosted parquet already includes today's games,
        # these pitches would be counted twice - confirm how it is refreshed.
        df_spring = pl.concat([df_spring, df_today])

    return df_spring.sort('game_date', descending=True)


def _summarize_by_pitch(df_stuff: pl.DataFrame) -> pl.DataFrame:
    """Aggregate pitches per pitcher / pitch type and add usage shares."""
    speed = pl.col("start_speed")

    # Total pitches per pitcher: the denominator for every usage share below.
    pitcher_totals = df_stuff.group_by("pitcher_id").agg(
        speed.count().alias("pitcher_total")
    )

    grouped = df_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg(
        [
            speed.count().alias('count'),
            *[pl.col(metric).mean().alias(metric) for metric in _METRIC_COLUMNS],
            # Fix: the original counted 'L' batters as rhh_count and 'R'
            # batters as lhh_count, swapping the RHH% / LHH% columns.
            speed.filter(pl.col('batter_hand') == 'R').count().alias('rhh_count'),
            speed.filter(pl.col('batter_hand') == 'L').count().alias('lhh_count'),
        ]
    )

    # NOTE(review): rhh/lhh shares are relative to ALL of the pitcher's
    # pitches, not to pitches thrown to that batter side - confirm intent.
    return grouped.join(pitcher_totals, on="pitcher_id", how="left").with_columns(
        [
            (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent"),
            (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
            (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent"),
        ]
    )


def _attach_prior_season(df_group: pl.DataFrame) -> pl.DataFrame:
    """Left-join the prior-season aggregate and flag pitches new to a known pitcher."""
    merged = df_group.join(
        df_year_old_group,
        on=['pitcher_id', 'pitcher_name', 'pitch_type'],
        how='left',
        suffix='_old',
    )
    merged = merged.with_columns(
        pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
    )
    # A pitch is "new" only when the pitcher has prior-season rows but none
    # for this pitch type; pitchers with no prior data stay unflagged.
    return merged.with_columns(
        pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
        .then(pl.lit("TRUE"))
        .otherwise(pl.lit(None))
        .alias("new_pitch")
    )


def _add_display_columns(df_merge: pl.DataFrame) -> pl.DataFrame:
    """Add "<metric>_diff", "<metric>_formatted" and "<share>_formatted" columns."""
    # Difference vs. the prior season; null when there is no prior value.
    df_merge = df_merge.with_columns(
        [
            (pl.col(metric) - pl.col(metric + "_old")).alias(metric + "_diff")
            for metric in _METRIC_COLUMNS
        ]
    )

    # "value\n(+diff)" when a prior value exists, otherwise just the value.
    # Testing the "_old" column directly replaces the former magic sentinel
    # (10000) that marked "no prior value" in the diff column.
    df_merge = df_merge.with_columns(
        [
            pl.when(pl.col(metric + "_old").is_null())
            .then(pl.col(metric).round(1).cast(pl.Utf8) + '\n\t')
            .otherwise(
                pl.col(metric).round(1).cast(pl.Utf8)
                + "\n("
                + pl.col(metric + "_diff")
                .round(1)
                .map_elements(lambda x: f"{x:+.1f}", return_dtype=pl.Utf8)
                + ")"
            )
            .alias(metric + "_formatted")
            for metric in _METRIC_COLUMNS
        ]
    )

    return df_merge.with_columns(
        [
            (pl.col(share) * 100)
            .round(1)
            .map_elements(lambda x: f"{x:.1f}%", return_dtype=pl.Utf8)
            .alias(share + "_formatted")
            for share in _PERCENT_COLUMNS
        ]
    ).sort(['pitcher_id', 'count'], descending=True)


def server(input, output, session):
    """Shiny server function: registers the ``table_all`` Tabulator output."""

    @output
    @render_tabulator
    @reactive.event(input.refresh)
    def table_all():
        """Rebuild the pitch summary table each time "Refresh Data" is clicked."""
        df_spring = _load_spring_pitches()
        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))

        df_merge = _add_display_columns(
            _attach_prior_season(_summarize_by_pitch(df_spring_stuff))
        )
        df_plot = df_merge.to_pandas()

        # df_spring is sorted newest-first, so keeping the first row per
        # pitcher yields the most recent team. (A plain dict(zip(...)) over
        # the full frame keeps the LAST row, i.e. the oldest team.)
        latest_rows = df_spring.unique(
            subset='pitcher_id', keep='first', maintain_order=True
        )
        team_dict = dict(zip(latest_rows['pitcher_id'], latest_rows['pitcher_team']))
        df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)

        return Tabulator(
            df_plot,
            table_options=TableOptions(
                height=750,
                columns=_TABLE_COLUMNS,
            ),
        )


app = App(app_ui, server)