spring_training_pitching_app

Running

App Files Files Community

nesticot committed 1 day ago

Commit

8dab285

verified ·

1 Parent(s): 6781a47

Upload 2 files

Browse files

Files changed (2) hide show

app.py +596 -587
pitch_data_agg_2024.parquet +2 -2

app.py CHANGED Viewed

@@ -1,587 +1,596 @@
-import polars as pl
-import api_scraper
-mlb_scrape = api_scraper.MLB_Scrape()  # NOTE(review): not referenced again in this listing; `scraper` below is the instance server() uses
-from stuff_model import *
-from shiny import App, reactive, ui, render
-from shiny.ui import h2, tags
-from api_scraper import MLB_Scrape
-import datetime
-from stuff_model import feature_engineering as fe
-from stuff_model import stuff_apply
-from pytabulator import TableOptions, Tabulator, output_tabulator, render_tabulator, theme
-theme.tabulator_site()  # presumably selects pytabulator's "site" theme for the tables - TODO confirm
-scraper = MLB_Scrape()  # shared scraper used by the render functions in server()
-df_year_old_group = pl.read_parquet('pitch_data_agg_2024.parquet')  # baseline aggregates that server() left-joins onto the spring data
-pitcher_old_dict = dict(zip(df_year_old_group['pitcher_id'],df_year_old_group['pitcher_name']))  # pitcher_id -> pitcher_name from the baseline file; NOTE(review): unused in this listing
-app_ui = ui.page_fluid(
-    ui.card(
-        ui.card_header("2025 Spring Training Pitch Data App"),
-        ui.row(
-            ui.column(4,
-                ui.markdown("""This app generates a table which shows the 2025 Spring Training data.
-* Differences are calculated based on 2024 regular season data
-* If 2024 data does not exist for pitcher, 2023 Data is used
-* If no difference exists, the pitch is labelled as a new pitch"""),
-                ui.input_action_button(
-                    "refresh",
-                    "Refresh Data",
-                    class_="btn-primary",
-                    width="100%"
-                )
-            ),
-            ui.column(3,
-                ui.div(
-                    "By: ",
-                    ui.tags.a(
-                        "@TJStats",
-                        href="https://x.com/TJStats",
-                        target="_blank"
-                    )
-                ),
-                ui.tags.p("Data: MLB"),
-                ui.tags.p(
-                    ui.tags.a(
-                        "Support me on Patreon for more baseball content",
-                        href="https://www.patreon.com/TJ_Stats",
-                        target="_blank"
-                    )
-                )
-            )
-        ),
-        ui.navset_tab(
-            ui.nav("All Pitches",
-                output_tabulator("table_all")
-            ),
-            ui.nav("Daily Pitches",
-                output_tabulator("table_daily")
-            ),
-            ui.nav("tjStuff+",
-                output_tabulator("table_tjstuff")
-            ),
-        )
-    )
-)
-def server(input, output, session):
-    @output
-    @render_tabulator
-    @reactive.event(input.refresh)
-    def table_all():
-        import polars as pl
-        df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
-        date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
-        print(datetime.datetime.now())
-        date_str = date.strftime('%Y-%m-%d')
-        # Initialize the scraper
-        game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
-                    .filter(pl.col('date') == date)['game_id'])
-        data = scraper.get_data(game_list_input)
-        df = scraper.get_data_df(data)
-        df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
-        # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
-        # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
-        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
-        import polars as pl
-        # Compute total pitches for each pitcher
-        df_pitcher_totals = df_spring_stuff.group_by("pitcher_id").agg(
-            pl.col("start_speed").count().alias("pitcher_total")
-        )
-        df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
-            pl.col('start_speed').count().alias('count'),
-            pl.col('start_speed').mean().alias('start_speed'),
-            pl.col('ivb').mean().alias('ivb'),
-            pl.col('hb').mean().alias('hb'),
-            pl.col('release_pos_z').mean().alias('release_pos_z'),
-            pl.col('release_pos_x').mean().alias('release_pos_x'),
-            pl.col('extension').mean().alias('extension'),
-            pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
-            (pl.col('start_speed').filter(pl.col('batter_hand')=='L').count()).alias('rhh_count'),
-            (pl.col('start_speed').filter(pl.col('batter_hand')=='R').count()).alias('lhh_count')
-        ])
-        # Join total pitches per pitcher to the grouped DataFrame on pitcher_id
-        df_spring_group = df_spring_group.join(df_pitcher_totals, on="pitcher_id", how="left")
-        # Now calculate the pitch percent for each pitcher/pitch_type combination
-        df_spring_group = df_spring_group.with_columns(
-            (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
-        )
-        # Optionally, if you want the percentage of left/right-handed batters within the group:
-        df_spring_group = df_spring_group.with_columns([
-            (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
-            (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent")
-        ])
-        df_merge = df_spring_group.join(df_year_old_group,on=['pitcher_id','pitcher_name','pitch_type'],how='left',suffix='_old')
-        df_merge = df_merge.with_columns(
-            pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
-        )
-        df_merge = df_merge.with_columns(
-            pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
-            .then(pl.lit("TRUE"))
-            .otherwise(pl.lit(None))
-            .alias("new_pitch")
-        )
-        import polars as pl
-        # Define the columns to subtract
-        cols_to_subtract = [
-            ("start_speed", "start_speed_old"),
-            ("ivb", "ivb_old"),
-            ("hb", "hb_old"),
-            ("release_pos_z", "release_pos_z_old"),
-            ("release_pos_x", "release_pos_x_old"),
-            ("extension", "extension_old"),
-            ("tj_stuff_plus", "tj_stuff_plus_old")
-        ]
-        df_merge = df_merge.with_columns([
-            # Step 1: Create _diff columns with the default value (e.g., 80) if old is null
-            pl.when(pl.col(old).is_null())
-            .then(pl.lit(10000))  # If old is null, assign 80 as the default
-            .otherwise(pl.col(new) - pl.col(old))  # Otherwise subtract old from new
-            .alias(new + "_diff")
-            for new, old in cols_to_subtract
-        ])
-        # Step 2: Format the columns with (value (+diff)) - exclude brackets if diff is 80
-        df_merge = df_merge.with_columns([
-            pl.when(pl.col(new + "_diff").eq(10000))  # If diff is 80, no need to include brackets
-            .then(pl.col(new).round(1).cast(pl.Utf8)+'\n\t')  # Just return the new value as string
-            .otherwise(
-                pl.col(new).round(1).cast(pl.Utf8) +
-                "\n(" +
-                pl.col(new + "_diff").round(1)
-                    .map_elements(lambda x: f"{x:+.1f}") +
-                ")"
-            ).alias(new + "_formatted")
-            for new, _ in cols_to_subtract
-        ])
-        percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
-        df_merge = df_merge.with_columns([
-            (pl.col(col) * 100)  # Convert to percentage
-            .round(1)            # Round to 1 decimal
-            .map_elements(lambda x: f"{x:.1f}%")  # Format as string with '%'
-            .alias(col + "_formatted")
-            for col in percent_cols
-        ]).sort(['pitcher_id','count'],descending=True)
-        columns = [
-            { "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
-            { "title": "Team", "field": "pitcher_team", "width": 100, "headerFilter":"input" ,"frozen":True,},
-            { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
-            { "title": "New Pitch?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
-            { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input","contextMenu":True},
-            { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
-            { "title": "RHH%", "field": "rhh_percent_formatted", "width": 100, "headerFilter":"input"},
-            { "title": "LHH%", "field": "lhh_percent_formatted", "width": 100, "headerFilter":"input"},
-            { "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "iVB", "field": "ivb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "HB", "field": "hb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "RelH", "field": "release_pos_z_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "RelS", "field": "release_pos_x_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" }
-        ]
-        df_plot = df_merge.to_pandas()
-        team_dict = dict(zip(df_spring['pitcher_id'],df_spring['pitcher_team']))
-        df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)
-        return Tabulator(
-            df_plot,
-            table_options=TableOptions(
-                height=750,
-                columns=columns,
-            )
-        )
-    @output
-    @render_tabulator
-    @reactive.event(input.refresh)
-    def table_daily():
-        import polars as pl
-        df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
-        import datetime
-        date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
-        print(datetime.datetime.now())
-        date_str = date.strftime('%Y-%m-%d')
-        # Initialize the scraper
-        game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
-                    .filter(pl.col('date') == date)['game_id'])
-        data = scraper.get_data(game_list_input)
-        df = scraper.get_data_df(data)
-        df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
-        # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
-        # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
-        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
-        import polars as pl
-        # Compute total pitches for each pitcher
-        df_pitcher_totals = df_spring_stuff.group_by(["pitcher_id",'game_id','game_date']).agg(
-            pl.col("start_speed").count().alias("pitcher_total")
-        )
-        df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type','game_id','game_date']).agg([
-            pl.col('start_speed').count().alias('count'),
-            pl.col('start_speed').mean().alias('start_speed'),
-            pl.col('ivb').mean().alias('ivb'),
-            pl.col('hb').mean().alias('hb'),
-            pl.col('release_pos_z').mean().alias('release_pos_z'),
-            pl.col('release_pos_x').mean().alias('release_pos_x'),
-            pl.col('extension').mean().alias('extension'),
-            pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
-            (pl.col('start_speed').filter(pl.col('batter_hand')=='L').count()).alias('rhh_count'),
-            (pl.col('start_speed').filter(pl.col('batter_hand')=='R').count()).alias('lhh_count')
-        ])
-        # Join total pitches per pitcher to the grouped DataFrame on pitcher_id
-        df_spring_group = df_spring_group.join(df_pitcher_totals, on=["pitcher_id",'game_id','game_date'], how="left")
-        # Now calculate the pitch percent for each pitcher/pitch_type combination
-        df_spring_group = df_spring_group.with_columns(
-            (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
-        )
-        # Optionally, if you want the percentage of left/right-handed batters within the group:
-        df_spring_group = df_spring_group.with_columns([
-            (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
-            (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent")
-        ])
-        df_merge = df_spring_group.join(df_year_old_group,on=['pitcher_id','pitcher_name','pitch_type'],how='left',suffix='_old')
-        df_merge = df_merge.with_columns(
-            pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
-        )
-        df_merge = df_merge.with_columns(
-            pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
-            .then(pl.lit("TRUE"))
-            .otherwise(pl.lit(None))
-            .alias("new_pitch")
-        )
-        import polars as pl
-        # Define the columns to subtract
-        cols_to_subtract = [
-            ("start_speed", "start_speed_old"),
-            ("ivb", "ivb_old"),
-            ("hb", "hb_old"),
-            ("release_pos_z", "release_pos_z_old"),
-            ("release_pos_x", "release_pos_x_old"),
-            ("extension", "extension_old"),
-            ("tj_stuff_plus", "tj_stuff_plus_old")
-        ]
-        df_merge = df_merge.with_columns([
-            # Step 1: Create _diff columns with the default value (e.g., 80) if old is null
-            pl.when(pl.col(old).is_null())
-            .then(pl.lit(10000))  # If old is null, assign 80 as the default
-            .otherwise(pl.col(new) - pl.col(old))  # Otherwise subtract old from new
-            .alias(new + "_diff")
-            for new, old in cols_to_subtract
-        ])
-        # Step 2: Format the columns with (value (+diff)) - exclude brackets if diff is 80
-        df_merge = df_merge.with_columns([
-            pl.when(pl.col(new + "_diff").eq(10000))  # If diff is 80, no need to include brackets
-            .then(pl.col(new).round(1).cast(pl.Utf8)+'\n\t')  # Just return the new value as string
-            .otherwise(
-                pl.col(new).round(1).cast(pl.Utf8) +
-                "\n(" +
-                pl.col(new + "_diff").round(1)
-                    .map_elements(lambda x: f"{x:+.1f}") +
-                ")"
-            ).alias(new + "_formatted")
-            for new, _ in cols_to_subtract
-        ])
-        percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
-        df_merge = df_merge.with_columns([
-            (pl.col(col) * 100)  # Convert to percentage
-            .round(1)            # Round to 1 decimal
-            .map_elements(lambda x: f"{x:.1f}%")  # Format as string with '%'
-            .alias(col + "_formatted")
-            for col in percent_cols
-        ]).sort(['pitcher_id','count'],descending=True)
-        columns = [
-            { "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
-            { "title": "Team", "field": "pitcher_team", "width": 100, "headerFilter":"input" ,"frozen":True,},
-            { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
-            { "title": "New Pitch?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
-            { "title": "Date", "field": "game_date", "width": 100, "headerFilter":"input" ,"frozen":True,},
-            { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input"},
-            { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
-            { "title": "RHH%", "field": "rhh_percent_formatted", "width": 100, "headerFilter":"input"},
-            { "title": "LHH%", "field": "lhh_percent_formatted", "width": 100, "headerFilter":"input"},
-            { "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "iVB", "field": "ivb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "HB", "field": "hb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "RelH", "field": "release_pos_z_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "RelS", "field": "release_pos_x_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" }
-        ]
-        df_plot = df_merge.to_pandas()
-        team_dict = dict(zip(df_spring['pitcher_id'],df_spring['pitcher_team']))
-        df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)
-        return Tabulator(
-            df_plot,
-            table_options=TableOptions(
-                height=750,
-                columns=columns,
-            )
-        )
-    @output
-    @render_tabulator
-    @reactive.event(input.refresh)
-    def table_tjstuff():
-        import polars as pl
-        df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
-        import datetime
-        date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
-        print(datetime.datetime.now())
-        date_str = date.strftime('%Y-%m-%d')
-        # Initialize the scraper
-        game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
-                    .filter(pl.col('date') == date)['game_id'])
-        data = scraper.get_data(game_list_input)
-        df = scraper.get_data_df(data)
-        df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
-        # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
-        # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
-        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
-        import polars as pl
-        # Compute total pitches for each pitcher
-        df_pitcher_totals = df_spring_stuff.group_by(["pitcher_id"]).agg(
-            pl.col("start_speed").count().alias("pitcher_total")
-        )
-        df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
-            pl.col('start_speed').count().alias('count'),
-            pl.col('start_speed').mean().alias('start_speed'),
-            pl.col('ivb').mean().alias('ivb'),
-            pl.col('hb').mean().alias('hb'),
-            pl.col('release_pos_z').mean().alias('release_pos_z'),
-            pl.col('release_pos_x').mean().alias('release_pos_x'),
-            pl.col('extension').mean().alias('extension'),
-            pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
-            (pl.col('start_speed').filter(pl.col('batter_hand')=='L').count()).alias('rhh_count'),
-            (pl.col('start_speed').filter(pl.col('batter_hand')=='R').count()).alias('lhh_count')
-        ])
-        # Join total pitches per pitcher to the grouped DataFrame on pitcher_id
-        df_spring_group = df_spring_group.join(df_pitcher_totals, on=["pitcher_id"], how="left")
-        # Now calculate the pitch percent for each pitcher/pitch_type combination
-        df_spring_group = df_spring_group.with_columns(
-            (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
-        )
-        # Optionally, if you want the percentage of left/right-handed batters within the group:
-        df_spring_group = df_spring_group.with_columns([
-            (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
-            (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent")
-        ])
-        df_merge = df_spring_group.join(df_year_old_group,on=['pitcher_id','pitcher_name','pitch_type'],how='left',suffix='_old')
-        df_merge = df_merge.with_columns(
-            pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
-        )
-        df_merge = df_merge.with_columns(
-            pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
-            .then(pl.lit("TRUE"))
-            .otherwise(pl.lit(None))
-            .alias("new_pitch")
-        )
-        import polars as pl
-        # Define the columns to subtract
-        cols_to_subtract = [
-            ("start_speed", "start_speed_old"),
-            ("ivb", "ivb_old"),
-            ("hb", "hb_old"),
-            ("release_pos_z", "release_pos_z_old"),
-            ("release_pos_x", "release_pos_x_old"),
-            ("extension", "extension_old"),
-            ("tj_stuff_plus", "tj_stuff_plus_old")
-        ]
-        df_merge = df_merge.with_columns([
-            # Step 1: Create _diff columns with the default value (e.g., 80) if old is null
-            pl.when(pl.col(old).is_null())
-            .then(pl.lit(None))  # If old is null, assign 80 as the default
-            .otherwise(pl.col(new) - pl.col(old))  # Otherwise subtract old from new
-            .alias(new + "_diff")
-            for new, old in cols_to_subtract
-        ])
-        # Step 2: Format the columns with (value (+diff)) - exclude brackets if diff is 80
-        # Step 2: Format the columns with (value (+diff)) - exclude brackets if diff is 80
-        df_merge = df_merge.with_columns([
-                pl.col(new).round(1).cast(pl.Utf8).alias(new + "_formatted")
-            for new, _ in cols_to_subtract
-        ])
-        df_merge = df_merge.with_columns([
-            pl.col("tj_stuff_plus_old").round(1).cast(pl.Utf8).alias("tj_stuff_plus_old"),
-            pl.col("tj_stuff_plus_diff").round(1).map_elements(lambda x: f"{x:+.1f}").alias("tj_stuff_plus_diff")
-        ])
-        percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
-        df_merge = df_merge.with_columns([
-            (pl.col(col) * 100)  # Convert to percentage
-            .round(1)            # Round to 1 decimal
-            .map_elements(lambda x: f"{x:.1f}%")  # Format as string with '%'
-            .alias(col + "_formatted")
-            for col in percent_cols
-        ]).sort(['pitcher_id','count'],descending=True)
-        columns = [
-            { "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
-            { "title": "Team", "field": "pitcher_team", "width": 90, "headerFilter":"input" ,"frozen":True,},
-            { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
-            { "title": "New?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
-            { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input"},
-            { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
-            { "title": "RHH%", "field": "rhh_percent_formatted", "width": 90, "headerFilter":"input"},
-            { "title": "LHH%", "field": "lhh_percent_formatted", "width": 90, "headerFilter":"input"},
-            { "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "iVB", "field": "ivb_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "HB", "field": "hb_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "RelH", "field": "release_pos_z_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "RelS", "field": "release_pos_x_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "2024 tjStuff+", "field": "tj_stuff_plus_old", "width": 100, "headerFilter":"input", "formatter":"textarea" },
-            { "title": "Δ", "field": "tj_stuff_plus_diff", "width": 100, "headerFilter":"input", "formatter":"textarea" }
-        ]
-        df_plot = df_merge.sort(['pitcher_id','count'],descending=True).to_pandas()
-        team_dict = dict(zip(df_spring['pitcher_id'],df_spring['pitcher_team']))
-        df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)
-        return Tabulator(
-            df_plot,
-            table_options=TableOptions(
-                height=750,
-                columns=columns,
-            )
-        )
-app = App(app_ui, server)  # combine the UI tree and the server function into the Shiny App object

+import polars as pl
+import api_scraper
+mlb_scrape = api_scraper.MLB_Scrape()  # NOTE(review): not referenced in the visible part of this listing; `scraper` below is the instance server() uses
+from stuff_model import *
+from shiny import App, reactive, ui, render
+from shiny.ui import h2, tags
+from api_scraper import MLB_Scrape
+import datetime
+from stuff_model import feature_engineering as fe
+from stuff_model import stuff_apply
+from pytabulator import TableOptions, Tabulator, output_tabulator, render_tabulator, theme
+theme.tabulator_site()  # presumably selects pytabulator's "site" theme for the tables - TODO confirm
+scraper = MLB_Scrape()  # shared scraper used by the render functions in server()
+df_year_old_group = pl.read_parquet('pitch_data_agg_2024.parquet')  # baseline aggregates that server() left-joins onto the spring data
+pitcher_old_dict = dict(zip(df_year_old_group['pitcher_id'],df_year_old_group['pitcher_name']))  # pitcher_id -> pitcher_name from the baseline file; NOTE(review): unused in the visible part of this listing
+app_ui = ui.page_fluid(
+    ui.card(
+        ui.card_header("2025 Spring Training Pitch Data App"),
+        ui.row(
+            ui.column(4,
+                ui.markdown("""This app generates a table which shows the 2025 Spring Training data.
+* Differences are calculated based on 2024 regular season data
+* If 2024 data does not exist for pitcher, 2023 Data is used
+* If no difference exists, the pitch is labelled as a new pitch"""),
+                ui.input_action_button(
+                    "refresh",
+                    "Refresh Data",
+                    class_="btn-primary",
+                    width="100%"
+                )
+            ),
+            ui.column(3,
+                ui.div(
+                    "By: ",
+                    ui.tags.a(
+                        "@TJStats",
+                        href="https://x.com/TJStats",
+                        target="_blank"
+                    )
+                ),
+                ui.tags.p("Data: MLB"),
+                ui.tags.p(
+                    ui.tags.a(
+                        "Support me on Patreon for more baseball content",
+                        href="https://www.patreon.com/TJ_Stats",
+                        target="_blank"
+                    )
+                )
+            )
+        ),
+        ui.navset_tab(
+            ui.nav("All Pitches",
+                output_tabulator("table_all")
+            ),
+            ui.nav("Daily Pitches",
+                output_tabulator("table_daily")
+            ),
+            ui.nav("tjStuff+",
+                output_tabulator("table_tjstuff")
+            ),
+        )
+    )
+)
+def server(input, output, session):
+    @output
+    @render_tabulator
+    @reactive.event(input.refresh)
+    def table_all():
+        import polars as pl
+        df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
+        date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
+        print(datetime.datetime.now())
+        date_str = date.strftime('%Y-%m-%d')
+        # Initialize the scraper
+        game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
+                    .filter(pl.col('date') == date)['game_id'])
+        data = scraper.get_data(game_list_input)
+        df = scraper.get_data_df(data)
+        df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
+        # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
+        # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
+        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
+        import polars as pl
+        # Compute total pitches for each pitcher
+        df_pitcher_totals = df_spring_stuff.group_by("pitcher_id").agg(
+            pl.col("start_speed").count().alias("pitcher_total")
+        )
+        df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
+            pl.col('start_speed').count().alias('count'),
+            pl.col('start_speed').mean().alias('start_speed'),
+            pl.col('start_speed').max().alias('max_start_speed'),
+            pl.col('ivb').mean().alias('ivb'),
+            pl.col('hb').mean().alias('hb'),
+            pl.col('release_pos_z').mean().alias('release_pos_z'),
+            pl.col('release_pos_x').mean().alias('release_pos_x'),
+            pl.col('extension').mean().alias('extension'),
+            pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
+            (pl.col('start_speed').filter(pl.col('batter_hand')=='L').count()).alias('rhh_count'),
+            (pl.col('start_speed').filter(pl.col('batter_hand')=='R').count()).alias('lhh_count')
+        ])
+        # Join total pitches per pitcher to the grouped DataFrame on pitcher_id
+        df_spring_group = df_spring_group.join(df_pitcher_totals, on="pitcher_id", how="left")
+        # Now calculate the pitch percent for each pitcher/pitch_type combination
+        df_spring_group = df_spring_group.with_columns(
+            (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
+        )
+        # Optionally, if you want the percentage of left/right-handed batters within the group:
+        df_spring_group = df_spring_group.with_columns([
+            (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
+            (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent")
+        ])
+        df_merge = df_spring_group.join(df_year_old_group,on=['pitcher_id','pitch_type'],how='left',suffix='_old')
+        df_merge = df_merge.with_columns(
+            pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
+        )
+        df_merge = df_merge.with_columns(
+            pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
+            .then(pl.lit(True))
+            .otherwise(pl.lit(None))
+            .alias("new_pitch")
+        )
+        import polars as pl
+        # Define the columns to subtract
+        cols_to_subtract = [
+            ("start_speed", "start_speed_old"),
+            ("max_start_speed", "max_start_speed_old"),
+            ("ivb", "ivb_old"),
+            ("hb", "hb_old"),
+            ("release_pos_z", "release_pos_z_old"),
+            ("release_pos_x", "release_pos_x_old"),
+            ("extension", "extension_old"),
+            ("tj_stuff_plus", "tj_stuff_plus_old")
+        ]
+        df_merge = df_merge.with_columns([
+            # Step 1: Create _diff columns with the default value (e.g., 80) if old is null
+            pl.when(pl.col(old).is_null())
+            .then(pl.lit(10000))  # If old is null, assign 80 as the default
+            .otherwise(pl.col(new) - pl.col(old))  # Otherwise subtract old from new
+            .alias(new + "_diff")
+            for new, old in cols_to_subtract
+        ])
+        # Step 2: Format the columns with (value (+diff)) - exclude brackets if diff is 80
+        df_merge = df_merge.with_columns([
+            pl.when(pl.col(new + "_diff").eq(10000))  # If diff is 80, no need to include brackets
+            .then(pl.col(new).round(1).cast(pl.Utf8)+'\n\t')  # Just return the new value as string
+            .otherwise(
+                pl.col(new).round(1).cast(pl.Utf8) +
+                "\n(" +
+                pl.col(new + "_diff").round(1)
+                    .map_elements(lambda x: f"{x:+.1f}") +
+                ")"
+            ).alias(new + "_formatted")
+            for new, _ in cols_to_subtract
+        ])
+        percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
+        df_merge = df_merge.with_columns([
+            (pl.col(col) * 100)  # Convert to percentage
+            .round(1)            # Round to 1 decimal
+            .map_elements(lambda x: f"{x:.1f}%")  # Format as string with '%'
+            .alias(col + "_formatted")
+            for col in percent_cols
+        ]).sort(['pitcher_id','count'],descending=True)
+        columns = [
+            { "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
+            { "title": "Team", "field": "pitcher_team", "width": 100, "headerFilter":"input" ,"frozen":True,},
+            { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
+            { "title": "New Pitch?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
+            { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input","contextMenu":True},
+            { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
+            { "title": "RHH%", "field": "rhh_percent_formatted", "width": 100, "headerFilter":"input"},
+            { "title": "LHH%", "field": "lhh_percent_formatted", "width": 100, "headerFilter":"input"},
+            { "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "Max Velo", "field": "max_start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "iVB", "field": "ivb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "HB", "field": "hb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "RelH", "field": "release_pos_z_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "RelS", "field": "release_pos_x_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" }
+        ]
+        df_plot = df_merge.to_pandas()
+        team_dict = dict(zip(df_spring['pitcher_id'],df_spring['pitcher_team']))
+        df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)
+        return Tabulator(
+            df_plot,
+            table_options=TableOptions(
+                height=750,
+                columns=columns,
+            )
+        )
+    @output
+    @render_tabulator
+    @reactive.event(input.refresh)
+    def table_daily():
+        import polars as pl
+        df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
+        import datetime
+        date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
+        print(datetime.datetime.now())
+        date_str = date.strftime('%Y-%m-%d')
+        # Initialize the scraper
+        game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
+                    .filter(pl.col('date') == date)['game_id'])
+        data = scraper.get_data(game_list_input)
+        df = scraper.get_data_df(data)
+        df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
+        # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
+        # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
+        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
+        import polars as pl
+        # Compute total pitches for each pitcher
+        df_pitcher_totals = df_spring_stuff.group_by(["pitcher_id",'game_id','game_date']).agg(
+            pl.col("start_speed").count().alias("pitcher_total")
+        )
+        df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type','game_id','game_date']).agg([
+            pl.col('start_speed').count().alias('count'),
+            pl.col('start_speed').mean().alias('start_speed'),
+            pl.col('start_speed').max().alias('max_start_speed'),
+            pl.col('ivb').mean().alias('ivb'),
+            pl.col('hb').mean().alias('hb'),
+            pl.col('release_pos_z').mean().alias('release_pos_z'),
+            pl.col('release_pos_x').mean().alias('release_pos_x'),
+            pl.col('extension').mean().alias('extension'),
+            pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
+            (pl.col('start_speed').filter(pl.col('batter_hand')=='L').count()).alias('rhh_count'),
+            (pl.col('start_speed').filter(pl.col('batter_hand')=='R').count()).alias('lhh_count')
+        ])
+        # Join total pitches per pitcher to the grouped DataFrame on pitcher_id
+        df_spring_group = df_spring_group.join(df_pitcher_totals, on=["pitcher_id",'game_id','game_date'], how="left")
+        # Now calculate the pitch percent for each pitcher/pitch_type combination
+        df_spring_group = df_spring_group.with_columns(
+            (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
+        )
+        # Optionally, if you want the percentage of left/right-handed batters within the group:
+        df_spring_group = df_spring_group.with_columns([
+            (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
+            (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent")
+        ])
+        df_merge = df_spring_group.join(df_year_old_group,on=['pitcher_id','pitch_type'],how='left',suffix='_old')
+        df_merge = df_merge.with_columns(
+            pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
+        )
+        df_merge = df_merge.with_columns(
+            pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
+            .then(pl.lit(True))
+            .otherwise(pl.lit(None))
+            .alias("new_pitch")
+        )
+        import polars as pl
+        # Define the columns to subtract
+        cols_to_subtract = [
+            ("start_speed", "start_speed_old"),
+            ("max_start_speed", "max_start_speed_old"),
+            ("ivb", "ivb_old"),
+            ("hb", "hb_old"),
+            ("release_pos_z", "release_pos_z_old"),
+            ("release_pos_x", "release_pos_x_old"),
+            ("extension", "extension_old"),
+            ("tj_stuff_plus", "tj_stuff_plus_old")
+        ]
+        df_merge = df_merge.with_columns([
+            # Step 1: Create _diff columns with the default value (e.g., 80) if old is null
+            pl.when(pl.col(old).is_null())
+            .then(pl.lit(10000))  # If old is null, assign 80 as the default
+            .otherwise(pl.col(new) - pl.col(old))  # Otherwise subtract old from new
+            .alias(new + "_diff")
+            for new, old in cols_to_subtract
+        ])
+        # Step 2: Format the columns with (value (+diff)) - exclude brackets if diff is 80
+        df_merge = df_merge.with_columns([
+            pl.when(pl.col(new + "_diff").eq(10000))  # If diff is 80, no need to include brackets
+            .then(pl.col(new).round(1).cast(pl.Utf8)+'\n\t')  # Just return the new value as string
+            .otherwise(
+                pl.col(new).round(1).cast(pl.Utf8) +
+                "\n(" +
+                pl.col(new + "_diff").round(1)
+                    .map_elements(lambda x: f"{x:+.1f}") +
+                ")"
+            ).alias(new + "_formatted")
+            for new, _ in cols_to_subtract
+        ])
+        percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
+        df_merge = df_merge.with_columns([
+            (pl.col(col) * 100)  # Convert to percentage
+            .round(1)            # Round to 1 decimal
+            .map_elements(lambda x: f"{x:.1f}%")  # Format as string with '%'
+            .alias(col + "_formatted")
+            for col in percent_cols
+        ]).sort(['pitcher_id','count'],descending=True)
+        columns = [
+            { "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
+            { "title": "Team", "field": "pitcher_team", "width": 100, "headerFilter":"input" ,"frozen":True,},
+            { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
+            { "title": "New Pitch?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
+            { "title": "Date", "field": "game_date", "width": 100, "headerFilter":"input" ,"frozen":True,},
+            { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input"},
+            { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
+            { "title": "RHH%", "field": "rhh_percent_formatted", "width": 100, "headerFilter":"input"},
+            { "title": "LHH%", "field": "lhh_percent_formatted", "width": 100, "headerFilter":"input"},
+            { "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "Max Velo", "field": "max_start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "iVB", "field": "ivb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "HB", "field": "hb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "RelH", "field": "release_pos_z_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "RelS", "field": "release_pos_x_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" }
+        ]
+        df_plot = df_merge.to_pandas()
+        team_dict = dict(zip(df_spring['pitcher_id'],df_spring['pitcher_team']))
+        df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)
+        return Tabulator(
+            df_plot,
+            table_options=TableOptions(
+                height=750,
+                columns=columns,
+            )
+        )
+    @output
+    @render_tabulator
+    @reactive.event(input.refresh)
+    def table_tjstuff():
+        import polars as pl
+        df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
+        import datetime
+        date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
+        print(datetime.datetime.now())
+        date_str = date.strftime('%Y-%m-%d')
+        # Initialize the scraper
+        game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
+                    .filter(pl.col('date') == date)['game_id'])
+        data = scraper.get_data(game_list_input)
+        df = scraper.get_data_df(data)
+        df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
+        # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
+        # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
+        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
+        import polars as pl
+        # Compute total pitches for each pitcher
+        df_pitcher_totals = df_spring_stuff.group_by(["pitcher_id"]).agg(
+            pl.col("start_speed").count().alias("pitcher_total")
+        )
+        df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
+            pl.col('start_speed').count().alias('count'),
+            pl.col('start_speed').mean().alias('start_speed'),
+            pl.col('start_speed').max().alias('max_start_speed'),
+            pl.col('ivb').mean().alias('ivb'),
+            pl.col('hb').mean().alias('hb'),
+            pl.col('release_pos_z').mean().alias('release_pos_z'),
+            pl.col('release_pos_x').mean().alias('release_pos_x'),
+            pl.col('extension').mean().alias('extension'),
+            pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
+            (pl.col('start_speed').filter(pl.col('batter_hand')=='L').count()).alias('rhh_count'),
+            (pl.col('start_speed').filter(pl.col('batter_hand')=='R').count()).alias('lhh_count')
+        ])
+        # Join total pitches per pitcher to the grouped DataFrame on pitcher_id
+        df_spring_group = df_spring_group.join(df_pitcher_totals, on=["pitcher_id"], how="left")
+        # Now calculate the pitch percent for each pitcher/pitch_type combination
+        df_spring_group = df_spring_group.with_columns(
+            (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
+        )
+        # Optionally, if you want the percentage of left/right-handed batters within the group:
+        df_spring_group = df_spring_group.with_columns([
+            (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
+            (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent")
+        ])
+        df_merge = df_spring_group.join(df_year_old_group,on=['pitcher_id','pitch_type'],how='left',suffix='_old')
+        df_merge = df_merge.with_columns(
+            pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
+        )
+        df_merge = df_merge.with_columns(
+            pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
+            .then(pl.lit(True))
+            .otherwise(pl.lit(None))
+            .alias("new_pitch")
+        )
+        import polars as pl
+        # Define the columns to subtract
+        cols_to_subtract = [
+            ("start_speed", "start_speed_old"),
+            ("max_start_speed", "max_start_speed_old"),
+            ("ivb", "ivb_old"),
+            ("hb", "hb_old"),
+            ("release_pos_z", "release_pos_z_old"),
+            ("release_pos_x", "release_pos_x_old"),
+            ("extension", "extension_old"),
+            ("tj_stuff_plus", "tj_stuff_plus_old")
+        ]
+        df_merge = df_merge.with_columns([
+            # Step 1: Create _diff columns with the default value (e.g., 80) if old is null
+            pl.when(pl.col(old).is_null())
+            .then(pl.lit(None))  # If old is null, assign 80 as the default
+            .otherwise(pl.col(new) - pl.col(old))  # Otherwise subtract old from new
+            .alias(new + "_diff")
+            for new, old in cols_to_subtract
+        ])
+        # Step 2: Format the columns with (value (+diff)) - exclude brackets if diff is 80
+        # Step 2: Format the columns with (value (+diff)) - exclude brackets if diff is 80
+        df_merge = df_merge.with_columns([
+                pl.col(new).round(1).cast(pl.Utf8).alias(new + "_formatted")
+            for new, _ in cols_to_subtract
+        ])
+        df_merge = df_merge.with_columns([
+            pl.col("tj_stuff_plus_old").round(1).cast(pl.Utf8).alias("tj_stuff_plus_old"),
+            pl.col("tj_stuff_plus_diff").round(1).map_elements(lambda x: f"{x:+.1f}").alias("tj_stuff_plus_diff")
+        ])
+        percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
+        df_merge = df_merge.with_columns([
+            (pl.col(col) * 100)  # Convert to percentage
+            .round(1)            # Round to 1 decimal
+            .map_elements(lambda x: f"{x:.1f}%")  # Format as string with '%'
+            .alias(col + "_formatted")
+            for col in percent_cols
+        ]).sort(['pitcher_id','count'],descending=True)
+        columns = [
+            { "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
+            { "title": "Team", "field": "pitcher_team", "width": 90, "headerFilter":"input" ,"frozen":True,},
+            { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
+            { "title": "New?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
+            { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input"},
+            { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
+            { "title": "RHH%", "field": "rhh_percent_formatted", "width": 90, "headerFilter":"input"},
+            { "title": "LHH%", "field": "lhh_percent_formatted", "width": 90, "headerFilter":"input"},
+            { "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "Max Velo", "field": "max_start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "iVB", "field": "ivb_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "HB", "field": "hb_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "RelH", "field": "release_pos_z_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "RelS", "field": "release_pos_x_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "2024 tjStuff+", "field": "tj_stuff_plus_old", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "Δ", "field": "tj_stuff_plus_diff", "width": 100, "headerFilter":"input", "formatter":"textarea" }
+        ]
+        df_plot = df_merge.sort(['pitcher_id','count'],descending=True).to_pandas()
+        team_dict = dict(zip(df_spring['pitcher_id'],df_spring['pitcher_team']))
+        df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)
+        return Tabulator(
+            df_plot,
+            table_options=TableOptions(
+                height=750,
+                columns=columns,
+            )
+        )
+# Assemble the Shiny application object from the `app_ui` layout and the `server` callable.
+app = App(app_ui, server)

pitch_data_agg_2024.parquet CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e4cfc5290d83b7707362d46380140b97bb464d1510e1fcca0cd878b65e4fb91
-size 561801

 version https://git-lfs.github.com/spec/v1
+oid sha256:21509d34d14646869a1a4dd2785b91c3ce210092ad42d42aea9a772aeeb38edf
+size 615259