spring_training_pitching_app

Running

App Files Community

nesticot committed 3 days ago

Commit

6781a47

verified ·

1 Parent(s): 390c8e4

Update app.py

Browse files

Files changed (1) hide show

app.py +175 -0

app.py CHANGED Viewed

@@ -65,6 +65,9 @@ app_ui = ui.page_fluid(
             ui.nav("Daily Pitches",
                 output_tabulator("table_daily")
             ),
         )
     )
 )
@@ -399,6 +402,178 @@ def server(input, output, session):
         return Tabulator(
             df_plot,

             ui.nav("Daily Pitches",
                 output_tabulator("table_daily")
             ),
+            ui.nav("tjStuff+",
+                output_tabulator("table_tjstuff")
+            ),
         )
     )
 )
+        return Tabulator(
+            df_plot,
+            table_options=TableOptions(
+                height=750,
+                columns=columns,
+            )
+        )
+    @output
+    @render_tabulator
+    @reactive.event(input.refresh)
+    def table_tjstuff():
+        import polars as pl
+        df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
+        import datetime
+        date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
+        print(datetime.datetime.now())
+        date_str = date.strftime('%Y-%m-%d')
+        # Fetch the schedule for the current year (game_type 'S') and keep the game IDs matching the computed date
+        game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
+                    .filter(pl.col('date') == date)['game_id'])
+        data = scraper.get_data(game_list_input)
+        df = scraper.get_data_df(data)
+        df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
+        # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
+        # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
+        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
+        import polars as pl
+        # Compute total pitches for each pitcher
+        df_pitcher_totals = df_spring_stuff.group_by(["pitcher_id"]).agg(
+            pl.col("start_speed").count().alias("pitcher_total")
+        )
+        df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
+            pl.col('start_speed').count().alias('count'),
+            pl.col('start_speed').mean().alias('start_speed'),
+            pl.col('ivb').mean().alias('ivb'),
+            pl.col('hb').mean().alias('hb'),
+            pl.col('release_pos_z').mean().alias('release_pos_z'),
+            pl.col('release_pos_x').mean().alias('release_pos_x'),
+            pl.col('extension').mean().alias('extension'),
+            pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
+            (pl.col('start_speed').filter(pl.col('batter_hand')=='L').count()).alias('rhh_count'),
+            (pl.col('start_speed').filter(pl.col('batter_hand')=='R').count()).alias('lhh_count')
+        ])
+        # Join total pitches per pitcher to the grouped DataFrame on pitcher_id
+        df_spring_group = df_spring_group.join(df_pitcher_totals, on=["pitcher_id"], how="left")
+        # Now calculate the pitch percent for each pitcher/pitch_type combination
+        df_spring_group = df_spring_group.with_columns(
+            (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
+        )
+        # Optionally, if you want the percentage of left/right-handed batters within the group:
+        df_spring_group = df_spring_group.with_columns([
+            (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
+            (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent")
+        ])
+        df_merge = df_spring_group.join(df_year_old_group,on=['pitcher_id','pitcher_name','pitch_type'],how='left',suffix='_old')
+        df_merge = df_merge.with_columns(
+            pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
+        )
+        df_merge = df_merge.with_columns(
+            pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
+            .then(pl.lit("TRUE"))
+            .otherwise(pl.lit(None))
+            .alias("new_pitch")
+        )
+        import polars as pl
+        # Define the columns to subtract
+        cols_to_subtract = [
+            ("start_speed", "start_speed_old"),
+            ("ivb", "ivb_old"),
+            ("hb", "hb_old"),
+            ("release_pos_z", "release_pos_z_old"),
+            ("release_pos_x", "release_pos_x_old"),
+            ("extension", "extension_old"),
+            ("tj_stuff_plus", "tj_stuff_plus_old")
+        ]
+        df_merge = df_merge.with_columns([
+            # Step 1: Create _diff columns (new minus old); the diff is left null when the old value is null
+            pl.when(pl.col(old).is_null())
+            .then(pl.lit(None))  # If old is null, leave the diff null
+            .otherwise(pl.col(new) - pl.col(old))  # Otherwise subtract old from new
+            .alias(new + "_diff")
+            for new, old in cols_to_subtract
+        ])
+        # Step 2: Format each current-value column as a string rounded to 1 decimal place
+        df_merge = df_merge.with_columns([
+                pl.col(new).round(1).cast(pl.Utf8).alias(new + "_formatted")
+            for new, _ in cols_to_subtract
+        ])
+        df_merge = df_merge.with_columns([
+            pl.col("tj_stuff_plus_old").round(1).cast(pl.Utf8).alias("tj_stuff_plus_old"),
+            pl.col("tj_stuff_plus_diff").round(1).map_elements(lambda x: f"{x:+.1f}").alias("tj_stuff_plus_diff")
+        ])
+        percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
+        df_merge = df_merge.with_columns([
+            (pl.col(col) * 100)  # Convert to percentage
+            .round(1)            # Round to 1 decimal
+            .map_elements(lambda x: f"{x:.1f}%")  # Format as string with '%'
+            .alias(col + "_formatted")
+            for col in percent_cols
+        ]).sort(['pitcher_id','count'],descending=True)
+        columns = [
+            { "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
+            { "title": "Team", "field": "pitcher_team", "width": 90, "headerFilter":"input" ,"frozen":True,},
+            { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
+            { "title": "New?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
+            { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input"},
+            { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
+            { "title": "RHH%", "field": "rhh_percent_formatted", "width": 90, "headerFilter":"input"},
+            { "title": "LHH%", "field": "lhh_percent_formatted", "width": 90, "headerFilter":"input"},
+            { "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "iVB", "field": "ivb_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "HB", "field": "hb_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "RelH", "field": "release_pos_z_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "RelS", "field": "release_pos_x_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "2024 tjStuff+", "field": "tj_stuff_plus_old", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "Δ", "field": "tj_stuff_plus_diff", "width": 100, "headerFilter":"input", "formatter":"textarea" }
+        ]
+        df_plot = df_merge.sort(['pitcher_id','count'],descending=True).to_pandas()
+        team_dict = dict(zip(df_spring['pitcher_id'],df_spring['pitcher_team']))
+        df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)
         return Tabulator(
             df_plot,