spring_training_pitching_app

Running

App Files Files Community

nesticot committed 4 days ago

Commit

96cb552

verified ·

1 Parent(s): c03466a

Update app.py

Browse files

Files changed (1) hide show

app.py +213 -128

app.py CHANGED Viewed

@@ -1,153 +1,238 @@
 import polars as pl
-import numpy as np
-import joblib
-from shiny import App, reactive, render, ui
-import matplotlib.pyplot as plt
-import matplotlib.ticker as tkr
-import seaborn as sns
-import adjustText
-sns.set_style('whitegrid')
-import matplotlib
-cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#FFFFFF','#FFB000','#FE6100'])
-xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')
-x = np.arange(-30,90.5,.5)
-y = np.arange(0,120.5,0.1)
-xx, yy = np.meshgrid(x, y)
-df = pl.DataFrame({'launch_angle': xx.ravel(), 'launch_speed': yy.ravel()})
-df = df.with_columns(
-    pl.Series('xwoba', xwoba_model.predict_proba(df.select(['launch_angle','launch_speed'])) @ [0, 0.883, 1.244, 1.569, 2.004])
-)
-df = df.with_columns(
-    pl.Series('xslg', xwoba_model.predict_proba(df.select(['launch_angle','launch_speed'])) @ [0, 1, 2, 3, 4])
 )
-app_ui = ui.page_sidebar(
-    ui.sidebar(
-        ui.markdown("""
-        ### How to use this app
-        1. Click anywhere on the plot to select a point, or manually enter coordinates
-        2. The selected point's coordinates will update automatically
-        3. The xwOBA value will be calculated based on these coordinates
-        """),
-        ui.hr(),
-        ui.input_numeric("x_select", "Launch Speed (mph)", value=110),
-        ui.input_numeric("y_select", "Launch Angle (°)", value=30),
-        ui.input_switch("flip_stat", "xwOBA", value=False),
-    ),
-    ui.output_plot("plot",width='900px',height='900px', click=True)
-)
-def server(input, output, session):
-    # Store the coordinates in reactive values
-    x_coord = reactive.value(110)
-    y_coord = reactive.value(30)
-    @reactive.effect
-    @reactive.event(input.plot_click)
-    def _():
-        # Update reactive values when plot is clicked
-        click_data = input.plot_click()
-        if click_data is not None:
-            x_coord.set(click_data["x"])
-            y_coord.set(click_data["y"])
-            # Update the numeric inputs
-            ui.update_numeric("x_select", value=round(click_data["x"],1))
-            ui.update_numeric("y_select", value=round(click_data["y"],1))
-    @reactive.effect
-    @reactive.event(input.x_select, input.y_select)
-    def _():
-        # Update reactive values when numeric inputs change
-        x_coord.set(round(input.x_select(),1))
-        y_coord.set(round(input.y_select(),1))
-    @render.plot
-    def plot():
-        switch = input.flip_stat()
-        fig, ax = plt.subplots(1, 1, figsize=(9, 9))
-        if switch:
-            h = ax.hexbin(df['launch_speed'],
-                        df['launch_angle'],
-                        C=df['xwoba'],
-                        gridsize=(40,25),
-                        cmap=cmap_sum,
-                        vmin=0.0,
-                        vmax=2.0,)
-            bounds=[0.0,0.4,0.8,1.2,1.6,2.0]
-            fig.colorbar(h, ax=ax, label='xwOBA',format=tkr.FormatStrFormatter('%.3f'),shrink=0.5,
-                        ticks=bounds)
-        else:
-            h = ax.hexbin(df['launch_speed'],
-                        df['launch_angle'],
-                        C=df['xslg'],
-                        gridsize=(40,25),
-                        cmap=cmap_sum,
-                        vmin=0.0,
-                        vmax=4.0,)
-            bounds=[0.0,0.5,1,1.5,2,2.5,3,3.5,4]
-            fig.colorbar(h, ax=ax, label='xSLG',format=tkr.FormatStrFormatter('%.3f'),shrink=0.5,
-                        ticks=bounds)
-        ax.set_xlabel('Launch Speed')
-        ax.set_ylabel('Launch Angle')
-        if switch:
-            ax.set_title('Exit Velocity vs Launch Angle\nExpected Weighted On Base Average (xwOBA)\nBy: @TJStats, Data:MLB')
-        else:
-            ax.set_title('Exit Velocity vs Launch Angle\nExpected Total Bases (xSLG)\nBy: @TJStats, Data:MLB')
-        ax.grid(False)
-        ax.axis('square')
-        ax.set_xlim(0, 120)
-        ax.set_ylim(-30, 90)
-        x_select = input.x_select()
-        y_select = input.y_select()
-        sns.scatterplot(x=[x_select],y=[y_select],color='#648FFF',s=50,ax=ax,edgecolor='k',zorder=100)
-        if switch:
-            xwoba_value = (xwoba_model.predict_proba([[y_select,x_select]]) @ [0, 0.883, 1.244, 1.569, 2.004])[0]
-            texts = [ax.text(x_select+3, y_select+3, f'xwOBA: {xwoba_value:.3f}', color='black', fontsize=12, weight='bold',
-                            zorder=1000, bbox=dict(facecolor='white', alpha=0.5, edgecolor='black'))]
-        else:
-            xwoba_value = (xwoba_model.predict_proba([[y_select,x_select]]) @ [0, 1, 2, 3, 4])[0]
-            texts = [ax.text(x_select+3, y_select+3, f'xSLG: {xwoba_value:.3f}', color='black', fontsize=12, weight='bold',
-                            zorder=1000, bbox=dict(facecolor='white', alpha=0.5, edgecolor='black'))]
-        adjustText.adjust_text(texts,
-                                arrowprops=dict(arrowstyle='->', color='#DC267F'),avoid_self=True,
-                                min_arrow_len =5)
-        # xwoba_value =
-        ax.axhline(y=y_select, color='k', linestyle='--',linewidth=1,alpha=0.5)
-        ax.axvline(x=x_select, color='k', linestyle='--',linewidth=1,alpha=0.5)
-        # ax.axis('square')
-app = App(app_ui, server)

 import polars as pl
+import api_scraper
+mlb_scrape = api_scraper.MLB_Scrape()
+from stuff_model import *
+from shiny import App, reactive, ui, render
+from shiny.ui import h2, tags
+from api_scraper import MLB_Scrape
+import datetime
+from stuff_model import feature_engineering as fe
+from stuff_model import stuff_apply
+from pytabulator import TableOptions, Tabulator, output_tabulator, render_tabulator, theme
+# Select pytabulator's "site" theme (theme helper imported from pytabulator above).
+theme.tabulator_site()
+# Scraper instance the server uses to fetch today's schedule and game data.
+scraper = MLB_Scrape()
+# Aggregated 2024 pitch data; this is the "old" side that spring numbers are joined against.
+df_year_old_group = pl.read_parquet('pitch_data_agg_2024.parquet')
+# pitcher_id -> pitcher_name lookup built from the 2024 aggregate.
+pitcher_old_dict = dict(zip(df_year_old_group['pitcher_id'],df_year_old_group['pitcher_name']))
+# Left-hand column: explanatory text plus the button that triggers a table refresh.
+_intro_column = ui.column(
+    4,
+    ui.markdown("""This app generates a table which shows the 2025 Spring Training data.
+* Differences are calculated based on 2024 regular season data
+* If 2024 data does not exist for pitcher, 2023 Data is used
+* If no difference exists, the pitch is labelled as a new pitch"""),
+    ui.input_action_button("refresh", "Refresh Data", class_="btn-primary", width="100%"),
+)
+# Right-hand column: author credit, data source and support link.
+_credits_column = ui.column(
+    3,
+    ui.div(
+        "By: ",
+        ui.tags.a("@TJStats", href="https://x.com/TJStats", target="_blank"),
+    ),
+    ui.tags.p("Data: MLB"),
+    ui.tags.p(
+        ui.tags.a(
+            "Support me on Patreon for more baseball content",
+            href="https://www.patreon.com/TJ_Stats",
+            target="_blank",
+        )
+    ),
+)
+# Page layout: one card holding the header, the two columns above, and a
+# single-tab navset whose "All Pitches" tab hosts the "table_all" output.
+app_ui = ui.page_fluid(
+    ui.card(
+        ui.card_header("2025 Spring Training Pitch Data App"),
+        ui.row(_intro_column, _credits_column),
+        ui.navset_tab(
+            ui.nav("All Pitches", output_tabulator("table_all")),
+        ),
+    )
 )
+def server(input, output, session):
+    """Register the app's single output, the ``table_all`` Tabulator table.
+
+    Args:
+        input: Shiny inputs; only ``input.refresh`` is read.
+        output: Shiny output registry that ``table_all`` is attached to.
+        session: Shiny session object (not used here).
+    """
+    @output
+    @render_tabulator
+    @reactive.event(input.refresh)
+    def table_all():
+        """Build the per-pitcher / per-pitch-type spring training table.
+
+        Loads the stored 2025 spring pitch data, appends today's spring
+        training games from the scraper, applies the stuff model, aggregates
+        by pitcher and pitch type, and joins against ``df_year_old_group``
+        to show each metric alongside its change from the old value.
+
+        Returns:
+            A ``Tabulator`` built from the merged frame (as pandas) with the
+            column definitions declared below.
+        """
+        # Stored spring pitches plus whatever has been played today.
+        df_spring = pl.read_parquet("hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
+        today = datetime.datetime.now().date()
+        game_list_input = (scraper.get_schedule(year_input=[today.year], sport_id=[1], game_type=['S'])
+                    .filter(pl.col('date') == today)['game_id'])
+        data = scraper.get_data(game_list_input)
+        df = scraper.get_data_df(data)
+        df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
+        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))
+        # Total pitches per pitcher: the denominator for every percentage below.
+        df_pitcher_totals = df_spring_stuff.group_by("pitcher_id").agg(
+            pl.col("start_speed").count().alias("pitcher_total")
+        )
+        df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
+            pl.col('start_speed').count().alias('count'),
+            pl.col('start_speed').mean().alias('start_speed'),
+            pl.col('ivb').mean().alias('ivb'),
+            pl.col('hb').mean().alias('hb'),
+            pl.col('release_pos_z').mean().alias('release_pos_z'),
+            pl.col('release_pos_x').mean().alias('release_pos_x'),
+            pl.col('extension').mean().alias('extension'),
+            pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
+            # Fix: these two filters were swapped, so the RHH% and LHH%
+            # columns showed each other's numbers.
+            pl.col('start_speed').filter(pl.col('batter_hand') == 'R').count().alias('rhh_count'),
+            pl.col('start_speed').filter(pl.col('batter_hand') == 'L').count().alias('lhh_count'),
+        ])
+        # Attach each pitcher's total so the shares can be computed row-wise.
+        df_spring_group = df_spring_group.join(df_pitcher_totals, on="pitcher_id", how="left")
+        # All three shares use the pitcher's overall pitch total as the
+        # denominator, not the number of pitches thrown to that batter side.
+        df_spring_group = df_spring_group.with_columns(
+            (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
+        )
+        df_spring_group = df_spring_group.with_columns([
+            (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
+            (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent")
+        ])
+        # Left join keeps every spring row; columns from the old data get the "_old" suffix.
+        df_merge = df_spring_group.join(df_year_old_group,on=['pitcher_id','pitcher_name','pitch_type'],how='left',suffix='_old')
+        df_merge = df_merge.with_columns(
+            pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
+        )
+        # A pitch is flagged as new only when the pitcher is present in the
+        # old data but this pitch type found no match there.
+        df_merge = df_merge.with_columns(
+            pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
+            .then(pl.lit("TRUE"))
+            .otherwise(pl.lit(None))
+            .alias("new_pitch")
+        )
+        # (new column, old column) pairs to turn into "<new>_diff" columns.
+        cols_to_subtract = [
+            ("start_speed", "start_speed_old"),
+            ("ivb", "ivb_old"),
+            ("hb", "hb_old"),
+            ("release_pos_z", "release_pos_z_old"),
+            ("release_pos_x", "release_pos_x_old"),
+            ("extension", "extension_old"),
+            ("tj_stuff_plus", "tj_stuff_plus_old")
+        ]
+        # Sentinel stored in a _diff column when there is no old value to compare against.
+        no_baseline = 10000
+        # Step 1: new - old, or the sentinel when the old value is missing.
+        df_merge = df_merge.with_columns([
+            pl.when(pl.col(old).is_null())
+            .then(pl.lit(no_baseline))
+            .otherwise(pl.col(new) - pl.col(old))
+            .alias(new + "_diff")
+            for new, old in cols_to_subtract
+        ])
+        # Step 2: render as "value\n(+diff)"; rows carrying the sentinel show
+        # the value only, with no bracketed diff.
+        df_merge = df_merge.with_columns([
+            pl.when(pl.col(new + "_diff").eq(no_baseline))
+            .then(pl.col(new).round(1).cast(pl.Utf8)+'\n\t')
+            .otherwise(
+                pl.col(new).round(1).cast(pl.Utf8) +
+                "\n(" +
+                pl.col(new + "_diff").round(1)
+                    .map_elements(lambda x: f"{x:+.1f}", return_dtype=pl.Utf8) +
+                ")"
+            ).alias(new + "_formatted")
+            for new, _ in cols_to_subtract
+        ])
+        # Fractions -> "12.3%" strings for display.
+        percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
+        df_merge = df_merge.with_columns([
+            (pl.col(col) * 100)
+            .round(1)
+            .map_elements(lambda x: f"{x:.1f}%", return_dtype=pl.Utf8)
+            .alias(col + "_formatted")
+            for col in percent_cols
+        ]).sort(['pitcher_id','count'],descending=True)
+        # Tabulator column definitions; the "textarea" formatter lets the
+        # "\n" inside the *_formatted strings render as a line break.
+        columns = [
+            { "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
+            { "title": "Team", "field": "pitcher_team", "width": 100, "headerFilter":"input" ,"frozen":True,},
+            { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
+            { "title": "New Pitch?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
+            { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input"},
+            { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
+            { "title": "RHH%", "field": "rhh_percent_formatted", "width": 100, "headerFilter":"input"},
+            { "title": "LHH%", "field": "lhh_percent_formatted", "width": 100, "headerFilter":"input"},
+            { "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "iVB", "field": "ivb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "HB", "field": "hb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "RelH", "field": "release_pos_z_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "RelS", "field": "release_pos_x_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
+            { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" }
+        ]
+        df_plot = df_merge.to_pandas()
+        # NOTE(review): df_spring is sorted newest-first and dict(zip(...))
+        # keeps the last value per key, so each pitcher maps to the team on
+        # their oldest row - confirm that is intended.
+        team_dict = dict(zip(df_spring['pitcher_id'],df_spring['pitcher_team']))
+        df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)
+        return Tabulator(
+            df_plot,
+            table_options=TableOptions(
+                height=750,
+                columns=columns,
+            )
+        )
+# Combine the UI definition and the server function into the Shiny App object.
+app = App(app_ui, server)