nesticot committed on
Commit
96cb552
·
verified ·
1 Parent(s): c03466a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +213 -128
app.py CHANGED
@@ -1,153 +1,238 @@
1
  import polars as pl
2
- import numpy as np
3
- import joblib
4
- from shiny import App, reactive, render, ui
5
- import matplotlib.pyplot as plt
6
- import matplotlib.ticker as tkr
7
- import seaborn as sns
8
- import adjustText
9
# Module-level setup for the heat-map app: plot style, colormap, the fitted
# classifier, and a dense (launch angle x launch speed) grid with the expected
# value of each statistic pre-computed for every grid point.
sns.set_style('whitegrid')

import matplotlib

# White -> amber -> orange colormap used for both statistics.
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#FFFFFF', '#FFB000', '#FE6100'])

xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')

# `x` spans launch angles and `y` spans launch speeds; they feed the
# 'launch_angle' and 'launch_speed' columns below, respectively.
x = np.arange(-30, 90.5, .5)
y = np.arange(0, 120.5, 0.1)

xx, yy = np.meshgrid(x, y)

df = pl.DataFrame({'launch_angle': xx.ravel(), 'launch_speed': yy.ravel()})

# Evaluate the model once for the whole grid; both statistics are weighted sums
# of the same five class probabilities, so a second predict_proba call is
# unnecessary.
# NOTE(review): the weight vectors presumably correspond to out/1B/2B/3B/HR in
# the model's class order -- confirm against the training code.
_grid_proba = xwoba_model.predict_proba(df.select(['launch_angle', 'launch_speed']))

df = df.with_columns(
    pl.Series('xwoba', _grid_proba @ [0, 0.883, 1.244, 1.569, 2.004]),
    pl.Series('xslg', _grid_proba @ [0, 1, 2, 3, 4]),
)

# The probability matrix is only needed to build the two columns above.
del _grid_proba
31
 
32
# Sidebar: usage notes, the two coordinate inputs and the statistic switch.
_sidebar = ui.sidebar(
    ui.markdown("""
    ### How to use this app

    1. Click anywhere on the plot to select a point, or manually enter coordinates
    2. The selected point's coordinates will update automatically
    3. The xwOBA value will be calculated based on these coordinates
    """),
    ui.hr(),
    ui.input_numeric("x_select", "Launch Speed (mph)", value=110),
    ui.input_numeric("y_select", "Launch Angle (°)", value=30),
    ui.input_switch("flip_stat", "xwOBA", value=False),
)

# Main panel: a fixed-size, clickable plot (clicks arrive as `input.plot_click`).
_plot_area = ui.output_plot("plot", width='900px', height='900px', click=True)

# Page layout: sidebar on the side, plot in the main area.
app_ui = ui.page_sidebar(_sidebar, _plot_area)
 
 
 
 
 
 
 
 
 
 
50
 
51
 
52
def _stat_settings(show_xwoba):
    """Return the display settings for the statistic selected by the switch.

    Args:
        show_xwoba: value of the ``flip_stat`` switch; truthy selects xwOBA,
            falsy selects xSLG.

    Returns:
        dict with the grid column to plot, the label, the weights applied to
        the model's class probabilities, the colour-scale maximum, the
        colour-bar ticks and the plot title.
    """
    if show_xwoba:
        return {
            "column": "xwoba",
            "label": "xwOBA",
            "weights": [0, 0.883, 1.244, 1.569, 2.004],
            "vmax": 2.0,
            "ticks": [0.0, 0.4, 0.8, 1.2, 1.6, 2.0],
            "title": 'Exit Velocity vs Launch Angle\nExpected Weighted On Base Average (xwOBA)\nBy: @TJStats, Data:MLB',
        }
    return {
        "column": "xslg",
        "label": "xSLG",
        "weights": [0, 1, 2, 3, 4],
        "vmax": 4.0,
        "ticks": [0.0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4],
        "title": 'Exit Velocity vs Launch Angle\nExpected Total Bases (xSLG)\nBy: @TJStats, Data:MLB',
    }


def server(input, output, session):
    """Shiny server: sync plot clicks to the numeric inputs and draw the heat map.

    ``x_select`` is the launch speed and ``y_select`` the launch angle (see the
    input labels in ``app_ui``).
    """

    @reactive.effect
    @reactive.event(input.plot_click)
    def _():
        # Copy the clicked point into the numeric inputs; the plot re-renders
        # because it reads those inputs.
        click_data = input.plot_click()
        if click_data is not None:
            ui.update_numeric("x_select", value=round(click_data["x"], 1))
            ui.update_numeric("y_select", value=round(click_data["y"], 1))

    @render.plot
    def plot():
        from shiny import req

        x_select = input.x_select()
        y_select = input.y_select()
        # A cleared numeric box yields None; pause rendering instead of failing.
        # An explicit None test is used because 0 is a valid coordinate.
        req(x_select is not None and y_select is not None)

        stat = _stat_settings(input.flip_stat())
        fig, ax = plt.subplots(1, 1, figsize=(9, 9))

        # Heat map of the pre-computed grid for the selected statistic.
        h = ax.hexbin(df['launch_speed'],
                      df['launch_angle'],
                      C=df[stat["column"]],
                      gridsize=(40, 25),
                      cmap=cmap_sum,
                      vmin=0.0,
                      vmax=stat["vmax"])
        fig.colorbar(h, ax=ax, label=stat["label"], format=tkr.FormatStrFormatter('%.3f'), shrink=0.5,
                     ticks=stat["ticks"])

        ax.set_xlabel('Launch Speed')
        ax.set_ylabel('Launch Angle')
        ax.set_title(stat["title"])

        ax.grid(False)
        ax.axis('square')
        ax.set_xlim(0, 120)
        ax.set_ylim(-30, 90)

        # Marker for the selected point.
        sns.scatterplot(x=[x_select], y=[y_select], color='#648FFF', s=50, ax=ax, edgecolor='k', zorder=100)

        # The model takes [launch_angle, launch_speed], the same column order
        # used when the grid was scored.
        stat_value = (xwoba_model.predict_proba([[y_select, x_select]]) @ stat["weights"])[0]
        texts = [ax.text(x_select + 3, y_select + 3, f'{stat["label"]}: {stat_value:.3f}', color='black',
                         fontsize=12, weight='bold', zorder=1000,
                         bbox=dict(facecolor='white', alpha=0.5, edgecolor='black'))]

        adjustText.adjust_text(texts,
                               arrowprops=dict(arrowstyle='->', color='#DC267F'), avoid_self=True,
                               min_arrow_len=5)

        # Dashed cross-hairs through the selected point.
        ax.axhline(y=y_select, color='k', linestyle='--', linewidth=1, alpha=0.5)
        ax.axvline(x=x_select, color='k', linestyle='--', linewidth=1, alpha=0.5)


app = App(app_ui, server)
 
1
  import polars as pl
2
import api_scraper

# NOTE(review): the wildcard import is kept because names it provides may be
# relied on elsewhere; prefer explicit imports once its exports are known.
from stuff_model import *
from shiny import App, reactive, ui, render
from shiny.ui import h2, tags
from api_scraper import MLB_Scrape
import datetime
from stuff_model import feature_engineering as fe
from stuff_model import stuff_apply
from pytabulator import TableOptions, Tabulator, output_tabulator, render_tabulator, theme

# Select the Tabulator theme used by every table in the app.
theme.tabulator_site()

# One scraper instance is enough; `mlb_scrape` is kept as a second name for
# the same object so existing references keep working.
scraper = MLB_Scrape()
mlb_scrape = scraper

# Per-pitcher, per-pitch-type aggregates that spring values are compared with.
df_year_old_group = pl.read_parquet('pitch_data_agg_2024.parquet')

# pitcher_id -> pitcher_name lookup built from the 2024 aggregates.
pitcher_old_dict = dict(zip(df_year_old_group['pitcher_id'], df_year_old_group['pitcher_name']))
19
 
 
 
20
 
 
21
 
 
22
 
23
# Page layout: one card with an intro/refresh column, a credits column, and a
# tab set holding the pitch table (rendered by the `table_all` output).
app_ui = ui.page_fluid(
    ui.card(
        ui.card_header("2025 Spring Training Pitch Data App"),
        ui.row(
            ui.column(
                4,
                ui.markdown("""This app generates a table which shows the 2025 Spring Training data.

* Differences are calculated based on 2024 regular season data
* If 2024 data does not exist for pitcher, 2023 Data is used
* If no difference exists, the pitch is labelled as a new pitch"""),
                # The table is (re)computed when this button is pressed.
                ui.input_action_button(
                    "refresh",
                    "Refresh Data",
                    class_="btn-primary",
                    width="100%",
                ),
            ),
            ui.column(
                3,
                ui.div(
                    "By: ",
                    ui.tags.a(
                        "@TJStats",
                        href="https://x.com/TJStats",
                        target="_blank",
                    ),
                ),
                ui.tags.p("Data: MLB"),
                ui.tags.p(
                    ui.tags.a(
                        "Support me on Patreon for more baseball content",
                        href="https://www.patreon.com/TJ_Stats",
                        target="_blank",
                    )
                ),
            ),
        ),
        ui.navset_tab(
            # `ui.nav_panel` is the current name of the deprecated `ui.nav`.
            ui.nav_panel(
                "All Pitches",
                output_tabulator("table_all"),
            ),
        ),
    )
)
68
 
69
# Metrics shown as "spring value (difference vs. 2024)". The 2024 counterpart of
# each one is the same name with the "_old" suffix added by the join.
_COMPARED_METRICS = (
    "start_speed",
    "ivb",
    "hb",
    "release_pos_z",
    "release_pos_x",
    "extension",
    "tj_stuff_plus",
)

# Fractions that are rendered as "12.3%" strings.
_PERCENT_COLUMNS = ("pitch_percent", "rhh_percent", "lhh_percent")


def _column(title, field, width=100, **extra):
    """Build one Tabulator column definition with a text header filter."""
    return {"title": title, "field": field, "width": width, "headerFilter": "input", **extra}


# Column layout of the table; metric columns use the "textarea" formatter so
# the embedded line break between value and difference is displayed.
_TABLE_COLUMNS = [
    _column("Pitcher Name", "pitcher_name", 250, frozen=True),
    _column("Team", "pitcher_team", 100, frozen=True),
    _column("Pitch Type", "pitch_type", 125, frozen=True),
    _column("New Pitch?", "new_pitch", 125, frozen=False),
    _column("Pitches", "count"),
    _column("Pitch%", "pitch_percent_formatted"),
    _column("RHH%", "rhh_percent_formatted"),
    _column("LHH%", "lhh_percent_formatted"),
    _column("Velocity", "start_speed_formatted", formatter="textarea"),
    _column("iVB", "ivb_formatted", formatter="textarea"),
    _column("HB", "hb_formatted", formatter="textarea"),
    _column("RelH", "release_pos_z_formatted", formatter="textarea"),
    _column("RelS", "release_pos_x_formatted", formatter="textarea"),
    _column("Extension", "extension_formatted", 125, formatter="textarea"),
    _column("tjStuff+", "tj_stuff_plus_formatted", formatter="textarea"),
]


def _load_spring_pitches():
    """Return the stored 2025 spring pitches plus today's games, newest first."""
    df_spring = pl.read_parquet(
        "hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet"
    )

    # NOTE(review): "today" is the host's local date (naive datetime.now());
    # confirm that matches the schedule's `date` column around midnight.
    today = datetime.datetime.now().date()
    schedule = scraper.get_schedule(year_input=[today.year], sport_id=[1], game_type=['S'])
    game_ids = schedule.filter(pl.col('date') == today)['game_id']

    # Only fetch and append when there is something to add; this avoids
    # scraping an empty game list on days with no spring games.
    # NOTE(review): if the stored parquet already contains today's games, the
    # rows would be counted twice -- confirm how the dataset is refreshed.
    if len(game_ids) > 0:
        df_today = scraper.get_data_df(scraper.get_data(game_ids))
        if df_today.height > 0:
            df_spring = pl.concat([df_spring, df_today])

    return df_spring.sort('game_date', descending=True)


def _summarise_by_pitch(df_pitches):
    """Aggregate pitch-level rows to one row per pitcher and pitch type.

    Adds the pitch count, the mean of each metric, counts split by batter
    handedness, and each count as a fraction of the pitcher's total pitches.
    """
    pitcher_totals = df_pitches.group_by("pitcher_id").agg(
        pl.col("start_speed").count().alias("pitcher_total")
    )

    grouped = df_pitches.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
        pl.col('start_speed').count().alias('count'),
        pl.col('start_speed').mean().alias('start_speed'),
        pl.col('ivb').mean().alias('ivb'),
        pl.col('hb').mean().alias('hb'),
        pl.col('release_pos_z').mean().alias('release_pos_z'),
        pl.col('release_pos_x').mean().alias('release_pos_x'),
        pl.col('extension').mean().alias('extension'),
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
        # Right-handed hitters are batter_hand == 'R', left-handed == 'L'.
        pl.col('start_speed').filter(pl.col('batter_hand') == 'R').count().alias('rhh_count'),
        pl.col('start_speed').filter(pl.col('batter_hand') == 'L').count().alias('lhh_count'),
    ])

    # NOTE(review): the handedness shares use the pitcher's overall total as the
    # denominator (so RHH% + LHH% equals Pitch%); confirm this is the intended
    # definition rather than a share of pitches thrown to that batter side.
    return grouped.join(pitcher_totals, on="pitcher_id", how="left").with_columns([
        (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent"),
        (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
        (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent"),
    ])


def _compare_with_previous_season(df_group):
    """Join the 2024 aggregates and add new-pitch flag, diffs and display strings."""
    merged = df_group.join(
        df_year_old_group,
        on=['pitcher_id', 'pitcher_name', 'pitch_type'],
        how='left',
        suffix='_old',
    )

    merged = merged.with_columns(
        pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
    )

    # A pitch is flagged as new when the pitcher appears in the 2024 data but
    # has no 2024 row for this pitch type.
    merged = merged.with_columns(
        pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
        .then(pl.lit("TRUE"))
        .otherwise(pl.lit(None))
        .alias("new_pitch")
    )

    # Difference vs. 2024; stays null when there is no 2024 value.
    merged = merged.with_columns([
        (pl.col(metric) - pl.col(metric + "_old")).alias(metric + "_diff")
        for metric in _COMPARED_METRICS
    ])

    # "value\n(+diff)" when a difference exists, otherwise just the value.
    merged = merged.with_columns([
        pl.when(pl.col(metric + "_diff").is_null())
        .then(pl.col(metric).round(1).cast(pl.Utf8) + '\n\t')
        .otherwise(
            pl.col(metric).round(1).cast(pl.Utf8)
            + "\n("
            + pl.col(metric + "_diff").round(1)
            .map_elements(lambda x: f"{x:+.1f}", return_dtype=pl.Utf8)
            + ")"
        )
        .alias(metric + "_formatted")
        for metric in _COMPARED_METRICS
    ])

    merged = merged.with_columns([
        (pl.col(col) * 100)
        .round(1)
        .map_elements(lambda x: f"{x:.1f}%", return_dtype=pl.Utf8)
        .alias(col + "_formatted")
        for col in _PERCENT_COLUMNS
    ])

    return merged.sort(['pitcher_id', 'count'], descending=True)


def server(input, output, session):
    """Shiny server: build the spring-training pitch table on each refresh click."""

    @output
    @render_tabulator
    @reactive.event(input.refresh)
    def table_all():
        df_spring = _load_spring_pitches()
        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))

        df_merge = _compare_with_previous_season(_summarise_by_pitch(df_spring_stuff))
        df_plot = df_merge.to_pandas()

        # pitcher_id -> team lookup taken from the pitch-level rows.
        team_dict = dict(zip(df_spring['pitcher_id'], df_spring['pitcher_team']))
        df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)

        return Tabulator(
            df_plot,
            table_options=TableOptions(
                height=750,
                columns=_TABLE_COLUMNS,
            ),
        )


app = App(app_ui, server)