nesticot committed on
Commit
8dab285
·
verified ·
1 Parent(s): 6781a47

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +596 -587
  2. pitch_data_agg_2024.parquet +2 -2
app.py CHANGED
@@ -1,587 +1,596 @@
1
- import polars as pl
2
- import api_scraper
3
- mlb_scrape = api_scraper.MLB_Scrape()
4
-
5
- from stuff_model import *
6
- from shiny import App, reactive, ui, render
7
- from shiny.ui import h2, tags
8
- from api_scraper import MLB_Scrape
9
- import datetime
10
- from stuff_model import feature_engineering as fe
11
- from stuff_model import stuff_apply
12
- from pytabulator import TableOptions, Tabulator, output_tabulator, render_tabulator, theme
13
- theme.tabulator_site()
14
- scraper = MLB_Scrape()
15
-
16
- df_year_old_group = pl.read_parquet('pitch_data_agg_2024.parquet')
17
-
18
- pitcher_old_dict = dict(zip(df_year_old_group['pitcher_id'],df_year_old_group['pitcher_name']))
19
-
20
-
21
-
22
-
23
- app_ui = ui.page_fluid(
24
- ui.card(
25
- ui.card_header("2025 Spring Training Pitch Data App"),
26
- ui.row(
27
- ui.column(4,
28
- ui.markdown("""This app generates a table which shows the 2025 Spring Training data.
29
-
30
- * Differences are calculated based on 2024 regular season data
31
- * If 2024 data does not exist for pitcher, 2023 Data is used
32
- * If no difference exists, the pitch is labelled as a new pitch"""),
33
-
34
-
35
- ui.input_action_button(
36
- "refresh",
37
- "Refresh Data",
38
- class_="btn-primary",
39
- width="100%"
40
- )
41
- ),
42
- ui.column(3,
43
- ui.div(
44
- "By: ",
45
- ui.tags.a(
46
- "@TJStats",
47
- href="https://x.com/TJStats",
48
- target="_blank"
49
- )
50
- ),
51
- ui.tags.p("Data: MLB"),
52
- ui.tags.p(
53
- ui.tags.a(
54
- "Support me on Patreon for more baseball content",
55
- href="https://www.patreon.com/TJ_Stats",
56
- target="_blank"
57
- )
58
- )
59
- )
60
- ),
61
- ui.navset_tab(
62
- ui.nav("All Pitches",
63
- output_tabulator("table_all")
64
- ),
65
- ui.nav("Daily Pitches",
66
- output_tabulator("table_daily")
67
- ),
68
- ui.nav("tjStuff+",
69
- output_tabulator("table_tjstuff")
70
- ),
71
- )
72
- )
73
- )
74
-
75
- def server(input, output, session):
76
- @output
77
- @render_tabulator
78
- @reactive.event(input.refresh)
79
- def table_all():
80
-
81
- import polars as pl
82
- df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
83
-
84
-
85
- date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
86
- print(datetime.datetime.now())
87
- date_str = date.strftime('%Y-%m-%d')
88
- # Initialize the scraper
89
-
90
-
91
- game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
92
- .filter(pl.col('date') == date)['game_id'])
93
-
94
- data = scraper.get_data(game_list_input)
95
- df = scraper.get_data_df(data)
96
-
97
- df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
98
-
99
-
100
-
101
- # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
102
- # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
103
- df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
104
-
105
-
106
-
107
- import polars as pl
108
-
109
- # Compute total pitches for each pitcher
110
- df_pitcher_totals = df_spring_stuff.group_by("pitcher_id").agg(
111
- pl.col("start_speed").count().alias("pitcher_total")
112
- )
113
-
114
- df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
115
- pl.col('start_speed').count().alias('count'),
116
- pl.col('start_speed').mean().alias('start_speed'),
117
- pl.col('ivb').mean().alias('ivb'),
118
- pl.col('hb').mean().alias('hb'),
119
- pl.col('release_pos_z').mean().alias('release_pos_z'),
120
- pl.col('release_pos_x').mean().alias('release_pos_x'),
121
- pl.col('extension').mean().alias('extension'),
122
- pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
123
- (pl.col('start_speed').filter(pl.col('batter_hand')=='L').count()).alias('rhh_count'),
124
- (pl.col('start_speed').filter(pl.col('batter_hand')=='R').count()).alias('lhh_count')
125
- ])
126
-
127
- # Join total pitches per pitcher to the grouped DataFrame on pitcher_id
128
- df_spring_group = df_spring_group.join(df_pitcher_totals, on="pitcher_id", how="left")
129
-
130
- # Now calculate the pitch percent for each pitcher/pitch_type combination
131
- df_spring_group = df_spring_group.with_columns(
132
- (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
133
- )
134
-
135
- # Optionally, if you want the percentage of left/right-handed batters within the group:
136
- df_spring_group = df_spring_group.with_columns([
137
- (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
138
- (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent")
139
- ])
140
-
141
- df_merge = df_spring_group.join(df_year_old_group,on=['pitcher_id','pitcher_name','pitch_type'],how='left',suffix='_old')
142
-
143
-
144
- df_merge = df_merge.with_columns(
145
- pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
146
- )
147
-
148
- df_merge = df_merge.with_columns(
149
- pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
150
- .then(pl.lit("TRUE"))
151
- .otherwise(pl.lit(None))
152
- .alias("new_pitch")
153
- )
154
-
155
- import polars as pl
156
-
157
- # Define the columns to subtract
158
- cols_to_subtract = [
159
- ("start_speed", "start_speed_old"),
160
- ("ivb", "ivb_old"),
161
- ("hb", "hb_old"),
162
- ("release_pos_z", "release_pos_z_old"),
163
- ("release_pos_x", "release_pos_x_old"),
164
- ("extension", "extension_old"),
165
- ("tj_stuff_plus", "tj_stuff_plus_old")
166
- ]
167
-
168
- df_merge = df_merge.with_columns([
169
- # Step 1: Create _diff columns with the default value (e.g., 80) if old is null
170
- pl.when(pl.col(old).is_null())
171
- .then(pl.lit(10000)) # If old is null, assign 80 as the default
172
- .otherwise(pl.col(new) - pl.col(old)) # Otherwise subtract old from new
173
- .alias(new + "_diff")
174
- for new, old in cols_to_subtract
175
- ])
176
-
177
- # Step 2: Format the columns with (value (+diff)) - exclude brackets if diff is 80
178
- df_merge = df_merge.with_columns([
179
- pl.when(pl.col(new + "_diff").eq(10000)) # If diff is 80, no need to include brackets
180
- .then(pl.col(new).round(1).cast(pl.Utf8)+'\n\t') # Just return the new value as string
181
- .otherwise(
182
- pl.col(new).round(1).cast(pl.Utf8) +
183
- "\n(" +
184
- pl.col(new + "_diff").round(1)
185
- .map_elements(lambda x: f"{x:+.1f}") +
186
- ")"
187
- ).alias(new + "_formatted")
188
- for new, _ in cols_to_subtract
189
- ])
190
-
191
-
192
-
193
-
194
-
195
-
196
- percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
197
-
198
- df_merge = df_merge.with_columns([
199
- (pl.col(col) * 100) # Convert to percentage
200
- .round(1) # Round to 1 decimal
201
- .map_elements(lambda x: f"{x:.1f}%") # Format as string with '%'
202
- .alias(col + "_formatted")
203
- for col in percent_cols
204
- ]).sort(['pitcher_id','count'],descending=True)
205
-
206
-
207
- columns = [
208
- { "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
209
- { "title": "Team", "field": "pitcher_team", "width": 100, "headerFilter":"input" ,"frozen":True,},
210
- { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
211
- { "title": "New Pitch?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
212
- { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input","contextMenu":True},
213
- { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
214
- { "title": "RHH%", "field": "rhh_percent_formatted", "width": 100, "headerFilter":"input"},
215
- { "title": "LHH%", "field": "lhh_percent_formatted", "width": 100, "headerFilter":"input"},
216
- { "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
217
- { "title": "iVB", "field": "ivb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
218
- { "title": "HB", "field": "hb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
219
- { "title": "RelH", "field": "release_pos_z_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
220
- { "title": "RelS", "field": "release_pos_x_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
221
- { "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
222
- { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" }
223
- ]
224
-
225
-
226
- df_plot = df_merge.to_pandas()
227
-
228
- team_dict = dict(zip(df_spring['pitcher_id'],df_spring['pitcher_team']))
229
- df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)
230
-
231
-
232
-
233
- return Tabulator(
234
- df_plot,
235
-
236
- table_options=TableOptions(
237
- height=750,
238
-
239
- columns=columns,
240
- )
241
- )
242
-
243
-
244
- @output
245
- @render_tabulator
246
- @reactive.event(input.refresh)
247
- def table_daily():
248
-
249
- import polars as pl
250
- df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
251
-
252
-
253
- import datetime
254
-
255
- date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
256
- print(datetime.datetime.now())
257
-
258
- date_str = date.strftime('%Y-%m-%d')
259
- # Initialize the scraper
260
-
261
-
262
- game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
263
- .filter(pl.col('date') == date)['game_id'])
264
-
265
- data = scraper.get_data(game_list_input)
266
- df = scraper.get_data_df(data)
267
-
268
- df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
269
-
270
-
271
-
272
- # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
273
- # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
274
- df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
275
-
276
-
277
-
278
- import polars as pl
279
-
280
- # Compute total pitches for each pitcher
281
- df_pitcher_totals = df_spring_stuff.group_by(["pitcher_id",'game_id','game_date']).agg(
282
- pl.col("start_speed").count().alias("pitcher_total")
283
- )
284
-
285
- df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type','game_id','game_date']).agg([
286
- pl.col('start_speed').count().alias('count'),
287
- pl.col('start_speed').mean().alias('start_speed'),
288
- pl.col('ivb').mean().alias('ivb'),
289
- pl.col('hb').mean().alias('hb'),
290
- pl.col('release_pos_z').mean().alias('release_pos_z'),
291
- pl.col('release_pos_x').mean().alias('release_pos_x'),
292
- pl.col('extension').mean().alias('extension'),
293
- pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
294
- (pl.col('start_speed').filter(pl.col('batter_hand')=='L').count()).alias('rhh_count'),
295
- (pl.col('start_speed').filter(pl.col('batter_hand')=='R').count()).alias('lhh_count')
296
- ])
297
-
298
- # Join total pitches per pitcher to the grouped DataFrame on pitcher_id
299
- df_spring_group = df_spring_group.join(df_pitcher_totals, on=["pitcher_id",'game_id','game_date'], how="left")
300
-
301
- # Now calculate the pitch percent for each pitcher/pitch_type combination
302
- df_spring_group = df_spring_group.with_columns(
303
- (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
304
- )
305
-
306
- # Optionally, if you want the percentage of left/right-handed batters within the group:
307
- df_spring_group = df_spring_group.with_columns([
308
- (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
309
- (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent")
310
- ])
311
-
312
- df_merge = df_spring_group.join(df_year_old_group,on=['pitcher_id','pitcher_name','pitch_type'],how='left',suffix='_old')
313
-
314
-
315
- df_merge = df_merge.with_columns(
316
- pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
317
- )
318
-
319
- df_merge = df_merge.with_columns(
320
- pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
321
- .then(pl.lit("TRUE"))
322
- .otherwise(pl.lit(None))
323
- .alias("new_pitch")
324
- )
325
-
326
- import polars as pl
327
-
328
- # Define the columns to subtract
329
- cols_to_subtract = [
330
- ("start_speed", "start_speed_old"),
331
- ("ivb", "ivb_old"),
332
- ("hb", "hb_old"),
333
- ("release_pos_z", "release_pos_z_old"),
334
- ("release_pos_x", "release_pos_x_old"),
335
- ("extension", "extension_old"),
336
- ("tj_stuff_plus", "tj_stuff_plus_old")
337
- ]
338
-
339
- df_merge = df_merge.with_columns([
340
- # Step 1: Create _diff columns with the default value (e.g., 80) if old is null
341
- pl.when(pl.col(old).is_null())
342
- .then(pl.lit(10000)) # If old is null, assign 80 as the default
343
- .otherwise(pl.col(new) - pl.col(old)) # Otherwise subtract old from new
344
- .alias(new + "_diff")
345
- for new, old in cols_to_subtract
346
- ])
347
-
348
- # Step 2: Format the columns with (value (+diff)) - exclude brackets if diff is 80
349
- df_merge = df_merge.with_columns([
350
- pl.when(pl.col(new + "_diff").eq(10000)) # If diff is 80, no need to include brackets
351
- .then(pl.col(new).round(1).cast(pl.Utf8)+'\n\t') # Just return the new value as string
352
- .otherwise(
353
- pl.col(new).round(1).cast(pl.Utf8) +
354
- "\n(" +
355
- pl.col(new + "_diff").round(1)
356
- .map_elements(lambda x: f"{x:+.1f}") +
357
- ")"
358
- ).alias(new + "_formatted")
359
- for new, _ in cols_to_subtract
360
- ])
361
-
362
-
363
-
364
-
365
-
366
-
367
- percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
368
-
369
- df_merge = df_merge.with_columns([
370
- (pl.col(col) * 100) # Convert to percentage
371
- .round(1) # Round to 1 decimal
372
- .map_elements(lambda x: f"{x:.1f}%") # Format as string with '%'
373
- .alias(col + "_formatted")
374
- for col in percent_cols
375
- ]).sort(['pitcher_id','count'],descending=True)
376
-
377
-
378
- columns = [
379
- { "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
380
- { "title": "Team", "field": "pitcher_team", "width": 100, "headerFilter":"input" ,"frozen":True,},
381
- { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
382
- { "title": "New Pitch?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
383
- { "title": "Date", "field": "game_date", "width": 100, "headerFilter":"input" ,"frozen":True,},
384
- { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input"},
385
- { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
386
- { "title": "RHH%", "field": "rhh_percent_formatted", "width": 100, "headerFilter":"input"},
387
- { "title": "LHH%", "field": "lhh_percent_formatted", "width": 100, "headerFilter":"input"},
388
- { "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
389
- { "title": "iVB", "field": "ivb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
390
- { "title": "HB", "field": "hb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
391
- { "title": "RelH", "field": "release_pos_z_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
392
- { "title": "RelS", "field": "release_pos_x_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
393
- { "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
394
- { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" }
395
- ]
396
-
397
-
398
- df_plot = df_merge.to_pandas()
399
-
400
- team_dict = dict(zip(df_spring['pitcher_id'],df_spring['pitcher_team']))
401
- df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)
402
-
403
-
404
-
405
- return Tabulator(
406
- df_plot,
407
-
408
- table_options=TableOptions(
409
- height=750,
410
-
411
- columns=columns,
412
- )
413
- )
414
-
415
- @output
416
- @render_tabulator
417
- @reactive.event(input.refresh)
418
- def table_tjstuff():
419
-
420
- import polars as pl
421
- df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
422
-
423
-
424
- import datetime
425
-
426
- date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
427
- print(datetime.datetime.now())
428
-
429
- date_str = date.strftime('%Y-%m-%d')
430
- # Initialize the scraper
431
-
432
-
433
- game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
434
- .filter(pl.col('date') == date)['game_id'])
435
-
436
- data = scraper.get_data(game_list_input)
437
- df = scraper.get_data_df(data)
438
-
439
- df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
440
-
441
-
442
-
443
- # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
444
- # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
445
- df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
446
-
447
-
448
-
449
- import polars as pl
450
-
451
- # Compute total pitches for each pitcher
452
- df_pitcher_totals = df_spring_stuff.group_by(["pitcher_id"]).agg(
453
- pl.col("start_speed").count().alias("pitcher_total")
454
- )
455
-
456
- df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
457
- pl.col('start_speed').count().alias('count'),
458
- pl.col('start_speed').mean().alias('start_speed'),
459
- pl.col('ivb').mean().alias('ivb'),
460
- pl.col('hb').mean().alias('hb'),
461
- pl.col('release_pos_z').mean().alias('release_pos_z'),
462
- pl.col('release_pos_x').mean().alias('release_pos_x'),
463
- pl.col('extension').mean().alias('extension'),
464
- pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
465
- (pl.col('start_speed').filter(pl.col('batter_hand')=='L').count()).alias('rhh_count'),
466
- (pl.col('start_speed').filter(pl.col('batter_hand')=='R').count()).alias('lhh_count')
467
- ])
468
-
469
- # Join total pitches per pitcher to the grouped DataFrame on pitcher_id
470
- df_spring_group = df_spring_group.join(df_pitcher_totals, on=["pitcher_id"], how="left")
471
-
472
- # Now calculate the pitch percent for each pitcher/pitch_type combination
473
- df_spring_group = df_spring_group.with_columns(
474
- (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
475
- )
476
-
477
- # Optionally, if you want the percentage of left/right-handed batters within the group:
478
- df_spring_group = df_spring_group.with_columns([
479
- (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
480
- (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent")
481
- ])
482
-
483
- df_merge = df_spring_group.join(df_year_old_group,on=['pitcher_id','pitcher_name','pitch_type'],how='left',suffix='_old')
484
-
485
-
486
- df_merge = df_merge.with_columns(
487
- pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
488
- )
489
-
490
- df_merge = df_merge.with_columns(
491
- pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
492
- .then(pl.lit("TRUE"))
493
- .otherwise(pl.lit(None))
494
- .alias("new_pitch")
495
- )
496
-
497
- import polars as pl
498
-
499
- # Define the columns to subtract
500
- cols_to_subtract = [
501
- ("start_speed", "start_speed_old"),
502
- ("ivb", "ivb_old"),
503
- ("hb", "hb_old"),
504
- ("release_pos_z", "release_pos_z_old"),
505
- ("release_pos_x", "release_pos_x_old"),
506
- ("extension", "extension_old"),
507
- ("tj_stuff_plus", "tj_stuff_plus_old")
508
- ]
509
-
510
- df_merge = df_merge.with_columns([
511
- # Step 1: Create _diff columns with the default value (e.g., 80) if old is null
512
- pl.when(pl.col(old).is_null())
513
- .then(pl.lit(None)) # If old is null, assign 80 as the default
514
- .otherwise(pl.col(new) - pl.col(old)) # Otherwise subtract old from new
515
- .alias(new + "_diff")
516
- for new, old in cols_to_subtract
517
- ])
518
-
519
- # Step 2: Format the columns with (value (+diff)) - exclude brackets if diff is 80
520
- # Step 2: Format the columns with (value (+diff)) - exclude brackets if diff is 80
521
- df_merge = df_merge.with_columns([
522
-
523
- pl.col(new).round(1).cast(pl.Utf8).alias(new + "_formatted")
524
- for new, _ in cols_to_subtract
525
- ])
526
-
527
-
528
-
529
- df_merge = df_merge.with_columns([
530
- pl.col("tj_stuff_plus_old").round(1).cast(pl.Utf8).alias("tj_stuff_plus_old"),
531
- pl.col("tj_stuff_plus_diff").round(1).map_elements(lambda x: f"{x:+.1f}").alias("tj_stuff_plus_diff")
532
- ])
533
-
534
-
535
-
536
- percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
537
-
538
- df_merge = df_merge.with_columns([
539
- (pl.col(col) * 100) # Convert to percentage
540
- .round(1) # Round to 1 decimal
541
- .map_elements(lambda x: f"{x:.1f}%") # Format as string with '%'
542
- .alias(col + "_formatted")
543
- for col in percent_cols
544
- ]).sort(['pitcher_id','count'],descending=True)
545
-
546
-
547
-
548
-
549
- columns = [
550
- { "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
551
- { "title": "Team", "field": "pitcher_team", "width": 90, "headerFilter":"input" ,"frozen":True,},
552
- { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
553
- { "title": "New?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
554
- { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input"},
555
- { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
556
- { "title": "RHH%", "field": "rhh_percent_formatted", "width": 90, "headerFilter":"input"},
557
- { "title": "LHH%", "field": "lhh_percent_formatted", "width": 90, "headerFilter":"input"},
558
- { "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
559
- { "title": "iVB", "field": "ivb_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
560
- { "title": "HB", "field": "hb_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
561
- { "title": "RelH", "field": "release_pos_z_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
562
- { "title": "RelS", "field": "release_pos_x_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
563
- { "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
564
- { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
565
- { "title": "2024 tjStuff+", "field": "tj_stuff_plus_old", "width": 100, "headerFilter":"input", "formatter":"textarea" },
566
- { "title": "Δ", "field": "tj_stuff_plus_diff", "width": 100, "headerFilter":"input", "formatter":"textarea" }
567
- ]
568
-
569
-
570
- df_plot = df_merge.sort(['pitcher_id','count'],descending=True).to_pandas()
571
-
572
- team_dict = dict(zip(df_spring['pitcher_id'],df_spring['pitcher_team']))
573
- df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)
574
-
575
-
576
-
577
- return Tabulator(
578
- df_plot,
579
-
580
- table_options=TableOptions(
581
- height=750,
582
-
583
- columns=columns,
584
- )
585
- )
586
-
587
- app = App(app_ui, server)
 
 
 
 
 
 
 
 
 
 
1
+ import polars as pl
2
+ import api_scraper
3
+ mlb_scrape = api_scraper.MLB_Scrape()
4
+
5
+ from stuff_model import *
6
+ from shiny import App, reactive, ui, render
7
+ from shiny.ui import h2, tags
8
+ from api_scraper import MLB_Scrape
9
+ import datetime
10
+ from stuff_model import feature_engineering as fe
11
+ from stuff_model import stuff_apply
12
+ from pytabulator import TableOptions, Tabulator, output_tabulator, render_tabulator, theme
13
+ theme.tabulator_site()
14
+ scraper = MLB_Scrape()
15
+
16
+ df_year_old_group = pl.read_parquet('pitch_data_agg_2024.parquet')
17
+
18
+ pitcher_old_dict = dict(zip(df_year_old_group['pitcher_id'],df_year_old_group['pitcher_name']))
19
+
20
+
21
+
22
+
23
+ app_ui = ui.page_fluid(
24
+ ui.card(
25
+ ui.card_header("2025 Spring Training Pitch Data App"),
26
+ ui.row(
27
+ ui.column(4,
28
+ ui.markdown("""This app generates a table which shows the 2025 Spring Training data.
29
+
30
+ * Differences are calculated based on 2024 regular season data
31
+ * If 2024 data does not exist for pitcher, 2023 Data is used
32
+ * If no difference exists, the pitch is labelled as a new pitch"""),
33
+
34
+
35
+ ui.input_action_button(
36
+ "refresh",
37
+ "Refresh Data",
38
+ class_="btn-primary",
39
+ width="100%"
40
+ )
41
+ ),
42
+ ui.column(3,
43
+ ui.div(
44
+ "By: ",
45
+ ui.tags.a(
46
+ "@TJStats",
47
+ href="https://x.com/TJStats",
48
+ target="_blank"
49
+ )
50
+ ),
51
+ ui.tags.p("Data: MLB"),
52
+ ui.tags.p(
53
+ ui.tags.a(
54
+ "Support me on Patreon for more baseball content",
55
+ href="https://www.patreon.com/TJ_Stats",
56
+ target="_blank"
57
+ )
58
+ )
59
+ )
60
+ ),
61
+ ui.navset_tab(
62
+ ui.nav("All Pitches",
63
+ output_tabulator("table_all")
64
+ ),
65
+ ui.nav("Daily Pitches",
66
+ output_tabulator("table_daily")
67
+ ),
68
+ ui.nav("tjStuff+",
69
+ output_tabulator("table_tjstuff")
70
+ ),
71
+ )
72
+ )
73
+ )
74
+
75
+ def server(input, output, session):
@output
@render_tabulator
@reactive.event(input.refresh)
def table_all():
    """Build the "All Pitches" table shown in the first tab.

    Runs when the "refresh" action button fires. The steps are:

    1. Read the saved 2025 spring pitch parquet and append the pitches
       scraped for games whose schedule date equals the current date
       (server clock shifted back eight hours).
    2. Score the combined data with ``stuff_apply`` after
       ``feature_engineering``.
    3. Aggregate per pitcher and pitch type, compute usage shares, and
       left-join the prior-season aggregate ``df_year_old_group``.
    4. Format every metric as ``value`` plus, when a prior-season value
       exists, a signed difference on a second line.

    Returns:
        A ``Tabulator`` wrapping the pandas frame and column layout.
    """
    # One local import, as in the original (which repeated it three times).
    import polars as pl

    # Sentinel written into a *_diff column when no prior-season value exists;
    # the formatting step checks for it to decide whether to print a diff.
    NO_BASELINE = 10000

    df_spring = pl.read_parquet(
        "hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet"
    )

    # "Today" is the server clock shifted back eight hours.
    date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
    print(datetime.datetime.now())

    # Game ids from the schedule (game_type 'S') whose date matches `date`.
    game_list_input = (
        scraper.get_schedule(year_input=[date.year], sport_id=[1], game_type=['S'])
        .filter(pl.col('date') == date)['game_id']
    )
    data = scraper.get_data(game_list_input)
    df = scraper.get_data_df(data)

    df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)

    df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))

    # Total pitches per pitcher: the denominator for every usage share below.
    df_pitcher_totals = df_spring_stuff.group_by("pitcher_id").agg(
        pl.col("start_speed").count().alias("pitcher_total")
    )

    df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
        pl.col('start_speed').count().alias('count'),
        pl.col('start_speed').mean().alias('start_speed'),
        pl.col('start_speed').max().alias('max_start_speed'),
        pl.col('ivb').mean().alias('ivb'),
        pl.col('hb').mean().alias('hb'),
        pl.col('release_pos_z').mean().alias('release_pos_z'),
        pl.col('release_pos_x').mean().alias('release_pos_x'),
        pl.col('extension').mean().alias('extension'),
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
        # Fix: batter_hand 'R' feeds rhh_count and 'L' feeds lhh_count. The
        # original had the two filters swapped, so the RHH% and LHH% columns
        # displayed each other's values.
        pl.col('start_speed').filter(pl.col('batter_hand') == 'R').count().alias('rhh_count'),
        pl.col('start_speed').filter(pl.col('batter_hand') == 'L').count().alias('lhh_count'),
    ])

    df_spring_group = df_spring_group.join(df_pitcher_totals, on="pitcher_id", how="left")

    # All three shares are fractions of the pitcher's total pitch count.
    df_spring_group = df_spring_group.with_columns([
        (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent"),
        (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
        (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent"),
    ])

    # Prior-season columns that clash by name arrive with the "_old" suffix.
    df_merge = df_spring_group.join(
        df_year_old_group, on=['pitcher_id', 'pitch_type'], how='left', suffix='_old'
    )

    df_merge = df_merge.with_columns(
        pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
    )

    # A pitch is "new" when the pitcher appears in the prior-season data but
    # this pitch type found no match in the join.
    df_merge = df_merge.with_columns(
        pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
        .then(pl.lit(True))
        .otherwise(pl.lit(None))
        .alias("new_pitch")
    )

    # (current column, prior-season column) pairs to difference.
    cols_to_subtract = [
        ("start_speed", "start_speed_old"),
        ("max_start_speed", "max_start_speed_old"),
        ("ivb", "ivb_old"),
        ("hb", "hb_old"),
        ("release_pos_z", "release_pos_z_old"),
        ("release_pos_x", "release_pos_x_old"),
        ("extension", "extension_old"),
        ("tj_stuff_plus", "tj_stuff_plus_old"),
    ]

    # Step 1: <metric>_diff = new - old, or the sentinel when old is null.
    df_merge = df_merge.with_columns([
        pl.when(pl.col(old).is_null())
        .then(pl.lit(NO_BASELINE))
        .otherwise(pl.col(new) - pl.col(old))
        .alias(new + "_diff")
        for new, old in cols_to_subtract
    ])

    # Step 2: "<value>\n(<signed diff>)", or "<value>\n\t" when the diff is
    # the sentinel. return_dtype is given so polars does not have to infer
    # the string dtype from the first returned value.
    df_merge = df_merge.with_columns([
        pl.when(pl.col(new + "_diff").eq(NO_BASELINE))
        .then(pl.col(new).round(1).cast(pl.Utf8) + '\n\t')
        .otherwise(
            pl.col(new).round(1).cast(pl.Utf8) +
            "\n(" +
            pl.col(new + "_diff").round(1)
            .map_elements(lambda x: f"{x:+.1f}", return_dtype=pl.Utf8) +
            ")"
        ).alias(new + "_formatted")
        for new, _ in cols_to_subtract
    ])

    percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']

    # Fractions become "12.3%" strings; rows are then ordered by pitcher id
    # and pitch count, both descending.
    df_merge = df_merge.with_columns([
        (pl.col(col) * 100)
        .round(1)
        .map_elements(lambda x: f"{x:.1f}%", return_dtype=pl.Utf8)
        .alias(col + "_formatted")
        for col in percent_cols
    ]).sort(['pitcher_id', 'count'], descending=True)

    columns = [
        {"title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter": "input", "frozen": True},
        {"title": "Team", "field": "pitcher_team", "width": 100, "headerFilter": "input", "frozen": True},
        {"title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter": "input", "frozen": True},
        {"title": "New Pitch?", "field": "new_pitch", "width": 125, "headerFilter": "input", "frozen": False},
        {"title": "Pitches", "field": "count", "width": 100, "headerFilter": "input", "contextMenu": True},
        {"title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter": "input"},
        {"title": "RHH%", "field": "rhh_percent_formatted", "width": 100, "headerFilter": "input"},
        {"title": "LHH%", "field": "lhh_percent_formatted", "width": 100, "headerFilter": "input"},
        {"title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
        {"title": "Max Velo", "field": "max_start_speed_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
        {"title": "iVB", "field": "ivb_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
        {"title": "HB", "field": "hb_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
        {"title": "RelH", "field": "release_pos_z_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
        {"title": "RelS", "field": "release_pos_x_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
        {"title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter": "input", "formatter": "textarea"},
        {"title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
    ]

    df_plot = df_merge.to_pandas()

    # NOTE(review): dict(zip(...)) keeps the last value seen per pitcher_id,
    # and df_spring is sorted by game_date descending, so the team shown is
    # taken from the pitcher's last row in that order. Confirm this is the
    # intended team.
    team_dict = dict(zip(df_spring['pitcher_id'], df_spring['pitcher_team']))
    df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)

    return Tabulator(
        df_plot,
        table_options=TableOptions(
            height=750,
            columns=columns,
        ),
    )
245
+
246
+
247
@output
@render_tabulator
@reactive.event(input.refresh)
def table_daily():
    """Build the per-game pitch summary table.

    Reads the 2025 spring training pitch parquet, appends data scraped for
    the spring training games scheduled on the current date, runs the result
    through ``fe.feature_engineering`` and ``stuff_apply.stuff_apply`` and
    aggregates it by pitcher, pitch type and game. Every metric is rendered
    as ``value`` plus, when a matching row exists in ``df_year_old_group``,
    the signed difference from that row on a second line.

    Returns:
        A ``Tabulator`` widget with one row per pitcher / pitch type / game.
    """
    import datetime

    import polars as pl

    def _col(title, field, width, **extra):
        """Return one Tabulator column definition with a text header filter."""
        return {"title": title, "field": field, "width": width,
                "headerFilter": "input", **extra}

    df_spring = pl.read_parquet(
        "hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet"
    )

    # NOTE(review): this is the server's local (naive) clock shifted back
    # 8 hours; presumably it keeps late games on the same "day" -- confirm
    # the host timezone.
    date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
    print(datetime.datetime.now())

    # Spring training games ('S') scheduled on the selected date.
    game_list_input = (
        scraper.get_schedule(year_input=[date.year], sport_id=[1], game_type=['S'])
        .filter(pl.col('date') == date)['game_id']
    )

    data = scraper.get_data(game_list_input)
    df = scraper.get_data_df(data)

    df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)

    df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))

    # Total pitches per pitcher and game (denominator for the percentages).
    game_keys = ['pitcher_id', 'game_id', 'game_date']
    df_pitcher_totals = df_spring_stuff.group_by(game_keys).agg(
        pl.col("start_speed").count().alias("pitcher_total")
    )

    df_spring_group = df_spring_stuff.group_by(
        ['pitcher_id', 'pitcher_name', 'pitch_type', 'game_id', 'game_date']
    ).agg([
        pl.col('start_speed').count().alias('count'),
        pl.col('start_speed').mean().alias('start_speed'),
        pl.col('start_speed').max().alias('max_start_speed'),
        pl.col('ivb').mean().alias('ivb'),
        pl.col('hb').mean().alias('hb'),
        pl.col('release_pos_z').mean().alias('release_pos_z'),
        pl.col('release_pos_x').mean().alias('release_pos_x'),
        pl.col('extension').mean().alias('extension'),
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
        # rhh_* counts pitches to batter_hand == 'R', lhh_* to 'L'.
        pl.col('start_speed').filter(pl.col('batter_hand') == 'R').count().alias('rhh_count'),
        pl.col('start_speed').filter(pl.col('batter_hand') == 'L').count().alias('lhh_count'),
    ])

    df_spring_group = df_spring_group.join(df_pitcher_totals, on=game_keys, how="left")

    # All three shares use the pitcher's total pitches in the game as the
    # denominator (not the pitch type's own count).
    df_spring_group = df_spring_group.with_columns([
        (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent"),
        (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
        (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent"),
    ])

    # Baseline columns that clash with the spring columns get the '_old' suffix.
    df_merge = df_spring_group.join(
        df_year_old_group, on=['pitcher_id', 'pitch_type'], how='left', suffix='_old'
    )

    df_merge = df_merge.with_columns(
        pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
    )

    # A pitch is flagged as new only when the pitcher has baseline data but
    # this pitch type has none; otherwise the flag is left null.
    df_merge = df_merge.with_columns(
        pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
        .then(pl.lit(True))
        .otherwise(pl.lit(None))
        .alias("new_pitch")
    )

    metrics = [
        "start_speed",
        "max_start_speed",
        "ivb",
        "hb",
        "release_pos_z",
        "release_pos_x",
        "extension",
        "tj_stuff_plus",
    ]

    # Step 1: new - old; the result is null whenever there is no baseline row.
    df_merge = df_merge.with_columns([
        (pl.col(m) - pl.col(m + "_old")).alias(m + "_diff")
        for m in metrics
    ])

    # Step 2: "value\n(+diff)" when a baseline exists, otherwise just the
    # value followed by an empty second line.
    df_merge = df_merge.with_columns([
        pl.when(pl.col(m + "_old").is_null())
        .then(pl.col(m).round(1).cast(pl.Utf8) + '\n\t')
        .otherwise(
            pl.col(m).round(1).cast(pl.Utf8)
            + "\n("
            + pl.col(m + "_diff").round(1)
            .map_elements(lambda x: f"{x:+.1f}", return_dtype=pl.Utf8)
            + ")"
        )
        .alias(m + "_formatted")
        for m in metrics
    ])

    percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']

    # Fractions -> "12.3%" strings, then order rows by pitcher and pitch count.
    df_merge = df_merge.with_columns([
        (pl.col(c) * 100)
        .round(1)
        .map_elements(lambda x: f"{x:.1f}%", return_dtype=pl.Utf8)
        .alias(c + "_formatted")
        for c in percent_cols
    ]).sort(['pitcher_id', 'count'], descending=True)

    # NOTE(review): "Date" is frozen but follows the unfrozen "New Pitch?"
    # column; verify that it renders where intended.
    columns = [
        _col("Pitcher Name", "pitcher_name", 250, frozen=True),
        _col("Team", "pitcher_team", 100, frozen=True),
        _col("Pitch Type", "pitch_type", 125, frozen=True),
        _col("New Pitch?", "new_pitch", 125, frozen=False),
        _col("Date", "game_date", 100, frozen=True),
        _col("Pitches", "count", 100),
        _col("Pitch%", "pitch_percent_formatted", 100),
        _col("RHH%", "rhh_percent_formatted", 100),
        _col("LHH%", "lhh_percent_formatted", 100),
        _col("Velocity", "start_speed_formatted", 100, formatter="textarea"),
        _col("Max Velo", "max_start_speed_formatted", 100, formatter="textarea"),
        _col("iVB", "ivb_formatted", 100, formatter="textarea"),
        _col("HB", "hb_formatted", 100, formatter="textarea"),
        _col("RelH", "release_pos_z_formatted", 100, formatter="textarea"),
        _col("RelS", "release_pos_x_formatted", 100, formatter="textarea"),
        _col("Extension", "extension_formatted", 125, formatter="textarea"),
        _col("tjStuff+", "tj_stuff_plus_formatted", 100, formatter="textarea"),
    ]

    df_plot = df_merge.to_pandas()

    # Map each pitcher to a team; with duplicate ids the last row of the
    # date-descending frame wins.
    team_dict = dict(zip(df_spring['pitcher_id'], df_spring['pitcher_team']))
    df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)

    return Tabulator(
        df_plot,
        table_options=TableOptions(
            height=750,
            columns=columns,
        )
    )
420
+
421
@output
@render_tabulator
@reactive.event(input.refresh)
def table_tjstuff():
    """Build the season-to-date pitch summary table with tjStuff+ comparison.

    Reads the 2025 spring training pitch parquet, appends data scraped for
    the spring training games scheduled on the current date, runs the result
    through ``fe.feature_engineering`` and ``stuff_apply.stuff_apply`` and
    aggregates it by pitcher and pitch type. The tjStuff+ value from
    ``df_year_old_group`` and the signed difference to it are shown in their
    own columns; all other metrics are shown as plain rounded values.

    Returns:
        A ``Tabulator`` widget with one row per pitcher / pitch type.
    """
    import datetime

    import polars as pl

    def _col(title, field, width, **extra):
        """Return one Tabulator column definition with a text header filter."""
        return {"title": title, "field": field, "width": width,
                "headerFilter": "input", **extra}

    df_spring = pl.read_parquet(
        "hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet"
    )

    # NOTE(review): this is the server's local (naive) clock shifted back
    # 8 hours; presumably it keeps late games on the same "day" -- confirm
    # the host timezone.
    date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
    print(datetime.datetime.now())

    # Spring training games ('S') scheduled on the selected date.
    game_list_input = (
        scraper.get_schedule(year_input=[date.year], sport_id=[1], game_type=['S'])
        .filter(pl.col('date') == date)['game_id']
    )

    data = scraper.get_data(game_list_input)
    df = scraper.get_data_df(data)

    df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)

    df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))

    # Total pitches per pitcher (denominator for the percentages).
    df_pitcher_totals = df_spring_stuff.group_by(["pitcher_id"]).agg(
        pl.col("start_speed").count().alias("pitcher_total")
    )

    df_spring_group = df_spring_stuff.group_by(
        ['pitcher_id', 'pitcher_name', 'pitch_type']
    ).agg([
        pl.col('start_speed').count().alias('count'),
        pl.col('start_speed').mean().alias('start_speed'),
        pl.col('start_speed').max().alias('max_start_speed'),
        pl.col('ivb').mean().alias('ivb'),
        pl.col('hb').mean().alias('hb'),
        pl.col('release_pos_z').mean().alias('release_pos_z'),
        pl.col('release_pos_x').mean().alias('release_pos_x'),
        pl.col('extension').mean().alias('extension'),
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
        # rhh_* counts pitches to batter_hand == 'R', lhh_* to 'L'.
        pl.col('start_speed').filter(pl.col('batter_hand') == 'R').count().alias('rhh_count'),
        pl.col('start_speed').filter(pl.col('batter_hand') == 'L').count().alias('lhh_count'),
    ])

    df_spring_group = df_spring_group.join(df_pitcher_totals, on=["pitcher_id"], how="left")

    # All three shares use the pitcher's total pitch count as the
    # denominator (not the pitch type's own count).
    df_spring_group = df_spring_group.with_columns([
        (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent"),
        (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
        (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent"),
    ])

    # Baseline columns that clash with the spring columns get the '_old' suffix.
    df_merge = df_spring_group.join(
        df_year_old_group, on=['pitcher_id', 'pitch_type'], how='left', suffix='_old'
    )

    df_merge = df_merge.with_columns(
        pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
    )

    # A pitch is flagged as new only when the pitcher has baseline data but
    # this pitch type has none; otherwise the flag is left null.
    df_merge = df_merge.with_columns(
        pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
        .then(pl.lit(True))
        .otherwise(pl.lit(None))
        .alias("new_pitch")
    )

    metrics = [
        "start_speed",
        "max_start_speed",
        "ivb",
        "hb",
        "release_pos_z",
        "release_pos_x",
        "extension",
        "tj_stuff_plus",
    ]

    # Step 1: new - old; the result is null whenever there is no baseline row.
    df_merge = df_merge.with_columns([
        (pl.col(m) - pl.col(m + "_old")).alias(m + "_diff")
        for m in metrics
    ])

    # Step 2: plain one-decimal strings (no inline diff in this table).
    df_merge = df_merge.with_columns([
        pl.col(m).round(1).cast(pl.Utf8).alias(m + "_formatted")
        for m in metrics
    ])

    # The baseline tjStuff+ and its signed delta get dedicated display columns.
    df_merge = df_merge.with_columns([
        pl.col("tj_stuff_plus_old").round(1).cast(pl.Utf8).alias("tj_stuff_plus_old"),
        pl.col("tj_stuff_plus_diff")
        .round(1)
        .map_elements(lambda x: f"{x:+.1f}", return_dtype=pl.Utf8)
        .alias("tj_stuff_plus_diff"),
    ])

    percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']

    # Fractions -> "12.3%" strings, then order rows by pitcher and pitch count.
    df_merge = df_merge.with_columns([
        (pl.col(c) * 100)
        .round(1)
        .map_elements(lambda x: f"{x:.1f}%", return_dtype=pl.Utf8)
        .alias(c + "_formatted")
        for c in percent_cols
    ]).sort(['pitcher_id', 'count'], descending=True)

    columns = [
        _col("Pitcher Name", "pitcher_name", 250, frozen=True),
        _col("Team", "pitcher_team", 90, frozen=True),
        _col("Pitch Type", "pitch_type", 125, frozen=True),
        _col("New?", "new_pitch", 125, frozen=False),
        _col("Pitches", "count", 100),
        _col("Pitch%", "pitch_percent_formatted", 100),
        _col("RHH%", "rhh_percent_formatted", 90),
        _col("LHH%", "lhh_percent_formatted", 90),
        _col("Velocity", "start_speed_formatted", 100, formatter="textarea"),
        _col("Max Velo", "max_start_speed_formatted", 100, formatter="textarea"),
        _col("iVB", "ivb_formatted", 80, formatter="textarea"),
        _col("HB", "hb_formatted", 80, formatter="textarea"),
        _col("RelH", "release_pos_z_formatted", 80, formatter="textarea"),
        _col("RelS", "release_pos_x_formatted", 80, formatter="textarea"),
        _col("Extension", "extension_formatted", 125, formatter="textarea"),
        _col("tjStuff+", "tj_stuff_plus_formatted", 100, formatter="textarea"),
        _col("2024 tjStuff+", "tj_stuff_plus_old", 100, formatter="textarea"),
        _col("Δ", "tj_stuff_plus_diff", 100, formatter="textarea"),
    ]

    df_plot = df_merge.to_pandas()

    # Map each pitcher to a team; with duplicate ids the last row of the
    # date-descending frame wins.
    team_dict = dict(zip(df_spring['pitcher_id'], df_spring['pitcher_team']))
    df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)

    return Tabulator(
        df_plot,
        table_options=TableOptions(
            height=750,
            columns=columns,
        )
    )
595
+
596
+ app = App(app_ui, server)  # Shiny application object built from `app_ui` and `server`
pitch_data_agg_2024.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e4cfc5290d83b7707362d46380140b97bb464d1510e1fcca0cd878b65e4fb91
3
- size 561801
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21509d34d14646869a1a4dd2785b91c3ce210092ad42d42aea9a772aeeb38edf
3
+ size 615259