nesticot committed on
Commit
139a17d
·
verified ·
1 Parent(s): 0b5fea1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +170 -1
app.py CHANGED
@@ -62,6 +62,9 @@ app_ui = ui.page_fluid(
62
  ui.nav("All Pitches",
63
  output_tabulator("table_all")
64
  ),
 
 
 
65
  )
66
  )
67
  )
@@ -202,7 +205,7 @@ def server(input, output, session):
202
  { "title": "Team", "field": "pitcher_team", "width": 100, "headerFilter":"input" ,"frozen":True,},
203
  { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
204
  { "title": "New Pitch?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
205
- { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input"},
206
  { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
207
  { "title": "RHH%", "field": "rhh_percent_formatted", "width": 100, "headerFilter":"input"},
208
  { "title": "LHH%", "field": "lhh_percent_formatted", "width": 100, "headerFilter":"input"},
@@ -234,5 +237,171 @@ def server(input, output, session):
234
  )
235
 
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
  app = App(app_ui, server)
 
62
  ui.nav("All Pitches",
63
  output_tabulator("table_all")
64
  ),
65
+ ui.nav("Daily Pitches",
66
+ output_tabulator("table_daily")
67
+ ),
68
  )
69
  )
70
  )
 
205
  { "title": "Team", "field": "pitcher_team", "width": 100, "headerFilter":"input" ,"frozen":True,},
206
  { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
207
  { "title": "New Pitch?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
208
+ { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input","contextMenu":True},
209
  { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
210
  { "title": "RHH%", "field": "rhh_percent_formatted", "width": 100, "headerFilter":"input"},
211
  { "title": "LHH%", "field": "lhh_percent_formatted", "width": 100, "headerFilter":"input"},
 
237
  )
238
 
239
 
240
@output
@render_tabulator
@reactive.event(input.refresh)
def table_daily():
    """Render the "Daily Pitches" table: one row per pitcher, game and pitch type.

    Re-runs whenever ``input.refresh`` fires.  The function:

    1. Loads the 2025 spring-training pitch data from the Hugging Face
       dataset and appends pitches scraped for today's spring games.
    2. Runs the combined data through ``fe.feature_engineering`` and
       ``stuff_apply.stuff_apply``.
    3. Aggregates pitch counts, mean pitch characteristics, and usage
       shares per pitcher / pitch type / game.
    4. Left-joins the result against ``df_year_old_group`` (defined elsewhere
       in this file) to flag new pitches and to compute deltas versus the
       earlier values.
    5. Formats values as ``"value\\n(+diff)"`` strings and percentages.

    Returns:
        A ``Tabulator`` widget (height 750) over the formatted pandas frame.
    """
    import polars as pl

    df_spring = pl.read_parquet(
        "hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet"
    )

    # NOTE(review): "today" comes from the server's local clock (naive
    # datetime); confirm this matches the time zone used by the schedule.
    today = datetime.datetime.now().date()

    game_list_input = (
        scraper.get_schedule(year_input=[today.year], sport_id=[1], game_type=['S'])
        .filter(pl.col('date') == today)['game_id']
    )

    # Only scrape and append when the schedule lists at least one game today;
    # with no games there is nothing to add to the stored data.
    if len(game_list_input) > 0:
        data = scraper.get_data(game_list_input)
        df = scraper.get_data_df(data)
        df_spring = pl.concat([df_spring, df])

    df_spring = df_spring.sort('game_date', descending=True)

    df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))

    # Total pitches thrown by each pitcher in each game (denominator for the
    # usage shares below).
    game_keys = ["pitcher_id", 'game_id', 'game_date']
    df_pitcher_totals = df_spring_stuff.group_by(game_keys).agg(
        pl.col("start_speed").count().alias("pitcher_total")
    )

    # Per pitcher / pitch type / game: pitch count, mean characteristics, and
    # counts split by batter handedness.
    #
    # Fix: the original filtered batter_hand == 'L' for ``rhh_count`` and
    # 'R' for ``lhh_count``, so the RHH% and LHH% columns were swapped.
    # NOTE(review): if the "All Pitches" table uses the same aggregation,
    # check it for the same swap.
    mean_cols = [
        'start_speed',
        'ivb',
        'hb',
        'release_pos_z',
        'release_pos_x',
        'extension',
        'tj_stuff_plus',
    ]
    df_spring_group = df_spring_stuff.group_by(
        ['pitcher_id', 'pitcher_name', 'pitch_type', 'game_id', 'game_date']
    ).agg(
        [pl.col('start_speed').count().alias('count')]
        + [pl.col(name).mean().alias(name) for name in mean_cols]
        + [
            pl.col('start_speed').filter(pl.col('batter_hand') == 'R').count().alias('rhh_count'),
            pl.col('start_speed').filter(pl.col('batter_hand') == 'L').count().alias('lhh_count'),
        ]
    )

    # Attach the per-game pitcher totals, then express each count as a share
    # of all pitches the pitcher threw in that game.
    df_spring_group = df_spring_group.join(df_pitcher_totals, on=game_keys, how="left")
    df_spring_group = df_spring_group.with_columns(
        (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
    )
    df_spring_group = df_spring_group.with_columns([
        (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
        (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent"),
    ])

    # Bring in the earlier values for the same pitcher / pitch type; columns
    # that collide with the current ones receive the "_old" suffix.
    df_merge = df_spring_group.join(
        df_year_old_group,
        on=['pitcher_id', 'pitcher_name', 'pitch_type'],
        how='left',
        suffix='_old',
    )

    df_merge = df_merge.with_columns(
        pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
    )

    # A pitch is "new" when the pitcher appears in the old data but this pitch
    # type has no old velocity to compare against.
    df_merge = df_merge.with_columns(
        pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
        .then(pl.lit("TRUE"))
        .otherwise(pl.lit(None))
        .alias("new_pitch")
    )

    # (current column, old column) pairs to compare.
    cols_to_subtract = [(name, name + "_old") for name in mean_cols]

    # Step 1: current minus old.  The difference is null whenever the old value
    # is missing, so null itself marks "no comparison available" (the original
    # used a magic sentinel of 10000 for this).
    df_merge = df_merge.with_columns([
        (pl.col(new) - pl.col(old)).alias(new + "_diff")
        for new, old in cols_to_subtract
    ])

    # Step 2: render "value\n(+diff)"; when there is no diff, render just the
    # value followed by "\n\t" so every cell keeps two lines.
    df_merge = df_merge.with_columns([
        pl.when(pl.col(new + "_diff").is_null())
        .then(pl.col(new).round(1).cast(pl.Utf8) + '\n\t')
        .otherwise(
            pl.col(new).round(1).cast(pl.Utf8)
            + "\n("
            + pl.col(new + "_diff").round(1).map_elements(
                lambda x: f"{x:+.1f}", return_dtype=pl.Utf8
            )
            + ")"
        )
        .alias(new + "_formatted")
        for new, _ in cols_to_subtract
    ])

    percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']

    # Convert the 0-1 shares to "12.3%" strings and order the rows.
    df_merge = df_merge.with_columns([
        (pl.col(col) * 100)
        .round(1)
        .map_elements(lambda x: f"{x:.1f}%", return_dtype=pl.Utf8)
        .alias(col + "_formatted")
        for col in percent_cols
    ]).sort(['pitcher_id', 'count'], descending=True)

    # NOTE(review): "Date" is frozen but follows the non-frozen "New Pitch?"
    # column; Tabulator expects frozen columns to be adjacent - confirm the
    # resulting layout is the intended one.
    columns = [
        {"title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter": "input", "frozen": True},
        {"title": "Team", "field": "pitcher_team", "width": 100, "headerFilter": "input", "frozen": True},
        {"title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter": "input", "frozen": True},
        {"title": "New Pitch?", "field": "new_pitch", "width": 125, "headerFilter": "input", "frozen": False},
        {"title": "Date", "field": "game_date", "width": 100, "headerFilter": "input", "frozen": True},
        {"title": "Pitches", "field": "count", "width": 100, "headerFilter": "input"},
        {"title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter": "input"},
        {"title": "RHH%", "field": "rhh_percent_formatted", "width": 100, "headerFilter": "input"},
        {"title": "LHH%", "field": "lhh_percent_formatted", "width": 100, "headerFilter": "input"},
        {"title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
        {"title": "iVB", "field": "ivb_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
        {"title": "HB", "field": "hb_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
        {"title": "RelH", "field": "release_pos_z_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
        {"title": "RelS", "field": "release_pos_x_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
        {"title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter": "input", "formatter": "textarea"},
        {"title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter": "input", "formatter": "textarea"},
    ]

    df_plot = df_merge.to_pandas()

    # Map each pitcher to a team.  df_spring is sorted newest-first, so when a
    # pitcher has several teams the entry from the oldest row wins (later
    # pairs overwrite earlier ones when building the dict).
    team_dict = dict(zip(df_spring['pitcher_id'], df_spring['pitcher_team']))
    df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)

    return Tabulator(
        df_plot,
        table_options=TableOptions(
            height=750,
            columns=columns,
        )
    )
406
 
407
  # Create the application object from the UI definition (app_ui) and the server function.
  app = App(app_ui, server)