nesticot committed on
Commit
6781a47
·
verified ·
1 Parent(s): 390c8e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -0
app.py CHANGED
@@ -65,6 +65,9 @@ app_ui = ui.page_fluid(
65
  ui.nav("Daily Pitches",
66
  output_tabulator("table_daily")
67
  ),
 
 
 
68
  )
69
  )
70
  )
@@ -399,6 +402,178 @@ def server(input, output, session):
399
 
400
 
401
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
  return Tabulator(
403
  df_plot,
404
 
 
65
  ui.nav("Daily Pitches",
66
  output_tabulator("table_daily")
67
  ),
68
+ ui.nav("tjStuff+",
69
+ output_tabulator("table_tjstuff")
70
+ ),
71
  )
72
  )
73
  )
 
402
 
403
 
404
 
405
+ return Tabulator(
406
+ df_plot,
407
+
408
+ table_options=TableOptions(
409
+ height=750,
410
+
411
+ columns=columns,
412
+ )
413
+ )
414
+
415
+ @output
416
+ @render_tabulator
417
+ @reactive.event(input.refresh)
418
+ def table_tjstuff():
419
+
420
+ import polars as pl
421
+ df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
422
+
423
+
424
+ import datetime
425
+
426
+ date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
427
+ print(datetime.datetime.now())
428
+
429
+ date_str = date.strftime('%Y-%m-%d')
430
+ # Initialize the scraper
431
+
432
+
433
+ game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
434
+ .filter(pl.col('date') == date)['game_id'])
435
+
436
+ data = scraper.get_data(game_list_input)
437
+ df = scraper.get_data_df(data)
438
+
439
+ df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
440
+
441
+
442
+
443
+ # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
444
+ # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
445
+ df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
446
+
447
+
448
+
449
+ import polars as pl
450
+
451
+ # Compute total pitches for each pitcher
452
+ df_pitcher_totals = df_spring_stuff.group_by(["pitcher_id"]).agg(
453
+ pl.col("start_speed").count().alias("pitcher_total")
454
+ )
455
+
456
+ df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
457
+ pl.col('start_speed').count().alias('count'),
458
+ pl.col('start_speed').mean().alias('start_speed'),
459
+ pl.col('ivb').mean().alias('ivb'),
460
+ pl.col('hb').mean().alias('hb'),
461
+ pl.col('release_pos_z').mean().alias('release_pos_z'),
462
+ pl.col('release_pos_x').mean().alias('release_pos_x'),
463
+ pl.col('extension').mean().alias('extension'),
464
+ pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
465
+ (pl.col('start_speed').filter(pl.col('batter_hand')=='L').count()).alias('rhh_count'),
466
+ (pl.col('start_speed').filter(pl.col('batter_hand')=='R').count()).alias('lhh_count')
467
+ ])
468
+
469
+ # Join total pitches per pitcher to the grouped DataFrame on pitcher_id
470
+ df_spring_group = df_spring_group.join(df_pitcher_totals, on=["pitcher_id"], how="left")
471
+
472
+ # Now calculate the pitch percent for each pitcher/pitch_type combination
473
+ df_spring_group = df_spring_group.with_columns(
474
+ (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
475
+ )
476
+
477
+ # Optionally, if you want the percentage of left/right-handed batters within the group:
478
+ df_spring_group = df_spring_group.with_columns([
479
+ (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
480
+ (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent")
481
+ ])
482
+
483
+ df_merge = df_spring_group.join(df_year_old_group,on=['pitcher_id','pitcher_name','pitch_type'],how='left',suffix='_old')
484
+
485
+
486
+ df_merge = df_merge.with_columns(
487
+ pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
488
+ )
489
+
490
+ df_merge = df_merge.with_columns(
491
+ pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
492
+ .then(pl.lit("TRUE"))
493
+ .otherwise(pl.lit(None))
494
+ .alias("new_pitch")
495
+ )
496
+
497
+ import polars as pl
498
+
499
+ # Define the columns to subtract
500
+ cols_to_subtract = [
501
+ ("start_speed", "start_speed_old"),
502
+ ("ivb", "ivb_old"),
503
+ ("hb", "hb_old"),
504
+ ("release_pos_z", "release_pos_z_old"),
505
+ ("release_pos_x", "release_pos_x_old"),
506
+ ("extension", "extension_old"),
507
+ ("tj_stuff_plus", "tj_stuff_plus_old")
508
+ ]
509
+
510
+ df_merge = df_merge.with_columns([
511
+ # Step 1: Create _diff columns with the default value (e.g., 80) if old is null
512
+ pl.when(pl.col(old).is_null())
513
+ .then(pl.lit(None)) # If old is null, assign 80 as the default
514
+ .otherwise(pl.col(new) - pl.col(old)) # Otherwise subtract old from new
515
+ .alias(new + "_diff")
516
+ for new, old in cols_to_subtract
517
+ ])
518
+
519
+ # Step 2: Format the columns with (value (+diff)) - exclude brackets if diff is 80
520
+ # Step 2: Format the columns with (value (+diff)) - exclude brackets if diff is 80
521
+ df_merge = df_merge.with_columns([
522
+
523
+ pl.col(new).round(1).cast(pl.Utf8).alias(new + "_formatted")
524
+ for new, _ in cols_to_subtract
525
+ ])
526
+
527
+
528
+
529
+ df_merge = df_merge.with_columns([
530
+ pl.col("tj_stuff_plus_old").round(1).cast(pl.Utf8).alias("tj_stuff_plus_old"),
531
+ pl.col("tj_stuff_plus_diff").round(1).map_elements(lambda x: f"{x:+.1f}").alias("tj_stuff_plus_diff")
532
+ ])
533
+
534
+
535
+
536
+ percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
537
+
538
+ df_merge = df_merge.with_columns([
539
+ (pl.col(col) * 100) # Convert to percentage
540
+ .round(1) # Round to 1 decimal
541
+ .map_elements(lambda x: f"{x:.1f}%") # Format as string with '%'
542
+ .alias(col + "_formatted")
543
+ for col in percent_cols
544
+ ]).sort(['pitcher_id','count'],descending=True)
545
+
546
+
547
+
548
+
549
+ columns = [
550
+ { "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
551
+ { "title": "Team", "field": "pitcher_team", "width": 90, "headerFilter":"input" ,"frozen":True,},
552
+ { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
553
+ { "title": "New?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
554
+ { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input"},
555
+ { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
556
+ { "title": "RHH%", "field": "rhh_percent_formatted", "width": 90, "headerFilter":"input"},
557
+ { "title": "LHH%", "field": "lhh_percent_formatted", "width": 90, "headerFilter":"input"},
558
+ { "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
559
+ { "title": "iVB", "field": "ivb_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
560
+ { "title": "HB", "field": "hb_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
561
+ { "title": "RelH", "field": "release_pos_z_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
562
+ { "title": "RelS", "field": "release_pos_x_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
563
+ { "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
564
+ { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
565
+ { "title": "2024 tjStuff+", "field": "tj_stuff_plus_old", "width": 100, "headerFilter":"input", "formatter":"textarea" },
566
+ { "title": "Δ", "field": "tj_stuff_plus_diff", "width": 100, "headerFilter":"input", "formatter":"textarea" }
567
+ ]
568
+
569
+
570
+ df_plot = df_merge.sort(['pitcher_id','count'],descending=True).to_pandas()
571
+
572
+ team_dict = dict(zip(df_spring['pitcher_id'],df_spring['pitcher_team']))
573
+ df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)
574
+
575
+
576
+
577
  return Tabulator(
578
  df_plot,
579