nesticot committed on
Commit
f2b749c
·
verified ·
1 Parent(s): 40d77df

Upload 2 files

Browse files
Files changed (2) hide show
  1. api_scraper.py +872 -0
  2. app.py +471 -358
api_scraper.py ADDED
@@ -0,0 +1,872 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import polars as pl
3
+ import numpy as np
4
+ from datetime import datetime
5
+ from tqdm import tqdm
6
+ from pytz import timezone
7
+ import re
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+
10
+
11
+ class MLB_Scrape:
12
+
13
+ def __init__(self):
14
+ # Initialize your class here if needed
15
+ pass
16
+
17
def get_sport_id(self):
    """
    Fetches the sports listing from the MLB Stats API.

    Returns:
    - pl.DataFrame: A DataFrame built from the 'sports' entry of the JSON response.
    """
    # Request the listing and decode the JSON body in a single step
    payload = requests.get(url='https://statsapi.mlb.com/api/v1/sports').json()

    # Only the 'sports' entry of the payload is turned into a DataFrame
    return pl.DataFrame(payload['sports'])
31
+
32
def get_sport_id_check(self, sport_id: int = 1):
    """
    Reports whether a sport ID is present in the sports listing returned by get_sport_id().

    Parameters:
    - sport_id (int): The sport ID to look for. Default is 1.

    Returns:
    - bool: True when the ID is listed. Otherwise the available sports are printed
      and False is returned.
    """
    # Pull the current listing through the sibling method
    available_sports = self.get_sport_id()

    # Known ID: nothing to report
    if sport_id in available_sports['id']:
        return True

    # Unknown ID: show the caller what can be selected instead
    print('Please Select a New Sport ID from the following')
    print(available_sports)
    return False
52
+
53
+
54
def get_game_types(self):
    """
    Fetches the list of game types from the MLB Stats API.

    Returns:
    - pl.DataFrame: A DataFrame built directly from the decoded JSON response.
    """
    # Request the game types and decode the JSON body
    payload = requests.get(url='https://statsapi.mlb.com/api/v1/gameTypes').json()

    # The whole payload is handed to the DataFrame constructor
    return pl.DataFrame(payload)
68
+
69
def get_schedule(self,
                 year_input: list = [2024],
                 sport_id: list = [1],
                 game_type: list = ['R']):
    """
    Retrieves the schedule of baseball games based on the specified parameters.

    Parameters:
    - year_input (list): A list of years (ints) to filter the schedule. Default is [2024].
    - sport_id (list): A list of sport IDs (ints) to filter the schedule. Default is [1].
    - game_type (list): A list of game types (strings) to filter the schedule. Default is ['R'].

    Returns:
    - game_df (pl.DataFrame | None): A Polars DataFrame with the columns game_id, time,
      date, away, home, state, venue_id and venue_name, de-duplicated on game_id and
      sorted by date. 'time' is formatted as "%I:%M %p" in US/Eastern. None is returned
      (after printing a message) when the schedule is empty or the response is missing
      an expected key.

    Raises:
    - ValueError: If an argument is not a list of the expected element type.
    """
    # NOTE: the list defaults are only read, never mutated, so sharing them across calls is safe.

    # Type checks
    if not isinstance(year_input, list) or not all(isinstance(year, int) for year in year_input):
        raise ValueError("year_input must be a list of integers.")
    if not isinstance(sport_id, list) or not all(isinstance(sid, int) for sid in sport_id):
        raise ValueError("sport_id must be a list of integers.")
    if not isinstance(game_type, list) or not all(isinstance(gt, str) for gt in game_type):
        raise ValueError("game_type must be a list of strings.")

    eastern = timezone('US/Eastern')

    # Convert input lists to comma-separated strings
    year_input_str = ','.join(str(x) for x in year_input)
    sport_id_str = ','.join(str(x) for x in sport_id)
    game_type_str = ','.join(str(x) for x in game_type)

    # Make API call to retrieve game schedule
    game_call = requests.get(url=f'https://statsapi.mlb.com/api/v1/schedule/?sportId={sport_id_str}&gameTypes={game_type_str}&season={year_input_str}&hydrate=lineup,players').json()
    try:
        # Flatten the per-date game lists once; every column is read from this list
        games = [game for day in game_call['dates'] for game in day['games']]

        # Nothing scheduled for these parameters
        if not games:
            print('Schedule Length of 0, please select different parameters.')
            return None

        # Create a Polars DataFrame with the extracted data
        game_df = pl.DataFrame(data={
            'game_id': [game.get('gamePk') for game in games],
            'time': [game.get('gameDate') for game in games],
            'date': [game.get('officialDate') for game in games],
            'away': [game['teams']['away']['team'].get('name') for game in games],
            'home': [game['teams']['home']['team'].get('name') for game in games],
            'state': [game['status'].get('codedGameState') for game in games],
            'venue_id': [game['venue'].get('id', None) for game in games],
            'venue_name': [game['venue'].get('name') for game in games],
        })

        # Convert date and time columns to appropriate formats
        game_df = game_df.with_columns(
            game_df['date'].str.to_date(),
            game_df['time'].str.to_datetime().dt.convert_time_zone(eastern.zone).dt.strftime("%I:%M %p"))

        # Remove duplicate games and sort by date (a non-empty frame stays non-empty here)
        game_df = game_df.unique(subset='game_id').sort('date')
    except KeyError:
        # The response did not have the expected structure
        print('No Data for Selected Parameters')
        return None

    return game_df
147
+
148
+
149
def get_data(self, game_list_input: list):
    """
    Retrieves live game data for a list of game IDs in parallel.

    Parameters:
    - game_list_input (list): A list of game IDs for which to retrieve live data.

    Returns:
    - data_total (list): A list of decoded JSON responses, one per game ID, in the same
      order as game_list_input (independent of which request finishes first).
    """
    print('This May Take a While. Progress Bar shows Completion of Data Retrieval.')

    def fetch_data(game_id):
        # One request per game; the decoded JSON body is the result
        response = requests.get(f'https://statsapi.mlb.com/api/v1.1/game/{game_id}/feed/live')
        return response.json()

    with ThreadPoolExecutor() as executor:
        # Remember each future's position in the input so results can be slotted back
        positions = {executor.submit(fetch_data, game_id): position
                     for position, game_id in enumerate(game_list_input)}
        data_total = [None] * len(positions)

        # as_completed drives the progress bar; the position map restores input order
        for future in tqdm(as_completed(positions), total=len(positions), desc="Processing", unit="iteration"):
            data_total[positions[future]] = future.result()

    return data_total
172
+
173
def get_data_df(self, data_list):
    """
    Converts a list of game data JSON objects into a Polars DataFrame.

    One row is produced for every play event that is a pitch (its 'isPitch' flag is
    true or its details contain a 'call'), and for every other play event whose count
    shows four balls. Each row is assembled completely before it is stored, so all
    columns always have the same length. Optional fields that are absent from the JSON
    are stored as None.

    Parameters:
    - data_list (list): A list of JSON objects (dicts) containing game data.

    Returns:
    - df (pl.DataFrame): A DataFrame containing the structured game data, one row per
      selected play event. When a game is missing a required key, "No Data for Game"
      is printed, the rows already collected for that game are kept, and processing
      continues with the next game.
    """
    swing_codes = ('X', 'F', 'S', 'D', 'E', 'T', 'W')
    whiff_codes = ('S', 'T', 'W')

    # Output columns, in the order they appear in the DataFrame
    columns = (
        'game_id', 'game_date',
        'batter_id', 'batter_name', 'batter_hand', 'batter_team', 'batter_team_id',
        'pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitcher_team', 'pitcher_team_id',
        'ab_number', 'play_description', 'play_code',
        'in_play', 'is_strike', 'is_swing', 'is_whiff', 'is_out', 'is_ball', 'is_review',
        'pitch_type', 'pitch_description',
        'strikes', 'balls', 'outs', 'strikes_after', 'balls_after', 'outs_after',
        'start_speed', 'end_speed', 'sz_top', 'sz_bot', 'x', 'y',
        'ax', 'ay', 'az', 'pfxx', 'pfxz', 'px', 'pz',
        'vx0', 'vy0', 'vz0', 'x0', 'y0', 'z0',
        'zone', 'type_confidence', 'plate_time', 'extension',
        'spin_rate', 'spin_direction', 'vb', 'ivb', 'hb',
        'launch_speed', 'launch_angle', 'launch_distance', 'launch_location',
        'trajectory', 'hardness', 'hit_x', 'hit_y',
        'index_play', 'play_id', 'start_time', 'end_time', 'is_pitch', 'type_type',
        'type_ab', 'event', 'event_type', 'rbi', 'away_score', 'home_score',
    )

    # Column -> key path inside a play event's 'pitchData'
    pitch_paths = {
        'start_speed': ('startSpeed',),
        'end_speed': ('endSpeed',),
        'sz_top': ('strikeZoneTop',),
        'sz_bot': ('strikeZoneBottom',),
        'x': ('coordinates', 'x'),
        'y': ('coordinates', 'y'),
        'ax': ('coordinates', 'aX'),
        'ay': ('coordinates', 'aY'),
        'az': ('coordinates', 'aZ'),
        'pfxx': ('coordinates', 'pfxX'),
        'pfxz': ('coordinates', 'pfxZ'),
        'px': ('coordinates', 'pX'),
        'pz': ('coordinates', 'pZ'),
        'vx0': ('coordinates', 'vX0'),
        'vy0': ('coordinates', 'vY0'),
        'vz0': ('coordinates', 'vZ0'),
        'x0': ('coordinates', 'x0'),
        'y0': ('coordinates', 'y0'),
        'z0': ('coordinates', 'z0'),
        'zone': ('zone',),
        'type_confidence': ('typeConfidence',),
        'plate_time': ('plateTime',),
        'extension': ('extension',),
        'spin_rate': ('breaks', 'spinRate'),
        'spin_direction': ('breaks', 'spinDirection'),
        'vb': ('breaks', 'breakVertical'),
        'ivb': ('breaks', 'breakVerticalInduced'),
        'hb': ('breaks', 'breakHorizontal'),
    }

    # Column -> key path inside a play event's 'hitData'
    hit_paths = {
        'launch_speed': ('launchSpeed',),
        'launch_angle': ('launchAngle',),
        'launch_distance': ('totalDistance',),
        'launch_location': ('location',),
        'trajectory': ('trajectory',),
        'hardness': ('hardness',),
        'hit_x': ('coordinates', 'coordX'),
        'hit_y': ('coordinates', 'coordY'),
    }

    # Column -> key read directly from the play event
    event_keys = {
        'index_play': 'index',
        'play_id': 'playId',
        'start_time': 'startTime',
        'end_time': 'endTime',
        'is_pitch': 'isPitch',
        'type_type': 'type',
    }

    # Column -> key read from the at-bat 'result' (filled on the at-bat's last event only)
    result_keys = {
        'type_ab': 'type',
        'event': 'event',
        'event_type': 'eventType',
        'rbi': 'rbi',
        'away_score': 'awayScore',
        'home_score': 'homeScore',
        'is_out': 'isOut',
    }

    def dig(mapping, *keys):
        # Walk nested dicts; give None as soon as a level is missing
        current = mapping
        for key in keys:
            if not isinstance(current, dict) or key not in current:
                return None
            current = current[key]
        return current

    print('Converting Data to Dataframe.')
    rows = []

    for data in data_list:
        try:
            for play in data['liveData']['plays']['allPlays']:
                events = play['playEvents']
                last_index = len(events) - 1

                for n, event in enumerate(events):
                    details = event.get('details', {})
                    count = event.get('count', {})

                    is_pitch_row = event.get('isPitch') is True or 'call' in details
                    # Non-pitch events are only kept when the count shows four balls
                    if not is_pitch_row and count.get('balls') != 4:
                        continue

                    # Start from an all-None row so every column is always present
                    row = dict.fromkeys(columns)

                    row['game_id'] = data['gamePk']
                    row['game_date'] = data['gameData']['datetime']['officialDate']

                    matchup = play.get('matchup', {})
                    row['batter_id'] = dig(matchup, 'batter', 'id')
                    row['batter_name'] = dig(matchup, 'batter', 'fullName')
                    row['batter_hand'] = dig(matchup, 'batSide', 'code')
                    row['pitcher_id'] = dig(matchup, 'pitcher', 'id')
                    row['pitcher_name'] = dig(matchup, 'pitcher', 'fullName')
                    row['pitcher_hand'] = dig(matchup, 'pitchHand', 'code')

                    # Top of the inning: away bats and home pitches; bottom: the reverse.
                    # Both pitcher columns come from the same (fielding) side.
                    if play['about']['isTopInning']:
                        batting_side, fielding_side = 'away', 'home'
                    else:
                        batting_side, fielding_side = 'home', 'away'
                    teams = data['gameData']['teams']
                    row['batter_team'] = dig(teams, batting_side, 'abbreviation')
                    row['batter_team_id'] = dig(teams, batting_side, 'id')
                    row['pitcher_team'] = dig(teams, fielding_side, 'abbreviation')
                    row['pitcher_team_id'] = dig(teams, fielding_side, 'id')

                    # The count stored on the event is the count after it happened
                    row['strikes_after'] = count.get('strikes')
                    row['balls_after'] = count.get('balls')
                    row['outs_after'] = count.get('outs')

                    for column, key in event_keys.items():
                        row[column] = event.get(key)

                    result = play.get('result', {})

                    if is_pitch_row:
                        row['ab_number'] = play.get('atBatIndex')

                        code = details.get('code')
                        row['play_description'] = details.get('description')
                        row['play_code'] = code
                        row['in_play'] = details.get('isInPlay')
                        row['is_strike'] = details.get('isStrike')
                        # Swing/whiff are flagged True or left as None (never False)
                        row['is_swing'] = True if code in swing_codes else None
                        row['is_whiff'] = True if code in whiff_codes else None
                        # The ball flag lives under 'isBall'; 'isOut' is a different flag
                        row['is_ball'] = details.get('isBall')
                        row['is_review'] = details.get('hasReview')
                        row['pitch_type'] = dig(details, 'type', 'code')
                        row['pitch_description'] = dig(details, 'type', 'description')

                        if n == 0 or event.get('pitchNumber') == 1:
                            # First pitch (or first event of the at-bat): the count
                            # beforehand is 0-0; outs are taken from this event
                            row['strikes'] = 0
                            row['balls'] = 0
                            row['outs'] = count.get('outs')
                        else:
                            # Otherwise the count beforehand is the previous event's count
                            before = events[n - 1].get('count', {})
                            row['strikes'] = before.get('strikes')
                            row['balls'] = before.get('balls')
                            row['outs'] = before.get('outs')

                        pitch_data = event.get('pitchData', {})
                        for column, path in pitch_paths.items():
                            row[column] = dig(pitch_data, *path)

                        hit_data = event.get('hitData', {})
                        for column, path in hit_paths.items():
                            row[column] = dig(hit_data, *path)

                        # The at-bat outcome is attached to its final event only
                        if n == last_index:
                            for column, key in result_keys.items():
                                row[column] = result.get(key)
                    else:
                        # Four-ball non-pitch event: only the outcome and the count
                        # are recorded, and the before/after counts are the same
                        row['event'] = result.get('event')
                        row['event_type'] = result.get('eventType')
                        row['strikes'] = count.get('strikes')
                        row['balls'] = count.get('balls')
                        row['outs'] = count.get('outs')

                    rows.append(row)

        except KeyError:
            print("No Data for Game")

    # Pivot the row dicts into one list per column, preserving the column order
    df = pl.DataFrame(
        data={column: [row[column] for row in rows] for column in columns},
        strict=False,
    )

    return df
646
+
647
def get_teams(self):
    """
    Retrieves information about MLB teams from the MLB API and processes it into a Polars DataFrame.

    Entries in the API response that carry no 'franchiseName' are skipped; the
    remaining fields are read defensively so a missing optional key yields a
    null instead of a KeyError.

    Returns:
    - mlb_teams_df (pl.DataFrame): A DataFrame containing team information, including team ID, city, name, franchise, abbreviation, parent organization ID, parent organization name, league ID, league name, and parent organization abbreviation.
    """
    # Make API call to retrieve team information
    teams = requests.get(url='https://statsapi.mlb.com/api/v1/teams/').json()

    column_names = ('team_id', 'city', 'name', 'franchise', 'abbreviation',
                    'parent_org_id', 'parent_org', 'league_id', 'league_name')
    columns = {column: [] for column in column_names}

    for team in teams['teams']:
        # Records without a franchise name are incomplete and are left out.
        if 'franchiseName' not in team:
            continue

        league = team.get('league') or {}
        values = (
            team.get('id'),
            team['franchiseName'],   # city, e.g. the franchise's location name
            team.get('teamName'),
            team.get('name'),
            team.get('abbreviation'),
            team.get('parentOrgId'),
            team.get('parentOrgName'),
            league.get('id'),
            league.get('name'),
        )
        for column, value in zip(column_names, values):
            columns[column].append(value)

    # Create a Polars DataFrame with the extracted data
    mlb_teams_df = (
        pl.DataFrame(data=columns)
        .unique()
        .drop_nulls(subset=['team_id'])
        .sort('team_id')
    )

    # Teams without a parent organization are their own parent: fall back to
    # the team's own ID and franchise name.
    mlb_teams_df = mlb_teams_df.with_columns(
        pl.col('parent_org_id').fill_null(pl.col('team_id')),
        pl.col('parent_org').fill_null(pl.col('franchise')),
    )

    # Look up each parent organization's abbreviation by joining the table
    # against its own (team_id, abbreviation) pairs.
    abbreviation_df = mlb_teams_df.select(['team_id', 'abbreviation']).rename(
        {'team_id': 'parent_org_id', 'abbreviation': 'parent_org_abbreviation'}
    )
    mlb_teams_df = mlb_teams_df.join(abbreviation_df, on='parent_org_id', how='left')

    return mlb_teams_df
707
+
708
def get_leagues(self):
    """
    Fetch the league list from the MLB API and return it as a Polars DataFrame.

    Returns:
    - leagues_df (pl.DataFrame): One row per league with the columns league_id, league_name, league_abbreviation and sport_id. A field that is absent from the API response is stored as null.
    """
    # Single request for every league known to the API
    payload = requests.get(url='https://statsapi.mlb.com/api/v1/leagues/').json()
    entries = payload['leagues']

    # Collect each column in the order the DataFrame exposes them
    columns = {
        'league_id': [entry.get('id') for entry in entries],
        'league_name': [entry.get('name') for entry in entries],
        'league_abbreviation': [entry.get('abbreviation') for entry in entries],
        'sport_id': [entry['sport']['id'] if 'sport' in entry else None for entry in entries],
    }

    leagues_df = pl.DataFrame(data=columns)
    return leagues_df
733
+
734
def get_player_games_list(self, player_id: int,
                          season: int,
                          start_date: str = None,
                          end_date: str = None,
                          sport_id: int = 1,
                          game_type: list = ['R'],
                          pitching: bool = True):
    """
    Retrieves a list of game IDs for a specific player in a given season.

    Parameters:
    - player_id (int): The ID of the player.
    - season (int): The season year for which to retrieve the game list.
    - start_date (str): The start date (YYYY-MM-DD) of the range (default is January 1st of the specified season).
    - end_date (str): The end date (YYYY-MM-DD) of the range (default is December 31st of the specified season).
    - sport_id (int): The ID of the sport for which to retrieve player data.
    - game_type (list): A list of game types to filter the schedule. Default is ['R']. The list is only read, never modified.
    - pitching (bool): Return pitching games when True, hitting games otherwise.

    Returns:
    - player_game_list (list): A list of game IDs in which the player participated during the specified season. Empty when the API reports no matching stats.

    Raises:
    - ValueError: If start_date or end_date is not in YYYY-MM-DD format.
    """
    # Set default start and end dates if not provided
    if not start_date:
        start_date = f'{season}-01-01'
    if not end_date:
        end_date = f'{season}-12-31'

    # Determine the group based on the pitching flag
    group = 'pitching' if pitching else 'hitting'

    # Validate date format. fullmatch is used so that nothing (including a
    # trailing newline) may follow the date.
    for label, value in (('start_date', start_date), ('end_date', end_date)):
        if not re.fullmatch(r'\d{4}-\d{2}-\d{2}', value):
            raise ValueError(f"{label} {value} is not in YYYY-MM-DD format")

    # Convert game type list to a comma-separated string
    game_type_str = ','.join([str(x) for x in game_type])

    # Make API call to retrieve player game logs
    response = requests.get(url=f'https://statsapi.mlb.com/api/v1/people/{player_id}?hydrate=stats(group={group},type=gameLog,season={season},startDate={start_date},endDate={end_date},sportId={sport_id},gameType=[{game_type_str}]),hydrations').json()

    # Treat a missing person record, a missing 'stats' key and an empty
    # 'stats' list the same way: there are no games to report.
    people = response.get('people') or []
    stats = people[0].get('stats') if people else None
    if not stats:
        print(f'No {group} games found for player {player_id} in season {season}')
        return []

    # Extract game IDs from the API response
    player_game_list = [split['game']['gamePk'] for split in stats[0].get('splits', [])]

    return player_game_list
787
+
788
def get_players(self, sport_id: int, season: int, game_type: list = ['R']):
    """
    Build a DataFrame of the players for a sport and season.

    Parameters:
    - sport_id (int): The ID of the sport for which to retrieve player data.
    - season (int): The season year for which to retrieve player data.
    - game_type (list): A list of game types to filter the players. Default is ['R'].

    Returns:
    - player_df (pl.DataFrame): Player information. When game_type is exactly ['S'] the columns are player_id, first_name, last_name, name, position and team; for every other game type the frame additionally carries weight, height, age and birthDate.
    """
    game_type_str = ','.join([str(x) for x in game_type])

    def _stats_frame(stats_url):
        # Download one stats listing and keep the identifying columns only.
        entries = requests.get(stats_url).json()['stats']
        return pl.DataFrame(data={
            'player_id': [entry['playerId'] for entry in entries],
            'first_name': [entry['playerFirstName'] for entry in entries],
            'last_name': [entry['playerLastName'] for entry in entries],
            'name': [entry['playerFullName'] for entry in entries],
            'position': [entry['primaryPositionAbbrev'] for entry in entries],
            'team': [entry['teamId'] for entry in entries]
        })

    if game_type_str != 'S':
        # Every game type other than 'S' is served by the sports/players endpoint
        people = requests.get(url=f'https://statsapi.mlb.com/api/v1/sports/{sport_id}/players?season={season}&gameType=[{game_type_str}]').json()['people']

        df = pl.DataFrame(data={
            'player_id': [person['id'] for person in people],
            'first_name': [person['firstName'] for person in people],
            'last_name': [person['lastName'] for person in people],
            'name': [person['fullName'] for person in people],
            'position': [person['primaryPosition']['abbreviation'] if 'primaryPosition' in person else None for person in people],
            'team': [person['currentTeam']['id'] if 'currentTeam' in person else None for person in people],
            'weight': [person.get('weight') for person in people],
            'height': [person.get('height') for person in people],
            'age': [person.get('currentAge') for person in people],
            'birthDate': [person.get('birthDate') for person in people]
        })
        return df

    # Game type 'S' is read from the stats endpoint, once per group.
    # NOTE(review): both URLs hard-code sportId=1 and ignore the sport_id
    # argument - confirm this is intended.
    df_pitcher = _stats_frame(f'https://bdfed.stitch.mlbinfra.com/bdfed/stats/player?&env=prod&season={season}&sportId=1&stats=season&group=pitching&gameType=S&limit=1000000&offset=0&sortStat=inningsPitched&order=asc')
    df_batter = _stats_frame(f'https://bdfed.stitch.mlbinfra.com/bdfed/stats/player?&env=prod&season={season}&sportId=1&stats=season&group=hitting&gameType=S&limit=1000000&offset=0')

    # Merge both groups, dropping duplicate rows and rows without an ID
    combined = pl.concat([df_pitcher, df_batter])
    df = combined.unique().drop_nulls(subset=['player_id']).sort('player_id')

    return df
app.py CHANGED
@@ -1,359 +1,472 @@
1
- import polars as pl
2
- import numpy as np
3
- import pandas as pd
4
- import api_scraper
5
- scrape = api_scraper.MLB_Scrape()
6
- from functions import df_update
7
- from functions import pitch_summary_functions
8
- update = df_update.df_update()
9
- from stuff_model import feature_engineering as fe
10
- from stuff_model import stuff_apply
11
- import requests
12
- import joblib
13
- from matplotlib.gridspec import GridSpec
14
- from shiny import App, reactive, ui, render
15
- from shiny.ui import h2, tags
16
- import matplotlib.pyplot as plt
17
- import matplotlib.gridspec as gridspec
18
- import seaborn as sns
19
- from functions.pitch_summary_functions import *
20
- from shiny import App, reactive, ui, render
21
- from shiny.ui import h2, tags
22
-
23
- colour_palette = ['#FFB000','#648FFF','#785EF0',
24
- '#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']
25
-
26
-
27
- year_list = [2017,2018,2019,2020,2021,2022,2023,2024]
28
-
29
-
30
-
31
- level_dict = {'1':'MLB',
32
- '11':'AAA',
33
- '12':'AA',
34
- '13':'A+',
35
- '14':'A',
36
- '17':'AFL',
37
- '22':'College',
38
- '21':'Prospects',
39
- '51':'International' }
40
-
41
- function_dict={
42
- 'velocity_kdes':'Velocity Distributions',
43
- 'break_plot':'Pitch Movement',
44
- 'tj_stuff_roling':'Rolling tjStuff+ by Pitch',
45
- 'tj_stuff_roling_game':'Rolling tjStuff+ by Game',
46
- 'location_plot_lhb':'Locations vs LHB',
47
- 'location_plot_rhb':'Locations vs RHB',
48
- }
49
-
50
-
51
- split_dict = {'all':'All',
52
- 'left':'LHH',
53
- 'right':'RHH'}
54
-
55
- split_dict_hand = {'all':['L','R'],
56
- 'left':['L'],
57
- 'right':['R']}
58
-
59
-
60
- type_dict = {'R':'Regular Season',
61
- 'S':'Spring',
62
- 'P':'Playoffs' }
63
-
64
-
65
-
66
- # List of MLB teams and their corresponding ESPN logo URLs
67
- mlb_teams = [
68
- {"team": "AZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/ari.png&h=500&w=500"},
69
- {"team": "ATH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/oak.png&h=500&w=500"},
70
- {"team": "ATL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/atl.png&h=500&w=500"},
71
- {"team": "BAL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/bal.png&h=500&w=500"},
72
- {"team": "BOS", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/bos.png&h=500&w=500"},
73
- {"team": "CHC", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/chc.png&h=500&w=500"},
74
- {"team": "CWS", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/chw.png&h=500&w=500"},
75
- {"team": "CIN", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/cin.png&h=500&w=500"},
76
- {"team": "CLE", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/cle.png&h=500&w=500"},
77
- {"team": "COL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/col.png&h=500&w=500"},
78
- {"team": "DET", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/det.png&h=500&w=500"},
79
- {"team": "HOU", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/hou.png&h=500&w=500"},
80
- {"team": "KC", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/kc.png&h=500&w=500"},
81
- {"team": "LAA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/laa.png&h=500&w=500"},
82
- {"team": "LAD", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/lad.png&h=500&w=500"},
83
- {"team": "MIA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/mia.png&h=500&w=500"},
84
- {"team": "MIL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/mil.png&h=500&w=500"},
85
- {"team": "MIN", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/min.png&h=500&w=500"},
86
- {"team": "NYM", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/nym.png&h=500&w=500"},
87
- {"team": "NYY", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/nyy.png&h=500&w=500"},
88
- {"team": "PHI", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/phi.png&h=500&w=500"},
89
- {"team": "PIT", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/pit.png&h=500&w=500"},
90
- {"team": "SD", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sd.png&h=500&w=500"},
91
- {"team": "SF", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sf.png&h=500&w=500"},
92
- {"team": "SEA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sea.png&h=500&w=500"},
93
- {"team": "STL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/stl.png&h=500&w=500"},
94
- {"team": "TB", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tb.png&h=500&w=500"},
95
- {"team": "TEX", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tex.png&h=500&w=500"},
96
- {"team": "TOR", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tor.png&h=500&w=500"},
97
- {"team": "WSH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/wsh.png&h=500&w=500"},
98
- {"team": "ZZZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/leagues/500/mlb.png&w=500&h=500"}
99
- ]
100
-
101
-
102
- df_image = pd.DataFrame(mlb_teams)
103
- image_dict = df_image.set_index('team')['logo_url'].to_dict()
104
- image_dict_flip = df_image.set_index('logo_url')['team'].to_dict()
105
-
106
- from shiny import App, reactive, ui, render
107
- from shiny.ui import h2, tags
108
-
109
- # Define the UI layout for the app
110
- app_ui = ui.page_fluid(
111
- ui.layout_sidebar(
112
- ui.panel_sidebar(
113
- # Row for selecting season and level
114
- ui.row(
115
- ui.column(4, ui.input_select('year_input', 'Select Season', year_list, selected=2024)),
116
- ui.column(4, ui.input_select('level_input', 'Select Level', level_dict)),
117
- ui.column(4, ui.input_select('type_input', 'Select Type', type_dict,selected='R'))
118
- ),
119
- # Row for the action button to get player list
120
- ui.row(ui.input_action_button("player_button", "Get Player List", class_="btn-primary")),
121
- # Row for selecting the player
122
- ui.row(ui.column(12, ui.output_ui('player_select_ui', 'Select Player'))),
123
- # Row for selecting the date range
124
- ui.row(ui.column(12, ui.output_ui('date_id', 'Select Date'))),
125
-
126
- # Rows for selecting plots and split options
127
- ui.row(
128
- ui.column(4, ui.input_select('plot_id_1', 'Plot Left', function_dict, multiple=False, selected='velocity_kdes')),
129
- ui.column(4, ui.input_select('plot_id_2', 'Plot Middle', function_dict, multiple=False, selected='tj_stuff_roling')),
130
- ui.column(4, ui.input_select('plot_id_3', 'Plot Right', function_dict, multiple=False, selected='break_plot'))
131
- ),
132
- ui.row(
133
- ui.column(6, ui.input_select('split_id', 'Select Split', split_dict, multiple=False)),
134
- ui.column(6, ui.input_numeric('rolling_window', 'Rolling Window (for tjStuff+ Plot)', min=1, value=50))
135
- ),
136
- ui.row(
137
- ui.column(6, ui.input_switch("switch", "Custom Team?", False)),
138
- ui.column(6, ui.input_select('logo_select', 'Select Custom Logo', image_dict_flip, multiple=False))
139
- ),
140
-
141
- # Row for the action button to generate plot
142
- ui.row(ui.input_action_button("generate_plot", "Generate Plot", class_="btn-primary")),
143
- ),
144
-
145
- ui.panel_main(
146
- ui.navset_tab(
147
- # Tab for game summary plot
148
- ui.nav("Pitching Summary",
149
- ui.output_text("status"),
150
- ui.output_plot('plot', width='2100px', height='2100px')
151
- ),
152
- )
153
- )
154
- )
155
- )
156
-
157
-
158
- def server(input, output, session):
159
-
160
- @reactive.calc
161
- @reactive.event(input.pitcher_id, input.date_id,input.split_id)
162
- def cached_data():
163
-
164
- year_input = int(input.year_input())
165
- sport_id = int(input.level_input())
166
- player_input = int(input.pitcher_id())
167
- start_date = str(input.date_id()[0])
168
- end_date = str(input.date_id()[1])
169
- # Simulate an expensive data operation
170
- game_list = scrape.get_player_games_list(sport_id = sport_id,
171
- season = year_input,
172
- player_id = player_input,
173
- start_date = start_date,
174
- end_date = end_date,
175
- game_type = [input.type_input()])
176
-
177
- data_list = scrape.get_data(game_list_input = game_list[:])
178
- df = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(scrape.get_data_df(data_list = data_list).filter(
179
- (pl.col("pitcher_id") == player_input)&
180
- (pl.col("is_pitch") == True)&
181
- (pl.col('batter_hand').is_in(split_dict_hand[input.split_id()]))
182
-
183
- )))).with_columns(
184
- pl.col('pitch_type').count().over('pitch_type').alias('pitch_count')
185
- ))
186
- return df
187
-
188
- @render.ui
189
- @reactive.event(input.player_button, ignore_none=False)
190
- def player_select_ui():
191
- # Get the list of pitchers for the selected level and season
192
- df_pitcher_info = scrape.get_players(sport_id=int(input.level_input()), season=int(input.year_input()), game_type = [input.type_input()]).filter(
193
- pl.col("position").is_in(['P','TWP'])).sort("name")
194
-
195
- # Create a dictionary of pitcher IDs and names
196
- pitcher_dict = dict(zip(df_pitcher_info['player_id'], df_pitcher_info['name']))
197
-
198
- # Return a select input for choosing a pitcher
199
- return ui.input_select("pitcher_id", "Select Pitcher", pitcher_dict, selectize=True)
200
-
201
- @render.ui
202
- @reactive.event(input.player_button, ignore_none=False)
203
- def date_id():
204
- # Create a date range input for selecting the date range within the selected year
205
- return ui.input_date_range("date_id", "Select Date Range",
206
- start=f"{int(input.year_input())}-01-01",
207
- end=f"{int(input.year_input())}-12-31",
208
- min=f"{int(input.year_input())}-01-01",
209
- max=f"{int(input.year_input())}-12-31")
210
- @output
211
- @render.text
212
- def status():
213
- # Only show status when generating
214
- if input.generate == 0:
215
- return ""
216
- return ""
217
-
218
- @output
219
- @render.plot
220
- @reactive.event(input.generate_plot, ignore_none=False)
221
- def plot():
222
- # Show progress/loading notification
223
- with ui.Progress(min=0, max=1) as p:
224
- p.set(message="Generating plot", detail="This may take a while...")
225
-
226
-
227
- p.set(0.3, "Gathering data...")
228
- year_input = int(input.year_input())
229
- sport_id = int(input.level_input())
230
- player_input = int(input.pitcher_id())
231
- start_date = str(input.date_id()[0])
232
- end_date = str(input.date_id()[1])
233
-
234
- print(year_input, sport_id, player_input, start_date, end_date)
235
-
236
- df = cached_data()
237
- df = df.clone()
238
-
239
- p.set(0.6, "Creating plot...")
240
-
241
-
242
- #plt.rcParams["figure.figsize"] = [10,10]
243
- fig = plt.figure(figsize=(26,26))
244
- plt.rcParams.update({'figure.autolayout': True})
245
- fig.set_facecolor('white')
246
- sns.set_theme(style="whitegrid", palette=colour_palette)
247
- print('this is the one plot')
248
-
249
- gs = gridspec.GridSpec(6, 8,
250
- height_ratios=[5,20,12,36,36,7],
251
- width_ratios=[4,18,18,18,18,18,18,4])
252
-
253
-
254
- gs.update(hspace=0.2, wspace=0.5)
255
-
256
- # Define the positions of each subplot in the grid
257
- ax_headshot = fig.add_subplot(gs[1,1:3])
258
- ax_bio = fig.add_subplot(gs[1,3:5])
259
- ax_logo = fig.add_subplot(gs[1,5:7])
260
-
261
- ax_season_table = fig.add_subplot(gs[2,1:7])
262
-
263
- ax_plot_1 = fig.add_subplot(gs[3,1:3])
264
- ax_plot_2 = fig.add_subplot(gs[3,3:5])
265
- ax_plot_3 = fig.add_subplot(gs[3,5:7])
266
-
267
- ax_table = fig.add_subplot(gs[4,1:7])
268
-
269
- ax_footer = fig.add_subplot(gs[-1,1:7])
270
- ax_header = fig.add_subplot(gs[0,1:7])
271
- ax_left = fig.add_subplot(gs[:,0])
272
- ax_right = fig.add_subplot(gs[:,-1])
273
-
274
- # Hide axes for footer, header, left, and right
275
- ax_footer.axis('off')
276
- ax_header.axis('off')
277
- ax_left.axis('off')
278
- ax_right.axis('off')
279
-
280
- sns.set_theme(style="whitegrid", palette=colour_palette)
281
- fig.set_facecolor('white')
282
-
283
- df_teams = scrape.get_teams()
284
-
285
- player_headshot(player_input=player_input, ax=ax_headshot,sport_id=sport_id,season=year_input)
286
- player_bio(pitcher_id=player_input, ax=ax_bio,sport_id=sport_id,year_input=year_input)
287
-
288
- if input.switch():
289
-
290
- # Get the logo URL from the image dictionary using the team abbreviation
291
- logo_url = input.logo_select()
292
-
293
- # Send a GET request to the logo URL
294
- response = requests.get(logo_url)
295
-
296
- # Open the image from the response content
297
- img = Image.open(BytesIO(response.content))
298
-
299
- # Display the image on the axis
300
- ax_logo.set_xlim(0, 1.3)
301
- ax_logo.set_ylim(0, 1)
302
- ax_logo.imshow(img, extent=[0.3, 1.3, 0, 1], origin='upper')
303
-
304
- # Turn off the axis
305
- ax_logo.axis('off')
306
-
307
- else:
308
- plot_logo(pitcher_id=player_input, ax=ax_logo, df_team=df_teams,df_players=scrape.get_players(sport_id,year_input))
309
-
310
- stat_summary_table(df=df,
311
- ax=ax_season_table,
312
- player_input=player_input,
313
- split=input.split_id(),
314
- sport_id=sport_id,
315
- game_type=[input.type_input()])
316
-
317
- # break_plot(df=df_plot,ax=ax2)
318
- for x,y,z in zip([input.plot_id_1(),input.plot_id_2(),input.plot_id_3()],[ax_plot_1,ax_plot_2,ax_plot_3],[1,3,5]):
319
- if x == 'velocity_kdes':
320
- velocity_kdes(df,
321
- ax=y,
322
- gs=gs,
323
- gs_x=[3,4],
324
- gs_y=[z,z+2],
325
- fig=fig)
326
- if x == 'tj_stuff_roling':
327
- tj_stuff_roling(df=df,
328
- window=int(input.rolling_window()),
329
- ax=y)
330
-
331
- if x == 'tj_stuff_roling_game':
332
- tj_stuff_roling_game(df=df,
333
- window=int(input.rolling_window()),
334
- ax=y)
335
-
336
- if x == 'break_plot':
337
- break_plot(df = df,ax=y)
338
-
339
- if x == 'location_plot_lhb':
340
- location_plot(df = df,ax=y,hand='L')
341
-
342
- if x == 'location_plot_rhb':
343
- location_plot(df = df,ax=y,hand='R')
344
-
345
- summary_table(df=df,
346
- ax=ax_table)
347
-
348
- plot_footer(ax_footer)
349
-
350
- fig.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.01)
351
-
352
-
353
-
354
-
355
- app = App(app_ui, server)
356
-
357
-
358
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  app = App(app_ui, server)
 
1
+ import polars as pl
2
+ import api_scraper
3
+ import pandas as pd
4
+ scrape = api_scraper.MLB_Scrape()
5
+
6
+ import df_update
7
+ update = df_update.df_update()
8
+ from matplotlib.colors import LinearSegmentedColormap, Normalize
9
+ import numpy as np
10
+ import requests
11
+ from io import BytesIO
12
+ from PIL import Image
13
+ from matplotlib.gridspec import GridSpec
14
+
15
+ import matplotlib.pyplot as plt
16
+ import matplotlib.patches as patches
17
+ import PIL
18
+
19
# Maps a sport ID (as a string) to the display label of its level.
level_dict = {'11': 'AAA', '14': 'A'}
22
+
23
+
24
+
25
def player_bio(pitcher_id: str, ax: plt.Axes, sport_id: int, year_input: int):
    """
    Display the player's bio information on the given axis.

    Two requests are made to the MLB Stats API: one for the player's
    record and one for the list of sports, from which the abbreviation
    matching ``sport_id`` is taken for the season line.

    Parameters
    ----------
    pitcher_id : str
        The player's ID.
    ax : plt.Axes
        The axis to display the bio information on.
    sport_id : int
        The sport ID whose abbreviation is shown in the season line.
    year_input : int
        The season year.

    Raises
    ------
    KeyError
        If the player record lacks one of the displayed fields
        (name, primary position, age, height, weight).
    """
    # Construct the URL to fetch player data
    url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"

    # Send a GET request to the URL and keep the single requested person
    person = requests.get(url).json()['people'][0]

    # Only the fields that are actually rendered are read, so a record
    # without unrelated data (e.g. pitching hand) does not fail here.
    player_name = person['fullName']
    position = person['primaryPosition']['abbreviation']
    age = person['currentAge']
    height = person['height']
    weight = person['weight']

    # Display the player's name, position, age, height, and weight on the axis
    ax.text(0.5, 1, f'{player_name}', va='top', ha='center', fontsize=30)
    ax.text(0.5, 0.65, f'{position}, Age:{age}, {height}/{weight}', va='top', ha='center', fontsize=20)
    ax.text(0.5, 0.4, 'Season Batting Percentiles', va='top', ha='center', fontsize=16)

    # Make API call to retrieve sports information
    sports = requests.get(url='https://statsapi.mlb.com/api/v1/sports').json()['sports']

    # Find the abbreviation for the requested sport; an unknown sport_id
    # simply leaves the abbreviation out of the season line.
    abb = next((sport.get('abbreviation') for sport in sports if sport.get('id') == sport_id), None)
    season_label = f'{year_input} {abb} Season' if abb else f'{year_input} Season'

    # Display the season and sport abbreviation
    ax.text(0.5, 0.20, season_label, va='top', ha='center', fontsize=14, fontstyle='italic')

    # Turn off the axis
    ax.axis('off')
70
+
71
+
72
# Team table fetched once at import time, plus a lookup from team ID to the
# abbreviation of that team's parent organization.
df_teams = scrape.get_teams()
team_dict = {
    team_id: parent_abbreviation
    for team_id, parent_abbreviation in zip(df_teams['team_id'], df_teams['parent_org_abbreviation'])
}
74
+
75
+
76
+ # List of MLB teams and their corresponding ESPN logo URLs
77
+ mlb_teams = [
78
+ {"team": "AZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/ari.png&h=500&w=500"},
79
+ {"team": "ATH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/oak.png&h=500&w=500"},
80
+ {"team": "ATL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/atl.png&h=500&w=500"},
81
+ {"team": "BAL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/bal.png&h=500&w=500"},
82
+ {"team": "BOS", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/bos.png&h=500&w=500"},
83
+ {"team": "CHC", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/chc.png&h=500&w=500"},
84
+ {"team": "CWS", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/chw.png&h=500&w=500"},
85
+ {"team": "CIN", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/cin.png&h=500&w=500"},
86
+ {"team": "CLE", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/cle.png&h=500&w=500"},
87
+ {"team": "COL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/col.png&h=500&w=500"},
88
+ {"team": "DET", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/det.png&h=500&w=500"},
89
+ {"team": "HOU", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/hou.png&h=500&w=500"},
90
+ {"team": "KC", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/kc.png&h=500&w=500"},
91
+ {"team": "LAA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/laa.png&h=500&w=500"},
92
+ {"team": "LAD", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/lad.png&h=500&w=500"},
93
+ {"team": "MIA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/mia.png&h=500&w=500"},
94
+ {"team": "MIL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/mil.png&h=500&w=500"},
95
+ {"team": "MIN", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/min.png&h=500&w=500"},
96
+ {"team": "NYM", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/nym.png&h=500&w=500"},
97
+ {"team": "NYY", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/nyy.png&h=500&w=500"},
98
+ {"team": "PHI", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/phi.png&h=500&w=500"},
99
+ {"team": "PIT", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/pit.png&h=500&w=500"},
100
+ {"team": "SD", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sd.png&h=500&w=500"},
101
+ {"team": "SF", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sf.png&h=500&w=500"},
102
+ {"team": "SEA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sea.png&h=500&w=500"},
103
+ {"team": "STL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/stl.png&h=500&w=500"},
104
+ {"team": "TB", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tb.png&h=500&w=500"},
105
+ {"team": "TEX", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tex.png&h=500&w=500"},
106
+ {"team": "TOR", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tor.png&h=500&w=500"},
107
+ {"team": "WSH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/wsh.png&h=500&w=500"},
108
+ {"team": "ZZZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/leagues/500/mlb.png&w=500&h=500"}
109
+ ]
110
+
111
# Tabular view of the team/logo records, plus lookups in both directions:
#   image_dict      : team abbreviation -> logo URL
#   image_dict_flip : logo URL -> team abbreviation
df_image = pd.DataFrame(mlb_teams)
image_dict = dict(zip(df_image['team'], df_image['logo_url']))
image_dict_flip = dict(zip(df_image['logo_url'], df_image['team']))
114
+
115
+
116
# Display configuration for every stat drawn on the percentile chart.
# Each row is (summary column, format spec, percentile_flip, chart label).
# The percentile bars are drawn in this order, and a stat with
# percentile_flip=True is shown as (1 - percentile).
merged_dict = {
    stat_key: {"format": fmt, "percentile_flip": flip, "stat_title": title}
    for stat_key, fmt, flip, title in (
        ("woba_percent", '.3f', False, "wOBA"),
        ("xwoba_percent", '.3f', False, "xwOBA"),
        ("launch_speed", '.1f', False, "Average EV"),
        ("launch_speed_90", '.1f', False, "90th% EV"),
        ("max_launch_speed", '.1f', False, "Max EV"),
        ("barrel_percent", '.1%', False, "Barrel%"),
        ("hard_hit_percent", '.1%', False, "Hard-Hit%"),
        ("sweet_spot_percent", '.1%', False, "LA Sweet-Spot%"),
        ("zone_percent", '.1%', False, "Zone%"),
        ("zone_swing_percent", '.1%', False, "Z-Swing%"),
        ("chase_percent", '.1%', True, "O-Swing%"),
        ("whiff_rate", '.1%', True, "Whiff%"),
        ("k_percent", '.1%', True, "K%"),
        ("bb_percent", '.1%', False, "BB%"),
        ("pull_percent", '.1%', False, "Pull%"),
        ("pulled_fly_ball_percent", '.1%', False, "Pull FB%"),
    )
}
134
+
135
+
136
# Selectable levels, keyed by sport id (as a string) -> label shown in the
# "Select Level" dropdown.  An earlier mapping ({'1': 'MLB', '11': 'AAA'})
# is disabled; only the two minor-league levels below are offered.
level_dict = {'11': 'AAA', '14': 'A (FSL)'}

# Sport id (as a string) -> level token used in the
# data/statcast/batter_summary_<level>_<year>.csv file names.
level_dict_file = {'11': 'aaa', '14': 'a'}

# Seasons offered in the "Select Season" dropdown.
year_list = [2024]
151
+
152
+
153
+ from shiny import App, reactive, ui, render
154
+ from shiny.ui import h2, tags
155
+
156
# Page layout: a centred container holding the header/links, a sidebar with
# the season/level/player/logo controls, and a single "Batter Summary" tab
# that shows the generated plot.
app_ui = ui.page_fluid(
    ui.tags.div(
        {"style": "width:90%;margin: 0 auto;max-width: 1600px;"},
        ui.tags.style(
            """
            h4 {
                margin-top: 1em;font-size:35px;
            }
            h2{
                font-size:25px;
            }
            """
        ),

        # Header and external links
        ui.tags.h4("TJStats"),
        ui.tags.i("Baseball Analytics and Visualizations"),
        ui.markdown("""<a href='https://x.com/TJStats'>Follow me on Twitter</a><sup>1</sup>"""),
        ui.markdown("""<a href='https://www.patreon.com/tj_stats'>Support me on Patreon for Access to 2024 Apps</a><sup>1</sup>"""),

        ui.tags.h5("Statcast Batting Summaries"),
        ui.layout_sidebar(
            ui.panel_sidebar(
                # Season and level pickers, side by side
                ui.row(
                    ui.column(6, ui.input_select('year_input', 'Select Season', year_list, selected=2024)),
                    ui.column(6, ui.input_select('level_input', 'Select Level', level_dict)),
                ),
                # Button that (re)loads the player list for the chosen season/level
                ui.row(ui.input_action_button("player_button", "Get Player List", class_="btn-primary")),
                # Placeholder filled by the server-side `player_select_ui` renderer.
                # NOTE(review): the second positional argument of ui.output_ui is
                # `inline`, not a label, so 'Select Player' is presumably treated
                # as a truthy inline flag -- confirm the intent.
                ui.row(ui.column(12, ui.output_ui('player_select_ui', 'Select Player'))),

                # Optional override of the team logo shown on the plot
                ui.row(
                    ui.column(6, ui.input_switch("switch", "Custom Team?", False)),
                    ui.column(6, ui.input_select('logo_select', 'Select Custom Logo', image_dict_flip, multiple=False))
                ),

                # Button that triggers plot generation
                ui.row(ui.input_action_button("generate_plot", "Generate Plot", class_="btn-primary")),
                width=3,
            ),

            ui.panel_main(
                ui.navset_tab(
                    # Single tab holding the batter summary figure.
                    # NOTE(review): the visible server function defines no
                    # `status` output, so this text output appears to stay empty.
                    ui.nav("Batter Summary",
                        ui.output_text("status"),
                        ui.output_plot('plot', width='1200px', height='1200px')
                    ),
                )
            )
        )
    )
)
213
+
214
def server(input, output, session):
    """
    Shiny server function: wires the sidebar inputs to the two outputs.

    Outputs defined here:
      * ``player_select_ui`` - the batter dropdown, rebuilt when
        "Get Player List" is pressed.
      * ``plot`` - the percentile summary figure, redrawn when
        "Generate Plot" is pressed.

    :param input: Shiny input accessor for the current session.
    :param output: Shiny output registry for the current session.
    :param session: Shiny session object (unused here).
    """

    @render.ui
    @reactive.event(input.player_button, ignore_none=False)
    def player_select_ui():
        """
        Build the "Select Batter" dropdown for the chosen season and level.

        :return: A select input whose choices map batter id -> "name - position".
        """
        # Position lookup from the player list; pitchers and two-way players
        # are filtered out of this lookup.
        df_player_info = scrape.get_players(sport_id=int(input.level_input()), season=int(input.year_input())).filter(
            ~pl.col("position").is_in(['P', 'TWP'])).sort("name")
        position_by_id = dict(zip(df_player_info['player_id'], df_player_info['position']))

        year = int(input.year_input())
        sport_id = int(input.level_input())
        batter_summary = pl.read_csv(
            f'data/statcast/batter_summary_{level_dict_file[str(sport_id)]}_{year}.csv'
        ).sort('batter_name', descending=False)

        # Attach a position to every batter in the summary file.  Batters that
        # are missing from the lookup (e.g. the filtered-out two-way players)
        # get a string placeholder; falling back to the integer id, as before,
        # made the column non-string and broke the label concatenation below.
        batter_summary = batter_summary.with_columns(
            pl.col("batter_id")
            .map_elements(lambda x: position_by_id.get(x, 'N/A'), return_dtype=pl.Utf8)
            .alias("position")
        )

        # Dropdown choices: batter id -> "name - position"
        batter_dict = dict(zip(batter_summary['batter_id'],
                               batter_summary['batter_name'] + ' - ' + batter_summary['position']))

        return ui.input_select("batter_id", "Select Batter", batter_dict, selectize=True)

    @output
    @render.plot
    @reactive.event(input.generate_plot, ignore_none=False)
    def plot():
        """
        Draw the percentile summary figure for the selected batter.

        Loads the statcast summary, prospect and roster CSVs for the selected
        season/level, ranks the batter against the qualified pool and draws
        the figure on a new matplotlib figure.
        """

        def draw_no_scouting_data(ax_fv_table):
            """Write the 'No Scouting Data' placeholder on the scouting axes."""
            ax_fv_table.text(x=0.5, y=0.5, s='No Scouting Data', style='italic', ha='center', va='center',
                             fontsize=20, bbox=dict(facecolor='white', alpha=1, pad=10))

        def draw_baseball_savant_percentiles(new_player_metrics, new_player_percentiles, colors=None,
                                             sport_id=None,
                                             year_input=None):
            """
            Draw Baseball Savant-style percentile bars with proper alignment and scaling.

            :param new_player_metrics: DataFrame containing new player metrics.
            :param new_player_percentiles: DataFrame containing new player percentiles.
            :param colors: List of colors for bars (optional; when omitted the
                           bars are colored by percentile on a blue-to-red scale).
            :param sport_id: Sport (level) id forwarded to ``player_bio``.
            :param year_input: Season forwarded to ``player_bio``.
            """
            batter_id = new_player_metrics['batter_id'][0]
            stat_keys = list(merged_dict)
            stats = [merged_dict[key]['stat_title'] for key in stat_keys]

            # Percentiles as whole numbers in 1..100; "lower is better" stats
            # are flipped so a long bar always means a good result.
            raw_percentiles = new_player_percentiles.select(stat_keys).to_numpy()[0]
            percentiles = [
                int((1 - pct) * 100) if merged_dict[key]["percentile_flip"] else int(pct * 100)
                for pct, key in zip(raw_percentiles, stat_keys)
            ]
            percentiles = np.clip(percentiles, 1, 100)

            # Raw values formatted per stat; the '%' sign is dropped for display.
            raw_values = new_player_metrics.select(stat_keys).to_numpy()[0]
            values = [
                f'{val:{merged_dict[key]["format"]}}'.strip('%')
                for val, key in zip(raw_values, stat_keys)
            ]

            # Bar colors: honour caller-supplied colors, otherwise map the
            # percentiles onto the blue -> grey -> red scale.
            color_list = ['#3661AD', '#B4CFD1', '#D82129']
            if colors is None:
                cmap = LinearSegmentedColormap.from_list("custom_cmap", color_list)
                norm = Normalize(vmin=0.1, vmax=0.9)
                colors = [cmap(p) for p in norm(percentiles / 100)]

            # Figure setup
            num_stats = len(stats)
            bar_height = 4.5
            spacing = 1
            fig_height = (bar_height + spacing) * num_stats
            fig = plt.figure(figsize=(12, 12))
            gs = GridSpec(6, 5, height_ratios=[0.1, 1.5, 0.9, 0.9, 7.6, 0.1], width_ratios=[0.2, 1.5, 7, 1.5, 0.2])

            # Define subplots
            ax_title = fig.add_subplot(gs[1, 2])
            ax_table = fig.add_subplot(gs[2, :])
            ax_fv_table = fig.add_subplot(gs[3, :])
            ax = fig.add_subplot(gs[4, :])
            ax_logo = fig.add_subplot(gs[1, 3])

            ax.set_xlim(-1, 99)
            ax.set_ylim(-1, 99)
            ax.set_aspect("equal")
            ax.axis("off")

            # Draw each bar: grey track, colored fill, percentile bubble,
            # stat label on the left and raw value on the right.
            fs = 14
            for i, (stat, percentile, value, color) in enumerate(zip(stats, percentiles, values, colors)):
                y = fig_height - (i + 1) * (bar_height + spacing)
                ax.add_patch(patches.Rectangle((0, y + bar_height / 4), 100, bar_height / 2, color="#C7DCDC", lw=0))
                ax.add_patch(patches.Rectangle((0, y), percentile, bar_height, color=color, lw=0))
                circle_y = y + bar_height - bar_height / 2
                circle = plt.Circle((percentile, circle_y), bar_height / 2, color=color, ec='white', lw=1.5, zorder=10)
                ax.add_patch(circle)
                ax.text(percentile, circle_y, f"{percentile}", ha="center", va="center", fontsize=10, color='white', zorder=10, fontweight='bold')
                ax.text(-5, y + bar_height / 2, stat, ha="right", va="center", fontsize=fs)
                ax.text(115, y + bar_height / 2, str(value), ha="right", va="center", fontsize=fs, zorder=5)
                # Dashed separators between rows (none above the first row)
                if i > 0:
                    ax.hlines(y=y + bar_height + spacing / 2, color='#399098', linestyle=(0, (5, 5)), linewidth=1, xmin=-33, xmax=0)
                    ax.hlines(y=y + bar_height + spacing / 2, color='#399098', linestyle=(0, (5, 5)), linewidth=1, xmin=100, xmax=115)

            # Draw vertical lines for 10%, 50%, and 90% with labels
            for x, label, align, color in zip([10, 50, 90], ["Poor", "Average", "Great"], ['center', 'center', 'center'], color_list):
                ax.axvline(x=x, ymin=0, ymax=1, color='#FFF', linestyle='-', lw=1, zorder=1, alpha=0.5)
                ax.text(x, fig_height + 4, label, ha=align, va='center', fontsize=12, fontweight='bold', color=color)
                triangle = patches.RegularPolygon((x, fig_height + 1), 3, radius=1, orientation=0, color=color, zorder=2)
                ax.add_patch(triangle)

            # Title / bio panel
            player_bio(batter_id, ax=ax_title, sport_id=sport_id, year_input=year_input)

            # Team logo.  The player's own team is only looked up when no
            # custom logo is requested, so a player without a team mapping can
            # still be plotted with the "Custom Team?" switch on.
            if input.switch():
                logo_url = input.logo_select()
            else:
                logo_url = image_dict[team_dict[player_team_dict[batter_id]]]
            response = requests.get(logo_url)
            img = Image.open(BytesIO(response.content))
            ax_logo.imshow(img)
            ax_logo.axis("off")
            ax.axis('equal')

            # Metrics data table
            metrics_data = {
                "Pitches": new_player_metrics['pitches'][0],
                "PA": new_player_metrics['pa'][0],
                "BIP": new_player_metrics['bip'][0],
                "HR": f"{new_player_metrics['home_run'][0]:.0f}",
                "AVG": f"{new_player_metrics['avg'][0]:.3f}",
                "OBP": f"{new_player_metrics['obp'][0]:.3f}",
                "SLG": f"{new_player_metrics['slg'][0]:.3f}",
                "OPS": f"{new_player_metrics['obp'][0] + new_player_metrics['slg'][0]:.3f}",
            }
            df_table = pd.DataFrame(metrics_data, index=[0])
            ax_table.axis('off')
            table = ax_table.table(cellText=df_table.values, colLabels=df_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
            for key, cell in table.get_celld().items():
                if key[0] == 0:
                    cell.set_text_props(fontweight='bold')
            table.auto_set_font_size(False)
            table.set_fontsize(12)
            table.scale(1, 1.5)

            # Additional subplots for spacing, plus the footer credits
            ax_top = fig.add_subplot(gs[0, :])
            ax_bot = fig.add_subplot(gs[-1, :])
            ax_top.axis('off')
            ax_bot.axis('off')
            ax_bot.text(0.05, 2, "By: Thomas Nestico (@TJStats)", ha="left", va="center", fontsize=14)
            ax_bot.text(0.95, 2, "Data: MLB, Fangraphs", ha="right", va="center", fontsize=14)
            fig.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.01)

            # Player headshot.  A response that is not a readable image just
            # leaves the panel blank; it must not abort the rest of the figure
            # (previously an early return here skipped the table title and the
            # scouting grades and left the scouting axes with its frame on).
            ax_headshot = fig.add_subplot(gs[1, 1])
            url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_640/v1/people/{batter_id}/headshot/milb/current.png'
            response = requests.get(url)
            try:
                img = Image.open(BytesIO(response.content))
            except PIL.UnidentifiedImageError:
                img = None
            if img is not None:
                ax_headshot.set_xlim(0, 1)
                ax_headshot.set_ylim(0, 1)
                ax_headshot.imshow(img, extent=[1/6, 5/6, 0, 1], origin='upper')
            ax_headshot.axis('off')
            ax_table.set_title('Season Summary', style='italic')

            # Fangraphs scouting grades table
            ax_fv_table.axis('off')
            if batter_id not in dict_mlb_fg:
                draw_no_scouting_data(ax_fv_table)
                return
            df_fv_table = df_prospects[(df_prospects['minorMasterId'] == dict_mlb_fg[batter_id])][['cFV', 'Hit', 'Game', 'Raw', 'Spd', 'Fld']].reset_index(drop=True)
            if df_fv_table.empty:
                draw_no_scouting_data(ax_fv_table)
                return
            df_fv_table.columns = ['FV', 'Hit', 'Game', 'Raw', 'Spd', 'Fld']
            table_fv = ax_fv_table.table(cellText=df_fv_table.values, colLabels=df_fv_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
            for key, cell in table_fv.get_celld().items():
                if key[0] == 0:
                    cell.set_text_props(fontweight='bold')
            table_fv.auto_set_font_size(False)
            table_fv.set_fontsize(12)
            table_fv.scale(1, 1.5)
            ax_fv_table.set_title('Fangraphs Scouting Grades', style='italic')

        def calculate_new_player_percentiles(player_id, new_player_metrics, player_summary_filtered):
            """
            Calculate percentiles for a new player's metrics.

            The player's row is appended to the comparison pool (with any
            existing row for the same id removed first) and every stat column
            is converted to a percentile rank within that combined pool.

            :param player_id: ID of the player.
            :param new_player_metrics: DataFrame containing new player metrics.
            :param player_summary_filtered: Filtered player summary DataFrame.
            :return: DataFrame containing new player percentiles.
            """
            pool = player_summary_filtered[['batter_id'] + stat_list].filter(pl.col('batter_id') != player_id)
            combined_data = pl.concat([pool, new_player_metrics], how="vertical").to_pandas()
            combined_percentiles = pl.DataFrame(
                pd.concat([combined_data['batter_id'], combined_data[stat_list].rank(pct=True)], axis=1)
            )
            return combined_percentiles.filter(pl.col('batter_id') == player_id)

        # Show progress/loading notification
        with ui.Progress(min=0, max=1) as p:
            p.set(message="Generating plot", detail="This may take a while...")

            p.set(0.3, "Gathering data...")

            year = int(input.year_input())
            sport_id = int(input.level_input())
            batter_id = int(input.batter_id())

            # Player id -> team, used for the default team logo
            df_player = scrape.get_players(sport_id=sport_id, season=year)
            player_team_dict = dict(zip(df_player['player_id'], df_player['team']))

            batter_summary = pl.read_csv(f'data/statcast/batter_summary_{level_dict_file[str(sport_id)]}_{year}.csv')
            df_prospects = pd.read_csv(f'data/prospects/prospects_{year}.csv')
            df_rosters = pd.read_csv(f'data/rosters/fangraphs_rosters_{year}.csv')
            # Roster 'minorbamid' -> 'minormasterid', used to find the scouting row
            df_small = df_rosters[['minorbamid', 'minormasterid']].dropna()
            dict_mlb_fg = dict(zip(df_small['minorbamid'].astype(int), df_small['minormasterid']))

            # Comparison pool: batters with at least 300 PA and a non-negative
            # launch speed.  Stat columns are everything after the first two.
            batter_summary_filter = batter_summary.filter((pl.col('pa') >= 300) & (pl.col('launch_speed') >= 0))
            stat_list = batter_summary.columns[2:]
            new_player_metrics = batter_summary.filter(pl.col('batter_id') == batter_id)[['batter_id'] + stat_list]

            # Get percentiles for the new player
            new_player_percentiles = calculate_new_player_percentiles(batter_id, new_player_metrics, batter_summary_filter)

            p.set(0.6, "Creating plot...")
            # Draw Baseball Savant-style percentile bars
            draw_baseball_savant_percentiles(new_player_metrics=new_player_metrics,
                                             new_player_percentiles=new_player_percentiles,
                                             sport_id=sport_id,
                                             year_input=year)
471
+
472
# Application object: pairs the page layout with the server logic.
app = App(ui=app_ui, server=server)