carbonnnnn committed on
Commit
890203a
·
1 Parent(s): df99250

working date dropdown

Browse files
Files changed (5) hide show
  1. app.py +76 -107
  2. assets/merged_data.csv +1 -1
  3. assets/text_content.py +19 -2
  4. src/filter_utils.py +63 -60
  5. test.py +0 -10
app.py CHANGED
@@ -18,8 +18,11 @@ text_leaderboard[tc.LATENCY] = text_leaderboard[tc.LATENCY].round(1)
18
  text_leaderboard[tc.CLEMSCORE] = text_leaderboard[tc.CLEMSCORE].round(1)
19
 
20
  open_weight_df = text_leaderboard[text_leaderboard[tc.OPEN_WEIGHT] == True]
 
21
  if not open_weight_df.empty: # Check if filtered df is non-empty
22
- max_parameter_size = open_weight_df[tc.PARAMS].max()
 
 
23
 
24
  # Short leaderboard containing fixed columns
25
  short_leaderboard = filter_cols(text_leaderboard)
@@ -92,7 +95,7 @@ with llm_calc_app:
92
  # First Column
93
  ####################################
94
  ## Language Select
95
- with gr.Column():
96
 
97
  with gr.Row():
98
  lang_dropdown = gr.Dropdown(
@@ -102,47 +105,50 @@ with llm_calc_app:
102
  label="Languages πŸ—£οΈ"
103
  )
104
 
 
 
 
105
  with gr.Row():
106
- with gr.Column():
107
- start_year = gr.Dropdown(
108
- choices=YEARS,
109
- value=[YEARS[0]],
110
- allow_custom_value=True
111
- )
112
- start_month = gr.Dropdown(
113
- choices=MONTHS,
114
- value=[MONTHS[0]],
115
- allow_custom_value=True
116
- )
117
-
118
- with gr.Column():
119
- end_year = gr.Dropdown(
120
- choices=YEARS,
121
- value=[YEARS[-1]],
122
- allow_custom_value=True
123
- )
124
- end_month = gr.Dropdown(
125
- choices=MONTHS,
126
- value=[MONTHS[-1]],
127
- allow_custom_value=True
128
- )
129
-
130
-
131
- # Multiodality Select
132
- with gr.Row():
133
- multimodal_checkbox = gr.CheckboxGroup(
134
- choices=[tc.SINGLE_IMG, tc.MULT_IMG, tc.AUDIO, tc.VIDEO],
135
  value=[],
136
- label="Additional Modalities πŸ“·πŸŽ§πŸŽ¬",
 
 
 
 
 
137
  )
138
 
139
- # Open/Commercial Selection
 
 
 
 
 
 
 
 
 
 
 
140
  with gr.Row():
141
- open_weight_checkbox = gr.CheckboxGroup(
142
- choices=[tc.OPEN, tc.COMM],
143
- value=[tc.OPEN, tc.COMM],
144
- label="Model Type πŸ”“ πŸ’Ό",
145
- )
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  # License selection
148
  with gr.Row():
@@ -155,9 +161,9 @@ with llm_calc_app:
155
  #############################################################
156
  # Second Column
157
  #############################################################
158
- with gr.Column():
159
 
160
- ####### LOG SLIDER 1 ###########
161
  with gr.Row():
162
  parameter_slider = RangeSlider(
163
  minimum=0,
@@ -168,7 +174,7 @@ with llm_calc_app:
168
  )
169
 
170
 
171
- ########### LOG SLIDER 2 ################
172
 
173
  with gr.Row():
174
  context_slider = RangeSlider(
@@ -179,25 +185,23 @@ with llm_calc_app:
179
  step=context_step
180
  )
181
 
182
- ############# PRICE SLIDER 1 ###############
183
  with gr.Row():
184
- input_pricing_slider = RangeSlider(
185
- minimum=0,
186
- maximum=max_input_price,
187
- value=(0, max_input_price),
188
- label="πŸ’²/1M input tokens",
189
- elem_id="double-slider-3"
190
  )
 
191
 
192
- ############### PRICE SLIDER 2 ###############
193
  with gr.Row():
194
- output_pricing_slider = RangeSlider(
195
- minimum=0,
196
- maximum=max_output_price,
197
- value=(0, max_output_price),
198
- label="πŸ’²/1M output tokens",
199
- elem_id="double-slider-4"
200
- )
201
 
202
 
203
  with gr.Row():
@@ -225,7 +229,7 @@ with llm_calc_app:
225
  filter,
226
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
227
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
228
- context_slider, open_weight_checkbox, start_year, start_month, end_year, end_month, license_checkbox],
229
  [leaderboard_table],
230
  queue=True
231
  )
@@ -234,7 +238,7 @@ with llm_calc_app:
234
  filter,
235
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
236
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
237
- context_slider, open_weight_checkbox, start_year, start_month, end_year, end_month, license_checkbox],
238
  [leaderboard_table],
239
  queue=True
240
  )
@@ -243,7 +247,7 @@ with llm_calc_app:
243
  filter,
244
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
245
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
246
- context_slider, open_weight_checkbox, start_year, start_month, end_year, end_month, license_checkbox],
247
  [leaderboard_table],
248
  queue=True
249
  )
@@ -252,7 +256,7 @@ with llm_calc_app:
252
  filter,
253
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
254
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
255
- context_slider, open_weight_checkbox, start_year, start_month, end_year, end_month, license_checkbox],
256
  [leaderboard_table],
257
  queue=True
258
  )
@@ -261,7 +265,7 @@ with llm_calc_app:
261
  filter,
262
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
263
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
264
- context_slider, open_weight_checkbox, start_year, start_month, end_year, end_month, license_checkbox],
265
  [leaderboard_table],
266
  queue=True
267
  )
@@ -270,7 +274,7 @@ with llm_calc_app:
270
  filter,
271
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
272
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
273
- context_slider, open_weight_checkbox, start_year, start_month, end_year, end_month, license_checkbox],
274
  [leaderboard_table],
275
  queue=True
276
  )
@@ -279,43 +283,43 @@ with llm_calc_app:
279
  filter,
280
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
281
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
282
- context_slider, open_weight_checkbox, start_year, start_month, end_year, end_month, license_checkbox],
283
  [leaderboard_table],
284
  queue=True
285
  )
286
 
287
- start_year.change(
288
  filter,
289
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
290
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
291
- context_slider, open_weight_checkbox, start_year, start_month, end_year, end_month, license_checkbox],
292
  [leaderboard_table],
293
  queue=True
294
  )
295
 
296
- start_month.change(
297
  filter,
298
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
299
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
300
- context_slider, open_weight_checkbox, start_year, start_month, end_year, end_month, license_checkbox],
301
  [leaderboard_table],
302
  queue=True
303
  )
304
 
305
- end_year.change(
306
  filter,
307
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
308
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
309
- context_slider, open_weight_checkbox, start_year, start_month, end_year, end_month, license_checkbox],
310
  [leaderboard_table],
311
  queue=True
312
  )
313
 
314
- end_month.change(
315
  filter,
316
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
317
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
318
- context_slider, open_weight_checkbox, start_year, start_month, end_year, end_month, license_checkbox],
319
  [leaderboard_table],
320
  queue=True
321
  )
@@ -324,7 +328,7 @@ with llm_calc_app:
324
  filter,
325
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
326
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
327
- context_slider, open_weight_checkbox, start_year, start_month, end_year, end_month, license_checkbox],
328
  [leaderboard_table],
329
  queue=True
330
  )
@@ -332,38 +336,3 @@ with llm_calc_app:
332
  llm_calc_app.load()
333
  llm_calc_app.queue()
334
  llm_calc_app.launch()
335
-
336
-
337
-
338
- """
339
- model_name, input_price, output_price,
340
- multimodality_image,multimodality_multiple_image,multimodality_audio,multimodality_video,
341
- source,licence_name,licence_url,languages,release_date,
342
- parameters_estimated,parameters_actual,
343
-
344
- open_weight,context,
345
-
346
- additional_prices_context_caching,
347
- additional_prices_context_storage,
348
- additional_prices_image_input,additional_prices_image_output,additional_prices_video_input,additional_prices_video_output,additional_prices_audio_input,additional_prices_audio_output,clemscore_v1.6.5_multimodal,clemscore_v1.6.5_ascii,clemscore_v1.6,latency_v1.6,latency_v1.6.5_multimodal,latency_v1.6.5_ascii,
349
-
350
- average_clemscore,average_latency,parameters
351
-
352
- Final list
353
-
354
- model_name, input_price, output_price,
355
- multimodality_image,multimodality_multiple_image,multimodality_audio,multimodality_video,
356
- source,licence_name,licence_url,languages,release_date, open_weight,context, average_clemscore,average_latency,parameters
357
-
358
-
359
- Filter
360
- multimodality_image,multimodality_multiple_image,multimodality_audio,multimodality_video,
361
- licence_name+licence_url, languages, release_date, open_weight
362
-
363
- RR
364
- model_name, input_price, output_price,
365
- source, release_date
366
-
367
- """
368
-
369
-
 
18
  text_leaderboard[tc.CLEMSCORE] = text_leaderboard[tc.CLEMSCORE].round(1)
19
 
20
  open_weight_df = text_leaderboard[text_leaderboard[tc.OPEN_WEIGHT] == True]
21
+ print(open_weight_df[tc.PARAMS])
22
  if not open_weight_df.empty: # Check if filtered df is non-empty
23
+ # Get max parameter size, ignoring NaN values
24
+ params = open_weight_df[tc.PARAMS].dropna()
25
+ max_parameter_size = params.max() if not params.empty else 0
26
 
27
  # Short leaderboard containing fixed columns
28
  short_leaderboard = filter_cols(text_leaderboard)
 
95
  # First Column
96
  ####################################
97
  ## Language Select
98
+ with gr.Column(scale=2):
99
 
100
  with gr.Row():
101
  lang_dropdown = gr.Dropdown(
 
105
  label="Languages πŸ—£οΈ"
106
  )
107
 
108
+
109
+ ## Release Date range selection
110
+
111
  with gr.Row():
112
+ start_year_dropdown = gr.Dropdown(
113
+ choices = YEARS,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  value=[],
115
+ label="Model Release - Year πŸ—“οΈ"
116
+ )
117
+ start_month_dropdown = gr.Dropdown(
118
+ choices = MONTHS,
119
+ value=[],
120
+ label="Month πŸ“œ"
121
  )
122
 
123
+ end_year_dropdown = gr.Dropdown(
124
+ choices = YEARS,
125
+ value=[],
126
+ label="End - Year πŸ—“οΈ"
127
+ )
128
+ end_month_dropdown = gr.Dropdown(
129
+ choices = MONTHS,
130
+ value=[],
131
+ label="Month πŸ“œ"
132
+ )
133
+
134
+ ## Price selection
135
  with gr.Row():
136
+
137
+ input_pricing_slider = RangeSlider(
138
+ minimum=0,
139
+ maximum=max_input_price,
140
+ value=(0, max_input_price),
141
+ label="πŸ’²/1M input tokens",
142
+ elem_id="double-slider-3"
143
+ )
144
+
145
+ output_pricing_slider = RangeSlider(
146
+ minimum=0,
147
+ maximum=max_output_price,
148
+ value=(0, max_output_price),
149
+ label="πŸ’²/1M output tokens",
150
+ elem_id="double-slider-4"
151
+ )
152
 
153
  # License selection
154
  with gr.Row():
 
161
  #############################################################
162
  # Second Column
163
  #############################################################
164
+ with gr.Column(scale=1):
165
 
166
+ ####### parameters ###########
167
  with gr.Row():
168
  parameter_slider = RangeSlider(
169
  minimum=0,
 
174
  )
175
 
176
 
177
+ ########### Context range ################
178
 
179
  with gr.Row():
180
  context_slider = RangeSlider(
 
185
  step=context_step
186
  )
187
 
188
+ ############# Modality selection checkbox ###############
189
  with gr.Row():
190
+ multimodal_checkbox = gr.CheckboxGroup(
191
+ choices=[tc.SINGLE_IMG, tc.MULT_IMG, tc.AUDIO, tc.VIDEO],
192
+ value=[],
193
+ label="Additional Modalities πŸ“·πŸŽ§πŸŽ¬",
 
 
194
  )
195
+
196
 
197
+ # ############### Model Type Checkbox ###############
198
  with gr.Row():
199
+ open_weight_checkbox = gr.CheckboxGroup(
200
+ choices=[tc.OPEN, tc.COMM],
201
+ value=[tc.OPEN, tc.COMM],
202
+ label="Model Type πŸ”“ πŸ’Ό",
203
+ )
204
+
 
205
 
206
 
207
  with gr.Row():
 
229
  filter,
230
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
231
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
232
+ context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
233
  [leaderboard_table],
234
  queue=True
235
  )
 
238
  filter,
239
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
240
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
241
+ context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
242
  [leaderboard_table],
243
  queue=True
244
  )
 
247
  filter,
248
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
249
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
250
+ context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
251
  [leaderboard_table],
252
  queue=True
253
  )
 
256
  filter,
257
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
258
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
259
+ context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
260
  [leaderboard_table],
261
  queue=True
262
  )
 
265
  filter,
266
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
267
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
268
+ context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
269
  [leaderboard_table],
270
  queue=True
271
  )
 
274
  filter,
275
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
276
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
277
+ context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
278
  [leaderboard_table],
279
  queue=True
280
  )
 
283
  filter,
284
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
285
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
286
+ context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
287
  [leaderboard_table],
288
  queue=True
289
  )
290
 
291
+ start_year_dropdown.change(
292
  filter,
293
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
294
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
295
+ context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
296
  [leaderboard_table],
297
  queue=True
298
  )
299
 
300
+ start_month_dropdown.change(
301
  filter,
302
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
303
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
304
+ context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
305
  [leaderboard_table],
306
  queue=True
307
  )
308
 
309
+ end_year_dropdown.change(
310
  filter,
311
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
312
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
313
+ context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
314
  [leaderboard_table],
315
  queue=True
316
  )
317
 
318
+ end_month_dropdown.change(
319
  filter,
320
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
321
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
322
+ context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
323
  [leaderboard_table],
324
  queue=True
325
  )
 
328
  filter,
329
  [dummy_leaderboard_table, lang_dropdown, parameter_slider,
330
  input_pricing_slider, output_pricing_slider, multimodal_checkbox,
331
+ context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
332
  [leaderboard_table],
333
  queue=True
334
  )
 
336
  llm_calc_app.load()
337
  llm_calc_app.queue()
338
  llm_calc_app.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
assets/merged_data.csv CHANGED
@@ -1,4 +1,4 @@
1
- Model Name,Latency (s),Clemscore,Parameters (B),Release Date,Open Weight,Languages,Context Size (k),License Name,License URL,Single Image,Multiple Images,Audio,Video,Input $/1M tokens,Output $/1M tokens,License,Temp Date
2
  o1-preview-2024-09-12,7.368572853601854,73.63,,2024-09-12,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,15.0,60.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-09-12
3
  gpt-4-1106-vision-preview,4.712557435752081,73.55,,2023-11-06,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,10.0,30.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-11-06
4
  claude-3-5-sonnet-20240620,2.0645066812060726,68.925,,2024-06-20,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,3.0,15.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-06-20
 
1
+ Model Name,Latency (s),Clemscore,Parameters (B),Release Date,Open Weight,Languages,Context Size (k),License Name,License URL,Single Image,Multi Image,Audio,Video,Input $/1M tokens,Output $/1M tokens,License,Temp Date
2
  o1-preview-2024-09-12,7.368572853601854,73.63,,2024-09-12,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,15.0,60.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-09-12
3
  gpt-4-1106-vision-preview,4.712557435752081,73.55,,2023-11-06,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,10.0,30.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-11-06
4
  claude-3-5-sonnet-20240620,2.0645066812060726,68.925,,2024-06-20,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,3.0,15.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-06-20
assets/text_content.py CHANGED
@@ -10,6 +10,9 @@ RESULT_FILE = "results.csv"
10
  LATENCY_SUFFIX = "_latency.csv"
11
 
12
  # Setup Column Names
 
 
 
13
  DEFAULT_MODEL_NAME = "Unnamed: 0"
14
  DEFAULT_CLEMSCORE = "-, clemscore"
15
 
@@ -24,7 +27,7 @@ CONTEXT = "Context Size (k)"
24
  LICENSE_NAME = "License Name"
25
  LICENSE_URL = "License URL"
26
  SINGLE_IMG = "Single Image"
27
- MULT_IMG = "Multiple Images"
28
  AUDIO = "Audio"
29
  VIDEO = "Video"
30
  INPUT = "Input $/1M tokens"
@@ -39,4 +42,18 @@ COMM = "Commercial"
39
  TITLE = """<h1 align="center" id="space-title"> LLM Calculator βš–οΈβš‘ πŸ“πŸ’°</h1>"""
40
 
41
  # Date Picker (set as Dropdown until datetime object is fixed)
42
- START_YEAR = "2020"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  LATENCY_SUFFIX = "_latency.csv"
11
 
12
  # Setup Column Names
13
+ # Note - Changing this does not affect the already generated csv `merged_data.csv`
14
+ # Run `src/process_data.py` for this
15
+
16
  DEFAULT_MODEL_NAME = "Unnamed: 0"
17
  DEFAULT_CLEMSCORE = "-, clemscore"
18
 
 
27
  LICENSE_NAME = "License Name"
28
  LICENSE_URL = "License URL"
29
  SINGLE_IMG = "Single Image"
30
+ MULT_IMG = "Multi Image"
31
  AUDIO = "Audio"
32
  VIDEO = "Video"
33
  INPUT = "Input $/1M tokens"
 
42
  TITLE = """<h1 align="center" id="space-title"> LLM Calculator βš–οΈβš‘ πŸ“πŸ’°</h1>"""
43
 
44
  # Date Picker (set as Dropdown until datetime object is fixed)
45
+ START_YEAR = "2020"
46
+ MONTH_MAP = {
47
+ "January": 1,
48
+ "February": 2,
49
+ "March": 3,
50
+ "April": 4,
51
+ "May": 5,
52
+ "June": 6,
53
+ "July": 7,
54
+ "August": 8,
55
+ "September": 9,
56
+ "October": 10,
57
+ "November": 11,
58
+ "December": 12
59
+ }
src/filter_utils.py CHANGED
@@ -2,6 +2,11 @@
2
 
3
  import pandas as pd
4
  import assets.text_content as tc
 
 
 
 
 
5
 
6
  def filter_cols(df):
7
 
@@ -19,6 +24,7 @@ def filter_cols(df):
19
 
20
  return df
21
 
 
22
  def convert_date_components_to_timestamp(year: str, month: str) -> int:
23
  """Convert year and month strings to timestamp."""
24
  # Create a datetime object for the first day of the month
@@ -26,70 +32,79 @@ def convert_date_components_to_timestamp(year: str, month: str) -> int:
26
  return int(pd.to_datetime(date_str).timestamp())
27
 
28
  def filter_by_date(df: pd.DataFrame,
29
- start_year: str,
30
- start_month: str,
31
- end_year: str,
32
- end_month: str,
33
- date_column: str) -> pd.DataFrame:
34
  """
35
  Filter DataFrame by date range using separate year and month components.
36
-
37
- Args:
38
- df: DataFrame to filter
39
- start_year: Starting year (e.g., "2023")
40
- start_month: Starting month (e.g., "1" for January)
41
- end_year: Ending year (e.g., "2024")
42
- end_month: Ending month (e.g., "12" for December)
43
- date_column: Name of the date column to filter on
44
  """
45
- # Convert string inputs to integers for date creation
46
- start_timestamp = convert_date_components_to_timestamp(
47
- int(start_year),
48
- int(start_month)
49
- )
50
-
51
- end_timestamp = convert_date_components_to_timestamp(
52
- int(end_year),
53
- int(end_month)
54
- )
55
-
56
- # Convert the DataFrame's date column to timestamps for comparison
57
- date_timestamps = pd.to_datetime(df[date_column]).apply(lambda x: int(x.timestamp()))
58
-
59
- # Filter the DataFrame
60
- return df[
61
- (date_timestamps >= start_timestamp) &
62
- (date_timestamps <= end_timestamp)
63
- ]
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
 
67
  def filter(df, language_list, parameters, input_price, output_price, multimodal,
68
- context, open_weight, start_year, start_month, end_year, end_month, license ):
69
-
 
70
 
 
71
  if not df.empty: # Check if df is non-empty
72
  df = df[df[tc.LANGS].apply(lambda x: all(lang in x for lang in language_list))]
73
 
74
  if not df.empty:
75
- # Split dataframe by Open Weight
76
- open_weight_true = df[df[tc.OPEN_WEIGHT] == True]
77
- open_weight_false = df[df[tc.OPEN_WEIGHT] == False]
 
 
 
 
 
 
 
78
 
79
- # Get max parameter size for open weight models
80
- max_parameter_size = open_weight_true[tc.PARAMS].max() if not open_weight_true.empty else 0
 
81
 
82
  # Filter only the open weight models based on parameters
83
  if not open_weight_true.empty:
84
- if parameters[1] >= max_parameter_size:
85
- filtered_open = open_weight_true[
86
- (open_weight_true[tc.PARAMS] >= parameters[0])
87
- ]
88
- else:
89
- filtered_open = open_weight_true[
90
- (open_weight_true[tc.PARAMS] >= parameters[0]) &
91
- (open_weight_true[tc.PARAMS] <= parameters[1])
92
- ]
93
 
94
  # Combine filtered open weight models with unfiltered commercial models
95
  df = pd.concat([filtered_open, open_weight_false])
@@ -125,18 +140,6 @@ def filter(df, language_list, parameters, input_price, output_price, multimodal,
125
  if not df.empty: # Check if df is non-empty
126
  df = df[df[tc.LICENSE_NAME].apply(lambda x: any(lic in x for lic in license))]
127
 
128
- # # Convert 'Release Date' to int temporarily
129
- # if not df.empty: # Check if df is non-empty
130
- # df[tc.TEMP_DATE] = pd.to_datetime(df[tc.TEMP_DATE]).astype(int) // 10**9 # Convert to seconds since epoch
131
-
132
- # # Convert start and end to int (seconds since epoch)
133
- # start = int(pd.to_datetime(start).timestamp())
134
- # end = int(pd.to_datetime(end).timestamp())
135
-
136
- # # Filter based on the converted 'Release Date'
137
- # if not df.empty: # Check if df is non-empty
138
- # df = df[(df[tc.TEMP_DATE] >= start) & (df[tc.TEMP_DATE] <= end)]
139
-
140
  df = filter_by_date(df, start_year, start_month, end_year, end_month, tc.TEMP_DATE)
141
 
142
  df = filter_cols(df)
 
2
 
3
  import pandas as pd
4
  import assets.text_content as tc
5
+ import calendar
6
+ from typing import Union, List
7
+ from datetime import datetime
8
+
9
+ current_year = str(datetime.now().year)
10
 
11
  def filter_cols(df):
12
 
 
24
 
25
  return df
26
 
27
+
28
  def convert_date_components_to_timestamp(year: str, month: str) -> int:
29
  """Convert year and month strings to timestamp."""
30
  # Create a datetime object for the first day of the month
 
32
  return int(pd.to_datetime(date_str).timestamp())
33
 
34
  def filter_by_date(df: pd.DataFrame,
35
+ start_year, start_month,
36
+ end_year, end_month,
37
+ date_column: str = tc.RELEASE_DATE) -> pd.DataFrame:
 
 
38
  """
39
  Filter DataFrame by date range using separate year and month components.
 
 
 
 
 
 
 
 
40
  """
41
+ # All lists are passed at once, so set default values here instead of passing them in args- Overwritten by empty lists
42
+ if not start_year:
43
+ start_year = tc.START_YEAR
44
+ if not end_year:
45
+ end_year = current_year
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
+ if not start_month:
48
+ start_month = "January"
49
+ if not end_month:
50
+ end_month = "December"
51
+
52
+ try:
53
+ # Convert string inputs to integers for date creation
54
+ start_timestamp = convert_date_components_to_timestamp(
55
+ int(start_year),
56
+ int(tc.MONTH_MAP[start_month])
57
+ )
58
+
59
+ end_timestamp = convert_date_components_to_timestamp(
60
+ int(end_year),
61
+ int(tc.MONTH_MAP[end_month])
62
+ )
63
+
64
+ # Convert the DataFrame's date column to timestamps for comparison
65
+ date_timestamps = pd.to_datetime(df[date_column]).apply(lambda x: int(x.timestamp()))
66
+
67
+ # Filter the DataFrame
68
+ return df[
69
+ (date_timestamps >= start_timestamp) &
70
+ (date_timestamps <= end_timestamp)
71
+ ]
72
+ except (ValueError, TypeError) as e:
73
+ print(f"Error processing dates: {e}")
74
+ return df # Return unfiltered DataFrame if there's an error
75
 
76
 
77
  def filter(df, language_list, parameters, input_price, output_price, multimodal,
78
+ context, open_weight,
79
+ start_year, start_month, end_year, end_month,
80
+ license ):
81
 
82
+
83
  if not df.empty: # Check if df is non-empty
84
  df = df[df[tc.LANGS].apply(lambda x: all(lang in x for lang in language_list))]
85
 
86
  if not df.empty:
87
+ # Split dataframe by Open Weight, ensuring mutual exclusivity
88
+ open_weight_true = df[
89
+ (df[tc.OPEN_WEIGHT] == True) &
90
+ (~df[tc.PARAMS].isna())
91
+ ]
92
+ open_weight_false = df[
93
+ (df[tc.OPEN_WEIGHT] == False) |
94
+ (df[tc.PARAMS].isna()) |
95
+ (~df.index.isin(open_weight_true.index)) # Catch any remaining rows
96
+ ]
97
 
98
+ # Verify no overlap and no data loss
99
+ assert len(df) == len(open_weight_true) + len(open_weight_false), "Data loss detected"
100
+ assert len(set(open_weight_true.index) & set(open_weight_false.index)) == 0, "Duplicate entries detected"
101
 
102
  # Filter only the open weight models based on parameters
103
  if not open_weight_true.empty:
104
+ filtered_open = open_weight_true[
105
+ (open_weight_true[tc.PARAMS] >= parameters[0]) &
106
+ (open_weight_true[tc.PARAMS] <= parameters[1])
107
+ ]
 
 
 
 
 
108
 
109
  # Combine filtered open weight models with unfiltered commercial models
110
  df = pd.concat([filtered_open, open_weight_false])
 
140
  if not df.empty: # Check if df is non-empty
141
  df = df[df[tc.LICENSE_NAME].apply(lambda x: any(lic in x for lic in license))]
142
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  df = filter_by_date(df, start_year, start_month, end_year, end_month, tc.TEMP_DATE)
144
 
145
  df = filter_cols(df)
test.py DELETED
@@ -1,10 +0,0 @@
1
- import calendar
2
- import datetime
3
-
4
- today = datetime.date.today()
5
-
6
- year = today.year
7
-
8
- print(year)
9
-
10
- print(list(calendar.month_name[1:]))