|
|
|
|
|
import pandas as pd |
|
import assets.text_content as tc |
|
|
|
def filter_cols(df):
    """Return ``df`` restricted to a fixed set of columns, in a fixed order.

    The columns are looked up by the names stored in the ``tc`` constants
    listed below; any other column present in ``df`` is left out of the
    result.
    """
    wanted_columns = [
        tc.MODEL_NAME,
        tc.CLEMSCORE,
        tc.INPUT,
        tc.OUTPUT,
        tc.LATENCY,
        tc.CONTEXT,
        tc.PARAMS,
        tc.RELEASE_DATE,
        tc.LICENSE,
    ]
    return df[wanted_columns]
|
|
|
|
|
def filter(df, language_list, parameters, input_price, output_price, multimodal,
           context, open_weight, start, end, license):
    """Filter ``df`` by the given criteria, then project and sort the result.

    The row filters are applied one after another. Each one is skipped when
    the frame is already empty, so no mask is ever built on an empty frame.

    Args:
        df: Frame whose columns are named by the ``tc`` constants used below.
        language_list: Items that must *all* be contained (``in``) in a row's
            ``tc.LANGS`` value for the row to be kept.
        parameters: Indexable pair ``(low, high)`` for ``tc.PARAMS``. It is
            applied to open-weight rows only; rows whose ``tc.OPEN_WEIGHT``
            is ``False`` pass through this step untouched. The upper bound is
            ignored when it is at or above the largest ``tc.PARAMS`` value
            among the open-weight rows.
        input_price: Indexable pair of inclusive bounds for ``tc.INPUT``.
        output_price: Indexable pair of inclusive bounds for ``tc.OUTPUT``.
        multimodal: Container of modality column names (``tc.SINGLE_IMG``,
            ``tc.MULT_IMG``, ``tc.AUDIO``, ``tc.VIDEO``). Every name it
            contains requires that column to equal ``True``.
        context: Not used by this function; kept so existing callers keep
            working.
        open_weight: Container that may hold ``tc.OPEN`` and/or ``tc.COMM``.
            Only ``tc.OPEN`` keeps rows with ``tc.OPEN_WEIGHT == True``, only
            ``tc.COMM`` keeps rows with ``tc.OPEN_WEIGHT == False``, both
            keep everything, neither yields an empty frame.
        start: Lower bound of the date range (anything ``pd.to_datetime``
            accepts), inclusive.
        end: Upper bound of the date range, inclusive.
        license: Iterable of values; a row is kept when *any* of them is
            contained (``in``) in the row's ``tc.LICENSE_NAME`` value.

    Returns:
        The surviving rows, reduced by ``filter_cols`` and sorted by
        ``tc.CLEMSCORE`` in descending order.
    """
    # Language filter: every requested language must be present in the row.
    if not df.empty:
        has_all_langs = df[tc.LANGS].apply(
            lambda row_langs: all(lang in row_langs for lang in language_list)
        )
        df = df[has_all_langs]

    # Parameter-size filter, applied to open-weight rows only.
    # NOTE: `== True` / `== False` are deliberate element-wise comparisons;
    # they also behave sensibly on object-dtype columns.
    if not df.empty:
        open_rows = df[df[tc.OPEN_WEIGHT] == True]  # noqa: E712
        # Only rebuild the frame when there is something to size-filter, so
        # the filtered subset is always defined before it is concatenated.
        if not open_rows.empty:
            closed_rows = df[df[tc.OPEN_WEIGHT] == False]  # noqa: E712
            largest_size = open_rows[tc.PARAMS].max()
            size_ok = open_rows[tc.PARAMS] >= parameters[0]
            if parameters[1] >= largest_size:
                # Upper bound sits at/above the largest value present:
                # treat it as "no upper limit".
                keep = size_ok
            else:
                keep = size_ok & (open_rows[tc.PARAMS] <= parameters[1])
            df = pd.concat([open_rows[keep], closed_rows])

    # Price filters (inclusive on both ends).
    if not df.empty:
        df = df[(df[tc.INPUT] >= input_price[0]) & (df[tc.INPUT] <= input_price[1])]

    if not df.empty:
        df = df[(df[tc.OUTPUT] >= output_price[0]) & (df[tc.OUTPUT] <= output_price[1])]

    # Modality filters: each selected modality must be flagged True.
    if not df.empty:
        for modality in (tc.SINGLE_IMG, tc.MULT_IMG, tc.AUDIO, tc.VIDEO):
            if modality in multimodal:
                df = df[df[modality] == True]  # noqa: E712

    # Open-weight / commercial selection.
    if not df.empty:
        wants_open = tc.OPEN in open_weight
        wants_comm = tc.COMM in open_weight
        if wants_open and not wants_comm:
            df = df[df[tc.OPEN_WEIGHT] == True]  # noqa: E712
        elif wants_comm and not wants_open:
            df = df[df[tc.OPEN_WEIGHT] == False]  # noqa: E712
        elif not wants_open and not wants_comm:
            # Nothing selected: return no rows but keep the column layout.
            df = pd.DataFrame(columns=df.columns)

    # License filter: any of the requested values may match.
    if not df.empty:
        license_ok = df[tc.LICENSE_NAME].apply(
            lambda row_license: any(lic in row_license for lic in license)
        )
        df = df[license_ok]

    # Convert the date column to whole epoch seconds for the range check.
    if not df.empty:
        # `df` is the result of boolean indexing here; take an explicit copy
        # before writing a column so pandas does not emit a
        # SettingWithCopyWarning.
        df = df.copy()
        # Explicit int64 avoids depending on the platform's default int width.
        # NOTE(review): `// 10**9` assumes nanosecond resolution - confirm if
        # the column can carry another datetime unit.
        df[tc.TEMP_DATE] = pd.to_datetime(df[tc.TEMP_DATE]).astype("int64") // 10**9

    # Bounds are converted unconditionally, as before, so unparsable input
    # raises even when no rows are left.
    start = int(pd.to_datetime(start).timestamp())
    end = int(pd.to_datetime(end).timestamp())

    if not df.empty:
        df = df[(df[tc.TEMP_DATE] >= start) & (df[tc.TEMP_DATE] <= end)]

    df = filter_cols(df)
    df = df.sort_values(by=tc.CLEMSCORE, ascending=False)

    return df
|
|
|
|