Commit b3eff40
Parent(s): 09b313f
[ADD] Submit form, upload requests to requests dataset
Files changed:
- .gitignore +2 -0
- app.py +30 -37
- medic-harness-results/meta-llama/Llama-3.1-8B-Instruct/results_2024-07-24T15:26:36Z.json +1 -1
- src/display/utils.py +19 -16
- src/populate.py +0 -3
- src/submission/submit.py +25 -60
.gitignore
CHANGED
@@ -12,4 +12,6 @@ eval-queue-bk/
 eval-results-bk/
 eval-queue-local/
 eval-results-local/
+medic-harness-requests/
+medic-harness-results/
 logs/
app.py
CHANGED
@@ -361,7 +361,7 @@ with demo:
 gr.Markdown(LLM_BENCHMARKS_TEXT_2, elem_classes="markdown-text")
 # gr.HTML(ENTITY_DISTRIBUTION_IMG, elem_classes="logo")
 gr.Markdown(LLM_BENCHMARKS_TEXT_3, elem_classes="markdown-text")
-
+
 with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=4):
     with gr.Column():
         with gr.Row():

@@ -407,16 +407,8 @@ with demo:
 
 with gr.Row():
     with gr.Column():
-
         model_name_textbox = gr.Textbox(label="Model name")
-
-        revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+        revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-
-        model_arch = gr.Radio(
-            choices=[t.to_str(" : ") for t in ModelArch if t != ModelArch.Unknown],
-            label="Model Architecture",
-        )
-
         model_type = gr.Dropdown(
             choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
             label="Model type",

@@ -426,29 +418,32 @@ with demo:
         )
 
     with gr.Column():
-        [six removed widget lines; not preserved in this view]
+        precision = gr.Dropdown(
+            choices=[i.value.name for i in Precision if i != Precision.Unknown],
+            label="Precision",
+            multiselect=False,
+            value="float16",
+            interactive=True,
         )
-
-        choices=[ …
-        label=" …
+        weight_type = gr.Dropdown(
+            choices=[i.value.name for i in WeightType],
+            label="Weights type",
             multiselect=False,
-        value= …
+            value=WeightType.Original.value.name,
             interactive=True,
-        [ten removed widget lines; not preserved in this view]
+        )
+        base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)", interactive=False)
+        with gr.Row():
+            domain_specific_toggle = gr.Checkbox(
+                label="Domain specific",
+                value=False,
+                info="Is your model medically oriented?",
+            )
+            chat_template_toggle = gr.Checkbox(
+                label="Use chat template",
+                value=False,
+                info="Is your model a chat model?",
+            )
 
     submit_button = gr.Button("Submit Eval")
     submission_result = gr.Markdown()

@@ -456,15 +451,13 @@ with demo:
         add_new_eval,
         [
             model_name_textbox,
-
+            base_model_name_textbox,
             revision_name_textbox,
-            model_arch,
-            label_normalization_map,
-            gliner_threshold,
-            gliner_tokenizer_bool,
-            prompt_name,
-            # weight_type,
             model_type,
+            domain_specific_toggle,
+            chat_template_toggle,
+            precision,
+            weight_type
         ],
         submission_result,
     )
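For orientation, here is a minimal, self-contained sketch (not the Space's actual code; component names and choice lists are illustrative) of how the reworked submit form feeds add_new_eval. gr.Button.click passes the listed components' current values to the callback positionally, so the order of the inputs list above must line up with the add_new_eval signature in src/submission/submit.py.

import gradio as gr

def add_new_eval(model, base_model, revision, model_type,
                 domain_specific, chat_template, precision, weight_type):
    # Stub standing in for src/submission/submit.py:add_new_eval.
    return f"Received request for {model} ({precision}, {weight_type})"

with gr.Blocks() as demo:
    model_name = gr.Textbox(label="Model name")
    base_model_name = gr.Textbox(label="Base model (for delta or adapter weights)")
    revision = gr.Textbox(label="Revision commit", placeholder="main")
    model_type = gr.Dropdown(choices=["🟢 : pretrained", "⭕ : instruction-tuned"], label="Model type")
    domain_specific = gr.Checkbox(label="Domain specific", value=False)
    chat_template = gr.Checkbox(label="Use chat template", value=False)
    precision = gr.Dropdown(choices=["float16", "bfloat16"], value="float16", label="Precision")
    weight_type = gr.Dropdown(choices=["Original", "Delta", "Adapter"], value="Original", label="Weights type")
    submit_button = gr.Button("Submit Eval")
    submission_result = gr.Markdown()
    # Positional wiring: the inputs order matches the add_new_eval parameter order.
    submit_button.click(
        add_new_eval,
        [model_name, base_model_name, revision, model_type,
         domain_specific, chat_template, precision, weight_type],
        submission_result,
    )

if __name__ == "__main__":
    demo.launch()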
medic-harness-results/meta-llama/Llama-3.1-8B-Instruct/results_2024-07-24T15:26:36Z.json
CHANGED
@@ -3,7 +3,7 @@
     "model_name": "meta-llama/Llama-3.1-8B-Instruct",
     "revision": "main",
     "submitted_time": "2024-07-24 14:33:56+00:00",
-    "model_type": " …
+    "model_type": "instruction-tuned",
     "num_params": 8000000000,
     "private": false,
     "evaluated_time": "2024-07-24T15:26:36Z"
src/display/utils.py
CHANGED
@@ -58,9 +58,9 @@ class EvalQueueColumn: # Queue column
     model = ColumnContent("model", "markdown", True)
     revision = ColumnContent("revision", "str", True)
     private = ColumnContent("private", "bool", True)
-    [three removed column definitions; not preserved in this view]
+    model_type = ColumnContent("model_type", "str", True)
+    precision = ColumnContent("precision", "str", True)
+    weight_type = ColumnContent("weight_type", "str", "Original")
     status = ColumnContent("status", "str", True)

@@ -73,12 +73,13 @@ class ModelDetails:
 
 
 class ModelType(Enum):
-    ZEROSHOT = ModelDetails(name="zero-shot", symbol="⚫")
-    FINETUNED = ModelDetails(name="fine-tuned", symbol="⚪")
+    # ZEROSHOT = ModelDetails(name="zero-shot", symbol="⚫")
+    # FINETUNED = ModelDetails(name="fine-tuned", symbol="⚪")
     PT = ModelDetails(name="pretrained", symbol="🟢")
-    FT = ModelDetails(name="fine-tuned", symbol="🔶")
-    # …
-    …
+    # FT = ModelDetails(name="fine-tuned", symbol="🔶")
+    # DS = ModelDetails(name="domain-specific", symbol="➕")
+    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
+    RL = ModelDetails(name="preference-tuned", symbol="🟦")
     Unknown = ModelDetails(name="", symbol="?")
 
     def to_str(self, separator=" "):

@@ -86,18 +87,20 @@ class ModelType(Enum):
 
     @staticmethod
     def from_str(type):
-        if "zero-shot" in type or "⚫" in type:
-            …
-        if "fine-tuned" in type or "⚪" in type:
-            …
+        # if "zero-shot" in type or "⚫" in type:
+        #     return ModelType.ZEROSHOT
+        # if "fine-tuned" in type or "⚪" in type:
+        #     return ModelType.FINETUNED
         # if "fine-tuned" in type or "🔶" in type:
         #     return ModelType.FT
-        …
-        …
+        if "pretrained" in type or "🟢" in type:
+            return ModelType.PT
         # if "RL-tuned" in type or "🟦" in type:
         #     return ModelType.RL
-        …
-        …
+        if "instruction-tuned" in type or "⭕" in type:
+            return ModelType.IFT
+        # if "domain-specific" in type or "➕" in type:
+        #     return ModelType.DS
         return ModelType.Unknown
 
 class ModelArch(Enum):
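The submit form stores the model type as the string produced by to_str(" : ") (symbol, separator, name), and from_str maps that string back onto the enum when requests and results are read. A small round-trip sketch, assuming ModelDetails is a plain dataclass with name and symbol fields (an assumption; the diff only shows the enum members and the from_str checks):

from dataclasses import dataclass
from enum import Enum

@dataclass
class ModelDetails:
    name: str
    symbol: str = ""

class ModelType(Enum):
    PT = ModelDetails(name="pretrained", symbol="🟢")
    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
    RL = ModelDetails(name="preference-tuned", symbol="🟦")
    Unknown = ModelDetails(name="", symbol="?")

    def to_str(self, separator=" "):
        # Assumed formatting: "<symbol><separator><name>", matching the
        # "<emoji> : <name>" choices built in app.py.
        return f"{self.value.symbol}{separator}{self.value.name}"

    @staticmethod
    def from_str(type):
        if "pretrained" in type or "🟢" in type:
            return ModelType.PT
        if "instruction-tuned" in type or "⭕" in type:
            return ModelType.IFT
        return ModelType.Unknown

choice = ModelType.IFT.to_str(" : ")        # "⭕ : instruction-tuned", as shown in the dropdown
assert ModelType.from_str(choice) is ModelType.IFT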
src/populate.py
CHANGED
@@ -29,16 +29,13 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
     """Creates the different dataframes for the evaluation queues requestes"""
     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
     all_evals = []
-
     for entry in entries:
         if ".json" in entry:
             file_path = os.path.join(save_path, entry)
             with open(file_path) as fp:
                 data = json.load(fp)
-
             data[EvalQueueColumn.model.name] = make_clickable_model(data["model_name"])
             data[EvalQueueColumn.revision.name] = data.get("revision", "main")
-
             all_evals.append(data)
         elif ".md" not in entry:
             # this is a folder
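populate.py itself only loses blank lines here, but it is the read-back side of the new request files: every *_eval_request.json uploaded to the requests dataset is loaded and turned into rows for the queue tables. A rough sketch of that path under assumed names (EVAL_REQUESTS_PATH pointing at a local copy of the medic-harness-requests dataset), not the Space's exact code:

import glob
import json
import os

import pandas as pd

EVAL_REQUESTS_PATH = "medic-harness-requests"  # assumed local path to the requests dataset

def load_requests(save_path: str, status: str = "PENDING") -> pd.DataFrame:
    # Walk every request file and keep the ones in the wanted state.
    rows = []
    for path in glob.glob(os.path.join(save_path, "**", "*.json"), recursive=True):
        with open(path) as fp:
            data = json.load(fp)
        if data.get("status") == status:
            rows.append({
                "model": data["model_name"],
                "revision": data.get("revision", "main"),
                "precision": data.get("precision", ""),
                "weight_type": data.get("weight_type", "Original"),
                "status": data["status"],
            })
    return pd.DataFrame(rows)

print(load_requests(EVAL_REQUESTS_PATH))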
src/submission/submit.py
CHANGED
@@ -42,16 +42,13 @@ PLACEHOLDER_DATASET_WISE_NORMALIZATION_CONFIG = """{
 
 def add_new_eval(
     model: str,
-
+    base_model: str,
     revision: str,
-    # precision: str,
-    # weight_type: str,
-    model_arch: str,
-    label_normalization_map: str,
-    gliner_threshold:str,
-    gliner_tokenizer_bool:str,
-    prompt_template_name:str,
     model_type: str,
+    domain_specific: bool,
+    chat_template: bool,
+    precision: str,
+    weight_type: str,
 ):
     """
     Saves request if valid else returns the error.

@@ -85,22 +82,16 @@ def add_new_eval(
     if revision == "":
         revision = "main"
 
-    [five removed lines; not preserved in this view]
+    # Is the model on the hub?
+    if weight_type in ["Delta", "Adapter"]:
+        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
+        if not base_model_on_hub:
+            return styled_error(f'Base model "{base_model}" {error}')
 
-    if not …
+    if not weight_type == "Adapter":
         model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
         if not model_on_hub:
             return styled_error(f'Model "{model}" {error}')
-    else:
-        model_name_matches = list(API.list_models(model_name=model))
-        if len(model_name_matches) < 1:
-            return styled_error(f'Model "{model}" does not exist on the hub!')
-        elif model_name_matches[0].id != model:
-            return styled_error(f'Model "{model}" does not exist on the hub! There might be a typo in the name')
 
 
     # Is the model info correctly filled?

@@ -122,39 +113,15 @@ def add_new_eval(
         return styled_error(error_msg)
 
     # Verify the inference config now
-    try:
-        …
-    except Exception as e:
-        …
-
-    inference_config = {
-        # "model_arch" : model_arch,
-        "label_normalization_map": label_normalization_map,
-    }
+    # try:
+    #     label_normalization_map = ast.literal_eval(label_normalization_map)
+    # except Exception as e:
+    #     return styled_error("Please enter a valid json for the labe; normalization map")
 
-    [four removed lines; not preserved in this view]
-            if not prompt_template_name in [prompt_template.value for prompt_template in PromptTemplateName]:
-                return styled_error("Prompt template name is invalid")
-            inference_config = {
-                **inference_config,
-                "prompt_template_identifier": prompt_template_name,
-            }
-        case "GLiNER Encoder":
-            try:
-                gliner_threshold = float(gliner_threshold)
-                gliner_tokenizer_bool = ast.literal_eval(gliner_tokenizer_bool)
-                inference_config = {
-                    **inference_config,
-                    "gliner_threshold": gliner_threshold,
-                    "gliner_tokenizer_bool" : gliner_tokenizer_bool
-                }
-            except Exception as e:
-                return styled_error("Please enter a valid float for the threshold")
-        case _:
-            return styled_error("Model Architecture is invalid")
+    # inference_config = {
+    #     # "model_arch" : model_arch,
+    #     "label_normalization_map": label_normalization_map,
+    # }
 
     # Seems good, creating the eval
     print("Adding new eval")

@@ -162,11 +129,10 @@ def add_new_eval(
 
     eval_entry = {
         "model_name": model,
-
+        "base_model": base_model,
         "revision": revision,
-
-
-        "model_architecture": model_arch,
+        "precision": precision,
+        "weight_type": weight_type,
         "status": "PENDING",
         "submitted_time": current_time,
         "model_type": model_type,

@@ -174,18 +140,17 @@ def add_new_eval(
         "num_params": model_size,
         "license": license,
         "private": False,
-        "inference_config":inference_config,
     }
 
     # Check for duplicate submission
 
-    if f"{model}_{revision}" in REQUESTED_MODELS:
+    if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
        return styled_warning("This model has been already submitted. Add the revision if the model has been updated.")
 
     print("Creating eval file")
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
     os.makedirs(OUT_DIR, exist_ok=True)
-    out_path = f"{OUT_DIR}/{model_path}_{revision}_eval_request.json"
+    out_path = f"{OUT_DIR}/{model_path}_{revision}_{precision}_{weight_type}_eval_request.json"
 
     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))

@@ -193,7 +158,7 @@ def add_new_eval(
     print("Uploading eval file")
     API.upload_file(
         path_or_fileobj=out_path,
-        path_in_repo=out_path.split(" …
+        path_in_repo=out_path.split(f"{EVAL_REQUESTS_PATH}/")[1],
         repo_id=QUEUE_REPO,
         repo_type="dataset",
         commit_message=f"Add {model} to eval queue",
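Taken together, a submission now records base_model, precision and weight_type, is de-duplicated on model + revision + precision, and is written to the requests dataset under a file name that encodes all of them. The sketch below reproduces just that bookkeeping with illustrative values (the medic-harness-requests path, the model_type string and the omitted num_params/license fields are assumptions, not the Space's exact output):

import json
import os
from datetime import datetime, timezone

EVAL_REQUESTS_PATH = "medic-harness-requests"   # assumed local copy of the requests dataset
user_name, model_path = "meta-llama", "Llama-3.1-8B-Instruct"
model = f"{user_name}/{model_path}"
revision, precision, weight_type = "main", "float16", "Original"

eval_entry = {
    "model_name": model,
    "base_model": "",
    "revision": revision,
    "precision": precision,
    "weight_type": weight_type,
    "status": "PENDING",
    "submitted_time": datetime.now(timezone.utc).isoformat(),
    "model_type": "⭕ : instruction-tuned",
    "private": False,
}

# Duplicate submissions are now keyed on model, revision and precision.
dedup_key = f"{model}_{revision}_{precision}"

out_dir = f"{EVAL_REQUESTS_PATH}/{user_name}"
os.makedirs(out_dir, exist_ok=True)
out_path = f"{out_dir}/{model_path}_{revision}_{precision}_{weight_type}_eval_request.json"
with open(out_path, "w") as f:
    f.write(json.dumps(eval_entry))

# path_in_repo strips the local prefix so the dataset mirrors the per-org folder layout:
print(dedup_key)
print(out_path.split(f"{EVAL_REQUESTS_PATH}/")[1])
# -> meta-llama/Llama-3.1-8B-Instruct_main_float16_Original_eval_request.json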