Commit 6269bd0 · 1 Parent(s): 1a3f05a

Allow old model metrics
src/display/changelog.py
CHANGED
@@ -1,6 +1,10 @@
 CHANGELOG_TEXT = f"""
 # Changes made to the leaderboard
 
-### [
+### [1.1.0] - 2024-02-16
+Removed the Sparrow POR benchmark from the leaderboard because of its low-quality annotations
+Added the HateBR Offensive, PT Hate Speech and tweetSentBR benchmarks to the leaderboard, and started a new evaluation queue for these benchmarks
+
+### [1.0.0] - 2024-02-01
 Prototype version launched with 7 benchmarks: ENEM, BLUEX, OAB Exams, ASSIN 2 RTE and STS, FAQUAD NLI and SPARROW POR
 """

src/leaderboard/read_evals.py
CHANGED
@@ -216,7 +216,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
         with open(tmp_request_file, "r") as f:
             req_content = json.load(f)
             if (
-                req_content["status"] in ["FINISHED"]
+                req_content["status"] in ["FINISHED", "PENDING_NEW_EVAL"]
                 and req_content["precision"] == precision.split(".")[-1]
            ):
                 request_file = tmp_request_file
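For context on the condition being widened: `precision` arrives as an enum-style string such as "Precision.float16", so `precision.split(".")[-1]` recovers the bare name ("float16") stored in the request JSON, and adding "PENDING_NEW_EVAL" lets models queued for re-evaluation keep surfacing their old metrics. Below is a minimal, self-contained sketch of the surrounding selection loop; the file-name pattern and the function name pick_request_file are assumptions for illustration, not the project's actual layout:

import glob
import json

def pick_request_file(requests_path, model_name, precision):
    # Return the last request file whose status and precision match.
    # `precision` is assumed to look like "Precision.float16".
    request_file = ""
    for tmp_request_file in sorted(glob.glob(f"{requests_path}/{model_name}_eval_request_*.json")):
        with open(tmp_request_file, "r") as f:
            req_content = json.load(f)
        if (
            # "PENDING_NEW_EVAL" is the new addition: requests queued for
            # re-evaluation still match, so their old metrics stay visible.
            req_content["status"] in ["FINISHED", "PENDING_NEW_EVAL"]
            and req_content["precision"] == precision.split(".")[-1]
        ):
            request_file = tmp_request_file
    return request_file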
@@ -262,7 +262,7 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
     results = []
     for v in eval_results.values():
         try:
-            if v.status
+            if v.status in ["FINISHED", "PENDING_NEW_EVAL"] and not v.hidden:
                 v.to_dict()  # we test if the dict version is complete
                 results.append(v)
         except KeyError as e:  # not all eval values present
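The same status widening is applied where results are gathered for display, with an extra `not v.hidden` guard; `v.to_dict()` doubles as a completeness check because it raises KeyError when any expected score is missing. A hedged sketch of that pattern, where EvalResult is a stand-in dataclass and the benchmark keys are illustrative, not the project's real class:

from dataclasses import dataclass, field

@dataclass
class EvalResult:  # stand-in for the project's result class
    status: str
    hidden: bool = False
    results: dict = field(default_factory=dict)

    def to_dict(self):
        # Raises KeyError when a score is missing; the caller relies on
        # this as its completeness test.
        return {"average": sum(self.results[b] for b in ("enem", "bluex")) / 2}

def keep_displayable(eval_results):
    results = []
    for v in eval_results.values():
        try:
            # Widened by this commit: finished runs and runs awaiting
            # re-evaluation are both shown, unless explicitly hidden.
            if v.status in ["FINISHED", "PENDING_NEW_EVAL"] and not v.hidden:
                v.to_dict()  # we test if the dict version is complete
                results.append(v)
        except KeyError:  # not all eval values present
            continue
    return results

With these stand-ins, an entry with status "PENDING_NEW_EVAL" and both scores present is kept, while one with a missing score is silently dropped.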