Spaces:
Running
Running
Merge open llm leaderboard last changes
Browse files
- README.md +1 -1
- requirements.txt +1 -1
- src/display/css_html_js.py +2 -2
- src/display/utils.py +0 -12
- src/leaderboard/read_evals.py +5 -5
README.md
CHANGED
|
@@ -43,4 +43,4 @@ If you encounter problem on the space, don't hesitate to restart it to remove th
|
|
| 43 |
You'll find
|
| 44 |
- the main table's column names and properties in `src/display/utils.py`
|
| 45 |
- the logic to read all results and request files, then convert them into dataframe rows, in `src/leaderboard/read_evals.py` and `src/populate.py`
|
| 46 |
-
-
|
|
|
|
| 43 |
You'll find
|
| 44 |
- the main table's column names and properties in `src/display/utils.py`
|
| 45 |
- the logic to read all results and request files, then convert them into dataframe rows, in `src/leaderboard/read_evals.py` and `src/populate.py`
|
| 46 |
+
- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
|
requirements.txt
CHANGED
|
@@ -3,7 +3,7 @@ black
|
|
| 3 |
datasets
|
| 4 |
gradio
|
| 5 |
gradio[oauth]
|
| 6 |
-
gradio_leaderboard==0.0.
|
| 7 |
gradio_client
|
| 8 |
huggingface-hub>=0.18.0
|
| 9 |
matplotlib
|
|
|
|
| 3 |
datasets
|
| 4 |
gradio
|
| 5 |
gradio[oauth]
|
| 6 |
+
gradio_leaderboard==0.0.13
|
| 7 |
gradio_client
|
| 8 |
huggingface-hub>=0.18.0
|
| 9 |
matplotlib
|
src/display/css_html_js.py
CHANGED
|
@@ -39,8 +39,8 @@ custom_css = """
|
|
| 39 |
}
|
| 40 |
|
| 41 |
/* Limit the width of the first AutoEvalColumn so that names don't expand too much */
|
| 42 |
-
table td:
|
| 43 |
-
table th:
|
| 44 |
max-width: 400px;
|
| 45 |
overflow: auto;
|
| 46 |
white-space: nowrap;
|
|
|
|
| 39 |
}
|
| 40 |
|
| 41 |
/* Limit the width of the first AutoEvalColumn so that names don't expand too much */
|
| 42 |
+
#leaderboard-table td:nth-child(2),
|
| 43 |
+
#leaderboard-table th:nth-child(2) {
|
| 44 |
max-width: 400px;
|
| 45 |
overflow: auto;
|
| 46 |
white-space: nowrap;
|
src/display/utils.py
CHANGED
|
@@ -91,10 +91,6 @@ class WeightType(Enum):
|
|
| 91 |
class Precision(Enum):
|
| 92 |
float16 = ModelDetails("float16")
|
| 93 |
bfloat16 = ModelDetails("bfloat16")
|
| 94 |
-
float32 = ModelDetails("float32")
|
| 95 |
-
#qt_8bit = ModelDetails("8bit")
|
| 96 |
-
#qt_4bit = ModelDetails("4bit")
|
| 97 |
-
#qt_GPTQ = ModelDetails("GPTQ")
|
| 98 |
Unknown = ModelDetails("?")
|
| 99 |
|
| 100 |
def from_str(precision):
|
|
@@ -102,14 +98,6 @@ class Precision(Enum):
|
|
| 102 |
return Precision.float16
|
| 103 |
if precision in ["torch.bfloat16", "bfloat16"]:
|
| 104 |
return Precision.bfloat16
|
| 105 |
-
if precision in ["float32"]:
|
| 106 |
-
return Precision.float32
|
| 107 |
-
#if precision in ["8bit"]:
|
| 108 |
-
# return Precision.qt_8bit
|
| 109 |
-
#if precision in ["4bit"]:
|
| 110 |
-
# return Precision.qt_4bit
|
| 111 |
-
#if precision in ["GPTQ", "None"]:
|
| 112 |
-
# return Precision.qt_GPTQ
|
| 113 |
return Precision.Unknown
|
| 114 |
|
| 115 |
# Column selection
|
|
|
|
| 91 |
class Precision(Enum):
|
| 92 |
float16 = ModelDetails("float16")
|
| 93 |
bfloat16 = ModelDetails("bfloat16")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
Unknown = ModelDetails("?")
|
| 95 |
|
| 96 |
def from_str(precision):
|
|
|
|
| 98 |
return Precision.float16
|
| 99 |
if precision in ["torch.bfloat16", "bfloat16"]:
|
| 100 |
return Precision.bfloat16
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
return Precision.Unknown
|
| 102 |
|
| 103 |
# Column selection
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -190,10 +190,10 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
| 190 |
|
| 191 |
results = []
|
| 192 |
for v in eval_results.values():
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
|
| 199 |
return results
|
|
|
|
| 190 |
|
| 191 |
results = []
|
| 192 |
for v in eval_results.values():
|
| 193 |
+
try:
|
| 194 |
+
v.to_dict() # we test if the dict version is complete
|
| 195 |
+
results.append(v)
|
| 196 |
+
except KeyError: # not all eval values present
|
| 197 |
+
continue
|
| 198 |
|
| 199 |
return results
|