chore: clean up
- .gitignore +1 -0
- app.py +8 -0
- utils.py +7 -11
.gitignore CHANGED
@@ -15,3 +15,4 @@ logs/
 .idea/
 .venv/
 toys/
+.DS_Store
app.py CHANGED
@@ -290,6 +290,14 @@ with demo:
     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
     with gr.Row():
         gr.Markdown("## ✉️Submit your model here!", elem_classes="markdown-text")
+    with gr.Row():
+        with gr.Column():
+            benchmark_version = gr.Dropdown(
+                ['AIR-Bench_24.04',], value=['AIR-Bench_24.04',], interactive=True, label="AIR-Bench Version")
+        with gr.Column():
+            model_name_textbox = gr.Textbox(label="Model name")
+        with gr.Column():
+            model_url = gr.Textbox(label="Model URL")
     with gr.Row():
         file_output = gr.File()
     with gr.Row():
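The added rows simply place three submission inputs above the existing file widget. Below is a minimal, self-contained sketch of the same layout, assuming nothing about the rest of the app (the real app wires these inputs into a submit callback; in this sketch the Dropdown value is a plain string, the usual single-select form):

import gradio as gr

# Sketch of the submission form added in app.py (layout assumed from the diff).
with gr.Blocks() as demo:
    gr.Markdown("## ✉️Submit your model here!")
    with gr.Row():
        with gr.Column():
            benchmark_version = gr.Dropdown(
                ["AIR-Bench_24.04"], value="AIR-Bench_24.04",
                interactive=True, label="AIR-Bench Version")
        with gr.Column():
            model_name_textbox = gr.Textbox(label="Model name")
        with gr.Column():
            model_url = gr.Textbox(label="Model URL")
    with gr.Row():
        file_output = gr.File()

if __name__ == "__main__":
    demo.launch()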
utils.py CHANGED
@@ -1,14 +1,10 @@
-
-import os
-
-from src.display.formatting import styled_error, styled_message, styled_warning
+from typing import List
 
-
+import pandas as pd
 
-from src.display.utils import AutoEvalColumnQA, AutoEvalColumnLongDoc, COLS_QA, COLS_LONG_DOC, QA_BENCHMARK_COLS, LONG_DOC_BENCHMARK_COLS
 from src.benchmarks import BENCHMARK_COLS_QA, BENCHMARK_COLS_LONG_DOC, BenchmarksQA, BenchmarksLongDoc
+from src.display.utils import AutoEvalColumnQA, AutoEvalColumnLongDoc, COLS_QA, COLS_LONG_DOC
 from src.leaderboard.read_evals import FullEvalResult, get_leaderboard_df
-from typing import List
 
 
 def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
@@ -41,7 +37,7 @@ def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
     return df[(df[AutoEvalColumnQA.retrieval_model.name].str.contains(query, case=False))]
 
 
-def select_columns(df: pd.DataFrame, domain_query: list, language_query: list, task: str="qa") -> pd.DataFrame:
+def select_columns(df: pd.DataFrame, domain_query: list, language_query: list, task: str = "qa") -> pd.DataFrame:
     if task == "qa":
         always_here_cols = [
             AutoEvalColumnQA.retrieval_model.name,
@@ -111,7 +107,7 @@ def update_metric(
     query: str,
 ) -> pd.DataFrame:
     if task == 'qa':
-        leaderboard_df = get_leaderboard_df(raw_data,
+        leaderboard_df = get_leaderboard_df(raw_data, task=task, metric=metric)
         return update_table(
             leaderboard_df,
             domains,
@@ -120,7 +116,7 @@ def update_metric(
             query
         )
     elif task == 'long_doc':
-        leaderboard_df = get_leaderboard_df(raw_data,
+        leaderboard_df = get_leaderboard_df(raw_data, task=task, metric=metric)
         return update_table_long_doc(
             leaderboard_df,
             domains,
@@ -138,4 +134,4 @@ def upload_file(files):
     # print(file_paths)
     # HfApi(token="").upload_file(...)
     # os.remove(fp)
-    return file_paths
+    return file_paths
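The substantive change in utils.py is that update_metric now rebuilds the leaderboard DataFrame for the currently selected task and metric via get_leaderboard_df(raw_data, task=task, metric=metric). A rough, self-contained sketch of that flow follows; get_leaderboard_df here is a hypothetical stand-in (the real one lives in src.leaderboard.read_evals and consumes FullEvalResult objects):

import pandas as pd

# Hypothetical stand-in: keep only results for the requested task/metric.
def get_leaderboard_df(raw_data: list, task: str, metric: str) -> pd.DataFrame:
    rows = [r for r in raw_data if r["task"] == task and r["metric"] == metric]
    return pd.DataFrame(rows)

# Simplified analogue of update_metric: rebuild the table from raw results
# whenever the selected metric changes, rather than reusing a stale DataFrame.
def update_metric(raw_data: list, task: str, metric: str) -> pd.DataFrame:
    if task in ("qa", "long_doc"):
        return get_leaderboard_df(raw_data, task=task, metric=metric)
    raise ValueError(f"unknown task: {task}")

raw = [
    {"task": "qa", "metric": "ndcg_at_10", "retrieval_model": "bge-m3", "score": 0.61},
    {"task": "long_doc", "metric": "ndcg_at_10", "retrieval_model": "bge-m3", "score": 0.55},
]
print(update_metric(raw, task="qa", metric="ndcg_at_10"))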