HugSib committed on
Commit
3c64e23
·
verified ·
1 Parent(s): 1045c52

feat : added search + checkboxes + documentation

Browse files
Files changed (3) hide show
  1. app.py +114 -46
  2. model_handler.py +105 -0
  3. utils.py +37 -0
app.py CHANGED
@@ -1,8 +1,9 @@
1
- from data.model_handler import ModelHandler
2
- from app.utils import add_rank_and_format, get_refresh_function
3
  import gradio as gr
4
 
5
- METRICS = ["ndcg_at_5", "recall_at_1", "recall_at_5", "mrr_at_5"]
 
 
 
6
 
7
  def main():
8
  model_handler = ModelHandler()
@@ -31,52 +32,119 @@ def main():
31
  .filter-checkbox-group {
32
  max-width: max-content;
33
  }
 
 
 
 
 
34
  """
35
 
36
  with gr.Blocks(css=css) as block:
37
- gr.Markdown("# ViDoRe: The Visual Document Retrieval Benchmark πŸ“šπŸ”")
38
- gr.Markdown("## From the paper - ColPali: Efficient Document Retrieval with Vision Language Models πŸ‘€")
39
-
40
- gr.Markdown(
41
- """
42
- Visual Document Retrieval Benchmark leaderboard. To submit, refer to the <a href="https://github.com/tonywu71/vidore-benchmark/" target="_blank" style="text-decoration: underline">ViDoRe GitHub repository</a>. Refer to the [ColPali paper](https://arxiv.org/abs/XXXX.XXXXX) for details on metrics, tasks and models.
43
- """
44
- )
45
- #all_columns = list(data.columns)
46
- #default_columns = all_columns
47
-
48
- with gr.Row():
49
- metric_dropdown = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
50
- #column_checkboxes = gr.CheckboxGroup(choices=all_columns, value=default_columns, label="Select Columns to Display")
51
-
52
- with gr.Row():
53
- datatype = ["number", "markdown"] + ["number"] * (NUM_DATASETS + 1)
54
- dataframe = gr.Dataframe(data, datatype=datatype, type="pandas")
55
-
56
- with gr.Row():
57
- refresh_button = gr.Button("Refresh")
58
- refresh_button.click(get_refresh_function(), inputs=[metric_dropdown], outputs=dataframe, concurrency_limit=20)
59
-
60
-
61
- # Automatically refresh the dataframe when the dropdown value changes
62
- metric_dropdown.change(get_refresh_function(), inputs=[metric_dropdown], outputs=dataframe)
63
- #column_checkboxes.change(get_refresh_function(), inputs=[metric_dropdown, column_checkboxes], outputs=dataframe)
64
-
65
-
66
- gr.Markdown(
67
- f"""
68
- - **Total Datasets**: {NUM_DATASETS}
69
- - **Total Scores**: {NUM_SCORES}
70
- - **Total Models**: {NUM_MODELS}
71
- """
72
- + r"""
73
- Please consider citing:
74
-
75
- ```bibtex
76
- INSERT LATER
77
- ```
78
- """
79
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  block.queue(max_size=10).launch(debug=True)
82
 
 
 
 
1
  import gradio as gr
2
 
3
+ from app.utils import add_rank_and_format, filter_models, get_refresh_function
4
+ from data.model_handler import ModelHandler
5
+
6
+ METRICS = ["ndcg_at_5", "recall_at_1"]
7
 
8
  def main():
9
  model_handler = ModelHandler()
 
32
  .filter-checkbox-group {
33
  max-width: max-content;
34
  }
35
+
36
+ #markdown size
37
+ .markdown {
38
+ font-size: 1rem;
39
+ }
40
  """
41
 
42
  with gr.Blocks(css=css) as block:
43
+ with gr.Tabs():
44
+ with gr.TabItem("πŸ† Leaderboard"):
45
+ gr.Markdown("# ViDoRe: The Visual Document Retrieval Benchmark πŸ“šπŸ”")
46
+ gr.Markdown("## From the paper - ColPali: Efficient Document Retrieval with Vision Language Models πŸ‘€")
47
+
48
+ gr.Markdown(
49
+ """
50
+ Visual Document Retrieval Benchmark leaderboard. To submit, refer to the corresponding tab.
51
+
52
+ Refer to the [ColPali paper](https://arxiv.org/abs/XXXX.XXXXX) for details on metrics, tasks and models.
53
+ """
54
+ )
55
+ datasets_columns = list(data.columns[3:])
56
+ anchor_columns = list(data.columns[:3])
57
+ default_columns = anchor_columns + datasets_columns
58
+
59
+ with gr.Row():
60
+ metric_dropdown = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
61
+ research_textbox = gr.Textbox(placeholder="πŸ” Search Models... [press enter]", label="Filter Models by Name", )
62
+ column_checkboxes = gr.CheckboxGroup(choices=datasets_columns, value=default_columns, label="Select Columns to Display")
63
+
64
+ with gr.Row():
65
+ datatype = ["number", "markdown"] + ["number"] * (NUM_DATASETS + 1)
66
+ dataframe = gr.Dataframe(data, datatype=datatype, type="pandas")
67
+
68
+ def update_data(metric, search_term, selected_columns):
69
+ data = model_handler.get_vidore_data(metric)
70
+ data = add_rank_and_format(data)
71
+ data = filter_models(data, search_term)
72
+ if selected_columns:
73
+ selected_columns = selected_columns
74
+ data = data[selected_columns]
75
+ return data
76
+
77
+ with gr.Row():
78
+ refresh_button = gr.Button("Refresh")
79
+ refresh_button.click(get_refresh_function(), inputs=[metric_dropdown], outputs=dataframe, concurrency_limit=20)
80
+
81
+
82
+ # Automatically refresh the dataframe when the dropdown value changes
83
+ metric_dropdown.change(get_refresh_function(), inputs=[metric_dropdown], outputs=dataframe)
84
+ research_textbox.submit(
85
+ lambda metric, search_term, selected_columns: update_data(metric, search_term, selected_columns),
86
+ inputs=[metric_dropdown, research_textbox, column_checkboxes],
87
+ outputs=dataframe
88
+ )
89
+ column_checkboxes.change(
90
+ lambda metric, search_term, selected_columns: update_data(metric, search_term, selected_columns),
91
+ inputs=[metric_dropdown, research_textbox, column_checkboxes],
92
+ outputs=dataframe
93
+ )
94
+
95
+ #column_checkboxes.change(get_refresh_function(), inputs=[metric_dropdown, column_checkboxes], outputs=dataframe)
96
+
97
+
98
+ gr.Markdown(
99
+ f"""
100
+ - **Total Datasets**: {NUM_DATASETS}
101
+ - **Total Scores**: {NUM_SCORES}
102
+ - **Total Models**: {NUM_MODELS}
103
+ """
104
+ + r"""
105
+ Please consider citing:
106
+
107
+ ```bibtex
108
+ INSERT LATER
109
+ ```
110
+ """
111
+ )
112
+ with gr.TabItem("πŸ“š Submit your model"):
113
+ gr.Markdown("# How to Submit a New Model to the Leaderboard")
114
+ gr.Markdown(
115
+ """
116
+ To submit a new model to the ViDoRe leaderboard, follow these steps:
117
+
118
+ 1. **Evaluate your model**:
119
+ - You can either follow the evaluation script provided in the [ViDoRe GitHub repository](https://github.com/tonywu71/vidore-benchmark/)
120
+ - Use your own evaluation script.
121
+
122
+ 2. **Format your submission file**:
123
+ - The submission file should be named `results.json`, and therefore in JSON format.
124
+ - It should have the following structure:
125
+ ```json
126
+ {
127
+ "dataset_name_1": {
128
+ "metric_1": score_1,
129
+ "metric_2": score_2,
130
+ ...
131
+ },
132
+ "dataset_name_2": {
133
+ "metric_1": score_1,
134
+ "metric_2": score_2,
135
+ ...
136
+ },
137
+ }
138
+ ```
139
+ - The dataset names should be the same as viDoRe dataset names listed in the following collection: [ViDoRe Benchmark](https://huggingface.co/collections/vidore/vidore-benchmark-667173f98e70a1c0fa4db00d).
140
+
141
+ 3. **Submit your model**:
142
+ - Create a huggingface model repository with your model and the submission file.
143
+ - Add the tag 'vidore' to your model.
144
+
145
+ And you're done ! Your model will appear on the leaderboard once it is approved by the ViDoRe team.
146
+ """
147
+ )
148
 
149
  block.queue(max_size=10).launch(debug=True)
150
 
model_handler.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from typing import Dict
4
+ from huggingface_hub import HfApi, hf_hub_download, metadata_load
5
+ import pandas as pd
6
+ from .dataset_handler import get_datasets_nickname, VIDORE_DATASETS_KEYWORDS
7
+
8
class ModelHandler:
    """Fetch, cache, and tabulate ViDoRe benchmark results from the Hugging Face Hub.

    Results are discovered on models tagged 'vidore' and cached in-memory in
    ``self.model_infos`` (optionally persisted to ``model_infos_path``).
    """

    def __init__(self, model_infos_path="model_infos.json"):
        self.api = HfApi()
        self.model_infos_path = model_infos_path
        self.model_infos = self._load_model_infos()

    def _load_model_infos(self) -> Dict:
        """Return the cached model infos from disk, or an empty dict if absent."""
        if os.path.exists(self.model_infos_path):
            with open(self.model_infos_path) as f:
                return json.load(f)
        return {}

    def _save_model_infos(self):
        """Persist the in-memory model infos to disk as JSON."""
        with open(self.model_infos_path, "w") as f:
            json.dump(self.model_infos, f)

    def get_vidore_data(self, metric="ndcg_at_5"):
        """Build a models x datasets DataFrame of scores for *metric*.

        Scans every Hub model tagged 'vidore' for `results.json` /
        `*_metrics.json` files, caches their parsed contents, and returns one
        row per model with one column per recognized ViDoRe dataset.
        Returns an empty DataFrame when no results are available.
        """
        models = self.api.list_models(filter="vidore")
        repositories = [model.modelId for model in models]  # type: ignore

        for repo_id in repositories:
            files = [
                f
                for f in self.api.list_repo_files(repo_id)
                if f.endswith("_metrics.json") or f == "results.json"
            ]
            if not files:
                continue
            for file in files:
                # A repo-level results.json is named after the repo; per-model
                # metric files carry the model name in the filename.
                if file.endswith("results.json"):
                    model_name = repo_id.replace("/", "_")
                else:
                    model_name = file.split("_metrics.json")[0]

                if model_name in self.model_infos:
                    continue
                try:
                    # Fix: README download is now inside the try so a repo
                    # without a README no longer aborts the whole refresh.
                    readme_path = hf_hub_download(repo_id, filename="README.md")
                    meta = metadata_load(readme_path)
                    result_path = hf_hub_download(repo_id, filename=file)
                    with open(result_path) as f:
                        results = json.load(f)
                    self.model_infos[model_name] = {"meta": meta, "results": results}
                except Exception as e:
                    print(f"Error loading {model_name} - {e}")
                    continue

        # self._save_model_infos()

        if not self.model_infos:
            return pd.DataFrame()

        model_res = {}
        for model in self.model_infos.keys():
            res = self.model_infos[model]["results"]
            dataset_res = {}
            for dataset in res.keys():
                # Keep only datasets whose name matches a known ViDoRe keyword.
                if not any(keyword in dataset for keyword in VIDORE_DATASETS_KEYWORDS):
                    print(f"{dataset} not found in ViDoRe datasets. Skipping ...")
                    continue
                dataset_nickname = get_datasets_nickname(dataset)
                dataset_res[dataset_nickname] = res[dataset][metric]
            model_res[model] = dataset_res

        return pd.DataFrame(model_res).T

    @staticmethod
    def add_rank(df):
        """Add 'Average' and 'Rank' columns and scale scores to percentages.

        Missing scores are treated as 0. Rows are sorted by the mean of the
        score columns (descending); float columns are then scaled x100 and
        rounded to one decimal place.
        """
        df.fillna(0.0, inplace=True)
        cols_to_rank = [
            col
            for col in df.columns
            if col
            not in [
                "Model",
                "Model Size (Million Parameters)",
                "Memory Usage (GB, fp32)",
                "Embedding Dimensions",
                "Max Tokens",
            ]
        ]
        if len(cols_to_rank) == 1:
            df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
        else:
            df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False))
            df.sort_values("Average", ascending=False, inplace=True)
        df.insert(0, "Rank", list(range(1, len(df) + 1)))
        # Scores are stored as fractions; display them as percentages (1 d.p.).
        for col in df.columns:
            if df[col].dtype == "float64":
                df[col] = df[col].apply(lambda x: round(x * 100, 1))
        return df
utils.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from data.model_handler import ModelHandler
2
+
3
def make_clickable_model(model_name, link=None):
    """Wrap *model_name* in an HTML anchor pointing at its Hugging Face page.

    When *link* is not given, it is reconstructed from the sanitized model
    name ('_' stands in for '/'), dropping any '/captioning' or '/ocr'
    variant suffix, which is not part of the repo id.
    """
    if link is None:
        repo_id = model_name.replace("_", "/")
        for variant in ("/captioning", "/ocr"):
            if variant in repo_id:
                repo_id = repo_id.replace(variant, "")
        link = "https://huggingface.co/" + repo_id
    return f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name}</a>'
15
+
16
+
17
def add_rank_and_format(df):
    """Turn a raw results frame into the leaderboard view.

    Moves the model names out of the index into a 'Model' column, adds
    rank/average columns via ModelHandler.add_rank, and renders each model
    name as a clickable link.
    """
    formatted = df.reset_index().rename(columns={"index": "Model"})
    formatted = ModelHandler.add_rank(formatted)
    formatted["Model"] = formatted["Model"].apply(make_clickable_model)
    return formatted
23
+
24
def get_refresh_function():
    """Build the callback used by the Gradio refresh button and metric dropdown."""

    def _refresh(metric):
        # Re-fetch scores from the Hub for the selected metric and reformat.
        handler = ModelHandler()
        raw_scores = handler.get_vidore_data(metric)
        return add_rank_and_format(raw_scores)

    return _refresh
32
+
33
+
34
def filter_models(data, search_term):
    """Keep only rows whose 'Model' column contains *search_term* (case-insensitive).

    An empty or None search term returns *data* unchanged. The term is matched
    as a plain substring (regex=False) so characters users may type in the
    search box, such as '(' or '+', cannot raise a regex error. Rows with a
    missing model name never match (na=False).
    """
    if search_term:
        mask = data["Model"].str.contains(search_term, case=False, na=False, regex=False)
        data = data[mask]
    return data