paulml committed on
Commit
4064f94
·
verified ·
1 Parent(s): e07f1c2

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +366 -0
  2. racine.svg +3 -0
  3. scores.json +163 -0
app.py ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import pandas as pd
4
+ import numpy as np
5
+
6
+ # Function to load and modify SVG for white color
7
def load_svg_as_white(path='racine.svg'):
    """Return the logo SVG markup with an inline style that renders it white.

    The file is read as UTF-8 and an inline ``style`` attribute (a CSS
    filter that inverts the artwork to white, plus absolute positioning and
    a fixed width) is injected into the root ``<svg`` tag.

    Args:
        path: Location of the SVG file. Defaults to ``'racine.svg'`` in the
            current working directory.

    Returns:
        The modified SVG markup, or an HTML comment placeholder when the
        file is missing or cannot be read/decoded (best effort: the caller
        still gets a string it can embed).
    """
    styled_open_tag = (
        '<svg style="filter: brightness(0) invert(1); left: 33%; '
        'position: absolute; margin-top: 0px; width: 500px;"'
    )
    try:
        with open(path, 'r', encoding='utf-8') as f:
            svg_content = f.read()
    except FileNotFoundError:
        print(f"Warning: {path} file not found")
        return "<!-- SVG file not found -->"
    except (OSError, UnicodeError) as e:
        print(f"Error loading SVG: {e}")
        return "<!-- Error loading SVG -->"

    # Only the first (root) tag is styled; without the count, nested <svg>
    # elements would also receive the absolute positioning and width.
    return svg_content.replace('<svg', styled_open_tag, 1)
20
+
21
+ # Load the scores from JSON file
22
def load_scores(path='scores.json'):
    """Load the leaderboard scores from a JSON file.

    Args:
        path: Location of the JSON file. Defaults to ``'scores.json'`` in
            the current working directory.

    Returns:
        The parsed JSON document, as returned by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default
    # encoding, which would mis-decode non-ASCII model names on some systems.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)
25
+
26
+ # Function to create dataframe for a specific language and sector filter
27
def create_language_df(scores, language, sector_filter='all'):
    """Build the leaderboard table for queries in a single language.

    The sector columns are discovered from the first model's entry: every
    key ending in ``_EN`` or ``_FR``.

    Args:
        scores: Mapping of model name to that model's entry. An entry maps
            sector names to a ``{language code: score}`` mapping and may
            carry an ``'origin'`` string (``'CN'`` is assumed when absent).
        language: Language code looked up inside every sector mapping.
        sector_filter: ``'en_only'`` keeps the ``_EN`` sectors, ``'fr_only'``
            keeps the ``_FR`` sectors; any other value keeps both.

    Returns:
        A DataFrame with columns ``Model``, ``Average``, one column per
        selected sector, then the hidden ``origin`` and ``coming_soon``
        columns. Scores are strings formatted to three decimals. Rows are
        ordered by average, highest first, with "coming soon" models
        (blank scores) on top. Empty ``scores`` yields an empty frame with
        the fixed columns only.

    Raises:
        KeyError: If a model lacks a selected sector, or a sector lacks
            ``language``.
    """
    hidden_columns = ['origin', 'coming_soon']
    if not scores:
        # Nothing to rank: return a well-formed empty table instead of
        # failing while looking up the first model.
        return pd.DataFrame(columns=['Model', 'Average'] + hidden_columns)

    first_entry = next(iter(scores.values()))
    sectors_en = [key for key in first_entry if key.endswith('_EN')]
    sectors_fr = [key for key in first_entry if key.endswith('_FR')]

    if sector_filter == 'en_only':
        selected_sectors = sectors_en
    elif sector_filter == 'fr_only':
        selected_sectors = sectors_fr
    else:  # 'all'
        selected_sectors = sectors_en + sectors_fr

    rows = []
    for model, entry in scores.items():
        # AMPERE-1 (but not AMPERE-1.1) is listed without its scores.
        coming_soon = "AMPERE-1" in model and "AMPERE-1.1" not in model
        row = {
            'Model': model,
            'origin': entry.get('origin', 'CN'),
            'coming_soon': coming_soon,
        }

        if coming_soon:
            row.update(dict.fromkeys(selected_sectors, ""))
            row['Average'] = ""
            row['sort_value'] = float('inf')  # Sorts above every real score
        else:
            sector_scores = {
                sector: entry[sector][language] for sector in selected_sectors
            }
            row.update(
                {sector: f"{score:.3f}" for sector, score in sector_scores.items()}
            )
            # A filter that selects no sector averages to 0 rather than
            # dividing by zero.
            if sector_scores:
                avg_score = sum(float(v) for v in sector_scores.values()) / len(sector_scores)
            else:
                avg_score = 0.0
            row['Average'] = f"{avg_score:.3f}"
            row['sort_value'] = avg_score

        rows.append(row)

    # Stable sort keeps the input order among models with equal averages.
    df = pd.DataFrame(rows).sort_values(
        'sort_value', ascending=False, kind='mergesort'
    )

    # Model and Average first, then sectors, hidden styling columns last;
    # selecting explicitly also drops the temporary sort key.
    return df[['Model', 'Average'] + selected_sectors + hidden_columns]
91
+
92
def create_average_language_df(scores, languages=('en', 'fr', 'es', 'de', 'it')):
    """Build the leaderboard table averaged across query languages.

    The sector columns are discovered from the first model's entry: every
    key ending in ``_EN`` followed by every key ending in ``_FR``.

    Args:
        scores: Mapping of model name to that model's entry. An entry maps
            sector names to a ``{language code: score}`` mapping and may
            carry an ``'origin'`` string (``'CN'`` is assumed when absent).
        languages: Language codes averaged for every sector. Defaults to the
            five languages of the leaderboard.

    Returns:
        A DataFrame with columns ``Model``, ``Average``, one column per
        sector (the mean over ``languages``), then the hidden ``origin`` and
        ``coming_soon`` columns. Values are strings formatted to three
        decimals. Rows are ordered by average, highest first, with "coming
        soon" models (blank scores) on top. Empty ``scores`` yields an empty
        frame with the fixed columns only.

    Raises:
        ValueError: If ``languages`` is empty.
        KeyError: If a model lacks a sector, or a sector lacks one of
            ``languages``.
    """
    languages = list(languages)
    if not languages:
        raise ValueError("languages must contain at least one language code")

    hidden_columns = ['origin', 'coming_soon']
    if not scores:
        # Nothing to rank: return a well-formed empty table instead of
        # failing while looking up the first model.
        return pd.DataFrame(columns=['Model', 'Average'] + hidden_columns)

    first_entry = next(iter(scores.values()))
    all_sectors = (
        [key for key in first_entry if key.endswith('_EN')]
        + [key for key in first_entry if key.endswith('_FR')]
    )

    rows = []
    for model, entry in scores.items():
        # AMPERE-1 (but not AMPERE-1.1) is listed without its scores.
        coming_soon = "AMPERE-1" in model and "AMPERE-1.1" not in model
        row = {
            'Model': model,
            'origin': entry.get('origin', 'CN'),
            'coming_soon': coming_soon,
        }

        if coming_soon:
            row.update(dict.fromkeys(all_sectors, ""))
            row['Average'] = ""
            row['sort_value'] = float('inf')  # Sorts above every real score
        else:
            sector_means = [
                float(np.mean([entry[sector][lang] for lang in languages]))
                for sector in all_sectors
            ]
            for sector, sector_avg in zip(all_sectors, sector_means):
                row[sector] = f"{sector_avg:.3f}"

            # Average the unrounded sector means: averaging the 3-decimal
            # strings would round twice and can shift the last digit.
            avg_value = float(np.mean(sector_means)) if sector_means else 0.0
            row['Average'] = f"{avg_value:.3f}"
            row['sort_value'] = avg_value

        rows.append(row)

    # Stable sort keeps the input order among models with equal averages.
    df = pd.DataFrame(rows).sort_values(
        'sort_value', ascending=False, kind='mergesort'
    )

    # Model and Average first, then sectors, hidden styling columns last;
    # selecting explicitly also drops the temporary sort key.
    return df[['Model', 'Average'] + all_sectors + hidden_columns]
154
+
155
# Introductory text shown under the page header.
_INTRO_MD = """
This leaderboard presents the performance of various visual embedding models across different business sectors
and languages. The evaluation is based on retrieval accuracy for visual search tasks.

## Structure
- **Sectors**: Each column represents a different business sector (e.g., Energy, Education) with documents in either English (_EN) or French (_FR)
- **Models**: Each row shows a different model's performance
- **Scores**: Values range from 0 to 1, where higher is better (1.000 being perfect retrieval)
- **Average**: Overall mean performance across all sectors for each model
- **Colors**: Blue backgrounds indicate EU models, red backgrounds indicate Chinese models
"""

# Reading guide shown above the tabs.
_HOW_TO_READ_MD = """
### How to Read the Results
- Select a language tab to see how models perform with queries in that language
- Click on column headers to sort by performance in specific sectors or by average performance
- All scores are normalized retrieval accuracy metrics
- Background colors indicate model origins (Blue = EU, Red = Chinese)
"""

# Row background colors, keyed on the data-origin attribute of each <tr>.
_TABLE_CSS_HTML = """
<style>
table.gradio-dataframe tr[data-origin="EU"] {
background-color: rgba(0, 0, 255, 0.2) !important;
}
table.gradio-dataframe tr[data-origin="CN"] {
background-color: rgba(255, 0, 0, 0.2) !important;
}
</style>
"""

# Description of the "Average Across Languages" tab.
_AVERAGE_TAB_MD = """
### Average Performance Across Languages
This table shows the average performance of each model for each sector,
averaged across all query languages.
"""

# Color legend displayed under every table.
_ORIGIN_LEGEND_HTML = """
<div style="margin-top: 20px; margin-bottom: 40px;">
<div style="font-weight: bold; margin-bottom: 10px;">Model Origin:</div>
<div style="display: flex; align-items: center; margin-bottom: 8px;">
<div style="width: 20px; height: 20px; background-color: rgba(0, 0, 255, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
<div>European Union</div>
</div>
<div style="display: flex; align-items: center;">
<div style="width: 20px; height: 20px; background-color: rgba(255, 0, 0, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
<div>China</div>
</div>
</div>
"""

# Footer with general notes and the citation snippet.
_FOOTER_MD = """
---
### Additional Information
- Scores are updated regularly as new models are evaluated
- All evaluations use the same test set for fair comparison
- Models are evaluated on both English and French datasets to assess cross-lingual capabilities
- Color coding indicates model origin (Blue = EU, Red = Chinese)

### Citation
If you use these benchmarks in your research, please cite:
```
@article{visual_embeddings_benchmark_2024,
title={Cross-lingual Visual Embeddings Benchmark},
author={[Your Name]},
year={2024}
}
```
"""


def _render_table_html(df):
    """Render a leaderboard dataframe as an HTML table string.

    The ``origin`` and ``coming_soon`` columns are not displayed: ``origin``
    becomes the ``data-origin`` attribute of each row (used by the CSS for
    the background color) and ``coming_soon`` rows show only the model name
    plus an italic "Coming Soon" note in the ``Average`` cell.
    """
    from html import escape

    hidden_columns = ('origin', 'coming_soon')
    visible_columns = [col for col in df.columns if col not in hidden_columns]

    parts = ["<table class='gradio-dataframe'><thead><tr>"]
    parts.extend(f"<th>{escape(str(col))}</th>" for col in visible_columns)
    parts.append("</tr></thead><tbody>")

    for _, row in df.iterrows():
        origin = row['origin'] if 'origin' in row else 'CN'
        coming_soon = row.get('coming_soon', False)
        # Values come from the scores file; escape them so a stray quote or
        # angle bracket cannot break the markup.
        parts.append(f"<tr data-origin='{escape(str(origin))}'>")

        for col in visible_columns:
            if coming_soon and col != 'Model':
                if col == 'Average':
                    parts.append(
                        "<td><span style='font-style: italic; color: #666;'>"
                        "Coming Soon</span></td>"
                    )
                else:
                    parts.append("<td></td>")
            else:
                parts.append(f"<td>{escape(str(row[col]))}</td>")

        parts.append("</tr>")

    parts.append("</tbody></table>")
    return "".join(parts)


def create_leaderboard():
    """Build the Gradio Blocks app for the retrieval leaderboard.

    Loads the scores and the logo, then lays out a header, explanatory
    text, one "Average Across Languages" tab and one tab per query
    language. Each tab holds a colored HTML table and a legend. A footer
    closes the page.

    Returns:
        The ``gr.Blocks`` instance; the caller is responsible for launching
        it.
    """
    scores = load_scores()
    languages = {
        'en': 'English',
        'fr': 'French',
        'es': 'Spanish',
        'de': 'German',
        'it': 'Italian'
    }

    # Load the SVG content
    white_svg_logo = load_svg_as_white()

    with gr.Blocks(title="Visual Embeddings Retrieval Leaderboard",
                   theme='argilla/argilla-theme') as demo:

        # Header section with white title and the logo linking to racine.ai
        gr.HTML(f"""
        <div style="padding: 2em; margin-bottom: 4em; height: 350px; background-color: transparent;">
        <div style="display: flex; flex-direction: column; align-items: center; justify-content: center;">
        <div style="width: 50px; margin-bottom: 20px; cursor: pointer;">
        <a href="https://racine.ai" target="_blank" style="display: block;">
        {white_svg_logo}
        </a>
        </div>
        <h1 style="font-size: 3em; font-weight: bold; margin: 0.5em 0; color: white; margin-top: 200px;">
        Open VLM Retrieval Leaderboard
        </h1>
        </div>
        </div>
        """)

        gr.Markdown(_INTRO_MD)
        gr.Markdown(_HOW_TO_READ_MD)

        # Custom CSS for styling tables
        gr.HTML(_TABLE_CSS_HTML)

        with gr.Tabs():
            # Average Languages tab comes first
            with gr.Tab("Average Across Languages"):
                gr.Markdown(_AVERAGE_TAB_MD)
                gr.HTML(_render_table_html(create_average_language_df(scores)))
                gr.HTML(_ORIGIN_LEGEND_HTML)

            # Individual language tabs
            for lang_code, lang_name in languages.items():
                with gr.Tab(f"{lang_name} Queries"):
                    gr.Markdown(
                        f"\n### Performance with {lang_name} Queries\n"
                        f"The table below shows how each model performs when the search queries are in {lang_name}.\n"
                    )
                    gr.HTML(_render_table_html(
                        create_language_df(scores, lang_code, 'all')
                    ))
                    gr.HTML(_ORIGIN_LEGEND_HTML)

        # Footer section
        gr.Markdown(_FOOTER_MD)

    return demo
362
+
363
+ # Create and launch the interface
364
if __name__ == "__main__":
    # Build the Blocks app, keep it bound to `demo`, then start the server.
    demo = create_leaderboard()
    demo.launch()
racine.svg ADDED
scores.json ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "llamaindex/vdr-2b-multi-v1 (1536 dim) (960 max pixels)": {
3
+ "ENERGY_EN": {
4
+ "en": 0.9064713561013534,
5
+ "fr": 0.8915116814696813,
6
+ "de": 0.8771447513905304,
7
+ "it": 0.8822130007875514,
8
+ "es": 0.8890421421056137
9
+ },
10
+ "ENERGY_FR": {
11
+ "en": 0.8664716145521915,
12
+ "fr": 0.8724679747924354,
13
+ "de": 0.7969272485807078,
14
+ "it": 0.8369809864805748,
15
+ "es": 0.8398171676654868
16
+ }
17
+ },
18
+ "llamaindex/vdr-2b-multi-v1 (1536 dim) (768 max pixels)": {
19
+ "ENERGY_EN": {
20
+ "en": 0.9056966191854877,
21
+ "fr": 0.8814481576759942,
22
+ "de": 0.8677028930739694,
23
+ "it": 0.887347176313906,
24
+ "es": 0.8836230691570296
25
+ },
26
+ "ENERGY_FR": {
27
+ "en": 0.8600078106111717,
28
+ "fr": 0.875590870797616,
29
+ "de": 0.7994682538707824,
30
+ "it": 0.8329727980886149,
31
+ "es": 0.8373233417400284
32
+ }
33
+ },
34
+ "llamaindex/vdr-2b-multi-v1 (768 dim) (960 max pixels)": {
35
+ "ENERGY_EN": {
36
+ "en": 0.9040759668179204,
37
+ "fr": 0.8746541123436405,
38
+ "de": 0.8560369810570134,
39
+ "it": 0.8554604694074869,
40
+ "es": 0.8559578821798726
41
+ },
42
+ "ENERGY_FR": {
43
+ "en": 0.8260181607076341,
44
+ "fr": 0.856274189278424,
45
+ "de": 0.7687147115662343,
46
+ "it": 0.8059781690988007,
47
+ "es": 0.8164392225457765
48
+ }
49
+ },
50
+ "marco/mcdse-2b-v1 (1536 dim) (960 max pixels)": {
51
+ "ENERGY_EN": {
52
+ "en": 0.8864914044758345,
53
+ "fr": 0.8581359097016441,
54
+ "de": 0.8607091625368953,
55
+ "it": 0.8539746155123089,
56
+ "es": 0.8670746944536166
57
+ },
58
+ "ENERGY_FR": {
59
+ "en": 0.8363043545751958,
60
+ "fr": 0.8344453830143979,
61
+ "de": 0.8013868818049785,
62
+ "it": 0.8271768291414843,
63
+ "es": 0.8266757566975349
64
+ }
65
+ },
66
+ "marco/mcdse-2b-v1 (768 dim) (960 max pixels)": {
67
+ "ENERGY_EN": {
68
+ "en": 0.8755419235816851,
69
+ "fr": 0.8573657099961326,
70
+ "de": 0.8481401154301397,
71
+ "it": 0.8506702006425194,
72
+ "es": 0.854859417172228
73
+ },
74
+ "ENERGY_FR": {
75
+ "en": 0.8199730664365921,
76
+ "fr": 0.8313944410898241,
77
+ "de": 0.79254381618098,
78
+ "it": 0.8149253984511224,
79
+ "es": 0.8115440946149329
80
+ }
81
+ },
82
+ "MrLight/dse-qwen2-2b-mrl-v1 (1024 max pixels)": {
83
+ "ENERGY_EN": {
84
+ "en": 0.8858914849980944,
85
+ "fr": 0.8319955161103443,
86
+ "de": 0.8007595660782697,
87
+ "it": 0.7884830257969229,
88
+ "es": 0.8109588364468638
89
+ },
90
+ "ENERGY_FR": {
91
+ "en": 0.7999719292959505,
92
+ "fr": 0.7814200135493101,
93
+ "de": 0.6931412447554907,
94
+ "it": 0.7124515040042555,
95
+ "es": 0.7407532416059531
96
+ }
97
+ },
98
+ "vidore/colqwen2-v1.0": {
99
+ "ENERGY_EN": {
100
+ "en": 0.9450269368391911,
101
+ "fr": 0.8799090261578681,
102
+ "de": 0.8941818719335239,
103
+ "it": 0.8928554849519516,
104
+ "es": 0.8973223517567471
105
+ },
106
+ "ENERGY_FR": {
107
+ "en": 0.797855079299299,
108
+ "fr": 0.8427709258268349,
109
+ "de": 0.7758948792503111,
110
+ "it": 0.8388839166668723,
111
+ "es": 0.8330444309570463
112
+ }
113
+ },
114
+ "racineai/AMPERE-1 (1536 dim) (768 max pixels)": {
115
+ "ENERGY_EN": {
116
+ "en": 0.9189998628097908,
117
+ "fr": 0.9062905947057467,
118
+ "de": 0.8924913100154964,
119
+ "it": 0.9014115284688254,
120
+ "es": 0.9054600624422264
121
+ },
122
+ "ENERGY_FR": {
123
+ "en": 0.8478239831155939,
124
+ "fr": 0.8830485083419397,
125
+ "de": 0.8368160139139695,
126
+ "it": 0.8617192303292741,
127
+ "es": 0.8589934133953208
128
+ }
129
+ },
130
+ "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct": {
131
+ "ENERGY_EN": {
132
+ "en": 0.8543160297514112,
133
+ "fr": 0.8233691101050026,
134
+ "de": 0.8144689878335026,
135
+ "it": 0.8226984776178596,
136
+ "es": 0.8435247185057887
137
+ },
138
+ "ENERGY_FR": {
139
+ "en": 0.7948213876766665,
140
+ "fr": 0.8141921315218869,
141
+ "de": 0.7879807743413478,
142
+ "it": 0.8203145999058352,
143
+ "es": 0.8279510851214207
144
+ }
145
+ },
146
+ "racineai/smolvlm-2b-dse": {
147
+ "origin": "EU",
148
+ "ENERGY_EN": {
149
+ "en": 0.8867680191920602,
150
+ "fr": 0.7577338662000416,
151
+ "de": 0.7085700422386438,
152
+ "it": 0.7922815982637218,
153
+ "es": 0.8241859159760317
154
+ },
155
+ "ENERGY_FR": {
156
+ "en": 0.7567789969566571,
157
+ "fr": 0.8282483912934573,
158
+ "de": 0.6031166536296358,
159
+ "it": 0.7530777182006402,
160
+ "es": 0.757678752178639
161
+ }
162
+ }
163
+ }