import gradio as gr
import json
import pandas as pd
import numpy as np
# Function to load and display PNG logo
def load_png_as_logo():
    """Return an HTML ``<img>`` tag embedding ``racine.png`` as a data URI.

    The image is read from ``racine.png`` relative to the current working
    directory and inlined as base64 so the page needs no static file route.

    Loading is deliberately best-effort: any failure is reported on stdout
    and an empty string is returned, so the caller can still render the page
    without a logo.

    Returns:
        str: ``<img>`` markup containing the base64-encoded PNG, or ``""``
        when the file is missing or cannot be read.
    """
    import base64

    try:
        with open('racine.png', 'rb') as f:
            png_data = base64.b64encode(f.read()).decode('utf-8')
    except FileNotFoundError:
        print("Warning: racine.png file not found")
        return ""
    except Exception as e:
        # Broad on purpose: a broken logo must never take the whole app down.
        print(f"Error loading PNG: {e}")
        return ""

    # Previously the encoded data was computed but never placed in the
    # returned markup, so the result was an empty (whitespace-only) string.
    return f'<img src="data:image/png;base64,{png_data}" alt="Racine.ai logo">'
# Load the scores from JSON file
def load_scores():
    """Load the benchmark scores from ``scores.json`` in the working directory.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform's default encoding, so non-ASCII text in the file is read the
    same way on every system.

    Returns:
        The deserialized JSON document. The other functions in this module
        treat it as a dict keyed by model name.

    Raises:
        OSError: If ``scores.json`` cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open('scores.json', 'r', encoding='utf-8') as f:
        return json.load(f)
# Function to create dataframe for a specific language and sector filter
def create_language_df(scores, language, sector_filter='all'):
    """Build the leaderboard table for queries in a single language.

    Args:
        scores: Mapping of model name to that model's entry. Keys of an entry
            ending in ``_EN`` / ``_FR`` are sectors whose value maps a
            language code to a numeric score. An entry may also carry a
            license (``'license'`` or ``'License'``) and an ``'origin'``.
            The sector list is taken from the first model's entry.
        language: Language code used to pick the score inside each sector.
        sector_filter: ``'en_only'`` keeps the ``_EN`` sectors, ``'fr_only'``
            keeps the ``_FR`` sectors, any other value keeps both.

    Returns:
        pandas.DataFrame: One row per model, sorted by average score in
        descending order with "coming soon" models first. Columns are
        ``Model``, ``License``, ``Average``, the selected sectors, then the
        helper columns ``origin`` and ``coming_soon``. Scores are formatted
        as strings with three decimals; "coming soon" rows hold empty
        strings instead. An empty ``scores`` mapping yields an empty frame
        with the fixed columns only.

    Raises:
        KeyError: If a regular model lacks a selected sector or ``language``.
    """
    lead_cols = ['Model', 'License', 'Average']
    hidden_cols = ['origin', 'coming_soon']

    # Without any model there is no reference entry to read sectors from.
    if not scores:
        return pd.DataFrame(columns=lead_cols + hidden_cols)

    models = list(scores)
    reference = scores[models[0]]
    sectors_en = [key for key in reference if key.endswith('_EN')]
    sectors_fr = [key for key in reference if key.endswith('_FR')]

    if sector_filter == 'en_only':
        selected_sectors = sectors_en
    elif sector_filter == 'fr_only':
        selected_sectors = sectors_fr
    else:  # 'all'
        selected_sectors = sectors_en + sectors_fr

    data = []
    for model in models:
        entry = scores[model]

        # Accept both spellings so every table reports the same license.
        license_name = next(
            (entry[key] for key in ('license', 'License') if key in entry),
            None,
        )
        if license_name is None:
            # Fallback used when the JSON carries no license information.
            if "jina" in model.lower():
                license_name = "Qwen Research License"
            else:
                license_name = "Apache 2.0"

        row = {
            'Model': model,
            'License': license_name,
            # Origin is only used for row styling; default to Chinese.
            'origin': entry.get('origin', 'CN'),
        }

        # AMPERE-1 (but not AMPERE-1.1) is listed without scores yet.
        coming_soon = "AMPERE-1" in model and "AMPERE-1.1" not in model
        row['coming_soon'] = coming_soon

        if coming_soon:
            row.update(dict.fromkeys(selected_sectors, ""))
            row['Average'] = ""
            # +inf pins the row to the top of the descending sort.
            row['sort_value'] = float('inf')
        else:
            sector_scores = {
                sector: float(entry[sector][language])
                for sector in selected_sectors
            }
            row.update(
                {sector: f"{score:.3f}" for sector, score in sector_scores.items()}
            )
            # Guard against a filter that selects no sector at all.
            if sector_scores:
                avg_score = sum(sector_scores.values()) / len(sector_scores)
            else:
                avg_score = 0.0
            row['Average'] = f"{avg_score:.3f}"
            row['sort_value'] = avg_score

        data.append(row)

    df = pd.DataFrame(data)
    # Stable sort keeps the input order for models with equal averages.
    df = df.sort_values('sort_value', ascending=False, kind='stable')
    df = df.drop('sort_value', axis=1)

    # Model, License, Average first; sectors next; helper columns last.
    fixed = set(lead_cols + hidden_cols)
    sector_cols = [col for col in df.columns if col not in fixed]
    return df[lead_cols + sector_cols + hidden_cols]
def create_average_language_df(scores, languages=None):
    """Build the leaderboard table averaged across query languages.

    Args:
        scores: Mapping of model name to that model's entry. Keys of an entry
            ending in ``_EN`` / ``_FR`` are sectors whose value maps a
            language code to a numeric score. An entry may also carry a
            license (``'license'`` or ``'License'``) and an ``'origin'``.
            The sector list is taken from the first model's entry.
        languages: Language codes to average over. Defaults to
            ``('en', 'fr', 'es', 'de', 'it')``.

    Returns:
        pandas.DataFrame: One row per model, sorted by overall average in
        descending order with "coming soon" models first. Columns are
        ``Model``, ``License``, ``Average``, every sector, then the helper
        columns ``origin`` and ``coming_soon``. Each sector cell is the mean
        over ``languages`` and ``Average`` is the mean of those sector means,
        both formatted as strings with three decimals. "Coming soon" rows
        hold empty strings instead. An empty ``scores`` mapping yields an
        empty frame with the fixed columns only.

    Raises:
        KeyError: If a regular model lacks a sector or one of ``languages``.
    """
    if languages is None:
        languages = ('en', 'fr', 'es', 'de', 'it')

    lead_cols = ['Model', 'License', 'Average']
    hidden_cols = ['origin', 'coming_soon']

    # Without any model there is no reference entry to read sectors from.
    if not scores:
        return pd.DataFrame(columns=lead_cols + hidden_cols)

    models = list(scores)
    reference = scores[models[0]]
    sectors_en = [key for key in reference if key.endswith('_EN')]
    sectors_fr = [key for key in reference if key.endswith('_FR')]
    all_sectors = sectors_en + sectors_fr

    data = []
    for model in models:
        entry = scores[model]

        # Accept both spellings so every table reports the same license.
        license_name = next(
            (entry[key] for key in ('license', 'License') if key in entry),
            None,
        )
        row = {
            'Model': model,
            'License': "N/A" if license_name is None else license_name,
            # Origin is only used for row styling; default to Chinese.
            'origin': entry.get('origin', 'CN'),
        }

        # AMPERE-1 (but not AMPERE-1.1) is listed without scores yet.
        coming_soon = "AMPERE-1" in model and "AMPERE-1.1" not in model
        row['coming_soon'] = coming_soon

        if coming_soon:
            row.update(dict.fromkeys(all_sectors, ""))
            row['Average'] = ""
            # +inf pins the row to the top of the descending sort.
            row['sort_value'] = float('inf')
        else:
            sector_means = [
                float(np.mean([entry[sector][lang] for lang in languages]))
                for sector in all_sectors
            ]
            for sector, sector_avg in zip(all_sectors, sector_means):
                row[sector] = f"{sector_avg:.3f}"
            # Average the unrounded sector means: re-parsing the 3-decimal
            # strings would compound rounding error in both the displayed
            # average and the sort order.
            avg_value = float(np.mean(sector_means)) if sector_means else 0.0
            row['Average'] = f"{avg_value:.3f}"
            row['sort_value'] = avg_value

        data.append(row)

    df = pd.DataFrame(data)
    # Stable sort keeps the input order for models with equal averages.
    df = df.sort_values('sort_value', ascending=False, kind='stable')
    df = df.drop('sort_value', axis=1)

    # Model, License, Average first; sectors next; helper columns last.
    fixed = set(lead_cols + hidden_cols)
    sector_cols = [col for col in df.columns if col not in fixed]
    return df[lead_cols + sector_cols + hidden_cols]
# Row background per model origin, matching the page text
# ("Blue = EU, Red = Chinese"). 'CN' is the default origin assigned by the
# dataframe builders; NOTE(review): 'EU' is assumed to be the value used in
# scores.json for European models -- confirm against the data file.
_ORIGIN_ROW_COLORS = {
    'EU': '#dbeafe',  # light blue
    'CN': '#fee2e2',  # light red
}

_CELL_STYLE = 'padding: 6px 10px; border: 1px solid #ddd; text-align: left;'

_HEADER_HTML = """
Racine.ai
Open VLM Retrieval Leaderboard
"""

_INTRO_MARKDOWN = """
This leaderboard presents the performance of various visual embedding models across different business sectors
and languages. The evaluation is based on retrieval accuracy for visual search tasks.
## Structure
- **Sectors**: Each column represents a different business sector (e.g., Energy, Education) with documents in either English (_EN) or French (_FR)
- **Models**: Each row shows a different model's performance
- **Scores**: Values range from 0 to 1, where higher is better (1.000 being perfect retrieval)
- **Average**: Overall mean performance across all sectors for each model
- **Colors**: Blue backgrounds indicate EU models, red backgrounds indicate Chinese models

The leaderboard was created in collaboration with the Intelligence Lab of the ECE - Ecole centrale d'électronique.
"""

_HOW_TO_READ_MARKDOWN = """
### How to Read the Results
- Select a language tab to see how models perform with queries in that language
- All scores are normalized retrieval accuracy metrics
- Background colors indicate model origins (Blue = EU, Red = Chinese)
"""

_AVERAGE_TAB_MARKDOWN = """
### Average Performance Across Languages
This table shows the average performance of each model for each sector,
averaged across all query languages.
"""

_LANGUAGE_TAB_MARKDOWN = """
### Performance with {lang_name} Queries
The table below shows how each model performs when the search queries are in {lang_name}.
"""

_CITATION_MARKDOWN = """
If you use these benchmarks in your research, please cite:
```
@article{visual_embeddings_benchmark_2025,
title={Cross-lingual Visual Embeddings Benchmark},
author={racine.ai},
year={2025}
}
```
"""


def _render_leaderboard_table(df):
    """Render a leaderboard dataframe as an HTML ``<table>`` string.

    The helper columns ``origin`` and ``coming_soon`` are not displayed:
    ``origin`` selects the row background and ``coming_soon`` blanks every
    cell except ``Model``, showing an italic "Coming Soon" under ``Average``.
    """
    import html

    visible = [col for col in df.columns if col not in ('origin', 'coming_soon')]

    parts = ['<table style="border-collapse: collapse; width: 100%;">', '<thead><tr>']
    parts.extend(
        f'<th style="{_CELL_STYLE}">{html.escape(str(col))}</th>' for col in visible
    )
    parts.append('</tr></thead>')
    parts.append('<tbody>')

    for _, row in df.iterrows():
        origin = row['origin'] if 'origin' in row else 'CN'
        coming_soon = bool(row.get('coming_soon', False))

        background = _ORIGIN_ROW_COLORS.get(origin)
        if background:
            # Fix the text colour too so rows stay readable on dark themes.
            row_style = f' style="background-color: {background}; color: #111;"'
        else:
            row_style = ''
        parts.append(f'<tr{row_style}>')

        for col in visible:
            if coming_soon and col != 'Model':
                cell = '<em>Coming Soon</em>' if col == 'Average' else ''
            else:
                # Values come from the JSON file; escape them before
                # embedding them in markup.
                cell = html.escape(str(row[col]))
            parts.append(f'<td style="{_CELL_STYLE}">{cell}</td>')

        parts.append('</tr>')

    parts.append('</tbody>')
    parts.append('</table>')
    return "\n".join(parts)


def _render_color_legend():
    """Return a small HTML legend explaining the row background colours."""
    labels = {'EU': 'EU models', 'CN': 'Chinese models'}
    swatches = "".join(
        f'<span style="display: inline-block; margin-right: 16px;">'
        f'<span style="display: inline-block; width: 14px; height: 14px; '
        f'vertical-align: middle; border: 1px solid #999; '
        f'background-color: {color};"></span> {labels.get(origin, origin)}</span>'
        for origin, color in _ORIGIN_ROW_COLORS.items()
    )
    return f'<div style="margin-top: 8px;">{swatches}</div>'


def create_leaderboard():
    """Build the Gradio Blocks app for the leaderboard.

    Loads the scores with ``load_scores()`` and lays out a header, an
    introduction, one tab with scores averaged across languages and one tab
    per query language (English, French, Spanish, German, Italian), followed
    by a citation footer. Each tab shows an HTML table whose rows are
    coloured by model origin, plus a colour legend.

    Returns:
        The ``gr.Blocks`` instance; the caller is responsible for launching it.
    """
    scores = load_scores()
    languages = {
        'en': 'English',
        'fr': 'French',
        'es': 'Spanish',
        'de': 'German',
        'it': 'Italian',
    }
    legend_html = _render_color_legend()

    with gr.Blocks(title="Visual Embeddings Retrieval Leaderboard",
                   theme='argilla/argilla-theme') as demo:
        # Header section with Racine.ai and title
        gr.HTML(_HEADER_HTML)
        gr.Markdown(_INTRO_MARKDOWN)
        gr.Markdown(_HOW_TO_READ_MARKDOWN)

        # Table styling is applied inline by _render_leaderboard_table, so
        # no separate CSS component is needed.
        with gr.Tabs():
            # The cross-language average is the first (default) tab.
            with gr.Tab("Average Across Languages"):
                gr.Markdown(_AVERAGE_TAB_MARKDOWN)
                avg_df = create_average_language_df(scores)
                gr.HTML(_render_leaderboard_table(avg_df))
                gr.HTML(legend_html)

            # One tab per query language.
            for lang_code, lang_name in languages.items():
                with gr.Tab(f"{lang_name} Queries"):
                    gr.Markdown(_LANGUAGE_TAB_MARKDOWN.format(lang_name=lang_name))
                    lang_df = create_language_df(scores, lang_code, 'all')
                    gr.HTML(_render_leaderboard_table(lang_df))
                    gr.HTML(legend_html)

        # Footer section - Only citation
        gr.Markdown(_CITATION_MARKDOWN)

    return demo
# Create and launch the interface
if __name__ == "__main__":
    # Only when executed as a script: build the Blocks app and call launch()
    # on it. Importing this module has no such side effect.
    demo = create_leaderboard()
    demo.launch()