Spaces:

racineai
/

Open-VLM-Retrieval-Leaderboard

Running

App Files Files Community

paulml committed on Mar 26

Commit

4064f94

verified ·

1 Parent(s): e07f1c2

Upload 3 files

Browse files

Files changed (3) hide show

app.py +366 -0
racine.svg +3 -0
scores.json +163 -0

app.py ADDED Viewed

	@@ -0,0 +1,366 @@

+import gradio as gr
+import json
+import pandas as pd
+import numpy as np
+# Function to load and modify SVG for white color
+def load_svg_as_white():
+    try:
+        with open('racine.svg', 'r', encoding='utf-8') as f:
+            svg_content = f.read()
+            # Add CSS to make it white while preserving the SVG structure
+            white_svg = svg_content.replace('<svg', '''<svg style="filter: brightness(0) invert(1); left: 33%; position: absolute; margin-top: 0px; width: 500px;"''')
+            return white_svg
+    except FileNotFoundError:
+        print("Warning: racine.svg file not found")
+        return "<!-- SVG file not found -->"
+    except Exception as e:
+        print(f"Error loading SVG: {e}")
+        return "<!-- Error loading SVG -->"
+# Load the scores from JSON file
+def load_scores():
+    with open('scores.json', 'r') as f:
+        return json.load(f)
+# Function to create dataframe for a specific language and sector filter
+def create_language_df(scores, language, sector_filter='all'):
+    models = list(scores.keys())
+    sectors_en = [col for col in scores[models[0]] if col.endswith('_EN') and col != 'origin']
+    sectors_fr = [col for col in scores[models[0]] if col.endswith('_FR') and col != 'origin']
+    if sector_filter == 'en_only':
+        selected_sectors = sectors_en
+    elif sector_filter == 'fr_only':
+        selected_sectors = sectors_fr
+    else:  # 'all'
+        selected_sectors = sectors_en + sectors_fr
+    data = []
+    for model in models:
+        row = {'Model': model}
+        # Add origin information (for styling)
+        if 'origin' in scores[model]:
+            row['origin'] = scores[model]['origin']
+        else:
+            row['origin'] = 'CN'  # Default to Chinese if not specified
+        # Special handling for AMPERE-1 model
+        if "AMPERE-1" in model and "AMPERE-1.1" not in model:  # Exclude AMPERE-1.1
+            row['coming_soon'] = True
+            # Fill all sector values with empty strings
+            for sector in selected_sectors:
+                row[sector] = ""
+            row['Average'] = ""
+            # Add sort value for correct ordering
+            row['sort_value'] = float('inf')  # Place at the top when sorting
+        else:
+            row['coming_soon'] = False
+            # Add sector scores
+            sector_scores = {sector: scores[model][sector][language] for sector in selected_sectors}
+            row.update({sector: f"{score:.3f}" for sector, score in sector_scores.items()})
+            # Calculate and add average score
+            avg_score = sum(float(value) for value in sector_scores.values()) / len(sector_scores)
+            row['Average'] = f"{avg_score:.3f}"
+            # Add sort value for correct ordering
+            row['sort_value'] = avg_score
+        data.append(row)
+    df = pd.DataFrame(data)
+    # Sort by the sort_value
+    df = df.sort_values('sort_value', ascending=False)
+    # Remove the sort column
+    df = df.drop('sort_value', axis=1)
+    # Move Average column to be the second column (right after Model)
+    cols = ['Model', 'Average'] + [col for col in df.columns if col not in ['Model', 'Average', 'origin', 'coming_soon']]
+    # Add hidden columns at the end
+    if 'origin' in df.columns:
+        cols.append('origin')
+    if 'coming_soon' in df.columns:
+        cols.append('coming_soon')
+    df = df[cols]
+    return df
+def create_average_language_df(scores):
+    models = list(scores.keys())
+    languages = ['en', 'fr', 'es', 'de', 'it']
+    sectors_en = [col for col in scores[models[0]] if col.endswith('_EN') and col != 'origin']
+    sectors_fr = [col for col in scores[models[0]] if col.endswith('_FR') and col != 'origin']
+    all_sectors = sectors_en + sectors_fr
+    data = []
+    for model in models:
+        row = {'Model': model}
+        # Add origin information (for styling)
+        if 'origin' in scores[model]:
+            row['origin'] = scores[model]['origin']
+        else:
+            row['origin'] = 'CN'  # Default to Chinese if not specified
+        # Special handling for AMPERE-1 model
+        if "AMPERE-1" in model and "AMPERE-1.1" not in model:  # Exclude AMPERE-1.1
+            row['coming_soon'] = True
+            # Fill all sector values with empty strings
+            for sector in all_sectors:
+                row[sector] = ""
+            row['Average'] = ""
+            # Add sort value for correct ordering
+            row['sort_value'] = float('inf')  # Place at the top when sorting
+        else:
+            row['coming_soon'] = False
+            # Calculate average for each sector across all languages
+            for sector in all_sectors:
+                sector_scores = [scores[model][sector][lang] for lang in languages]
+                sector_avg = np.mean(sector_scores)
+                row[sector] = f"{sector_avg:.3f}"
+            # Calculate overall average across all sectors
+            sector_values = [float(row[sector]) for sector in all_sectors]
+            avg_value = np.mean(sector_values) if sector_values else 0
+            row['Average'] = f"{avg_value:.3f}"
+            # Add sort value for correct ordering
+            row['sort_value'] = avg_value
+        data.append(row)
+    df = pd.DataFrame(data)
+    # Sort by the sort_value
+    df = df.sort_values('sort_value', ascending=False)
+    # Remove the sort column
+    df = df.drop('sort_value', axis=1)
+    # Move Average column to be the second column
+    cols = ['Model', 'Average'] + [col for col in df.columns if col not in ['Model', 'Average', 'origin', 'coming_soon']]
+    # Add hidden columns at the end
+    if 'origin' in df.columns:
+        cols.append('origin')
+    if 'coming_soon' in df.columns:
+        cols.append('coming_soon')
+    df = df[cols]
+    return df
+def create_leaderboard():
+    scores = load_scores()
+    languages = {
+        'en': 'English',
+        'fr': 'French',
+        'es': 'Spanish',
+        'de': 'German',
+        'it': 'Italian'
+    }
+    # Load the SVG content
+    white_svg_logo = load_svg_as_white()
+    with gr.Blocks(title="Visual Embeddings Retrieval Leaderboard",
+                theme='argilla/argilla-theme') as demo:
+        # Header section with white title and logo
+        # Added an anchor tag around the SVG logo with href to racine.ai
+        gr.HTML(f"""
+            <div style="padding: 2em; margin-bottom: 4em; height: 350px; background-color: transparent;">
+                <div style="display: flex; flex-direction: column; align-items: center; justify-content: center;">
+                    <div style="width: 50px; margin-bottom: 20px; cursor: pointer;">
+                        <a href="https://racine.ai" target="_blank" style="display: block;">
+                            {white_svg_logo}
+                        </a>
+                    </div>
+                    <h1 style="font-size: 3em; font-weight: bold; margin: 0.5em 0; color: white; margin-top: 200px;">
+                        Open VLM Retrieval Leaderboard
+                    </h1>
+                </div>
+            </div>
+            """)
+        gr.Markdown("""
+        This leaderboard presents the performance of various visual embedding models across different business sectors
+        and languages. The evaluation is based on retrieval accuracy for visual search tasks.
+        ## Structure
+        - **Sectors**: Each column represents a different business sector (e.g., Energy, Education) with documents in either English (_EN) or French (_FR)
+        - **Models**: Each row shows a different model's performance
+        - **Scores**: Values range from 0 to 1, where higher is better (1.000 being perfect retrieval)
+        - **Average**: Overall mean performance across all sectors for each model
+        - **Colors**: Blue backgrounds indicate EU models, red backgrounds indicate Chinese models
+        """)
+        # Info box with custom styling
+        gr.Markdown("""
+        ### How to Read the Results
+        - Select a language tab to see how models perform with queries in that language
+        - Click on column headers to sort by performance in specific sectors or by average performance
+        - All scores are normalized retrieval accuracy metrics
+        - Background colors indicate model origins (Blue = EU, Red = Chinese)
+        """)
+        # Custom CSS for styling tables
+        gr.HTML("""
+        <style>
+        table.gradio-dataframe tr[data-origin="EU"] {
+            background-color: rgba(0, 0, 255, 0.2) !important;
+        }
+        table.gradio-dataframe tr[data-origin="CN"] {
+            background-color: rgba(255, 0, 0, 0.2) !important;
+        }
+        </style>
+        """)
+        # Tabs section
+        with gr.Tabs() as tabs:
+            # Add Average Languages tab first
+            with gr.Tab("Average Across Languages"):
+                gr.Markdown("""
+                ### Average Performance Across Languages
+                This table shows the average performance of each model for each sector,
+                averaged across all query languages.
+                """)
+                # Get the dataframe for average across languages
+                avg_df = create_average_language_df(scores)
+                # Create HTML for the colored table
+                html_table = "<table class='gradio-dataframe'><thead><tr>"
+                # Add headers
+                for col in avg_df.columns:
+                    if col not in ['origin', 'coming_soon']:
+                        html_table += f"<th>{col}</th>"
+                html_table += "</tr></thead><tbody>"
+                # Add rows with appropriate background colors
+                for _, row in avg_df.iterrows():
+                    origin = row['origin'] if 'origin' in row else 'CN'
+                    coming_soon = row.get('coming_soon', False)
+                    html_table += f"<tr data-origin='{origin}'>"
+                    for col in avg_df.columns:
+                        if col not in ['origin', 'coming_soon']:
+                            if coming_soon and col != 'Model':
+                                if col == 'Average':
+                                    # Add "Coming Soon" text in italics
+                                    html_table += "<td><span style='font-style: italic; color: #666;'>Coming Soon</span></td>"
+                                else:
+                                    html_table += "<td></td>"
+                            else:
+                                html_table += f"<td>{row[col]}</td>"
+                    html_table += "</tr>"
+                html_table += "</tbody></table>"
+                gr.HTML(html_table)
+                # Add color legend
+                gr.HTML("""
+                <div style="margin-top: 20px; margin-bottom: 40px;">
+                    <div style="font-weight: bold; margin-bottom: 10px;">Model Origin:</div>
+                    <div style="display: flex; align-items: center; margin-bottom: 8px;">
+                        <div style="width: 20px; height: 20px; background-color: rgba(0, 0, 255, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
+                        <div>European Union</div>
+                    </div>
+                    <div style="display: flex; align-items: center;">
+                        <div style="width: 20px; height: 20px; background-color: rgba(255, 0, 0, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
+                        <div>China</div>
+                    </div>
+                </div>
+                """)
+            # Individual language tabs
+            for lang_code, lang_name in languages.items():
+                with gr.Tab(f"{lang_name} Queries"):
+                    gr.Markdown(f"""
+                    ### Performance with {lang_name} Queries
+                    The table below shows how each model performs when the search queries are in {lang_name}.
+                    """)
+                    # Get the dataframe for this language
+                    lang_df = create_language_df(scores, lang_code, 'all')
+                    # Create HTML for the colored table
+                    html_table = "<table class='gradio-dataframe'><thead><tr>"
+                    # Add headers
+                    for col in lang_df.columns:
+                        if col not in ['origin', 'coming_soon']:
+                            html_table += f"<th>{col}</th>"
+                    html_table += "</tr></thead><tbody>"
+                    # Add rows with appropriate background colors
+                    for _, row in lang_df.iterrows():
+                        origin = row['origin'] if 'origin' in row else 'CN'
+                        coming_soon = row.get('coming_soon', False)
+                        html_table += f"<tr data-origin='{origin}'>"
+                        for col in lang_df.columns:
+                            if col not in ['origin', 'coming_soon']:
+                                if coming_soon and col != 'Model':
+                                    if col == 'Average':
+                                        # Add "Coming Soon" text in italics
+                                        html_table += "<td><span style='font-style: italic; color: #666;'>Coming Soon</span></td>"
+                                    else:
+                                        html_table += "<td></td>"
+                                else:
+                                    html_table += f"<td>{row[col]}</td>"
+                        html_table += "</tr>"
+                    html_table += "</tbody></table>"
+                    gr.HTML(html_table)
+                    # Add color legend
+                    gr.HTML("""
+                    <div style="margin-top: 20px; margin-bottom: 40px;">
+                        <div style="font-weight: bold; margin-bottom: 10px;">Model Origin:</div>
+                        <div style="display: flex; align-items: center; margin-bottom: 8px;">
+                            <div style="width: 20px; height: 20px; background-color: rgba(0, 0, 255, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
+                            <div>European Union</div>
+                        </div>
+                        <div style="display: flex; align-items: center;">
+                            <div style="width: 20px; height: 20px; background-color: rgba(255, 0, 0, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
+                            <div>China</div>
+                        </div>
+                    </div>
+                    """)
+        # Footer section
+        gr.Markdown("""
+        ---
+        ### Additional Information
+        - Scores are updated regularly as new models are evaluated
+        - All evaluations use the same test set for fair comparison
+        - Models are evaluated on both English and French datasets to assess cross-lingual capabilities
+        - Color coding indicates model origin (Blue = EU, Red = Chinese)
+        ### Citation
+        If you use these benchmarks in your research, please cite:
+        ```
+        @article{visual_embeddings_benchmark_2024,
+            title={Cross-lingual Visual Embeddings Benchmark},
+            author={[Your Name]},
+            year={2024}
+        }
+        ```
+        """)
+    return demo
+# Create and launch the interface
+if __name__ == "__main__":
+    # Build the Blocks app and launch it only when this file is run as a
+    # script, so importing the module has no side effects.
+    demo = create_leaderboard()
+    demo.launch()

racine.svg ADDED Viewed

scores.json ADDED Viewed

	@@ -0,0 +1,163 @@

+{
+  "llamaindex/vdr-2b-multi-v1 (1536 dim) (960 max pixels)": {
+    "ENERGY_EN": {
+        "en": 0.9064713561013534,
+        "fr": 0.8915116814696813,
+        "de": 0.8771447513905304,
+        "it": 0.8822130007875514,
+        "es": 0.8890421421056137
+    },
+    "ENERGY_FR": {
+        "en": 0.8664716145521915,
+        "fr": 0.8724679747924354,
+        "de": 0.7969272485807078,
+        "it": 0.8369809864805748,
+        "es": 0.8398171676654868
+    }
+  },
+  "llamaindex/vdr-2b-multi-v1 (1536 dim) (768 max pixels)": {
+    "ENERGY_EN": {
+        "en": 0.9056966191854877,
+        "fr": 0.8814481576759942,
+        "de": 0.8677028930739694,
+        "it": 0.887347176313906,
+        "es": 0.8836230691570296
+    },
+    "ENERGY_FR": {
+        "en": 0.8600078106111717,
+        "fr": 0.875590870797616,
+        "de": 0.7994682538707824,
+        "it": 0.8329727980886149,
+        "es": 0.8373233417400284
+    }
+  },
+  "llamaindex/vdr-2b-multi-v1 (768 dim) (960 max pixels)": {
+    "ENERGY_EN": {
+        "en": 0.9040759668179204,
+        "fr": 0.8746541123436405,
+        "de": 0.8560369810570134,
+        "it": 0.8554604694074869,
+        "es": 0.8559578821798726
+    },
+    "ENERGY_FR": {
+        "en": 0.8260181607076341,
+        "fr": 0.856274189278424,
+        "de": 0.7687147115662343,
+        "it": 0.8059781690988007,
+        "es": 0.8164392225457765
+    }
+  },
+  "marco/mcdse-2b-v1 (1536 dim) (960 max pixels)": {
+    "ENERGY_EN": {
+        "en": 0.8864914044758345,
+        "fr": 0.8581359097016441,
+        "de": 0.8607091625368953,
+        "it": 0.8539746155123089,
+        "es": 0.8670746944536166
+    },
+    "ENERGY_FR": {
+        "en": 0.8363043545751958,
+        "fr": 0.8344453830143979,
+        "de": 0.8013868818049785,
+        "it": 0.8271768291414843,
+        "es": 0.8266757566975349
+    }
+  },
+  "marco/mcdse-2b-v1 (768 dim) (960 max pixels)": {
+    "ENERGY_EN": {
+        "en": 0.8755419235816851,
+        "fr": 0.8573657099961326,
+        "de": 0.8481401154301397,
+        "it": 0.8506702006425194,
+        "es": 0.854859417172228
+    },
+    "ENERGY_FR": {
+        "en": 0.8199730664365921,
+        "fr": 0.8313944410898241,
+        "de": 0.79254381618098,
+        "it": 0.8149253984511224,
+        "es": 0.8115440946149329
+    }
+  },
+  "MrLight/dse-qwen2-2b-mrl-v1 (1024 max pixels)": {
+    "ENERGY_EN": {
+        "en": 0.8858914849980944,
+        "fr": 0.8319955161103443,
+        "de": 0.8007595660782697,
+        "it": 0.7884830257969229,
+        "es": 0.8109588364468638
+    },
+    "ENERGY_FR": {
+        "en": 0.7999719292959505,
+        "fr": 0.7814200135493101,
+        "de": 0.6931412447554907,
+        "it": 0.7124515040042555,
+        "es": 0.7407532416059531
+    }
+  },
+  "vidore/colqwen2-v1.0": {
+    "ENERGY_EN": {
+        "en": 0.9450269368391911,
+        "fr": 0.8799090261578681,
+        "de": 0.8941818719335239,
+        "it": 0.8928554849519516,
+        "es": 0.8973223517567471
+    },
+    "ENERGY_FR": {
+        "en": 0.797855079299299,
+        "fr": 0.8427709258268349,
+        "de": 0.7758948792503111,
+        "it": 0.8388839166668723,
+        "es": 0.8330444309570463
+    }
+  },
+  "racineai/AMPERE-1 (1536 dim) (768 max pixels)": {
+    "ENERGY_EN": {
+        "en": 0.9189998628097908,
+        "fr": 0.9062905947057467,
+        "de": 0.8924913100154964,
+        "it": 0.9014115284688254,
+        "es": 0.9054600624422264
+    },
+    "ENERGY_FR": {
+        "en": 0.8478239831155939,
+        "fr": 0.8830485083419397,
+        "de": 0.8368160139139695,
+        "it": 0.8617192303292741,
+        "es": 0.8589934133953208
+    }
+  },
+  "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct": {
+    "ENERGY_EN": {
+      "en": 0.8543160297514112,
+      "fr": 0.8233691101050026,
+      "de": 0.8144689878335026,
+      "it": 0.8226984776178596,
+      "es": 0.8435247185057887
+    },
+    "ENERGY_FR": {
+        "en": 0.7948213876766665,
+        "fr": 0.8141921315218869,
+        "de": 0.7879807743413478,
+        "it": 0.8203145999058352,
+        "es": 0.8279510851214207
+    }
+  },
+  "racineai/smolvlm-2b-dse": {
+    "origin": "EU",
+    "ENERGY_EN": {
+      "en": 0.8867680191920602,
+      "fr": 0.7577338662000416,
+      "de": 0.7085700422386438,
+      "it": 0.7922815982637218,
+      "es": 0.8241859159760317
+    },
+    "ENERGY_FR": {
+      "en": 0.7567789969566571,
+      "fr": 0.8282483912934573,
+      "de": 0.6031166536296358,
+      "it": 0.7530777182006402,
+      "es": 0.757678752178639
+    }
+  }
+}