FormulaOne-Leaderboard

Runtime error

App Files Community

galb-dai committed on Aug 14

Commit

06f2a09

1 Parent(s): d7db717

Updated.

Browse files

Files changed (2) hide show

app.py +99 -29
src/display/css_html_js.py +23 -0

app.py CHANGED Viewed

@@ -2,7 +2,7 @@
 import gradio as gr
 import pandas as pd
-import plotly.graph_objects as go  # NEW: for interactive chart
 from apscheduler.schedulers.background import BackgroundScheduler
 from gradio_leaderboard import Leaderboard, SelectColumns
 from huggingface_hub import whoami
@@ -233,42 +233,96 @@ STATIC_RESULTS = {
     },
 }
 def build_accuracy_figure(tier: str):
-    """Create interactive bar chart with hover showing 'solved / total'."""
-    results = STATIC_RESULTS.get(tier, {})
     total = TIER_TOTALS[tier]
-    x = MODELS_ORDER
-    y = [results[m] for m in x]
-    hover = [f"{m}<br><b>{v}/{total}</b> problems solved" for m, v in zip(x, y)]
-    fig = go.Figure(
-        data=[
-            go.Bar(
-                x=x,
-                y=y,
-                text=[f"{v}/{total}" for v in y],
-                textposition="auto",
-                hovertext=hover,
-                hoverinfo="text",
-                marker_line_width=0.5,
             )
-        ]
-    )
     fig.update_layout(
         template="plotly_white",
-        margin=dict(l=30, r=20, t=10, b=40),
-        yaxis=dict(title="# Problems Solved", range=[0, total], dtick=max(5, total // 10)),
-        xaxis=dict(title=None),
         height=420,
     )
     return fig
-# Precompute initial figure (Warmup)
 _initial_accuracy_fig = build_accuracy_figure("Warmup")
 # Force light theme even if HF user prefers dark
 blocks = gr.Blocks(
     css=custom_css,
@@ -278,14 +332,13 @@ blocks = gr.Blocks(
 with blocks:
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        # === NEW LANDING TAB (first) ===
-        with gr.TabItem("Model Accuracy on FormulaOne", id=0, elem_id="landing-accuracy-tab"):
             gr.Markdown(
                 "The chart below summarizes static (non-live) results for model performance on FormulaOne.",
                 elem_classes="markdown-text",
             )
-            # Selector aligned to the top-right (see CSS)
             with gr.Row(elem_id="f1-tier-select-row"):
                 tier_selector = gr.Radio(
                     choices=list(TIER_TOTALS.keys()),
@@ -295,15 +348,32 @@ with blocks:
                     elem_id="f1-tier-select",
                 )
-            accuracy_plot = gr.Plot(value=_initial_accuracy_fig)
-            # Wire selector → plot
             tier_selector.change(
                 lambda t: build_accuracy_figure(t),
                 inputs=tier_selector,
                 outputs=accuracy_plot,
             )
         # Existing "What is FormulaOne" tab
         with gr.TabItem("What is FormulaOne", id=1, elem_id="what-is-tab"):

 import gradio as gr
 import pandas as pd
+import plotly.graph_objects as go
 from apscheduler.schedulers.background import BackgroundScheduler
 from gradio_leaderboard import Leaderboard, SelectColumns
 from huggingface_hub import whoami
     },
 }
+MODEL_RELEASES = {
+    "GPT-5": "2025-08-07",
+    "Gemini 2.5 Pro": "2025-03-25",
+    "Grok 4": "2025-07-09",
+    "Claude Opus 4": "2025-05-22",
+    "o3 Pro": "2025-06-10",
+}
+TIER_TOTALS = {"Warmup": 100, "Tier 1": 100, "Tier 2": 20}
+MODELS_ORDER = ["GPT-5", "Gemini 2.5 Pro", "Grok 4", "Claude Opus 4", "o3 Pro"]
+ACCURACY_PCT = {
+    "Warmup": {
+        "GPT-5": 38,
+        "Gemini 2.5 Pro": 35,
+        "Grok 4": 28,
+        "Claude Opus 4": 32,
+        "o3 Pro": 30,
+    },
+    "Tier 1": {
+        "GPT-5": 3,
+        "Gemini 2.5 Pro": 2,
+        "Grok 4": 1,
+        "Claude Opus 4": 2,
+        "o3 Pro": 2,
+    },
+    "Tier 2": {
+        "GPT-5": 0,
+        "Gemini 2.5 Pro": 0,
+        "Grok 4": 0,
+        "Claude Opus 4": 0,
+        "o3 Pro": 0,
+    },
+}
 def build_accuracy_figure(tier: str):
+    """Interactive scatter: x = release date, y = accuracy (%). Hover shows solved/total."""
     total = TIER_TOTALS[tier]
+    fig = go.Figure()
+    for model in MODELS_ORDER:
+        date_str = MODEL_RELEASES[model]
+        y = ACCURACY_PCT[tier][model]
+        solved = round(y * total / 100)
+        fig.add_trace(
+            go.Scatter(
+                x=[date_str],
+                y=[y],
+                mode="markers",
+                name=model,
+                marker=dict(size=12, line=dict(width=1)),
+                hovertemplate=(
+                    f"<b>{model}</b><br>"
+                    "Release: %{x|%b %d, %Y}<br>"
+                    "Accuracy: %{y:.1f}%<br>"
+                    f"Solved: {solved}/{total}"
+                    "<extra></extra>"
+                ),
             )
+        )
+    # Comfortable y-range (dynamic ceiling for readability)
+    max_y = max(ACCURACY_PCT[tier].values()) or 1
+    upper = max(1, math.ceil(max_y * 1.25))
     fig.update_layout(
         template="plotly_white",
         height=420,
+        margin=dict(l=30, r=120, t=10, b=40),  # extra right room for legend
+        xaxis=dict(
+            title=None,
+            type="date",
+            tickformat="%b %Y",
+            showgrid=True,
+        ),
+        yaxis=dict(
+            title="Accuracy (%)",
+            range=[0, upper],
+            dtick=max(1, upper // 5),
+            showgrid=True,
+        ),
+        legend=dict(title="Models", orientation="v", y=1, x=1.02, yanchor="top"),
+        hovermode="closest",
     )
     return fig
 _initial_accuracy_fig = build_accuracy_figure("Warmup")
 # Force light theme even if HF user prefers dark
 blocks = gr.Blocks(
     css=custom_css,
 with blocks:
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        with gr.TabItem("FormulaOne", id=0, elem_id="landing-accuracy-tab"):
             gr.Markdown(
                 "The chart below summarizes static (non-live) results for model performance on FormulaOne.",
                 elem_classes="markdown-text",
             )
+            # Pill-style selector aligned to the top-right
             with gr.Row(elem_id="f1-tier-select-row"):
                 tier_selector = gr.Radio(
                     choices=list(TIER_TOTALS.keys()),
                     elem_id="f1-tier-select",
                 )
+            accuracy_plot = gr.Plot(value=_initial_accuracy_fig, elem_id="f1-accuracy-plot")
             tier_selector.change(
                 lambda t: build_accuracy_figure(t),
                 inputs=tier_selector,
                 outputs=accuracy_plot,
             )
+            # Footnote (sampling + prompt details)
+            gr.Markdown(
+                """
+<div class="f1-container">
+<p class="f1-p" style="font-size:0.95rem;color:var(--f1-subtle);">
+    <em>Footnote.</em> All models were sampled with their highest available reasoning settings and a generous token budget.
+    We also used a diverse few-shot prompt that is highly supportive for these problems, covering many of the subtle
+    details inherent in the tasks (state design, invariants, and bag transformations).
+</p>
+</div>
+        """,
+                elem_classes="markdown-text",
+            )
+            # "Learn more" link to the explainer tab
+            gr.Markdown(
+                '<div class="f1-container"><p><a class="f1-a" href="#what-is-tab">Learn more about FormulaOne.</a></p></div>'
+            )
         # Existing "What is FormulaOne" tab
         with gr.TabItem("What is FormulaOne", id=1, elem_id="what-is-tab"):

src/display/css_html_js.py CHANGED Viewed

@@ -21,6 +21,29 @@ custom_css = """
 /* NEW: landing tab width + tier selector alignment */
 #landing-accuracy-tab { max-width: 800px; margin-left: auto; margin-right: auto; }
 #f1-tier-select-row { justify-content: flex-end; margin-bottom: 6px; }
 /* Text */
 .f1-p, .f1-li { line-height: 1.75; color: #374151; text-wrap: pretty; overflow-wrap: break-word; hyphens: auto; }

 /* NEW: landing tab width + tier selector alignment */
 #landing-accuracy-tab { max-width: 800px; margin-left: auto; margin-right: auto; }
 #f1-tier-select-row { justify-content: flex-end; margin-bottom: 6px; }
+#f1-tier-select-row { justify-content: flex-end; margin-bottom: 6px; }
+#f1-tier-select .wrap {
+  display: inline-flex;
+  gap: 6px;
+  padding: 4px;
+  background: #ffffff;
+  border: 1px solid var(--f1-border);
+  border-radius: 999px;
+}
+#f1-tier-select input[type="radio"] { display: none; }
+#f1-tier-select label {
+  border: none;
+  border-radius: 999px;
+  padding: 6px 12px;
+  background: transparent;
+  cursor: pointer;
+}
+#f1-tier-select input[type="radio"]:checked + span {
+  background: #eef2ff; /* subtle non-white for selected pill */
+  border-radius: 999px;
+  padding: 6px 12px;
+  box-shadow: 0 1px 2px rgba(0,0,0,0.04);
+}
 /* Text */
 .f1-p, .f1-li { line-height: 1.75; color: #374151; text-wrap: pretty; overflow-wrap: break-word; hyphens: auto; }