"""Gradio app that renders a RAG evaluation leaderboard from ``results.json``."""

import json

import gradio as gr
import pandas as pd
import plotly.express as px  # not referenced below; kept in case other tooling relies on it
import plotly.graph_objects as go

# Metric groups looked up under each result's ``metrics`` mapping.
METRIC_CATEGORIES = ('retrieval', 'generation')

# Columns that every leaderboard row carries regardless of which metrics exist.
BASE_COLUMNS = ['Model', 'Timestamp', 'Embeddings', 'Retriever', 'Top-K']

# Subset (and order) of columns shown on the "Configuration Details" tab.
CONFIG_COLUMNS = ['Model', 'Embeddings', 'Retriever', 'Top-K', 'Timestamp']

# Markdown shown below the tabs. Kept flush-left so the fenced code block is
# not affected by Python source indentation.
SUBMISSION_GUIDE = '''
## How to Submit

To submit your results:

```python
from rag_leaderboard import RAGLeaderboard

# Initialize leaderboard
leaderboard = RAGLeaderboard(
    repo_id="your-username/repo-name",
    token="your-hf-token"
)

# Submit results
leaderboard.submit_results(
    model_name="Your Model Name",
    metrics={
        "retrieval": {"hit_rate": 0.8, "mrr": 0.6},
        "generation": {"rouge1": 0.7, "rouge2": 0.5, "rougeL": 0.6}
    },
    config={
        "embedding_model": "your-embedding-model",
        "retriever_type": "dense",
        "retrieval_config": {"top_k": 3}
    }
)
```
'''


def load_results():
    """Load the submitted evaluation results from ``results.json``.

    Returns:
        The decoded JSON document. The rest of this module iterates it as a
        sequence of per-submission dicts.

    Raises:
        FileNotFoundError: If ``results.json`` is not in the working directory.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open('results.json', 'r', encoding='utf-8') as f:
        return json.load(f)


def _round_metric(value, ndigits=4):
    """Round ``value`` to ``ndigits`` places; return it unchanged if it cannot be rounded."""
    try:
        return round(value, ndigits)
    except TypeError:
        # e.g. ``None`` for a metric that was not computed: keep the entry
        # instead of failing the whole leaderboard.
        return value


def create_metrics_df(results):
    """Flatten result entries into one DataFrame row per submission.

    Each row holds the fixed columns ``Model``, ``Timestamp``, ``Embeddings``,
    ``Retriever`` and ``Top-K``, plus one ``"<category>_<metric>"`` column for
    every metric found under the ``retrieval`` / ``generation`` categories.

    Args:
        results: Iterable of dicts with ``model_name``, ``timestamp``,
            ``config`` (``embedding_model``, ``retriever_type`` and an
            optional ``retrieval_config``) and ``metrics`` keys.

    Returns:
        A ``pandas.DataFrame``. For empty ``results`` the frame has no rows
        but still has the fixed columns, so column selection keeps working.

    Raises:
        KeyError: If an entry lacks one of the required keys.
    """
    rows = []
    for r in results:
        config = r['config']
        # ``top_k`` is optional, so tolerate a missing/null ``retrieval_config`` too.
        retrieval_config = config.get('retrieval_config') or {}
        row = {
            'Model': r['model_name'],
            'Timestamp': r['timestamp'],
            'Embeddings': config['embedding_model'],
            'Retriever': config['retriever_type'],
            'Top-K': retrieval_config.get('top_k', 'N/A'),
        }

        # Add metrics
        metrics = r['metrics']
        for category in METRIC_CATEGORIES:
            for metric_name, value in (metrics.get(category) or {}).items():
                row[f"{category}_{metric_name}"] = _round_metric(value)

        rows.append(row)

    if not rows:
        return pd.DataFrame(columns=BASE_COLUMNS)
    return pd.DataFrame(rows)


def create_comparison_plot(df, metric_category):
    """Build a grouped bar chart comparing models on one metric category.

    Args:
        df: DataFrame as produced by ``create_metrics_df``.
        metric_category: Category name (e.g. ``'retrieval'``); columns named
            ``"<metric_category>_<metric>"`` are plotted.

    Returns:
        A ``plotly.graph_objects.Figure`` with one bar trace per metric, or
        ``None`` when ``df`` has no column for the category.
    """
    prefix = f"{metric_category}_"
    metrics = [col for col in df.columns if col.startswith(prefix)]
    if not metrics:
        return None

    fig = go.Figure()
    for metric in metrics:
        fig.add_trace(go.Bar(
            # Strip only the category prefix so multi-word metric names such
            # as ``hit_rate`` keep their full name in the legend.
            name=metric[len(prefix):],
            x=df['Model'],
            y=df[metric],
            text=df[metric].round(3),
            textposition='auto',
        ))

    fig.update_layout(
        title=f"{metric_category.capitalize()} Metrics Comparison",
        xaxis_title="Model",
        yaxis_title="Score",
        barmode='group',
    )
    return fig


def create_interface():
    """Assemble the leaderboard UI.

    Loads the results, then builds a ``gr.Blocks`` app with four tabs
    (leaderboard table, retrieval plot, generation plot, configuration table)
    followed by the submission instructions.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    results = load_results()
    df = create_metrics_df(results)

    with gr.Blocks() as demo:
        gr.Markdown("# RAG Evaluation Leaderboard")

        with gr.Tabs():
            with gr.Tab("Leaderboard"):
                gr.Dataframe(
                    df,
                    headers=df.columns.tolist(),
                    interactive=False,
                )

            with gr.Tab("Retrieval Metrics"):
                gr.Plot(create_comparison_plot(df, 'retrieval'))

            with gr.Tab("Generation Metrics"):
                gr.Plot(create_comparison_plot(df, 'generation'))

            with gr.Tab("Configuration Details"):
                config_df = df[CONFIG_COLUMNS]
                gr.Dataframe(config_df)

        gr.Markdown(SUBMISSION_GUIDE)

    return demo


# Built and launched at module level, as in the original script.
demo = create_interface()
demo.launch()