averoo committed on
Commit cbf9114 · verified · 1 Parent(s): 119f89d

Update leaderboard display

Files changed (1)
  1. app.py +114 -114
app.py CHANGED
@@ -1,114 +1,114 @@

import gradio as gr
import json
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

def load_results():
    with open('results.json', 'r') as f:
        return json.load(f)

def create_metrics_df(results):
    rows = []
    for r in results:
        row = {
            'Model': r['model_name'],
            'Timestamp': r['timestamp'],
            'Embeddings': r['config']['embedding_model'],
            'Retriever': r['config']['retriever_type'],
            'Top-K': r['config']['retrieval_config'].get('top_k', 'N/A')
        }

        # Add metrics
        metrics = r['metrics']
        for category in ['retrieval', 'generation']:
            if category in metrics:
                for metric_name, value in metrics[category].items():
                    row[f"{category}_{metric_name}"] = round(value, 4)

        rows.append(row)

    return pd.DataFrame(rows)

def create_comparison_plot(df, metric_category):
    metrics = [col for col in df.columns if col.startswith(metric_category)]
    if not metrics:
        return None

    fig = go.Figure()
    for metric in metrics:
        fig.add_trace(go.Bar(
            name=metric.split('_')[-1],
            x=df['Model'],
            y=df[metric],
            text=df[metric].round(3),
            textposition='auto',
        ))

    fig.update_layout(
        title=f"{metric_category.capitalize()} Metrics Comparison",
        xaxis_title="Model",
        yaxis_title="Score",
        barmode='group'
    )
    return fig

def create_interface():
    results = load_results()
    df = create_metrics_df(results)

    with gr.Blocks() as demo:
        gr.Markdown("# RAG Evaluation Leaderboard")

        with gr.Tabs():
            with gr.Tab("Leaderboard"):
                gr.Dataframe(
                    df,
                    headers=df.columns.tolist(),
                    interactive=False
                )

            with gr.Tab("Retrieval Metrics"):
                gr.Plot(create_comparison_plot(df, 'retrieval'))

            with gr.Tab("Generation Metrics"):
                gr.Plot(create_comparison_plot(df, 'generation'))

            with gr.Tab("Configuration Details"):
                config_df = df[['Model', 'Embeddings', 'Retriever', 'Top-K', 'Timestamp']]
                gr.Dataframe(config_df)

        gr.Markdown('''
        ## How to Submit

        To submit your results:
        ```python
        from rag_leaderboard import RAGLeaderboard

        # Initialize leaderboard
        leaderboard = RAGLeaderboard(
            repo_id="your-username/repo-name",
            token="your-hf-token"
        )

        # Submit results
        leaderboard.submit_results(
            model_name="Your Model Name",
            metrics={
                "retrieval": {"hit_rate": 0.8, "mrr": 0.6},
                "generation": {"rouge1": 0.7, "rouge2": 0.5, "rougeL": 0.6}
            },
            config={
                "embedding_model": "your-embedding-model",
                "retriever_type": "dense",
                "retrieval_config": {"top_k": 3}
            }
        )
        ```
        ''')

    return demo

demo = create_interface()
demo.launch()
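
For context, the app reads its data from a `results.json` file in the working directory: `load_results()` loads a list of submission records, and `create_metrics_df()` expects each record to carry `model_name`, `timestamp`, a nested `config`, and nested `metrics`. The snippet below is a minimal sketch of a file in that shape, inferred from the keys the code accesses; the file name comes from the code, but every value shown is a placeholder, not a real result.

```python
import json

# Minimal placeholder record matching the keys read by create_metrics_df().
# All values are illustrative only.
sample_results = [
    {
        "model_name": "example-rag-pipeline",    # -> 'Model' column
        "timestamp": "2024-01-01T00:00:00",      # -> 'Timestamp' column
        "config": {
            "embedding_model": "example-embedding-model",  # -> 'Embeddings'
            "retriever_type": "dense",                      # -> 'Retriever'
            "retrieval_config": {"top_k": 3},               # -> 'Top-K'
        },
        "metrics": {
            "retrieval": {"hit_rate": 0.8, "mrr": 0.6},
            "generation": {"rouge1": 0.7, "rouge2": 0.5, "rougeL": 0.6},
        },
    }
]

# Write the file that load_results() opens at startup.
with open("results.json", "w") as f:
    json.dump(sample_results, f, indent=2)
```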