# Spaces: Sleeping
import gradio as gr
# LLM performance data with scores.
#
# Maps a task name to a list of (model name, score) tuples, three models per
# task. Scores are percentages and each list is ordered from the highest
# score to the lowest.
performance_data = {
    "Undergraduate level knowledge": [
        ("Claude 3 Opus", 86.8),
        ("GPT-4", 86.4),
        ("Gemini 1.0 Ultra", 83.7),
    ],
    "Graduate level reasoning": [
        ("Claude 3 Opus", 50.4),
        ("Claude 3 Sonnet", 40.4),
        ("GPT-4", 35.7),
    ],
    "Grade school math": [
        ("Claude 3 Opus", 95.0),
        ("Gemini 1.0 Ultra", 94.4),
        ("GPT-4", 92.0),
    ],
    "Math problem-solving": [
        ("Claude 3 Opus", 60.1),
        ("Gemini 1.0 Ultra", 53.2),
        ("GPT-4", 52.9),
    ],
    "Multilingual math": [
        ("Claude 3 Opus", 90.7),
        ("Claude 3 Sonnet", 83.5),
        ("Gemini 1.0 Ultra", 79.0),
    ],
    # Kept in descending score order like every other row (75.9 before 74.4).
    "Code": [
        ("Claude 3 Opus", 84.9),
        ("Claude 3 Haiku", 75.9),
        ("Gemini 1.0 Ultra", 74.4),
    ],
    "Reasoning over text": [
        ("Claude 3 Opus", 83.1),
        ("Gemini 1.0 Ultra", 82.4),
        ("GPT-4", 80.9),
    ],
    "Mixed evaluations": [
        ("Claude 3 Opus", 86.8),
        ("Gemini 1.0 Ultra", 83.6),
        ("GPT-4", 83.1),
    ],
    "Knowledge Q&A": [
        ("Claude 3 Opus", 96.4),
        ("GPT-4", 96.3),
        ("Claude 3 Sonnet", 93.2),
    ],
    "Common Knowledge": [
        ("Claude 3 Opus", 95.4),
        ("GPT-4", 95.3),
        ("Gemini 1.0 Ultra", 87.8),
    ],
}
def recommend_llm(task):
    """Return a ranked, human-readable list of LLMs for *task*.

    Args:
        task: Task name used as a key into ``performance_data``.

    Returns:
        A multi-line string with a header followed by one numbered line per
        model, highest score first, each line terminated by a newline; or
        the string ``"No data available"`` when the task is unknown or has
        no entries.
    """
    recommendations = performance_data.get(task, [])
    if not recommendations:
        return "No data available"

    # Rank by score here so the output does not depend on the table's order.
    ranked = sorted(recommendations, key=lambda entry: entry[1], reverse=True)

    lines = [f"For {task}, the recommended LLMs are:"]
    lines.extend(
        f"{rank}. {model} with a score of {score}%"
        for rank, (model, score) in enumerate(ranked, start=1)
    )
    # Every line, including the last, ends with a newline.
    return "\n".join(lines) + "\n"
# Gradio interface: a dropdown offering every task in performance_data and a
# text box that shows the string returned by recommend_llm.
interface = gr.Interface(
    fn=recommend_llm,
    inputs=gr.Dropdown(list(performance_data), label="Select Task"),
    outputs=gr.Textbox(label="LLM Recommendations"),
    title="LLM Recommendation App",
    description="Select a task to get recommendations for the best LLMs based on performance data.",
)

# Launch the app
interface.launch()