import os
import gradio as gr
import requests
import openai
# Use OpenAI GPT to answer questions
class GPTAgent:
    """Callable agent that answers a question with an OpenAI chat model.

    The API key is read from the ``OPENAI_API_KEY`` environment variable and
    assigned to ``openai.api_key`` when the agent is constructed.
    """

    def __init__(self, model: str = "gpt-4o") -> None:
        """Configure the OpenAI module and remember which model to use.

        Args:
            model: Chat model name. Defaults to "gpt-4o"; pass
                "gpt-3.5-turbo" if the account has no access to it.
        """
        self.api_key = os.getenv("OPENAI_API_KEY")
        openai.api_key = self.api_key
        self.model = model

    @staticmethod
    def _extract_text(response) -> str:
        """Return the stripped text of the first choice, or "" if there is none.

        Guards against an empty ``choices`` list and against a first message
        whose ``content`` is ``None``; both would otherwise raise and be
        reported to the caller as an unrelated-looking error string.
        """
        choices = getattr(response, "choices", None)
        if not choices:
            return ""
        content = choices[0].message.content
        return content.strip() if content else ""

    def __call__(self, question: str) -> str:
        """Send *question* to the chat model and return its answer.

        Args:
            question: The user question, sent as the single user message.

        Returns:
            The model's reply with surrounding whitespace removed, or a
            string of the form ``"ERROR: <exception>"`` if the request or
            the response handling raised.
        """
        try:
            response = openai.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": question},
                ],
            )
            return self._extract_text(response)
        except Exception as e:
            # Deliberate best-effort: callers expect a string for every
            # question, so failures are reported in-band instead of raised.
            return f"ERROR: {e}"
# Main function for evaluation and submission
def _resolve_username(user_profile):
    """Return a display name for *user_profile*, or "anonymous" if absent."""
    # Accept either a plain string or an object exposing ``.username``.
    name = getattr(user_profile, "username", user_profile)
    name = str(name).strip() if name else ""
    return name or "anonymous"


def run_and_submit_all(user_profile):
    """Fetch the questions, answer each with ``GPTAgent``, and submit them.

    Args:
        user_profile: Identifies the user. May be a string, an object with a
            ``username`` attribute, or a falsy value (treated as
            "anonymous").

    Returns:
        A ``(status, rows)`` tuple. ``status`` is a human-readable message
        (success summary or an error description). ``rows`` is a list of
        ``[task_id, question, answer]`` lists; it is empty when no question
        could be answered.
    """
    username = _resolve_username(user_profile)
    agent = GPTAgent()
    questions_url = "https://agents-course-unit4-scoring.hf.space/questions"
    submit_url = "https://agents-course-unit4-scoring.hf.space/submit"

    # This function is a UI callback, so failures are reported as a status
    # string rather than propagated.
    try:
        response = requests.get(questions_url, timeout=10)
        # Without this an HTTP error body would be parsed as if it were the
        # question list.
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"❌ Error fetching questions: {e}", []

    if not isinstance(questions, list) or not questions:
        return "❌ Error fetching questions: no questions received", []

    answers = []
    results = []
    for q in questions:
        # Skip entries that do not carry both required fields instead of
        # crashing half-way through the run.
        if not isinstance(q, dict):
            continue
        task_id = q.get("task_id")
        question_text = q.get("question")
        if task_id is None or question_text is None:
            continue
        answer = agent(question_text)
        answers.append({"task_id": task_id, "answer": answer})
        results.append([task_id, question_text, answer])

    if not answers:
        return "❌ Error fetching questions: no valid questions received", []

    # NOTE(review): the field names sent here ("user", "answer") and read
    # back below ("score", "correct", "total") are not validated anywhere in
    # this file -- confirm them against the scoring service's schema.
    payload = {
        "user": username,
        "answers": answers,
    }
    try:
        res = requests.post(submit_url, json=payload, timeout=10)
        res.raise_for_status()
        result = res.json()  # parse the body once
        score = result.get("score", 0.0)
        correct = result.get("correct", 0)
        total = result.get("total", len(questions))
        status = f"✅ User: {username}\nScore: {score:.1f}%\nCorrect: {correct}/{total}"
    except Exception as e:
        return f"❌ Error submitting answers: {e}", results
    return status, results
# Gradio UI
# Build the page: a title, usage instructions, a login button, a run button,
# and two outputs (status text and a table of answers).
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Benchmark Agent with GPT-4o")
    gr.Markdown(
        "\n"
        "1. Login using your Hugging Face account below.\n"
        "2. Click 'Run Evaluation & Submit' to start.\n"
        "3. Wait for your agent to answer all questions and submit.\n"
    )

    login = gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit")

    status = gr.Textbox(label="Status", interactive=False, lines=4)
    results = gr.DataFrame(headers=["Task ID", "Question", "Answer"])

    # NOTE(review): the login button itself is passed as the input, so the
    # callback presumably receives the button's value rather than a user
    # profile -- confirm against Gradio's OAuth documentation.
    run_btn.click(
        fn=run_and_submit_all,
        inputs=[login],
        outputs=[status, results],
    )

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()