"""Gradio app: answer the course evaluation questions with an OpenAI chat model and submit them for scoring."""

import os
from typing import Optional

import gradio as gr
import openai
import requests

QUESTIONS_URL = "https://agents-course-unit4-scoring.hf.space/questions"
SUBMIT_URL = "https://agents-course-unit4-scoring.hf.space/submit"
REQUEST_TIMEOUT = 10  # seconds, applied to both the fetch and the submit request


# Use OpenAI GPT to answer the questions.
class GPTAgent:
    """Callable that sends one question to an OpenAI chat model and returns the reply text."""

    def __init__(self):
        # The key is read from the environment; if it is missing, every call
        # fails inside __call__ and is reported as an "ERROR: ..." answer.
        self.api_key = os.getenv("OPENAI_API_KEY")
        openai.api_key = self.api_key
        self.model = "gpt-4o"  # or "gpt-3.5-turbo" if you do not have access

    def __call__(self, question: str) -> str:
        """Return the model's answer to *question*, or ``"ERROR: <details>"`` on failure.

        Exceptions are deliberately converted to a string so that one failing
        question does not abort the whole evaluation run.
        """
        try:
            response = openai.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": question},
                ],
            )
            # `content` may be None (e.g. a refusal or tool-only reply); treat
            # that as an empty answer instead of raising on `.strip()`.
            content = response.choices[0].message.content
            return (content or "").strip()
        except Exception as e:
            return f"ERROR: {e}"


def _resolve_username(user_profile) -> str:
    """Return a username from an OAuth profile object or a plain string, else "anonymous"."""
    if not user_profile:
        return "anonymous"
    if isinstance(user_profile, str):
        return user_profile.strip() or "anonymous"
    username = getattr(user_profile, "username", None)
    return str(username).strip() if username else "anonymous"


# Main function: run the evaluation and submit the answers.
def run_and_submit_all(user_profile: Optional[gr.OAuthProfile]):
    """Fetch the questions, answer each one with ``GPTAgent``, and submit the answers.

    Args:
        user_profile: The logged-in user's profile. Gradio injects it based on
            the ``Optional[gr.OAuthProfile]`` annotation (``None`` when nobody
            is logged in). A plain username string is also accepted.

    Returns:
        A ``(status, rows)`` tuple: ``status`` is a human-readable message and
        ``rows`` is a list of ``[task_id, question, answer]`` lists for the
        results table (empty when the questions could not be fetched).
    """
    username = _resolve_username(user_profile)
    agent = GPTAgent()

    try:
        response = requests.get(QUESTIONS_URL, timeout=REQUEST_TIMEOUT)
        # Without this, an HTTP error body would be parsed as if it were the
        # question list and fail later with a confusing error.
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"❌ Error fetching questions: {e}", []

    if not isinstance(questions, list) or not questions:
        return "❌ Error fetching questions: empty or unexpected response", []

    answers = []
    results = []
    for q in questions:
        if not isinstance(q, dict):
            continue
        task_id = q.get("task_id")
        question_text = q.get("question")
        # Skip malformed items rather than crashing the whole run.
        if task_id is None or question_text is None:
            continue
        answer = agent(question_text)
        answers.append({"task_id": task_id, "submitted_answer": answer})
        results.append([task_id, question_text, answer])

    # NOTE(review): the payload field names (username / agent_code /
    # submitted_answer) and the response fields read below follow the scoring
    # service's /submit schema as recalled by the reviewer; the previous code
    # sent "user" / "answer". Confirm against the service's OpenAPI docs.
    space_id = os.getenv("SPACE_ID")
    if space_id:
        agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    else:
        agent_code = "local run (SPACE_ID not set)"
    payload = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }

    try:
        res = requests.post(SUBMIT_URL, json=payload, timeout=REQUEST_TIMEOUT)
        res.raise_for_status()
        result = res.json()  # parse the body once
        score = float(result.get("score") or 0.0)
        # Fall back to the key names the previous code read.
        correct = result.get("correct_count", result.get("correct", 0))
        total = result.get("total_attempted", result.get("total", len(questions)))
        status = f"✅ User: {username}\nScore: {score:.1f}%\nCorrect: {correct}/{total}"
        return status, results
    except Exception as e:
        return f"❌ Error submitting answers: {e}", results


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Benchmark Agent with GPT-4o")
    gr.Markdown(
        """
        1. Login using your Hugging Face account below.
        2. Click 'Run Evaluation & Submit' to start.
        3. Wait for your agent to answer all questions and submit.
        """
    )

    login = gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit")
    status = gr.Textbox(label="Status", interactive=False, lines=4)
    results = gr.DataFrame(headers=["Task ID", "Question", "Answer"])

    # The login button is not passed as an input: as an input it would only
    # supply the button's label text. Gradio supplies the profile through the
    # type annotation on `run_and_submit_all` instead.
    run_btn.click(fn=run_and_submit_all, outputs=[status, results])

if __name__ == "__main__":
    demo.launch()