"""Gradio app that runs a tool-assisted OpenAI agent on the GAIA questions.

The app fetches the question list from the scoring API, answers each question
with :class:`ToolEnhancedAgent`, and submits the answers on behalf of the
logged-in Hugging Face user.
"""

import datetime
import math
import os
import re
from urllib.parse import quote

import gradio as gr
import pandas as pd
import requests
from openai import OpenAI

# Constants
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Tools implemented by ToolEnhancedAgent.use_tool.
_TOOL_NAMES = ("calculator", "date", "wikipedia")

# Matches the first "[toolname] rest-of-line" tag in a model answer. Only
# known tool names are matched so that ordinary bracketed text (citations,
# list literals, ...) is not mistaken for a tool request.
_TOOL_TAG_RE = re.compile(
    r"\[(" + "|".join(_TOOL_NAMES) + r")\](.*)",
    re.IGNORECASE,
)

# Seconds to wait for the Wikipedia summary endpoint before giving up.
_WIKIPEDIA_TIMEOUT = 10


class ToolEnhancedAgent:
    """Question-answering agent built on the OpenAI 1.x client with simple tools."""

    def __init__(self):
        """Create the OpenAI client.

        Raises:
            ValueError: If the ``OPENAI_API_KEY`` environment variable is
                missing or empty.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY not found in environment variables.")
        self.client = OpenAI(api_key=api_key)
        print("ToolEnhancedAgent initialized with OpenAI GPT model.")

    def use_tool(self, tool_name: str, input_text: str) -> str:
        """Run one of the simple built-in tools and return its result as text.

        Args:
            tool_name: ``"calculator"``, ``"date"`` or ``"wikipedia"``.
            input_text: Tool argument (expression to evaluate or page title);
                ignored by the ``date`` tool.

        Returns:
            The tool output, or a ``"[Tool Error: ...]"`` string when the tool
            is unknown or raises.
        """
        try:
            if tool_name == "calculator":
                # SECURITY: eval() with "__builtins__" set to None is NOT a
                # sandbox -- attribute access on literals can still reach
                # arbitrary objects. input_text comes from model output that
                # is derived from externally supplied questions, so treat it
                # as untrusted and replace this with a real expression parser
                # before exposing the app beyond trusted use.
                return str(eval(input_text, {"__builtins__": None, "math": math}))
            elif tool_name == "date":
                return str(datetime.datetime.now().date())
            elif tool_name == "wikipedia":
                return self.search_wikipedia(input_text)
            else:
                return "[Tool Error: Unknown tool]"
        except Exception as e:
            # Best effort: a failing tool must not abort the whole answer.
            return f"[Tool Error: {e}]"

    def search_wikipedia(self, query: str) -> str:
        """Return the English Wikipedia summary extract for ``query``.

        Args:
            query: Page title to look up.

        Returns:
            The page extract, ``"No summary found."`` when the response has no
            extract, a "No Wikipedia summary" message on a non-200 status, or
            a ``"Wikipedia Error: ..."`` string when the request fails.
        """
        try:
            # The title is a single URL path segment: escape everything,
            # including "/", so spaces and reserved characters do not corrupt
            # the request path.
            title = quote(query, safe="")
            res = requests.get(
                f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}",
                timeout=_WIKIPEDIA_TIMEOUT,  # never hang the whole evaluation run
            )
            if res.status_code == 200:
                return res.json().get("extract", "No summary found.")
            return f"No Wikipedia summary for {query}."
        except Exception as e:
            return f"Wikipedia Error: {e}"

    def __call__(self, question: str) -> str:
        """Answer ``question`` with the chat model, optionally running one tool.

        If the model's answer contains a tag such as ``[calculator] 2+2``, the
        named tool is run on the rest of that line and its result is appended
        to the answer. Only the first tag is processed.

        Args:
            question: The question text.

        Returns:
            The answer text, or ``"[Agent Error: ...]"`` if anything fails;
            this method does not raise.
        """
        # Chain-of-thought prompt with instructions on how to request a tool.
        prompt = (
            "You are an AI assistant that can think step-by-step and use tools when needed.\n"
            f"Question: {question}\n"
            "Answer with your reasoning steps. If needed, mention the tool you want to use like [calculator], [date], [wikipedia]."
        )
        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant using tools and reasoning."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.3,
                max_tokens=700,
            )
            # The API may return a message whose content is None; normalise it
            # to an empty string instead of failing on .strip().
            answer = (response.choices[0].message.content or "").strip()

            # Simple tool simulation: "[calculator] 2+2" -> compute 4 and
            # append the result to the answer.
            match = _TOOL_TAG_RE.search(answer)
            if match:
                tool_name = match.group(1).lower()
                tool_input = match.group(2).strip()
                tool_result = self.use_tool(tool_name, tool_input)
                answer += f"\n\n[Tool used: {tool_name}]\nResult: {tool_result}"
            return answer
        except Exception as e:
            print(f"Agent error: {e}")
            return f"[Agent Error: {e}]"


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, answer them with the agent, and submit the answers.

    Args:
        profile: Profile of the logged-in Hugging Face user, or ``None`` when
            nobody is logged in. Gradio supplies this argument itself based on
            the type annotation.

    Returns:
        A ``(status_message, results)`` tuple where ``results`` is a
        ``pandas.DataFrame`` of the questions and submitted answers, or
        ``None`` when the run stopped before any question was processed.
    """
    if profile is None:
        return "Please login with your Hugging Face account.", None

    username = profile.username
    # Falls back to a placeholder; change it to your own space if needed.
    space_id = os.getenv("SPACE_ID") or "your-username/your-space"
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = ToolEnhancedAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch the questions.
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    answers_payload = []
    results_log = []

    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": answer,
            })
        except Exception as e:
            # Safety net only: the agent reports its own failures as an
            # "[Agent Error: ...]" answer string rather than raising.
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"Agent Error: {e}",
            })

    if not answers_payload:
        return "Agent did not produce answers to submit.", pd.DataFrame(results_log)

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code_url,
        "answers": answers_payload,
    }

    try:
        submit_response = requests.post(submit_url, json=submission_data, timeout=60)
        submit_response.raise_for_status()
        result = submit_response.json()
        status = (
            f"Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"Message: {result.get('message', 'No message')}"
        )
        return status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Benchmark Agent Runner")
    gr.Markdown(
        """
        1. Login with your Hugging Face account.
        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, and submit answers.
        """
    )

    login_btn = gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status_out = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_df = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No `inputs`: Gradio injects the gr.OAuthProfile argument automatically
    # from run_and_submit_all's type annotation. Listing the login button as
    # an input would pass an extra positional argument and make the call fail.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_out, results_df],
    )

if __name__ == "__main__":
    demo.launch(debug=True, share=False)