hasanain9's picture
test3
3628aaf verified
raw
history blame
5.83 kB
import datetime
import os
from urllib.parse import quote

import gradio as gr
import pandas as pd
import requests
from openai import OpenAI
class ToolEnhancedAgent:
    """Question-answering agent backed by the OpenAI chat completions API.

    Calling an instance sends the question to the model and returns its text
    reply. ``use_tool`` exposes three local helpers (``calculator``, ``date``
    and ``wikipedia``); nothing in this class invokes them automatically.
    """

    # Seconds to wait for Wikipedia; bounded so a stalled connection cannot
    # hang the caller forever.
    _WIKIPEDIA_TIMEOUT = 15

    def __init__(self):
        """Create the OpenAI client from the ``OPENAI_API_KEY`` variable.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is unset or empty.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY not found in environment variables.")
        self.client = OpenAI(api_key=api_key)
        print("ToolEnhancedAgent initialized.")

    def use_tool(self, tool_name: str, input_text: str) -> str:
        """Run one of the built-in tools and return its result as text.

        Args:
            tool_name: ``"calculator"``, ``"date"`` or ``"wikipedia"``.
            input_text: Expression to evaluate (calculator) or page title to
                look up (wikipedia); ignored by the date tool.

        Returns:
            The tool output, ``"[Unknown tool]"`` for an unrecognised name, or
            ``"[Tool error: ...]"`` if the tool raised.
        """
        try:
            if tool_name == "calculator":
                # SECURITY: raw eval() executes arbitrary Python. Very basic;
                # never feed it untrusted (user- or model-supplied) text in
                # production -- replace with a restricted expression parser.
                return str(eval(input_text))
            if tool_name == "date":
                return str(datetime.datetime.now().date())
            if tool_name == "wikipedia":
                return self.search_wikipedia(input_text)
            return "[Unknown tool]"
        except Exception as e:
            return f"[Tool error: {e}]"

    @staticmethod
    def _wikipedia_summary_url(query: str) -> str:
        """Build the REST summary URL for *query* with the title escaped."""
        # Spaces become underscores (Wikipedia's title form) and every
        # reserved character, including "/", "?" and "#", is percent-encoded
        # so the title stays a single path segment.
        title = quote(query.strip().replace(" ", "_"), safe="")
        return f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}"

    def search_wikipedia(self, query: str) -> str:
        """Return the English Wikipedia summary extract for *query*.

        Args:
            query: Page title to look up.

        Returns:
            The ``extract`` field of the summary response, a "not found"
            message for any non-200 status, or an error message if the
            request or JSON decoding fails. Never raises.
        """
        try:
            res = requests.get(
                self._wikipedia_summary_url(query),
                timeout=self._WIKIPEDIA_TIMEOUT,
            )
            if res.status_code == 200:
                return res.json().get("extract", "No summary found.")
            return f"No Wikipedia summary for '{query}'."
        except Exception as e:
            return f"Wikipedia API error: {e}"

    def __call__(self, question: str) -> str:
        """Ask the model to answer *question* and return its reply.

        Args:
            question: The task text to answer.

        Returns:
            The stripped model reply, or ``"[Agent error: ...]"`` if the API
            call fails. Never raises.
        """
        # Chain-of-thought style prompt so the model reasons step by step.
        prompt = (
            "You are an assistant that solves problems step-by-step, and you can use tools like calculator, date, and wikipedia if needed.\n"
            f"Question: {question}\n"
            "Answer (think step-by-step and use tools if helpful):"
        )
        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",  # lighter model that is usually sufficient
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that thinks step-by-step and can use tools."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.2,
                max_tokens=700,
            )
            # The API may return no text content; treat that as an empty
            # answer instead of failing on None.strip().
            content = response.choices[0].message.content
            answer = (content or "").strip()
            print(f"Generated answer (preview): {answer[:100]}...")
            return answer
        except Exception as e:
            print(f"Agent error: {e}")
            return f"[Agent error: {e}]"
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every scoring-service question with the agent and submit them.

    Args:
        profile: Profile of the logged-in Hugging Face user, or ``None`` when
            nobody is logged in.

    Returns:
        A ``(status, results)`` pair. ``status`` is a human-readable message.
        ``results`` is a ``pandas.DataFrame`` with one row per processed
        question, or ``None`` when the run stops before any question is
        processed (no login, agent construction failure, fetch failure).
    """
    if profile is None:
        return "Please login to Hugging Face to submit answers.", None

    username = profile.username
    api_url = "https://agents-course-unit4-scoring.hf.space"
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = ToolEnhancedAgent()
    except Exception as init_error:
        return f"Agent initialization error: {init_error}", None

    # Download the task list; any HTTP or decoding failure aborts the run.
    try:
        fetched = requests.get(questions_url, timeout=15)
        fetched.raise_for_status()
        questions = fetched.json()
    except Exception as fetch_error:
        return f"Error fetching questions: {fetch_error}", None
    if not (isinstance(questions, list) and questions):
        return "Failed to fetch questions or empty list.", None

    answers = []
    results_log = []
    for entry in questions:
        task_id = entry.get("task_id")
        question_text = entry.get("question")
        if not task_id or question_text is None:
            # Entries without an id or question text cannot be answered.
            continue
        try:
            shown_answer = agent(question_text)
        except Exception as agent_error:
            # Failures are logged in the table but never submitted.
            shown_answer = f"[Agent error: {agent_error}]"
        else:
            answers.append({"task_id": task_id, "submitted_answer": shown_answer})
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": shown_answer,
        })

    if not answers:
        return "Agent did not produce answers.", pd.DataFrame(results_log)

    agent_code_url = f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main"
    submission_payload = {
        "username": username,
        "agent_code": agent_code_url,
        "answers": answers,
    }

    try:
        submit_response = requests.post(submit_url, json=submission_payload, timeout=60)
        submit_response.raise_for_status()
        result = submit_response.json()
        # Pull the reported fields first, then assemble the status text.
        reported_user = result.get('username')
        score = result.get('score', 'N/A')
        correct = result.get('correct_count', '?')
        attempted = result.get('total_attempted', '?')
        message = result.get('message', 'No message')
        final_status = (
            f"Submission Success!\n"
            f"User: {reported_user}\n"
            f"Score: {score}%\n"
            f"Correct: {correct}/{attempted}\n"
            f"Message: {message}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as submit_error:
        return f"Submission error: {submit_error}", pd.DataFrame(results_log)
# Gradio UI: login button, a run button, and two outputs (status text and a
# table of per-question answers).
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Benchmark - Tool Enhanced Agent")
    gr.Markdown(
        "\n"
        "1. Login with your Hugging Face account.\n"
        "2. Click 'Run Evaluation & Submit All Answers' to run the agent on GAIA tasks.\n"
        "3. View your results and submission status.\n"
    )

    login_button = gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_box = gr.Textbox(label="Status / Submission Result", lines=7)
    results_table = gr.DataFrame(label="Questions and Agent Answers")

    # No `inputs`: Gradio injects the `gr.OAuthProfile | None` argument from
    # the handler's type hint. Passing the login button as an input would add
    # a second positional argument to a one-parameter handler.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_box, results_table],
    )

if __name__ == "__main__":
    demo.launch()