hasanain9 committed on
Commit
d8dafef
·
verified ·
1 Parent(s): 177be6f
Files changed (1) hide show
  1. app.py +67 -54
app.py CHANGED
@@ -5,20 +5,19 @@ import pandas as pd
5
  import gradio as gr
6
  from openai import OpenAI
7
 
8
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
-
10
- # -------- Tool-Enhanced Agent --------
11
  class ToolEnhancedAgent:
12
  def __init__(self):
13
  api_key = os.getenv("OPENAI_API_KEY")
14
  if not api_key:
15
- raise ValueError("OPENAI_API_KEY is not set.")
16
  self.client = OpenAI(api_key=api_key)
17
- print("ToolEnhancedAgent initialized with GPT-4 + CoT + Tools.")
18
 
19
  def use_tool(self, tool_name: str, input_text: str) -> str:
20
  try:
21
  if tool_name == "calculator":
 
22
  return str(eval(input_text))
23
  elif tool_name == "date":
24
  return str(datetime.datetime.now().date())
@@ -31,117 +30,131 @@ class ToolEnhancedAgent:
31
 
32
  def search_wikipedia(self, query: str) -> str:
33
  try:
34
- res = requests.get(f"https://en.wikipedia.org/api/rest_v1/page/summary/{query}")
 
35
  if res.status_code == 200:
36
  return res.json().get("extract", "No summary found.")
37
  else:
38
  return f"No Wikipedia summary found for '{query}'."
39
  except Exception as e:
40
- return f"Wiki error: {e}"
41
 
42
  def __call__(self, question: str) -> str:
43
  prompt = (
44
- "You are a helpful AI assistant. You can use tools (calculator, date, wikipedia). "
45
- "Think step-by-step before answering.\n\n"
46
  f"Question: {question}\n"
47
- "Answer (show thinking steps):"
48
  )
49
-
50
  try:
51
  response = self.client.chat.completions.create(
52
  model="gpt-4",
53
  messages=[
54
- {"role": "system", "content": "You are a smart assistant that uses tools and thinks step-by-step."},
55
  {"role": "user", "content": prompt}
56
  ],
57
  temperature=0.3,
58
  max_tokens=700,
59
  )
60
  answer = response.choices[0].message.content.strip()
61
- print(f"📤 Agent response: {answer[:100]}...")
62
  return answer
63
  except Exception as e:
64
  print(f"[Agent Error]: {e}")
65
  return f"[Agent Error: {e}]"
66
 
67
- # -------- Evaluation & Submission Function --------
68
  def run_and_submit_all(profile: gr.OAuthProfile | None):
69
- space_id = os.getenv("SPACE_ID")
70
  if not profile:
71
- return "Please login with your Hugging Face account.", None
72
- username = profile.username
73
 
74
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
75
- questions_url = f"{DEFAULT_API_URL}/questions"
76
- submit_url = f"{DEFAULT_API_URL}/submit"
 
77
 
78
  try:
79
  agent = ToolEnhancedAgent()
80
  except Exception as e:
81
- return f"Agent init error: {e}", None
82
 
83
  try:
84
  response = requests.get(questions_url, timeout=15)
85
  response.raise_for_status()
86
  questions = response.json()
 
 
87
  except Exception as e:
88
- return f"Failed to fetch questions: {e}", None
89
 
 
90
  results_log = []
91
- answers_payload = []
92
 
93
  for item in questions:
94
  task_id = item.get("task_id")
95
  question_text = item.get("question")
96
- if not task_id or not question_text:
97
  continue
98
  try:
99
  answer = agent(question_text)
 
 
 
 
 
 
100
  except Exception as e:
101
- answer = f"[Agent Error: {e}]"
102
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": answer})
103
- answers_payload.append({"task_id": task_id, "submitted_answer": answer})
 
 
 
 
 
104
 
105
- submission = {
 
 
106
  "username": username,
107
- "agent_code": agent_code,
108
- "answers": answers_payload,
109
  }
110
 
111
  try:
112
- response = requests.post(submit_url, json=submission, timeout=60)
113
- response.raise_for_status()
114
- result = response.json()
115
- status = (
116
- f"✅ Submission Successful!\n"
117
- f"User: {result.get('username')}\n"
118
- f"Score: {result.get('score')}%\n"
119
- f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}\n"
120
- f"Message: {result.get('message')}"
 
121
  )
122
- except Exception as e:
123
- status = f"❌ Submission failed: {e}"
124
 
125
- return status, pd.DataFrame(results_log)
 
126
 
127
- # -------- Gradio Interface --------
128
  with gr.Blocks() as demo:
129
- gr.Markdown("## 🤖 GAIA Agent Evaluation with ToolEnhancedAgent")
 
130
  gr.Markdown(
131
  """
132
- - This Space lets you run your agent on GAIA benchmark.
133
- - Login with your HF account, click "Run Evaluation", and wait for the results.
 
134
  """
135
  )
136
 
137
- gr.LoginButton()
138
- run_button = gr.Button("🚀 Run Evaluation & Submit")
139
- status_output = gr.Textbox(label="Status / Score", lines=6, interactive=False)
140
- table_output = gr.DataFrame(label="Agent Answers")
141
 
142
- run_button.click(fn=run_and_submit_all, outputs=[status_output, table_output])
143
 
144
- # -------- Launch App --------
145
  if __name__ == "__main__":
146
- print("✅ Launching GAIA Agent Evaluation App")
147
- demo.launch(debug=True)
 
5
  import gradio as gr
6
  from openai import OpenAI
7
 
8
+ # --- Agent that uses GPT-4 with Chain-of-Thought and Tools ---
 
 
9
  class ToolEnhancedAgent:
def __init__(self):
    """Build the OpenAI client used by the agent.

    The API key is read from the ``OPENAI_API_KEY`` environment variable.

    Raises:
        ValueError: if ``OPENAI_API_KEY`` is unset or empty.
    """
    if not (key := os.getenv("OPENAI_API_KEY")):
        raise ValueError("OPENAI_API_KEY is not set in environment variables.")

    self.client = OpenAI(api_key=key)
    print("ToolEnhancedAgent initialized with GPT-4 + CoT + Tools.")
16
 
17
  def use_tool(self, tool_name: str, input_text: str) -> str:
18
  try:
19
  if tool_name == "calculator":
20
+ # Be careful with eval; restrict the allowed expressions before using this in production
21
  return str(eval(input_text))
22
  elif tool_name == "date":
23
  return str(datetime.datetime.now().date())
 
30
 
def search_wikipedia(self, query: str) -> str:
    """Return the English Wikipedia summary extract for ``query``.

    Args:
        query: Page title to look up.

    Returns:
        The ``extract`` field of the summary response on HTTP 200
        ("No summary found." when the field is absent), a
        "No Wikipedia summary found for ..." message for any other status
        code, or a "Wikipedia error: ..." message when the request or the
        JSON decoding raises. This method never raises.
    """
    # Function-scope import so the method does not depend on a new
    # file-level import.
    from urllib.parse import quote

    try:
        # Percent-encode the title: the query is a single URL path segment,
        # so an unescaped space, '/', '?' or '#' would change the request.
        title = quote(query.strip(), safe="")
        url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}"
        # requests applies no timeout by default; without one a stalled
        # connection would block the whole evaluation run.
        res = requests.get(url, timeout=15)
        if res.status_code == 200:
            return res.json().get("extract", "No summary found.")
        return f"No Wikipedia summary found for '{query}'."
    except Exception as e:
        # Deliberately broad: a tool lookup is best-effort and must hand
        # the caller a readable message instead of propagating.
        return f"Wikipedia error: {e}"
41
 
def __call__(self, question: str) -> str:
    """Ask GPT-4 to answer ``question`` and return the reply text.

    Args:
        question: The question text to answer.

    Returns:
        The stripped content of the first completion choice, or an
        "[Agent Error: ...]" string when anything in the request or the
        response handling raises. Errors are printed, never propagated.
    """
    system_message = {
        "role": "system",
        "content": "You are a smart assistant that can use tools and think step-by-step.",
    }
    user_message = {
        "role": "user",
        "content": (
            "You are a helpful AI assistant. Use tools if needed. Think step-by-step before answering.\n"
            f"Question: {question}\n"
            "Answer (show your thinking steps):"
        ),
    }

    try:
        completion = self.client.chat.completions.create(
            model="gpt-4",
            messages=[system_message, user_message],
            temperature=0.3,
            max_tokens=700,
        )
        reply = completion.choices[0].message.content.strip()
        print(f"Answer generated (truncated): {reply[:150]}...")
        return reply
    except Exception as err:
        print(f"[Agent Error]: {err}")
        return f"[Agent Error: {err}]"
64
 
# --- Main routine: fetch the questions, run the agent, and submit the answers ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every scoring question and submit the answers.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or None
            when nobody is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is None when the
        run stops before any question is processed (no login, agent
        initialisation failure, question fetch failure); otherwise it is a
        pandas DataFrame with one row per attempted question.
    """
    if not profile:
        return "Please login to Hugging Face to submit answers.", None

    username = profile.username
    api_url = "https://agents-course-unit4-scoring.hf.space"
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = ToolEnhancedAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # A JSON object or scalar is an invalid response too; iterating it would
    # fail on item.get below with an uncaught AttributeError.
    if not isinstance(questions, list) or not questions:
        return "No questions fetched or invalid response.", None

    answers = []
    results_log = []

    for item in questions:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue

        try:
            answer = agent(question_text)
        except Exception as e:
            answer = f"[Agent Error: {e}]"
            failed = True
        else:
            # The agent reports its own failures as an "[Agent Error: ...]"
            # string instead of raising, so detect that marker here;
            # otherwise error text would be submitted as a real answer.
            failed = isinstance(answer, str) and answer.startswith("[Agent Error")

        # Failures are still shown in the results table, just not submitted.
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": answer,
        })
        if not failed:
            answers.append({"task_id": task_id, "submitted_answer": answer})

    if not answers:
        return "Agent did not produce any answers.", pd.DataFrame(results_log)

    space_id = os.getenv("SPACE_ID")
    if not space_id:
        # The link is still sent (the payload shape is unchanged), but make
        # the problem visible in the logs instead of failing silently.
        print("SPACE_ID is not set; the submitted agent_code link will not point to a real Space.")
    agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main"

    submission_payload = {
        "username": username,
        "agent_code": agent_code_url,
        "answers": answers,
    }

    try:
        submit_response = requests.post(submit_url, json=submission_payload, timeout=60)
        submit_response.raise_for_status()
        result_data = submit_response.json()

        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score', 'N/A')}%\n"
            f"Correct: {result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')}\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        return final_status, pd.DataFrame(results_log)

    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)
139
 
# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# Tool Enhanced Agent for GAIA Benchmark")

    gr.Markdown(
        """
        1. Login with your Hugging Face account.
        2. Click 'Run Evaluation & Submit All Answers' to evaluate the agent on GAIA tasks.
        3. View your score and answers below.
        """
    )

    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Status / Submission Result", lines=7)
    results_table = gr.DataFrame(label="Questions and Answers")

    # No `inputs`: Gradio injects the gr.OAuthProfile argument itself based on
    # the callback's type annotation. Passing the login button as an input
    # would add a second positional argument that run_and_submit_all does not
    # accept.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])


# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()