hasanain9 committed on
Commit
2707bf9
·
verified ·
1 Parent(s): 2d5a004
Files changed (1) hide show
  1. app.py +81 -51
app.py CHANGED
@@ -1,145 +1,175 @@
1
  import os
 
2
  import requests
3
- import datetime
4
  import pandas as pd
5
- import gradio as gr
6
  from openai import OpenAI
7
 
 
 
 
 
8
  class ToolEnhancedAgent:
9
  def __init__(self):
10
  api_key = os.getenv("OPENAI_API_KEY")
11
  if not api_key:
12
- raise ValueError("OPENAI_API_KEY is not set.")
13
  self.client = OpenAI(api_key=api_key)
14
- print("ToolEnhancedAgent initialized with GPT + CoT + Tools.")
15
 
16
  def use_tool(self, tool_name: str, input_text: str) -> str:
 
17
  try:
18
  if tool_name == "calculator":
19
- return str(eval(input_text))
 
 
20
  elif tool_name == "date":
 
21
  return str(datetime.datetime.now().date())
22
  elif tool_name == "wikipedia":
23
  return self.search_wikipedia(input_text)
24
  else:
25
- return "[Tool Error: Unknown Tool]"
26
  except Exception as e:
27
  return f"[Tool Error: {e}]"
28
 
29
- def search_wikipedia(self, query):
30
  try:
31
  res = requests.get(f"https://en.wikipedia.org/api/rest_v1/page/summary/{query}")
32
  if res.status_code == 200:
33
  return res.json().get("extract", "No summary found.")
34
- else:
35
- return f"No Wikipedia summary for {query}."
36
  except Exception as e:
37
- return f"Wiki error: {e}"
38
 
39
  def __call__(self, question: str) -> str:
 
40
  prompt = (
41
- "You are a helpful AI assistant. Use tools when necessary. "
42
- "Think step-by-step before answering. Respond clearly.\n\n"
43
  f"Question: {question}\n"
44
- "Answer (show thinking steps):"
45
  )
46
 
47
  try:
48
  response = self.client.chat.completions.create(
49
  model="gpt-4o-mini",
50
  messages=[
51
- {"role": "system", "content": "You are a smart assistant that can use tools and think step-by-step."},
52
  {"role": "user", "content": prompt}
53
  ],
54
  temperature=0.3,
55
  max_tokens=700,
56
  )
57
  answer = response.choices[0].message.content.strip()
58
- print(f"Answer generated: {answer[:100]}...")
 
 
 
 
 
 
 
 
 
59
  return answer
60
  except Exception as e:
61
- print(f"[Agent Error]: {e}")
62
  return f"[Agent Error: {e}]"
63
 
64
- def run_and_submit_all(profile):
65
- if not profile:
66
- return "Please login to Hugging Face first.", None
 
67
 
68
  username = profile.username
69
- api_url = "https://agents-course-unit4-scoring.hf.space"
 
 
70
  questions_url = f"{api_url}/questions"
71
  submit_url = f"{api_url}/submit"
72
 
73
- # Instantiate agent
74
  try:
75
  agent = ToolEnhancedAgent()
76
  except Exception as e:
77
- return f"Agent initialization error: {e}", None
 
 
78
 
79
- # Fetch questions
80
  try:
81
  response = requests.get(questions_url, timeout=15)
82
  response.raise_for_status()
83
- questions = response.json()
84
- if not questions:
85
- return "No questions fetched.", None
86
  except Exception as e:
87
  return f"Error fetching questions: {e}", None
88
 
89
  answers_payload = []
90
  results_log = []
91
 
92
- for q in questions:
93
- task_id = q.get("task_id")
94
- question = q.get("question")
95
- if not task_id or question is None:
96
  continue
97
  try:
98
- answer = agent(question)
99
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
100
- results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
 
 
 
 
101
  except Exception as e:
102
- results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": f"Error: {e}"})
 
 
 
 
103
 
104
  if not answers_payload:
105
- return "No answers generated.", pd.DataFrame(results_log)
106
 
107
  submission_data = {
108
- "username": username,
109
- "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
110
  "answers": answers_payload,
111
  }
112
 
113
  try:
114
- resp = requests.post(submit_url, json=submission_data, timeout=60)
115
- resp.raise_for_status()
116
- result = resp.json()
 
117
  status = (
118
  f"Submission Successful!\n"
119
  f"User: {result.get('username')}\n"
120
- f"Score: {result.get('score', 'N/A')}%\n"
121
- f"Correct: {result.get('correct_count', '?')}/{result.get('total_attempted', '?')}\n"
122
- f"Message: {result.get('message', '')}"
123
  )
124
- results_df = pd.DataFrame(results_log)
125
- return status, results_df
126
  except Exception as e:
127
- results_df = pd.DataFrame(results_log)
128
- return f"Submission failed: {e}", results_df
129
 
 
130
  with gr.Blocks() as demo:
131
- gr.Markdown("# ToolEnhancedAgent for GAIA Benchmark")
 
 
 
 
 
132
  login_btn = gr.LoginButton()
133
  run_btn = gr.Button("Run Evaluation & Submit All Answers")
134
 
135
- status_output = gr.Textbox(label="Status / Result", lines=6, interactive=False)
136
- results_table = gr.DataFrame(headers=["Task ID", "Question", "Submitted Answer"], label="Agent Answers", wrap=True)
137
 
138
  run_btn.click(
139
  fn=run_and_submit_all,
140
  inputs=[login_btn],
141
- outputs=[status_output, results_table]
142
  )
143
 
144
  if __name__ == "__main__":
145
- demo.launch()
 
1
  import os
2
+ import gradio as gr
3
  import requests
 
4
  import pandas as pd
 
5
  from openai import OpenAI
6
 
7
+ # Constants
8
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
+
10
# ToolEnhancedAgent uses the current OpenAI Python client API (1.x).
class ToolEnhancedAgent:
    """Question-answering agent backed by an OpenAI chat model plus local tools.

    The model is prompted to reason step by step and may name a tool in its
    answer as ``[calculator] <expr>``, ``[date]`` or ``[wikipedia] <title>``.
    The first such tag found is executed locally and its result is appended
    to the answer text.
    """

    # Tool tags the prompt advertises; anything else in brackets is ignored.
    _TOOL_NAMES = ("calculator", "date", "wikipedia")

    # Upper bound on exponents accepted by the calculator, so that inputs
    # such as "9**9**9" cannot stall the process.
    _MAX_EXPONENT = 10_000

    def __init__(self):
        """Create the OpenAI client.

        Raises:
            ValueError: if the OPENAI_API_KEY environment variable is unset
                or empty.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY not found in environment variables.")
        self.client = OpenAI(api_key=api_key)
        print("ToolEnhancedAgent initialized with OpenAI GPT model.")

    @staticmethod
    def _safe_eval(expression: str):
        """Evaluate an arithmetic expression without calling ``eval``.

        SECURITY: the expression comes from model output, i.e. untrusted text.
        The previous ``eval(..., {"__builtins__": None, ...})`` is not a
        sandbox (attribute walks such as ``().__class__`` still work), so the
        expression is parsed and only a small whitelist of nodes is executed:
        numeric literals, ``+ - * / // % **``, unary ``+``/``-``, lists/tuples
        of those, and public ``math.<name>`` constants and function calls.

        Raises:
            ValueError: for any construct outside the whitelist, or for an
                exponent larger than ``_MAX_EXPONENT``.
            SyntaxError: if the text is not a valid Python expression.
        """
        import ast
        import math
        import operator

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Mod: operator.mod,
            ast.Pow: operator.pow,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
        max_exponent = ToolEnhancedAgent._MAX_EXPONENT

        def math_member(node):
            # Only a direct, public attribute of the math module is allowed.
            if (
                isinstance(node, ast.Attribute)
                and isinstance(node.value, ast.Name)
                and node.value.id == "math"
                and not node.attr.startswith("_")
            ):
                return getattr(math, node.attr)
            raise ValueError("only math.<name> may be referenced")

        def evaluate(node):
            if isinstance(node, ast.Expression):
                return evaluate(node.body)
            if isinstance(node, ast.Constant):
                if isinstance(node.value, (int, float, complex)):
                    return node.value
                raise ValueError("only numeric literals are allowed")
            if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                left = evaluate(node.left)
                right = evaluate(node.right)
                if (
                    isinstance(node.op, ast.Pow)
                    and isinstance(right, (int, float))
                    and abs(right) > max_exponent
                ):
                    raise ValueError("exponent too large")
                return binary_ops[type(node.op)](left, right)
            if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](evaluate(node.operand))
            if isinstance(node, (ast.List, ast.Tuple)):
                # Needed for calls such as math.fsum([1, 2, 3]).
                return [evaluate(element) for element in node.elts]
            if isinstance(node, ast.Attribute):
                value = math_member(node)
                if callable(value):
                    raise ValueError("math functions must be called")
                return value
            if isinstance(node, ast.Call) and not node.keywords:
                func = math_member(node.func)
                return func(*[evaluate(arg) for arg in node.args])
            raise ValueError(
                f"unsupported expression element: {type(node).__name__}"
            )

        return evaluate(ast.parse(expression.strip(), mode="eval"))

    def use_tool(self, tool_name: str, input_text: str) -> str:
        """Run one of the simple built-in tools and return its result as text.

        Args:
            tool_name: "calculator", "date" or "wikipedia".
            input_text: expression for the calculator, page title for
                Wikipedia; ignored by the date tool.

        Returns:
            The tool output, or a "[Tool Error: ...]" string when the tool is
            unknown or fails. This method does not raise.
        """
        try:
            if tool_name == "calculator":
                return str(self._safe_eval(input_text))
            elif tool_name == "date":
                import datetime
                return str(datetime.datetime.now().date())
            elif tool_name == "wikipedia":
                return self.search_wikipedia(input_text)
            else:
                return "[Tool Error: Unknown tool]"
        except Exception as e:
            return f"[Tool Error: {e}]"

    def search_wikipedia(self, query: str) -> str:
        """Return the English Wikipedia summary extract for ``query``.

        The title is normalised (spaces to underscores) and percent-encoded so
        that spaces, slashes and non-ASCII characters form a valid URL path
        segment, and the request is bounded by a timeout so a stalled
        connection cannot hang the whole evaluation run.

        Returns:
            The summary text, or an explanatory message when the page is
            missing or the request fails. This method does not raise.
        """
        from urllib.parse import quote

        try:
            title = quote(query.strip().replace(" ", "_"), safe="")
            res = requests.get(
                f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}",
                timeout=10,
            )
            if res.status_code == 200:
                return res.json().get("extract", "No summary found.")
            return f"No Wikipedia summary for {query}."
        except Exception as e:
            return f"Wikipedia Error: {e}"

    def __call__(self, question: str) -> str:
        """Answer ``question`` with the chat model, running one tool if asked.

        Returns:
            The model's answer, followed by the tool result when the answer
            contains a known tool tag. On any failure an "[Agent Error: ...]"
            string is returned instead of raising.
        """
        import re

        # Chain-of-thought prompt that also tells the model how to ask for a tool.
        prompt = (
            "You are an AI assistant that can think step-by-step and use tools when needed.\n"
            f"Question: {question}\n"
            "Answer with your reasoning steps. If needed, mention the tool you want to use like [calculator], [date], [wikipedia]."
        )

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant using tools and reasoning."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                max_tokens=700,
            )
            # content may be None (e.g. a refusal); treat that as empty text.
            answer = (response.choices[0].message.content or "").strip()

            # Simple tool simulation: when the answer contains "[toolname] input"
            # (for example "[calculator] 2+2"), run the tool on the rest of that
            # line and append the result. Only the advertised tool names are
            # recognised, so unrelated bracketed words do not add error noise
            # to the submitted answer.
            pattern = r"\[(" + "|".join(self._TOOL_NAMES) + r")\](.*)"
            match = re.search(pattern, answer, re.IGNORECASE)
            if match:
                tool_name = match.group(1).lower()
                tool_input = match.group(2).strip()
                tool_result = self.use_tool(tool_name, tool_input)
                answer += f"\n\n[Tool used: {tool_name}]\nResult: {tool_result}"
            return answer
        except Exception as e:
            print(f"Agent error: {e}")
            return f"[Agent Error: {e}]"
78
 
79
# run_and_submit_all takes the logged-in user's Hugging Face profile.
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the benchmark questions, answer each one, and submit the answers.

    Args:
        profile: Hugging Face OAuth profile of the logged-in user, or None
            when nobody is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with "Task ID", "Question" and "Submitted Answer" columns
        once the answering loop has run, and None when the run stops earlier
        (no login, agent failed to initialise, questions unavailable).
    """
    if profile is None:
        return "Please login with your Hugging Face account.", None

    username = profile.username
    # Falls back to a placeholder when not running inside a Space; replace it
    # with your own "<user>/<space>" if needed.
    space_id = os.getenv("SPACE_ID") or "your-username/your-space"

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = ToolEnhancedAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch the questions.
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # Validate the payload before iterating: an empty list means there is
    # nothing to do, and a non-list (e.g. an error object) would otherwise
    # crash below with an uncaught AttributeError.
    if not questions_data:
        return "No questions fetched.", None
    if not isinstance(questions_data, list):
        return "Error fetching questions: unexpected response format.", None

    answers_payload = []
    results_log = []

    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": answer,
            })
        except Exception as e:
            # Keep the failure visible in the results table but do not submit it.
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"Agent Error: {e}",
            })

    if not answers_payload:
        return "Agent did not produce answers to submit.", pd.DataFrame(results_log)

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code_url,
        "answers": answers_payload,
    }

    try:
        submit_response = requests.post(submit_url, json=submission_data, timeout=60)
        submit_response.raise_for_status()
        result = submit_response.json()

        status = (
            f"Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"Message: {result.get('message', 'No message')}"
        )
        return status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)
 
153
 
154
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Benchmark Agent Runner")
    gr.Markdown(
        "1. Login with your Hugging Face account.\n"
        "2. Click 'Run Evaluation & Submit All Answers' to fetch questions, "
        "run your agent, and submit answers.\n"
    )

    login_btn = gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")

    status_out = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_df = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No `inputs=`: per the Gradio OAuth docs, the handler's parameter
    # annotated as gr.OAuthProfile is filled in by Gradio itself. Listing the
    # login button as an input would add its value as an extra positional
    # argument, which the one-parameter handler does not accept.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_out, results_df]
    )

if __name__ == "__main__":
    demo.launch(debug=True, share=False)