hasanain9 committed on
Commit
3fd9d2e
·
verified ·
1 Parent(s): d9575d5
Files changed (1) hide show
  1. app.py +70 -113
app.py CHANGED
@@ -2,88 +2,67 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
 
5
  from openai import OpenAI
6
 
7
- # Constants
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
- # ToolEnhancedAgent menggunakan OpenAI API terbaru (1.x)
11
  class ToolEnhancedAgent:
12
  def __init__(self):
13
  api_key = os.getenv("OPENAI_API_KEY")
14
  if not api_key:
15
- raise ValueError("OPENAI_API_KEY not found in environment variables.")
16
  self.client = OpenAI(api_key=api_key)
17
- print("ToolEnhancedAgent initialized with OpenAI GPT model.")
18
 
19
  def use_tool(self, tool_name: str, input_text: str) -> str:
20
- # Contoh penggunaan tool sederhana: kalkulator, tanggal, Wikipedia
21
  try:
22
  if tool_name == "calculator":
23
- # Aman eval dengan math
24
- import math
25
- return str(eval(input_text, {"__builtins__": None, "math": math}))
26
  elif tool_name == "date":
27
- import datetime
28
  return str(datetime.datetime.now().date())
29
  elif tool_name == "wikipedia":
30
- return self.search_wikipedia(input_text)
 
 
 
 
31
  else:
32
- return "[Tool Error: Unknown tool]"
33
  except Exception as e:
34
  return f"[Tool Error: {e}]"
35
 
36
- def search_wikipedia(self, query: str) -> str:
37
- try:
38
- res = requests.get(f"https://en.wikipedia.org/api/rest_v1/page/summary/{query}")
39
- if res.status_code == 200:
40
- return res.json().get("extract", "No summary found.")
41
- return f"No Wikipedia summary for {query}."
42
- except Exception as e:
43
- return f"Wikipedia Error: {e}"
44
-
45
  def __call__(self, question: str) -> str:
46
- # Prompt dengan Chain of Thought dan instruksi penggunaan tools
47
- prompt = (
48
- "You are an AI assistant that can think step-by-step and use tools when needed.\n"
49
- f"Question: {question}\n"
50
- "Answer with your reasoning steps. If needed, mention the tool you want to use like [calculator], [date], [wikipedia]."
51
- )
52
-
53
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  response = self.client.chat.completions.create(
55
- model="gpt-4o-mini",
56
- messages=[
57
- {"role": "system", "content": "You are a helpful assistant using tools and reasoning."},
58
- {"role": "user", "content": prompt}
59
- ],
60
  temperature=0.3,
61
- max_tokens=700,
62
  )
63
- answer = response.choices[0].message.content.strip()
64
- # Simple tool simulation: jika ada tag [tool:toolname] di jawaban, gunakan tool dan tambahkan hasilnya
65
- # Contoh: "[calculator] 2+2" -> hitung 4 dan tambahkan ke jawaban
66
- import re
67
- pattern = r"\[([a-z]+)\](.*)"
68
- match = re.search(pattern, answer, re.IGNORECASE)
69
- if match:
70
- tool_name = match.group(1).lower()
71
- tool_input = match.group(2).strip()
72
- tool_result = self.use_tool(tool_name, tool_input)
73
- answer += f"\n\n[Tool used: {tool_name}]\nResult: {tool_result}"
74
- return answer
75
  except Exception as e:
76
- print(f"Agent error: {e}")
77
  return f"[Agent Error: {e}]"
78
 
79
- # Revisi run_and_submit_all untuk menerima profile (LoginButton output)
80
- def run_and_submit_all(profile: gr.OAuthProfile | None):
81
- if profile is None:
82
- return "Please login with your Hugging Face account.", None
83
-
84
- username = profile.username
85
- space_id = os.getenv("SPACE_ID") or "your-username/your-space" # Ganti sesuai space kamu jika perlu
86
-
87
  api_url = DEFAULT_API_URL
88
  questions_url = f"{api_url}/questions"
89
  submit_url = f"{api_url}/submit"
@@ -91,85 +70,63 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
91
  try:
92
  agent = ToolEnhancedAgent()
93
  except Exception as e:
94
- return f"Error initializing agent: {e}", None
95
-
96
- agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main"
97
 
98
- # Ambil pertanyaan
99
  try:
100
- response = requests.get(questions_url, timeout=15)
101
- response.raise_for_status()
102
- questions_data = response.json()
103
  except Exception as e:
104
- return f"Error fetching questions: {e}", None
105
 
106
  answers_payload = []
107
- results_log = []
108
 
109
- for item in questions_data:
110
- task_id = item.get("task_id")
111
- question_text = item.get("question")
112
- if not task_id or question_text is None:
113
  continue
114
  try:
115
- answer = agent(question_text)
116
- answers_payload.append({"task_id": task_id, "submitted_answer": answer})
117
- results_log.append({
118
- "Task ID": task_id,
119
- "Question": question_text,
120
- "Submitted Answer": answer,
121
- })
122
  except Exception as e:
123
- results_log.append({
124
- "Task ID": task_id,
125
- "Question": question_text,
126
- "Submitted Answer": f"Agent Error: {e}",
127
- })
128
 
129
  if not answers_payload:
130
- return "Agent did not produce answers to submit.", pd.DataFrame(results_log)
131
 
132
- submission_data = {
133
- "username": username.strip(),
134
- "agent_code": agent_code_url,
135
- "answers": answers_payload,
136
  }
137
 
138
  try:
139
- submit_response = requests.post(submit_url, json=submission_data, timeout=60)
140
- submit_response.raise_for_status()
141
- result = submit_response.json()
142
-
143
- status = (
144
- f"Submission Successful!\n"
145
  f"User: {result.get('username')}\n"
146
- f"Score: {result.get('score', 'N/A')}% "
147
- f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
148
- f"Message: {result.get('message', 'No message')}"
149
  )
150
- return status, pd.DataFrame(results_log)
151
  except Exception as e:
152
- return f"Submission failed: {e}", pd.DataFrame(results_log)
153
 
154
- # Gradio UI
155
  with gr.Blocks() as demo:
156
- gr.Markdown("# GAIA Benchmark Agent Runner")
157
- gr.Markdown("""
158
- 1. Login with your Hugging Face account.
159
- 2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, and submit answers.
160
- """)
161
-
162
- login_btn = gr.LoginButton()
163
- run_btn = gr.Button("Run Evaluation & Submit All Answers")
164
-
165
- status_out = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
166
- results_df = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
167
 
168
- run_btn.click(
169
- fn=run_and_submit_all,
170
- inputs=[login_btn],
171
- outputs=[status_out, results_df]
172
- )
173
 
174
  if __name__ == "__main__":
175
- demo.launch(debug=True, share=False)
 
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ import datetime
6
  from openai import OpenAI
7
 
 
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
 
10
class ToolEnhancedAgent:
    """Question-answering agent backed by the OpenAI chat-completions API.

    The instance is callable: ``agent(question)`` sends the question to the
    model and returns the text of the reply. ``use_tool`` offers three small
    helpers (calculator, current date, Wikipedia summary).

    NOTE(review): ``__call__`` only *describes* the tools in its prompt; it
    never invokes ``use_tool``. Confirm whether tool dispatch is still wanted.
    """

    # Cap on ``**`` exponents so an expression such as ``9 ** 9 ** 9`` cannot
    # stall the process while building an enormous integer.
    _MAX_EXPONENT = 10_000

    # The only callables a calculator expression may invoke.
    _CALC_FUNCTIONS = {"abs": abs, "round": round, "min": min, "max": max}

    def __init__(self):
        """Create the OpenAI client from the ``OPENAI_API_KEY`` environment variable.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is unset or empty.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY is not set.")
        self.client = OpenAI(api_key=api_key)
        print("ToolEnhancedAgent initialized.")

    @classmethod
    def _safe_eval(cls, expression: str):
        """Evaluate an arithmetic expression without using ``eval``.

        Supports int/float literals, ``+ - * / // % **``, unary ``+``/``-``,
        parentheses, and calls to the functions in ``_CALC_FUNCTIONS``.

        Raises:
            ValueError: If the expression contains any other construct, or an
                exponent larger than ``_MAX_EXPONENT``.
            SyntaxError: If the expression is not valid Python syntax.
        """
        # Imported locally so this block does not depend on new file-level imports.
        import ast
        import operator

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Mod: operator.mod,
            ast.Pow: operator.pow,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

        def evaluate(node):
            if isinstance(node, ast.Constant):
                value = node.value
                # bool is a subclass of int; reject it along with str/bytes/None.
                if isinstance(value, bool) or not isinstance(value, (int, float)):
                    raise ValueError(f"Unsupported literal: {value!r}")
                return value
            if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                left = evaluate(node.left)
                right = evaluate(node.right)
                if isinstance(node.op, ast.Pow) and abs(right) > cls._MAX_EXPONENT:
                    raise ValueError("Exponent too large.")
                return binary_ops[type(node.op)](left, right)
            if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](evaluate(node.operand))
            if (
                isinstance(node, ast.Call)
                and isinstance(node.func, ast.Name)
                and node.func.id in cls._CALC_FUNCTIONS
                and not node.keywords
            ):
                arguments = [evaluate(arg) for arg in node.args]
                return cls._CALC_FUNCTIONS[node.func.id](*arguments)
            raise ValueError(f"Unsupported expression element: {type(node).__name__}")

        tree = ast.parse(expression.strip(), mode="eval")
        return evaluate(tree.body)

    def use_tool(self, tool_name: str, input_text: str) -> str:
        """Run one of the built-in tools and return its result as text.

        Args:
            tool_name: ``"calculator"``, ``"date"`` or ``"wikipedia"``.
            input_text: Arithmetic expression (calculator) or page title
                (wikipedia); ignored by the date tool.

        Returns:
            The tool output, ``"[Unknown Tool]"`` for an unrecognised name, or
            ``"[Tool Error: ...]"`` if the tool raised.
        """
        try:
            if tool_name == "calculator":
                # The input may originate from model output, so it is parsed
                # and evaluated as arithmetic only -- never passed to eval().
                return str(self._safe_eval(input_text))
            if tool_name == "date":
                return str(datetime.datetime.now().date())
            if tool_name == "wikipedia":
                from urllib.parse import quote

                # Percent-encode the title so "/", "?" or "#" in the query
                # cannot alter the request path.
                title = quote(input_text.strip(), safe="")
                res = requests.get(
                    f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}",
                    timeout=10,  # never hang the whole run on one lookup
                )
                if res.status_code == 200:
                    return res.json().get("extract", "No summary found.")
                return "No summary found."
            return "[Unknown Tool]"
        except Exception as e:
            # Best-effort tool: report the failure as text instead of raising.
            return f"[Tool Error: {e}]"

    def __call__(self, question: str) -> str:
        """Ask the model to answer ``question`` and return the reply text.

        Returns:
            The stripped model reply, or ``"[Agent Error: ...]"`` if the
            request or response handling raised.
        """
        try:
            prompt = (
                "\n"
                "You are an advanced AI assistant with access to tools like calculator, date lookup, and Wikipedia.\n"
                "Follow this format:\n"
                "Step 1: Think step-by-step.\n"
                "Step 2: Use tool if needed.\n"
                "Step 3: Final answer.\n"
                "\n"
                f"Question: {question}\n"
                "Answer:\n"
            )
            messages = [
                {"role": "system", "content": "You are a helpful assistant that uses tools and thinks step-by-step."},
                {"role": "user", "content": prompt},
            ]
            response = self.client.chat.completions.create(
                model="gpt-4",
                messages=messages,
                temperature=0.3,
                max_tokens=700,
            )
            # The message content can be None; treat that as an empty reply.
            content = response.choices[0].message.content
            return (content or "").strip()
        except Exception as e:
            return f"[Agent Error: {e}]"
60
 
61
def run_and_submit_all():
    """Fetch the evaluation questions, answer each one, and submit the answers.

    The username comes from the ``HF_USER`` environment variable (falling back
    to ``"anonymous"``) and the agent code link is built from ``SPACE_ID``.

    Returns:
        A ``(status_message, DataFrame)`` pair. The DataFrame logs every
        answered question; it is empty when the run fails before any question
        is processed.
    """
    profile_username = (os.getenv("HF_USER") or "").strip() or "anonymous"
    # NOTE(review): if SPACE_ID is unset the link below contains "None" --
    # confirm whether the scoring service accepts that.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = ToolEnhancedAgent()
    except Exception as e:
        return f"Failed to initialize agent: {e}", pd.DataFrame()

    try:
        res = requests.get(questions_url, timeout=20)
        res.raise_for_status()
        questions_data = res.json()
    except Exception as e:
        return f"Error fetching questions: {e}", pd.DataFrame()

    # A JSON object (e.g. an error payload) would otherwise be iterated as
    # bare keys and crash on ``.get`` below.
    if not isinstance(questions_data, list):
        return "Error fetching questions: unexpected response format.", pd.DataFrame()

    answers_payload = []
    log = []

    for q in questions_data:
        if not isinstance(q, dict):
            continue
        task_id = q.get("task_id")
        question = q.get("question")
        if not task_id or not question:
            continue
        try:
            answer = agent(question)
        except Exception as e:
            # A failed question is still submitted, with the error as its answer.
            answer = f"[Error]: {e}"
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})

    if not answers_payload:
        return "No answers submitted.", pd.DataFrame(log)

    submission = {
        "username": profile_username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        res = requests.post(submit_url, json=submission, timeout=30)
        res.raise_for_status()
        result = res.json()
        msg = (
            f"✅ Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score')}%\n"
            f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}\n"
            f"Message: {result.get('message')}"
        )
        return msg, pd.DataFrame(log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(log)
120
 
 
121
# Gradio front end: two headings, one trigger button, and two output widgets.
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 GAIA Agent Evaluator")
    gr.Markdown("Log in, then click the button to evaluate your agent.")

    evaluate_btn = gr.Button("▶️ Run Evaluation & Submit All Answers")
    status_box = gr.Textbox(
        label="Status",
        lines=4,
    )
    log_table = gr.DataFrame(label="Agent Logs")

    # The handler takes no inputs; its two return values fill the widgets in order.
    evaluate_btn.click(
        fn=run_and_submit_all,
        outputs=[status_box, log_table],
    )

# Start the app only when the file is executed directly.
if __name__ == "__main__":
    print("Launching GAIA App...")
    demo.launch()