hasanain9 committed on
Commit
3628aaf
·
verified ·
1 Parent(s): ef7ec19
Files changed (1) hide show
  1. app.py +44 -38
app.py CHANGED
@@ -5,28 +5,27 @@ import pandas as pd
5
  import gradio as gr
6
  from openai import OpenAI
7
 
8
- # --- Agent that uses GPT-4 with Chain-of-Thought and Tools ---
9
  class ToolEnhancedAgent:
10
  def __init__(self):
11
  api_key = os.getenv("OPENAI_API_KEY")
12
  if not api_key:
13
- raise ValueError("OPENAI_API_KEY is not set in environment variables.")
14
  self.client = OpenAI(api_key=api_key)
15
- print("ToolEnhancedAgent initialized with GPT-4 + CoT + Tools.")
16
 
17
  def use_tool(self, tool_name: str, input_text: str) -> str:
18
  try:
19
  if tool_name == "calculator":
20
- # Hati-hati menggunakan eval, sebaiknya batasi ekspresi jika produksi
21
  return str(eval(input_text))
22
  elif tool_name == "date":
23
  return str(datetime.datetime.now().date())
24
  elif tool_name == "wikipedia":
25
  return self.search_wikipedia(input_text)
26
  else:
27
- return "[Tool Error: Unknown Tool]"
28
  except Exception as e:
29
- return f"[Tool Error: {e}]"
30
 
31
  def search_wikipedia(self, query: str) -> str:
32
  try:
@@ -35,36 +34,38 @@ class ToolEnhancedAgent:
35
  if res.status_code == 200:
36
  return res.json().get("extract", "No summary found.")
37
  else:
38
- return f"No Wikipedia summary found for '{query}'."
39
  except Exception as e:
40
- return f"Wikipedia error: {e}"
41
 
42
  def __call__(self, question: str) -> str:
 
43
  prompt = (
44
- "You are a helpful AI assistant. Use tools if needed. Think step-by-step before answering.\n"
45
  f"Question: {question}\n"
46
- "Answer (show your thinking steps):"
47
  )
 
48
  try:
49
  response = self.client.chat.completions.create(
50
- model="gpt-4",
51
  messages=[
52
- {"role": "system", "content": "You are a smart assistant that can use tools and think step-by-step."},
53
  {"role": "user", "content": prompt}
54
  ],
55
- temperature=0.3,
56
  max_tokens=700,
57
  )
58
  answer = response.choices[0].message.content.strip()
59
- print(f"Answer generated (truncated): {answer[:150]}...")
60
  return answer
61
  except Exception as e:
62
- print(f"[Agent Error]: {e}")
63
- return f"[Agent Error: {e}]"
 
64
 
65
- # --- Main function to fetch the questions, run the agent, and submit the answers ---
66
  def run_and_submit_all(profile: gr.OAuthProfile | None):
67
- if not profile:
68
  return "Please login to Hugging Face to submit answers.", None
69
 
70
  username = profile.username
@@ -75,14 +76,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
75
  try:
76
  agent = ToolEnhancedAgent()
77
  except Exception as e:
78
- return f"Error initializing agent: {e}", None
79
 
80
  try:
81
  response = requests.get(questions_url, timeout=15)
82
  response.raise_for_status()
83
  questions = response.json()
84
- if not questions:
85
- return "No questions fetched or invalid response.", None
86
  except Exception as e:
87
  return f"Error fetching questions: {e}", None
88
 
@@ -106,11 +107,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
106
  results_log.append({
107
  "Task ID": task_id,
108
  "Question": question_text,
109
- "Submitted Answer": f"[Agent Error: {e}]"
110
  })
111
 
112
  if not answers:
113
- return "Agent did not produce any answers.", pd.DataFrame(results_log)
114
 
115
  agent_code_url = f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main"
116
 
@@ -123,38 +124,43 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
123
  try:
124
  submit_response = requests.post(submit_url, json=submission_payload, timeout=60)
125
  submit_response.raise_for_status()
126
- result_data = submit_response.json()
127
 
128
  final_status = (
129
- f"Submission Successful!\n"
130
- f"User: {result_data.get('username')}\n"
131
- f"Score: {result_data.get('score', 'N/A')}%\n"
132
- f"Correct: {result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')}\n"
133
- f"Message: {result_data.get('message', 'No message received.')}"
134
  )
135
  return final_status, pd.DataFrame(results_log)
136
 
137
  except Exception as e:
138
- return f"Submission failed: {e}", pd.DataFrame(results_log)
 
139
 
140
- # --- Gradio UI ---
141
  with gr.Blocks() as demo:
142
- gr.Markdown("# Tool Enhanced Agent for GAIA Benchmark")
143
 
144
  gr.Markdown(
145
  """
146
  1. Login with your Hugging Face account.
147
- 2. Click 'Run & Submit All Answers' to evaluate the agent on GAIA tasks.
148
- 3. View your score and answers below.
149
  """
150
  )
151
 
152
- login = gr.LoginButton()
153
  run_button = gr.Button("Run Evaluation & Submit All Answers")
154
- status_output = gr.Textbox(label="Status / Submission Result", lines=7)
155
- results_table = gr.DataFrame(label="Questions and Answers")
 
 
 
 
 
 
156
 
157
- run_button.click(fn=run_and_submit_all, inputs=login, outputs=[status_output, results_table])
158
 
159
  if __name__ == "__main__":
160
  demo.launch()
 
5
  import gradio as gr
6
  from openai import OpenAI
7
 
 
8
  class ToolEnhancedAgent:
9
  def __init__(self):
10
  api_key = os.getenv("OPENAI_API_KEY")
11
  if not api_key:
12
+ raise ValueError("OPENAI_API_KEY not found in environment variables.")
13
  self.client = OpenAI(api_key=api_key)
14
+ print("ToolEnhancedAgent initialized.")
15
 
16
  def use_tool(self, tool_name: str, input_text: str) -> str:
17
  try:
18
  if tool_name == "calculator":
19
+ # Sangat dasar dan raw eval, hati2 di produksi
20
  return str(eval(input_text))
21
  elif tool_name == "date":
22
  return str(datetime.datetime.now().date())
23
  elif tool_name == "wikipedia":
24
  return self.search_wikipedia(input_text)
25
  else:
26
+ return "[Unknown tool]"
27
  except Exception as e:
28
+ return f"[Tool error: {e}]"
29
 
30
  def search_wikipedia(self, query: str) -> str:
31
  try:
 
34
  if res.status_code == 200:
35
  return res.json().get("extract", "No summary found.")
36
  else:
37
+ return f"No Wikipedia summary for '{query}'."
38
  except Exception as e:
39
+ return f"Wikipedia API error: {e}"
40
 
41
  def __call__(self, question: str) -> str:
42
+ # Prompt dengan chain of thought agar GPT berpikir langkah demi langkah
43
  prompt = (
44
+ "You are an assistant that solves problems step-by-step, and you can use tools like calculator, date, and wikipedia if needed.\n"
45
  f"Question: {question}\n"
46
+ "Answer (think step-by-step and use tools if helpful):"
47
  )
48
+
49
  try:
50
  response = self.client.chat.completions.create(
51
+ model="gpt-4o-mini", # model yang lebih ringan dan biasanya cukup
52
  messages=[
53
+ {"role": "system", "content": "You are a helpful assistant that thinks step-by-step and can use tools."},
54
  {"role": "user", "content": prompt}
55
  ],
56
+ temperature=0.2,
57
  max_tokens=700,
58
  )
59
  answer = response.choices[0].message.content.strip()
60
+ print(f"Generated answer (preview): {answer[:100]}...")
61
  return answer
62
  except Exception as e:
63
+ print(f"Agent error: {e}")
64
+ return f"[Agent error: {e}]"
65
+
66
 
 
67
  def run_and_submit_all(profile: gr.OAuthProfile | None):
68
+ if profile is None:
69
  return "Please login to Hugging Face to submit answers.", None
70
 
71
  username = profile.username
 
76
  try:
77
  agent = ToolEnhancedAgent()
78
  except Exception as e:
79
+ return f"Agent initialization error: {e}", None
80
 
81
  try:
82
  response = requests.get(questions_url, timeout=15)
83
  response.raise_for_status()
84
  questions = response.json()
85
+ if not isinstance(questions, list) or len(questions) == 0:
86
+ return "Failed to fetch questions or empty list.", None
87
  except Exception as e:
88
  return f"Error fetching questions: {e}", None
89
 
 
107
  results_log.append({
108
  "Task ID": task_id,
109
  "Question": question_text,
110
+ "Submitted Answer": f"[Agent error: {e}]"
111
  })
112
 
113
  if not answers:
114
+ return "Agent did not produce answers.", pd.DataFrame(results_log)
115
 
116
  agent_code_url = f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main"
117
 
 
124
  try:
125
  submit_response = requests.post(submit_url, json=submission_payload, timeout=60)
126
  submit_response.raise_for_status()
127
+ result = submit_response.json()
128
 
129
  final_status = (
130
+ f"Submission Success!\n"
131
+ f"User: {result.get('username')}\n"
132
+ f"Score: {result.get('score', 'N/A')}%\n"
133
+ f"Correct: {result.get('correct_count', '?')}/{result.get('total_attempted', '?')}\n"
134
+ f"Message: {result.get('message', 'No message')}"
135
  )
136
  return final_status, pd.DataFrame(results_log)
137
 
138
  except Exception as e:
139
+ return f"Submission error: {e}", pd.DataFrame(results_log)
140
+
141
 
 
142
  with gr.Blocks() as demo:
143
+ gr.Markdown("# GAIA Benchmark - Tool Enhanced Agent")
144
 
145
  gr.Markdown(
146
  """
147
  1. Login with your Hugging Face account.
148
+ 2. Click 'Run Evaluation & Submit All Answers' to run the agent on GAIA tasks.
149
+ 3. View your results and submission status.
150
  """
151
  )
152
 
153
+ login_button = gr.LoginButton()
154
  run_button = gr.Button("Run Evaluation & Submit All Answers")
155
+ status_box = gr.Textbox(label="Status / Submission Result", lines=7)
156
+ results_table = gr.DataFrame(label="Questions and Agent Answers")
157
+
158
+ run_button.click(
159
+ fn=run_and_submit_all,
160
+ inputs=login_button,
161
+ outputs=[status_box, results_table]
162
+ )
163
 
 
164
 
165
  if __name__ == "__main__":
166
  demo.launch()