Mehedi2 committed on
Commit
19ecb6a
·
verified ·
1 Parent(s): 81917a3

Create agent.py

Browse files
Files changed (1) hide show
  1. agent.py +342 -0
agent.py ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import requests
5
+ import pandas as pd
6
+ from pathlib import Path
7
+ from typing import Optional, Union, Dict, Any, List
8
+ from dotenv import load_dotenv
9
+
10
+ load_dotenv()
11
+
12
+ # Simple tool-based agent without LangGraph for now
13
class SimpleAgent:
    """Single-step, tool-using agent built around one LLM callable.

    ``llm`` is any callable that accepts a prompt string and returns the
    model's reply as a string.  :meth:`run` first asks the model whether a
    tool is required, runs at most one tool, and then asks the model again
    with the tool output appended to the question.

    Every tool returns a human-readable string and never raises: failures
    are reported as an error message in the returned text.
    """

    def __init__(self, llm):
        self.llm = llm
        # Registry mapping tool names to the bound methods implementing them.
        self.tools = {
            'search_web': self.search_web,
            'search_wikipedia': self.search_wikipedia,
            'execute_python': self.execute_python,
            'read_excel_file': self.read_excel_file,
            'read_text_file': self.read_text_file,
        }

    def search_web(self, query: str) -> str:
        """Query the DuckDuckGo Instant Answer API and summarise the reply.

        Returns the abstract plus up to three related-topic snippets, or a
        message describing why nothing could be returned.
        """
        try:
            # Pass the query through ``params`` so spaces, '&', '#', '?' and
            # non-ASCII characters are percent-encoded instead of corrupting
            # the URL.
            response = requests.get(
                "https://api.duckduckgo.com/",
                params={
                    "q": query,
                    "format": "json",
                    "no_html": 1,
                    "skip_disambig": 1,
                },
                timeout=10,
            )

            if response.status_code != 200:
                return f"Search failed with status code {response.status_code}"

            data = response.json()
            results = []
            if data.get("AbstractText"):
                results.append(f"Abstract: {data['AbstractText']}")

            # Grouped topics are dicts without a "Text" key; they are skipped.
            for topic in (data.get("RelatedTopics") or [])[:3]:
                if isinstance(topic, dict) and topic.get("Text"):
                    results.append(f"Related: {topic['Text']}")

            if results:
                return "\n".join(results)
            return f"Search performed for '{query}' but no specific results found."
        except Exception as e:
            return f"Search error: {str(e)}"

    def search_wikipedia(self, query: str) -> str:
        """Fetch the English Wikipedia page summary for ``query``.

        The query is used as a page title (spaces become underscores).  At
        most the first 500 characters of the extract are returned.
        """
        try:
            from urllib.parse import quote

            # Percent-encode the title (including '/') so it stays a single
            # path segment of the REST endpoint.
            title = quote(query.strip().replace(" ", "_"), safe="")
            search_url = "https://en.wikipedia.org/api/rest_v1/page/summary/" + title
            response = requests.get(search_url, timeout=10)

            if response.status_code != 200:
                return f"Wikipedia search failed for '{query}'"

            extract = response.json().get("extract", "")
            if not extract:
                return f"Wikipedia page found for '{query}' but no extract available."

            # Only signal truncation when text was actually cut off.
            suffix = "..." if len(extract) > 500 else ""
            return f"Wikipedia: {extract[:500]}{suffix}"
        except Exception as e:
            return f"Wikipedia search error: {str(e)}"

    @staticmethod
    def _strip_code_fence(code: str) -> str:
        """Return ``code`` without a surrounding markdown code fence, if any."""
        import textwrap

        fenced = re.search(r"```[A-Za-z0-9_+-]*[ \t]*\r?\n(.*?)```", code, re.DOTALL)
        if fenced:
            return textwrap.dedent(fenced.group(1)).strip()

        stripped = code.strip()
        if len(stripped) >= 2 and stripped.startswith("`") and stripped.endswith("`"):
            return stripped.strip("`").strip()
        return code

    def execute_python(self, code: str) -> str:
        """Execute ``code`` and return whatever it printed to stdout.

        Only a small set of builtins plus the ``math`` and ``json`` modules
        are exposed.  Markdown code fences around the snippet are removed
        before execution.  Exceptions raised by the snippet are reported in
        the returned string.
        """
        try:
            import io
            from contextlib import redirect_stdout

            # SECURITY NOTE: the snippet normally comes from the LLM.  The
            # reduced builtins below limit accidents but are NOT a sandbox;
            # do not expose this tool to untrusted callers without real
            # isolation (separate process / container).
            safe_globals = {
                '__builtins__': {
                    'print': print, 'len': len, 'str': str, 'int': int, 'float': float,
                    'bool': bool, 'list': list, 'dict': dict, 'tuple': tuple, 'set': set,
                    'range': range, 'sum': sum, 'max': max, 'min': min, 'abs': abs,
                    'round': round, 'sorted': sorted, 'enumerate': enumerate, 'zip': zip,
                },
                'math': __import__('math'),
                'json': __import__('json'),
            }

            captured = io.StringIO()
            # redirect_stdout restores sys.stdout even if the snippet raises.
            with redirect_stdout(captured):
                exec(self._strip_code_fence(code), safe_globals)

            output = captured.getvalue()
            return output if output else "Code executed successfully (no output)"
        except Exception as e:
            return f"Python execution error: {str(e)}"

    def read_excel_file(self, file_path: str, sheet_name: Optional[str] = None) -> str:
        """Render one sheet of an Excel workbook as text.

        ``sheet_name`` may be a sheet name, a string of digits (used as a
        zero-based sheet index) or ``None`` for the first sheet.  Sheets
        longer than 20 rows are abbreviated to the first and last 10 rows.
        """
        try:
            if not Path(file_path).exists():
                return f"Error: File not found at {file_path}"

            if sheet_name is None:
                sheet = 0
            elif sheet_name.isdigit():
                sheet = int(sheet_name)
            else:
                sheet = sheet_name

            df = pd.read_excel(file_path, sheet_name=sheet)

            result = f"Excel file with {len(df)} rows and {len(df.columns)} columns:\n\n"
            if len(df) > 20:
                result += "First 10 rows:\n" + df.head(10).to_string(index=False)
                result += f"\n\n... ({len(df) - 20} rows omitted) ...\n\n"
                result += "Last 10 rows:\n" + df.tail(10).to_string(index=False)
            else:
                result += df.to_string(index=False)

            return result
        except Exception as e:
            return f"Error reading Excel file: {str(e)}"

    def read_text_file(self, file_path: str) -> str:
        """Read a text file, trying several encodings in turn.

        Order: UTF-8, UTF-16 (only when the file starts with a UTF-16 byte
        order mark), cp1252, then ISO-8859-1 as the catch-all.
        """
        try:
            file_path_obj = Path(file_path)
            if not file_path_obj.exists():
                return f"Error: File not found at {file_path}"

            with open(file_path_obj, 'rb') as raw:
                head = raw.read(2)
            # Without a BOM the utf-16 codec happily decodes almost any
            # even-length byte string into garbage, so require the BOM.
            has_utf16_bom = head in (b"\xff\xfe", b"\xfe\xff")

            # cp1252 is tried before iso-8859-1 because the latter accepts
            # every byte sequence and would otherwise shadow it.
            for encoding in ('utf-8', 'utf-16', 'cp1252', 'iso-8859-1'):
                if encoding == 'utf-16' and not has_utf16_bom:
                    continue
                try:
                    with open(file_path_obj, 'r', encoding=encoding) as f:
                        content = f.read()
                except UnicodeDecodeError:
                    continue
                return f"File content ({encoding} encoding):\n\n{content}"

            return "Error: Could not decode file with any standard encoding"
        except Exception as e:
            return f"Error reading file: {str(e)}"

    @staticmethod
    def _directive_argument(response: str, marker: str, single_line: bool = True) -> str:
        """Return the text following ``marker`` in ``response``.

        With ``single_line`` only the first line after the marker is kept,
        so trailing commentary from the model is not fed to the tool.
        """
        payload = response.split(marker, 1)[1].strip()
        if single_line and payload:
            payload = payload.splitlines()[0].strip()
        return payload

    def run(self, question: str) -> str:
        """Answer ``question``, using at most one tool if the model asks for it.

        The model is first asked to either answer directly or emit one of
        the ``NEED_SEARCH:``, ``NEED_PYTHON:``, ``NEED_WIKI:`` or
        ``NEED_FILE:`` directives.  If a directive is present, the matching
        tool runs and the model is asked again with the tool output;
        otherwise the first reply is returned unchanged.
        """
        direct_response = self.llm(
            "\n"
            f"Question: {question}\n"
            "Think step by step. If this question requires:\n"
            "- Web search for current information, say \"NEED_SEARCH: <search query>\"\n"
            "- Mathematical calculation, say \"NEED_PYTHON: <python code>\"\n"
            "- Wikipedia lookup, say \"NEED_WIKI: <search term>\"\n"
            "- File analysis (if file path mentioned), say \"NEED_FILE: <file_path>\"\n"
            "Otherwise, provide a direct answer.\n"
            "Your response:"
        )

        if "NEED_SEARCH:" in direct_response:
            search_query = self._directive_argument(direct_response, "NEED_SEARCH:")
            search_result = self.search_web(search_query)
            return self.llm(f"Question: {question}\n\nSearch results: {search_result}\n\nFinal answer:")

        if "NEED_PYTHON:" in direct_response:
            # Code may span several lines; execute_python removes any fence.
            code = self._directive_argument(direct_response, "NEED_PYTHON:", single_line=False)
            exec_result = self.execute_python(code)
            return self.llm(f"Question: {question}\n\nCalculation result: {exec_result}\n\nFinal answer:")

        if "NEED_WIKI:" in direct_response:
            wiki_query = self._directive_argument(direct_response, "NEED_WIKI:")
            wiki_result = self.search_wikipedia(wiki_query)
            return self.llm(f"Question: {question}\n\nWikipedia info: {wiki_result}\n\nFinal answer:")

        if "NEED_FILE:" in direct_response:
            # Models often wrap paths in quotes or backticks.
            file_path = self._directive_argument(direct_response, "NEED_FILE:").strip("`'\"")
            if file_path.lower().endswith(('.xlsx', '.xls')):
                file_content = self.read_excel_file(file_path)
            else:
                file_content = self.read_text_file(file_path)
            return self.llm(f"Question: {question}\n\nFile content: {file_content}\n\nFinal answer:")

        return direct_response
187
class OpenRouterLLM:
    """Minimal callable wrapper around the OpenRouter chat-completions API.

    The API key is read from the ``OPENROUTER_API_KEY`` environment variable,
    falling back to ``my_key``.  Calling the instance never raises: every
    failure is reported as a string starting with ``"Error"`` or
    ``"API Error"``.
    """

    def __init__(self, model: str = "deepseek/deepseek-v3.1-terminus"):
        self.api_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("my_key")
        self.model = model
        self.base_url = "https://openrouter.ai/api/v1/chat/completions"

    def __call__(self, prompt: str, max_tokens: int = 1500, temperature: float = 0.1) -> str:
        """Send ``prompt`` as a single user message and return the cleaned reply.

        Args:
            prompt: Text of the user message.
            max_tokens: Upper bound on generated tokens sent to the API.
            temperature: Sampling temperature sent to the API.

        Returns:
            The model's answer after :meth:`_clean_answer`, or an error string.
        """
        if not self.api_key or not self.api_key.startswith('sk-or-v1-'):
            return "Error: Invalid OpenRouter API key"

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "system",
                    "content": "You are a helpful AI assistant. Provide direct, accurate answers. For GAIA evaluation, be precise and concise."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "temperature": temperature,
            "max_tokens": max_tokens,
        }

        try:
            response = requests.post(self.base_url, headers=headers, json=payload, timeout=30)

            if response.status_code != 200:
                return f"API Error: {response.status_code}"

            result = response.json()
            choices = result.get("choices") if isinstance(result, dict) else None
            if not choices:
                return "Error: No response content received"

            # ``content`` can be null (e.g. filtered or tool-only replies);
            # report that explicitly instead of failing on ``None.strip()``.
            message = choices[0].get("message") or {}
            content = message.get("content")
            if not isinstance(content, str):
                return "Error: No response content received"

            return self._clean_answer(content.strip())

        except Exception as e:
            # Boundary handler: callers rely on always getting a string back.
            return f"Error: {str(e)}"

    def _clean_answer(self, answer: str) -> str:
        """Strip a leading boilerplate prefix and stray quoting from ``answer``.

        At most one prefix (matched case-insensitively) is removed.  Answers
        of three words or fewer also lose surrounding quotes and periods.
        """
        answer = answer.strip()

        prefixes = [
            "Answer:", "The answer is:", "Final answer:", "Result:",
            "Solution:", "Based on", "Therefore", "In conclusion"
        ]

        for prefix in prefixes:
            head, rest = answer[:len(prefix)], answer[len(prefix):]
            if head.lower() != prefix.lower():
                continue
            # Require a word boundary so that e.g. "Based only ..." is not
            # mistaken for the "Based on" prefix.
            if prefix[-1].isalnum() and rest[:1].isalnum():
                continue
            # Drop the separator left behind ("Therefore, 42" -> "42").
            answer = rest.lstrip(" \t:,").strip()
            break

        # Remove quotes and periods from short answers.
        if len(answer.split()) <= 3:
            answer = answer.strip('"\'.')

        return answer
261
+
262
+
263
class GaiaAgent:
    """Entry point for GAIA tasks: wires an OpenRouter LLM into a SimpleAgent.

    Instances are callable with ``(task_id, question)`` and always return a
    string; unexpected failures are reported as ``"Agent error: ..."``.
    """

    def __init__(self):
        print("Initializing GaiaAgent with OpenRouter DeepSeek...")

        # Initialize the LLM
        self.llm = OpenRouterLLM(model="deepseek/deepseek-v3.1-terminus")

        # Initialize the agent with tools
        self.agent = SimpleAgent(self.llm)

        print("GaiaAgent initialized successfully!")

    def __call__(self, task_id: str, question: str) -> str:
        """Answer ``question`` and return the cleaned answer text.

        Args:
            task_id: Identifier used only in the progress messages printed.
            question: The task text handed to the underlying agent.

        Returns:
            The cleaned answer, or ``"Agent error: <message>"`` if anything
            raised while processing.
        """
        try:
            print(f"Processing task {task_id}: {question[:100]}...")

            enhanced_question = self._enhance_question_with_file_analysis(question)
            answer = self.agent.run(enhanced_question)
            clean_answer = self._clean_final_answer(answer)

            print(f"Agent answer for {task_id}: {clean_answer}")
            return clean_answer

        except Exception as e:
            error_msg = f"Agent error: {str(e)}"
            print(f"Error processing task {task_id}: {error_msg}")
            return error_msg

    def _enhance_question_with_file_analysis(self, question: str) -> str:
        """Return ``question`` unchanged.

        This hook is a deliberate pass-through: the question text is never
        modified here.  The previous implementation scanned the question for
        file references but discarded the result, so that dead scan has been
        removed.
        """
        return question

    def _clean_final_answer(self, answer: str) -> str:
        """Reduce a model reply to the bare answer text.

        Steps: keep only what follows the last "final answer:" marker (case
        insensitive), drop one leading filler phrase, then trim surrounding
        whitespace and punctuation.
        """
        answer = answer.strip()

        # Cut at the marker itself rather than at the last colon, so answers
        # that contain colons ("10:30", "a: b") survive intact.
        markers = list(re.finditer(r"final answer:", answer, re.IGNORECASE))
        if markers:
            answer = answer[markers[-1].end():].strip()

        cleanup_phrases = [
            "based on the", "according to", "the answer is", "therefore",
            "in conclusion", "as a result", "so the answer is"
        ]

        for phrase in cleanup_phrases:
            head, rest = answer[:len(phrase)], answer[len(phrase):]
            if head.lower() != phrase:
                continue
            # Word boundary check: do not cut "based on these ..." in half.
            if rest[:1].isalnum():
                continue
            answer = rest.strip()
            break

        # Trim whitespace together with punctuation so that removing a
        # leading separator (", 42") cannot leave a stray leading space.
        return answer.strip(" \t\r\n.,;:\"'")