volker committed on
Commit
7ea0bd8
·
1 Parent(s): e929245

Additional websearch tools.

Browse files
Files changed (2) hide show
  1. basic_agent.py +60 -1
  2. requirements.txt +2 -0
basic_agent.py CHANGED
@@ -5,7 +5,11 @@ from ac_tools import DuckDuckGoSearchToolWH
5
  import requests
6
  import os
7
  from PIL import Image
 
 
8
  from transformers import pipeline
 
 
9
 
10
 
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -52,6 +56,59 @@ def download_file(task_id: str, filename: str) -> str:
52
  raise RuntimeError(error_msg)
53
 
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  class BasicAgent:
56
  def __init__(self):
57
  print("BasicAgent initialized.")
@@ -71,7 +128,9 @@ class BasicSmolAgent:
71
  if not model:
72
  model = HfApiModel()
73
  search_tool = DuckDuckGoSearchToolWH()
74
- self.agent = CodeAgent(tools=[search_tool], model=model, max_steps=10, additional_authorized_imports=['pandas'])
 
 
75
  self.prompt = ("The question is the following:\n ```{}```"
76
  " YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings."
77
  " If you are asked for a number, don't use comma to write your number neither use units"
 
5
  import requests
6
  import os
7
  from PIL import Image
8
+ import wikipedia
9
+
10
  from transformers import pipeline
11
+ import requests
12
+ from bs4 import BeautifulSoup
13
 
14
 
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
56
  raise RuntimeError(error_msg)
57
 
58
 
59
class WikipediaSearchTool(Tool):
    """Agent tool that looks up a topic on Wikipedia.

    The first title returned by ``wikipedia.search`` is treated as the most
    relevant article.  By default a summary limited to ``summary_sentences``
    sentences is returned, matching the tool ``description`` shown to the
    agent.  Failures are reported as plain strings instead of being raised so
    the calling agent can read the message and try a different query.
    """

    name = "wikipedia_search"
    description = "Searches Wikipedia and returns a short summary of the most relevant article."
    inputs = {
        "query": {"type": "string", "description": "The search term or topic to look up on Wikipedia."}
    }
    output_type = "string"

    def __init__(self, summary_sentences=3):
        """Create the tool.

        Args:
            summary_sentences: Number of sentences to request for the summary.
                A falsy value (``0`` or ``None``) returns the full article
                text instead of a summary.
        """
        super().__init__()
        self.summary_sentences = summary_sentences

    def forward(self, query: str) -> str:
        """Return a Markdown-style title line followed by the article text.

        Args:
            query: The search term or topic to look up.

        Returns:
            ``"**<title>**\\n\\n<text>"`` on success, otherwise a
            human-readable message describing why nothing could be returned.
        """
        try:
            results = wikipedia.search(query)
            if not results:
                return "No Wikipedia results found for that query."
            page_title = results[0]

            # The title comes straight from the search results, so disable
            # auto_suggest: it may otherwise rewrite an exact title into a
            # different (or non-existent) article.
            if self.summary_sentences:
                summary = wikipedia.summary(
                    page_title,
                    sentences=self.summary_sentences,
                    auto_suggest=False,
                )
                return f"**{page_title}**\n\n{summary}"

            page = wikipedia.page(page_title, auto_suggest=False)
            return f"**{page.title}**\n\n{page.content}"
        except wikipedia.exceptions.DisambiguationError as e:
            # Give the agent concrete titles it can retry with.
            options = ", ".join(e.options[:10])
            return f"Ambiguous Wikipedia query. Possible articles: {options}"
        except Exception as e:
            # Tool boundary: network/API failures are reported, not raised.
            return f"Error during Wikipedia search: {e}"
80
+
81
+
82
class WebpageReaderTool(Tool):
    """Agent tool that downloads a web page and returns its visible text.

    The page is fetched with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``.  ``<script>``, ``<style>`` and ``<noscript>`` elements
    are removed, blank lines are dropped, and the result is truncated to
    ``max_chars`` characters.  Failures are reported as plain strings instead
    of being raised so the calling agent can react to them.
    """

    name = "read_webpage"
    description = "Fetches the text content from a given URL and returns the main body text."
    inputs = {
        "url": {"type": "string", "description": "The URL of the webpage to read."}
    }
    output_type = "string"

    def __init__(self, max_chars=5000):
        """Create the tool.

        Args:
            max_chars: Maximum number of characters of page text to return.
                ``None`` disables truncation.
        """
        super().__init__()
        self.max_chars = max_chars

    def forward(self, url: str) -> str:
        """Fetch ``url`` and return its cleaned, truncated text.

        Args:
            url: The URL of the webpage to read.

        Returns:
            The visible page text (one non-empty, stripped line per row),
            or ``"Error reading webpage: ..."`` if anything goes wrong.
        """
        try:
            # Send a browser-like User-Agent with the request.
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                              "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
            }
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, "html.parser")

            # Extract visible text (ignore scripts/styles)
            for tag in soup(["script", "style", "noscript"]):
                tag.extract()
            text = soup.get_text(separator="\n")
            cleaned = "\n".join(line.strip() for line in text.splitlines() if line.strip())

            # Cap the amount of text handed back to the agent.
            return cleaned[:self.max_chars]
        except Exception as e:
            # Tool boundary: HTTP/parse failures are reported, not raised.
            return f"Error reading webpage: {e}"
110
+
111
+
112
  class BasicAgent:
113
  def __init__(self):
114
  print("BasicAgent initialized.")
 
128
  if not model:
129
  model = HfApiModel()
130
  search_tool = DuckDuckGoSearchToolWH()
131
+ wiki_tool = WikipediaSearchTool()
132
+ webpage_tool = WebpageReaderTool()
133
+ self.agent = CodeAgent(tools=[search_tool, wiki_tool, webpage_tool], model=model, max_steps=10, additional_authorized_imports=['pandas'])
134
  self.prompt = ("The question is the following:\n ```{}```"
135
  " YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings."
136
  " If you are asked for a number, don't use comma to write your number neither use units"
requirements.txt CHANGED
@@ -1,6 +1,8 @@
1
  --extra-index-url https://download.pytorch.org/whl/cpu
2
 
3
  gradio[oauth]
 
 
4
  requests
5
  openai
6
  python-dotenv
 
1
  --extra-index-url https://download.pytorch.org/whl/cpu
2
 
3
  gradio[oauth]
4
+ wikipedia
5
+ bs4
6
  requests
7
  openai
8
  python-dotenv