avisha bhiryani committed on
Commit
b96334b
·
unverified ·
1 Parent(s): ae7a494

Added a tool to do web search and rerank results

Browse files
Files changed (4) hide show
  1. app.py +40 -8
  2. requirements.txt +64 -5
  3. tools/visit_webpage.py +1 -0
  4. tools/web_search.py +3 -2
app.py CHANGED
@@ -3,20 +3,51 @@ import datetime
3
  import requests
4
  import pytz
5
  import yaml
 
6
  from tools.final_answer import FinalAnswerTool
 
 
7
 
8
  from Gradio_UI import GradioUI
9
 
10
- # Below is an example of a tool that does nothing. Amaze us with your creativity !
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  @tool
12
- def my_custom_tool(arg1:str, arg2:int)-> str: #it's import to specify the return type
13
- #Keep this format for the description / args / args description but feel free to modify the tool
14
- """A tool that does nothing yet
15
  Args:
16
- arg1: the first argument
17
- arg2: the second argument
18
  """
19
- return "What magic will you build ?"
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  @tool
22
  def get_current_time_in_timezone(timezone: str) -> str:
@@ -34,6 +65,7 @@ def get_current_time_in_timezone(timezone: str) -> str:
34
  return f"Error fetching time for timezone '{timezone}': {str(e)}"
35
 
36
 
 
37
  final_answer = FinalAnswerTool()
38
 
39
  # If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
@@ -55,7 +87,7 @@ with open("prompts.yaml", 'r') as stream:
55
 
56
  agent = CodeAgent(
57
  model=model,
58
- tools=[final_answer], ## add your tools here (don't remove final answer)
59
  max_steps=6,
60
  verbosity_level=1,
61
  grammar=None,
 
3
  import requests
4
  import pytz
5
  import yaml
6
+ from rank_bm25 import BM25Okapi
7
  from tools.final_answer import FinalAnswerTool
8
+ from tools.visit_webpage import VisitWebpageTool
9
+ from tools.web_search import DuckDuckGoSearchTool
10
 
11
  from Gradio_UI import GradioUI
12
 
13
def rank_urls_by_relevance(search_results, query):
    """Rank search results by BM25 similarity between the query and each result's text.

    Args:
        search_results: Iterable of indexable results in which item 0 (title)
            and item 2 (snippet/body) are strings; they are joined to form the
            document that is scored.
        query: The query string; it is lower-cased and split on whitespace.

    Returns:
        A list of ``(result, score)`` pairs sorted by descending BM25 score.
        An empty list is returned when there are no results to rank.
    """
    # Materialise once: the results are walked twice (to build the corpus and
    # again in zip), which would silently exhaust a one-shot iterator.
    search_results = list(search_results)

    # BM25Okapi divides by the corpus size when computing the average document
    # length, so an empty corpus would raise ZeroDivisionError.
    if not search_results:
        return []

    # Same naive tokenisation for documents and query: lower-case + whitespace split.
    tokenized_docs = [(res[0] + " " + res[2]).lower().split() for res in search_results]

    bm25 = BM25Okapi(tokenized_docs)
    scores = bm25.get_scores(query.lower().split())

    # Sort results by BM25 score (higher is better); sorted() is stable, so
    # equally scored results keep their original search order.
    return sorted(zip(search_results, scores), key=lambda pair: pair[1], reverse=True)
28
+
29
+ # The tool below uses the DuckDuckGoSearchTool to fetch data from the web that is relevant to the query.
30
+ # The fetched results are reranked with BM25, and the page at the highest-scoring URL is then fetched.
31
@tool
def get_answer_from_web(query: str) -> str:  # it's important to specify the return type
    """A tool that fetches results from the web. This function first calls DuckDuckGoSearchTool, reranks the results with BM25, and passes the best-scoring url to VisitWebpageTool to receive the final results.

    Args:
        query: A string that represents the query to search the web for.

    Returns:
        The content returned by VisitWebpageTool for the top-ranked url, or an error message describing what went wrong.
    """
    # Broad catch is deliberate: this is the tool boundary, and the agent should
    # receive a readable error string instead of an exception. The message now
    # carries the cause, mirroring get_current_time_in_timezone.
    try:
        results = DuckDuckGoSearchTool().forward(query)

        ranked_results = rank_urls_by_relevance(results, query)
        if not ranked_results:
            return f"Error fetching results for query '{query}': no search results to rank"

        # Each ranked entry is (result, score); index 1 of the result is passed
        # on as the url, exactly as the original ranked_results[0][0][1] did.
        best_result, _score = ranked_results[0]
        return VisitWebpageTool().forward(best_result[1])

    except Exception as e:
        return f"Error fetching results for query '{query}': {str(e)}"
51
 
52
  @tool
53
  def get_current_time_in_timezone(timezone: str) -> str:
 
65
  return f"Error fetching time for timezone '{timezone}': {str(e)}"
66
 
67
 
68
+
69
  final_answer = FinalAnswerTool()
70
 
71
  # If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
 
87
 
88
  agent = CodeAgent(
89
  model=model,
90
+ tools=[final_answer, image_generation_tool, get_current_time_in_timezone, get_answer_from_web], ## add your tools here (don't remove final answer)
91
  max_steps=6,
92
  verbosity_level=1,
93
  grammar=None,
requirements.txt CHANGED
@@ -1,5 +1,64 @@
1
- markdownify
2
- smolagents
3
- requests
4
- duckduckgo_search
5
- pandas
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ annotated-types==0.7.0
3
+ anyio==4.8.0
4
+ beautifulsoup4==4.13.3
5
+ Brotli==1.1.0
6
+ certifi==2025.1.31
7
+ charset-normalizer==3.4.1
8
+ click==8.1.8
9
+ duckduckgo_search==7.4.2
10
+ fastapi==0.115.8
11
+ ffmpy==0.5.0
12
+ filelock==3.17.0
13
+ fsspec==2025.2.0
14
+ gradio==5.16.1
15
+ gradio_client==1.7.0
16
+ h11==0.14.0
17
+ h2==4.2.0
18
+ hpack==4.1.0
19
+ httpcore==1.0.7
20
+ httpx==0.28.1
21
+ huggingface-hub==0.28.1
22
+ hyperframe==6.1.0
23
+ idna==3.10
24
+ Jinja2==3.1.5
25
+ lxml==5.3.1
26
+ markdown-it-py==3.0.0
27
+ markdownify==0.14.1
28
+ MarkupSafe==2.1.5
29
+ mdurl==0.1.2
30
+ numpy==2.2.3
31
+ orjson==3.10.15
32
+ packaging==24.2
33
+ pandas==2.2.3
34
+ pillow==11.1.0
35
+ pydantic==2.10.6
36
+ pydantic_core==2.27.2
37
+ pydub==0.25.1
38
+ Pygments==2.19.1
39
+ python-dateutil==2.9.0.post0
40
+ python-dotenv==1.0.1
41
+ python-multipart==0.0.20
42
+ pytz==2025.1
43
+ PyYAML==6.0.2
44
+ rank-bm25==0.2.2
45
+ requests==2.32.3
46
+ rich==13.9.4
47
+ ruff==0.9.6
48
+ safehttpx==0.1.6
49
+ semantic-version==2.10.0
50
+ shellingham==1.5.4
51
+ six==1.17.0
52
+ smolagents==1.9.2
53
+ sniffio==1.3.1
54
+ socksio==1.0.0
55
+ soupsieve==2.6
56
+ starlette==0.45.3
57
+ tomlkit==0.13.2
58
+ tqdm==4.67.1
59
+ typer==0.15.1
60
+ typing_extensions==4.12.2
61
+ tzdata==2025.1
62
+ urllib3==2.3.0
63
+ uvicorn==0.34.0
64
+ websockets==14.2
tools/visit_webpage.py CHANGED
@@ -1,6 +1,7 @@
1
  from typing import Any, Optional
2
  from smolagents.tools import Tool
3
  import requests
 
4
  import markdownify
5
  import smolagents
6
 
 
1
  from typing import Any, Optional
2
  from smolagents.tools import Tool
3
  import requests
4
+ import re
5
  import markdownify
6
  import smolagents
7
 
tools/web_search.py CHANGED
@@ -23,5 +23,6 @@ class DuckDuckGoSearchTool(Tool):
23
  results = self.ddgs.text(query, max_results=self.max_results)
24
  if len(results) == 0:
25
  raise Exception("No results found! Try a less restrictive/shorter query.")
26
- postprocessed_results = [f"[{result['title']}]({result['href']})\n{result['body']}" for result in results]
27
- return "## Search Results\n\n" + "\n\n".join(postprocessed_results)
 
 
23
  results = self.ddgs.text(query, max_results=self.max_results)
24
  if len(results) == 0:
25
  raise Exception("No results found! Try a less restrictive/shorter query.")
26
+ # postprocessed_results = [f"[{result['title']}]({result['href']})\n{result['body']}" for result in results]
27
+ postprocessed_results = [(result['title'], result['href'], result['body']) for result in results]
28
+ return postprocessed_results