Charles Azam committed on
Commit
6c0aeb9
·
1 Parent(s): e5b423a

feat: add logging mechanism

Browse files
src/deepengineer/deepsearch/analyse_markdown_agent.py CHANGED
@@ -5,7 +5,7 @@ Simple agent to analyse a markdown, just to test some ideas.
5
  from enum import Enum
6
 
7
  from mistralai import OCRResponse
8
- from smolagents import CodeAgent, LiteLLMModel, Tool
9
 
10
  from deepengineer.webcrawler.pdf_utils import (
11
  convert_ocr_response_to_markdown,
@@ -13,6 +13,7 @@ from deepengineer.webcrawler.pdf_utils import (
13
  get_markdown_by_page_numbers,
14
  get_table_of_contents_per_page_markdown,
15
  )
 
16
 
17
 
18
  class ToolNames(Enum):
@@ -22,7 +23,7 @@ class ToolNames(Enum):
22
  FIND_IN_MARKDOWN = "find_in_markdown"
23
 
24
 
25
- class GetTableOfContentsTool(Tool):
26
  name = ToolNames.GET_TABLE_OF_CONTENTS.value
27
  description = "Returns all of the titles in the document along with the page number they are on."
28
  inputs = {}
@@ -39,7 +40,7 @@ class GetTableOfContentsTool(Tool):
39
  return self.table_of_contents
40
 
41
 
42
- class GetMarkdownTool(Tool):
43
  name = ToolNames.GET_MARKDOWN.value
44
  description = f"Returns the markdown entire content of the document. Beware this might be too long to be useful, except for small documents, use {ToolNames.GET_PAGES_CONTENT.value} instead. You can use {ToolNames.GET_TABLE_OF_CONTENTS.value} to get the table of contents of the document including the number of pages."
45
  inputs = {}
@@ -54,7 +55,7 @@ class GetMarkdownTool(Tool):
54
  return self.markdown_content
55
 
56
 
57
- class GetPagesContentTool(Tool):
58
  name = ToolNames.GET_PAGES_CONTENT.value
59
  description = f"Returns the content of the pages. You can use {ToolNames.GET_TABLE_OF_CONTENTS.value} to get the table of contents of the document including the number of pages. Expects a list of page numbers as integers as input."
60
  inputs = {
@@ -73,7 +74,7 @@ class GetPagesContentTool(Tool):
73
  return get_markdown_by_page_numbers(self.markdown, page_numbers)
74
 
75
 
76
- class FindInMarkdownTool(Tool):
77
  name = ToolNames.FIND_IN_MARKDOWN.value
78
  description = f"Finds the page numbers of the document that contain the search queries. If you are looking for a specific information, you can use this tool to find the page numbers of the document that contain the information and then use {ToolNames.GET_PAGES_CONTENT.value} to get the content of the pages."
79
  inputs = {
 
5
  from enum import Enum
6
 
7
  from mistralai import OCRResponse
8
+ from smolagents import CodeAgent, LiteLLMModel
9
 
10
  from deepengineer.webcrawler.pdf_utils import (
11
  convert_ocr_response_to_markdown,
 
13
  get_markdown_by_page_numbers,
14
  get_table_of_contents_per_page_markdown,
15
  )
16
+ from deepengineer.logging_tools import LoggingTool
17
 
18
 
19
  class ToolNames(Enum):
 
23
  FIND_IN_MARKDOWN = "find_in_markdown"
24
 
25
 
26
+ class GetTableOfContentsTool(LoggingTool):
27
  name = ToolNames.GET_TABLE_OF_CONTENTS.value
28
  description = "Returns all of the titles in the document along with the page number they are on."
29
  inputs = {}
 
40
  return self.table_of_contents
41
 
42
 
43
+ class GetMarkdownTool(LoggingTool):
44
  name = ToolNames.GET_MARKDOWN.value
45
  description = f"Returns the markdown entire content of the document. Beware this might be too long to be useful, except for small documents, use {ToolNames.GET_PAGES_CONTENT.value} instead. You can use {ToolNames.GET_TABLE_OF_CONTENTS.value} to get the table of contents of the document including the number of pages."
46
  inputs = {}
 
55
  return self.markdown_content
56
 
57
 
58
+ class GetPagesContentTool(LoggingTool):
59
  name = ToolNames.GET_PAGES_CONTENT.value
60
  description = f"Returns the content of the pages. You can use {ToolNames.GET_TABLE_OF_CONTENTS.value} to get the table of contents of the document including the number of pages. Expects a list of page numbers as integers as input."
61
  inputs = {
 
74
  return get_markdown_by_page_numbers(self.markdown, page_numbers)
75
 
76
 
77
+ class FindInMarkdownTool(LoggingTool):
78
  name = ToolNames.FIND_IN_MARKDOWN.value
79
  description = f"Finds the page numbers of the document that contain the search queries. If you are looking for a specific information, you can use this tool to find the page numbers of the document that contain the information and then use {ToolNames.GET_PAGES_CONTENT.value} to get the content of the pages."
80
  inputs = {
src/deepengineer/deepsearch/draw_agent.py CHANGED
@@ -12,8 +12,8 @@ from PIL import Image
12
 
13
  from smolagents import CodeAgent, LiteLLMModel
14
  from smolagents.agents import ActionStep
15
- from smolagents import Tool
16
  from deepengineer.webcrawler.crawl_database import DataBase
 
17
 
18
 
19
  def _find_and_save_matplotlib_figure(image_path: Path = Path("figure.png")) -> str:
@@ -28,7 +28,7 @@ def _find_and_save_matplotlib_figure(image_path: Path = Path("figure.png")) -> s
28
  return f"Figure saved to {image_path}."
29
 
30
 
31
- class SaveMatplotlibFigTool(Tool):
32
  name = "save_matplotlib_fig"
33
  description = """Save the current matplotlib figure to the current directory. Then plt.close() is called to clear the figure. The image is returned as a markdown string, use this markdown inside the final answer to include the image.
34
  """
@@ -148,7 +148,7 @@ def draw_matplotlib_image_from_prompt(
148
  return image_path
149
 
150
 
151
- class DrawImageTool(Tool):
152
  name = "draw_image"
153
  description = f"Draw an image based on a prompt. The image is saved in the current directory. The image is returned as a markdown image, use this markdown inside the final answer to include the image. You must be very specific in your prompt."
154
  inputs = {
 
12
 
13
  from smolagents import CodeAgent, LiteLLMModel
14
  from smolagents.agents import ActionStep
 
15
  from deepengineer.webcrawler.crawl_database import DataBase
16
+ from deepengineer.logging_tools import LoggingTool
17
 
18
 
19
  def _find_and_save_matplotlib_figure(image_path: Path = Path("figure.png")) -> str:
 
28
  return f"Figure saved to {image_path}."
29
 
30
 
31
+ class SaveMatplotlibFigTool(LoggingTool):
32
  name = "save_matplotlib_fig"
33
  description = """Save the current matplotlib figure to the current directory. Then plt.close() is called to clear the figure. The image is returned as a markdown string, use this markdown inside the final answer to include the image.
34
  """
 
148
  return image_path
149
 
150
 
151
+ class DrawImageTool(LoggingTool):
152
  name = "draw_image"
153
  description = f"Draw an image based on a prompt. The image is saved in the current directory. The image is returned as a markdown image, use this markdown inside the final answer to include the image. You must be very specific in your prompt."
154
  inputs = {
src/deepengineer/deepsearch/main_agent.py CHANGED
@@ -15,6 +15,7 @@ from deepengineer.common_path import DATA_DIR
15
  from smolagents import CodeAgent, LiteLLMModel
16
  import random
17
  from pathlib import Path
 
18
 
19
 
20
  def _create_output_image_path():
@@ -97,7 +98,7 @@ Run verification steps if that's needed, you must make sure you find the correct
97
 
98
 
99
  def create_main_search_agent(
100
- model_id="deepseek/deepseek-reasoner", database: DataBase | None = None
101
  ):
102
  """
103
  Simple agent that can search the web and answer the question. This is much faster and better for simple questions that do not require deep research.
@@ -111,14 +112,14 @@ def create_main_search_agent(
111
 
112
  # Web search and crawling tools
113
  WEB_SEARCH_TOOLS = [
114
- SearchTool(),
115
- ArxivSearchTool(),
116
- ScientificSearchTool(),
117
- GetTableOfContentsTool(database),
118
- GetMarkdownTool(database),
119
- GetPagesContentTool(database),
120
- FindInMarkdownTool(database),
121
- SaveMatplotlibFigTool(output_dir=output_image_path),
122
  ]
123
 
124
  search_agent = CodeAgent(
@@ -138,7 +139,7 @@ def create_main_search_agent(
138
  return search_agent
139
 
140
 
141
- def main_search(task: str):
142
  MAIN_PROMPT = """
143
  You are DeepDraft, an advanced research and analysis agent specialized in deep technical research, data visualization, and comprehensive information synthesis. You have access to powerful tools for web search, document analysis, and data visualization.
144
 
@@ -164,7 +165,7 @@ Failure or 'I cannot answer' or 'None found' will not be tolerated, success will
164
  Run verification steps if that's needed, you must make sure you find the correct answer! Here is the task:
165
  {task}
166
  """
167
- agent = create_main_search_agent(model_id="mistral/mistral-medium-latest")
168
  answer = agent.run(MAIN_PROMPT.format(task=task))
169
  return answer
170
 
 
15
  from smolagents import CodeAgent, LiteLLMModel
16
  import random
17
  from pathlib import Path
18
+ import queue
19
 
20
 
21
  def _create_output_image_path():
 
98
 
99
 
100
  def create_main_search_agent(
101
+ model_id="deepseek/deepseek-reasoner", database: DataBase | None = None, log_queue: queue.Queue | None = None
102
  ):
103
  """
104
  Simple agent that can search the web and answer the question. This is much faster and better for simple questions that do not require deep research.
 
112
 
113
  # Web search and crawling tools
114
  WEB_SEARCH_TOOLS = [
115
+ SearchTool(log_queue=log_queue,),
116
+ ArxivSearchTool(log_queue=log_queue,),
117
+ ScientificSearchTool(log_queue=log_queue,),
118
+ GetTableOfContentsTool(log_queue=log_queue, database=database),
119
+ GetMarkdownTool(log_queue=log_queue, database=database),
120
+ GetPagesContentTool(log_queue=log_queue, database=database),
121
+ FindInMarkdownTool(log_queue=log_queue, database=database),
122
+ SaveMatplotlibFigTool(log_queue=log_queue,output_dir=output_image_path),
123
  ]
124
 
125
  search_agent = CodeAgent(
 
139
  return search_agent
140
 
141
 
142
+ def main_search(task: str, log_queue: queue.Queue | None = None):
143
  MAIN_PROMPT = """
144
  You are DeepDraft, an advanced research and analysis agent specialized in deep technical research, data visualization, and comprehensive information synthesis. You have access to powerful tools for web search, document analysis, and data visualization.
145
 
 
165
  Run verification steps if that's needed, you must make sure you find the correct answer! Here is the task:
166
  {task}
167
  """
168
+ agent = create_main_search_agent(model_id="mistral/mistral-medium-latest", log_queue=log_queue)
169
  answer = agent.run(MAIN_PROMPT.format(task=task))
170
  return answer
171
 
src/deepengineer/deepsearch/scawl_web_agent.py CHANGED
@@ -1,7 +1,7 @@
1
  import asyncio
2
  from enum import Enum
3
 
4
- from smolagents import CodeAgent, LiteLLMModel, Tool
5
 
6
  from deepengineer.webcrawler.async_search import (
7
  SearchResponse,
@@ -17,6 +17,7 @@ from deepengineer.webcrawler.pdf_utils import (
17
  get_markdown_by_page_numbers,
18
  get_table_of_contents_per_page_markdown,
19
  )
 
20
 
21
 
22
  class ToolNames(Enum):
@@ -41,7 +42,7 @@ def filter_search_results(
41
  return search_response.to_string()
42
 
43
 
44
- class SearchTool(Tool):
45
  name = ToolNames.SEARCH_TOOL.value
46
  description = """Search the web using Linkup API. Good for deep research with sourced answers.
47
  Linkup also provides an answer. This answer is not always correct, so you might want to check the sources.
@@ -63,7 +64,7 @@ class SearchTool(Tool):
63
  return filter_search_results(result)
64
 
65
 
66
- class ArxivSearchTool(Tool):
67
  name = ToolNames.ARXIV_SEARCH.value
68
  description = """Search arXiv for academic papers and preprints with Linkup API.
69
  Linkup also provides an answer. This answer is not always correct, so you might want to check the sources.
@@ -81,7 +82,7 @@ class ArxivSearchTool(Tool):
81
  return filter_search_results(result)
82
 
83
 
84
- class PubmedSearchTool(Tool):
85
  name = ToolNames.PUBMED_SEARCH.value
86
  description = """Search PubMed for medical and scientific literature with Linkup API.
87
  Linkup also provides an answer. This answer is not always correct, so you might want to check the sources.
@@ -99,7 +100,7 @@ class PubmedSearchTool(Tool):
99
  return filter_search_results(result)
100
 
101
 
102
- class ScientificSearchTool(Tool):
103
  name = ToolNames.SCIENTIFIC_SEARCH.value
104
  description = """Search across multiple scientific domains: Wikipedia, arXiv, PubMed, and ScienceDirect.
105
  Linkup also provides an answer. This answer is not always correct, so you might want to check the sources.
@@ -120,7 +121,7 @@ class ScientificSearchTool(Tool):
120
  URL_EXPLAINATION = """The URL can be be converted to a markdown. If the URL points to a PDF, the pdf is converted to markdown, otherwise the URL is crawled and the markdown is extracted. This markdown is split into pages that are numbered. You can use the page numbers to get the content of the pages."""
121
 
122
 
123
- class GetTableOfContentsTool(Tool):
124
  name = ToolNames.GET_TABLE_OF_CONTENTS.value
125
  description = f"""Returns all of the titles in the document along with the page number they are on.
126
  {URL_EXPLAINATION}
@@ -143,7 +144,7 @@ class GetTableOfContentsTool(Tool):
143
  return table_of_contents
144
 
145
 
146
- class GetMarkdownTool(Tool):
147
  name = ToolNames.GET_MARKDOWN.value
148
  description = f"Returns in markdown entire content of the url. Beware this might be too long to be useful, except for small documents, use {ToolNames.GET_PAGES_CONTENT.value} instead. You can also use {ToolNames.GET_TABLE_OF_CONTENTS.value} first to get the table of contents of the document including the number of pages."
149
  inputs = {
@@ -161,7 +162,7 @@ class GetMarkdownTool(Tool):
161
  return markdown_content
162
 
163
 
164
- class GetPagesContentTool(Tool):
165
  name = ToolNames.GET_PAGES_CONTENT.value
166
  description = f"Returns the content of the pages. You can use {ToolNames.GET_TABLE_OF_CONTENTS.value} to get the table of contents of the url including the number of pages. Expects a list of page numbers as integers as input. {URL_EXPLAINATION}"
167
  inputs = {
@@ -182,7 +183,7 @@ class GetPagesContentTool(Tool):
182
  return get_markdown_by_page_numbers(markdown, page_numbers)
183
 
184
 
185
- class FindInMarkdownTool(Tool):
186
  name = ToolNames.FIND_IN_MARKDOWN.value
187
  description = f"Finds the page numbers of the url that contain the search queries. If you are looking for a specific information, you can use this tool to find the page numbers of the url that contain the information and then use {ToolNames.GET_PAGES_CONTENT.value} to get the content of the pages. {URL_EXPLAINATION}"
188
  inputs = {
 
1
  import asyncio
2
  from enum import Enum
3
 
4
+ from smolagents import CodeAgent, LiteLLMModel
5
 
6
  from deepengineer.webcrawler.async_search import (
7
  SearchResponse,
 
17
  get_markdown_by_page_numbers,
18
  get_table_of_contents_per_page_markdown,
19
  )
20
+ from deepengineer.logging_tools import LoggingTool
21
 
22
 
23
  class ToolNames(Enum):
 
42
  return search_response.to_string()
43
 
44
 
45
+ class SearchTool(LoggingTool):
46
  name = ToolNames.SEARCH_TOOL.value
47
  description = """Search the web using Linkup API. Good for deep research with sourced answers.
48
  Linkup also provides an answer. This answer is not always correct, so you might want to check the sources.
 
64
  return filter_search_results(result)
65
 
66
 
67
+ class ArxivSearchTool(LoggingTool):
68
  name = ToolNames.ARXIV_SEARCH.value
69
  description = """Search arXiv for academic papers and preprints with Linkup API.
70
  Linkup also provides an answer. This answer is not always correct, so you might want to check the sources.
 
82
  return filter_search_results(result)
83
 
84
 
85
+ class PubmedSearchTool(LoggingTool):
86
  name = ToolNames.PUBMED_SEARCH.value
87
  description = """Search PubMed for medical and scientific literature with Linkup API.
88
  Linkup also provides an answer. This answer is not always correct, so you might want to check the sources.
 
100
  return filter_search_results(result)
101
 
102
 
103
+ class ScientificSearchTool(LoggingTool):
104
  name = ToolNames.SCIENTIFIC_SEARCH.value
105
  description = """Search across multiple scientific domains: Wikipedia, arXiv, PubMed, and ScienceDirect.
106
  Linkup also provides an answer. This answer is not always correct, so you might want to check the sources.
 
121
  URL_EXPLAINATION = """The URL can be be converted to a markdown. If the URL points to a PDF, the pdf is converted to markdown, otherwise the URL is crawled and the markdown is extracted. This markdown is split into pages that are numbered. You can use the page numbers to get the content of the pages."""
122
 
123
 
124
+ class GetTableOfContentsTool(LoggingTool):
125
  name = ToolNames.GET_TABLE_OF_CONTENTS.value
126
  description = f"""Returns all of the titles in the document along with the page number they are on.
127
  {URL_EXPLAINATION}
 
144
  return table_of_contents
145
 
146
 
147
+ class GetMarkdownTool(LoggingTool):
148
  name = ToolNames.GET_MARKDOWN.value
149
  description = f"Returns in markdown entire content of the url. Beware this might be too long to be useful, except for small documents, use {ToolNames.GET_PAGES_CONTENT.value} instead. You can also use {ToolNames.GET_TABLE_OF_CONTENTS.value} first to get the table of contents of the document including the number of pages."
150
  inputs = {
 
162
  return markdown_content
163
 
164
 
165
+ class GetPagesContentTool(LoggingTool):
166
  name = ToolNames.GET_PAGES_CONTENT.value
167
  description = f"Returns the content of the pages. You can use {ToolNames.GET_TABLE_OF_CONTENTS.value} to get the table of contents of the url including the number of pages. Expects a list of page numbers as integers as input. {URL_EXPLAINATION}"
168
  inputs = {
 
183
  return get_markdown_by_page_numbers(markdown, page_numbers)
184
 
185
 
186
+ class FindInMarkdownTool(LoggingTool):
187
  name = ToolNames.FIND_IN_MARKDOWN.value
188
  description = f"Finds the page numbers of the url that contain the search queries. If you are looking for a specific information, you can use this tool to find the page numbers of the url that contain the information and then use {ToolNames.GET_PAGES_CONTENT.value} to get the content of the pages. {URL_EXPLAINATION}"
189
  inputs = {
src/deepengineer/logging_tools.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import queue

from smolagents import Tool


class LoggingTool(Tool):
    """Base class for smolagents tools that can push log messages to a queue.

    Subclasses call push_log() from their tool implementation. When no queue
    is supplied at construction time, logging is a silent no-op, so tools can
    be used with or without a log consumer.
    """

    def __init__(self, log_queue: queue.Queue | None = None):
        """
        Args:
            log_queue: Optional queue that receives log messages. When None,
                push_log() does nothing.
        """
        super().__init__()
        # Stored as-is; consumers drain this queue on another thread.
        self.log_queue = log_queue

    def push_log(self, msg: str) -> None:
        """Put ``msg`` on the log queue, if one was configured."""
        # Explicit None check: queue.Queue defines neither __bool__ nor
        # __len__, so truthiness adds nothing — "is not None" states intent.
        if self.log_queue is not None:
            self.log_queue.put(msg)