Charles Azam committed
Commit 0159aaf · 1 Parent(s): 24da399

feat: add logging mechanism
gradio_app.py CHANGED
@@ -1,41 +1,7 @@
 import gradio as gr
-import threading, time, queue, logging
-
-
-# ---------- 1. A thread-safe queue for log messages ----------
-
-
-class _BaseTool:
-    def __init__(self, log_queue: queue.Queue | None = None):
-        self.log_queue = log_queue
-
-    def push_log(self, msg: str):
-        if self.log_queue:
-            self.log_queue.put(msg)
-
-    def forward(self, input: str) -> str:
-        raise NotImplementedError("Subclasses must implement forward method")
-
-# ---------- 2. Tools that report progress ----------
-class SearchTool(_BaseTool):
-    def forward(self, query: str) -> str:
-        self.push_log(f"🔍 SearchTool → {query}")
-        time.sleep(2)  # expensive work…
-        out = f"Found {query}"
-        self.push_log("✅ SearchTool done")
-        return out
-
-class DrawTool(_BaseTool):
-    def forward(self, prompt: str) -> str:
-        self.push_log(f"🎨 DrawTool → {prompt}")
-        time.sleep(2)
-        out = f"Drawing {prompt}"
-        self.push_log("✅ DrawTool done")
-        return out
-
-# ---------- 3. Your unchanged agent -------------------------
-def agent(user_input: str, log_queue: queue.Queue) -> str:
-    st, dt = SearchTool(log_queue), DrawTool(log_queue)
-    return st.forward(user_input) + dt.forward(user_input) + st.forward(user_input) + dt.forward(user_input)
+import threading, time, queue
+from deepengineer.deepsearch.main_agent import main_search
 
 # ---------- 4. Wrapper that streams -------------------------
 def run_agent_stream(user_input: str):
@@ -48,6 +14,7 @@ def run_agent_stream(user_input: str):
     Yields tuples: (agent_output, log_output)
     """
     log_queue = queue.Queue()
+
     # empty queue before each run
     while not log_queue.empty():
         log_queue.get_nowait()
@@ -56,7 +23,7 @@ def run_agent_stream(user_input: str):
     done = threading.Event()
 
     def _worker():
-        answer_container["text"] = agent(user_input, log_queue)
+        answer_container["text"] = main_search(user_input, log_queue)
         done.set()
 
     threading.Thread(target=_worker, daemon=True).start()
@@ -81,17 +48,17 @@ def run_agent_stream(user_input: str):
 
 # ---------- 5. Gradio UI ------------------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("## Agent Interface with Real-Time Tool Logging")
+    gr.Markdown("# Agent Interface with Real-Time Tool Logging")
     user_input = gr.Textbox(label="User Message")
-    agent_output = gr.Textbox(label="Agent Response")
+    agent_output = gr.Markdown(label="Agent Response")
     log_output = gr.Textbox(label="Tool Invocation Log", interactive=False)
 
     send = gr.Button("Send")
     send.click(
         fn=run_agent_stream,
         inputs=[user_input],
-        outputs=[agent_output, log_output],
+        outputs=[log_output, agent_output],
-        concurrency_limit=4,
+        concurrency_limit=2,
     )
 
 if __name__ == "__main__":
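Note: the diff elides most of the body of `run_agent_stream` (the `@@ -81,17 +48,17 @@` hunk shows only its tail). A minimal sketch of the polling pattern the visible pieces imply — worker thread, queue drain, generator yields — could look like the following; the yield order `(log_output, agent_output)` follows the new `outputs` list, everything else is an assumption:

```python
# Minimal sketch of the streaming wrapper implied by the hunks above.
# Assumption: the elided lines poll log_queue until the worker signals done.
import threading
import queue


def run_agent_stream_sketch(user_input: str, agent_fn):
    """Yield (log_output, agent_output) tuples while agent_fn runs."""
    log_queue = queue.Queue()
    answer_container = {"text": ""}
    done = threading.Event()

    def _worker():
        # agent_fn is expected to push progress messages onto log_queue,
        # as main_search does via the LoggingTool subclasses in this commit.
        answer_container["text"] = agent_fn(user_input, log_queue)
        done.set()

    threading.Thread(target=_worker, daemon=True).start()

    logs = []
    while not done.is_set() or not log_queue.empty():
        try:
            logs.append(log_queue.get(timeout=0.1))
        except queue.Empty:
            continue
        yield "\n".join(logs), ""  # logs stream in; answer still pending

    yield "\n".join(logs), answer_container["text"]  # final answer
```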
src/deepengineer/deepsearch/draw_agent.py CHANGED
@@ -14,6 +14,7 @@ from smolagents import CodeAgent, LiteLLMModel
 from smolagents.agents import ActionStep
 from deepengineer.webcrawler.crawl_database import DataBase
 from deepengineer.logging_tools import LoggingTool
+import queue
 
 
 def _find_and_save_matplotlib_figure(image_path: Path = Path("figure.png")) -> str:
@@ -40,11 +41,12 @@ class SaveMatplotlibFigTool(LoggingTool):
     }
     output_type = "string"
 
-    def __init__(self, output_dir: Path):
-        super().__init__()
+    def __init__(self, output_dir: Path, log_queue: queue.Queue | None = None):
+        super().__init__(log_queue=log_queue)
         self.output_dir: Path = output_dir
 
     def forward(self, image_name: str) -> str:
+        self.push_log(f"🖼️ Saving matplotlib figure to {image_name}")
         if not image_name.endswith(".png"):
             image_name = image_name + ".png"
         output_path = self.output_dir / image_name
@@ -173,6 +175,7 @@ class DrawImageTool(LoggingTool):
         self.output_dir: Path = output_dir
 
     def forward(self, prompt: str, image_name: str) -> str:
+        self.push_log(f"🖊️ Drawing image from prompt: {prompt}")
         if not image_name.endswith(".png"):
             image_name = image_name + ".png"
         output_path = draw_matplotlib_image_from_prompt(
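The `LoggingTool` base class these tools inherit from is imported but not shown in this commit. Judging from its call sites (`super().__init__(log_queue=...)`, `self.push_log(...)`), a reconstruction might look like the sketch below — an assumption, not the actual `src/deepengineer/logging_tools.py`:

```python
# Hypothetical reconstruction of LoggingTool from its call sites in this
# diff; the real class presumably also inherits from smolagents.Tool.
import queue


class LoggingTool:
    def __init__(self, log_queue: queue.Queue | None = None):
        # With no queue attached, push_log degrades to a silent no-op,
        # so the tools behave identically outside the Gradio app.
        self.log_queue = log_queue

    def push_log(self, msg: str) -> None:
        if self.log_queue is not None:
            self.log_queue.put(msg)
```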
src/deepengineer/deepsearch/{analyse_markdown_agent.py → main_pdf_agent.py} RENAMED
File without changes
src/deepengineer/deepsearch/scawl_web_agent.py CHANGED
@@ -18,7 +18,7 @@ from deepengineer.webcrawler.pdf_utils import (
     get_table_of_contents_per_page_markdown,
 )
 from deepengineer.logging_tools import LoggingTool
-
+import queue
 
 class ToolNames(Enum):
     # Search tools
@@ -54,8 +54,12 @@ class SearchTool(LoggingTool):
         },
     }
     output_type = "object"
+
+    def __init__(self, log_queue: queue.Queue | None = None):
+        super().__init__(log_queue=log_queue)
 
     def forward(self, search_query: str) -> str:
+        self.push_log(f"🔍 Searching web for: {search_query}")
         result = asyncio.run(
             linkup_search_async(
                 search_query=search_query,
@@ -76,8 +80,12 @@ class ArxivSearchTool(LoggingTool):
         }
     }
     output_type = "object"
+
+    def __init__(self, log_queue: queue.Queue | None = None):
+        super().__init__(log_queue=log_queue)
 
     def forward(self, search_query: str) -> str:
+        self.push_log(f"🔍 Searching arXiv for: {search_query}")
         result = asyncio.run(arxiv_search_async(search_query))
         return filter_search_results(result)
 
@@ -94,8 +102,12 @@ class PubmedSearchTool(LoggingTool):
         }
     }
     output_type = "object"
+
+    def __init__(self, log_queue: queue.Queue | None = None):
+        super().__init__(log_queue=log_queue)
 
     def forward(self, search_query: str) -> str:
+        self.push_log(f"🔍 Searching PubMed for: {search_query}")
         result = asyncio.run(pubmed_search_async(search_query))
         return filter_search_results(result)
 
@@ -113,7 +125,11 @@ class ScientificSearchTool(LoggingTool):
     }
     output_type = "object"
 
+    def __init__(self, log_queue: queue.Queue | None = None):
+        super().__init__(log_queue=log_queue)
+
     def forward(self, search_query: str) -> dict:
+        self.push_log(f"🔍 Searching scientific domains for: {search_query}")
         result = asyncio.run(scientific_search_async(search_query))
         return filter_search_results(result)
 
@@ -123,7 +139,7 @@ URL_EXPLAINATION = """The URL can be be converted to a markdown. If the URL poin
 
 class GetTableOfContentsTool(LoggingTool):
     name = ToolNames.GET_TABLE_OF_CONTENTS.value
-    description = f"""Returns all of the titles in the document along with the page number they are on.
+    description = f"""Returns all of the titles in the url along with the page number they are on.
     {URL_EXPLAINATION}
     """
     inputs = {
@@ -134,11 +150,12 @@ class GetTableOfContentsTool(LoggingTool):
     }
     output_type = "string"
 
-    def __init__(self, database: DataBase):
-        super().__init__()
+    def __init__(self, database: DataBase, log_queue: queue.Queue | None = None):
+        super().__init__(log_queue=log_queue)
         self.database: DataBase = database
 
     def forward(self, url: str) -> str:
+        self.push_log(f"🔍 Getting table of contents for url: {url}")
         markdown = self.database.get_markdown_of_url(url)
         table_of_contents: str = get_table_of_contents_per_page_markdown(markdown)
         return table_of_contents
@@ -152,11 +169,12 @@ class GetMarkdownTool(LoggingTool):
     }
     output_type = "string"
 
-    def __init__(self, database: DataBase):
-        super().__init__()
+    def __init__(self, database: DataBase, log_queue: queue.Queue | None = None):
+        super().__init__(log_queue=log_queue)
         self.database: DataBase = database
 
     def forward(self, url: str) -> str:
+        self.push_log(f"🔍 Getting markdown for url: {url}")
         markdown = self.database.get_markdown_of_url(url)
         markdown_content: str = convert_ocr_response_to_markdown(markdown)
         return markdown_content
@@ -174,11 +192,12 @@ class GetPagesContentTool(LoggingTool):
     }
     output_type = "string"
 
-    def __init__(self, database: DataBase):
-        super().__init__()
+    def __init__(self, database: DataBase, log_queue: queue.Queue | None = None):
+        super().__init__(log_queue=log_queue)
         self.database: DataBase = database
 
     def forward(self, url: str, page_numbers: list[int]) -> str:
+        self.push_log(f"🔍 Getting content of pages {page_numbers} for url: {url}")
         markdown = self.database.get_markdown_of_url(url)
         return get_markdown_by_page_numbers(markdown, page_numbers)
 
@@ -195,17 +214,18 @@ class FindInMarkdownTool(LoggingTool):
     }
     output_type = "array"
 
-    def __init__(self, database: DataBase):
-        super().__init__()
+    def __init__(self, database: DataBase, log_queue: queue.Queue | None = None):
+        super().__init__(log_queue=log_queue)
         self.database: DataBase = database
 
     def forward(self, url: str, search_queries: list[str]) -> list[int]:
+        self.push_log(f"🔍 Finding {search_queries} in url: {url}")
         markdown = self.database.get_markdown_of_url(url)
         return find_in_markdown(markdown, search_queries)
 
 
 def create_web_search_agent(
-    model_id="deepseek/deepseek-reasoner", database: DataBase | None = None
+    model_id="deepseek/deepseek-reasoner", database: DataBase | None = None, log_queue: queue.Queue | None = None
 ):
     """Create a web search agent with search, crawling, and PDF analysis capabilities."""
 
@@ -215,14 +235,14 @@ def create_web_search_agent(
 
     # Web search and crawling tools
    WEB_SEARCH_TOOLS = [
-        SearchTool(),
-        ArxivSearchTool(),
-        PubmedSearchTool(),
-        ScientificSearchTool(),
-        GetTableOfContentsTool(database),
-        GetMarkdownTool(database),
-        GetPagesContentTool(database),
-        FindInMarkdownTool(database),
+        SearchTool(log_queue=log_queue),
+        ArxivSearchTool(log_queue=log_queue),
+        PubmedSearchTool(log_queue=log_queue),
+        ScientificSearchTool(log_queue=log_queue),
+        GetTableOfContentsTool(database=database, log_queue=log_queue),
+        GetMarkdownTool(database=database, log_queue=log_queue),
+        GetPagesContentTool(database=database, log_queue=log_queue),
+        FindInMarkdownTool(database=database, log_queue=log_queue),
     ]
 
     web_search_agent = CodeAgent(
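Taken together, the factory now threads one queue through every tool. A hedged usage sketch (the `DataBase` setup is elided; only `create_web_search_agent`'s signature is taken from this diff):

```python
# Sketch: one shared queue wires the UI to every tool's push_log calls.
import queue
from deepengineer.deepsearch.scawl_web_agent import create_web_search_agent

log_queue = queue.Queue()
agent = create_web_search_agent(log_queue=log_queue)  # database omitted here

# While the agent runs, the Gradio wrapper drains log_queue and streams
# each message into the "Tool Invocation Log" textbox.
```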
tests/deepsearch/test_main_agent.py ADDED
@@ -0,0 +1,12 @@
+from deepengineer.deepsearch.main_agent import main_search
+import queue
+
+
+def test_main_agent():
+    log_queue = queue.Queue()
+
+    main_search(
+        task="""
+        Search a paper called "High Energy Physics Opportunities Using Reactor Antineutrinos" on arXiv, download it and extract the table of contents
+        """, log_queue=log_queue
+    )
tests/deepsearch/test_pdf_agent.py CHANGED
@@ -1,6 +1,6 @@
 import pytest
 from deepengineer.common_path import DATA_DIR
-from deepengineer.deepsearch.analyse_markdown_agent import (
+from deepengineer.deepsearch.main_pdf_agent import (
     FindInMarkdownTool,
     GetMarkdownTool,
     GetPagesContentTool,
tests/deepsearch/test_web_agent.py CHANGED
@@ -16,5 +16,3 @@ def test_run_web_search_agent():
         is not None
     )
 
-
-test_run_web_search_agent()