Charles Azam committed
Commit 84c66cd · 1 Parent(s): e003639

feat: start writing tools

src/deepengineer/webcrawler/async_search.py CHANGED
@@ -93,7 +93,7 @@ def get_linkup_balance():
     return balance
 
 
-async def async_linkup_search(
+async def linkup_search_async(
     search_query: str,
     depth: Literal["standard", "deep"] = "standard",
     output_type: Literal['searchResults', 'sourcedAnswer', 'structured'] = "sourcedAnswer",
@@ -138,24 +138,24 @@ async def async_linkup_search(
 async def arxiv_search_async(
     search_query: str,
 ) -> SearchResponse:
-    response = await async_linkup_search(search_query, include_domains=[ScientificDomains.arxiv])
+    response = await linkup_search_async(search_query, include_domains=[ScientificDomains.arxiv])
     return response
 
 
 async def pubmed_search_async(
     search_query: str,
 ) -> SearchResponse:
-    response = await async_linkup_search(search_query, include_domains=[ScientificDomains.pubmed])
+    response = await linkup_search_async(search_query, include_domains=[ScientificDomains.pubmed])
     return response
 
 async def sciencedirect_search_async(
     search_query: str,
 ) -> SearchResponse:
-    response = await async_linkup_search(search_query, include_domains=[ScientificDomains.sciencedirect])
+    response = await linkup_search_async(search_query, include_domains=[ScientificDomains.sciencedirect])
     return response
 
 async def scientific_search_async(
     search_query: str,
 ) -> SearchResponse:
-    response = await async_linkup_search(search_query, include_domains=[ScientificDomains.wikipedia, ScientificDomains.arxiv, ScientificDomains.pubmed, ScientificDomains.sciencedirect])
+    response = await linkup_search_async(search_query, include_domains=[ScientificDomains.wikipedia, ScientificDomains.arxiv, ScientificDomains.pubmed, ScientificDomains.sciencedirect])
     return response
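For context, a minimal usage sketch of the renamed entry point and one of its domain-restricted wrappers. This script is illustrative, not part of the commit, and assumes a Linkup API key is configured in the environment, since the wrappers ultimately call the Linkup API:

# Illustrative only (not part of the commit).
import asyncio

from deepengineer.webcrawler.async_search import arxiv_search_async, linkup_search_async


async def main():
    # Uses the defaults from the signature above:
    # depth="standard", output_type="sourcedAnswer".
    response = await linkup_search_async("graphite-moderated reactor designs")
    print(response.answer)

    # Same call, restricted to arxiv.org via include_domains.
    response = await arxiv_search_async("thermal neutron scattering in graphite")
    print(response.answer)


asyncio.run(main())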
src/deepengineer/webcrawler/tools.py ADDED
@@ -0,0 +1,14 @@
+from deepengineer.webcrawler.async_search import linkup_search_async, tavily_search_async, arxiv_search_async, pubmed_search_async, sciencedirect_search_async, scientific_search_async
+from deepengineer.webcrawler.async_crawl import crawl4ai_extract_markdown_of_url_async, arxiv_download_pdf_async, download_pdf_async
+
+
+
+
+
+
+
+
+
+
+
+
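The committed tools.py stops at the imports, consistent with the "start writing tools" message. A hypothetical sketch of one shape it could take, exposing the imported coroutines as a name-to-coroutine registry; everything below beyond the import lines is an assumption, not the author's design:

# Hypothetical sketch only; the committed tools.py is still a stub.
from deepengineer.webcrawler.async_search import (
    linkup_search_async,
    tavily_search_async,
    arxiv_search_async,
    pubmed_search_async,
    sciencedirect_search_async,
    scientific_search_async,
)
from deepengineer.webcrawler.async_crawl import (
    crawl4ai_extract_markdown_of_url_async,
    arxiv_download_pdf_async,
    download_pdf_async,
)

# Map stable tool names to the coroutines that implement them, so a caller
# can dispatch by name without importing each function individually.
WEB_TOOLS = {
    "linkup_search": linkup_search_async,
    "tavily_search": tavily_search_async,
    "arxiv_search": arxiv_search_async,
    "pubmed_search": pubmed_search_async,
    "sciencedirect_search": sciencedirect_search_async,
    "scientific_search": scientific_search_async,
    "extract_markdown_of_url": crawl4ai_extract_markdown_of_url_async,
    "arxiv_download_pdf": arxiv_download_pdf_async,
    "download_pdf": download_pdf_async,
}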
tests/webcrawler/test_async_crawl.py CHANGED
@@ -5,7 +5,6 @@ from deepengineer.webcrawler.async_crawl import (
     arxiv_download_pdf_async,
 )
 from mistralai import OCRResponse
-from deepengineer.webcrawler.pdf_tools import convert_pdf_to_markdown_async
 from deepengineer.webcrawler.testing import URL_WIKIPEDIA, URL_PDF, ARXIV_URL
 from deepengineer.common_path import DATA_DIR
 
@@ -31,14 +30,3 @@ async def test_arxiv_download_pdf_async():
     pdf_path = await arxiv_download_pdf_async(ARXIV_URL, output_path=output_path)
     assert pdf_path == output_path
     assert output_path.exists()
-
-@pytest.mark.expensive
-@pytest.mark.asyncio
-async def test_convert_pdf_to_markdown_async():
-    pdf_path = DATA_DIR / "report_thermal_neutron.pdf"
-    assert pdf_path.exists()
-    markdown, ocr_response = await convert_pdf_to_markdown_async(pdf_path)
-    assert isinstance(ocr_response, OCRResponse)
-    assert len(ocr_response.pages) == 16
-    assert isinstance(markdown, str)
-    assert "where each cylinder represent" in markdown
tests/webcrawler/test_async_search.py CHANGED
@@ -4,7 +4,7 @@ from deepengineer.webcrawler.async_search import (
     tavily_search_async,
     SearchResponse,
     get_tavily_usage,
-    async_linkup_search,
+    linkup_search_async,
     get_linkup_balance
 )
 
@@ -47,7 +47,7 @@ async def test_linkup_search_async():
     balance_before = get_linkup_balance()
     print(balance_before)
 
-    response = await async_linkup_search(
+    response = await linkup_search_async(
         search_query="Would it be possible to make a thermal reactor with graphite and lead?",
     )
     print(response.answer)
tests/webcrawler/test_pdfs_tools.py ADDED
@@ -0,0 +1,16 @@
+from deepengineer.webcrawler.pdf_tools import convert_pdf_to_markdown_async
+from mistralai import OCRResponse
+from deepengineer.common_path import DATA_DIR
+import pytest
+
+
+@pytest.mark.expensive
+@pytest.mark.asyncio
+async def test_convert_pdf_to_markdown_async():
+    pdf_path = DATA_DIR / "report_thermal_neutron.pdf"
+    assert pdf_path.exists()
+    markdown, ocr_response = await convert_pdf_to_markdown_async(pdf_path)
+    assert isinstance(ocr_response, OCRResponse)
+    assert len(ocr_response.pages) == 16
+    assert isinstance(markdown, str)
+    assert "where each cylinder represent" in markdown