Spaces:
Sleeping
Sleeping
Charles Azam
committed on
Commit
·
84c66cd
1
Parent(s):
e003639
feat: start writing tools
Browse files
src/deepengineer/webcrawler/async_search.py
CHANGED
@@ -93,7 +93,7 @@ def get_linkup_balance():
|
|
93 |
return balance
|
94 |
|
95 |
|
96 |
-
async def async_linkup_search(
|
97 |
search_query: str,
|
98 |
depth: Literal["standard", "deep"] = "standard",
|
99 |
output_type: Literal['searchResults', 'sourcedAnswer', 'structured'] = "sourcedAnswer",
|
@@ -138,24 +138,24 @@ async def async_linkup_search(
|
|
138 |
async def arxiv_search_async(
|
139 |
search_query: str,
|
140 |
) -> SearchResponse:
|
141 |
-
response = await async_linkup_search(search_query, include_domains=[ScientificDomains.arxiv])
|
142 |
return response
|
143 |
|
144 |
|
145 |
async def pubmed_search_async(
|
146 |
search_query: str,
|
147 |
) -> SearchResponse:
|
148 |
-
response = await async_linkup_search(search_query, include_domains=[ScientificDomains.pubmed])
|
149 |
return response
|
150 |
|
151 |
async def sciencedirect_search_async(
|
152 |
search_query: str,
|
153 |
) -> SearchResponse:
|
154 |
-
response = await async_linkup_search(search_query, include_domains=[ScientificDomains.sciencedirect])
|
155 |
return response
|
156 |
|
157 |
async def scientific_search_async(
|
158 |
search_query: str,
|
159 |
) -> SearchResponse:
|
160 |
-
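response = await async_linkup_search(search_query, include_domains=[ScientificDomains.wikipedia, ScientificDomains.arxiv, ScientificDomains.pubmed, ScientificDomains.sciencedirect])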
|
161 |
return response
|
|
|
93 |
return balance
|
94 |
|
95 |
|
96 |
+
async def linkup_search_async(
|
97 |
search_query: str,
|
98 |
depth: Literal["standard", "deep"] = "standard",
|
99 |
output_type: Literal['searchResults', 'sourcedAnswer', 'structured'] = "sourcedAnswer",
|
|
|
138 |
async def arxiv_search_async(
|
139 |
search_query: str,
|
140 |
) -> SearchResponse:
|
141 |
+
response = await linkup_search_async(search_query, include_domains=[ScientificDomains.arxiv])
|
142 |
return response
|
143 |
|
144 |
|
145 |
async def pubmed_search_async(
|
146 |
search_query: str,
|
147 |
) -> SearchResponse:
|
148 |
+
response = await linkup_search_async(search_query, include_domains=[ScientificDomains.pubmed])
|
149 |
return response
|
150 |
|
151 |
async def sciencedirect_search_async(
|
152 |
search_query: str,
|
153 |
) -> SearchResponse:
|
154 |
+
response = await linkup_search_async(search_query, include_domains=[ScientificDomains.sciencedirect])
|
155 |
return response
|
156 |
|
157 |
async def scientific_search_async(
|
158 |
search_query: str,
|
159 |
) -> SearchResponse:
|
160 |
+
response = await linkup_search_async(search_query, include_domains=[ScientificDomains.wikipedia, ScientificDomains.arxiv, ScientificDomains.pubmed, ScientificDomains.sciencedirect])
|
161 |
return response
|
src/deepengineer/webcrawler/tools.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from deepengineer.webcrawler.async_search import linkup_search_async, tavily_search_async, arxiv_search_async, pubmed_search_async, sciencedirect_search_async, scientific_search_async
|
2 |
+
from deepengineer.webcrawler.async_crawl import crawl4ai_extract_markdown_of_url_async, arxiv_download_pdf_async, download_pdf_async
|
3 |
+
|
4 |
+
|
5 |
+
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
|
13 |
+
|
14 |
+
|
tests/webcrawler/test_async_crawl.py
CHANGED
@@ -5,7 +5,6 @@ from deepengineer.webcrawler.async_crawl import (
|
|
5 |
arxiv_download_pdf_async,
|
6 |
)
|
7 |
from mistralai import OCRResponse
|
8 |
-
from deepengineer.webcrawler.pdf_tools import convert_pdf_to_markdown_async
|
9 |
from deepengineer.webcrawler.testing import URL_WIKIPEDIA, URL_PDF, ARXIV_URL
|
10 |
from deepengineer.common_path import DATA_DIR
|
11 |
|
@@ -31,14 +30,3 @@ async def test_arxiv_download_pdf_async():
|
|
31 |
pdf_path = await arxiv_download_pdf_async(ARXIV_URL, output_path=output_path)
|
32 |
assert pdf_path == output_path
|
33 |
assert output_path.exists()
|
34 |
-
|
35 |
-
@pytest.mark.expensive
|
36 |
-
@pytest.mark.asyncio
|
37 |
-
async def test_convert_pdf_to_markdown_async():
|
38 |
-
pdf_path = DATA_DIR / "report_thermal_neutron.pdf"
|
39 |
-
assert pdf_path.exists()
|
40 |
-
markdown, ocr_response = await convert_pdf_to_markdown_async(pdf_path)
|
41 |
-
assert isinstance(ocr_response, OCRResponse)
|
42 |
-
assert len(ocr_response.pages) == 16
|
43 |
-
assert isinstance(markdown, str)
|
44 |
-
assert "where each cylinder represent" in markdown
|
|
|
5 |
arxiv_download_pdf_async,
|
6 |
)
|
7 |
from mistralai import OCRResponse
|
|
|
8 |
from deepengineer.webcrawler.testing import URL_WIKIPEDIA, URL_PDF, ARXIV_URL
|
9 |
from deepengineer.common_path import DATA_DIR
|
10 |
|
|
|
30 |
pdf_path = await arxiv_download_pdf_async(ARXIV_URL, output_path=output_path)
|
31 |
assert pdf_path == output_path
|
32 |
assert output_path.exists()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/webcrawler/test_async_search.py
CHANGED
@@ -4,7 +4,7 @@ from deepengineer.webcrawler.async_search import (
|
|
4 |
tavily_search_async,
|
5 |
SearchResponse,
|
6 |
get_tavily_usage,
|
7 |
-
|
8 |
get_linkup_balance
|
9 |
)
|
10 |
|
@@ -47,7 +47,7 @@ async def test_linkup_search_async():
|
|
47 |
balance_before = get_linkup_balance()
|
48 |
print(balance_before)
|
49 |
|
50 |
-
response = await async_linkup_search(
|
51 |
search_query="Would it be possible to make a thermal reactor with graphite and lead?",
|
52 |
)
|
53 |
print(response.answer)
|
|
|
4 |
tavily_search_async,
|
5 |
SearchResponse,
|
6 |
get_tavily_usage,
|
7 |
+
linkup_search_async,
|
8 |
get_linkup_balance
|
9 |
)
|
10 |
|
|
|
47 |
balance_before = get_linkup_balance()
|
48 |
print(balance_before)
|
49 |
|
50 |
+
response = await linkup_search_async(
|
51 |
search_query="Would it be possible to make a thermal reactor with graphite and lead?",
|
52 |
)
|
53 |
print(response.answer)
|
tests/webcrawler/test_pdfs_tools.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from deepengineer.webcrawler.pdf_tools import convert_pdf_to_markdown_async
|
2 |
+
from mistralai import OCRResponse
|
3 |
+
from deepengineer.common_path import DATA_DIR
|
4 |
+
import pytest
|
5 |
+
|
6 |
+
|
7 |
+
@pytest.mark.expensive
|
8 |
+
@pytest.mark.asyncio
|
9 |
+
async def test_convert_pdf_to_markdown_async():
|
10 |
+
pdf_path = DATA_DIR / "report_thermal_neutron.pdf"
|
11 |
+
assert pdf_path.exists()
|
12 |
+
markdown, ocr_response = await convert_pdf_to_markdown_async(pdf_path)
|
13 |
+
assert isinstance(ocr_response, OCRResponse)
|
14 |
+
assert len(ocr_response.pages) == 16
|
15 |
+
assert isinstance(markdown, str)
|
16 |
+
assert "where each cylinder represent" in markdown
|