File size: 1,635 Bytes
e040f4f
4fbfc2b
e040f4f
4fbfc2b
e040f4f
 
 
 
 
b5fafa1
 
 
 
e040f4f
b5fafa1
4fbfc2b
e040f4f
 
 
 
 
b5fafa1
 
 
 
e040f4f
b5fafa1
 
4fbfc2b
ce79b68
 
 
b5fafa1
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from deepengineer.webcrawler.crawl_database import DataBase
import pytest

@pytest.mark.expensive
def test_crawl_database_arxiv_pdf():
    """Crawl an arXiv paper through its ``/pdf/`` URL and inspect the result.

    After crawling the pdf URL, the test expects markdown to be retrievable
    under both the pdf and the abs URL of the same paper, the first page to
    carry markdown, and the document to have exactly 20 pages.
    """
    pdf_url = "https://arxiv.org/pdf/2105.00643"
    abs_url = "https://arxiv.org/abs/2105.00643"

    database = DataBase()
    database.crawl_url(pdf_url)

    # The entry must be reachable through either spelling of the paper URL.
    assert database.get_markdown_of_url(pdf_url) is not None
    assert database.get_markdown_of_url(abs_url) is not None

    first_page = database.get_markdown_of_url(pdf_url).pages[0]
    assert first_page.markdown is not None

    page_count = len(database.get_markdown_of_url(pdf_url).pages)
    assert page_count == 20

@pytest.mark.expensive
def test_crawl_database_arxiv_link():
    """Crawl an arXiv paper through its ``/abs/`` URL and inspect the result.

    After crawling the abs URL, the test expects markdown to be retrievable
    under both the abs and the pdf URL of the same paper, the first page to
    carry markdown, and the document to have exactly 20 pages.
    """
    abs_url = "https://arxiv.org/abs/2105.00643"
    pdf_url = "https://arxiv.org/pdf/2105.00643"

    database = DataBase()
    database.crawl_url(abs_url)

    # The entry must be reachable through either spelling of the paper URL.
    assert database.get_markdown_of_url(abs_url) is not None
    assert database.get_markdown_of_url(pdf_url) is not None

    first_page = database.get_markdown_of_url(abs_url).pages[0]
    assert first_page.markdown is not None

    page_count = len(database.get_markdown_of_url(abs_url).pages)
    assert page_count == 20


@pytest.mark.expensive
def test_crawl_database_wikipedia_url():
    """Crawl the Wikipedia "Deep learning" article and inspect the result.

    The test expects markdown to be retrievable for the crawled URL, the
    first page to carry markdown, and the document to have at least 40 pages.
    """
    wiki_url = "https://en.wikipedia.org/wiki/Deep_learning"

    database = DataBase()
    database.crawl_url(wiki_url)

    assert database.get_markdown_of_url(wiki_url) is not None

    first_page = database.get_markdown_of_url(wiki_url).pages[0]
    assert first_page.markdown is not None

    # Lower bound only: the test does not pin an exact page count here.
    page_count = len(database.get_markdown_of_url(wiki_url).pages)
    assert page_count >= 40