|
import os |
|
import sys |
|
import unittest |
|
|
|
from bs4 import BeautifulSoup |
|
|
|
sys.path.append(os.path.abspath("../scripts")) |
|
|
|
from browse import extract_hyperlinks |
|
|
|
|
|
class TestBrowseLinks(unittest.TestCase):
    """Test case for ``extract_hyperlinks`` as imported from ``browse``."""

    def test_extract_hyperlinks(self):
        # Fixture: one absolute link, one relative link, and a non-link
        # element that must not show up in the result.
        html = """
        <body>
        <a href="https://google.com">Google</a>
        <a href="foo.html">Foo</a>
        <div>Some other crap</div>
        </body>
        """
        base_url = "http://example.com"

        # Expected (text, url) pairs, in document order; the relative href
        # is expected to come back joined onto the base URL.
        expected = [
            ("Google", "https://google.com"),
            ("Foo", "http://example.com/foo.html"),
        ]

        parsed = BeautifulSoup(html, "html.parser")
        self.assertEqual(extract_hyperlinks(parsed, base_url), expected)
|
|