File size: 5,114 Bytes
8a58cf3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""Tests of the builder registry."""

import pytest
import warnings

from bs4 import BeautifulSoup
from bs4.builder import (
    builder_registry as registry,
    HTMLParserTreeBuilder,
    TreeBuilderRegistry,
)

from . import (
    HTML5LIB_PRESENT,
    LXML_PRESENT,
)

if HTML5LIB_PRESENT:
    from bs4.builder import HTML5TreeBuilder

if LXML_PRESENT:
    from bs4.builder import (
        LXMLTreeBuilderForXML,
        LXMLTreeBuilder,
        )


# TODO: Split out the lxml and html5lib tests into their own classes
# and gate them with pytest.mark.skipif.
class TestBuiltInRegistry(object):
    """Test the built-in registry with the default builders registered.

    Expectations that involve lxml or html5lib are only checked when
    LXML_PRESENT / HTML5LIB_PRESENT report that the library is available.
    """

    def test_combination(self):
        """A lookup on several features at once finds the expected builder."""
        assert registry.lookup('strict', 'html') == HTMLParserTreeBuilder
        if LXML_PRESENT:
            assert registry.lookup('fast', 'html') == LXMLTreeBuilder
            assert registry.lookup('permissive', 'xml') == LXMLTreeBuilderForXML
        if HTML5LIB_PRESENT:
            assert registry.lookup('html5lib', 'html') == HTML5TreeBuilder

    def test_lookup_by_markup_type(self):
        """A lookup by markup type alone picks the expected builder."""
        if LXML_PRESENT:
            assert registry.lookup('html') == LXMLTreeBuilder
            assert registry.lookup('xml') == LXMLTreeBuilderForXML
        else:
            # Without lxml, no builder is expected to be found for XML.
            assert registry.lookup('xml') is None
            if HTML5LIB_PRESENT:
                assert registry.lookup('html') == HTML5TreeBuilder
            else:
                assert registry.lookup('html') == HTMLParserTreeBuilder

    def test_named_library(self):
        """A builder can be looked up by the name of its parser library."""
        if LXML_PRESENT:
            assert registry.lookup('lxml', 'xml') == LXMLTreeBuilderForXML
            assert registry.lookup('lxml', 'html') == LXMLTreeBuilder
        if HTML5LIB_PRESENT:
            assert registry.lookup('html5lib') == HTML5TreeBuilder

        assert registry.lookup('html.parser') == HTMLParserTreeBuilder

    def test_beautifulsoup_constructor_does_lookup(self):
        """The BeautifulSoup constructor resolves `features` to a builder."""
        # record=True captures any warnings raised inside the block
        # instead of displaying them; the captured list is not inspected.
        with warnings.catch_warnings(record=True):
            # This will create a warning about not explicitly
            # specifying a parser, but we'll ignore it.

            # You can pass in a string.
            BeautifulSoup("", features="html")
            # Or a list of strings.
            BeautifulSoup("", features=["html", "fast"])

        # You'll get an exception if BS can't find an appropriate
        # builder.
        with pytest.raises(ValueError):
            BeautifulSoup("", features="no-such-feature")

class TestRegistry(object):
    """Test the TreeBuilderRegistry class in general."""

    def setup_method(self):
        """Give each test its own freshly created registry."""
        self.registry = TreeBuilderRegistry()

    def builder_for_features(self, *feature_list):
        """Create a stand-in builder class, register it, and return it.

        :param feature_list: Feature names the new class advertises
            through its `features` attribute (stored as a tuple).
        :return: The newly created and registered class.
        """
        cls = type('Builder_' + '_'.join(feature_list),
                   (object,), {'features' : feature_list})

        self.registry.register(cls)
        return cls

    def test_register_with_no_features(self):
        """A builder with no features is only found by a featureless lookup."""
        builder = self.builder_for_features()

        # Since the builder advertises no features, you can't find it
        # by looking up features.
        assert self.registry.lookup('foo') is None

        # But you can find it by doing a lookup with no features, if
        # this happens to be the only registered builder.
        assert self.registry.lookup() == builder

    def test_register_with_features_makes_lookup_succeed(self):
        """A builder can be found through any one of its features."""
        builder = self.builder_for_features('foo', 'bar')
        assert self.registry.lookup('foo') is builder
        assert self.registry.lookup('bar') is builder

    def test_lookup_fails_when_no_builder_implements_feature(self):
        """Looking up a feature nobody advertises returns None."""
        self.builder_for_features('foo', 'bar')
        assert self.registry.lookup('baz') is None

    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
        """A featureless lookup returns the most recently registered builder."""
        self.builder_for_features('foo')
        most_recent = self.builder_for_features('bar')
        assert self.registry.lookup() == most_recent

    def test_lookup_fails_when_no_tree_builders_registered(self):
        """A lookup on an empty registry returns None."""
        assert self.registry.lookup() is None

    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
        """A multi-feature lookup returns the latest builder that has them all."""
        # Registration order matters here, so keep these calls in sequence.
        # Builders offering only one of the two features are registered
        # both before and after the ones offering both.
        self.builder_for_features('foo')
        self.builder_for_features('bar')
        has_both_early = self.builder_for_features('foo', 'bar', 'baz')
        has_both_late = self.builder_for_features('foo', 'bar', 'quux')
        self.builder_for_features('bar')
        self.builder_for_features('foo')

        # There are two builders featuring 'foo' and 'bar', but
        # the one that also features 'quux' was registered later.
        assert self.registry.lookup('foo', 'bar') == has_both_late

        # There is only one builder featuring 'foo', 'bar', and 'baz'.
        assert self.registry.lookup('foo', 'bar', 'baz') == has_both_early

    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
        """No builder is returned when none has every requested feature."""
        self.builder_for_features('foo', 'bar')
        self.builder_for_features('foo', 'baz')
        assert self.registry.lookup('bar', 'baz') is None