11from __future__import absolute_import ,division ,unicode_literals
22
3+ import pytest
4+
35from html5lib import constants ,parseFragment ,serialize
46from html5lib .filters import sanitizer
57
68
def runSanitizerTest(_, expected, input):
    """Assert that sanitizing ``input`` produces the re-serialized ``expected``.

    The first positional argument is ignored by this function.  ``expected``
    is parsed as an HTML fragment and serialized again with a fixed set of
    serializer options before being compared against ``sanitize_html(input)``.
    """
    fragment = parseFragment(expected)
    canonical = serialize(
        fragment,
        omit_optional_tags=False,
        use_trailing_solidus=True,
        space_before_trailing_solidus=False,
        quote_attr_values="always",
        quote_char='"',
        alphabetical_attributes=True,
    )
    actual = sanitize_html(input)
    assert canonical == actual
17-
18-
199def sanitize_html (stream ):
2010parsed = parseFragment (stream )
2111serialized = serialize (parsed ,
@@ -59,27 +49,27 @@ def test_data_uri_disallowed_type():
5949assert expected == sanitized
6050
6151
62- def test_sanitizer ():
52+ def param_sanitizer ():
6353for ns ,tag_name in sanitizer .allowed_elements :
6454if ns != constants .namespaces ["html" ]:
6555continue
6656if tag_name in ['caption' ,'col' ,'colgroup' ,'optgroup' ,'option' ,'table' ,'tbody' ,'td' ,
6757'tfoot' ,'th' ,'thead' ,'tr' ,'select' ]:
6858continue # TODO
6959if tag_name == 'image' :
70- yield (runSanitizerTest , "test_should_allow_%s_tag" % tag_name ,
60+ yield ("test_should_allow_%s_tag" % tag_name ,
7161"<img title=\" 1\" />foo <bad>bar</bad> baz" ,
7262"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name ,tag_name ))
7363elif tag_name == 'br' :
74- yield (runSanitizerTest , "test_should_allow_%s_tag" % tag_name ,
64+ yield ("test_should_allow_%s_tag" % tag_name ,
7565"<br title=\" 1\" />foo <bad>bar</bad> baz<br/>" ,
7666"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name ,tag_name ))
7767elif tag_name in constants .voidElements :
78- yield (runSanitizerTest , "test_should_allow_%s_tag" % tag_name ,
68+ yield ("test_should_allow_%s_tag" % tag_name ,
7969"<%s title=\" 1\" />foo <bad>bar</bad> baz" % tag_name ,
8070"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name ,tag_name ))
8171else :
82- yield (runSanitizerTest , "test_should_allow_%s_tag" % tag_name ,
72+ yield ("test_should_allow_%s_tag" % tag_name ,
8373"<%s title=\" 1\" >foo <bad>bar</bad> baz</%s>" % (tag_name ,tag_name ),
8474"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name ,tag_name ))
8575
@@ -93,15 +83,15 @@ def test_sanitizer():
9383attribute_value = 'foo'
9484if attribute_name in sanitizer .attr_val_is_uri :
9585attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer .allowed_protocols [0 ]
96- yield (runSanitizerTest , "test_should_allow_%s_attribute" % attribute_name ,
86+ yield ("test_should_allow_%s_attribute" % attribute_name ,
9787"<p %s=\" %s\" >foo <bad>bar</bad> baz</p>" % (attribute_name ,attribute_value ),
9888"<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name ,attribute_value ))
9989
10090for protocol in sanitizer .allowed_protocols :
10191rest_of_uri = '//sub.domain.tld/path/object.ext'
10292if protocol == 'data' :
10393rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
104- yield (runSanitizerTest , "test_should_allow_uppercase_%s_uris" % protocol ,
94+ yield ("test_should_allow_uppercase_%s_uris" % protocol ,
10595"<img src=\" %s:%s\" >foo</a>" % (protocol ,rest_of_uri ),
10696"""<img src="%s:%s">foo</a>""" % (protocol ,rest_of_uri ))
10797
@@ -110,11 +100,26 @@ def test_sanitizer():
110100if protocol == 'data' :
111101rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
112102protocol = protocol .upper ()
113- yield (runSanitizerTest , "test_should_allow_uppercase_%s_uris" % protocol ,
103+ yield ("test_should_allow_uppercase_%s_uris" % protocol ,
114104"<img src=\" %s:%s\" >foo</a>" % (protocol ,rest_of_uri ),
115105"""<img src="%s:%s">foo</a>""" % (protocol ,rest_of_uri ))
116106
117107
@pytest.mark.parametrize(
    "expected, input",
    (pytest.param(wanted, source, id=case_id)
     for case_id, wanted, source in param_sanitizer()))
def test_sanitizer(expected, input):
    """Assert that sanitizing ``input`` produces the re-serialized ``expected``.

    Cases come from ``param_sanitizer()``, which yields
    ``(id, expected, input)`` triples; the first item becomes the pytest id.
    ``expected`` is parsed as an HTML fragment and serialized again with a
    fixed set of serializer options before being compared against
    ``sanitize_html(input)``.
    """
    fragment = parseFragment(expected)
    canonical = serialize(
        fragment,
        omit_optional_tags=False,
        use_trailing_solidus=True,
        space_before_trailing_solidus=False,
        quote_attr_values="always",
        quote_char='"',
        alphabetical_attributes=True,
    )
    actual = sanitize_html(input)
    assert canonical == actual
121+
122+
118123def test_lowercase_color_codes_in_style ():
119124sanitized = sanitize_html ("<p style=\" border: 1px solid #a2a2a2;\" ></p>" )
120125expected = '<p style=\" border: 1px solid #a2a2a2;\" ></p>'