Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 1d3434f

Browse files
Marc DM, gsnedders
Marc DM
authored and committed
Fix #63: treewalker fails to handle bytes data from outwith html5lib
We do, ourselves, ensure everything we put in the tree is unicode under Python 2; users, however, may not be so careful and assign an attribute using a bare string, for example. We should handle this gracefully under Python 2 by coercing gently into unicode, failing in the normal way if we are unable to decode the bytes. I (Geoffrey, the committer) am not entirely convinced this is the right way to test this; on the other hand, a thorough testsuite for the issues previously in the treewalker would be large, especially considering the large number of treewalkers we support.
1 parent 84d8a74, commit 1d3434f

File tree

2 files changed

+104
-33
lines changed

2 files changed

+104
-33
lines changed

‎html5lib/tests/test_treewalkers.py

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -310,3 +310,54 @@ def test_treewalker():
310310
"document")]
311311
errors=errors.split("\n")
312312
yieldrunTreewalkerTest,innerHTML,input,expected,errors,treeCls
313+
314+
315+
defset_attribute_on_first_child(docfrag,name,value,treeName):
316+
"""naively sets an attribute on the first child of the document
317+
fragment passed in"""
318+
setter= {'ElementTree':lambdad:d[0].set,
319+
'DOM':lambdad:d.firstChild.setAttribute}
320+
setter['cElementTree']=setter['ElementTree']
321+
try:
322+
setter.get(treeName,setter['DOM'])(docfrag)(name,value)
323+
exceptAttributeError:
324+
setter['ElementTree'](docfrag)(name,value)
325+
326+
327+
defrunTreewalkerEditTest(intext,expected,attrs_to_add,tree):
328+
"""tests what happens when we add attributes to the intext"""
329+
treeName,treeClass=tree
330+
parser=html5parser.HTMLParser(tree=treeClass["builder"])
331+
document=parser.parseFragment(intext)
332+
fornom,valinattrs_to_add:
333+
set_attribute_on_first_child(document,nom,val,treeName)
334+
335+
document=treeClass.get("adapter",lambdax:x)(document)
336+
output=convertTokens(treeClass["walker"](document))
337+
output=attrlist.sub(sortattrs,output)
338+
ifnotoutputinexpected:
339+
raiseAssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s"% (treeName,expected,output))
340+
341+
342+
deftest_treewalker_six_mix():
343+
"""Str/Unicode mix. If str attrs added to tree"""
344+
345+
# On Python 2.x string literals are of type str. Unless, like this
346+
# file, the programmer imports unicode_literals from __future__.
347+
# In that case, string literals become objects of type unicode.
348+
349+
# This test simulates a Py2 user, modifying attributes on a document
350+
# fragment but not using the u'' syntax nor importing unicode_literals
351+
sm_tests= [
352+
('<a href="http://example.com">Example</a>',
353+
[(str('class'),str('test123'))],
354+
'<a>\n class="test123"\n href="http://example.com"\n "Example"'),
355+
356+
('<link href="http://example.com/cow">',
357+
[(str('rel'),str('alternate'))],
358+
'<link>\n href="http://example.com/cow"\n rel="alternate"\n "Example"')
359+
]
360+
361+
fortreeintreeTypes.items():
362+
forintext,attrs,expectedinsm_tests:
363+
yieldrunTreewalkerEditTest,intext,expected,attrs,tree

‎html5lib/treewalkers/_base.py

Lines changed: 53 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
from __future__importabsolute_import,division,unicode_literals
2-
fromsiximporttext_type
2+
fromsiximporttext_type,string_types
33

44
importgettext
55
_=gettext.gettext
@@ -8,6 +8,24 @@
88
spaceCharacters="".join(spaceCharacters)
99

1010

11+
defto_text(s,blank_if_none=True):
12+
"""Wrapper around six.text_type to convert None to empty string"""
13+
ifsisNone:
14+
ifblank_if_none:
15+
return""
16+
else:
17+
returnNone
18+
elifisinstance(s,text_type):
19+
returns
20+
else:
21+
returntext_type(s)
22+
23+
24+
defis_text_or_none(string):
25+
"""Wrapper around isinstance(string_types) or is None"""
26+
returnstringisNoneorisinstance(string,string_types)
27+
28+
1129
classTreeWalker(object):
1230
def__init__(self,tree):
1331
self.tree=tree
@@ -19,45 +37,47 @@ def error(self, msg):
1937
return {"type":"SerializeError","data":msg}
2038

2139
defemptyTag(self,namespace,name,attrs,hasChildren=False):
22-
assertnamespaceisNoneorisinstance(namespace,text_type),type(namespace)
23-
assertisinstance(name,text_type),type(name)
24-
assertall((namespaceisNoneorisinstance(namespace,text_type))and
25-
isinstance(name,text_type)and
26-
isinstance(value,text_type)
40+
assertnamespaceisNoneorisinstance(namespace,string_types),type(namespace)
41+
assertisinstance(name,string_types),type(name)
42+
assertall((namespaceisNoneorisinstance(namespace,string_types))and
43+
isinstance(name,string_types)and
44+
isinstance(value,string_types)
2745
for (namespace,name),valueinattrs.items())
2846

29-
yield {"type":"EmptyTag","name":name,
30-
"namespace":namespace,
47+
yield {"type":"EmptyTag","name":to_text(name,False),
48+
"namespace":to_text(namespace),
3149
"data":attrs}
3250
ifhasChildren:
3351
yieldself.error(_("Void element has children"))
3452

3553
defstartTag(self,namespace,name,attrs):
36-
assertnamespaceisNoneorisinstance(namespace,text_type),type(namespace)
37-
assertisinstance(name,text_type),type(name)
38-
assertall((namespaceisNoneorisinstance(namespace,text_type))and
39-
isinstance(name,text_type)and
40-
isinstance(value,text_type)
54+
assertnamespaceisNoneorisinstance(namespace,string_types),type(namespace)
55+
assertisinstance(name,string_types),type(name)
56+
assertall((namespaceisNoneorisinstance(namespace,string_types))and
57+
isinstance(name,string_types)and
58+
isinstance(value,string_types)
4159
for (namespace,name),valueinattrs.items())
4260

4361
return {"type":"StartTag",
44-
"name":name,
45-
"namespace":namespace,
46-
"data":attrs}
62+
"name":text_type(name),
63+
"namespace":to_text(namespace),
64+
"data":dict(((to_text(namespace,False),to_text(name)),
65+
to_text(value,False))
66+
for (namespace,name),valueinattrs.items())}
4767

4868
defendTag(self,namespace,name):
49-
assertnamespaceisNoneorisinstance(namespace,text_type),type(namespace)
50-
assertisinstance(name,text_type),type(namespace)
69+
assertnamespaceisNoneorisinstance(namespace,string_types),type(namespace)
70+
assertisinstance(name,string_types),type(namespace)
5171

5272
return {"type":"EndTag",
53-
"name":name,
54-
"namespace":namespace,
73+
"name":to_text(name,False),
74+
"namespace":to_text(namespace),
5575
"data": {}}
5676

5777
deftext(self,data):
58-
assertisinstance(data,text_type),type(data)
78+
assertisinstance(data,string_types),type(data)
5979

60-
data=data
80+
data=to_text(data)
6181
middle=data.lstrip(spaceCharacters)
6282
left=data[:len(data)-len(middle)]
6383
ifleft:
@@ -71,25 +91,25 @@ def text(self, data):
7191
yield {"type":"SpaceCharacters","data":right}
7292

7393
defcomment(self,data):
74-
assertisinstance(data,text_type),type(data)
94+
assertisinstance(data,string_types),type(data)
7595

76-
return {"type":"Comment","data":data}
96+
return {"type":"Comment","data":text_type(data)}
7797

7898
defdoctype(self,name,publicId=None,systemId=None,correct=True):
79-
assertnameisNoneorisinstance(name,text_type),type(name)
80-
assertpublicIdisNoneorisinstance(publicId,text_type),type(publicId)
81-
assertsystemIdisNoneorisinstance(systemId,text_type),type(systemId)
99+
assertis_text_or_none(name),type(name)
100+
assertis_text_or_none(publicId),type(publicId)
101+
assertis_text_or_none(systemId),type(systemId)
82102

83103
return {"type":"Doctype",
84-
"name":nameifnameisnotNoneelse"",
85-
"publicId":publicId,
86-
"systemId":systemId,
87-
"correct":correct}
104+
"name":to_text(name),
105+
"publicId":to_text(publicId),
106+
"systemId":to_text(systemId),
107+
"correct":to_text(correct)}
88108

89109
defentity(self,name):
90-
assertisinstance(name,text_type),type(name)
110+
assertisinstance(name,string_types),type(name)
91111

92-
return {"type":"Entity","name":name}
112+
return {"type":"Entity","name":text_type(name)}
93113

94114
defunknown(self,nodeType):
95115
returnself.error(_("Unknown node type: ")+nodeType)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp