Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit deb205a

Browse files
committed
Get the lxml treewalker working under the joint codebase under Py2.
This hard-codes the fact that lxml uses UTF-8 (byte) strings under Py2, and adds asserts to the generic treewalker to ensure we have Unicode strings.
1 parent 82377ec commit deb205a

File tree

2 files changed

+56
-26
lines changed

2 files changed

+56
-26
lines changed

‎html5lib/treewalkers/_base.py‎

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,37 +17,45 @@ def __iter__(self):
1717
deferror(self,msg):
1818
return {"type":"SerializeError","data":msg}
1919

20-
defnormalizeAttrs(self,attrs):
21-
newattrs= {}
22-
ifattrs:
23-
#TODO: treewalkers should always have attrs
24-
for (namespace,name),valueinattrs.items():
25-
assertnamespaceisNoneorisinstance(namespace,text_type),type(namespace)
26-
assertisinstance(name,text_type)
27-
assertisinstance(value,text_type)
28-
newattrs[(namespace,name)]=value
29-
returnnewattrs
30-
3120
defemptyTag(self,namespace,name,attrs,hasChildren=False):
21+
assertnamespaceisNoneorisinstance(namespace,text_type),type(namespace)
22+
assertisinstance(name,text_type),type(name)
23+
assertall((namespaceisNoneorisinstance(namespace,text_type))and
24+
isinstance(name,text_type)and
25+
isinstance(value,text_type)
26+
for (namespace,name),valueinattrs.items())
27+
3228
yield {"type":"EmptyTag","name":name,
3329
"namespace":namespace,
34-
"data":self.normalizeAttrs(attrs)}
30+
"data":attrs}
3531
ifhasChildren:
3632
yieldself.error(_("Void element has children"))
3733

3834
defstartTag(self,namespace,name,attrs):
35+
assertnamespaceisNoneorisinstance(namespace,text_type),type(namespace)
36+
assertisinstance(name,text_type),type(name)
37+
assertall((namespaceisNoneorisinstance(namespace,text_type))and
38+
isinstance(name,text_type)and
39+
isinstance(value,text_type)
40+
for (namespace,name),valueinattrs.items())
41+
3942
return {"type":"StartTag",
4043
"name":name,
4144
"namespace":namespace,
42-
"data":self.normalizeAttrs(attrs)}
45+
"data":attrs}
4346

4447
defendTag(self,namespace,name):
48+
assertnamespaceisNoneorisinstance(namespace,text_type),type(namespace)
49+
assertisinstance(name,text_type),type(namespace)
50+
4551
return {"type":"EndTag",
4652
"name":name,
4753
"namespace":namespace,
4854
"data": {}}
4955

5056
deftext(self,data):
57+
assertisinstance(data,text_type),type(data)
58+
5159
data=data
5260
middle=data.lstrip(spaceCharacters)
5361
left=data[:len(data)-len(middle)]
@@ -62,16 +70,24 @@ def text(self, data):
6270
yield {"type":"SpaceCharacters","data":right}
6371

6472
defcomment(self,data):
73+
assertisinstance(data,text_type),type(data)
74+
6575
return {"type":"Comment","data":data}
6676

6777
defdoctype(self,name,publicId=None,systemId=None,correct=True):
78+
assertnameisNoneorisinstance(name,text_type),type(name)
79+
assertpublicIdisNoneorisinstance(publicId,text_type),type(publicId)
80+
assertsystemIdisNoneorisinstance(systemId,text_type),type(systemId)
81+
6882
return {"type":"Doctype",
69-
"name":nameisnotNoneandnameor"",
83+
"name":nameifnameisnotNoneelse"",
7084
"publicId":publicId,
7185
"systemId":systemId,
7286
"correct":correct}
7387

7488
defentity(self,name):
89+
assertisinstance(name,text_type),type(name)
90+
7591
return {"type":"Entity","name":name}
7692

7793
defunknown(self,nodeType):

‎html5lib/treewalkers/lxmletree.py‎

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from __future__importabsolute_import,division,unicode_literals
2+
fromsiximporttext_type
23

34
fromlxmlimportetree
45
fromhtml5lib.treebuilders.etreeimporttag_regexp
@@ -12,14 +13,23 @@
1213
fromhtml5lib.constantsimportvoidElements
1314
fromhtml5libimportihatexml
1415

16+
defensure_str(s):
17+
ifsisNone:
18+
returnNone
19+
elifisinstance(s,text_type):
20+
returns
21+
else:
22+
returns.decode("utf-8","strict")
23+
1524
classRoot(object):
1625
def__init__(self,et):
1726
self.elementtree=et
1827
self.children= []
1928
ifet.docinfo.internalDTD:
20-
self.children.append(Doctype(self,et.docinfo.root_name,
21-
et.docinfo.public_id,
22-
et.docinfo.system_url))
29+
self.children.append(Doctype(self,
30+
ensure_str(et.docinfo.root_name),
31+
ensure_str(et.docinfo.public_id),
32+
ensure_str(et.docinfo.system_url)))
2333
root=et.getroot()
2434
node=root
2535

@@ -67,15 +77,17 @@ def __init__(self, fragment_root, obj):
6777
self.root_node=fragment_root
6878
self.obj=obj
6979
ifhasattr(self.obj,'text'):
70-
self.text=self.obj.text
80+
self.text=ensure_str(self.obj.text)
7181
else:
7282
self.text=None
7383
ifhasattr(self.obj,'tail'):
74-
self.tail=self.obj.tail
84+
self.tail=ensure_str(self.obj.tail)
7585
else:
7686
self.tail=None
7787
self.isstring=isinstance(obj,str)orisinstance(obj,bytes)
78-
assertnotself.isstringorisinstance(obj,str)orsys.version_info.major==2
88+
# Support for bytes here is Py2
89+
ifself.isstring:
90+
self.obj=ensure_str(self.obj)
7991

8092
def__getattr__(self,name):
8193
returngetattr(self.obj,name)
@@ -120,7 +132,7 @@ def getNodeDetails(self, node):
120132
ifisinstance(node,tuple):# Text node
121133
node,key=node
122134
assertkeyin ("text","tail"),_("Text nodes are text or tail, found %s")%key
123-
return_base.TEXT,getattr(node,key)
135+
return_base.TEXT,ensure_str(getattr(node,key))
124136

125137
elifisinstance(node,Root):
126138
return (_base.DOCUMENT,)
@@ -129,24 +141,26 @@ def getNodeDetails(self, node):
129141
return_base.DOCTYPE,node.name,node.public_id,node.system_id
130142

131143
elifisinstance(node,FragmentWrapper)andnode.isstring:
132-
return_base.TEXT,node
144+
return_base.TEXT,node.obj
133145

134146
elifnode.tag==etree.Comment:
135-
return_base.COMMENT,node.text
147+
return_base.COMMENT,ensure_str(node.text)
136148

137149
elifnode.tag==etree.Entity:
138-
return_base.ENTITY,node.text[1:-1]# strip &;
150+
return_base.ENTITY,ensure_str(node.text)[1:-1]# strip &;
139151

140152
else:
141153
#This is assumed to be an ordinary element
142-
match=tag_regexp.match(node.tag)
154+
match=tag_regexp.match(ensure_str(node.tag))
143155
ifmatch:
144156
namespace,tag=match.groups()
145157
else:
146158
namespace=None
147-
tag=node.tag
159+
tag=ensure_str(node.tag)
148160
attrs= {}
149161
forname,valueinlist(node.attrib.items()):
162+
name=ensure_str(name)
163+
value=ensure_str(value)
150164
match=tag_regexp.match(name)
151165
ifmatch:
152166
attrs[(match.group(1),match.group(2))]=value

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp