Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 114ab64

Browse files
committed
Make the Python html5lib tree-walker API use a dict of (namespace, name): value pairs for attributes.
I promise I won't update this part of the API again — this is now something I'm happy with, so I won't do what I've just done and change it twice in six^W seven months again. The only possible slight update from this is to move to using an ordered dict for trees that can preserve attribute order, but this should have no API breakage.
1 parent bc4ceca, commit 114ab64

File tree

10 files changed

+64
-75
lines changed

10 files changed

+64
-75
lines changed

‎html5lib/filters/inject_meta_charset.py‎

Lines changed: 6 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -20,32 +20,26 @@ def __iter__(self):
2020
iftoken["name"].lower()=="meta":
2121
# replace charset with actual encoding
2222
has_http_equiv_content_type=False
23-
content_index=-1
24-
fori,attrinenumerate(token["data"]):
25-
namespace=attr["namespace"]
26-
name=attr["name"]
27-
value=attr["value"]
23+
for (namespace,name),valueintoken["data"].iteritems():
2824
ifnamespace!=None:
2925
continue
3026
elifname.lower()=='charset':
31-
token["data"][i]["value"]=self.encoding
27+
token["data"][(namespace,name)]=self.encoding
3228
meta_found=True
3329
break
3430
elifname=='http-equiv'andvalue.lower()=='content-type':
3531
has_http_equiv_content_type=True
36-
elifname=='content':
37-
content_index=i
3832
else:
39-
ifhas_http_equiv_content_typeandcontent_index>=0:
40-
token["data"][content_index]["value"]=u'text/html; charset=%s'%self.encoding
33+
ifhas_http_equiv_content_typeand(None,"content")intoken["data"]:
34+
token["data"][(None,"content")]=u'text/html; charset=%s'%self.encoding
4135
meta_found=True
4236

4337
eliftoken["name"].lower()=="head"andnotmeta_found:
4438
# insert meta into empty head
4539
yield {"type":"StartTag","name":"head",
4640
"data":token["data"]}
4741
yield {"type":"EmptyTag","name":"meta",
48-
"data":[{"namespace":None,"name":"charset","value":self.encoding}]}
42+
"data":{(None,"charset"):self.encoding}}
4943
yield {"type":"EndTag","name":"head"}
5044
meta_found=True
5145
continue
@@ -56,7 +50,7 @@ def __iter__(self):
5650
yieldpending.pop(0)
5751
ifnotmeta_found:
5852
yield {"type":"EmptyTag","name":"meta",
59-
"data":[{"namespace":None,"name":"charset","value":self.encoding}]}
53+
"data":{(None,"charset"):self.encoding}}
6054
whilepending:
6155
yieldpending.pop(0)
6256
meta_found=True

‎html5lib/serializer/htmlserializer.py‎

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -220,15 +220,11 @@ def serialize(self, treewalker, encoding=None):
220220
in_cdata=True
221221
elifin_cdata:
222222
self.serializeError(_("Unexpected child element of a CDATA element"))
223-
attrs=token["data"]
224-
ifhasattr(attrs,"items"):
225-
attrs=attrs.items()
226-
attrs.sort()
227223
attributes= []
228-
forattrinattrs:
224+
for(attr_namespace,attr_name),attr_valueinsorted(token["data"].items()):
229225
#TODO: Add namespace support here
230-
k=attr["name"]
231-
v=attr["value"]
226+
k=attr_name
227+
v=attr_value
232228
ifencoding:
233229
k=k.encode(encoding,"strict")
234230
attributes.append(' ')

‎html5lib/tests/test_serializer.py‎

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def __iter__(self):
3131
else:
3232
namespace=default_namespace
3333
name,attrib=token[1:3]
34-
yieldself.startTag(namespace,name,attrib)
34+
yieldself.startTag(namespace,name,self._convertAttrib(attrib))
3535
eliftype=="EndTag":
3636
iflen(token)==3:
3737
namespace,name=token[1:3]
@@ -45,7 +45,7 @@ def __iter__(self):
4545
else:
4646
namespace=default_namespace
4747
name,attrib=token[1:]
48-
fortokeninself.emptyTag(namespace,name,attrib):
48+
fortokeninself.emptyTag(namespace,name,self._convertAttrib(attrib)):
4949
yieldtoken
5050
eliftype=="Comment":
5151
yieldself.comment(token[1])
@@ -61,6 +61,19 @@ def __iter__(self):
6161
yieldself.doctype(token[1])
6262
else:
6363
raiseValueError("Unknown token type: "+type)
64+
65+
def_convertAttrib(self,attribs):
66+
"""html5lib tree-walkers use a dict of (namespace, name): value for
67+
attributes, but JSON cannot represent this. Convert from the format
68+
in the serializer tests (a list of dicts with "namespace", "name",
69+
and "value" as keys) to html5lib's tree-walker format."""
70+
attrs= {}
71+
forattribinattribs:
72+
name= (attrib["namespace"],attrib["name"])
73+
assert(namenotinattrs)
74+
attrs[name]=attrib["value"]
75+
returnattrs
76+
6477

6578
classTestCase(unittest.TestCase):
6679
defaddTest(cls,name,description,input,expected,xhtml,options):

‎html5lib/tests/test_treewalkers.py‎

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -196,17 +196,17 @@ def convertTokens(tokens):
196196
indent+=2
197197
attrs=token["data"]
198198
ifattrs:
199-
attrs.sort(lambdaa,b:cmp(a["name"],b["name"]))
200-
forattrinattrs:
201-
ifattr["namespace"]:
202-
ifattr["namespace"]inconstants.prefixes:
203-
name=constants.prefixes[attr["namespace"]]
199+
#TODO: Remove this if statement, attrs should always exist
200+
for(namespace,name),valueinsorted(attrs.items()):
201+
ifnamespace:
202+
ifnamespaceinconstants.prefixes:
203+
outputname=constants.prefixes[namespace]
204204
else:
205-
name=attr["namespace"]
206-
name+=u" "+attr["name"]
205+
outputname=namespace
206+
outputname+=u" "+name
207207
else:
208-
name=attr["name"]
209-
output.append(u"%s%s=\"%s\""% (" "*indent,name,attr["value"]))
208+
outputname=name
209+
output.append(u"%s%s=\"%s\""% (" "*indent,outputname,value))
210210
iftype=="EmptyTag":
211211
indent-=2
212212
eliftype=="EndTag":
@@ -270,17 +270,17 @@ def runTest(self, innerHTML, input, expected, errors, treeClass):
270270
classTokenTestCase(unittest.TestCase):
271271
deftest_all_tokens(self):
272272
expected= [
273-
{'data':[],'type':'StartTag','name':u'html'},
274-
{'data':[],'type':'StartTag','name':u'head'},
275-
{'data':[],'type':'EndTag','name':u'head'},
276-
{'data':[],'type':'StartTag','name':u'body'},
273+
{'data':{},'type':'StartTag','namespace':u'http://www.w3.org/1999/xhtml','name':u'html'},
274+
{'data':{},'type':'StartTag','namespace':u'http://www.w3.org/1999/xhtml','name':u'head'},
275+
{'data':{},'type':'EndTag','namespace':u'http://www.w3.org/1999/xhtml','name':u'head'},
276+
{'data':{},'type':'StartTag','namespace':u'http://www.w3.org/1999/xhtml','name':u'body'},
277277
{'data':u'a','type':'Characters'},
278-
{'data':[],'type':'StartTag','name':u'div'},
278+
{'data':{},'type':'StartTag','namespace':u'http://www.w3.org/1999/xhtml','name':u'div'},
279279
{'data':u'b','type':'Characters'},
280-
{'data':[],'type':'EndTag','name':u'div'},
280+
{'data':{},'type':'EndTag','namespace':u'http://www.w3.org/1999/xhtml','name':u'div'},
281281
{'data':u'c','type':'Characters'},
282-
{'data':[],'type':'EndTag','name':u'body'},
283-
{'data':[],'type':'EndTag','name':u'html'}
282+
{'data':{},'type':'EndTag','namespace':u'http://www.w3.org/1999/xhtml','name':u'body'},
283+
{'data':{},'type':'EndTag','namespace':u'http://www.w3.org/1999/xhtml','name':u'html'}
284284
]
285285
fortreeName,treeClsintreeTypes.iteritems():
286286
p=html5parser.HTMLParser(tree=treeCls["builder"])

‎html5lib/treewalkers/_base.py‎

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -15,13 +15,15 @@ def error(self, msg):
1515
return {"type":"SerializeError","data":msg}
1616

1717
defnormalizeAttrs(self,attrs):
18-
ifnotattrs:
19-
attrs= []
20-
forattrinattrs:
21-
attr["namespace"]=unicode(attr["namespace"])ifattr["namespace"]elseNone
22-
attr["name"]=unicode(attr["name"])
23-
attr["value"]=unicode(attr["value"])
24-
returnattrs
18+
newattrs= {}
19+
ifattrs:
20+
#TODO: treewalkers should always have attrs
21+
for (namespace,name),valueinattrs.iteritems():
22+
namespace=unicode(namespace)ifnamespaceelseNone
23+
name=unicode(name)
24+
value=unicode(value)
25+
newattrs[(namespace,name)]=value
26+
returnnewattrs
2527

2628
defemptyTag(self,namespace,name,attrs,hasChildren=False):
2729
yield {"type":"EmptyTag","name":unicode(name),
@@ -40,7 +42,7 @@ def endTag(self, namespace, name):
4042
return {"type":"EndTag",
4143
"name":unicode(name),
4244
"namespace":unicode(namespace),
43-
"data":[]}
45+
"data":{}}
4446

4547
deftext(self,data):
4648
data=unicode(data)

‎html5lib/treewalkers/dom.py‎

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,10 @@ def getNodeDetails(self, node):
1515
return_base.TEXT,node.nodeValue
1616

1717
elifnode.nodeType==Node.ELEMENT_NODE:
18-
attrs=[]
18+
attrs={}
1919
forattrinnode.attributes.keys():
2020
attr=node.getAttributeNode(attr)
21-
attrs.append({"namespace":attr.namespaceURI,
22-
"name":attr.localName,
23-
"value":attr.value})
21+
attrs[(attr.namespaceURI,attr.localName)]=attr.value
2422
return (_base.ELEMENT,node.namespaceURI,node.nodeName,
2523
attrs,node.hasChildNodes())
2624

‎html5lib/treewalkers/etree.py‎

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -69,17 +69,13 @@ def getNodeDetails(self, node):
6969
else:
7070
namespace=None
7171
tag=node.tag
72-
attrs=[]
72+
attrs={}
7373
forname,valueinnode.attrib.items():
7474
match=tag_regexp.match(name)
7575
ifmatch:
76-
attrs.append({"namespace":match.group(1),
77-
"name":match.group(2),
78-
"value":value})
76+
attrs[(match.group(1),match.group(2))]=value
7977
else:
80-
attrs.append({"namespace":None,
81-
"name":name,
82-
"value":value})
78+
attrs[(None,name)]=value
8379
return (_base.ELEMENT,namespace,tag,
8480
attrs,len(node)ornode.text)
8581

‎html5lib/treewalkers/lxmletree.py‎

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -140,17 +140,13 @@ def getNodeDetails(self, node):
140140
else:
141141
namespace=None
142142
tag=node.tag
143-
attrs=[]
143+
attrs={}
144144
forname,valueinnode.attrib.items():
145145
match=tag_regexp.match(name)
146146
ifmatch:
147-
attrs.append({"namespace":match.group(1),
148-
"name":match.group(2),
149-
"value":value})
147+
attrs[(match.group(1),match.group(2))]=value
150148
else:
151-
attrs.append({"namespace":None,
152-
"name":name,
153-
"value":value})
149+
attrs[(None,name)]=value
154150
return (_base.ELEMENT,namespace,self.filter.fromXmlName(tag),
155151
attrs,len(node)>0ornode.text)
156152

‎html5lib/treewalkers/pulldom.py‎

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,12 +30,10 @@ def tokens(self, event, next):
3030
iftype==START_ELEMENT:
3131
name=node.nodeName
3232
namespace=node.namespaceURI
33-
attrs=[]
33+
attrs={}
3434
forattrinnode.attributes.keys():
3535
attr=node.getAttributeNode(attr)
36-
attrs.append({"namespace":attr.namespaceURI,
37-
"name":attr.localName,
38-
"value":attr.value})
36+
attrs[(attr.namespaceURI,attr.localName)]=attr.value
3937
ifnameinvoidElements:
4038
fortokeninself.emptyTag(namespace,
4139
name,

‎html5lib/treewalkers/simpletree.py‎

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -32,16 +32,12 @@ def getNodeDetails(self, node):
3232
return_base.TEXT,node.value
3333

3434
elifnode.type==5:# Element
35-
attrs=[]
35+
attrs={}
3636
forname,valueinnode.attributes.items():
3737
ifisinstance(name,tuple):
38-
attrs.append({"namespace":name[2],
39-
"name":name[1],
40-
"value":value})
38+
attrs[(name[2],name[1])]=value
4139
else:
42-
attrs.append({"namespace":None,
43-
"name":name,
44-
"value":value})
40+
attrs[(None,name)]=value
4541
return (_base.ELEMENT,node.namespace,node.name,
4642
attrs,node.hasContent())
4743

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp