Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 55d183d

Browse files
committed
New implementation of the elementtree treewalker that doesn't fail with character data immediately before the root node
--HG--extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401008
1 parent 8ae506e commit 55d183d

File tree

2 files changed

+64
-53
lines changed

2 files changed

+64
-53
lines changed

‎src/html5lib/treewalkers/etree.py

Lines changed: 56 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -28,23 +28,22 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
2828
to avoid using recursion, returns "nodes" as tuples with the following
2929
content:
3030
31-
1.An Element node serving as *context* (it cannot be called the parent
32-
node due to the particular ``tail`` text nodes.
33-
34-
2. Either the string literals ``"text"`` or ``"tail"`` or a child index
35-
36-
3. A list used as a stack of all ancestor *context nodes*. It is a
37-
pair tuple whose first item is an Element and second itemis a child
38-
index.
31+
1.The current element
32+
33+
2. The index of the element relative to its parent
34+
35+
3. A stack of ancestor elements
36+
37+
4. A flag "text", "tail" or None to indicate if the current node is a
38+
text node; either the text or tail of the current element (1)
3939
"""
40-
41-
defgetNodeDetails(self,node):
40+
defgetNodeDetails(self,node):
4241
ifisinstance(node,tuple):# It might be the root Element
43-
elt,key,parents=node
44-
ifkeyin ("text","tail"):
45-
return_base.TEXT,getattr(elt,key)
42+
elt,key,parents,flag=node
43+
ifflagin ("text","tail"):
44+
return_base.TEXT,getattr(elt,flag)
4645
else:
47-
node=elt[int(key)]
46+
node=elt
4847

4948
ifnot(hasattr(node,"tag")):
5049
node=node.getroot()
@@ -61,54 +60,60 @@ def getNodeDetails(self, node):
6160
else:
6261
#This is assumed to be an ordinary element
6362
return_base.ELEMENT,node.tag,node.attrib.items(),len(node)ornode.text
64-
63+
6564
defgetFirstChild(self,node):
66-
ifisinstance(node,tuple):# It might be the root Element
67-
elt,key,parents=node
68-
assertkeynotin ("text","tail"),"Text nodes have no children"
69-
parents.append((elt,int(key)))
70-
node=elt[int(key)]
65+
ifisinstance(node,tuple):
66+
element,key,parents,flag=node
7167
else:
72-
parents= []
73-
74-
assertlen(node)ornode.text,"Node has no children"
75-
ifnode.text:
76-
return (node,"text",parents)
68+
element,key,parents,flag=node,None, [],None
69+
70+
ifflagin ("text","tail"):
71+
returnNone
7772
else:
78-
return (node,0,parents)
79-
73+
ifelement.text:
74+
returnelement,key,parents,"text"
75+
eliflen(element):
76+
parents.append(element)
77+
returnelement[0],0,parents,None
78+
else:
79+
returnNone
80+
8081
defgetNextSibling(self,node):
8182
ifisinstance(node,tuple):
82-
elt,key,parents=node
83-
ifkey=="text":
84-
key=-1
85-
elifkey=="tail":
86-
elt,key=parents.pop()
87-
else:
88-
# Look for "tail" of the "revisited" node
89-
child=elt[key]
90-
ifchild.tail:
91-
parents.append((elt,key))
92-
return (child,"tail",parents)
83+
element,key,parents,flag=node
9384
else:
9485
returnNone
95-
96-
# case where key were "text" or "tail" or elt[key] had a tail
97-
key+=1
98-
iflen(elt)>key:
99-
return (elt,key,parents)
86+
87+
ifflag=="text":
88+
iflen(element):
89+
parents.append(element)
90+
returnelement[0],0,parents,None
91+
else:
92+
returnNone
10093
else:
101-
returnNone
102-
94+
ifelement.tailandflag!="tail":
95+
returnelement,key,parents,"tail"
96+
elifkey<len(parents[-1])-1:
97+
returnparents[-1][key+1],key+1,parents,None
98+
else:
99+
returnNone
100+
103101
defgetParentNode(self,node):
104102
ifisinstance(node,tuple):
105-
elt,key,parents=node
106-
ifparents:
107-
elt,key=parents.pop()
108-
returnelt,key,parents
109-
else:
110-
returnelt
103+
element,key,parents,flag=node
111104
else:
112105
returnNone
106+
107+
ifflag=="text":
108+
ifnotparents:
109+
returnelement
110+
else:
111+
returnelement,key,parents,None
112+
else:
113+
parent=parents.pop()
114+
ifnotparents:
115+
returnparent
116+
else:
117+
returnparent,list(parents[-1]).index(parent),parents,None
113118

114119
returnlocals()

‎tests/test_treewalkers.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,11 +226,17 @@ def test_all_tokens(self):
226226
{'data': [],'type':'StartTag','name':u'head'},
227227
{'data': [],'type':'EndTag','name':u'head'},
228228
{'data': [],'type':'StartTag','name':u'body'},
229+
{'data':u'a','type':'Characters'},
230+
{'data': [],'type':'StartTag','name':u'div'},
231+
{'data':u'b','type':'Characters'},
232+
{'data': [],'type':'EndTag','name':u'div'},
233+
{'data':u'c','type':'Characters'},
229234
{'data': [],'type':'EndTag','name':u'body'},
230-
{'data': [],'type':'EndTag','name':u'html'}]
235+
{'data': [],'type':'EndTag','name':u'html'}
236+
]
231237
fortreeName,treeClsintreeTypes.iteritems():
232238
p=html5parser.HTMLParser(tree=treeCls["builder"])
233-
document=p.parse("<html></html>")
239+
document=p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
234240
document=treeCls.get("adapter",lambdax:x)(document)
235241
output=treeCls["walker"](document)
236242
forexpectedToken,outputTokeninzip(expected,output):

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp