Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 0f1994b

Browse files
willkg authored and gsnedders committed
Document html5lib.treewalkers (html5lib#386)
1 parent 4ed8b8b, commit 0f1994b

File tree

2 files changed

+128
-15
lines changed

2 files changed

+128
-15
lines changed

‎html5lib/treewalkers/__init__.py

Lines changed: 26 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -21,20 +21,25 @@
2121
defgetTreeWalker(treeType,implementation=None,**kwargs):
2222
"""Get a TreeWalker class for various types of tree with built-in support
2323
24-
Args:
25-
treeType (str): the name of the tree type required (case-insensitive).
26-
Supported values are:
27-
28-
- "dom": The xml.dom.minidom DOM implementation
29-
- "etree": A generic walker for tree implementations exposing an
30-
elementtree-like interface (known to work with
31-
ElementTree, cElementTree and lxml.etree).
32-
- "lxml": Optimized walker for lxml.etree
33-
- "genshi": a Genshi stream
34-
35-
Implementation: A module implementing the tree type e.g.
36-
xml.etree.ElementTree or cElementTree (Currently applies to the
37-
"etree" tree type only).
24+
:arg str treeType: the name of the tree type required (case-insensitive).
25+
Supported values are:
26+
27+
* "dom": The xml.dom.minidom DOM implementation
28+
* "etree": A generic walker for tree implementations exposing an
29+
elementtree-like interface (known to work with ElementTree,
30+
cElementTree and lxml.etree).
31+
* "lxml": Optimized walker for lxml.etree
32+
* "genshi": a Genshi stream
33+
34+
:arg implementation: A module implementing the tree type e.g.
35+
xml.etree.ElementTree or cElementTree (Currently applies to the "etree"
36+
tree type only).
37+
38+
:arg kwargs: keyword arguments passed to the etree walker--for other
39+
walkers, this has no effect
40+
41+
:returns: a TreeWalker class
42+
3843
"""
3944

4045
treeType=treeType.lower()
@@ -73,7 +78,13 @@ def concatenateCharacterTokens(tokens):
7378

7479

7580
defpprint(walker):
76-
"""Pretty printer for tree walkers"""
81+
"""Pretty printer for tree walkers
82+
83+
Takes a TreeWalker instance and pretty prints the output of walking the tree.
84+
85+
:arg walker: a TreeWalker instance
86+
87+
"""
7788
output= []
7889
indent=0
7990
fortokeninconcatenateCharacterTokens(walker):

‎html5lib/treewalkers/base.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,34 +18,110 @@
1818

1919

2020
classTreeWalker(object):
21+
"""Walks a tree yielding tokens
22+
23+
Tokens are dicts that all have a ``type`` field specifying the type of the
24+
token.
25+
26+
"""
2127
def__init__(self,tree):
28+
"""Creates a TreeWalker
29+
30+
:arg tree: the tree to walk
31+
32+
"""
2233
self.tree=tree
2334

2435
def__iter__(self):
2536
raiseNotImplementedError
2637

2738
deferror(self,msg):
39+
"""Generates an error token with the given message
40+
41+
:arg msg: the error message
42+
43+
:returns: SerializeError token
44+
45+
"""
2846
return {"type":"SerializeError","data":msg}
2947

3048
defemptyTag(self,namespace,name,attrs,hasChildren=False):
49+
"""Generates an EmptyTag token
50+
51+
:arg namespace: the namespace of the token--can be ``None``
52+
53+
:arg name: the name of the element
54+
55+
:arg attrs: the attributes of the element as a dict
56+
57+
:arg hasChildren: whether or not to yield a SerializeError token because
58+
this tag shouldn't have children
59+
60+
:returns: EmptyTag token, followed by a SerializeError token if ``hasChildren`` is true
61+
62+
"""
3163
yield {"type":"EmptyTag","name":name,
3264
"namespace":namespace,
3365
"data":attrs}
3466
ifhasChildren:
3567
yieldself.error("Void element has children")
3668

3769
defstartTag(self,namespace,name,attrs):
70+
"""Generates a StartTag token
71+
72+
:arg namespace: the namespace of the token--can be ``None``
73+
74+
:arg name: the name of the element
75+
76+
:arg attrs: the attributes of the element as a dict
77+
78+
:returns: StartTag token
79+
80+
"""
3881
return {"type":"StartTag",
3982
"name":name,
4083
"namespace":namespace,
4184
"data":attrs}
4285

4386
defendTag(self,namespace,name):
87+
"""Generates an EndTag token
88+
89+
:arg namespace: the namespace of the token--can be ``None``
90+
91+
:arg name: the name of the element
92+
93+
:returns: EndTag token
94+
95+
"""
4496
return {"type":"EndTag",
4597
"name":name,
4698
"namespace":namespace}
4799

48100
deftext(self,data):
101+
"""Generates SpaceCharacters and Characters tokens
102+
103+
Depending on what's in the data, this generates one or more
104+
``SpaceCharacters`` and ``Characters`` tokens.
105+
106+
For example:
107+
108+
>>> from html5lib.treewalkers.base import TreeWalker
109+
>>> # Give it an empty tree just so it instantiates
110+
>>> walker = TreeWalker([])
111+
>>> list(walker.text(''))
112+
[]
113+
>>> list(walker.text(' '))
114+
[{u'data': ' ', u'type': u'SpaceCharacters'}]
115+
>>> list(walker.text(' abc ')) # doctest: +NORMALIZE_WHITESPACE
116+
[{u'data': ' ', u'type': u'SpaceCharacters'},
117+
{u'data': u'abc', u'type': u'Characters'},
118+
{u'data': u' ', u'type': u'SpaceCharacters'}]
119+
120+
:arg data: the text data
121+
122+
:returns: one or more ``SpaceCharacters`` and ``Characters`` tokens
123+
124+
"""
49125
data=data
50126
middle=data.lstrip(spaceCharacters)
51127
left=data[:len(data)-len(middle)]
@@ -60,18 +136,44 @@ def text(self, data):
60136
yield {"type":"SpaceCharacters","data":right}
61137

62138
defcomment(self,data):
139+
"""Generates a Comment token
140+
141+
:arg data: the comment
142+
143+
:returns: Comment token
144+
145+
"""
63146
return {"type":"Comment","data":data}
64147

65148
defdoctype(self,name,publicId=None,systemId=None):
149+
"""Generates a Doctype token
150+
151+
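:arg name: the value stored under the token's ``name`` key
151+
152+
:arg publicId: the value stored under the token's ``publicId`` key (defaults to ``None``)
153+
154+
:arg systemId: the value stored under the token's ``systemId`` key (defaults to ``None``)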
156+
157+
:returns: the Doctype token
158+
159+
"""
66160
return {"type":"Doctype",
67161
"name":name,
68162
"publicId":publicId,
69163
"systemId":systemId}
70164

71165
defentity(self,name):
166+
"""Generates an Entity token
167+
168+
:arg name: the entity name
169+
170+
:returns: an Entity token
171+
172+
"""
72173
return {"type":"Entity","name":name}
73174

74175
defunknown(self,nodeType):
176+
"""Handles unknown node types"""
75177
returnself.error("Unknown node type: "+nodeType)
76178

77179

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp