1818
1919
2020class TreeWalker (object ):
21+ """Walks a tree yielding tokens
22+
23+ Tokens are dicts that all have a ``type`` field specifying the type of the
24+ token.
25+
26+ """
2127def __init__ (self ,tree ):
28+ """Creates a TreeWalker
29+
30+ :arg tree: the tree to walk
31+
32+ """
2233self .tree = tree
2334
2435def __iter__ (self ):
2536raise NotImplementedError
2637
def error(self, msg):
    """Build a ``SerializeError`` token carrying the given message

    :arg msg: the error message, placed in the token's ``data`` field

    :returns: SerializeError token

    """
    token = {"type": "SerializeError"}
    token["data"] = msg
    return token
2947
def emptyTag(self, namespace, name, attrs, hasChildren=False):
    """Yield an EmptyTag token, followed by an error token if needed

    This is a generator: the ``EmptyTag`` token is always produced first.

    :arg namespace: the namespace of the token--can be ``None``

    :arg name: the name of the element

    :arg attrs: the attributes of the element as a dict

    :arg hasChildren: when true, a ``SerializeError`` token is yielded
        after the ``EmptyTag`` token because this tag shouldn't have
        children

    :returns: generator yielding the EmptyTag token and, when
        ``hasChildren`` is true, a SerializeError token

    """
    token = {
        "type": "EmptyTag",
        "name": name,
        "namespace": namespace,
        "data": attrs,
    }
    yield token
    if not hasChildren:
        return
    yield self.error("Void element has children")
3668
def startTag(self, namespace, name, attrs):
    """Build a StartTag token for an element

    :arg namespace: the namespace of the token--can be ``None``

    :arg name: the name of the element

    :arg attrs: the attributes of the element as a dict; stored in the
        token's ``data`` field

    :returns: StartTag token

    """
    token = {"type": "StartTag"}
    token["name"] = name
    token["namespace"] = namespace
    token["data"] = attrs
    return token
4285
def endTag(self, namespace, name):
    """Build an EndTag token for an element

    :arg namespace: the namespace of the token--can be ``None``

    :arg name: the name of the element

    :returns: EndTag token

    """
    token = {"type": "EndTag"}
    token["name"] = name
    token["namespace"] = namespace
    return token
4799
48100def text (self ,data ):
101+ """Generates SpaceCharacters and Characters tokens
102+
103+ Depending on what's in the data, this generates one or more
104+ ``SpaceCharacters`` and ``Characters`` tokens.
105+
106+ For example:
107+
108+ >>> from html5lib.treewalkers.base import TreeWalker
109+ >>> # Give it an empty tree just so it instantiates
110+ >>> walker = TreeWalker([])
111+ >>> list(walker.text(''))
112+ []
113+ >>> list(walker.text(' '))
114+ [{u'data': ' ', u'type': u'SpaceCharacters'}]
115+ >>> list(walker.text(' abc ')) # doctest: +NORMALIZE_WHITESPACE
116+ [{u'data': ' ', u'type': u'SpaceCharacters'},
117+ {u'data': u'abc', u'type': u'Characters'},
118+ {u'data': u' ', u'type': u'SpaceCharacters'}]
119+
120+ :arg data: the text data
121+
122+ :returns: one or more ``SpaceCharacters`` and ``Characters`` tokens
123+
124+ """
49125data = data
50126middle = data .lstrip (spaceCharacters )
51127left = data [:len (data )- len (middle )]
@@ -60,18 +136,44 @@ def text(self, data):
60136yield {"type" :"SpaceCharacters" ,"data" :right }
61137
def comment(self, data):
    """Build a Comment token

    :arg data: the comment, placed in the token's ``data`` field

    :returns: Comment token

    """
    token = {"type": "Comment"}
    token["data"] = data
    return token
64147
def doctype(self, name, publicId=None, systemId=None):
    """Build a Doctype token

    :arg name: value for the token's ``name`` field

    :arg publicId: value for the token's ``publicId`` field; defaults to
        ``None``

    :arg systemId: value for the token's ``systemId`` field; defaults to
        ``None``

    :returns: the Doctype token

    """
    token = {"type": "Doctype", "name": name}
    token["publicId"] = publicId
    token["systemId"] = systemId
    return token
70164
def entity(self, name):
    """Build an Entity token

    :arg name: the entity name, placed in the token's ``name`` field

    :returns: an Entity token

    """
    token = {"type": "Entity"}
    token["name"] = name
    return token
73174
def unknown(self, nodeType):
    """Generate an error token for a node type that is not handled

    :arg nodeType: the unrecognised node type; it does not have to be a
        string (an integer constant works too), since it is formatted
        into the message

    :returns: SerializeError token whose message names the node type

    """
    # %-formatting (with a one-element tuple so tuple-valued input is not
    # unpacked) rather than ``+``: concatenating a non-string node type
    # would raise TypeError before an error token could be produced.
    return self.error("Unknown node type: %s" % (nodeType,))
76178
77179