44from html5lib .constants import DataLossWarning
55import etree as etree_builders
66try :
7- import lxml .html as etree
7+ import lxml .etree as etree
88except ImportError :
99import lxml .etree as etree
1010
@@ -48,21 +48,33 @@ def testSerializer(element):
4848finalText = None
4949def serializeElement (element ,indent = 0 ):
5050if not hasattr (element ,"tag" ):
51- rv .append ("#document" )
52- if element .docinfo .internalDTD :
53- if not (element .docinfo .public_id or element .docinfo .system_url ):
54- dtd_str = "<!DOCTYPE %s>" % element .docinfo .root_name
55- else :
56- dtd_str = """<!DOCTYPE %s PUBLIC "%s" "%s">""" % (
57- element .docinfo .root_name ,element .docinfo .public_id ,
58- element .docinfo .system_url )
59- rv .append ("|%s%s" % (' ' * (indent + 2 ),dtd_str ))
60- next_element = element .getroot ()
61- while next_element .getprevious ()is not None :
62- next_element = next_element .getprevious ()
63- while next_element is not None :
64- serializeElement (next_element ,indent + 2 )
65- next_element = next_element .getnext ()
51+ if hasattr (element ,"getroot" ):
52+ #Full tree case
53+ rv .append ("#document" )
54+ if element .docinfo .internalDTD :
55+ if not (element .docinfo .public_id or
56+ element .docinfo .system_url ):
57+ dtd_str = "<!DOCTYPE %s>" % element .docinfo .root_name
58+ else :
59+ dtd_str = """<!DOCTYPE %s PUBLIC "%s" "%s">""" % (
60+ element .docinfo .root_name ,
61+ element .docinfo .public_id ,
62+ element .docinfo .system_url )
63+ rv .append ("|%s%s" % (' ' * (indent + 2 ),dtd_str ))
64+ next_element = element .getroot ()
65+ while next_element .getprevious ()is not None :
66+ next_element = next_element .getprevious ()
67+ while next_element is not None :
68+ serializeElement (next_element ,indent + 2 )
69+ next_element = next_element .getnext ()
70+ elif isinstance (element ,basestring ):
71+ #Text in a fragment
72+ rv .append ("|%s\" %s\" " % (' ' * indent ,element ))
73+ else :
74+ #Fragment case
75+ rv .append ("#document-fragment" )
76+ for next_element in element :
77+ serializeElement (next_element ,indent + 2 )
6678elif type (element .tag )== type (etree .Comment ):
6779rv .append ("|%s<!-- %s -->" % (' ' * indent ,element .text ))
6880else :
@@ -132,13 +144,13 @@ class TreeBuilder(_base.TreeBuilder):
132144doctypeClass = DocumentType
133145elementClass = None
134146commentClass = None
135- fragmentClass = None
147+ fragmentClass = Document
136148
137149def __init__ (self ,fullTree = False ):
138150builder = etree_builders .getETreeModule (etree ,fullTree = fullTree )
139151self .elementClass = builder .Element
140152self .commentClass = builder .Comment
141- self .fragmentClass = builder .DocumentFragment
153+ # self.fragmentClass = builder.DocumentFragment
142154_base .TreeBuilder .__init__ (self )
143155
144156def reset (self ):
@@ -157,7 +169,14 @@ def getDocument(self):
157169return self .document ._elementTree .getroot ()
158170
159171def getFragment (self ):
160- return _base .TreeBuilder .getFragment (self )._element
172+ fragment = []
173+ element = self .openElements [0 ]._element
174+ if element .text :
175+ fragment .append (element .text )
176+ fragment .extend (element .getchildren ())
177+ if element .tail :
178+ fragment .append (element .tail )
179+ return fragment
161180
162181def insertDoctype (self ,name ,publicId ,systemId ):
163182if not name :
@@ -172,9 +191,10 @@ def insertCommentInitial(self, data, parent=None):
172191
173192def insertRoot (self ,name ):
174193"""Create the document root"""
175- #Because of the way libxml2 works, it doesn't seem to be possible to alter information
176- #like the doctype after the tree has been parsed. Therefore we need to use the built-in
177- #parser to create our iniial tree, after which we can add elements like normal
194+ #Because of the way libxml2 works, it doesn't seem to be possible to
195+ #alter information like the doctype after the tree has been parsed.
196+ #Therefore we need to use the built-in parser to create our initial
197+ #tree, after which we can add elements like normal
178198docStr = ""
179199if self .doctype :
180200docStr += "<!DOCTYPE %s" % self .doctype .name
@@ -205,4 +225,4 @@ def insertRoot(self, name):
205225self .openElements .append (root_element )
206226
207227#Reset to the default insert comment function
208- self .insertComment = super (TreeBuilder ,self ).insertComment
228+ self .insertComment = super (TreeBuilder ,self ).insertComment