Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 461bda3

Browse files
willkg authored and gsnedders committed
First pass at documenting serializer (#376)
1 parent ed8e017 commit 461bda3

File tree

1 file changed

+119
-44
lines changed

1 file changed

+119
-44
lines changed

‎html5lib/serializer.py

Lines changed: 119 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,33 @@ def htmlentityreplace_errors(exc):
6868
else:
6969
return xmlcharrefreplace_errors(exc)
7070

71+
7172
register_error("htmlentityreplace",htmlentityreplace_errors)
7273

7374

7475
def serialize(input, tree="etree", encoding=None, **serializer_opts):
76+
"""Serializes the input token stream using the specified treewalker
77+
78+
:arg input: the token stream to serialize
79+
80+
:arg tree: the treewalker to use
81+
82+
:arg encoding: the encoding to use
83+
84+
:arg serializer_opts: any options to pass to the
85+
:py:class:`html5lib.serializer.HTMLSerializer` that gets created
86+
87+
:returns: the tree serialized as a string
88+
89+
Example:
90+
91+
>>> from html5lib.html5parser import parse
92+
>>> from html5lib.serializer import serialize
93+
>>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
94+
>>> serialize(token_stream, omit_optional_tags=False)
95+
'<html><head></head><body><p>Hi!</p></body></html>'
96+
97+
"""
7598
# XXX: Should we cache this?
7699
walker=treewalkers.getTreeWalker(tree)
77100
s=HTMLSerializer(**serializer_opts)
@@ -110,50 +133,83 @@ class HTMLSerializer(object):
110133
"strip_whitespace","sanitize")
111134

112135
def __init__(self, **kwargs):
113-
"""Initialize HTMLSerializer.
114-
115-
Keyword options (default given first unless specified) include:
116-
117-
inject_meta_charset=True|False
118-
Whether it insert a meta element to define the character set of the
119-
document.
120-
quote_attr_values="legacy"|"spec"|"always"
121-
Whether to quote attribute values that don't require quoting
122-
per legacy browser behaviour, when required by the standard, or always.
123-
quote_char=u'"'|u"'"
124-
Use given quote character for attribute quoting. Default is to
125-
use double quote unless attribute value contains a double quote,
126-
in which case single quotes are used instead.
127-
escape_lt_in_attrs=False|True
128-
Whether to escape < in attribute values.
129-
escape_rcdata=False|True
130-
Whether to escape characters that need to be escaped within normal
131-
elements within rcdata elements such as style.
132-
resolve_entities=True|False
133-
Whether to resolve named character entities that appear in the
134-
source tree. The XML predefined entities &lt; &gt; &amp; &quot; &apos;
135-
are unaffected by this setting.
136-
strip_whitespace=False|True
137-
Whether to remove semantically meaningless whitespace. (This
138-
compresses all whitespace to a single space except within pre.)
139-
minimize_boolean_attributes=True|False
140-
Shortens boolean attributes to give just the attribute value,
141-
for example <input disabled="disabled"> becomes <input disabled>.
142-
use_trailing_solidus=False|True
143-
Includes a close-tag slash at the end of the start tag of void
144-
elements (empty elements whose end tag is forbidden). E.g. <hr/>.
145-
space_before_trailing_solidus=True|False
146-
Places a space immediately before the closing slash in a tag
147-
using a trailing solidus. E.g. <hr />. Requires use_trailing_solidus.
148-
sanitize=False|True
149-
Strip all unsafe or unknown constructs from output.
150-
See `html5lib user documentation`_
151-
omit_optional_tags=True|False
152-
Omit start/end tags that are optional.
153-
alphabetical_attributes=False|True
154-
Reorder attributes to be in alphabetical order.
155-
156-
.. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
136+
"""Initialize HTMLSerializer
137+
138+
:arg inject_meta_charset: Whether or not to inject the meta charset.
139+
140+
Defaults to ``True``.
141+
142+
:arg quote_attr_values: Whether to quote attribute values that don't
143+
require quoting per legacy browser behavior (``"legacy"``), when
144+
required by the standard (``"spec"``), or always (``"always"``).
145+
146+
Defaults to ``"legacy"``.
147+
148+
:arg quote_char: Use given quote character for attribute quoting.
149+
150+
Defaults to ``"`` which will use double quotes unless attribute
151+
value contains a double quote, in which case single quotes are
152+
used.
153+
154+
:arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
155+
values.
156+
157+
Defaults to ``False``.
158+
159+
:arg escape_rcdata: Whether to escape characters that need to be
160+
escaped within normal elements within rcdata elements such as
161+
style.
162+
163+
Defaults to ``False``.
164+
165+
:arg resolve_entities: Whether to resolve named character entities that
166+
appear in the source tree. The XML predefined entities &lt; &gt;
167+
&amp; &quot; &apos; are unaffected by this setting.
168+
169+
Defaults to ``True``.
170+
171+
:arg strip_whitespace: Whether to remove semantically meaningless
172+
whitespace. (This compresses all whitespace to a single space
173+
except within ``pre``.)
174+
175+
Defaults to ``False``.
176+
177+
:arg minimize_boolean_attributes: Shortens boolean attributes to give
178+
just the attribute value, for example::
179+
180+
<input disabled="disabled">
181+
182+
becomes::
183+
184+
<input disabled>
185+
186+
Defaults to ``True``.
187+
188+
:arg use_trailing_solidus: Includes a close-tag slash at the end of the
189+
start tag of void elements (empty elements whose end tag is
190+
forbidden). E.g. ``<hr/>``.
191+
192+
Defaults to ``False``.
193+
194+
:arg space_before_trailing_solidus: Places a space immediately before
195+
the closing slash in a tag using a trailing solidus. E.g.
196+
``<hr />``. Requires ``use_trailing_solidus=True``.
197+
198+
Defaults to ``True``.
199+
200+
:arg sanitize: Strip all unsafe or unknown constructs from output.
201+
See :py:class:`html5lib.filters.sanitizer.Filter`.
202+
203+
Defaults to ``False``.
204+
205+
:arg omit_optional_tags: Omit start/end tags that are optional.
206+
207+
Defaults to ``True``.
208+
209+
:arg alphabetical_attributes: Reorder attributes to be in alphabetical order.
210+
211+
Defaults to ``False``.
212+
157213
"""
158214
unexpected_args=frozenset(kwargs)-frozenset(self.options)
159215
if len(unexpected_args) > 0:
@@ -317,6 +373,25 @@ def serialize(self, treewalker, encoding=None):
317373
self.serializeError(token["data"])
318374

319375
def render(self, treewalker, encoding=None):
376+
"""Serializes the stream from the treewalker into a string
377+
378+
:arg treewalker: the treewalker to serialize
379+
380+
:arg encoding: the string encoding to use
381+
382+
:returns: the serialized tree
383+
384+
Example:
385+
386+
>>> from html5lib import parse, getTreeWalker
387+
>>> from html5lib.serializer import HTMLSerializer
388+
>>> token_stream = parse('<html><body>Hi!</body></html>')
389+
>>> walker = getTreeWalker('etree')
390+
>>> serializer = HTMLSerializer(omit_optional_tags=False)
391+
>>> serializer.render(walker(token_stream))
392+
'<html><head></head><body>Hi!</body></html>'
393+
394+
"""
320395
if encoding:
321396
return b"".join(list(self.serialize(treewalker, encoding)))
322397
else:

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp