Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 0e39324

Browse files
committed
Add patch from issue 152 by fantasai
--HG--branch : csswg-testsuite
1 parent 21bf1ad, commit 0e39324

File tree

1 file changed

+100
-18
lines changed

1 file changed

+100
-18
lines changed

‎html5lib/serializer/htmlserializer.py

Lines changed: 100 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,8 @@
1212
fromhtml5libimportutils
1313
fromxml.sax.saxutilsimportescape
1414

15+
importre
16+
1517
spaceCharacters=u"".join(spaceCharacters)
1618

1719
try:
@@ -84,6 +86,7 @@ class HTMLSerializer(object):
8486
resolve_entities=True
8587

8688
# miscellaneous options
89+
emit_doctype='preserve'
8790
inject_meta_charset=True
8891
strip_whitespace=False
8992
sanitize=False
@@ -92,13 +95,23 @@ class HTMLSerializer(object):
9295
"minimize_boolean_attributes","use_trailing_solidus",
9396
"space_before_trailing_solidus","omit_optional_tags",
9497
"strip_whitespace","inject_meta_charset","escape_lt_in_attrs",
95-
"escape_rcdata","resolve_entities","sanitize")
98+
"escape_rcdata","resolve_entities","emit_doctype","sanitize")
9699

97100
def__init__(self,**kwargs):
98101
"""Initialize HTMLSerializer.
99102
100103
Keyword options (default given first unless specified) include:
101104
105+
emit_doctype='html'|'xhtml'|'html5'|'preserve'
106+
Whether to output a doctype.
107+
* emit_doctype='xhtml' preserves unknown doctypes and valid
108+
XHTML doctypes, converts valid HTML doctypes to their XHTML
109+
counterparts, and drops <!DOCTYPE html>
110+
* emit_doctype='html' preserves unknown doctypes and valid
111+
HTML doctypes, converts valid XHTML doctypes to their HTML
112+
counterparts, and uses <!DOCTYPE html> for missing doctypes
113+
* emit_doctype='html5' Uses <!DOCTYPE html> as the doctype
114+
* emit_doctype='preserve' preserves the doctype, if any, unchanged
102115
inject_meta_charset=True|False
103116
..?
104117
quote_attr_values=True|False
@@ -140,6 +153,86 @@ def __init__(self, **kwargs):
140153
self.errors= []
141154
self.strict=False
142155

156+
def calc_doctype(self, token=None):
    """Return the doctype string to emit, according to ``self.emit_doctype``.

    ``token`` is the Doctype token from the treewalker (a mapping with
    ``"name"``, ``"publicId"`` and ``"systemId"`` keys), or ``None`` when
    the document has no doctype and the serializer is asking whether one
    should be synthesized.

    Behaviour per ``self.emit_doctype`` value:

    * ``'html5'``    -- always ``<!DOCTYPE html>``.
    * ``'html'``     -- ``<!DOCTYPE html>`` when there is no token; known
      XHTML doctypes are converted to their HTML 4.01 counterparts;
      everything else is kept unchanged.
    * ``'xhtml'``    -- known HTML 4.0/4.01/3.2 doctypes are converted to
      their XHTML 1.0 counterparts; a bare ``<!DOCTYPE html>`` is dropped;
      everything else is kept unchanged.
    * ``'preserve'`` -- the token's doctype is kept unchanged.

    Returns a unicode string. The string is empty when no doctype should
    be emitted (no token in ``'preserve'``/``'xhtml'`` mode, or a dropped
    ``<!DOCTYPE html>`` in ``'xhtml'`` mode). A synthesized doctype (no
    token) ends with a newline; one derived from a token does not.

    If the system identifier contains both quote characters,
    ``self.serializeError`` is called before the doctype is built.
    """
    mode = self.emit_doctype

    if mode == u'html5' or (not token and mode == u'html'):
        if token:
            return u'<!DOCTYPE html>'
        return u'<!DOCTYPE html>\n'

    if not token:
        # 'preserve' and 'xhtml' have nothing to preserve or convert when
        # the document has no doctype; indexing into None would raise.
        return u''

    rootElement = token["name"]
    publicID = token["publicId"]
    systemID = token["systemId"]

    # (public ID, accepted system ID, replacement public ID,
    #  replacement system ID) -- matched by exact equality.
    xhtml_to_html = (
        # XHTML 1.1
        (u"-//W3C//DTD XHTML 1.1//EN",
         u"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd",
         u"-//W3C//DTD HTML 4.01//EN",
         u"http://www.w3.org/TR/html4/strict.dtd"),
        # XHTML 1.0 Strict
        (u"-//W3C//DTD XHTML 1.0 Strict//EN",
         u"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd",
         u"-//W3C//DTD HTML 4.01//EN",
         u"http://www.w3.org/TR/html4/strict.dtd"),
        # XHTML 1.0 Transitional
        (u"-//W3C//DTD XHTML 1.0 Transitional//EN",
         u"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd",
         u"-//W3C//DTD HTML 4.01 Transitional//EN",
         u"http://www.w3.org/TR/html4/loose.dtd"),
        # XHTML 1.0 Frameset
        (u"-//W3C//DTD XHTML 1.0 Frameset//EN",
         u"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd",
         u"-//W3C//DTD HTML 4.01 Frameset//EN",
         u"http://www.w3.org/TR/html4/frameset.dtd"),
    )

    # (public ID regex, system ID regex or None, replacement public ID,
    #  replacement system ID or None) -- matched with re.match.
    # A None system regex means the entry only applies when the doctype
    # has no system identifier at all.
    html_to_xhtml = (
        # HTML 4.01 Strict
        (u"-//W3C//DTD HTML 4.0(1)?//EN",
         u"http://www.w3.org/TR/(html4|REC-html40)/strict.dtd",
         u"-//W3C//DTD XHTML 1.0 Strict//EN",
         u"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"),
        # HTML 4.01 Transitional
        (u"-//W3C//DTD HTML 4.0(1)? Transitional//EN",
         u"http://www.w3.org/TR/(html4|REC-html40)/loose.dtd",
         u"-//W3C//DTD XHTML 1.0 Transitional//EN",
         u"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"),
        # HTML 4.01 Frameset
        (u"-//W3C//DTD HTML 4.0(1)? Frameset//EN",
         u"http://www.w3.org/TR/(html4|REC-html40)/frameset.dtd",
         u"-//W3C//DTD XHTML 1.0 Frameset//EN",
         u"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"),
        # HTML 3.2
        (u"-//W3C//DTD HTML 3.2( Final)?//EN",
         None,
         u"-//W3C//DTD XHTML 1.0 Transitional//EN",
         None),
    )

    # Compare the whole root name: a prefix match would also rewrite
    # doctypes whose root merely starts with "html".
    if rootElement and rootElement.lower() == u'html':
        if mode == u'html':
            for old_public, old_system, new_public, new_system in xhtml_to_html:
                if publicID == old_public and \
                   (not systemID or systemID == old_system):
                    publicID = new_public
                    # Only emit a system ID if the original had one.
                    if systemID:
                        systemID = new_system
                    break
        elif mode == u'xhtml':
            if not publicID and not systemID:
                # Bare <!DOCTYPE html>: the emit_doctype option is
                # documented as dropping it in 'xhtml' mode.
                return u''
            if publicID:
                # Guarded on publicID because re.match raises TypeError
                # when handed None.
                for public_re, system_re, new_public, new_system in html_to_xhtml:
                    if not re.match(public_re, publicID):
                        continue
                    if systemID and not (system_re and
                                         re.match(system_re, systemID)):
                        continue
                    publicID = new_public
                    if systemID:
                        systemID = new_system
                    break

    doctype = u"<!DOCTYPE %s" % rootElement
    if publicID:
        doctype += u' PUBLIC "%s"' % publicID
    elif systemID:
        doctype += u" SYSTEM"
    if systemID:
        # Quote with whichever quote character the identifier does not
        # contain; if it contains both, report the error and fall back to
        # single quotes.
        if u'"' in systemID:
            if u"'" in systemID:
                self.serializeError(_("System identifer contains both single and double quote characters"))
            quote_char = u"'"
        else:
            quote_char = u'"'
        doctype += u" %s%s%s" % (quote_char, systemID, quote_char)
    doctype += u">"
    return doctype
235+
143236
defserialize(self,treewalker,encoding=None):
144237
in_cdata=False
145238
self.errors= []
@@ -157,26 +250,12 @@ def serialize(self, treewalker, encoding=None):
157250
ifself.omit_optional_tags:
158251
fromhtml5lib.filters.optionaltagsimportFilter
159252
treewalker=Filter(treewalker)
253+
posted_doctype=False
160254
fortokenintreewalker:
161255
type=token["type"]
162256
iftype=="Doctype":
163-
doctype=u"<!DOCTYPE %s"%token["name"]
164-
165-
iftoken["publicId"]:
166-
doctype+=u' PUBLIC "%s"'%token["publicId"]
167-
eliftoken["systemId"]:
168-
doctype+=u" SYSTEM"
169-
iftoken["systemId"]:
170-
iftoken["systemId"].find(u'"')>=0:
171-
iftoken["systemId"].find(u"'")>=0:
172-
self.serializeError(_("System identifer contains both single and double quote characters"))
173-
quote_char=u"'"
174-
else:
175-
quote_char=u'"'
176-
doctype+=u" %s%s%s"% (quote_char,token["systemId"],quote_char)
177-
178-
doctype+=u">"
179-
257+
posted_doctype=True
258+
doctype=self.calc_doctype(token)
180259
ifencoding:
181260
yielddoctype.encode(encoding)
182261
else:
@@ -196,6 +275,9 @@ def serialize(self, treewalker, encoding=None):
196275
yieldescape(token["data"])
197276

198277
eliftypein ("StartTag","EmptyTag"):
278+
ifnotposted_doctype:
279+
posted_doctype=True
280+
yieldself.calc_doctype()
199281
name=token["name"]
200282
ifnameinrcdataElementsandnotself.escape_rcdata:
201283
in_cdata=True

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp