Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit bbbb03f

Browse files
committed
Fix issue #156.
Fix some Unicode mix-up in the serializer, too, making sure Unicode strings are unicode strings.
1 parent c2eecb5, commit bbbb03f

File tree

2 files changed

+57
-67
lines changed

2 files changed

+57
-67
lines changed

‎html5lib/filters/inject_meta_charset.py‎

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -13,44 +13,44 @@ def __iter__(self):
1313
fortokenin_base.Filter.__iter__(self):
1414
type=token["type"]
1515
iftype=="StartTag":
16-
iftoken["name"].lower()=="head":
16+
iftoken["name"].lower()==u"head":
1717
state="in_head"
1818

1919
eliftype=="EmptyTag":
20-
iftoken["name"].lower()=="meta":
20+
iftoken["name"].lower()==u"meta":
2121
# replace charset with actual encoding
2222
has_http_equiv_content_type=False
2323
for (namespace,name),valueintoken["data"].iteritems():
2424
ifnamespace!=None:
2525
continue
26-
elifname.lower()=='charset':
26+
elifname.lower()==u'charset':
2727
token["data"][(namespace,name)]=self.encoding
2828
meta_found=True
2929
break
30-
elifname=='http-equiv'andvalue.lower()=='content-type':
30+
elifname==u'http-equiv'andvalue.lower()==u'content-type':
3131
has_http_equiv_content_type=True
3232
else:
33-
ifhas_http_equiv_content_typeand (None,"content")intoken["data"]:
34-
token["data"][(None,"content")]=u'text/html; charset=%s'%self.encoding
33+
ifhas_http_equiv_content_typeand (None,u"content")intoken["data"]:
34+
token["data"][(None,u"content")]=u'text/html; charset=%s'%self.encoding
3535
meta_found=True
3636

37-
eliftoken["name"].lower()=="head"andnotmeta_found:
37+
eliftoken["name"].lower()==u"head"andnotmeta_found:
3838
# insert meta into empty head
39-
yield {"type":"StartTag","name":"head",
39+
yield {"type":"StartTag","name":u"head",
4040
"data":token["data"]}
41-
yield {"type":"EmptyTag","name":"meta",
42-
"data": {(None,"charset"):self.encoding}}
43-
yield {"type":"EndTag","name":"head"}
41+
yield {"type":"EmptyTag","name":u"meta",
42+
"data": {(None,u"charset"):self.encoding}}
43+
yield {"type":"EndTag","name":u"head"}
4444
meta_found=True
4545
continue
4646

4747
eliftype=="EndTag":
48-
iftoken["name"].lower()=="head"andpending:
48+
iftoken["name"].lower()==u"head"andpending:
4949
# insert meta into head (if necessary) and flush pending queue
5050
yieldpending.pop(0)
5151
ifnotmeta_found:
52-
yield {"type":"EmptyTag","name":"meta",
53-
"data": {(None,"charset"):self.encoding}}
52+
yield {"type":"EmptyTag","name":u"meta",
53+
"data": {(None,u"charset"):self.encoding}}
5454
whilepending:
5555
yieldpending.pop(0)
5656
meta_found=True

‎html5lib/serializer/htmlserializer.py‎

Lines changed: 43 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,12 @@ def htmlentityreplace_errors(exc):
7676

7777
delregister_error
7878

79-
defencode(text,encoding):
80-
returntext.encode(encoding,unicode_encode_errors)
8179

8280
classHTMLSerializer(object):
8381

8482
# attribute quoting options
8583
quote_attr_values=False
86-
quote_char='"'
84+
quote_char=u'"'
8785
use_best_quote_char=True
8886

8987
# tag syntax options
@@ -159,7 +157,22 @@ def __init__(self, **kwargs):
159157
self.errors= []
160158
self.strict=False
161159

160+
defencode(self,string):
161+
assert(isinstance(string,unicode))
162+
ifself.encoding:
163+
returnstring.encode(self.encoding,unicode_encode_errors)
164+
else:
165+
returnstring
166+
167+
defencodeStrict(self,string):
168+
assert(isinstance(string,unicode))
169+
ifself.encoding:
170+
returnstring.encode(self.encoding,"strict")
171+
else:
172+
returnstring
173+
162174
defserialize(self,treewalker,encoding=None):
175+
self.encoding=encoding
163176
in_cdata=False
164177
self.errors= []
165178
ifencodingandself.inject_meta_charset:
@@ -195,27 +208,19 @@ def serialize(self, treewalker, encoding=None):
195208
doctype+=u" %s%s%s"% (quote_char,token["systemId"],quote_char)
196209

197210
doctype+=u">"
198-
199-
ifencoding:
200-
yielddoctype.encode(encoding)
201-
else:
202-
yielddoctype
211+
yieldself.encodeStrict(doctype)
203212

204213
eliftypein ("Characters","SpaceCharacters"):
205214
iftype=="SpaceCharacters"orin_cdata:
206215
ifin_cdataandtoken["data"].find("</")>=0:
207216
self.serializeError(_("Unexpected </ in CDATA"))
208-
ifencoding:
209-
yieldtoken["data"].encode(encoding,"strict")
210-
else:
211-
yieldtoken["data"]
212-
elifencoding:
213-
yieldencode(escape(token["data"]),encoding)
217+
yieldself.encode(token["data"])
214218
else:
215-
yieldescape(token["data"])
219+
yieldself.encode(escape(token["data"]))
216220

217221
eliftypein ("StartTag","EmptyTag"):
218222
name=token["name"]
223+
yieldself.encodeStrict(u"<%s"%name)
219224
ifnameinrcdataElementsandnotself.escape_rcdata:
220225
in_cdata=True
221226
elifin_cdata:
@@ -225,69 +230,56 @@ def serialize(self, treewalker, encoding=None):
225230
#TODO: Add namespace support here
226231
k=attr_name
227232
v=attr_value
228-
ifencoding:
229-
k=k.encode(encoding,"strict")
230-
attributes.append(' ')
233+
yieldself.encodeStrict(u' ')
231234

232-
attributes.append(k)
235+
yieldself.encodeStrict(k)
233236
ifnotself.minimize_boolean_attributesor \
234237
(knotinbooleanAttributes.get(name,tuple()) \
235238
andknotinbooleanAttributes.get("",tuple())):
236-
attributes.append("=")
239+
yieldself.encodeStrict(u"=")
237240
ifself.quote_attr_valuesornotv:
238241
quote_attr=True
239242
else:
240243
quote_attr=reduce(lambdax,y:xor (yinv),
241-
spaceCharacters+">\"'=",False)
242-
v=v.replace("&","&amp;")
243-
ifself.escape_lt_in_attrs:v=v.replace("<","&lt;")
244-
ifencoding:
245-
v=encode(v,encoding)
244+
spaceCharacters+u">\"'=",False)
245+
v=v.replace(u"&",u"&amp;")
246+
ifself.escape_lt_in_attrs:v=v.replace(u"<",u"&lt;")
246247
ifquote_attr:
247248
quote_char=self.quote_char
248249
ifself.use_best_quote_char:
249-
if"'"invand'"'notinv:
250-
quote_char='"'
251-
elif'"'invand"'"notinv:
252-
quote_char="'"
253-
ifquote_char=="'":
254-
v=v.replace("'","&#39;")
250+
ifu"'"invandu'"'notinv:
251+
quote_char=u'"'
252+
elifu'"'invandu"'"notinv:
253+
quote_char=u"'"
254+
ifquote_char==u"'":
255+
v=v.replace(u"'",u"&#39;")
255256
else:
256-
v=v.replace('"',"&quot;")
257-
attributes.append(quote_char)
258-
attributes.append(v)
259-
attributes.append(quote_char)
257+
v=v.replace(u'"',u"&quot;")
258+
yieldself.encodeStrict(quote_char)
259+
yieldself.encode(v)
260+
yieldself.encodeStrict(quote_char)
260261
else:
261-
attributes.append(v)
262+
yieldself.encode(v)
262263
ifnameinvoidElementsandself.use_trailing_solidus:
263264
ifself.space_before_trailing_solidus:
264-
attributes.append(" /")
265+
yieldself.encodeStrict(u" /")
265266
else:
266-
attributes.append("/")
267-
ifencoding:
268-
yield"<%s%s>"% (name.encode(encoding,"strict"),"".join(attributes))
269-
else:
270-
yieldu"<%s%s>"% (name,u"".join(attributes))
267+
yieldself.encodeStrict(u"/")
268+
yieldself.encode(u">")
271269

272270
eliftype=="EndTag":
273271
name=token["name"]
274272
ifnameinrcdataElements:
275273
in_cdata=False
276274
elifin_cdata:
277275
self.serializeError(_("Unexpected child element of a CDATA element"))
278-
end_tag=u"</%s>"%name
279-
ifencoding:
280-
end_tag=end_tag.encode(encoding,"strict")
281-
yieldend_tag
276+
yieldself.encodeStrict(u"</%s>"%name)
282277

283278
eliftype=="Comment":
284279
data=token["data"]
285280
ifdata.find("--")>=0:
286281
self.serializeError(_("Comment contains --"))
287-
comment=u"<!--%s-->"%token["data"]
288-
ifencoding:
289-
comment=comment.encode(encoding,unicode_encode_errors)
290-
yieldcomment
282+
yieldself.encodeStrict(u"<!--%s-->"%token["data"])
291283

292284
eliftype=="Entity":
293285
name=token["name"]
@@ -298,9 +290,7 @@ def serialize(self, treewalker, encoding=None):
298290
data=entities[key]
299291
else:
300292
data=u"&%s;"%name
301-
ifencoding:
302-
data=data.encode(encoding,unicode_encode_errors)
303-
yielddata
293+
yieldself.encodeStrict(data)
304294

305295
else:
306296
self.serializeError(token["data"])

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp