Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f6741ea

Browse files
committed
Merge pull request #95 from gsnedders/escape-characters-serializer
Fix #11 by escaping enough to be safe in legacy browsers; r=nobody!
2 parents b48d0c1 + 9b8d8eb commit f6741ea

File tree

10 files changed

+4597
-74
lines changed

10 files changed

+4597
-74
lines changed

‎CHANGES.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ Released on XXX
3333
* **Use scripting disabled by default (as we don't implement
3434
scripting).**
3535

36+
* **Fix #11, avoiding the XSS bug potentially caused by serializer
37+
allowing attribute values to be escaped out of in old browser versions,
38+
changing the quote_attr_values option on serializer to take one of
39+
three values, "always" (the old True value), "legacy" (the new option,
40+
and the new default), and "spec" (the old False value, and the old
41+
default).**
42+
3643

3744
0.9999999/1.0b8
3845
~~~~~~~~~~~~~~~

‎html5lib/filters/lint.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010

1111

1212
classFilter(_base.Filter):
13+
def__init__(self,source,require_matching_tags=True):
14+
super(Filter,self).__init__(source)
15+
self.require_matching_tags=require_matching_tags
16+
1317
def__iter__(self):
1418
open_elements= []
1519
fortokenin_base.Filter.__iter__(self):
@@ -26,7 +30,7 @@ def __iter__(self):
2630
asserttype=="EmptyTag"
2731
else:
2832
asserttype=="StartTag"
29-
iftype=="StartTag":
33+
iftype=="StartTag"andself.require_matching_tags:
3034
open_elements.append((namespace,name))
3135
for (namespace,name),valueintoken["data"].items():
3236
assertnamespaceisNoneorisinstance(namespace,text_type)
@@ -44,7 +48,7 @@ def __iter__(self):
4448
assertname!=""
4549
if (notnamespaceornamespace==namespaces["html"])andnameinvoidElements:
4650
assertFalse,"Void element reported as EndTag token: %(tag)s"% {"tag":name}
47-
else:
51+
elifself.require_matching_tags:
4852
start=open_elements.pop()
4953
assertstart== (namespace,name)
5054

‎html5lib/serializer/htmlserializer.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
from __future__importabsolute_import,division,unicode_literals
22
fromsiximporttext_type
33

4-
try:
5-
fromfunctoolsimportreduce
6-
exceptImportError:
7-
pass
4+
importre
85

96
from ..constantsimportvoidElements,booleanAttributes,spaceCharacters
107
from ..constantsimportrcdataElements,entities,xmlEntities
@@ -13,6 +10,17 @@
1310

1411
spaceCharacters="".join(spaceCharacters)
1512

13+
quoteAttributeSpecChars=spaceCharacters+"\"'=<>`"
14+
quoteAttributeSpec=re.compile("["+quoteAttributeSpecChars+"]")
15+
quoteAttributeLegacy=re.compile("["+quoteAttributeSpecChars+
16+
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
17+
"\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
18+
"\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
19+
"\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
20+
"\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
21+
"\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
22+
"\u3000]")
23+
1624
try:
1725
fromcodecsimportregister_error,xmlcharrefreplace_errors
1826
exceptImportError:
@@ -73,7 +81,7 @@ def htmlentityreplace_errors(exc):
7381
classHTMLSerializer(object):
7482

7583
# attribute quoting options
76-
quote_attr_values=False
84+
quote_attr_values="legacy"# be secure by default
7785
quote_char='"'
7886
use_best_quote_char=True
7987

@@ -109,9 +117,9 @@ def __init__(self, **kwargs):
109117
inject_meta_charset=True|False
110118
Whether it insert a meta element to define the character set of the
111119
document.
112-
quote_attr_values=True|False
120+
quote_attr_values="legacy"|"spec"|"always"
113121
Whether to quote attribute values that don't require quoting
114-
per HTML5 parsing rules.
122+
per legacy browser behaviour, when required by the standard, or always.
115123
quote_char=u'"'|u"'"
116124
Use given quote character for attribute quoting. Default is to
117125
use double quote unless attribute value contains a double quote,
@@ -240,11 +248,15 @@ def serialize(self, treewalker, encoding=None):
240248
(knotinbooleanAttributes.get(name,tuple())and
241249
knotinbooleanAttributes.get("",tuple())):
242250
yieldself.encodeStrict("=")
243-
ifself.quote_attr_valuesornotv:
251+
ifself.quote_attr_values=="always"orlen(v)==0:
244252
quote_attr=True
253+
elifself.quote_attr_values=="spec":
254+
quote_attr=quoteAttributeSpec.search(v)isnotNone
255+
elifself.quote_attr_values=="legacy":
256+
quote_attr=quoteAttributeLegacy.search(v)isnotNone
245257
else:
246-
quote_attr=reduce(lambdax,y:xor (yinv),
247-
spaceCharacters+">\"'=",False)
258+
raiseValueError("quote_attr_values must be one of: "
259+
"'always', 'spec', or 'legacy'")
248260
v=v.replace("&","&amp;")
249261
ifself.escape_lt_in_attrs:
250262
v=v.replace("<","&lt;")

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp