Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 9b8d8eb

Browse files
committed
Fix #11, #12: quote attributes that need escaping in legacy browsers
These are mostly out of the market now, so this isn't massively needed any more; nevertheless, avoiding XSS as much as possible is inevitably desirable. This alters the API so that quote_attr_values is now a ternary setting, choosing between legacy-safe behaviour, spec behaviour, and always quoting.
1 parent 4768c64, commit 9b8d8eb

File tree

5 files changed

+132
-11
lines changed

5 files changed

+132
-11
lines changed

‎CHANGES.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ Released on XXX
3333
* **Use scripting disabled by default (as we don't implement
3434
scripting).**
3535

36+
* **Fix #11, avoiding the XSS bug potentially caused by serializer
37+
allowing attribute values to be escaped out of in old browser versions,
38+
changing the quote_attr_values option on serializer to take one of
39+
three values, "always" (the old True value), "legacy" (the new option,
40+
and the new default), and "spec" (the old False value, and the old
41+
default).**
42+
3643

3744
0.9999999/1.0b8
3845
~~~~~~~~~~~~~~~

‎html5lib/serializer/htmlserializer.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,16 @@
1010

1111
spaceCharacters="".join(spaceCharacters)
1212

13-
quoteAttributeSpec=re.compile("["+spaceCharacters+"\"'=<>`]")
13+
quoteAttributeSpecChars=spaceCharacters+"\"'=<>`"
14+
quoteAttributeSpec=re.compile("["+quoteAttributeSpecChars+"]")
15+
quoteAttributeLegacy=re.compile("["+quoteAttributeSpecChars+
16+
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
17+
"\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
18+
"\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
19+
"\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
20+
"\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
21+
"\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
22+
"\u3000]")
1423

1524
try:
1625
fromcodecsimportregister_error,xmlcharrefreplace_errors
@@ -72,7 +81,7 @@ def htmlentityreplace_errors(exc):
7281
classHTMLSerializer(object):
7382

7483
# attribute quoting options
75-
quote_attr_values=False
84+
quote_attr_values="legacy"# be secure by default
7685
quote_char='"'
7786
use_best_quote_char=True
7887

@@ -108,9 +117,9 @@ def __init__(self, **kwargs):
108117
inject_meta_charset=True|False
109118
Whether it insert a meta element to define the character set of the
110119
document.
111-
quote_attr_values=True|False
120+
quote_attr_values="legacy"|"spec"|"always"
112121
Whether to quote attribute values that don't require quoting
113-
perHTML5 parsing rules.
122+
perlegacy browser behaviour, when required by the standard, or always.
114123
quote_char=u'"'|u"'"
115124
Use given quote character for attribute quoting. Default is to
116125
use double quote unless attribute value contains a double quote,
@@ -239,10 +248,15 @@ def serialize(self, treewalker, encoding=None):
239248
(knotinbooleanAttributes.get(name,tuple())and
240249
knotinbooleanAttributes.get("",tuple())):
241250
yieldself.encodeStrict("=")
242-
ifself.quote_attr_values:
251+
ifself.quote_attr_values=="always"orlen(v)==0:
243252
quote_attr=True
253+
elifself.quote_attr_values=="spec":
254+
quote_attr=quoteAttributeSpec.search(v)isnotNone
255+
elifself.quote_attr_values=="legacy":
256+
quote_attr=quoteAttributeLegacy.search(v)isnotNone
244257
else:
245-
quote_attr=len(v)==0orquoteAttributeSpec.search(v)
258+
raiseValueError("quote_attr_values must be one of: "
259+
"'always', 'spec', or 'legacy'")
246260
v=v.replace("&","&amp;")
247261
ifself.escape_lt_in_attrs:
248262
v=v.replace("<","&lt;")

‎html5lib/tests/serializer-testdata/core.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@
242242
},
243243
{
244244
"expected": [
245-
"<span title=foo\u000bbar>"
245+
"<span title=\"foo\u000bbar\">"
246246
],
247247
"input": [
248248
[

‎html5lib/tests/serializer-testdata/options.test

Lines changed: 73 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@
4141
]
4242
]
4343
],
44-
"description": "quote_attr_values=true",
44+
"description": "quote_attr_values='always'",
4545
"options": {
46-
"quote_attr_values":true
46+
"quote_attr_values":"always"
4747
}
4848
},
4949
{
@@ -64,9 +64,78 @@
6464
]
6565
]
6666
],
67-
"description": "quote_attr_values=true with irrelevant",
67+
"description": "quote_attr_values='always' with irrelevant",
6868
"options": {
69-
"quote_attr_values": true
69+
"quote_attr_values": "always"
70+
}
71+
},
72+
{
73+
"expected": [
74+
"<div class=\"foo\">"
75+
],
76+
"input": [
77+
[
78+
"StartTag",
79+
"http://www.w3.org/1999/xhtml",
80+
"div",
81+
[
82+
{
83+
"namespace": null,
84+
"name": "class",
85+
"value": "foo"
86+
}
87+
]
88+
]
89+
],
90+
"description": "non-minimized quote_attr_values='always'",
91+
"options": {
92+
"quote_attr_values": "always"
93+
}
94+
},
95+
{
96+
"expected": [
97+
"<div class=foo>"
98+
],
99+
"input": [
100+
[
101+
"StartTag",
102+
"http://www.w3.org/1999/xhtml",
103+
"div",
104+
[
105+
{
106+
"namespace": null,
107+
"name": "class",
108+
"value": "foo"
109+
}
110+
]
111+
]
112+
],
113+
"description": "non-minimized quote_attr_values='legacy'",
114+
"options": {
115+
"quote_attr_values": "legacy"
116+
}
117+
},
118+
{
119+
"expected": [
120+
"<div class=foo>"
121+
],
122+
"input": [
123+
[
124+
"StartTag",
125+
"http://www.w3.org/1999/xhtml",
126+
"div",
127+
[
128+
{
129+
"namespace": null,
130+
"name": "class",
131+
"value": "foo"
132+
}
133+
]
134+
]
135+
],
136+
"description": "non-minimized quote_attr_values='spec'",
137+
"options": {
138+
"quote_attr_values": "spec"
70139
}
71140
},
72141
{

‎html5lib/tests/test_serializer.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,37 @@ def testComment():
146146
throwsWithLatin1([["Comment","\u0101"]])
147147

148148

149+
@pytest.mark.parametrize("c",list("\t\n\u000C\x20\r\"'=<>`"))
150+
deftestSpecQuoteAttribute(c):
151+
input_= [["StartTag","http://www.w3.org/1999/xhtml","span",
152+
[{"namespace":None,"name":"foo","value":c}]]]
153+
ifc=='"':
154+
output_= ["<span foo='%s'>"%c]
155+
else:
156+
output_= ['<span foo="%s">'%c]
157+
options_= {"quote_attr_values":"spec"}
158+
runSerializerTest(input_,output_,options_)
159+
160+
161+
@pytest.mark.parametrize("c",list("\t\n\u000C\x20\r\"'=<>`"
162+
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
163+
"\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
164+
"\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
165+
"\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
166+
"\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
167+
"\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
168+
"\u3000"))
169+
deftestLegacyQuoteAttribute(c):
170+
input_= [["StartTag","http://www.w3.org/1999/xhtml","span",
171+
[{"namespace":None,"name":"foo","value":c}]]]
172+
ifc=='"':
173+
output_= ["<span foo='%s'>"%c]
174+
else:
175+
output_= ['<span foo="%s">'%c]
176+
options_= {"quote_attr_values":"legacy"}
177+
runSerializerTest(input_,output_,options_)
178+
179+
149180
@pytest.fixture
150181
deflxml_parser():
151182
returnetree.XMLParser(resolve_entities=False)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp