|
1 | 1 | from __future__importabsolute_import,division,unicode_literals |
2 | 2 | fromsiximporttext_type |
3 | 3 |
|
4 | | -try: |
5 | | -fromfunctoolsimportreduce |
6 | | -exceptImportError: |
7 | | -pass |
| 4 | +importre |
8 | 5 |
|
9 | 6 | from ..constantsimportvoidElements,booleanAttributes,spaceCharacters |
10 | 7 | from ..constantsimportrcdataElements,entities,xmlEntities |
|
13 | 10 |
|
14 | 11 | spaceCharacters="".join(spaceCharacters) |
15 | 12 |
|
| 13 | +quoteAttributeSpecChars=spaceCharacters+"\"'=<>`" |
| 14 | +quoteAttributeSpec=re.compile("["+quoteAttributeSpecChars+"]") |
| 15 | +quoteAttributeLegacy=re.compile("["+quoteAttributeSpecChars+ |
| 16 | +"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n" |
| 17 | +"\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15" |
| 18 | +"\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" |
| 19 | +"\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000" |
| 20 | +"\u2001\u2002\u2003\u2004\u2005\u2006\u2007" |
| 21 | +"\u2008\u2009\u200a\u2028\u2029\u202f\u205f" |
| 22 | +"\u3000]") |
| 23 | + |
16 | 24 | try: |
17 | 25 | fromcodecsimportregister_error,xmlcharrefreplace_errors |
18 | 26 | exceptImportError: |
@@ -73,7 +81,7 @@ def htmlentityreplace_errors(exc): |
73 | 81 | classHTMLSerializer(object): |
74 | 82 |
|
75 | 83 | # attribute quoting options |
76 | | -quote_attr_values=False |
| 84 | +quote_attr_values="legacy"# be secure by default |
77 | 85 | quote_char='"' |
78 | 86 | use_best_quote_char=True |
79 | 87 |
|
@@ -109,9 +117,9 @@ def __init__(self, **kwargs): |
109 | 117 | inject_meta_charset=True|False |
110 | 118 | Whether to insert a meta element to define the character set of the |
111 | 119 | document. |
112 | | - quote_attr_values=True|False |
| 120 | + quote_attr_values="legacy"|"spec"|"always" |
113 | 121 | Whether to quote attribute values that don't require quoting |
114 | | - per HTML5 parsing rules. |
| 122 | + per legacy browser behaviour, when required by the standard, or always. |
115 | 123 | quote_char=u'"'|u"'" |
116 | 124 | Use given quote character for attribute quoting. Default is to |
117 | 125 | use double quote unless attribute value contains a double quote, |
@@ -240,11 +248,15 @@ def serialize(self, treewalker, encoding=None): |
240 | 248 | (knotinbooleanAttributes.get(name,tuple())and |
241 | 249 | knotinbooleanAttributes.get("",tuple())): |
242 | 250 | yieldself.encodeStrict("=") |
243 | | -ifself.quote_attr_valuesornotv: |
| 251 | +ifself.quote_attr_values=="always"orlen(v)==0: |
244 | 252 | quote_attr=True |
| 253 | +elifself.quote_attr_values=="spec": |
| 254 | +quote_attr=quoteAttributeSpec.search(v)isnotNone |
| 255 | +elifself.quote_attr_values=="legacy": |
| 256 | +quote_attr=quoteAttributeLegacy.search(v)isnotNone |
245 | 257 | else: |
246 | | -quote_attr=reduce(lambdax,y:xor (yinv), |
247 | | -spaceCharacters+">\"'=",False) |
| 258 | +raiseValueError("quote_attr_values must be one of: " |
| 259 | +"'always', 'spec', or 'legacy'") |
248 | 260 | v=v.replace("&","&") |
249 | 261 | ifself.escape_lt_in_attrs: |
250 | 262 | v=v.replace("<","<") |
|