|
8 | 8 | from .constantsimporttokenTypes |
9 | 9 |
|
10 | 10 |
|
# Validates the part of a ``data:`` URI that follows the scheme, i.e.
# "<type>/<subtype>[;charset=...][;base64],<data>".  Compiled once at module
# level so it is not rebuilt for every attribute that is checked.
#
# Named groups: content_type, charset, encoding, data (includes the leading
# comma), base64_encoded_data, url_encoded_data.  Exactly one of the last two
# is set on a successful match; purely alphanumeric data is reported as
# base64_encoded_data because that alternative is tried first.
content_type_rgx = re.compile(r'''
    ^
    # Match a content type <application>/<type>
    (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
    # Match any character set and encoding.
    # Note that this does not prevent the same one being set twice.
    (?:;charset=(?P<charset>[-a-zA-Z0-9]+)|;(?P<encoding>base64)){0,2}
    # Match the base64-encoded or urlencoded data
    (?P<data>,
        (?:
            (?P<base64_encoded_data>[a-zA-Z0-9+/]+=*)
            |
            # Any non-empty mix of alphanumerics and percent escapes; the
            # two inner alternatives start with different characters, so
            # the repetition cannot backtrack pathologically.
            (?P<url_encoded_data>(?:[a-zA-Z0-9]|%[a-fA-F0-9]{2})+)
        )
    )
    $
    ''',
    re.VERBOSE)
| 27 | + |
| 28 | + |
11 | 29 | classHTMLSanitizerMixin(object): |
12 | 30 | """ sanitization of XHTML+MathML+SVG and of inline style attributes.""" |
13 | 31 |
|
@@ -197,24 +215,8 @@ def allowed_token(self, token, token_type): |
197 | 215 | ifuri: |
198 | 216 | ifuri.schemenotinself.allowed_protocols: |
199 | 217 | delattrs[attr] |
200 | | -rgx=re.compile(r''' |
201 | | - ^ |
202 | | - # Match a content type <application>/<type> |
203 | | - (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) |
204 | | - # Match any character set and encoding |
205 | | - # Note that this does not prevent the |
206 | | - # same one being set twice |
207 | | - # The charset group is currently unused |
208 | | - (?:;charset=(?P<charset>[-a-zA-Z0-9]+)|;(?P<encoding>base64)){0,2} |
209 | | - # Match the base64-encoded or urlencoded |
210 | | - # data |
211 | | - # The data group is currently unused |
212 | | - (?P<data>,(?P<base64_encoded_data>[a-zA-Z0-9+/]+=*|(?P<url_encoded_data>[a-zA-Z0-9]+|%[a-fA-F0-9]{2}))) |
213 | | - $ |
214 | | - ''', |
215 | | -re.VERBOSE) |
216 | 218 | ifuri.scheme=='data': |
217 | | -m=rgx.match(uri.path) |
| 219 | +m=content_type_rgx.match(uri.path) |
218 | 220 | ifnotm: |
219 | 221 | delattrs[attr] |
220 | 222 | ifm.group('content_type')notinself.allowed_content_types: |
|