@@ -168,57 +168,63 @@ def sanitize_token(self, token):
168
168
if token_type in (tokenTypes ["StartTag" ],tokenTypes ["EndTag" ],
169
169
tokenTypes ["EmptyTag" ]):
170
170
if token ["name" ]in self .allowed_elements :
171
- if "data" in token :
172
- attrs = dict ([(name ,val )for name ,val in
173
- token ["data" ][::- 1 ]
174
- if name in self .allowed_attributes ])
175
- for attr in self .attr_val_is_uri :
176
- if attr not in attrs :
177
- continue
178
- val_unescaped = re .sub ("[`\000 -\040 \177 -\240 \s]+" ,'' ,
179
- unescape (attrs [attr ])).lower ()
180
- #remove replacement characters from unescaped characters
181
- val_unescaped = val_unescaped .replace ("\ufffd " ,"" )
182
- if (re .match ("^[a-z0-9][-+.a-z0-9]*:" ,val_unescaped )and
183
- (val_unescaped .split (':' )[0 ]not in
184
- self .allowed_protocols )):
185
- del attrs [attr ]
186
- for attr in self .svg_attr_val_allows_ref :
187
- if attr in attrs :
188
- attrs [attr ]= re .sub (r'url\s*\(\s*[^#\s][^)]+?\)' ,
189
- ' ' ,
190
- unescape (attrs [attr ]))
191
- if (token ["name" ]in self .svg_allow_local_href and
192
- 'xlink:href' in attrs and re .search ('^\s*[^#\s].*' ,
193
- attrs ['xlink:href' ])):
194
- del attrs ['xlink:href' ]
195
- if 'style' in attrs :
196
- attrs ['style' ]= self .sanitize_css (attrs ['style' ])
197
- token ["data" ]= [[name ,val ]for name ,val in list (attrs .items ())]
198
- return token
171
+ return self .allowed_token (token ,token_type )
199
172
else :
200
- if token_type == tokenTypes ["EndTag" ]:
201
- token ["data" ]= "</%s>" % token ["name" ]
202
- elif token ["data" ]:
203
- attrs = '' .join ([' %s="%s"' % (k ,escape (v ))for k ,v in token ["data" ]])
204
- token ["data" ]= "<%s%s>" % (token ["name" ],attrs )
205
- else :
206
- token ["data" ]= "<%s>" % token ["name" ]
207
- if token .get ("selfClosing" ):
208
- token ["data" ]= token ["data" ][:- 1 ]+ "/>"
209
-
210
- if token ["type" ]in list (tokenTypes .keys ()):
211
- token ["type" ]= "Characters"
212
- else :
213
- token ["type" ]= tokenTypes ["Characters" ]
214
-
215
- del token ["name" ]
216
- return token
173
+ return self .disallowed_token (token ,token_type )
217
174
elif token_type == tokenTypes ["Comment" ]:
218
175
pass
219
176
else :
220
177
return token
221
178
179
def allowed_token(self, token, token_type):
    """Sanitize the attributes of a token whose tag name is allowed.

    Drops attributes not listed in ``self.allowed_attributes``; removes
    URI-valued attributes whose scheme is not in ``self.allowed_protocols``;
    strips non-local ``url(...)`` references from SVG attributes listed in
    ``self.svg_attr_val_allows_ref``; drops non-local ``xlink:href`` values
    on elements in ``self.svg_allow_local_href``; and filters ``style``
    values through ``self.sanitize_css``.

    :arg token: the tag token to sanitize (mutated in place)
    :arg token_type: the token's type (unused here; kept for interface
        symmetry with ``disallowed_token``)
    :returns: the sanitized token
    """
    if "data" in token:
        # Build the attribute dict from the reversed list so that, for
        # duplicate attribute names, the FIRST occurrence in the token
        # wins (later dict writes overwrite earlier ones).
        attrs = {name: val for name, val in token["data"][::-1]
                 if name in self.allowed_attributes}
        for attr in self.attr_val_is_uri:
            if attr not in attrs:
                continue
            # Strip backticks, control characters and whitespace that
            # browsers ignore when parsing URLs, then lowercase so the
            # scheme comparison is case-insensitive.
            # NOTE: raw strings here — the original non-raw patterns
            # relied on invalid escape sequences (SyntaxWarning in
            # modern Python); the regexes themselves are unchanged.
            val_unescaped = re.sub(r"[`\000-\040\177-\240\s]+", '',
                                   unescape(attrs[attr])).lower()
            # Remove replacement characters from unescaped characters.
            val_unescaped = val_unescaped.replace("\ufffd", "")
            if (re.match(r"^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
                    (val_unescaped.split(':')[0] not in
                     self.allowed_protocols)):
                # Attribute has an explicit scheme that is not allowed.
                del attrs[attr]
        for attr in self.svg_attr_val_allows_ref:
            if attr in attrs:
                # Blank out url(...) references that are not local
                # fragment references (i.e. do not start with '#').
                attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                     ' ',
                                     unescape(attrs[attr]))
        if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and
                re.search(r'^\s*[^#\s].*', attrs['xlink:href'])):
            # Only allow local (fragment) xlink:href references.
            del attrs['xlink:href']
        if 'style' in attrs:
            attrs['style'] = self.sanitize_css(attrs['style'])
        token["data"] = [[name, val] for name, val in attrs.items()]
    return token
208
+
209
def disallowed_token(self, token, token_type):
    """Re-emit a disallowed tag token as escaped character data.

    The tag's markup is reconstructed as literal text in ``token["data"]``
    and the token's type is switched to the "Characters" representation,
    so the tag is displayed rather than interpreted.

    :arg token: the tag token to neutralize (mutated in place)
    :arg token_type: the token's type, compared against ``tokenTypes``
    :returns: the rewritten token
    """
    name = token["name"]
    if token_type == tokenTypes["EndTag"]:
        rendered = "</%s>" % name
    elif token["data"]:
        # Serialize attributes with their values HTML-escaped.
        serialized = ''.join(' %s="%s"' % (k, escape(v))
                             for k, v in token["data"])
        rendered = "<%s%s>" % (name, serialized)
    else:
        rendered = "<%s>" % name
    if token.get("selfClosing"):
        rendered = rendered[:-1] + "/>"
    token["data"] = rendered

    # Token streams may carry either symbolic names or numeric type
    # codes; normalize to whichever "Characters" form matches.
    if token["type"] in tokenTypes:
        token["type"] = "Characters"
    else:
        token["type"] = tokenTypes["Characters"]

    del token["name"]
    return token
227
+
222
228
def sanitize_css (self ,style ):
223
229
# disallow urls
224
230
style = re .compile ('url\s*\(\s*[^\s)]+?\s*\)\s*' ).sub (' ' ,style )