7
7
class HTMLSanitizerMixin (object ):
8
8
""" sanitization of XHTML+MathML+SVG and of inline style attributes."""
9
9
10
# HTML element names on the sanitizer's accept list (post-patch set, which
# adds HTML5 names such as 'article', 'audio', 'canvas', 'section', 'video').
# Kept as a plain list so existing list concatenation by callers still works.
# NOTE(review): presumably combined into `allowed_elements`, which
# sanitize_token() consults -- confirm against the rest of the class.
acceptable_elements = [
    'a', 'abbr', 'acronym', 'address', 'area',
    'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
    'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
    'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
    'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
    'figure', 'footer', 'font', 'form', 'header', 'h1', 'h2', 'h3', 'h4',
    'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', 'keygen', 'kbd',
    'label', 'legend', 'li', 'm', 'map', 'menu', 'meter', 'multicol',
    'nav', 'nextid', 'ol', 'output', 'optgroup', 'option', 'p', 'pre',
    'progress', 'q', 's', 'samp', 'section', 'select', 'small', 'sound',
    'source', 'spacer', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
    'tbody', 'td', 'textarea', 'time', 'tfoot', 'th', 'thead', 'tr', 'tt',
    'u', 'ul', 'var', 'video',
]
19
23
20
24
mathml_elements = ['maction' ,'math' ,'merror' ,'mfrac' ,'mi' ,
21
25
'mmultiscripts' ,'mn' ,'mo' ,'mover' ,'mpadded' ,'mphantom' ,
@@ -24,24 +28,35 @@ class HTMLSanitizerMixin(object):
24
28
'munderover' ,'none' ]
25
29
26
30
# SVG element names on the sanitizer's accept list (post-patch set, which adds
# 'clipPath').  Every entry must be the bare tag name: an entry carrying
# stray whitespace (e.g. 'circle ' or 'font-face- name') can never compare
# equal to a parsed tag name, so that element would be silently rejected.
svg_elements = [
    'a', 'animate', 'animateColor', 'animateMotion',
    'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
    'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
    'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
    'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
    'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use',
]
32
36
33
37
# HTML attribute names on the sanitizer's accept list (post-patch set, which
# adds HTML5 form/media attributes and a number of legacy presentational
# ones).  Kept as a plain list so existing list concatenation still works.
# NOTE(review): how this list is merged with the MathML/SVG attribute lists
# is not visible here -- confirm against the rest of the class.
acceptable_attributes = [
    'abbr', 'accept', 'accept-charset', 'accesskey',
    'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
    'background', 'balance', 'bgcolor', 'bgproperties', 'border',
    'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
    'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
    'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
    'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
    'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
    'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
    'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
    'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
    'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
    'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
    'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
    'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
    'optimum', 'pattern', 'ping', 'point-size', 'prompt', 'pqg',
    'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
    'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
    'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
    'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
    'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
    'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
    'width', 'wrap', 'xml:lang',
]
45
60
46
61
mathml_attributes = ['actiontype' ,'align' ,'columnalign' ,'columnalign' ,
47
62
'columnalign' ,'columnlines' ,'columnspacing' ,'columnspan' ,'depth' ,
@@ -54,43 +69,45 @@ class HTMLSanitizerMixin(object):
54
69
'xlink:type' ,'xmlns' ,'xmlns:xlink' ]
55
70
56
71
# SVG attribute names on the sanitizer's accept list (post-patch set, which
# adds 'clip-path').  Entries must be exact attribute names: values such as
# ' color', 'fill-opacity ' or 'horiz-adv -x' (stray whitespace) would never
# match a real attribute, so the corresponding attributes would be dropped.
svg_attributes = [
    'accent-height', 'accumulate', 'additive', 'alphabetic',
    'arabic-form', 'ascent', 'attributeName', 'attributeType',
    'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
    'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
    'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
    'fill-opacity', 'fill-rule', 'font-family', 'font-size',
    'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
    'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
    'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
    'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
    'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
    'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
    'opacity', 'orient', 'origin', 'overline-position',
    'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
    'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
    'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
    'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
    'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
    'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
    'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
    'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
    'transform', 'type', 'u1', 'u2', 'underline-position',
    'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
    'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
    'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
    'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
    'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
    'y1', 'y2', 'zoomAndPan',
]
84
99
85
100
# Attribute names listed as carrying a URI value.
# NOTE(review): presumably these values get scheme-checked during
# sanitization; that code is not visible here -- confirm.
attr_val_is_uri = [
    'href', 'src', 'cite', 'action', 'longdesc',
    'xlink:href', 'xml:base',
]
87
102
88
103
# SVG attribute names flagged as allowing a reference in their value.
# NOTE(review): the code that consumes this list is not visible here;
# the exact treatment of such values should be confirmed there.
svg_attr_val_allows_ref = [
    'clip-path', 'color-profile', 'cursor', 'fill',
    'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
    'mask', 'stroke',
]
90
106
91
# SVG element names for which a local href is allowed (per the constant's
# name).  NOTE(review): the check that uses this list is outside the visible
# code -- confirm how "local" is determined there.
svg_allow_local_href = [
    'altGlyph', 'animate', 'animateColor',
    'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
    'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
    'set', 'use',
]
94
111
95
112
acceptable_css_properties = ['azimuth' ,'background-color' ,
96
113
'border-bottom-color' ,'border-collapse' ,'border-color' ,
@@ -140,7 +157,13 @@ class HTMLSanitizerMixin(object):
140
157
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
141
158
# => <a>Click here for $100</a>
142
159
def sanitize_token (self ,token ):
143
- if token ["type" ]in (tokenTypes ["StartTag" ],tokenTypes ["EndTag" ],
160
+
161
+ # accommodate filters which use token_type differently
162
+ token_type = token ["type" ]
163
+ if token_type in tokenTypes .keys ():
164
+ token_type = tokenTypes [token_type ]
165
+
166
+ if token_type in (tokenTypes ["StartTag" ],tokenTypes ["EndTag" ],
144
167
tokenTypes ["EmptyTag" ]):
145
168
if token ["name" ]in self .allowed_elements :
146
169
if token .has_key ("data" ):
@@ -172,19 +195,24 @@ def sanitize_token(self, token):
172
195
token ["data" ]= [[name ,val ]for name ,val in attrs .items ()]
173
196
return token
174
197
else :
175
- if token [ "type" ] == tokenTypes ["EndTag" ]:
198
+ if token_type == tokenTypes ["EndTag" ]:
176
199
token ["data" ]= "</%s>" % token ["name" ]
177
200
elif token ["data" ]:
178
201
attrs = '' .join ([' %s="%s"' % (k ,escape (v ))for k ,v in token ["data" ]])
179
202
token ["data" ]= "<%s%s>" % (token ["name" ],attrs )
180
203
else :
181
204
token ["data" ]= "<%s>" % token ["name" ]
182
- if token [ "selfClosing" ] :
205
+ if token . get ( "selfClosing" ) :
183
206
token ["data" ]= token ["data" ][:- 1 ]+ "/>"
184
- token ["type" ]= tokenTypes ["Characters" ]
207
+
208
+ if token ["type" ]in tokenTypes .keys ():
209
+ token ["type" ]= "Characters"
210
+ else :
211
+ token ["type" ]= tokenTypes ["Characters" ]
212
+
185
213
del token ["name" ]
186
214
return token
187
- elif token [ "type" ] == tokenTypes ["Comment" ]:
215
+ elif token_type == tokenTypes ["Comment" ]:
188
216
pass
189
217
else :
190
218
return token