77class HTMLSanitizerMixin (object ):
88""" sanitization of XHTML+MathML+SVG and of inline style attributes."""
99
# HTML element names the sanitizer accepts: the HTML 4 vocabulary plus the
# HTML5-era additions (article, audio, canvas, section, video, ...).
# NOTE(review): presumably folded into `allowed_elements`, which
# sanitize_token checks token names against -- confirm in the rest of the class.
acceptable_elements = [
    'a', 'abbr', 'acronym', 'address', 'area',
    'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
    'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
    'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
    'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
    'figure', 'footer', 'font', 'form', 'header', 'h1', 'h2', 'h3', 'h4',
    'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', 'keygen', 'kbd',
    'label', 'legend', 'li', 'm', 'map', 'menu', 'meter', 'multicol',
    'nav', 'nextid', 'ol', 'output', 'optgroup', 'option', 'p', 'pre',
    'progress', 'q', 's', 'samp', 'section', 'select', 'small', 'sound',
    'source', 'spacer', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
    'tbody', 'td', 'textarea', 'time', 'tfoot', 'th', 'thead', 'tr', 'tt',
    'u', 'ul', 'var', 'video',
]
1923
2024mathml_elements = ['maction' ,'math' ,'merror' ,'mfrac' ,'mi' ,
2125'mmultiscripts' ,'mn' ,'mo' ,'mover' ,'mpadded' ,'mphantom' ,
@@ -24,24 +28,35 @@ class HTMLSanitizerMixin(object):
2428'munderover' ,'none' ]
2529
# SVG element names the sanitizer accepts (now including clipPath).
# Names must be exact: an entry with padding such as 'circle ' would never
# match a real element name in a membership test.
svg_elements = [
    'a', 'animate', 'animateColor', 'animateMotion',
    'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
    'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
    'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
    'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
    'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use',
]
3236
# HTML attribute names the sanitizer accepts: the HTML 4 set extended with
# HTML5 form/media attributes (autocomplete, controls, required, ...) and a
# number of legacy browser-specific ones (bgcolor, dynsrc, lowsrc, ...).
# NOTE(review): how this list is merged into the effective attribute
# whitelist is outside the visible part of the class -- confirm there.
acceptable_attributes = [
    'abbr', 'accept', 'accept-charset', 'accesskey',
    'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
    'background', 'balance', 'bgcolor', 'bgproperties', 'border',
    'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
    'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
    'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
    'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
    'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
    'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
    'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
    'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
    'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
    'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
    'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
    'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
    'optimum', 'pattern', 'ping', 'point-size', 'prompt', 'pqg',
    'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
    'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
    'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
    'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
    'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
    'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
    'width', 'wrap', 'xml:lang',
]
4560
4661mathml_attributes = ['actiontype' ,'align' ,'columnalign' ,'columnalign' ,
4762'columnalign' ,'columnlines' ,'columnspacing' ,'columnspan' ,'depth' ,
@@ -54,43 +69,45 @@ class HTMLSanitizerMixin(object):
5469'xlink:type' ,'xmlns' ,'xmlns:xlink' ]
5570
# SVG attribute names the sanitizer accepts (now including clip-path).
# Every entry is the exact attribute name with no surrounding or embedded
# whitespace; a padded entry such as ' dy' or 'horiz-adv -x' would never
# match a real attribute name.
svg_attributes = [
    'accent-height', 'accumulate', 'additive', 'alphabetic',
    'arabic-form', 'ascent', 'attributeName', 'attributeType',
    'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
    'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
    'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
    'fill-opacity', 'fill-rule', 'font-family', 'font-size',
    'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
    'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
    'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
    'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
    'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
    'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
    'opacity', 'orient', 'origin', 'overline-position',
    'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
    'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
    'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
    'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
    'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
    'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
    'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
    'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
    'transform', 'type', 'u1', 'u2', 'underline-position',
    'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
    'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
    'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
    'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
    'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
    'y1', 'y2', 'zoomAndPan',
]
8499
# Attribute names whose values are treated as URIs.
# NOTE(review): presumably these values are scheme-checked in the part of
# sanitize_token not shown here -- confirm against the full method.
attr_val_is_uri = [
    'href', 'src', 'cite', 'action', 'longdesc',
    'xlink:href', 'xml:base',
]
87102
# SVG attribute names whose values may carry a reference to another resource.
# NOTE(review): presumably the values are filtered to local references only
# in code outside this view -- confirm against the full class.
svg_attr_val_allows_ref = [
    'clip-path', 'color-profile', 'cursor', 'fill',
    'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
    'mask', 'stroke',
]
90106
# SVG element names associated with local href handling.
# NOTE(review): presumably only same-document (fragment) xlink:href values
# are kept on these elements; the enforcing code is outside this view -- confirm.
svg_allow_local_href = [
    'altGlyph', 'animate', 'animateColor',
    'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
    'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
    'set', 'use',
]
94111
95112acceptable_css_properties = ['azimuth' ,'background-color' ,
96113'border-bottom-color' ,'border-collapse' ,'border-color' ,
@@ -140,7 +157,13 @@ class HTMLSanitizerMixin(object):
140157# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
158# => <a>Click here for $100</a>
142159def sanitize_token (self ,token ):
143- if token ["type" ]in (tokenTypes ["StartTag" ],tokenTypes ["EndTag" ],
160+
161+ # accommodate filters which use token_type differently
162+ token_type = token ["type" ]
163+ if token_type in tokenTypes .keys ():
164+ token_type = tokenTypes [token_type ]
165+
166+ if token_type in (tokenTypes ["StartTag" ],tokenTypes ["EndTag" ],
144167tokenTypes ["EmptyTag" ]):
145168if token ["name" ]in self .allowed_elements :
146169if token .has_key ("data" ):
@@ -172,19 +195,24 @@ def sanitize_token(self, token):
172195token ["data" ]= [[name ,val ]for name ,val in attrs .items ()]
173196return token
174197else :
175- if token [ "type" ] == tokenTypes ["EndTag" ]:
198+ if token_type == tokenTypes ["EndTag" ]:
176199token ["data" ]= "</%s>" % token ["name" ]
177200elif token ["data" ]:
178201attrs = '' .join ([' %s="%s"' % (k ,escape (v ))for k ,v in token ["data" ]])
179202token ["data" ]= "<%s%s>" % (token ["name" ],attrs )
180203else :
181204token ["data" ]= "<%s>" % token ["name" ]
182- if token [ "selfClosing" ] :
205+ if token . get ( "selfClosing" ) :
183206token ["data" ]= token ["data" ][:- 1 ]+ "/>"
184- token ["type" ]= tokenTypes ["Characters" ]
207+
208+ if token ["type" ]in tokenTypes .keys ():
209+ token ["type" ]= "Characters"
210+ else :
211+ token ["type" ]= tokenTypes ["Characters" ]
212+
185213del token ["name" ]
186214return token
187- elif token [ "type" ] == tokenTypes ["Comment" ]:
215+ elif token_type == tokenTypes ["Comment" ]:
188216pass
189217else :
190218return token