Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit221b909

Browse files
author
Mark Pilgrim
committed
refactored validator filter, added attribute tests for input element
--HG--extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40968
1 parent14ee2b6 commit221b909

File tree

1 file changed

+238
-81
lines changed

1 file changed

+238
-81
lines changed

‎src/html5lib/filters/validator.py‎

Lines changed: 238 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -9,84 +9,196 @@
99
>>> p.parse('<!doctype html>\n<html foo=bar></html>')
1010
<<class 'html5lib.treebuilders.simpletree.Document'> None>
1111
>>> p.errors
12-
[((2, 14), 'unrecognized-attribute', {'attributeName': u'foo', 'tagName': u'html'})]
12+
[((2, 14), 'unknown-attribute', {'attributeName': u'foo', 'tagName': u'html'})]
1313
"""
1414

15+
try:
16+
frozenset
17+
exceptNameError:
18+
# Import from the sets module for python 2.3
19+
fromsetsimportSetasset
20+
fromsetsimportImmutableSetasfrozenset
1521
import_base
1622
fromhtml5lib.constantsimportE
1723
fromhtml5libimporttokenizer
1824
importgettext
1925
_=gettext.gettext
2026

2127
# Register this filter's error messages in E (imported from
# html5lib.constants).  Each key is the code placed in the "data" field of
# the ParseError tokens yielded below; the %(...)s placeholders correspond
# to the keys of the "datavars" dict yielded alongside that code.
E.update({
    "unknown-start-tag":
        _(u"Unknown start tag <%(tagName)s>"),
    "unknown-attribute":
        _(u"Unknown '%(attributeName)s' attribute on <%(tagName)s>"),
    "missing-required-attribute":
        _(u"Missing required '%(attributeName)s' attribute on <%(tagName)s>"),
    "unknown-input-type":
        _(u"Unknown value for input type: '%(inputType)s'"),
    "attribute-not-allowed-on-this-input-type":
        _(u"'%(attributeName)s' attribute is not allowed on <input type='%(inputType)s'>"),
})
2739

28-
globalAttributes=['class','contenteditable','contextmenu','dir',
40+
globalAttributes=frozenset(('class','contenteditable','contextmenu','dir',
2941
'draggable','id','irrelevant','lang','ref','tabindex','template',
3042
'title','onabort','onbeforeunload','onblur','onchange','onclick',
3143
'oncontextmenu','ondblclick','ondrag','ondragend','ondragenter',
3244
'ondragleave','ondragover','ondragstart','ondrop','onerror',
3345
'onfocus','onkeydown','onkeypress','onkeyup','onload','onmessage',
3446
'onmousedown','onmousemove','onmouseout','onmouseover','onmouseup',
35-
'onmousewheel','onresize','onscroll','onselect','onsubmit','onunload']
47+
'onmousewheel','onresize','onscroll','onselect','onsubmit','onunload'))
3648
# XXX lang in HTML only, xml:lang in XHTML only
3749

3850
allowedAttributeMap= {
39-
'html': ['xmlns'],
40-
'base': ['href','target'],
41-
'link': ['href','rel','media','hreflang','type'],
42-
'meta': ['name','http-equiv','content','charset'],# XXX charset in HTML only
43-
'style': ['media','type','scoped'],
44-
'blockquote': ['cite'],
45-
'ol': ['start'],
46-
'li': ['value'],# XXX depends on parent
47-
'a': ['href','target','ping','rel','media','hreflang','type'],
48-
'q': ['cite'],
49-
'time': ['datetime'],
50-
'meter': ['value','min','low','high','max','optimum'],
51-
'progress': ['value','max'],
52-
'ins': ['cite','datetime'],
53-
'del': ['cite','datetime'],
54-
'img': ['alt','src','usemap','ismap','height','width'],# XXX ismap depends on parent
55-
'iframe': ['src'],
56-
'object': ['data','type','usemap','height','width'],
57-
'param': ['name','value'],
58-
'video': ['src','autoplay','start','loopstart','loopend','end',
59-
'loopcount','controls'],
60-
'audio': ['src','autoplay','start','loopstart','loopend','end',
61-
'loopcount','controls'],
62-
'source': ['src','type','media'],
63-
'canvas': ['height','width'],
64-
'area': ['alt','coords','shape','href','target','ping','rel',
65-
'media','hreflang','type'],
66-
'colgroup': ['span'],# XXX only if element contains no <col> elements
67-
'col': ['span'],
68-
'td': ['colspan','rowspan'],
69-
'th': ['colspan','rowspan','scope'],
70-
# XXX form elements
71-
'script': ['src','defer','async','type'],
72-
'event-source': ['src'],
73-
'details': ['open'],
74-
'datagrid': ['multiple','disabled'],
75-
'command': ['type','label','icon','hidden','disabled','checked',
76-
'radiogroup','default'],
77-
'menu': ['type','label','autosubmit'],
78-
'font': ['style']
51+
'html':frozenset(('xmlns',)),
52+
'head':frozenset(()),
53+
'title':frozenset(()),
54+
'base':frozenset(('href','target')),
55+
'link':frozenset(('href','rel','media','hreflang','type')),
56+
'meta':frozenset(('name','http-equiv','content','charset')),# XXX charset in HTML only
57+
'style':frozenset(('media','type','scoped')),
58+
'body':frozenset(()),
59+
'section':frozenset(()),
60+
'nav':frozenset(()),
61+
'article':frozenset(()),
62+
'blockquote':frozenset(('cite',)),
63+
'aside':frozenset(()),
64+
'h1':frozenset(()),
65+
'h2':frozenset(()),
66+
'h3':frozenset(()),
67+
'h4':frozenset(()),
68+
'h5':frozenset(()),
69+
'h6':frozenset(()),
70+
'header':frozenset(()),
71+
'footer':frozenset(()),
72+
'address':frozenset(()),
73+
'p':frozenset(()),
74+
'hr':frozenset(()),
75+
'br':frozenset(()),
76+
'dialog':frozenset(()),
77+
'pre':frozenset(()),
78+
'ol':frozenset(('start',)),
79+
'ul':frozenset(()),
80+
'li':frozenset(('value',)),# XXX depends on parent
81+
'dl':frozenset(()),
82+
'dt':frozenset(()),
83+
'dd':frozenset(()),
84+
'a':frozenset(('href','target','ping','rel','media','hreflang','type')),
85+
'q':frozenset(('cite',)),
86+
'cite':frozenset(()),
87+
'em':frozenset(()),
88+
'strong':frozenset(()),
89+
'small':frozenset(()),
90+
'm':frozenset(()),
91+
'dfn':frozenset(()),
92+
'abbr':frozenset(()),
93+
'time':frozenset(('datetime',)),
94+
'meter':frozenset(('value','min','low','high','max','optimum')),
95+
'progress':frozenset(('value','max')),
96+
'code':frozenset(()),
97+
'var':frozenset(()),
98+
'samp':frozenset(()),
99+
'kbd':frozenset(()),
100+
'sup':frozenset(()),
101+
'sub':frozenset(()),
102+
'span':frozenset(()),
103+
'i':frozenset(()),
104+
'b':frozenset(()),
105+
'bdo':frozenset(()),
106+
'ins':frozenset(('cite','datetime')),
107+
'del':frozenset(('cite','datetime')),
108+
'figure':frozenset(()),
109+
'img':frozenset(('alt','src','usemap','ismap','height','width')),# XXX ismap depends on parent
110+
'iframe':frozenset(('src',)),
111+
# <embed> handled separately
112+
'object':frozenset(('data','type','usemap','height','width')),
113+
'param':frozenset(('name','value')),
114+
'video':frozenset(('src','autoplay','start','loopstart','loopend','end',
115+
'loopcount','controls')),
116+
'audio':frozenset(('src','autoplay','start','loopstart','loopend','end',
117+
'loopcount','controls')),
118+
'source':frozenset(('src','type','media')),
119+
'canvas':frozenset(('height','width')),
120+
'map':frozenset(()),
121+
'area':frozenset(('alt','coords','shape','href','target','ping','rel',
122+
'media','hreflang','type')),
123+
'table':frozenset(()),
124+
'caption':frozenset(()),
125+
'colgroup':frozenset(('span',)),# XXX only if element contains no <col> elements
126+
'col':frozenset(('span',)),
127+
'tbody':frozenset(()),
128+
'thead':frozenset(()),
129+
'tfoot':frozenset(()),
130+
'tr':frozenset(()),
131+
'td':frozenset(('colspan','rowspan')),
132+
'th':frozenset(('colspan','rowspan','scope')),
133+
# 'form': frozenset(('action', 'method', 'enctype', 'accept', 'name', 'onsubmit',
134+
# 'onreset', 'accept-charset', 'data', 'replace')),
135+
# all possible <input> attributes are listed here but <input> is really handled separately
136+
'input':frozenset(('accept','accesskey','action','alt','autocomplete','autofocus','checked','disabled','enctype','form','inputmode','list','maxlength','method','min','max','name','pattern','step','readonly','replace','required','size','src','tabindex','target','template','value')),
137+
# 'button': frozenset(('name', 'value', 'type', 'disabled', 'form', 'autofocus')),
138+
# 'select': frozenset(('name', 'size', 'multiple', 'disabled', 'data', 'accesskey',
139+
# 'form', 'autofocus')),
140+
# 'optgroup': frozenset(('disabled', 'label', 'form', 'autofocus')),
141+
# 'option': frozenset(('selected', 'disabled', 'label', 'value', 'form', 'autofocus')),
142+
# 'textarea': frozenset(('name', 'rows', 'cols', 'disabled', 'readonly', 'required',
143+
# 'form', 'autofocus', 'wrap', 'accept')),
144+
# 'label': frozenset(('for', 'accesskey', 'form')),
145+
# 'fieldset': frozenset(('disabled', 'form')),
146+
# 'output': frozenset(('form', 'name', 'for', 'onforminput', 'onformchange')),
147+
# 'datalist': frozenset(('data',)),
148+
# # XXX repetition model for repeating form controls
149+
'script':frozenset(('src','defer','async','type')),
150+
'noscript':frozenset(()),
151+
'noembed':frozenset(()),
152+
'event-source':frozenset(('src',)),
153+
'details':frozenset(('open',)),
154+
'datagrid':frozenset(('multiple','disabled')),
155+
'command':frozenset(('type','label','icon','hidden','disabled','checked',
156+
'radiogroup','default')),
157+
'menu':frozenset(('type','label','autosubmit')),
158+
'datatemplate':frozenset(()),
159+
'rule':frozenset(()),
160+
'nest':frozenset(()),
161+
'legend':frozenset(()),
162+
'div':frozenset(()),
163+
'font':frozenset(('style',)),
79164
}
80165

81166
requiredAttributeMap= {
82-
'link': ['href','rel'],
83-
'bdo': ['dir'],
84-
'img': ['src'],
85-
'embed': ['src'],
86-
'object': [],# XXX one of 'data' or 'type' is required
87-
'param': ['name','value'],
88-
'source': ['src'],
89-
'map': ['id'],
167+
'link':frozenset(('href','rel')),
168+
'bdo':frozenset(('dir',)),
169+
'img':frozenset(('src',)),
170+
'embed':frozenset(('src',)),
171+
'object':frozenset(()),# XXX one of 'data' or 'type' is required
172+
'param':frozenset(('name','value')),
173+
'source':frozenset(('src',)),
174+
'map':frozenset(('id',)),
175+
}
176+
177+
inputTypeAllowedAttributeMap= {
178+
'text':frozenset(('accesskey','autocomplete','autofocus','disabled','form','inputmode','list','maxlength','name','pattern','readonly','required','size','tabindex','value')),
179+
'password':frozenset(('accesskey','autocomplete','autofocus','disabled','form','inputmode','maxlength','name','pattern','readonly','required','size','tabindex','value')),
180+
'checkbox':frozenset(('accesskey','autofocus','checked','disabled','form','name','required','tabindex','value')),
181+
'radio':frozenset(('accesskey','autofocus','checked','disabled','form','name','required','tabindex','value')),
182+
'button':frozenset(('accesskey','autofocus','disabled','form','name','tabindex','value')),
183+
'submit':frozenset(('accesskey','action','autofocus','disabled','enctype','form','method','name','replace','tabindex','target','value')),
184+
'reset':frozenset(('accesskey','autofocus','disabled','form','name','tabindex','value')),
185+
'add':frozenset(('accesskey','autofocus','disabled','form','name','tabindex','template','value')),
186+
'remove':frozenset(('accesskey','autofocus','disabled','form','name','tabindex','value')),
187+
'move-up':frozenset(('accesskey','autofocus','disabled','form','name','tabindex','value')),
188+
'move-down':frozenset(('accesskey','autofocus','disabled','form','name','tabindex','value')),
189+
'file':frozenset(('accept','accesskey','autofocus','disabled','form','min','max','name','required','tabindex')),
190+
'hidden':frozenset(('disabled','form','name','value')),
191+
'image':frozenset(('accesskey','action','alt','autofocus','disabled','enctype','form','method','name','replace','src','tabindex','target')),
192+
'datetime':frozenset(('accesskey','autocomplete','autofocus','disabled','form','list','min','max','name','step','readonly','required','tabindex','value')),
193+
'datetime-local':frozenset(('accesskey','autocomplete','autofocus','disabled','form','list','min','max','name','step','readonly','required','tabindex','value')),
194+
'date':frozenset(('accesskey','autocomplete','autofocus','disabled','form','list','min','max','name','step','readonly','required','tabindex','value')),
195+
'month':frozenset(('accesskey','autocomplete','autofocus','disabled','form','list','min','max','name','step','readonly','required','tabindex','value')),
196+
'week':frozenset(('accesskey','autocomplete','autofocus','disabled','form','list','min','max','name','step','readonly','required','tabindex','value')),
197+
'time':frozenset(('accesskey','autocomplete','autofocus','disabled','form','list','min','max','name','step','readonly','required','tabindex','value')),
198+
'number':frozenset(('accesskey','autocomplete','autofocus','disabled','form','list','min','max','name','step','readonly','required','tabindex','value')),
199+
'range':frozenset(('accesskey','autocomplete','autofocus','disabled','form','list','min','max','name','step','readonly','required','tabindex','value')),
200+
'email':frozenset(('accesskey','autocomplete','autofocus','disabled','form','inputmode','list','maxlength','name','pattern','readonly','required','tabindex','value')),
201+
'url':frozenset(('accesskey','autocomplete','autofocus','disabled','form','inputmode','list','maxlength','name','pattern','readonly','required','tabindex','value')),
90202
}
91203

92204
classHTMLConformanceChecker(_base.Filter):
@@ -96,31 +208,76 @@ def __init__(self, stream, encoding, parseMeta, **kwargs):
96208

97209
def __iter__(self):
    """Yield every token of the wrapped stream, preceded by its errors.

    For each token produced by _base.Filter.__iter__, a handler is looked
    up on this object by name: first "validate<Type><Name>" (for example
    validateStartTagInput), then the generic "validate<Type>" (for example
    validateStartTag).  Whatever the handler yields is emitted before the
    token itself; the token is always passed through unchanged.
    """
    for token in _base.Filter.__iter__(self):
        tokenType = token.get("type", "-")
        # "-" can never be part of a method name, so tokens without a name
        # fall straight through to the generic handler.  The `or` also
        # covers a name that is present but None/empty, which would
        # otherwise break .capitalize().
        tokenName = (token.get("name") or "-").capitalize()
        handler = (
            getattr(self, "validate%s%s" % (tokenType, tokenName), None) or
            getattr(self, "validate%s" % tokenType, None))
        if handler:
            # A handler may be a plain method returning None instead of a
            # generator; treat that as "no errors".
            for error in handler(token) or []:
                yield error
        yield token
221+
222+
def validateStartTag(self, token):
    """Run the generic start-tag checks and yield their ParseError tokens.

    The checks run in a fixed order: unknown tag name, missing required
    attributes, then unknown attributes.
    """
    checks = (self.checkUnknownStartTag,
              self.checkStartTagRequiredAttributes,
              self.checkStartTagUnknownAttributes)
    for check in checks:
        # `or []` tolerates a check that returns None rather than a generator.
        for error in check(token) or []:
            yield error
226+
227+
def validateStartTagEmbed(self, token):
    """Validate an <embed> start tag: only required attributes are checked.

    The spec says <embed> accepts "any attributes w/o namespace", so
    checkStartTagUnknownAttributes is deliberately not called.
    checkUnknownStartTag is not called either; <embed> has no entry in
    allowedAttributeMap because it is handled here.
    """
    for error in self.checkStartTagRequiredAttributes(token) or []:
        yield error
231+
232+
def validateStartTagInput(self, token):
    """Validate the attributes of an <input> start tag against its type.

    Attribute names are lower-cased before checking.  The input type is the
    value of the "type" attribute, defaulting to "text" when absent.

    Yields ParseError tokens:
      * "unknown-input-type" when the type has no entry in
        inputTypeAllowedAttributeMap;
      * "unknown-attribute" for an attribute that is neither listed in
        allowedAttributeMap['input'] nor a global attribute;
      * "attribute-not-allowed-on-this-input-type" for an attribute that is
        listed for <input> but not for this particular type.  For an
        unknown type the per-type set is empty, so every <input>-specific
        attribute is reported this way.
    """
    attrDict = dict([(name.lower(), value) for name, value in token["data"]])
    inputType = attrDict.get("type", "text")
    if inputType not in inputTypeAllowedAttributeMap:
        yield {"type": "ParseError",
               "data": "unknown-input-type",
               # The "unknown-input-type" message formats %(inputType)s;
               # "attrValue" is the key this check originally emitted and
               # is kept so existing consumers keep working.
               "datavars": {"inputType": inputType,
                            "attrValue": inputType}}
    allowedForType = inputTypeAllowedAttributeMap.get(inputType, frozenset(()))
    inputAttributes = allowedAttributeMap['input']
    for attrName in attrDict:
        if attrName == "type":
            # "type" selects the per-type table above; it is not listed in
            # the attribute tables and must not be reported as unknown.
            continue
        if attrName not in inputAttributes:
            # Global attributes (id, class, ...) are valid on <input> just
            # as on any other element.
            if attrName not in globalAttributes:
                yield {"type": "ParseError",
                       "data": "unknown-attribute",
                       "datavars": {"tagName": "input",
                                    "attributeName": attrName}}
        elif attrName not in allowedForType:
            yield {"type": "ParseError",
                   "data": "attribute-not-allowed-on-this-input-type",
                   "datavars": {"attributeName": attrName,
                                "inputType": inputType}}
251+
252+
def checkUnknownStartTag(self, token):
    """Yield an "unknown-start-tag" ParseError for an unrecognized tag name.

    A tag is recognized when its lower-cased name is a key of
    allowedAttributeMap.
    """
    name = token["name"].lower()
    if name not in allowedAttributeMap:
        yield {"type": "ParseError",
               "data": "unknown-start-tag",
               "datavars": {"tagName": name}}
259+
260+
def checkStartTagRequiredAttributes(self, token):
    """Yield a "missing-required-attribute" ParseError per absent attribute.

    The required attributes for the lower-cased tag name come from
    requiredAttributeMap; tags without an entry (or with an empty one)
    produce no errors.
    """
    name = token["name"].lower()
    requiredAttributes = requiredAttributeMap.get(name)
    if not requiredAttributes:
        return
    # Lower-case the names so the comparison is case-insensitive, matching
    # the other attribute checks in this class.
    attrsPresent = frozenset([attrName.lower()
                              for attrName, attrValue in token["data"]])
    for attrName in requiredAttributes:
        if attrName not in attrsPresent:
            yield {"type": "ParseError",
                   "data": "missing-required-attribute",
                   "datavars": {"tagName": name,
                                "attributeName": attrName}}
272+
273+
def checkStartTagUnknownAttributes(self, token):
    """Yield an "unknown-attribute" ParseError per unrecognized attribute.

    An attribute is recognized when its lower-cased name is a global
    attribute or is listed for the lower-cased tag name in
    allowedAttributeMap.  A tag with no entry there is checked against the
    global attributes only.  The reported attributeName keeps the case it
    had in the token.
    """
    name = token["name"].lower()
    allowedAttributes = globalAttributes | allowedAttributeMap.get(
        name, frozenset(()))
    for attrName, attrValue in token["data"]:
        if attrName.lower() not in allowedAttributes:
            yield {"type": "ParseError",
                   "data": "unknown-attribute",
                   "datavars": {"tagName": name,
                                "attributeName": attrName}}
283+

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp