1919from sets import Set as set
2020from sets import ImmutableSet as frozenset
2121import _base
22- from html5lib .constants import E
22+ from html5lib .constants import E , spaceCharacters
2323from html5lib import tokenizer
2424import gettext
2525_ = gettext .gettext
3737_ (u"'%(attributeName)s' attribute is not allowed on <input type=%(inputType)s>." ),
3838"deprecated-attribute" :
3939_ (u"'%(attributeName)s' attribute is deprecated on <%(tagName)s>." ),
40+ "invalid-class-attribute" :
41+ _ (u"Invalid class attribute value on <%(tagName)s>." ),
42+ "duplicate-value-in-token-list" :
43+ _ (u"Duplicate value '%(attributeValue)s' in token list in '%(attributeName)s' attribute on <%(tagName)s>." ),
4044})
4145
4246globalAttributes = frozenset (('class' ,'contenteditable' ,'contextmenu' ,'dir' ,
@@ -225,18 +229,35 @@ def __iter__(self):
225229for t in method (token )or []:yield t
226230yield token
227231
def checkAttributeValues(self, token):
    """Run the attribute-value validators that apply to token's attributes.

    For every (name, value) pair in token["data"], a validator method is
    looked up on self by name: first the tag-specific
    validateAttributeValue<Tag><Attr>, and only if that is missing the
    generic validateAttributeValue<Attr>.  The tag name and the lower-cased
    attribute name are each passed through capitalize() to build the
    method name.  Everything the selected validator produces is yielded;
    attributes with no matching validator are skipped.

    The validator receives the tag name as found in the token and the
    lower-cased attribute name, not the capitalized forms.
    """
    tagName = token.get("name", "")
    prefix = "validateAttributeValue"
    tagPart = tagName.capitalize()
    for rawName, attrValue in token.get("data", []):
        attrName = rawName.lower()
        attrPart = attrName.capitalize()
        # Tag-specific validator wins; the generic one is only consulted
        # when no tag-specific validator exists.
        validator = (getattr(self, prefix + tagPart + attrPart, None) or
                     getattr(self, prefix + attrPart, None))
        if not validator:
            continue
        for t in validator(token, tagName, attrName, attrValue) or []:
            yield t
245+
def validateStartTag(self, token):
    """Yield every error reported for a generic start tag token.

    The checks run in a fixed order: unknown tag name, required
    attributes, unknown attribute names, then attribute values.  Each
    check is looked up and invoked only once the previous one has been
    exhausted, and a check that returns None contributes nothing.
    """
    checkNames = ("checkUnknownStartTag",
                  "checkStartTagRequiredAttributes",
                  "checkStartTagUnknownAttributes",
                  "checkAttributeValues")
    for checkName in checkNames:
        for t in getattr(self, checkName)(token) or []:
            yield t
232251
def validateStartTagEmbed(self, token):
    """Yield the errors for this start tag, skipping the unknown-attribute check.

    Only the required-attribute check and the attribute-value check are
    run, in that order.
    """
    # spec says "any attributes w/o namespace"
    # so checkStartTagUnknownAttributes is deliberately not called
    checkNames = ("checkStartTagRequiredAttributes",
                  "checkAttributeValues")
    for checkName in checkNames:
        for t in getattr(self, checkName)(token) or []:
            yield t
237257
238258def validateStartTagInput (self ,token ):
239- attrDict = dict ([(name .lower (),value )for name ,value in token ["data" ]])
259+ for t in self .checkAttributeValues (token )or []:yield t
260+ attrDict = dict ([(name .lower (),value )for name ,value in token .get ("data" , [])])
240261inputType = attrDict .get ("type" ,"text" )
241262if inputType not in inputTypeAllowedAttributeMap .keys ():
242263yield {"type" :"ParseError" ,
@@ -262,18 +283,18 @@ def validateStartTagInput(self, token):
262283
def checkUnknownStartTag(self, token):
    """Yield an "unknown-start-tag" ParseError if the tag name is not recognized.

    The tag name is read from token["name"] (defaulting to ""), lower-cased,
    and looked up among the keys of allowedAttributeMap.  The lower-cased
    name is reported in the error's datavars.
    """
    name = token.get("name", "").lower()
    # Test membership on the mapping itself: going through .keys() builds a
    # throwaway list on Python 2 and scans it linearly.
    if name not in allowedAttributeMap:
        yield {"type": "ParseError",
               "data": "unknown-start-tag",
               "datavars": {"tagName": name}}
270291
271292def checkStartTagRequiredAttributes (self ,token ):
272293# check for presence of required attributes
273- name = token [ "name" ] .lower ()
294+ name = token . get ( "name" , "" ) .lower ()
274295if name in requiredAttributeMap .keys ():
275296attrsPresent = [attrName for attrName ,attrValue
276- in token [ "data" ] ]
297+ in token . get ( "data" , []) ]
277298for attrName in requiredAttributeMap [name ]:
278299if attrName not in attrsPresent :
279300yield {"type" :"ParseError" ,
@@ -283,12 +304,39 @@ def checkStartTagRequiredAttributes(self, token):
283304
def checkStartTagUnknownAttributes(self, token):
    """Yield an "unknown-attribute" ParseError for each unrecognized attribute.

    An attribute is recognized when its lower-cased name is in
    globalAttributes or in the allowedAttributeMap entry for the
    lower-cased tag name (an empty set when the tag has no entry).  The
    error reports the lower-cased tag name and the attribute name exactly
    as it appears in the token.
    """
    # Default to "" so a token without a "name" key does not raise
    # AttributeError on None; this matches the other start-tag checks.
    name = token.get("name", "").lower()
    allowedAttributes = globalAttributes | allowedAttributeMap.get(name, frozenset(()))
    for attrName, attrValue in token.get("data", []):
        if attrName.lower() not in allowedAttributes:
            yield {"type": "ParseError",
                   "data": "unknown-attribute",
                   "datavars": {"tagName": name,
                                "attributeName": attrName}}
294315
def validateAttributeValueClass(self, token, tagName, attrName, attrValue):
    """Validate a class attribute value as a token list.

    Every error produced by checkTokenList is passed through, and each one
    is followed by an "invalid-class-attribute" ParseError naming the tag.
    The token argument itself is not used.
    """
    # NOTE(review): the companion error is emitted once per token-list
    # error (i.e. inside the loop) -- confirm against the intended nesting.
    tokenListErrors = self.checkTokenList(tagName, attrName, attrValue) or []
    for problem in tokenListErrors:
        yield problem
        yield {"type": "ParseError",
               "data": "invalid-class-attribute",
               "datavars": {"tagName": tagName}}
322+
def checkTokenList(self, tagName, attrName, attrValue):
    """Yield a ParseError for each repeated token in a space-separated value.

    attrValue is split on the characters in spaceCharacters; runs of
    separators never produce empty tokens.  Each time a token that has
    already occurred is seen again, a "duplicate-value-in-token-list"
    error is yielded, so a token appearing three times yields two errors.
    tagName and attrName are only copied into the error's datavars.

    "Token" in the method name means a token inside an attribute value,
    i.e. http://www.whatwg.org/specs/web-apps/current-work/#set-of
    and not a token generated by HTMLTokenizer.
    """
    # A set gives constant-time duplicate detection; scanning a list per
    # token would be quadratic on long token lists.
    seen = set()
    chars = []
    # The trailing space is a sentinel that flushes the final token.
    for c in attrValue + ' ':
        if c not in spaceCharacters:
            chars.append(c)
            continue
        if not chars:
            # Consecutive (or leading) separators: nothing to flush.
            continue
        currentValue = ''.join(chars)
        chars = []
        if currentValue in seen:
            yield {"type": "ParseError",
                   "data": "duplicate-value-in-token-list",
                   "datavars": {"tagName": tagName,
                                "attributeName": attrName,
                                "attributeValue": currentValue}}
        else:
            seen.add(currentValue)