2222import iso639codes
2323import rfc3987
2424import rfc2046
25- from html5lib .constants import E ,spaceCharacters ,digits
25+ from html5lib .constants import E ,spaceCharacters ,digits , tokenTypes
2626from html5lib import tokenizer
2727import gettext
2828_ = gettext .gettext
@@ -267,8 +267,9 @@ def __init__(self, stream, encoding, parseMeta, **kwargs):
267267self .IDsWeHaveKnownAndLoved = []
268268
269269def __iter__ (self ):
270+ types = dict ((v ,k )for k ,v in tokenTypes .iteritems ())
270271for token in _base .Filter .__iter__ (self ):
271- fakeToken = {"type" :token .get ("type" ,"-" ),
272+ fakeToken = {"type" :types . get ( token .get ("type" , "-" ) ,"-" ),
272273"name" :token .get ("name" ,"-" ).capitalize ()}
273274method = getattr (self ,"validate%(type)s%(name)s" % fakeToken ,None )
274275if method :
@@ -301,23 +302,23 @@ def validateStartTagInput(self, token):
301302attrDict = dict ([(name .lower (),value )for name ,value in token .get ("data" , [])])
302303inputType = attrDict .get ("type" ,"text" )
303304if inputType not in inputTypeAllowedAttributeMap .keys ():
304- yield {"type" :"ParseError" ,
305+ yield {"type" :tokenTypes [ "ParseError" ] ,
305306"data" :"unknown-input-type" ,
306307"datavars" : {"attrValue" :inputType }}
307308allowedAttributes = inputTypeAllowedAttributeMap .get (inputType , [])
308309for attrName ,attrValue in attrDict .items ():
309310if attrName not in allowedAttributeMap ['input' ]:
310- yield {"type" :"ParseError" ,
311+ yield {"type" :tokenTypes [ "ParseError" ] ,
311312"data" :"unknown-attribute" ,
312313"datavars" : {"tagName" :"input" ,
313314"attributeName" :attrName }}
314315elif attrName not in allowedAttributes :
315- yield {"type" :"ParseError" ,
316+ yield {"type" :tokenTypes [ "ParseError" ] ,
316317"data" :"attribute-not-allowed-on-this-input-type" ,
317318"datavars" : {"attributeName" :attrName ,
318319"inputType" :inputType }}
319320if attrName in inputTypeDeprecatedAttributeMap .get (inputType , []):
320- yield {"type" :"ParseError" ,
321+ yield {"type" :tokenTypes [ "ParseError" ] ,
321322"data" :"deprecated-attribute" ,
322323"datavars" : {"attributeName" :attrName ,
323324"inputType" :inputType }}
@@ -330,7 +331,7 @@ def checkUnknownStartTag(self, token):
330331# check for recognized tag name
331332name = token .get ("name" ,"" ).lower ()
332333if name not in allowedAttributeMap .keys ():
333- yield {"type" :"ParseError" ,
334+ yield {"type" :tokenTypes [ "ParseError" ] ,
334335"data" :"unknown-start-tag" ,
335336"datavars" : {"tagName" :name }}
336337
@@ -342,7 +343,7 @@ def checkStartTagRequiredAttributes(self, token):
342343in token .get ("data" , [])]
343344for attrName in requiredAttributeMap [name ]:
344345if attrName not in attrsPresent :
345- yield {"type" :"ParseError" ,
346+ yield {"type" :tokenTypes [ "ParseError" ] ,
346347"data" :"missing-required-attribute" ,
347348"datavars" : {"tagName" :name ,
348349"attributeName" :attrName }}
@@ -353,7 +354,7 @@ def checkStartTagUnknownAttributes(self, token):
353354allowedAttributes = globalAttributes | allowedAttributeMap .get (name ,frozenset (()))
354355for attrName ,attrValue in token .get ("data" , []):
355356if attrName .lower ()not in allowedAttributes :
356- yield {"type" :"ParseError" ,
357+ yield {"type" :tokenTypes [ "ParseError" ] ,
357358"data" :"unknown-attribute" ,
358359"datavars" : {"tagName" :name ,
359360"attributeName" :attrName }}
@@ -365,40 +366,40 @@ def checkStartTagUnknownAttributes(self, token):
365366# def checkURI(self, token, tagName, attrName, attrValue):
366367# isValid, errorCode = rfc3987.isValidURI(attrValue)
367368# if not isValid:
368- # yield {"type": "ParseError",
369+ # yield {"type":tokenTypes[ "ParseError"] ,
369370# "data": errorCode,
370371# "datavars": {"tagName": tagName,
371372# "attributeName": attrName}}
372- # yield {"type": "ParseError",
373+ # yield {"type":tokenTypes[ "ParseError"] ,
373374# "data": "invalid-attribute-value",
374375# "datavars": {"tagName": tagName,
375376# "attributeName": attrName}}
376377
377378def checkIRI (self ,token ,tagName ,attrName ,attrValue ):
378379isValid ,errorCode = rfc3987 .isValidIRI (attrValue )
379380if not isValid :
380- yield {"type" :"ParseError" ,
381+ yield {"type" :tokenTypes [ "ParseError" ] ,
381382"data" :errorCode ,
382383"datavars" : {"tagName" :tagName ,
383384"attributeName" :attrName }}
384- yield {"type" :"ParseError" ,
385+ yield {"type" :tokenTypes [ "ParseError" ] ,
385386"data" :"invalid-attribute-value" ,
386387"datavars" : {"tagName" :tagName ,
387388"attributeName" :attrName }}
388389
389390def checkID (self ,token ,tagName ,attrName ,attrValue ):
390391if not attrValue :
391- yield {"type" :"ParseError" ,
392+ yield {"type" :tokenTypes [ "ParseError" ] ,
392393"data" :"attribute-value-can-not-be-blank" ,
393394"datavars" : {"tagName" :tagName ,
394395"attributeName" :attrName }}
395396for c in attrValue :
396397if c in spaceCharacters :
397- yield {"type" :"ParseError" ,
398+ yield {"type" :tokenTypes [ "ParseError" ] ,
398399"data" :"space-in-id" ,
399400"datavars" : {"tagName" :tagName ,
400401"attributeName" :attrName }}
401- yield {"type" :"ParseError" ,
402+ yield {"type" :tokenTypes [ "ParseError" ] ,
402403"data" :"invalid-attribute-value" ,
403404"datavars" : {"tagName" :tagName ,
404405"attributeName" :attrName }}
@@ -427,7 +428,7 @@ def checkTokenList(self, tagName, attrName, attrValue):
427428valueDict = {}
428429for currentValue in valueList :
429430if valueDict .has_key (currentValue ):
430- yield {"type" :"ParseError" ,
431+ yield {"type" :tokenTypes [ "ParseError" ] ,
431432"data" :"duplicate-value-in-token-list" ,
432433"datavars" : {"tagName" :tagName ,
433434"attributeName" :attrName ,
@@ -437,32 +438,32 @@ def checkTokenList(self, tagName, attrName, attrValue):
437438
438439def checkEnumeratedValue (self ,token ,tagName ,attrName ,attrValue ,enumeratedValues ):
439440if not attrValue and ('' not in enumeratedValues ):
440- yield {"type" :"ParseError" ,
441+ yield {"type" :tokenTypes [ "ParseError" ] ,
441442"data" :"attribute-value-can-not-be-blank" ,
442443"datavars" : {"tagName" :tagName ,
443444"attributeName" :attrName }}
444445return
445446attrValue = attrValue .lower ()
446447if attrValue not in enumeratedValues :
447- yield {"type" :"ParseError" ,
448+ yield {"type" :tokenTypes [ "ParseError" ] ,
448449"data" :"invalid-enumerated-value" ,
449450"datavars" : {"tagName" :tagName ,
450451"attributeName" :attrName ,
451452"enumeratedValues" :tuple (enumeratedValues )}}
452- yield {"type" :"ParseError" ,
453+ yield {"type" :tokenTypes [ "ParseError" ] ,
453454"data" :"invalid-attribute-value" ,
454455"datavars" : {"tagName" :tagName ,
455456"attributeName" :attrName }}
456457
457458def checkBoolean (self ,token ,tagName ,attrName ,attrValue ):
458459enumeratedValues = frozenset ((attrName ,'' ))
459460if attrValue not in enumeratedValues :
460- yield {"type" :"ParseError" ,
461+ yield {"type" :tokenTypes [ "ParseError" ] ,
461462"data" :"invalid-boolean-value" ,
462463"datavars" : {"tagName" :tagName ,
463464"attributeName" :attrName ,
464465"enumeratedValues" :tuple (enumeratedValues )}}
465- yield {"type" :"ParseError" ,
466+ yield {"type" :tokenTypes [ "ParseError" ] ,
466467"data" :"invalid-attribute-value" ,
467468"datavars" : {"tagName" :tagName ,
468469"attributeName" :attrName }}
@@ -471,7 +472,7 @@ def checkInteger(self, token, tagName, attrName, attrValue):
471472sign = 1
472473numberString = ''
473474state = 'begin' # ('begin', 'initial-number', 'number', 'trailing-junk')
474- error = {"type" :"ParseError" ,
475+ error = {"type" :tokenTypes [ "ParseError" ] ,
475476"data" :"invalid-integer-value" ,
476477"datavars" : {"tagName" :tagName ,
477478"attributeName" :attrName ,
@@ -503,7 +504,7 @@ def checkInteger(self, token, tagName, attrName, attrValue):
503504elif state == 'trailing-junk' :
504505pass
505506if not numberString :
506- yield {"type" :"ParseError" ,
507+ yield {"type" :tokenTypes [ "ParseError" ] ,
507508"data" :"attribute-value-can-not-be-blank" ,
508509"datavars" : {"tagName" :tagName ,
509510"attributeName" :attrName }}
@@ -517,15 +518,15 @@ def checkBrowsingContext(self, token, tagName, attrName, attrValue):
517518if attrValue [0 ]!= '_' :return
518519attrValue = attrValue .lower ()
519520if attrValue in frozenset (('_self' ,'_parent' ,'_top' ,'_blank' )):return
520- yield {"type" :"ParseError" ,
521+ yield {"type" :tokenTypes [ "ParseError" ] ,
521522"data" :"invalid-browsing-context" ,
522523"datavars" : {"tagName" :tagName ,
523524"attributeName" :attrName }}
524525
525526def checkLangCode (self ,token ,tagName ,attrName ,attrValue ):
526527if not attrValue :return # blank is OK
527528if not iso639codes .isValidLangCode (attrValue ):
528- yield {"type" :"ParseError" ,
529+ yield {"type" :tokenTypes [ "ParseError" ] ,
529530"data" :"invalid-lang-code" ,
530531"datavars" : {"tagName" :tagName ,
531532"attributeName" :attrName ,
@@ -534,13 +535,13 @@ def checkLangCode(self, token, tagName, attrName, attrValue):
534535def checkMIMEType (self ,token ,tagName ,attrName ,attrValue ):
535536# XXX needs tests
536537if not attrValue :
537- yield {"type" :"ParseError" ,
538+ yield {"type" :tokenTypes [ "ParseError" ] ,
538539"data" :"attribute-value-can-not-be-blank" ,
539540"datavars" : {"tagName" :tagName ,
540541"attributeName" :attrName }}
541542
542543if not rfc2046 .isValidMIMEType (attrValue ):
543- yield {"type" :"ParseError" ,
544+ yield {"type" :tokenTypes [ "ParseError" ] ,
544545"data" :"invalid-mime-type" ,
545546"datavars" : {"tagName" :tagName ,
546547"attributeName" :attrName ,
@@ -556,7 +557,7 @@ def checkLinkRelation(self, token, tagName, attrName, attrValue):
556557allowedValues = (tagName == 'link' )and linkRelValues or aRelValues
557558for currentValue in valueList :
558559if currentValue not in allowedValues :
559- yield {"type" :"ParseError" ,
560+ yield {"type" :tokenTypes [ "ParseError" ] ,
560561"data" :"invalid-rel" ,
561562"datavars" : {"tagName" :tagName ,
562563"attributeName" :attrName }}
@@ -593,7 +594,7 @@ def checkAttributeValues(self, token):
593594def validateAttributeValueClass (self ,token ,tagName ,attrName ,attrValue ):
594595for t in self .checkTokenList (tagName ,attrName ,attrValue )or []:
595596yield t
596- yield {"type" :"ParseError" ,
597+ yield {"type" :tokenTypes [ "ParseError" ] ,
597598"data" :"invalid-attribute-value" ,
598599"datavars" : {"tagName" :tagName ,
599600"attributeName" :attrName }}
@@ -623,7 +624,7 @@ def validateAttributeValueId(self, token, tagName, attrName, attrValue):
623624for t in self .checkID (token ,tagName ,attrName ,attrValue )or []:yield t
624625if not attrValue :return
625626if attrValue in self .IDsWeHaveKnownAndLoved :
626- yield {"type" :"ParseError" ,
627+ yield {"type" :tokenTypes [ "ParseError" ] ,
627628"data" :"duplicate-id" ,
628629"datavars" : {"tagName" :tagName }}
629630self .IDsWeHaveKnownAndLoved .append (attrValue )
@@ -641,7 +642,7 @@ def validateAttributeValueTemplate(self, token, tagName, attrName, attrValue):
641642
642643def validateAttributeValueHtmlXmlns (self ,token ,tagName ,attrName ,attrValue ):
643644if attrValue != "http://www.w3.org/1999/xhtml" :
644- yield {"type" :"ParseError" ,
645+ yield {"type" :tokenTypes [ "ParseError" ] ,
645646"data" :"invalid-root-namespace" ,
646647"datavars" : {"tagName" :tagName ,
647648"attributeName" :attrName }}
@@ -699,7 +700,7 @@ def eof(self):
699700# hooray for obscure side effects!
700701attrValue = attrsDict .get ("contextmenu" ,"" )
701702if attrValue and (attrValue not in self .IDsWeHaveKnownAndLoved ):
702- yield {"type" :"ParseError" ,
703+ yield {"type" :tokenTypes [ "ParseError" ] ,
703704"data" :"id-does-not-exist" ,
704705"datavars" : {"tagName" :tagName ,
705706"attributeName" :"contextmenu" ,
@@ -710,6 +711,6 @@ def eof(self):
710711if not id :continue
711712if id == attrValue :
712713if refToken .get ("name" ,"" ).lower ()!= "menu" :
713- yield {"type" :"ParseError" ,
714+ yield {"type" :tokenTypes [ "ParseError" ] ,
714715"data" :"contextmenu-must-point-to-menu" }
715716break