@@ -51,6 +51,7 @@ def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
5151"attributeValueDoubleQuoted" :self .attributeValueDoubleQuotedState ,
5252"attributeValueSingleQuoted" :self .attributeValueSingleQuotedState ,
5353"attributeValueUnQuoted" :self .attributeValueUnQuotedState ,
54+ "afterAttributeValue" :self .afterAttributeValueState ,
5455"bogusComment" :self .bogusCommentState ,
5556"markupDeclarationOpen" :self .markupDeclarationOpenState ,
5657"commentStart" :self .commentStartState ,
@@ -185,10 +186,11 @@ def consumeNumberEntity(self, isHex):
185186
186187return char
187188
188- def consumeEntity (self ,fromAttribute = False ):
189+ def consumeEntity (self ,allowedChar = None , fromAttribute = False ):
189190char = None
190191charStack = [self .stream .char ()]
191- if charStack [0 ]in spaceCharacters or charStack [0 ]in (EOF ,"<" ,"&" ):
192+ if charStack [0 ]in spaceCharacters or charStack [0 ]in (EOF ,"<" ,"&" )\
193+ or (allowedChar is not None and allowedChar == charStack [0 ]):
192194self .stream .unget (charStack )
193195elif charStack [0 ]== u"#" :
194196# We might have a number entity here.
@@ -260,10 +262,10 @@ def entitiesStartingWith(name):
260262self .stream .unget (charStack )
261263return char
262264
263- def processEntityInAttribute (self ):
265+ def processEntityInAttribute (self , allowedChar ):
264266"""This method replaces the need for "entityInAttributeValueState".
265267 """
266- entity = self .consumeEntity (True )
268+ entity = self .consumeEntity (allowedChar = allowedChar , fromAttribute = True )
267269if entity :
268270self .currentToken ["data" ][- 1 ][1 ]+= entity
269271else :
@@ -479,6 +481,11 @@ def beforeAttributeNameState(self):
479481self .emitCurrentToken ()
480482elif data == u"/" :
481483self .processSolidusInTag ()
484+ elif data == u"'" or data == u'"' or data == u"=" :
485+ self .tokenQueue .append ({"type" :"ParseError" ,"data" :
486+ "invalid-character-in-attribute-name" })
487+ self .currentToken ["data" ].append ([data ,"" ])
488+ self .state = self .states ["attributeName" ]
482489elif data == EOF :
483490self .tokenQueue .append ({"type" :"ParseError" ,"data" :
484491"expected-attribute-name-but-got-eof" })
@@ -508,6 +515,11 @@ def attributeNameState(self):
508515elif data == u"/" :
509516self .processSolidusInTag ()
510517self .state = self .states ["beforeAttributeName" ]
518+ elif data == u"'" or data == u'"' :
519+ self .tokenQueue .append ({"type" :"ParseError" ,"data" :
520+ "invalid-character-in-attribute-name" })
521+ self .currentToken ["data" ][- 1 ][0 ]+= data
522+ leavingThisState = False
511523elif data == EOF :
512524self .tokenQueue .append ({"type" :"ParseError" ,"data" :
513525"eof-in-attribute-name" })
@@ -570,6 +582,11 @@ def beforeAttributeValueState(self):
570582self .state = self .states ["attributeValueSingleQuoted" ]
571583elif data == u">" :
572584self .emitCurrentToken ()
585+ elif data == u"=" :
586+ self .tokenQueue .append ({"type" :"ParseError" ,"data" :
587+ "equals-in-unquoted-attribute-value" })
588+ self .currentToken ["data" ][- 1 ][1 ]+= data
589+ self .state = self .states ["attributeValueUnQuoted" ]
573590elif data == EOF :
574591self .tokenQueue .append ({"type" :"ParseError" ,"data" :
575592"expected-attribute-value-but-got-eof" })
@@ -582,9 +599,9 @@ def beforeAttributeValueState(self):
582599def attributeValueDoubleQuotedState (self ):
583600data = self .stream .char ()
584601if data == "\" " :
585- self .state = self .states ["beforeAttributeName " ]
602+ self .state = self .states ["afterAttributeValue " ]
586603elif data == u"&" :
587- self .processEntityInAttribute ()
604+ self .processEntityInAttribute (u'"' )
588605elif data == EOF :
589606self .tokenQueue .append ({"type" :"ParseError" ,"data" :
590607"eof-in-attribute-value-double-quote" })
@@ -597,9 +614,9 @@ def attributeValueDoubleQuotedState(self):
597614def attributeValueSingleQuotedState (self ):
598615data = self .stream .char ()
599616if data == "'" :
600- self .state = self .states ["beforeAttributeName " ]
617+ self .state = self .states ["afterAttributeValue " ]
601618elif data == u"&" :
602- self .processEntityInAttribute ()
619+ self .processEntityInAttribute (u"'" )
603620elif data == EOF :
604621self .tokenQueue .append ({"type" :"ParseError" ,"data" :
605622"eof-in-attribute-value-single-quote" })
@@ -614,16 +631,37 @@ def attributeValueUnQuotedState(self):
614631if data in spaceCharacters :
615632self .state = self .states ["beforeAttributeName" ]
616633elif data == u"&" :
617- self .processEntityInAttribute ()
634+ self .processEntityInAttribute (None )
618635elif data == u">" :
619636self .emitCurrentToken ()
637+ elif data == u'"' or data == u"'" or data == u"=" :
638+ self .tokenQueue .append ({"type" :"ParseError" ,"data" :
639+ "unexpected-character-in-unquoted-attribute-value" })
640+ self .currentToken ["data" ][- 1 ][1 ]+= data
620641elif data == EOF :
621642self .tokenQueue .append ({"type" :"ParseError" ,"data" :
622643"eof-in-attribute-value-no-quotes" })
623644self .emitCurrentToken ()
624645else :
625646self .currentToken ["data" ][- 1 ][1 ]+= data + self .stream .charsUntil ( \
626- frozenset (("&" ,">" ,"<" ))| spaceCharacters )
647+ frozenset (("&" ,">" ,"<" ,"=" ,"'" ,'"' ))| spaceCharacters )
648+ return True
649+
def afterAttributeValueState(self):
    """Dispatch on the single character read right after an attribute value.

    The quoted attribute-value states switch to this state once they see
    their closing quote.  One character is pulled from ``self.stream``
    and handled as follows:

    * a space character: continue in ``beforeAttributeName``;
    * ``>``: emit the current token and select the ``data`` state;
    * ``/``: delegate to ``processSolidusInTag`` and continue in
      ``beforeAttributeName``;
    * anything else: queue an
      ``unexpected-character-after-attribute-value`` ParseError, push the
      character back onto the stream and let ``beforeAttributeName``
      look at it again.

    Always returns True.
    """
    nextChar = self.stream.char()

    if nextChar in spaceCharacters:
        self.state = self.states["beforeAttributeName"]
        return True

    if nextChar == u">":
        self.emitCurrentToken()
        self.state = self.states["data"]
        return True

    if nextChar == u"/":
        self.processSolidusInTag()
        self.state = self.states["beforeAttributeName"]
        return True

    # No dedicated branch matched (this is also the path taken by any
    # value the stream returns that is not one of the characters above):
    # record the error, then hand the same character back so that the
    # before-attribute-name state processes it.
    parseError = {"type": "ParseError",
                  "data": "unexpected-character-after-attribute-value"}
    self.tokenQueue.append(parseError)
    self.stream.unget(nextChar)
    self.state = self.states["beforeAttributeName"]
    return True
628666
629667def bogusCommentState (self ):