@@ -471,7 +471,7 @@ class EncodingBytes(str):
471471 If the position is ever greater than the string length then an exception is
472472 raised"""
473473def __new__ (self ,value ):
474- return str .__new__ (self ,value )
474+ return str .__new__ (self ,value . lower () )
475475
476476def __init__ (self ,value ):
477477self ._position = - 1
@@ -539,14 +539,12 @@ def skipUntil(self, chars):
539539self ._position = p
540540return None
541541
542- def matchBytes (self ,bytes , lower = False ):
542+ def matchBytes (self ,bytes ):
543543"""Look for a sequence of bytes at the start of a string. If the bytes
544544 are found return True and advance the position to the byte after the
545545 match. Otherwise return False and leave the position alone"""
546546p = self .position
547547data = self [p :p + len (bytes )]
548- if lower :
549- data = data .lower ()
550548rv = data .startswith (bytes )
551549if rv :
552550self .position += len (bytes )
@@ -557,6 +555,9 @@ def jumpTo(self, bytes):
557555 a match is found advance the position to the last byte of the match"""
558556newPosition = self [self .position :].find (bytes )
559557if newPosition > - 1 :
558+ # XXX: Ugly special case: _position starts at -1 (set in __init__), so reset it to 0 before adding the match offset below.
559+ if self ._position == - 1 :
560+ self ._position = 0
560561self ._position += (newPosition + len (bytes )- 1 )
561562return True
562563else :
@@ -581,7 +582,7 @@ def getEncoding(self):
581582for byte in self .data :
582583keepParsing = True
583584for key ,method in methodDispatch :
584- if self .data .matchBytes (key , lower = True ):
585+ if self .data .matchBytes (key ):
585586try :
586587keepParsing = method ()
587588break
@@ -659,72 +660,75 @@ def getAttribute(self):
659660"""Return a name,value pair for the next attribute in the stream,
660661 if one is found, or None"""
661662data = self .data
663+ # Step 1 (skip chars)
662664c = data .skip (spaceCharactersBytes | frozenset ("/" ))
663- if c == "<" :
664- data .previous ()
665- return None
666- elif c == ">" or c is None :
665+ # Step 2
666+ if c in (">" ,None ):
667667return None
668+ # Step 3
668669attrName = []
669670attrValue = []
670- spaceFound = False
671- #Step 5 attribute name
671+ #Step 4 attribute name
672672while True :
673673if c == "=" and attrName :
674674break
675675elif c in spaceCharactersBytes :
676- spaceFound = True
676+ #Step 6: a space character ended the attribute name (handled inside the step 4 loop)
677+ c = data .skip ()
678+ c = data .next ()
677679break
678- elif c in ("/" ,"<" , " >" ):
680+ elif c in ("/" ,">" ):
679681return "" .join (attrName ),""
680682elif c in asciiUppercaseBytes :
681683attrName .append (c .lower ())
684+ elif c == None :
685+ return None
682686else :
683687attrName .append (c )
684- #Step6
688+ #Step5
685689c = data .next ()
686690#Step 7
687- if spaceFound :
688- c = data .skip ()
689- #Step 8
690- if c != "=" :
691- data .previous ()
692- return "" .join (attrName ),""
693- #XXX need to advance position in both spaces and value case
694- #Step 9
691+ if c != "=" :
692+ data .previous ()
693+ return "" .join (attrName ),""
694+ #Step 8
695695data .next ()
696- #Step10
696+ #Step9
697697c = data .skip ()
698- #Step11
698+ #Step10
699699if c in ("'" ,'"' ):
700- #11 .1
700+ #10.1
701701quoteChar = c
702702while True :
703- #11.3
703+ #10.2
704704c = data .next ()
705+ #10.3
705706if c == quoteChar :
706707data .next ()
707708return "" .join (attrName ),"" .join (attrValue )
708- #11 .4
709+ #10.4
709710elif c in asciiUppercaseBytes :
710711attrValue .append (c .lower ())
711- #11 .5
712+ #10.5
712713else :
713714attrValue .append (c )
714- elif c in ( ">" , "<" ) :
715+ elif c == ">" :
715716return "" .join (attrName ),""
716717elif c in asciiUppercaseBytes :
717718attrValue .append (c .lower ())
718719elif c is None :
719720return None
720721else :
721722attrValue .append (c )
723+ # Step 11
722724while True :
723725c = data .next ()
724726if c in spacesAngleBrackets :
725727return "" .join (attrName ),"" .join (attrValue )
726728elif c in asciiUppercaseBytes :
727729attrValue .append (c .lower ())
730+ elif c is None :
731+ return None
728732else :
729733attrValue .append (c )
730734
@@ -734,10 +738,6 @@ def __init__(self, data):
734738self .data = data
735739def parse (self ):
736740try :
737- #Skip to the first ";"
738- self .data .jumpTo (";" )
739- self .data .position += 1
740- self .data .skip ()
741741#Check if the attr name is charset
742742#otherwise return
743743self .data .jumpTo ("charset" )
@@ -753,8 +753,10 @@ def parse(self):
753753quoteMark = self .data .currentByte
754754self .data .position += 1
755755oldPosition = self .data .position
756- self .data .jumpTo (quoteMark )
757- return self .data [oldPosition :self .data .position ]
756+ if self .data .jumpTo (quoteMark ):
757+ return self .data [oldPosition :self .data .position ]
758+ else :
759+ return None
758760else :
759761#Unquoted value
760762oldPosition = self .data .position