66from constants import encodings
77from utils import MethodDispatcher
88
9+ #Non-unicode versions of constants for use in the pre-parser
10+ spaceCharactersBytes = [str (item )for item in spaceCharacters ]
11+ asciiLettersBytes = [str (item )for item in asciiLetters ]
12+ asciiUppercaseBytes = [str (item )for item in asciiUppercase ]
13+
914try :
1015from collections import deque
1116except ImportError :
@@ -357,7 +362,7 @@ def getCurrentByte(self):
357362
358363currentByte = property (getCurrentByte )
359364
360- def skip (self ,chars = spaceCharacters ):
365+ def skip (self ,chars = spaceCharactersBytes ):
361366"""Skip past a list of characters"""
362367while self .currentByte in chars :
363368self .position += 1
@@ -432,7 +437,7 @@ def handleComment(self):
432437return self .data .jumpTo ("-->" )
433438
434439def handleMeta (self ):
435- if self .data .currentByte not in spaceCharacters :
440+ if self .data .currentByte not in spaceCharactersBytes :
436441#If we have <meta not followed by a space, just keep going
437442return True
438443#We have a valid meta element we want to search for attributes
@@ -462,7 +467,7 @@ def handlePossibleEndTag(self):
462467return self .handlePossibleTag (True )
463468
464469def handlePossibleTag (self ,endTag ):
465- if self .data .currentByte not in asciiLetters :
470+ if self .data .currentByte not in asciiLettersBytes :
466471#If the next byte is not an ASCII letter, either ignore this
467472#fragment (possible start tag case) or treat it according to
468473#handleOther
@@ -471,7 +476,7 @@ def handlePossibleTag(self, endTag):
471476self .handleOther ()
472477return True
473478
474- self .data .findNext (list (spaceCharacters )+ ["<" ,">" ])
479+ self .data .findNext (list (spaceCharactersBytes )+ ["<" ,">" ])
475480if self .data .currentByte == "<" :
476481#return to the first step in the overall "two step" algorithm
477482#reprocessing the < byte
@@ -489,7 +494,7 @@ def handleOther(self):
489494def getAttribute (self ):
490495"""Return a name,value pair for the next attribute in the stream,
491496 if one is found, or None"""
492- self .data .skip (list (spaceCharacters )+ ["/" ])
497+ self .data .skip (list (spaceCharactersBytes )+ ["/" ])
493498if self .data .currentByte == "<" :
494499self .data .position -= 1
495500return None
@@ -502,12 +507,12 @@ def getAttribute(self):
502507while True :
503508if self .data .currentByte == "=" and attrName :
504509break
505- elif self .data .currentByte in spaceCharacters :
510+ elif self .data .currentByte in spaceCharactersBytes :
506511spaceFound = True
507512break
508513elif self .data .currentByte in ("/" ,"<" ,">" ):
509514return "" .join (attrName ),""
510- elif self .data .currentByte in asciiUppercase :
515+ elif self .data .currentByte in asciiUppercaseBytes :
511516attrName .extend (self .data .currentByte .lower ())
512517else :
513518attrName .extend (self .data .currentByte )
@@ -536,23 +541,23 @@ def getAttribute(self):
536541self .data .position += 1
537542return "" .join (attrName ),"" .join (attrValue )
538543#11.4
539- elif self .data .currentByte in asciiUppercase :
544+ elif self .data .currentByte in asciiUppercaseBytes :
540545attrValue .extend (self .data .currentByte .lower ())
541546#11.5
542547else :
543548attrValue .extend (self .data .currentByte )
544549elif self .data .currentByte in (">" ,"<" ):
545550return "" .join (attrName ),""
546- elif self .data .currentByte in asciiUppercase :
551+ elif self .data .currentByte in asciiUppercaseBytes :
547552attrValue .extend (self .data .currentByte .lower ())
548553else :
549554attrValue .extend (self .data .currentByte )
550555while True :
551556self .data .position += 1
552557if self .data .currentByte in (
553- list (spaceCharacters )+ [">" ,"<" ]):
558+ list (spaceCharactersBytes )+ [">" ,"<" ]):
554559return "" .join (attrName ),"" .join (attrValue )
555- elif self .data .currentByte in asciiUppercase :
560+ elif self .data .currentByte in asciiUppercaseBytes :
556561attrValue .extend (self .data .currentByte .lower ())
557562else :
558563attrValue .extend (self .data .currentByte )
@@ -588,7 +593,7 @@ def parse(self):
588593#Unquoted value
589594oldPosition = self .data .position
590595try :
591- self .data .findNext (spaceCharacters )
596+ self .data .findNext (spaceCharactersBytes )
592597return self .data [oldPosition :self .data .position ]
593598except StopIteration :
594599#Return the whole remaining value