Commit 0d0282b

committed

Simplified unget code (and improved speed by ~2%)

--HG--
extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401246

1 parent 22886b1 · commit 0d0282b · Copy full SHA for 0d0282b

File tree

1 file changed

+30

-39

lines changed

src/html5lib
- inputstream.py

1 file changed

+30

-39

lines changed

`‎src/html5lib/inputstream.py‎`

Lines changed: 30 additions & 39 deletions

Original file line number	Diff line number	Diff line change
`@@ -73,8 +73,6 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):`
`73`	`73`	`self.chunkSize=0`
`74`	`74`	`self.chunkOffset=0`
`75`	`75`	`self.errors= []`
`76`		`-# Single-character buffer to handle 'unget'`
`77`		`-self.ungetChar=u""# use u"" to mean 'no character' (because None means EOF)`
`78`	`76`
`79`	`77`	`# Remember the current position in the document`
`80`	`78`	`self.positionLine=1`
`@@ -257,18 +255,13 @@ def char(self):`
`257`	`255`	`""" Read one character from the stream or queue if available. Return`
`258`	`256`	`EOF when EOF is reached.`
`259`	`257`	`"""`
`260`		`-char=self.ungetChar`
`261`		`-ifchar!=u"":`
`262`		`-# Use the ungot character, and reset the buffer`
`263`		`-self.ungetChar=u""`
`264`		`-else:`
`265`		`-# Read a new chunk from the input stream if necessary`
`266`		`-ifself.chunkOffset>=self.chunkSize:`
`267`		`-ifnotself.readChunk():`
`268`		`-returnEOF`
	`258`	`+# Read a new chunk from the input stream if necessary`
	`259`	`+ifself.chunkOffset>=self.chunkSize:`
	`260`	`+ifnotself.readChunk():`
	`261`	`+returnEOF`
`269`	`262`
`270`		`-char=self.chunk[self.chunkOffset]`
`271`		`-self.chunkOffset+=1`
	`263`	`+char=self.chunk[self.chunkOffset]`
	`264`	`+self.chunkOffset+=1`
`272`	`265`
`273`	`266`	`# Update the position attributes`
`274`	`267`	`ifchar==u"\n":`
`@@ -317,18 +310,6 @@ def charsUntil(self, characters, opposite = False):`
`317`	`310`	`characters.`
`318`	`311`	`"""`
`319`	`312`
`320`		`-rv= []`
`321`		`-`
`322`		`-# Check the ungot character, if any.`
`323`		`-# (Since it's only a single character, don't use the regex here)`
`324`		`-char=self.ungetChar`
`325`		`-ifchar!=u"":`
`326`		`-ifcharisEOFor (charincharacters)!=opposite:`
`327`		`-returnu""`
`328`		`-else:`
`329`		`-rv.append(char)`
`330`		`-self.ungetChar=u""`
`331`		`-`
`332`	`313`	`# Use a cache of regexps to find the required characters`
`333`	`314`	`try:`
`334`	`315`	`chars=charsUntilRegEx[(characters,opposite)]`
`@@ -339,6 +320,8 @@ def charsUntil(self, characters, opposite = False):`
`339`	`320`	`regex=u"^%s"%regex`
`340`	`321`	`chars=charsUntilRegEx[(characters,opposite)]=re.compile(u"[%s]+"%regex)`
`341`	`322`
	`323`	`+rv= []`
	`324`	`+`
`342`	`325`	`whileTrue:`
`343`	`326`	`# Find the longest matching prefix`
`344`	`327`	`m=chars.match(self.chunk,self.chunkOffset)`
`@@ -369,21 +352,29 @@ def charsUntil(self, characters, opposite = False):`
`369`	`352`	`defunget(self,char):`
`370`	`353`	`# Only one character is allowed to be ungotten at once - it must`
`371`	`354`	`# be consumed again before any further call to unget`
`372`		`-assertself.ungetChar==u""`
`373`		`-`
`374`		`-self.ungetChar=char`
`375`	`355`
`376`		`-# Update the position attributes`
`377`		`-ifcharisNone:`
`378`		`-pass`
`379`		`-elifchar==u"\n":`
`380`		`-assertself.positionLine>=1`
`381`		`-assertself.lastLineLengthisnotNone`
`382`		`-self.positionLine-=1`
`383`		`-self.positionCol=self.lastLineLength`
`384`		`-self.lastLineLength=None`
`385`		`-else:`
`386`		`-self.positionCol-=1`
	`356`	`+ifcharisnotNone:`
	`357`	`+ifself.chunkOffset==0:`
	`358`	`+# unget is called quite rarely, so it's a good idea to do`
	`359`	`+# more work here if it saves a bit of work in the frequently`
	`360`	`+# called char and charsUntil.`
	`361`	`+# So, just prepend the ungotten character onto the current`
	`362`	`+# chunk:`
	`363`	`+self.chunk=char+self.chunk`
	`364`	`+self.chunkSize+=1`
	`365`	`+else:`
	`366`	`+self.chunkOffset-=1`
	`367`	`+assertself.chunk[self.chunkOffset]==char`
	`368`	`+`
	`369`	`+# Update the position attributes`
	`370`	`+ifchar==u"\n":`
	`371`	`+assertself.positionLine>=1`
	`372`	`+assertself.lastLineLengthisnotNone`
	`373`	`+self.positionLine-=1`
	`374`	`+self.positionCol=self.lastLineLength`
	`375`	`+self.lastLineLength=None`
	`376`	`+else:`
	`377`	`+self.positionCol-=1`
`387`	`378`
`388`	`379`	`classEncodingBytes(str):`
`389`	`380`	`"""String-like object with an assosiated position and various extra methods`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 0d0282b

File tree

1 file changed

1 file changed

`‎src/html5lib/inputstream.py‎`

0 commit comments