Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 0d0282b

Browse files
committed
Simplified unget code (and improved speed by ~2%)
--HG--extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401246
1 parent 22886b1, commit 0d0282b

File tree

1 file changed

+30
-39
lines changed

1 file changed

+30
-39
lines changed

‎src/html5lib/inputstream.py

Lines changed: 30 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,6 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
7373
self.chunkSize=0
7474
self.chunkOffset=0
7575
self.errors= []
76-
# Single-character buffer to handle 'unget'
77-
self.ungetChar=u""# use u"" to mean 'no character' (because None means EOF)
7876

7977
# Remember the current position in the document
8078
self.positionLine=1
@@ -257,18 +255,13 @@ def char(self):
257255
""" Read one character from the stream or queue if available. Return
258256
EOF when EOF is reached.
259257
"""
260-
char=self.ungetChar
261-
ifchar!=u"":
262-
# Use the ungot character, and reset the buffer
263-
self.ungetChar=u""
264-
else:
265-
# Read a new chunk from the input stream if necessary
266-
ifself.chunkOffset>=self.chunkSize:
267-
ifnotself.readChunk():
268-
returnEOF
258+
# Read a new chunk from the input stream if necessary
259+
ifself.chunkOffset>=self.chunkSize:
260+
ifnotself.readChunk():
261+
returnEOF
269262

270-
char=self.chunk[self.chunkOffset]
271-
self.chunkOffset+=1
263+
char=self.chunk[self.chunkOffset]
264+
self.chunkOffset+=1
272265

273266
# Update the position attributes
274267
ifchar==u"\n":
@@ -317,18 +310,6 @@ def charsUntil(self, characters, opposite = False):
317310
characters.
318311
"""
319312

320-
rv= []
321-
322-
# Check the ungot character, if any.
323-
# (Since it's only a single character, don't use the regex here)
324-
char=self.ungetChar
325-
ifchar!=u"":
326-
ifcharisEOFor (charincharacters)!=opposite:
327-
returnu""
328-
else:
329-
rv.append(char)
330-
self.ungetChar=u""
331-
332313
# Use a cache of regexps to find the required characters
333314
try:
334315
chars=charsUntilRegEx[(characters,opposite)]
@@ -339,6 +320,8 @@ def charsUntil(self, characters, opposite = False):
339320
regex=u"^%s"%regex
340321
chars=charsUntilRegEx[(characters,opposite)]=re.compile(u"[%s]+"%regex)
341322

323+
rv= []
324+
342325
whileTrue:
343326
# Find the longest matching prefix
344327
m=chars.match(self.chunk,self.chunkOffset)
@@ -369,21 +352,29 @@ def charsUntil(self, characters, opposite = False):
369352
defunget(self,char):
370353
# Only one character is allowed to be ungotten at once - it must
371354
# be consumed again before any further call to unget
372-
assertself.ungetChar==u""
373-
374-
self.ungetChar=char
375355

376-
# Update the position attributes
377-
ifcharisNone:
378-
pass
379-
elifchar==u"\n":
380-
assertself.positionLine>=1
381-
assertself.lastLineLengthisnotNone
382-
self.positionLine-=1
383-
self.positionCol=self.lastLineLength
384-
self.lastLineLength=None
385-
else:
386-
self.positionCol-=1
356+
ifcharisnotNone:
357+
ifself.chunkOffset==0:
358+
# unget is called quite rarely, so it's a good idea to do
359+
# more work here if it saves a bit of work in the frequently
360+
# called char and charsUntil.
361+
# So, just prepend the ungotten character onto the current
362+
# chunk:
363+
self.chunk=char+self.chunk
364+
self.chunkSize+=1
365+
else:
366+
self.chunkOffset-=1
367+
assertself.chunk[self.chunkOffset]==char
368+
369+
# Update the position attributes
370+
ifchar==u"\n":
371+
assertself.positionLine>=1
372+
assertself.lastLineLengthisnotNone
373+
self.positionLine-=1
374+
self.positionCol=self.lastLineLength
375+
self.lastLineLength=None
376+
else:
377+
self.positionCol-=1
387378

388379
classEncodingBytes(str):
389380
"""String-like object with an associated position and various extra methods

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp