Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 0c6d178

Browse files
committed
Don't update the stream position each time a character is read
--HG--extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401015
1 parent 13f7c37, commit 0c6d178

File tree

2 files changed

+48
-52
lines changed

2 files changed

+48
-52
lines changed

‎src/html5lib/inputstream.py

Lines changed: 36 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -61,9 +61,9 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
6161
'replace')
6262

6363
self.queue=deque([])
64+
self.readChars= []
6465
self.errors= []
6566

66-
self.line=self.col=0
6767
self.lineLengths= []
6868

6969
#Flag to indicate we may have a CR LF broken across a data chunk
@@ -202,10 +202,33 @@ def detectEncodingMeta(self):
202202
self.seek(buffer,0)
203203
returnparser.getEncoding()
204204

205+
defupdatePosition(self):
206+
#Remove EOF from readChars, if present
207+
ifnotself.readChars:
208+
return
209+
ifself.readCharsandself.readChars[-1]==EOF:
210+
#There may be more than one EOF in readChars so we cannot assume
211+
#readChars.index(EOF) == -1
212+
self.readChars=self.readChars[:self.readChars.index(EOF)]
213+
readChars="".join(self.readChars)
214+
lines=readChars.split("\n")
215+
ifself.lineLengths:
216+
self.lineLengths[-1]+=len(lines[0])
217+
else:
218+
self.lineLengths.append(len(lines[0]))
219+
forlineinlines[1:]:
220+
self.lineLengths.append(len(line))
221+
self.readChars= []
222+
#print self.lineLengths
223+
205224
defposition(self):
206225
"""Returns (line, col) of the current position in the stream."""
207-
line,col=self.line,self.col
208-
return (line+1,col)
226+
self.updatePosition()
227+
ifself.lineLengths:
228+
line,col=len(self.lineLengths),self.lineLengths[-1]
229+
else:
230+
line,col=1,0
231+
return (line,col)
209232

210233
defchar(self):
211234
""" Read one character from the stream or queue if available. Return
@@ -219,13 +242,7 @@ def char(self):
219242

220243
char=self.queue.popleft()
221244

222-
# update position in stream
223-
ifchar=='\n':
224-
self.lineLengths.append(self.col)
225-
self.line+=1
226-
self.col=0
227-
else:
228-
self.col+=1
245+
self.readChars.append(char)
229246
returnchar
230247

231248
defreadChunk(self,chunkSize=10240):
@@ -246,6 +263,8 @@ def readChunk(self, chunkSize=10240):
246263
data=unicode(data)
247264
self.queue.extend([charforcharindata])
248265

266+
self.updatePosition()
267+
249268
defcharsUntil(self,characters,opposite=False):
250269
""" Returns a string of characters from the stream up to but not
251270
including any character in characters or EOF. characters can be
@@ -273,60 +292,27 @@ def charsUntil(self, characters, opposite = False):
273292
#If the queue doesn't grow we have reached EOF
274293
ifi==len(self.queue)orself.queue[i]isEOF:
275294
break
276-
#XXX- wallpaper over bug in calculation below
277-
#Otherwise change the stream position
278-
ifself.queue[i]=='\n':
279-
self.lineLengths.append(self.col)
280-
self.line+=1
281-
self.col=0
282-
else:
283-
self.col+=1
284295

285-
rv=u"".join([self.queue.popleft()forcinrange(i) ])
286-
287-
#Calculate where we now are in the stream
288-
#One possible optimisation would be to store all read characters and
289-
#Calculate this on an as-needed basis (perhaps flushing the read data
290-
#every time we read a new chunk) rather than once per call here and
291-
#in .char()
292-
293-
#XXX Temporarily disable this because there is a bug
296+
rv= [self.queue.popleft()forcinrange(i)]
294297

295-
#lines = rv.split("\n")
296-
#
297-
#if lines:
298-
# #Add number of lines passed onto positon
299-
# oldCol = self.col
300-
# self.line += len(lines)-1
301-
# if len(lines) > 1:
302-
# self.col = len(lines[-1])
303-
# else:
304-
# self.col += len(lines[0])
305-
#
306-
# if self.lineLengths and oldCol > 0:
307-
# self.lineLengths[-1] += len(lines[0])
308-
# lines = lines[1:-1]
309-
# else:
310-
# lines = lines[:-1]
311-
#
312-
# for line in lines:
313-
# self.lineLengths.append(len(line))
314-
#
298+
self.readChars.extend(rv)
315299

300+
rv=u"".join(rv)
316301
returnrv
317302

318303
defunget(self,chars):
304+
self.updatePosition()
319305
ifchars:
320306
l=list(chars)
321307
l.reverse()
322308
self.queue.extendleft(l)
323309
#Alter the current line, col position
324310
forcinchars[::-1]:
325311
ifc=='\n':
326-
self.line-=1
327-
self.col=self.lineLengths[self.line]
312+
assertself.lineLengths[-1]==0
313+
self.lineLengths.pop()
328314
else:
329-
self.col-=1
315+
self.lineLengths[-1]-=1
330316

331317
classEncodingBytes(str):
332318
"""String-like object with an associated position and various extra methods

‎tests/test_stream.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,13 +41,23 @@ def test_newlines(self):
4141
self.assertEquals(stream.position(), (1,0))
4242
self.assertEquals(stream.charsUntil('c'),u"a\nbb\n")
4343
self.assertEquals(stream.position(), (3,0))
44-
self.assertEquals(stream.lineLengths, [1,2])
4544
self.assertEquals(stream.charsUntil('x'),u"ccc\ndddd")
4645
self.assertEquals(stream.position(), (4,4))
47-
self.assertEquals(stream.lineLengths, [1,2,3])
4846
self.assertEquals(stream.charsUntil('e'),u"x")
4947
self.assertEquals(stream.position(), (4,5))
5048

49+
deftest_position(self):
50+
stream=HTMLInputStream(codecs.BOM_UTF8+"a\nbb\nccc\nddd")
51+
self.assertEquals(stream.position(), (1,0))
52+
self.assertEquals(stream.charsUntil('c'),u"a\nbb\n")
53+
self.assertEquals(stream.position(), (3,0))
54+
stream.unget("a\nbb\n")
55+
self.assertEquals(stream.position(), (1,0))
56+
self.assertEquals(stream.charsUntil('c'),u"a\nbb\n")
57+
self.assertEquals(stream.position(), (3,0))
58+
self.assertEquals(stream.charsUntil('e'),u"ccc\nddd")
59+
self.assertEquals(stream.position(), (4,3))
60+
5161
defbuildTestSuite():
5262
returnunittest.defaultTestLoader.loadTestsFromName(__name__)
5363

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp