@@ -61,9 +61,9 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
6161'replace' )
6262
6363self .queue = deque ([])
64+ self .readChars = []
6465self .errors = []
6566
66- self .line = self .col = 0
6767self .lineLengths = []
6868
6969#Flag to indicate we may have a CR LF broken across a data chunk
@@ -202,10 +202,33 @@ def detectEncodingMeta(self):
202202self .seek (buffer ,0 )
203203return parser .getEncoding ()
204204
def updatePosition(self):
    """Fold the characters buffered in self.readChars into self.lineLengths.

    self.readChars holds the characters handed out since the last call.
    They are joined, split on newlines, and their lengths are merged into
    self.lineLengths: the first segment extends the current (last) entry,
    and every following segment starts a new entry. self.readChars is
    emptied afterwards. Does nothing when no characters are pending.
    """
    if not self.readChars:
        return

    pending = self.readChars
    if pending[-1] is EOF:
        # There may be more than one EOF at the tail of the buffer, so cut
        # at the first one rather than assuming only the last item is EOF.
        pending = pending[:pending.index(EOF)]

    lines = "".join(pending).split("\n")

    # The first segment continues the line we were already on (if any).
    if self.lineLengths:
        self.lineLengths[-1] += len(lines[0])
    else:
        self.lineLengths.append(len(lines[0]))

    # Every further segment follows a newline, so it opens a new line.
    self.lineLengths.extend(len(line) for line in lines[1:])

    self.readChars = []
223+
def position(self):
    """Returns (line, col) of the current position in the stream.

    Pending characters are first flushed via self.updatePosition(). The
    line number is then the number of entries in self.lineLengths and the
    column is the length recorded for the last entry. When nothing has
    been recorded yet the position is (1, 0).
    """
    self.updatePosition()
    if not self.lineLengths:
        return (1, 0)
    return (len(self.lineLengths), self.lineLengths[-1])
209232
210233def char (self ):
211234""" Read one character from the stream or queue if available. Return
@@ -219,13 +242,7 @@ def char(self):
219242
220243char = self .queue .popleft ()
221244
222- # update position in stream
223- if char == '\n ' :
224- self .lineLengths .append (self .col )
225- self .line += 1
226- self .col = 0
227- else :
228- self .col += 1
245+ self .readChars .append (char )
229246return char
230247
231248def readChunk (self ,chunkSize = 10240 ):
@@ -246,6 +263,8 @@ def readChunk(self, chunkSize=10240):
246263data = unicode (data )
247264self .queue .extend ([char for char in data ])
248265
266+ self .updatePosition ()
267+
249268def charsUntil (self ,characters ,opposite = False ):
250269""" Returns a string of characters from the stream up to but not
251270 including any character in characters or EOF. characters can be
@@ -273,60 +292,27 @@ def charsUntil(self, characters, opposite = False):
273292#If the queue doesn't grow we have reached EOF
274293if i == len (self .queue )or self .queue [i ]is EOF :
275294break
276- #XXX- wallpaper over bug in calculation below
277- #Otherwise change the stream position
278- if self .queue [i ]== '\n ' :
279- self .lineLengths .append (self .col )
280- self .line += 1
281- self .col = 0
282- else :
283- self .col += 1
284295
285- rv = u"" .join ([self .queue .popleft ()for c in range (i ) ])
286-
287- #Calculate where we now are in the stream
288- #One possible optimisation would be to store all read characters and
289- #Calculate this on an as-needed basis (perhaps flushing the read data
290- #every time we read a new chunk) rather than once per call here and
291- #in .char()
292-
293- #XXX Temporarily disable this because there is a bug
296+ rv = [self .queue .popleft ()for c in range (i )]
294297
295- #lines = rv.split("\n")
296- #
297- #if lines:
298- # #Add number of lines passed onto positon
299- # oldCol = self.col
300- # self.line += len(lines)-1
301- # if len(lines) > 1:
302- # self.col = len(lines[-1])
303- # else:
304- # self.col += len(lines[0])
305- #
306- # if self.lineLengths and oldCol > 0:
307- # self.lineLengths[-1] += len(lines[0])
308- # lines = lines[1:-1]
309- # else:
310- # lines = lines[:-1]
311- #
312- # for line in lines:
313- # self.lineLengths.append(len(line))
314- #
298+ self .readChars .extend (rv )
315299
300+ rv = u"" .join (rv )
316301return rv
317302
def unget(self, chars):
    """Push chars back onto the front of the queue and rewind the position.

    Pending characters are flushed with self.updatePosition() first so
    that self.lineLengths is current. chars is then put back at the left
    of self.queue in its original order, and self.lineLengths is wound
    back by one column per character (or one whole line per newline).
    A falsy chars (empty string, None) leaves the queue and position
    untouched.
    """
    self.updatePosition()
    if chars:
        # extendleft() inserts items one at a time at the left, which
        # reverses them; feeding it the reversed sequence keeps the
        # original order at the head of the queue.
        self.queue.extendleft(reversed(chars))
        #Alter the current line, col position
        for c in reversed(chars):
            if c == '\n':
                # Ungetting a newline means we must be at column 0 of the
                # line it started; drop that line's entry.
                assert self.lineLengths[-1] == 0
                self.lineLengths.pop()
            else:
                self.lineLengths[-1] -= 1
330316
331317class EncodingBytes (str ):
332318"""String-like object with an assosiated position and various extra methods