|
11 | 11 | asciiLettersBytes = [str(item) for item in asciiLetters]
|
12 | 12 | asciiUppercaseBytes = [str(item) for item in asciiUppercase]
|
13 | 13 |
|
14 |
| -invalid_unicode_re=re.compile(u"[\u0001-\u0008]|[\u000E-\u001F]|[\u007F-\u009F]|[\uD800-\uDFFF]|[\uFDD0-\uFDDF]|\uFFFE|\uFFFF|\U0001FFFE|\U0001FFFF|\U0002FFFE|\U0002FFFF|\U0003FFFE|\U0003FFFF|\U0004FFFE|\U0004FFFF|\U0005FFFE|\U0005FFFF|\U0006FFFE|\U0006FFFF|\U0007FFFE|\U0007FFFF|\U0008FFFE|\U0008FFFF|\U0009FFFE|\U0009FFFF|\U000AFFFE|\U000AFFFF|\U000BFFFE\U000BFFFF|\U000CFFFE|\U000CFFFF|\U000DFFFE|\U000DFFFF|\U000EFFFE|\U000EFFFF|\U000FFFFE|\U000FFFFF|\U0010FFFE|\U0010FFFF") |
| 14 | +invalid_unicode_re=re.compile(u"[\u0001-\u0008\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDDF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]") |
15 | 15 |
|
16 | 16 | # Cache for charsUntil()
|
17 | 17 | charsUntilRegEx= {}
|
@@ -320,9 +320,10 @@ def charsUntil(self, characters, opposite = False):
|
320 | 320 | # Find the longest matching prefix
|
321 | 321 | m=chars.match(self.chunk,self.chunkOffset)
|
322 | 322 | # If not everything matched, return everything up to the part that didn't match
|
323 |
| -if m.end() != len(self.chunk): |
324 |
| -rv.append(self.chunk[self.chunkOffset:m.end()]) |
325 |
| -self.chunkOffset=m.end() |
| 323 | +end=m.end() |
| 324 | +if end != len(self.chunk): |
| 325 | +rv.append(self.chunk[self.chunkOffset:end]) |
| 326 | +self.chunkOffset=end |
326 | 327 | break
|
327 | 328 | # If the whole chunk matched, use it all and read the next chunk
|
328 | 329 | rv.append(self.chunk[self.chunkOffset:])
|
|