Commit 05cf33a

Harmon758 authored and Byron committed

Remove surrogateescape error handler for Python 2

1 parent e50ee0a · commit 05cf33a · Copy full SHA for 05cf33a

File tree

1 file changed

-179

lines changed

git
- compat.py

1 file changed

-179

lines changed

`‎git/compat.py‎`

Lines changed: 0 additions & 179 deletions

Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,6 @@`
`10`	`10`	`importlocale`
`11`	`11`	`importos`
`12`	`12`	`importsys`
`13`		`-importcodecs`
`14`	`13`
`15`	`14`
`16`	`15`	`fromgitdb.utils.compatimport (`
`@@ -91,181 +90,3 @@ def __str__(self):`
`91`	`90`	`else:# Python 2`
`92`	`91`	`def__str__(self):`
`93`	`92`	`returnself.__unicode__().encode(defenc)`
`94`		`-`
`95`		`-`
`96`		`-"""`
`97`		`-This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error`
`98`		`-handler of Python 3.`
`99`		`-Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc`
`100`		`-"""`
`101`		`-`
`102`		`-# This code is released under the Python license and the BSD 2-clause license`
`103`		`-`
`104`		`-`
`105`		`-FS_ERRORS='surrogateescape'`
`106`		`-`
`107`		`-# # -- Python 2/3 compatibility -------------------------------------`
`108`		`-# FS_ERRORS = 'my_surrogateescape'`
`109`		`-`
`110`		`-defu(text):`
`111`		`-ifPY3:`
`112`		`-returntext`
`113`		`-returntext.decode('unicode_escape')`
`114`		`-`
`115`		`-defb(data):`
`116`		`-ifPY3:`
`117`		`-returndata.encode('latin1')`
`118`		`-returndata`
`119`		`-`
`120`		`-defsurrogateescape_handler(exc):`
`121`		`-"""`
`122`		`- Pure Python implementation of the PEP 383: the "surrogateescape" error`
`123`		`- handler of Python 3. Undecodable bytes will be replaced by a Unicode`
`124`		`- character U+DCxx on decoding, and these are translated into the`
`125`		`- original bytes on encoding.`
`126`		`- """`
`127`		`-mystring=exc.object[exc.start:exc.end]`
`128`		`-`
`129`		`-try:`
`130`		`-ifisinstance(exc,UnicodeDecodeError):`
`131`		`-# mystring is a byte-string in this case`
`132`		`-decoded=replace_surrogate_decode(mystring)`
`133`		`-elifisinstance(exc,UnicodeEncodeError):`
`134`		`-# In the case of u'\udcc3'.encode('ascii',`
`135`		`-# 'this_surrogateescape_handler'), both Python 2.x and 3.x raise an`
`136`		`-# exception anyway after this function is called, even though I think`
`137`		`-# it's doing what it should. It seems that the strict encoder is called`
`138`		`-# to encode the unicode string that this function returns ...`
`139`		`-decoded=replace_surrogate_encode(mystring,exc)`
`140`		`-else:`
`141`		`-raiseexc`
`142`		`-exceptNotASurrogateError:`
`143`		`-raiseexc`
`144`		`-return (decoded,exc.end)`
`145`		`-`
`146`		`-`
`147`		`-classNotASurrogateError(Exception):`
`148`		`-pass`
`149`		`-`
`150`		`-`
`151`		`-defreplace_surrogate_encode(mystring,exc):`
`152`		`-"""`
`153`		`- Returns a (unicode) string, not the more logical bytes, because the codecs`
`154`		`- register_error functionality expects this.`
`155`		`- """`
`156`		`-decoded= []`
`157`		`-forchinmystring:`
`158`		`-# if PY3:`
`159`		`-# code = ch`
`160`		`-# else:`
`161`		`-code=ord(ch)`
`162`		`-`
`163`		`-# The following magic comes from Py3.3's Python/codecs.c file:`
`164`		`-ifnot0xD800<=code<=0xDCFF:`
`165`		`-# Not a surrogate. Fail with the original exception.`
`166`		`-raiseexc`
`167`		`-# mybytes = [0xe0 \| (code >> 12),`
`168`		`-# 0x80 \| ((code >> 6) & 0x3f),`
`169`		`-# 0x80 \| (code & 0x3f)]`
`170`		`-# Is this a good idea?`
`171`		`-if0xDC00<=code<=0xDC7F:`
`172`		`-decoded.append(chr(code-0xDC00))`
`173`		`-elifcode<=0xDCFF:`
`174`		`-decoded.append(chr(code-0xDC00))`
`175`		`-else:`
`176`		`-raiseNotASurrogateError`
`177`		`-returnstr().join(decoded)`
`178`		`-`
`179`		`-`
`180`		`-defreplace_surrogate_decode(mybytes):`
`181`		`-"""`
`182`		`- Returns a (unicode) string`
`183`		`- """`
`184`		`-decoded= []`
`185`		`-forchinmybytes:`
`186`		`-# We may be parsing newbytes (in which case ch is an int) or a native`
`187`		`-# str on Py2`
`188`		`-ifisinstance(ch,int):`
`189`		`-code=ch`
`190`		`-else:`
`191`		`-code=ord(ch)`
`192`		`-if0x80<=code<=0xFF:`
`193`		`-decoded.append(chr(0xDC00+code))`
`194`		`-elifcode<=0x7F:`
`195`		`-decoded.append(chr(code))`
`196`		`-else:`
`197`		`-# # It may be a bad byte`
`198`		`-# # Try swallowing it.`
`199`		`-# continue`
`200`		`-# print("RAISE!")`
`201`		`-raiseNotASurrogateError`
`202`		`-returnstr().join(decoded)`
`203`		`-`
`204`		`-`
`205`		`-defencodefilename(fn):`
`206`		`-ifFS_ENCODING=='ascii':`
`207`		`-# ASCII encoder of Python 2 expects that the error handler returns a`
`208`		`-# Unicode string encodable to ASCII, whereas our surrogateescape error`
`209`		`-# handler has to return bytes in 0x80-0xFF range.`
`210`		`-encoded= []`
`211`		`-forindex,chinenumerate(fn):`
`212`		`-code=ord(ch)`
`213`		`-ifcode<128:`
`214`		`-ch=bytes((code,))`
`215`		`-elif0xDC80<=code<=0xDCFF:`
`216`		`-ch=bytes((code-0xDC00,))`
`217`		`-else:`
`218`		`-raiseUnicodeEncodeError(FS_ENCODING,`
`219`		`-fn,index,index+1,`
`220`		`-'ordinal not in range(128)')`
`221`		`-encoded.append(ch)`
`222`		`-returnbytes().join(encoded)`
`223`		`-elifFS_ENCODING=='utf-8':`
`224`		`-# UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF`
`225`		`-# doesn't go through our error handler`
`226`		`-encoded= []`
`227`		`-forindex,chinenumerate(fn):`
`228`		`-code=ord(ch)`
`229`		`-if0xD800<=code<=0xDFFF:`
`230`		`-if0xDC80<=code<=0xDCFF:`
`231`		`-ch=bytes((code-0xDC00,))`
`232`		`-encoded.append(ch)`
`233`		`-else:`
`234`		`-raiseUnicodeEncodeError(`
`235`		`-FS_ENCODING,`
`236`		`-fn,index,index+1,'surrogates not allowed')`
`237`		`-else:`
`238`		`-ch_utf8=ch.encode('utf-8')`
`239`		`-encoded.append(ch_utf8)`
`240`		`-returnbytes().join(encoded)`
`241`		`-returnfn.encode(FS_ENCODING,FS_ERRORS)`
`242`		`-`
`243`		`-defdecodefilename(fn):`
`244`		`-returnfn.decode(FS_ENCODING,FS_ERRORS)`
`245`		`-`
`246`		`-FS_ENCODING='ascii';fn=b('[abc\xff]');encoded=u('[abc\udcff]')`
`247`		`-# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')`
`248`		`-# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')`
`249`		`-`
`250`		`-`
`251`		`-# normalize the filesystem encoding name.`
`252`		`-# For example, we expect "utf-8", not "UTF8".`
`253`		`-FS_ENCODING=codecs.lookup(FS_ENCODING).name`
`254`		`-`
`255`		`-`
`256`		`-defregister_surrogateescape():`
`257`		`-"""`
`258`		`- Registers the surrogateescape error handler on Python 2 (only)`
`259`		`- """`
`260`		`-ifPY3:`
`261`		`-return`
`262`		`-try:`
`263`		`-codecs.lookup_error(FS_ERRORS)`
`264`		`-exceptLookupError:`
`265`		`-codecs.register_error(FS_ERRORS,surrogateescape_handler)`
`266`		`-`
`267`		`-`
`268`		`-try:`
`269`		`-b"100644\x9f\0aaa".decode(defenc,"surrogateescape")`
`270`		`-exceptException:`
`271`		`-register_surrogateescape()`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Commit 05cf33a

File tree

1 file changed

1 file changed

`‎git/compat.py‎`

0 commit comments