gitpython-developers/GitPython (Public)

NotificationsYou must be signed in to change notification settings
Fork 940
Star 4.9k

Commit ec731f4

committed

Merge with #532, fix unicode filenames with surrogateescape

2 parents b2efa1b + 9e4a454 · commit ec731f4 (Copy full SHA for ec731f4)

File tree

7 files changed

+209

-18

lines changed

VERSION
git
- compat.py
- ext
  - gitdb
- objects
  - fun.py
- test
  - performance
    - test_commit.py
  - test_fun.py
setup.py

7 files changed

+209

-18

lines changed

`‎VERSION`

Lines changed: 1 addition & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-2.0.9dev0`
	`1`	`+2.0.10dev0`

`‎git/compat.py`

Lines changed: 191 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -10,6 +10,8 @@`
`10`	`10`	`importlocale`
`11`	`11`	`importos`
`12`	`12`	`importsys`
	`13`	`+importcodecs`
	`14`	`+`
`13`	`15`
`14`	`16`	`fromgitdb.utils.compatimport (`
`15`	`17`	`xrange,`
`@@ -67,7 +69,7 @@ def safe_decode(s):`
`67`	`69`	`ifisinstance(s,unicode):`
`68`	`70`	`returns`
`69`	`71`	`elifisinstance(s,bytes):`
`70`		`-returns.decode(defenc,'replace')`
	`72`	`+returns.decode(defenc,'surrogateescape')`
`71`	`73`	`elifsisnotNone:`
`72`	`74`	`raiseTypeError('Expected bytes or text, but got %r'% (s,))`
`73`	`75`
`@@ -121,3 +123,191 @@ def __str__(self):`
`121`	`123`	`else:# Python 2`
`122`	`124`	`def__str__(self):`
`123`	`125`	`returnself.__unicode__().encode(defenc)`
	`126`	`+`
	`127`	`+`
	`128`	`+"""`
	`129`	`+This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error`
	`130`	`+handler of Python 3.`
	`131`	`+Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc`
	`132`	`+"""`
	`133`	`+`
	`134`	`+# This code is released under the Python license and the BSD 2-clause license`
	`135`	`+`
	`136`	`+`
	`137`	`+FS_ERRORS='surrogateescape'`
	`138`	`+`
	`139`	`+# # -- Python 2/3 compatibility -------------------------------------`
	`140`	`+# FS_ERRORS = 'my_surrogateescape'`
	`141`	`+`
	`142`	`+defu(text):`
	`143`	`+ifPY3:`
	`144`	`+returntext`
	`145`	`+else:`
	`146`	`+returntext.decode('unicode_escape')`
	`147`	`+`
	`148`	`+defb(data):`
	`149`	`+ifPY3:`
	`150`	`+returndata.encode('latin1')`
	`151`	`+else:`
	`152`	`+returndata`
	`153`	`+`
	`154`	`+ifPY3:`
	`155`	`+_unichr=chr`
	`156`	`+bytes_chr=lambdacode:bytes((code,))`
	`157`	`+else:`
	`158`	`+_unichr=unichr`
	`159`	`+bytes_chr=chr`
	`160`	`+`
	`161`	`+defsurrogateescape_handler(exc):`
	`162`	`+"""`
	`163`	`+ Pure Python implementation of the PEP 383: the "surrogateescape" error`
	`164`	`+ handler of Python 3. Undecodable bytes will be replaced by a Unicode`
	`165`	`+ character U+DCxx on decoding, and these are translated into the`
	`166`	`+ original bytes on encoding.`
	`167`	`+ """`
	`168`	`+mystring=exc.object[exc.start:exc.end]`
	`169`	`+`
	`170`	`+try:`
	`171`	`+ifisinstance(exc,UnicodeDecodeError):`
	`172`	`+# mystring is a byte-string in this case`
	`173`	`+decoded=replace_surrogate_decode(mystring)`
	`174`	`+elifisinstance(exc,UnicodeEncodeError):`
	`175`	`+# In the case of u'\udcc3'.encode('ascii',`
	`176`	`+# 'this_surrogateescape_handler'), both Python 2.x and 3.x raise an`
	`177`	`+# exception anyway after this function is called, even though I think`
	`178`	`+# it's doing what it should. It seems that the strict encoder is called`
	`179`	`+# to encode the unicode string that this function returns ...`
	`180`	`+decoded=replace_surrogate_encode(mystring)`
	`181`	`+else:`
	`182`	`+raiseexc`
	`183`	`+exceptNotASurrogateError:`
	`184`	`+raiseexc`
	`185`	`+return (decoded,exc.end)`
	`186`	`+`
	`187`	`+`
	`188`	`+classNotASurrogateError(Exception):`
	`189`	`+pass`
	`190`	`+`
	`191`	`+`
	`192`	`+defreplace_surrogate_encode(mystring):`
	`193`	`+"""`
	`194`	`+ Returns a (unicode) string, not the more logical bytes, because the codecs`
	`195`	`+ register_error functionality expects this.`
	`196`	`+ """`
	`197`	`+decoded= []`
	`198`	`+forchinmystring:`
	`199`	`+# if PY3:`
	`200`	`+# code = ch`
	`201`	`+# else:`
	`202`	`+code=ord(ch)`
	`203`	`+`
	`204`	`+# The following magic comes from Py3.3's Python/codecs.c file:`
	`205`	`+ifnot0xD800<=code<=0xDCFF:`
	`206`	`+# Not a surrogate. Fail with the original exception.`
	`207`	`+raiseexc`
	`208`	`+# mybytes = [0xe0 \| (code >> 12),`
	`209`	`+# 0x80 \| ((code >> 6) & 0x3f),`
	`210`	`+# 0x80 \| (code & 0x3f)]`
	`211`	`+# Is this a good idea?`
	`212`	`+if0xDC00<=code<=0xDC7F:`
	`213`	`+decoded.append(_unichr(code-0xDC00))`
	`214`	`+elifcode<=0xDCFF:`
	`215`	`+decoded.append(_unichr(code-0xDC00))`
	`216`	`+else:`
	`217`	`+raiseNotASurrogateError`
	`218`	`+returnstr().join(decoded)`
	`219`	`+`
	`220`	`+`
	`221`	`+defreplace_surrogate_decode(mybytes):`
	`222`	`+"""`
	`223`	`+ Returns a (unicode) string`
	`224`	`+ """`
	`225`	`+decoded= []`
	`226`	`+forchinmybytes:`
	`227`	`+# We may be parsing newbytes (in which case ch is an int) or a native`
	`228`	`+# str on Py2`
	`229`	`+ifisinstance(ch,int):`
	`230`	`+code=ch`
	`231`	`+else:`
	`232`	`+code=ord(ch)`
	`233`	`+if0x80<=code<=0xFF:`
	`234`	`+decoded.append(_unichr(0xDC00+code))`
	`235`	`+elifcode<=0x7F:`
	`236`	`+decoded.append(_unichr(code))`
	`237`	`+else:`
	`238`	`+# # It may be a bad byte`
	`239`	`+# # Try swallowing it.`
	`240`	`+# continue`
	`241`	`+# print("RAISE!")`
	`242`	`+raiseNotASurrogateError`
	`243`	`+returnstr().join(decoded)`
	`244`	`+`
	`245`	`+`
	`246`	`+defencodefilename(fn):`
	`247`	`+ifFS_ENCODING=='ascii':`
	`248`	`+# ASCII encoder of Python 2 expects that the error handler returns a`
	`249`	`+# Unicode string encodable to ASCII, whereas our surrogateescape error`
	`250`	`+# handler has to return bytes in 0x80-0xFF range.`
	`251`	`+encoded= []`
	`252`	`+forindex,chinenumerate(fn):`
	`253`	`+code=ord(ch)`
	`254`	`+ifcode<128:`
	`255`	`+ch=bytes_chr(code)`
	`256`	`+elif0xDC80<=code<=0xDCFF:`
	`257`	`+ch=bytes_chr(code-0xDC00)`
	`258`	`+else:`
	`259`	`+raiseUnicodeEncodeError(FS_ENCODING,`
	`260`	`+fn,index,index+1,`
	`261`	`+'ordinal not in range(128)')`
	`262`	`+encoded.append(ch)`
	`263`	`+returnbytes().join(encoded)`
	`264`	`+elifFS_ENCODING=='utf-8':`
	`265`	`+# UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF`
	`266`	`+# doesn't go through our error handler`
	`267`	`+encoded= []`
	`268`	`+forindex,chinenumerate(fn):`
	`269`	`+code=ord(ch)`
	`270`	`+if0xD800<=code<=0xDFFF:`
	`271`	`+if0xDC80<=code<=0xDCFF:`
	`272`	`+ch=bytes_chr(code-0xDC00)`
	`273`	`+encoded.append(ch)`
	`274`	`+else:`
	`275`	`+raiseUnicodeEncodeError(`
	`276`	`+FS_ENCODING,`
	`277`	`+fn,index,index+1,'surrogates not allowed')`
	`278`	`+else:`
	`279`	`+ch_utf8=ch.encode('utf-8')`
	`280`	`+encoded.append(ch_utf8)`
	`281`	`+returnbytes().join(encoded)`
	`282`	`+else:`
	`283`	`+returnfn.encode(FS_ENCODING,FS_ERRORS)`
	`284`	`+`
	`285`	`+defdecodefilename(fn):`
	`286`	`+returnfn.decode(FS_ENCODING,FS_ERRORS)`
	`287`	`+`
	`288`	`+FS_ENCODING='ascii';fn=b('[abc\xff]');encoded=u('[abc\udcff]')`
	`289`	`+# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')`
	`290`	`+# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')`
	`291`	`+`
	`292`	`+`
	`293`	`+# normalize the filesystem encoding name.`
	`294`	`+# For example, we expect "utf-8", not "UTF8".`
	`295`	`+FS_ENCODING=codecs.lookup(FS_ENCODING).name`
	`296`	`+`
	`297`	`+`
	`298`	`+defregister_surrogateescape():`
	`299`	`+"""`
	`300`	`+ Registers the surrogateescape error handler on Python 2 (only)`
	`301`	`+ """`
	`302`	`+ifPY3:`
	`303`	`+return`
	`304`	`+try:`
	`305`	`+codecs.lookup_error(FS_ERRORS)`
	`306`	`+exceptLookupError:`
	`307`	`+codecs.register_error(FS_ERRORS,surrogateescape_handler)`
	`308`	`+`
	`309`	`+`
	`310`	`+try:`
	`311`	`+b"100644\x9f\0aaa".decode(defenc,"surrogateescape")`
	`312`	`+except:`
	`313`	`+register_surrogateescape()`

`‎git/ext/gitdb`

Submodule gitdb updated 10 files

`‎git/objects/fun.py`

Lines changed: 2 additions & 5 deletions

Original file line number	Diff line number	Diff line change
`@@ -2,6 +2,7 @@`
`2`	`2`	`fromstatimportS_ISDIR`
`3`	`3`	`fromgit.compatimport (`
`4`	`4`	`byte_ord,`
	`5`	`+safe_decode,`
`5`	`6`	`defenc,`
`6`	`7`	`xrange,`
`7`	`8`	`text_type,`
`@@ -76,11 +77,7 @@ def tree_entries_from_data(data):`
`76`	`77`	`# default encoding for strings in git is utf8`
`77`	`78`	`# Only use the respective unicode object if the byte stream was encoded`
`78`	`79`	`name=data[ns:i]`
`79`		`-try:`
`80`		`-name=name.decode(defenc)`
`81`		`-exceptUnicodeDecodeError:`
`82`		`-pass`
`83`		`-# END handle encoding`
	`80`	`+name=safe_decode(name)`
`84`	`81`
`85`	`82`	`# byte is NULL, get next 20`
`86`	`83`	`i+=1`

`‎git/test/performance/test_commit.py`

Lines changed: 1 addition & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -52,7 +52,7 @@ def test_iteration(self):`
`52`	`52`	`# END for each object`
`53`	`53`	`# END for each commit`
`54`	`54`	`elapsed_time=time()-st`
`55`		`-print("Traversed %i Trees and a total of %iunchached objects in %s [s] ( %f objs/s )"`
	`55`	`+print("Traversed %i Trees and a total of %iuncached objects in %s [s] ( %f objs/s )"`
`56`	`56`	`% (nc,no,elapsed_time,no/elapsed_time),file=sys.stderr)`
`57`	`57`
`58`	`58`	`deftest_commit_traversal(self):`

`‎git/test/test_fun.py`

Lines changed: 11 additions & 7 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,10 +1,8 @@`
`1`	`1`	`fromioimportBytesIO`
`2`		`-fromstatimport (`
`3`		`-S_IFDIR,`
`4`		`-S_IFREG,`
`5`		`-S_IFLNK`
`6`		`-)`
	`2`	`+fromstatimportS_IFDIR,S_IFREG,S_IFLNK`
	`3`	`+fromunittest.caseimportskipIf`
`7`	`4`
	`5`	`+fromgit.compatimportPY3`
`8`	`6`	`fromgit.indeximportIndexFile`
`9`	`7`	`fromgit.index.funimport (`
`10`	`8`	`aggressive_tree_merge`
`@@ -253,6 +251,12 @@ def test_tree_traversal_single(self):`
`253`	`251`	`assertentries`
`254`	`252`	`# END for each commit`
`255`	`253`
`256`		`-deftest_tree_entries_from_data_with_failing_name_decode(self):`
	`254`	`+@skipIf(PY3,'odd types returned ... maybe figure it out one day')`
	`255`	`+deftest_tree_entries_from_data_with_failing_name_decode_py2(self):`
	`256`	`+r=tree_entries_from_data(b'100644\x9f\0aaa')`
	`257`	`+assertr== [('aaa',33188,u'\udc9f')],r`
	`258`	`+`
	`259`	`+@skipIf(notPY3,'odd types returned ... maybe figure it out one day')`
	`260`	`+deftest_tree_entries_from_data_with_failing_name_decode_py3(self):`
`257`	`261`	`r=tree_entries_from_data(b'100644\x9f\0aaa')`
`258`		`-assertr== [(b'aaa',33188,b'\x9f')],r`
	`262`	`+assertr== [(b'aaa',33188,'\udc9f')],r`

`‎setup.py`

Lines changed: 2 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -64,7 +64,7 @@ def _stamp_version(filename):`
`64`	`64`	`else:`
`65`	`65`	`print("WARNING: Couldn't find version line in file %s"%filename,file=sys.stderr)`
`66`	`66`
`67`		`-install_requires= ['gitdb >=0.6.4']`
	`67`	`+install_requires= ['gitdb2 >=2.0.0']`
`68`	`68`	`extras_require= {`
`69`	`69`	`':python_version == "2.6"': ['ordereddict'],`
`70`	`70`	`}`
`@@ -100,7 +100,7 @@ def _stamp_version(filename):`
`100`	`100`	`package_data={'git.test': ['fixtures/*']},`
`101`	`101`	`package_dir={'git':'git'},`
`102`	`102`	`license="BSD License",`
`103`		`-requires=['gitdb (>=0.6.4)'],`
	`103`	`+requires=['gitdb2 (>=2.0.0)'],`
`104`	`104`	`install_requires=install_requires,`
`105`	`105`	`test_requirements=test_requires+install_requires,`
`106`	`106`	`zip_safe=False,`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Commit ec731f4

File tree

7 files changed

7 files changed

`‎VERSION`

`‎git/compat.py`

`‎git/ext/gitdb`

`‎git/objects/fun.py`

`‎git/test/performance/test_commit.py`

`‎git/test/test_fun.py`

`‎setup.py`

0 commit comments