|
10 | 10 | importlocale |
11 | 11 | importos |
12 | 12 | importsys |
13 | | -importcodecs |
14 | 13 |
|
15 | 14 |
|
16 | 15 | fromgitdb.utils.compatimport ( |
@@ -91,181 +90,3 @@ def __str__(self): |
91 | 90 | else:# Python 2 |
92 | 91 | def__str__(self): |
93 | 92 | returnself.__unicode__().encode(defenc) |
94 | | - |
95 | | - |
96 | | -""" |
97 | | -This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error |
98 | | -handler of Python 3. |
99 | | -Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc |
100 | | -""" |
101 | | - |
102 | | -# This code is released under the Python license and the BSD 2-clause license |
103 | | - |
104 | | - |
# Name of the codec error handler used for filesystem-style round-tripping.
# NOTE: an alternative name, 'my_surrogateescape', was previously kept here
# commented out under a "Python 2/3 compatibility" heading.
FS_ERRORS = "surrogateescape"
109 | | - |
def u(text):
    """
    Return ``text`` unchanged when ``PY3`` is truthy; otherwise return
    ``text.decode('unicode_escape')``.
    """
    return text if PY3 else text.decode('unicode_escape')
114 | | - |
def b(data):
    """
    Return ``data.encode('latin1')`` when ``PY3`` is truthy; otherwise return
    ``data`` unchanged.
    """
    return data.encode('latin1') if PY3 else data
119 | | - |
def surrogateescape_handler(exc):
    """
    Pure Python implementation of the PEP 383 "surrogateescape" error handler
    of Python 3.

    On decoding, undecodable bytes are replaced by a Unicode character U+DCxx;
    on encoding, those characters are translated back into the original bytes.

    ``exc`` is the UnicodeDecodeError / UnicodeEncodeError being handled. The
    result is the tuple ``(replacement, exc.end)``. Any other exception type,
    and any input the helpers reject with NotASurrogateError, results in
    ``exc`` itself being raised.
    """
    # The offending slice: a byte-string for decode errors, text for encode
    # errors.
    bad_part = exc.object[exc.start:exc.end]

    if not isinstance(exc, (UnicodeDecodeError, UnicodeEncodeError)):
        raise exc

    try:
        if isinstance(exc, UnicodeDecodeError):
            replacement = replace_surrogate_decode(bad_part)
        else:
            # In the case of u'\udcc3'.encode('ascii',
            # 'this_surrogateescape_handler'), both Python 2.x and 3.x raise
            # an exception anyway after this function is called. It seems
            # that the strict encoder is called to encode the unicode string
            # that this function returns ...
            replacement = replace_surrogate_encode(bad_part, exc)
    except NotASurrogateError:
        raise exc
    return (replacement, exc.end)
145 | | - |
146 | | - |
class NotASurrogateError(Exception):
    """Raised when a code point cannot be reversed by the escape scheme."""


def replace_surrogate_encode(mystring, exc):
    """
    Translate every escape surrogate in ``mystring`` back to the character
    whose ordinal is ``code - 0xDC00``.

    Returns a (unicode) string, not the more logical bytes, because the codecs
    register_error functionality expects this.

    ``exc`` is the original encoding error. It is raised unchanged as soon as
    a character outside U+D800..U+DCFF is met. Surrogates in U+D800..U+DBFF
    raise NotASurrogateError.
    """
    decoded = []
    for ch in mystring:
        code = ord(ch)

        # The following magic comes from Py3.3's Python/codecs.c file:
        if not 0xD800 <= code <= 0xDCFF:
            # Not a surrogate. Fail with the original exception.
            raise exc
        if code < 0xDC00:
            # U+D800..U+DBFF has no byte to map back to. The previous code
            # reached chr() with a negative argument here and failed with a
            # ValueError instead of the dedicated error below.
            raise NotASurrogateError
        # U+DC00..U+DC7F is accepted as well as U+DC80..U+DCFF, exactly as
        # before (the original author questioned whether that is a good idea).
        decoded.append(chr(code - 0xDC00))
    return str().join(decoded)
178 | | - |
179 | | - |
def replace_surrogate_decode(mybytes):
    """
    Returns a (unicode) string built from ``mybytes``: values up to 0x7F are
    kept as the character with that ordinal, values 0x80..0xFF become
    U+DC80..U+DCFF. Anything above 0xFF raises NotASurrogateError.
    """
    pieces = []
    for item in mybytes:
        # Items are ints when iterating a bytes object, or one-character
        # strings when iterating a native str on Py2.
        value = item if isinstance(item, int) else ord(item)
        if value <= 0x7F:
            pieces.append(chr(value))
        elif value <= 0xFF:
            pieces.append(chr(0xDC00 + value))
        else:
            raise NotASurrogateError
    return str().join(pieces)
203 | | - |
204 | | - |
def encodefilename(fn):
    """
    Encode the text ``fn`` with ``FS_ENCODING``, turning the escape characters
    U+DC80..U+DCFF back into the single bytes 0x80..0xFF.

    The 'ascii' and 'utf-8' encodings are handled by hand; every other
    encoding goes through ``fn.encode(FS_ENCODING, FS_ERRORS)``.
    Raises UnicodeEncodeError for characters the hand-written paths reject.
    """
    if FS_ENCODING == 'ascii':
        # ASCII encoder of Python 2 expects that the error handler returns a
        # Unicode string encodable to ASCII, whereas our surrogateescape error
        # handler has to return bytes in 0x80-0xFF range.
        chunks = []
        for pos, char in enumerate(fn):
            point = ord(char)
            if 0xDC80 <= point <= 0xDCFF:
                chunks.append(bytes((point - 0xDC00,)))
            elif point < 128:
                chunks.append(bytes((point,)))
            else:
                raise UnicodeEncodeError(FS_ENCODING,
                                         fn, pos, pos + 1,
                                         'ordinal not in range(128)')
        return bytes().join(chunks)

    if FS_ENCODING == 'utf-8':
        # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF
        # doesn't go through our error handler
        chunks = []
        for pos, char in enumerate(fn):
            point = ord(char)
            if not 0xD800 <= point <= 0xDFFF:
                chunks.append(char.encode('utf-8'))
                continue
            if not 0xDC80 <= point <= 0xDCFF:
                raise UnicodeEncodeError(
                    FS_ENCODING,
                    fn, pos, pos + 1, 'surrogates not allowed')
            chunks.append(bytes((point - 0xDC00,)))
        return bytes().join(chunks)

    return fn.encode(FS_ENCODING, FS_ERRORS)
242 | | - |
def decodefilename(fn):
    """Decode ``fn`` using ``FS_ENCODING`` with the ``FS_ERRORS`` handler."""
    decoded_name = fn.decode(FS_ENCODING, FS_ERRORS)
    return decoded_name
245 | | - |
# Active sample configuration: the encoding name plus a byte-string and its
# escaped text counterpart.
FS_ENCODING = 'ascii'
fn = b('[abc\xff]')
encoded = u('[abc\udcff]')
# Alternative configurations that were left disabled:
# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')


# Normalize the filesystem encoding name through the codec registry.
# For example, we expect "utf-8", not "UTF8".
FS_ENCODING = codecs.lookup(FS_ENCODING).name
254 | | - |
255 | | - |
def register_surrogateescape():
    """
    Registers the surrogateescape error handler on Python 2 (only).

    Nothing happens when ``PY3`` is truthy, or when a handler is already
    registered under the ``FS_ERRORS`` name.
    """
    if not PY3:
        try:
            codecs.lookup_error(FS_ERRORS)
        except LookupError:
            # No handler under that name yet: install the pure-Python one.
            codecs.register_error(FS_ERRORS, surrogateescape_handler)
266 | | - |
267 | | - |
# Probe at import time: try decoding a byte-string with the "surrogateescape"
# handler and, if that fails for any reason, fall back to registering the
# pure-Python implementation.
try:
    bytes.decode(b"100644\x9f\0aaa", defenc, "surrogateescape")
except Exception:
    register_surrogateescape()