gitpython-developers/GitPythonPublic

NotificationsYou must be signed in to change notification settings
Fork966
Star5.1k

compat.py

Latest commit

History

313 lines (261 loc) · 9.13 KB

compat.py

File metadata and controls

313 lines (261 loc) · 9.13 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

# -*- coding: utf-8 -*-

# compat.py

# This module is part of GitPython and is released under

# the BSD License: http://www.opensource.org/licenses/bsd-license.php

"""utilities to help provide compatibility with python 3"""

# flake8: noqa

importlocale

importos

importsys

importcodecs

fromgitdb.utils.compatimport (

xrange,

MAXSIZE,# @UnusedImport

izip,# @UnusedImport

)

fromgitdb.utils.encodingimport (

string_types,# @UnusedImport

text_type,# @UnusedImport

force_bytes,# @UnusedImport

force_text# @UnusedImport

)

# True when the running interpreter is Python 3 or newer.
PY3 = sys.version_info[0] >= 3

# Platform flags.
# ``os.name`` only reports a coarse OS family (e.g. 'posix' or 'nt') and is
# never 'darwin', so macOS has to be detected through ``sys.platform``.
is_win = (os.name == 'nt')
is_posix = (os.name == 'posix')
is_darwin = (sys.platform == 'darwin')

# Default string encoding reported by the interpreter.
defenc = sys.getdefaultencoding()

# Version-dependent aliases and helpers so that the rest of the code base can
# use a single spelling on both Python 2 and Python 3.
if not PY3:
    FileType = file  # @UndefinedVariable on PY3

    # The default encoding is usually plain ascii, which may not be enough
    # for our encoding needs. Unless it was set to something else on purpose,
    # fall back to utf-8.
    if defenc == 'ascii':
        defenc = 'utf-8'

    unicode = unicode
    binary_type = str
    bchr = chr
    byte_ord = ord

    def mviter(d):
        """Return an iterator over the values of mapping *d*."""
        return d.itervalues()
else:
    import io

    FileType = io.IOBase
    unicode = str
    binary_type = bytes

    def mviter(d):
        """Return the values view of mapping *d*."""
        return d.values()

    def bchr(n):
        """Return a bytes object of length one holding the integer *n*."""
        return bytes((n,))

    def byte_ord(b):
        """Return *b* unchanged."""
        return b

# Both branches bind ``range`` to the imported ``xrange``.
range = xrange  # @ReservedAssignment

def safe_decode(s):
    """Return *s* as text.

    Text is passed through untouched, bytes are decoded with ``defenc`` using
    the 'surrogateescape' error handler, and ``None`` yields ``None``.

    :raise TypeError: if *s* is neither text, bytes nor ``None``
    """
    if s is None:
        return None
    if isinstance(s, unicode):
        return s
    if isinstance(s, bytes):
        return s.decode(defenc, 'surrogateescape')
    raise TypeError('Expected bytes or text, but got %r' % (s,))

def safe_encode(s):
    """Safely encode a text string to bytes.

    Text is encoded with ``defenc``, bytes are returned unchanged, and
    ``None`` yields ``None``.

    :raise TypeError: if *s* is neither text, bytes nor ``None``
    """
    if isinstance(s, unicode):
        return s.encode(defenc)
    elif isinstance(s, bytes):
        return s
    elif s is not None:
        raise TypeError('Expected bytes or text, but got %r' % (s,))

def win_encode(s):
    """Encode text for use as a process argument on Windows.

    Text is encoded with ``locale.getpreferredencoding(False)``, bytes are
    returned unchanged, and ``None`` yields ``None``.

    :raise TypeError: if *s* is neither text, bytes nor ``None``
    """
    if s is None:
        return None
    if isinstance(s, unicode):
        preferred = locale.getpreferredencoding(False)
        return s.encode(preferred)
    if isinstance(s, bytes):
        return s
    raise TypeError('Expected bytes or text, but got %r' % (s,))

def with_metaclass(meta, *bases):
    """Return a temporary base class that applies metaclass *meta*.

    copied from https://github.com/Byron/bcore/blob/master/src/python/butility/future.py#L15

    The returned helper is meant to be used as the sole base of a class
    statement. When that class is created, the helper's ``__new__`` builds the
    final class as ``meta(name, bases, d)``, i.e. with the *bases* given here.

    :param meta: the metaclass to apply
    :param bases: base classes of the class that is finally created
    """
    class metaclass(meta):
        __call__ = type.__call__
        __init__ = type.__init__

        def __new__(cls, name, nbases, d):
            # ``None`` marks the creation of the temporary helper itself,
            # see the call at the bottom of with_metaclass.
            if nbases is None:
                return type.__new__(cls, name, (), d)
            # There may be clients who rely on this attribute to be set to a
            # reasonable value, which is why we set the __metaclass__
            # attribute explicitly, unless the class body already provides it.
            if not PY3 and '__metaclass__' not in d:
                d['__metaclass__'] = meta
            return meta(name, bases, d)

    return metaclass(meta.__name__ + 'Helper', None, {})

## From https://docs.python.org/3.3/howto/pyporting.html

class UnicodeMixin(object):

    """Mixin that derives ``__str__`` from a subclass' ``__unicode__``.

    On Python 3 the text is returned as is, on Python 2 it is encoded
    with ``defenc``."""

    if not PY3:  # Python 2
        def __str__(self):
            text = self.__unicode__()
            return text.encode(defenc)
    else:
        def __str__(self):
            return self.__unicode__()

"""

This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error

handler of Python 3.

Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc

"""

# This code is released under the Python license and the BSD 2-clause license

# Name of the codec error handler passed to encode()/decode() by the filename
# helpers in this module, and the name under which the pure-Python handler is
# registered.
FS_ERRORS='surrogateescape'

# Alternative handler name, kept from the upstream implementation (disabled):
# # -- Python 2/3 compatibility -------------------------------------
# FS_ERRORS = 'my_surrogateescape'

def u(text):
    """Return *text* as a text string.

    On Python 3 *text* is returned unchanged; on Python 2 it is decoded with
    the 'unicode_escape' codec.
    """
    return text if PY3 else text.decode('unicode_escape')

def b(data):
    """Return *data* as a byte string.

    On Python 3 *data* is encoded with 'latin1'; on Python 2 it is returned
    unchanged.
    """
    return data.encode('latin1') if PY3 else data

# _unichr: code point -> one-character text string
# bytes_chr: integer  -> byte string of length one
if not PY3:
    _unichr = unichr
    bytes_chr = chr
else:
    _unichr = chr

    def bytes_chr(code):
        return bytes((code,))

def surrogateescape_handler(exc):
    """
    Codec error handler: a pure Python version of PEP 383 "surrogateescape".

    The offending slice ``exc.object[exc.start:exc.end]`` is converted with
    replace_surrogate_decode() for a UnicodeDecodeError and with
    replace_surrogate_encode() for a UnicodeEncodeError.

    Returns a ``(replacement, exc.end)`` tuple. *exc* itself is re-raised if
    it is neither of the two error types or if the conversion signals
    NotASurrogateError.
    """
    fragment = exc.object[exc.start:exc.end]

    if isinstance(exc, UnicodeDecodeError):
        # the fragment is a byte string in this case
        convert = replace_surrogate_decode
    elif isinstance(exc, UnicodeEncodeError):
        # NOTE: according to the upstream author, both Python 2.x and 3.x
        # still raise after this handler returns for input such as
        # u'\udcc3'.encode('ascii', <this handler>), apparently because the
        # strict encoder is applied to the string returned from here.
        convert = replace_surrogate_encode
    else:
        raise exc

    try:
        replacement = convert(fragment)
    except NotASurrogateError:
        raise exc
    return (replacement, exc.end)

class NotASurrogateError(Exception):
    """Raised by the replace_surrogate_* helpers for input they cannot map."""

def replace_surrogate_encode(mystring):
    """
    Map every character of *mystring* from U+DC00..U+DCFF back to the
    character with code point ``code - 0xDC00``.

    Returns a (unicode) string, not the more logical bytes, because the codecs
    register_error functionality expects this.

    Raises NotASurrogateError for any character outside U+DC00..U+DCFF, so
    that surrogateescape_handler() fails with the original exception.
    """
    decoded = []
    for ch in mystring:
        code = ord(ch)
        # Only U+DC00..U+DCFF can be mapped back. Anything else (including
        # the high surrogates U+D800..U+DBFF, for which ``code - 0xDC00``
        # would be negative) is rejected.
        if not 0xDC00 <= code <= 0xDCFF:
            raise NotASurrogateError
        # NOTE: U+DC00..U+DC7F is accepted as well, as in the original
        # implementation (which questioned whether this is a good idea).
        decoded.append(_unichr(code - 0xDC00))
    return str().join(decoded)

def replace_surrogate_decode(mybytes):
    """
    Turn the byte values of *mybytes* into a (unicode) string.

    Values up to 0x7F become the character with the same code point, values
    in 0x80..0xFF become U+DC80..U+DCFF, and anything larger raises
    NotASurrogateError.
    """
    chars = []
    for item in mybytes:
        # Iterating may yield ints (newbytes) or length-one native strings
        # (str on Py2).
        code = item if isinstance(item, int) else ord(item)
        if code <= 0x7F:
            chars.append(_unichr(code))
        elif code <= 0xFF:
            # 0x80..0xFF: escape as U+DC80..U+DCFF
            chars.append(_unichr(0xDC00 + code))
        else:
            raise NotASurrogateError
    return str().join(chars)

def encodefilename(fn):
    """Encode the text *fn* to bytes according to FS_ENCODING.

    For 'ascii' and 'utf-8' the characters U+DC80..U+DCFF are turned back
    into the single bytes 0x80..0xFF by hand; any other encoding is handled
    by ``fn.encode(FS_ENCODING, FS_ERRORS)``.

    :raise UnicodeEncodeError: for a character that cannot be represented
    """
    if FS_ENCODING == 'ascii':
        # ASCII encoder of Python 2 expects that the error handler returns a
        # Unicode string encodable to ASCII, whereas our surrogateescape error
        # handler has to return bytes in 0x80-0xFF range.
        chunks = []
        for pos, char in enumerate(fn):
            cp = ord(char)
            if cp < 128:
                chunks.append(bytes_chr(cp))
            elif 0xDC80 <= cp <= 0xDCFF:
                chunks.append(bytes_chr(cp - 0xDC00))
            else:
                raise UnicodeEncodeError(FS_ENCODING,
                                         fn, pos, pos + 1,
                                         'ordinal not in range(128)')
        return bytes().join(chunks)

    if FS_ENCODING == 'utf-8':
        # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF
        # doesn't go through our error handler
        chunks = []
        for pos, char in enumerate(fn):
            cp = ord(char)
            if not 0xD800 <= cp <= 0xDFFF:
                chunks.append(char.encode('utf-8'))
            elif 0xDC80 <= cp <= 0xDCFF:
                chunks.append(bytes_chr(cp - 0xDC00))
            else:
                raise UnicodeEncodeError(
                    FS_ENCODING,
                    fn, pos, pos + 1, 'surrogates not allowed')
        return bytes().join(chunks)

    return fn.encode(FS_ENCODING, FS_ERRORS)

def decodefilename(fn):
    """Decode *fn* with FS_ENCODING, using the FS_ERRORS error handler."""
    decoded_name = fn.decode(FS_ENCODING, FS_ERRORS)
    return decoded_name

# Active configuration: encoding name plus a sample byte string and its
# escaped text form.
FS_ENCODING = 'ascii'
fn = b('[abc\xff]')
encoded = u('[abc\udcff]')

# Alternative configurations (disabled):
# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')

# Normalize the encoding name through the codec registry.
# For example, we expect "utf-8", not "UTF8".
FS_ENCODING = codecs.lookup(FS_ENCODING).name

def register_surrogateescape():
    """
    Install surrogateescape_handler under the name FS_ERRORS on Python 2.

    Does nothing on Python 3, or when a handler with that name is already
    registered.
    """
    if not PY3:
        try:
            codecs.lookup_error(FS_ERRORS)
        except LookupError:
            # no such handler yet, so provide ours
            codecs.register_error(FS_ERRORS, surrogateescape_handler)

# Probe at import time whether the "surrogateescape" error handler is usable
# by decoding a byte string with it. If the interpreter does not know the
# handler, the codec machinery raises LookupError and the pure-Python
# implementation is registered instead. Only LookupError is caught so that
# unrelated failures (and KeyboardInterrupt/SystemExit) are not swallowed.
try:
    b"100644\x9f\0aaa".decode(defenc, "surrogateescape")
except LookupError:
    register_surrogateescape()

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

compat.py

Latest commit

History

compat.py

File metadata and controls

Movatterモバイル変換

Uh oh!

FilesExpand file tree

compat.py

Latest commit

History

compat.py

File metadata and controls