upstream/mercurial-mirror Commit - r49726:79009cca

1

# pycompat.py - portability shim for python 3

1

# pycompat.py - portability shim for python 3

2

#

2

#

3

# This software may be used and distributed according to the terms of the

3

# This software may be used and distributed according to the terms of the

4

# GNU General Public License version 2 or any later version.

4

# GNU General Public License version 2 or any later version.

5

6

"""Mercurial portability shim for python 3.

6

"""Mercurial portability shim for python 3.

7

8

This contains aliases to hide python version-specific details from the core.

8

This contains aliases to hide python version-specific details from the core.

9

"""

9

"""

10

11

from __future__ import absolute_import

11

from __future__ import absolute_import

12

13

import builtins

13

import builtins

14

import codecs

14

import concurrent.futures as futures

15

import concurrent.futures as futures

16

import functools

15

import getopt

17

import getopt

16

import http.client as httplib

18

import http.client as httplib

17

import http.cookiejar as cookielib

19

import http.cookiejar as cookielib

18

import inspect

20

import inspect

21

import io

19

import json

22

import json

20

import os

23

import os

21

import pickle

24

import pickle

22

import queue

25

import queue

23

import shlex

26

import shlex

24

import socketserver

27

import socketserver

28

import struct

25

import sys

29

import sys

26

import tempfile

30

import tempfile

27

import xmlrpc.client as xmlrpclib

31

import xmlrpc.client as xmlrpclib

28

32

33

29

ispy3 = sys.version_info[0] >= 3

34

ispy3 = sys.version_info[0] >= 3

30

ispypy = '__pypy__' in sys.builtin_module_names

35

ispypy = '__pypy__' in sys.builtin_module_names

31

TYPE_CHECKING = False

36

TYPE_CHECKING = False

32

37

33

if not globals(): # hide this from non-pytype users

38

if not globals(): # hide this from non-pytype users

34

import typing

39

import typing

35

40

36

TYPE_CHECKING = typing.TYPE_CHECKING

41

TYPE_CHECKING = typing.TYPE_CHECKING

37

42

38

43

39

def future_set_exception_info(f, exc_info):

44

def future_set_exception_info(f, exc_info):

40

f.set_exception(exc_info[0])

45

f.set_exception(exc_info[0])

41

46

42

47

43

FileNotFoundError = builtins.FileNotFoundError

48

FileNotFoundError = builtins.FileNotFoundError

44

49

45

50

46

def identity(a):

51

def identity(a):

47

return a

52

return a

48

53

49

54

50

def _rapply(f, xs):

55

def _rapply(f, xs):

51

if xs is None:

56

if xs is None:

52

# assume None means non-value of optional data

57

# assume None means non-value of optional data

53

return xs

58

return xs

54

if isinstance(xs, (list, set, tuple)):

59

if isinstance(xs, (list, set, tuple)):

55

return type(xs)(_rapply(f, x) for x in xs)

60

return type(xs)(_rapply(f, x) for x in xs)

56

if isinstance(xs, dict):

61

if isinstance(xs, dict):

57

return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())

62

return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())

58

return f(xs)

63

return f(xs)

59

64

60

65

61

def rapply(f, xs):

66

def rapply(f, xs):

62

"""Apply function recursively to every item preserving the data structure

67

"""Apply function recursively to every item preserving the data structure

63

68

64

>>> def f(x):

69

>>> def f(x):

65

... return 'f(%s)' % x

70

... return 'f(%s)' % x

66

>>> rapply(f, None) is None

71

>>> rapply(f, None) is None

67

True

72

True

68

>>> rapply(f, 'a')

73

>>> rapply(f, 'a')

69

'f(a)'

74

'f(a)'

70

>>> rapply(f, {'a'}) == {'f(a)'}

75

>>> rapply(f, {'a'}) == {'f(a)'}

71

True

76

True

72

>>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])

77

>>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])

73

['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

78

['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

74

79

75

>>> xs = [object()]

80

>>> xs = [object()]

76

>>> rapply(identity, xs) is xs

81

>>> rapply(identity, xs) is xs

77

True

82

True

78

"""

83

"""

79

if f is identity:

84

if f is identity:

80

# fast path mainly for py2

85

# fast path mainly for py2

81

return xs

86

return xs

82

return _rapply(f, xs)

87

return _rapply(f, xs)

83

88

84

89

85

if ispy3:

90

if os.name == r'nt' and sys.version_info >= (3, 6):

86

import builtins

91

# MBCS (or ANSI) filesystem encoding must be used as before.

87

import codecs

92

# Otherwise non-ASCII filenames in existing repositories would be

88

import functools

93

# corrupted.

89

import io

94

# This must be set once prior to any fsencode/fsdecode calls.

90

import struct

95

sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr

91

92

if os.name == r'nt' and sys.version_info >= (3, 6):

93

# MBCS (or ANSI) filesystem encoding must be used as before.

94

# Otherwise non-ASCII filenames in existing repositories would be

95

# corrupted.

96

# This must be set once prior to any fsencode/fsdecode calls.

97

sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr

98

96

99

fsencode = os.fsencode

97

fsencode = os.fsencode

100

fsdecode = os.fsdecode

98

fsdecode = os.fsdecode

101

oscurdir = os.curdir.encode('ascii')

99

oscurdir = os.curdir.encode('ascii')

102

oslinesep = os.linesep.encode('ascii')

100

oslinesep = os.linesep.encode('ascii')

103

osname = os.name.encode('ascii')

101

osname = os.name.encode('ascii')

104

ospathsep = os.pathsep.encode('ascii')

102

ospathsep = os.pathsep.encode('ascii')

105

ospardir = os.pardir.encode('ascii')

103

ospardir = os.pardir.encode('ascii')

106

ossep = os.sep.encode('ascii')

104

ossep = os.sep.encode('ascii')

107

osaltsep = os.altsep

105

osaltsep = os.altsep

108

if osaltsep:

106

if osaltsep:

109

osaltsep = osaltsep.encode('ascii')

107

osaltsep = osaltsep.encode('ascii')

110

osdevnull = os.devnull.encode('ascii')

108

osdevnull = os.devnull.encode('ascii')

111

109

112

sysplatform = sys.platform.encode('ascii')

110

sysplatform = sys.platform.encode('ascii')

113

sysexecutable = sys.executable

111

sysexecutable = sys.executable

114

if sysexecutable:

112

if sysexecutable:

115

sysexecutable = os.fsencode(sysexecutable)

113

sysexecutable = os.fsencode(sysexecutable)

116

bytesio = io.BytesIO

114

bytesio = io.BytesIO

117

# TODO deprecate stringio name, as it is a lie on Python 3.

115

# TODO deprecate stringio name, as it is a lie on Python 3.

118

stringio = bytesio

116

stringio = bytesio

119

117

120

def maplist(*args):

118

121

return list(map(*args))

119

def maplist(*args):

120

return list(map(*args))

122

121

123

def rangelist(*args):

122

124

return list(range(*args))

123

def rangelist(*args):

124

return list(range(*args))

125

126

def ziplist(*args):

126

127

return list(zip(*args))

127

def ziplist(*args):

128

return list(zip(*args))

129

128

130

129

rawinput = input

131

rawinput = input

130

getargspec = inspect.getfullargspec

132

getargspec = inspect.getfullargspec

131

133

132

long = int

134

long = int

133

135

134

if getattr(sys, 'argv', None) is not None:

136

if getattr(sys, 'argv', None) is not None:

135

# On POSIX, the char** argv array is converted to Python str using

137

# On POSIX, the char** argv array is converted to Python str using

136

# Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which

138

# Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which

137

# isn't directly callable from Python code. In practice, os.fsencode()

139

# isn't directly callable from Python code. In practice, os.fsencode()

138

# can be used instead (this is recommended by Python's documentation

140

# can be used instead (this is recommended by Python's documentation

139

# for sys.argv).

141

# for sys.argv).

140

#

142

#

141

# On Windows, the wchar_t **argv is passed into the interpreter as-is.

143

# On Windows, the wchar_t **argv is passed into the interpreter as-is.

142

# Like POSIX, we need to emulate what Py_EncodeLocale() would do. But

144

# Like POSIX, we need to emulate what Py_EncodeLocale() would do. But

143

# there's an additional wrinkle. What we really want to access is the

145

# there's an additional wrinkle. What we really want to access is the

144

# ANSI codepage representation of the arguments, as this is what

146

# ANSI codepage representation of the arguments, as this is what

145

# `int main()` would receive if Python 3 didn't define `int wmain()`

147

# `int main()` would receive if Python 3 didn't define `int wmain()`

146

# (this is how Python 2 worked). To get that, we encode with the mbcs

148

# (this is how Python 2 worked). To get that, we encode with the mbcs

147

# encoding, which will pass CP_ACP to the underlying Windows API to

149

# encoding, which will pass CP_ACP to the underlying Windows API to

148

# produce bytes.

150

# produce bytes.

149

if os.name == r'nt':

151

if os.name == r'nt':

150

sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]

152

sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]

151

else:

153

else:

152

sysargv = [fsencode(a) for a in sys.argv]

154

sysargv = [fsencode(a) for a in sys.argv]

153

155

154

bytechr = struct.Struct('>B').pack

156

bytechr = struct.Struct('>B').pack

155

byterepr = b'%r'.__mod__

157

byterepr = b'%r'.__mod__

156

157

class bytestr(bytes):

158

"""A bytes which mostly acts as a Python 2 str

159

158

160

>>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)

161

('', 'foo', 'ascii', '1')

162

>>> s = bytestr(b'foo')

163

>>> assert s is bytestr(s)

164

159

165

__bytes__() should be called if provided:

160

class bytestr(bytes):

161

"""A bytes which mostly acts as a Python 2 str

166

162

167

>>> class bytesable(object):

163

>>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)

168

... def __bytes__(self):

164

('', 'foo', 'ascii', '1')

169

... return b'bytes'

165

>>> s = bytestr(b'foo')

170

>>> bytestr(bytesable())

166

>>> assert s is bytestr(s)

171

'bytes'

167

168

__bytes__() should be called if provided:

172

169

173

There's no implicit conversion from non-ascii str as its encoding is

170

>>> class bytesable(object):

174

unknown:

171

... def __bytes__(self):

172

... return b'bytes'

173

>>> bytestr(bytesable())

174

'bytes'

175

176

>>> bytestr(chr(0x80)) # doctest: +ELLIPSIS

176

There's no implicit conversion from non-ascii str as its encoding is

177

Traceback (most recent call last):

177

unknown:

178

...

179

UnicodeEncodeError: ...

180

181

Comparison between bytestr and bytes should work:

182

178

183

>>> assert bytestr(b'foo') == b'foo'

179

>>> bytestr(chr(0x80)) # doctest: +ELLIPSIS

184

>>> assert b'foo' == bytestr(b'foo')

180

Traceback (most recent call last):

185

>>> assert b'f' in bytestr(b'foo')

181

...

186

>>> assert bytestr(b'f') in b'foo'

182

UnicodeEncodeError: ...

187

183

188

Sliced elements should be bytes, not integer:

184

Comparison between bytestr and bytes should work:

189

185

190

>>> s[1], s[:2]

186

>>> assert bytestr(b'foo') == b'foo'

191

(b'o', b'fo')

187

>>> assert b'foo' == bytestr(b'foo')

192

>>> list(s), list(reversed(s))

188

>>> assert b'f' in bytestr(b'foo')

193

([b'f', b'o', b'o'], [b'o', b'o', b'f'])

189

>>> assert bytestr(b'f') in b'foo'

194

195

As bytestr type isn't propagated across operations, you need to cast

196

bytes to bytestr explicitly:

197

190

198

>>> s = bytestr(b'foo').upper()

191

Sliced elements should be bytes, not integer:

199

>>> t = bytestr(s)

200

>>> s[0], t[0]

201

(70, b'F')

202

192

203

Be careful to not pass a bytestr object to a function which expects

193

>>> s[1], s[:2]

204

bytearray-like behavior.

194

(b'o', b'fo')

195

>>> list(s), list(reversed(s))

196

([b'f', b'o', b'o'], [b'o', b'o', b'f'])

205

197

206

>>> t = bytes(t) # cast to bytes

198

As bytestr type isn't propagated across operations, you need to cast

207

>>> assert type(t) is bytes

199

bytes to bytestr explicitly:

208

"""

209

200

210

# Trick pytype into not demanding Iterable[int] be passed to __new__(),

201

>>> s = bytestr(b'foo').upper()

211

# since the appropriate bytes format is done internally.

202

>>> t = bytestr(s)

212

#

203

>>> s[0], t[0]

213

# https://github.com/google/pytype/issues/500

204

(70, b'F')

214

if TYPE_CHECKING:

215

205

216

def __init__(self, s=b''):

206

Be careful to not pass a bytestr object to a function which expects

217

pass

207

bytearray-like behavior.

208

209

>>> t = bytes(t) # cast to bytes

210

>>> assert type(t) is bytes

211

"""

218

212

219

def __new__(cls, s=b''):

213

# Trick pytype into not demanding Iterable[int] be passed to __new__(),

220

if isinstance(s, bytestr):

214

# since the appropriate bytes format is done internally.

221

return s

215

#

222

if not isinstance(

216

# https://github.com/google/pytype/issues/500

223

s, (bytes, bytearray)

217

if TYPE_CHECKING:

224

) and not hasattr( # hasattr-py3-only

225

s, u'__bytes__'

226

):

227

s = str(s).encode('ascii')

228

return bytes.__new__(cls, s)

229

218

230

def __~~getitem~~__(self, ~~key~~):

219

def __init__(self, s=b''):

231

s = bytes.__getitem__(self, key)

220

pass

232

if not isinstance(s, bytes):

221

233

s = bytechr(s)

222

def __new__(cls, s=b''):

223

if isinstance(s, bytestr):

234

return s

224

return s

235

225

if not isinstance(

236

def __iter__(self):

226

s, (bytes, bytearray)

237

return iterbytestr(bytes.__iter__(self))

227

) and not hasattr( # hasattr-py3-only

238

228

s, u'__bytes__'

239

def __repr__(self):

229

):

240

return bytes.__repr__(self)[1:] # drop b''

230

s = str(s).encode('ascii')

231

return bytes.__new__(cls, s)

241

232

242

def iterbytestr(s):

233

def __getitem__(self, key):

243

"""Iterate bytes as if it were a str object of Python 2"""

234

s = bytes.__getitem__(self, key)

244

return map(bytechr, s)

235

if not isinstance(s, bytes):

245

236

s = bytechr(s)

246

def maybebytestr(s):

247

"""Promote bytes to bytestr"""

248

if isinstance(s, bytes):

249

return bytestr(s)

250

return s

237

return s

251

238

252

def ~~sysbytes~~(s):

239

def __iter__(self):

253

"""Convert an internal str (e.g. keyword, __doc__) back to bytes

240

return iterbytestr(bytes.__iter__(self))

241

242

def __repr__(self):

243

return bytes.__repr__(self)[1:] # drop b''

244

254

245

255

This never raises UnicodeEncodeError, but only ASCII characters

246

def iterbytestr(s):

256

can be round-trip by sysstr(sysbytes(s)).

247

"""Iterate bytes as if it were a str object of Python 2"""

257

"""

248

return map(bytechr, s)

258

if isinstance(s, bytes):

249

259

return s

260

return s.encode('utf-8')

261

250

262

def ~~sys~~str(s):

251

def maybebytestr(s):

263

"""Return a keyword str to be passed to Python functions such as

252

"""Promote bytes to bytestr"""

264

getattr() and str.encode()

253

if isinstance(s, bytes):

254

return bytestr(s)

255

return s

256

265

257

266

This never raises UnicodeDecodeError. Non-ascii characters are

258

def sysbytes(s):

267

considered invalid and mapped to arbitrary but unique code points

259

"""Convert an internal str (e.g. keyword, __doc__) back to bytes

268

such that 'sysstr(a) != sysstr(b)' for all 'a != b'.

260

269

"""

261

This never raises UnicodeEncodeError, but only ASCII characters

270

if isinstance(s, builtins.str):

262

can be round-trip by sysstr(sysbytes(s)).

271

return s

263

"""

272

return s.decode('latin-1')

264

if isinstance(s, bytes):

265

return s

266

return s.encode('utf-8')

267

273

268

274

def strurl(url):

269

def sysstr(s):

275

"""Converts a bytes url back to str"""

270

"""Return a keyword str to be passed to Python functions such as

276

if isinstance(url, bytes):

271

getattr() and str.encode()

277

return url.decode('ascii')

272

278

return url

273

This never raises UnicodeDecodeError. Non-ascii characters are

274

considered invalid and mapped to arbitrary but unique code points

275

such that 'sysstr(a) != sysstr(b)' for all 'a != b'.

276

"""

277

if isinstance(s, builtins.str):

278

return s

279

return s.decode('latin-1')

280

279

281

280

def ~~byte~~surl(url):

282

def strurl(url):

281

"""Converts a ~~str url to~~ bytes ~~by encoding in ascii~~"""

283

"""Converts a bytes url back to str"""

282

if isinstance(url, ~~str~~):

284

if isinstance(url, bytes):

283

return url.encode('ascii')

285

return url.decode('ascii')

284

return url

286

return url

287

285

288

286

def raisewithtb(exc, tb):

289

def bytesurl(url):

287

"""Raise exception with the given traceback"""

290

"""Converts a str url to bytes by encoding in ascii"""

288

raise exc.with_traceback(tb)

291

if isinstance(url, str):

292

return url.encode('ascii')

293

return url

289

294

290

def getdoc(obj):

295

291

"""Get docstring as bytes; may be None so gettext() won't confuse it

296

def raisewithtb(exc, tb):

292

with _('')"""

297

"""Raise exception with the given traceback"""

293

doc = getattr(obj, '__doc__', None)

298

raise exc.with_traceback(tb)

294

if doc is None:

299

295

return doc

296

return sysbytes(doc)

297

300

298

def _wrapattrfunc(f):

301

def getdoc(obj):

299

@functools.wraps(f)

302

"""Get docstring as bytes; may be None so gettext() won't confuse it

300

def w(object, name, *args):

303

with _('')"""

301

return f(object, sysstr(name), *args)

304

doc = getattr(obj, '__doc__', None)

305

if doc is None:

306

return doc

307

return sysbytes(doc)

302

308

303

return w

309

310

def _wrapattrfunc(f):

311

@functools.wraps(f)

312

def w(object, name, *args):

313

return f(object, sysstr(name), *args)

304

314

305

# these wrappers are automagically imported by hgloader

315

return w

306

delattr = _wrapattrfunc(builtins.delattr)

316

307

getattr = _wrapattrfunc(builtins.getattr)

308

hasattr = _wrapattrfunc(builtins.hasattr)

309

setattr = _wrapattrfunc(builtins.setattr)

310

xrange = builtins.range

311

unicode = str

312

317

313

def open(name, mode=b'r', buffering=-1, encoding=None):

318

# these wrappers are automagically imported by hgloader

314

return builtins.open(name, sysstr(mode), buffering, encoding)

319

delattr = _wrapattrfunc(builtins.delattr)

320

getattr = _wrapattrfunc(builtins.getattr)

321

hasattr = _wrapattrfunc(builtins.hasattr)

322

setattr = _wrapattrfunc(builtins.setattr)

323

xrange = builtins.range

324

unicode = str

315

325

316

safehasattr = _wrapattrfunc(builtins.hasattr)

326

327

def open(name, mode=b'r', buffering=-1, encoding=None):

328

return builtins.open(name, sysstr(mode), buffering, encoding)

329

317

330

318

def _getoptbwrapper(orig, args, shortlist, namelist):

331

safehasattr = _wrapattrfunc(builtins.hasattr)

319

"""

332

320

Takes bytes arguments, converts them to unicode, pass them to

333

321

getopt.getopt(), convert the returned values back to bytes and then

334

def _getoptbwrapper(orig, args, shortlist, namelist):

322

return them for Python 3 compatibility as getopt.getopt() don't accepts

335

"""

323

bytes on Python 3.

336

Takes bytes arguments, converts them to unicode, pass them to

324

"""

337

getopt.getopt(), convert the returned values back to bytes and then

325

args = [a.decode('latin-1') for a in args]

338

return them for Python 3 compatibility as getopt.getopt() don't accepts

326

shortlist = shortlist.decode('latin-1')

339

bytes on Python 3.

327

namelist = [a.decode('latin-1') for a in namelist]

340

"""

328

opts, args = orig(args, shortlist, namelist)

341

args = [a.decode('latin-1') for a in args]

329

opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]

342

shortlist = shortlist.decode('latin-1')

330

~~args~~ = [a.encode('latin-1') for a in ~~args~~]

343

namelist = [a.decode('latin-1') for a in namelist]

331

return opts, args

344

opts, args = orig(args, shortlist, namelist)

345

opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]

346

args = [a.encode('latin-1') for a in args]

347

return opts, args

348

332

349

333

def strkwargs(dic):

350

def strkwargs(dic):

334

"""

351

"""

335

Converts the keys of a python dictonary to str i.e. unicodes so that

352

Converts the keys of a python dictonary to str i.e. unicodes so that

336

they can be passed as keyword arguments as dictionaries with bytes keys

353

they can be passed as keyword arguments as dictionaries with bytes keys

337

can't be passed as keyword arguments to functions on Python 3.

354

can't be passed as keyword arguments to functions on Python 3.

338

"""

355

"""

339

dic = {k.decode('latin-1'): v for k, v in dic.items()}

356

dic = {k.decode('latin-1'): v for k, v in dic.items()}

340

return dic

357

return dic

341

358

342

def byteskwargs(dic):

343

"""

344

Converts keys of python dictionaries to bytes as they were converted to

345

str to pass that dictonary as a keyword argument on Python 3.

346

"""

347

dic = {k.encode('latin-1'): v for k, v in dic.items()}

348

return dic

349

359

350

# TODO: handle shlex.shlex().

360

def byteskwargs(dic):

351

def shlexsplit(s, comments=False, posix=True):

361

"""

352

"""

362

Converts keys of python dictionaries to bytes as they were converted to

353

Takes bytes argument, convert it to str i.e. unicodes, pass that into

363

str to pass that dictonary as a keyword argument on Python 3.

354

shlex.split(), convert the returned value to bytes and return that for

364

"""

355

Python 3 compatibility as shelx.split() don't accept bytes on Python 3.

365

dic = {k.encode('latin-1'): v for k, v in dic.items()}

356

"""

366

return dic

357

ret = shlex.split(s.decode('latin-1'), comments, posix)

367

358

return [a.encode('latin-1') for a in ret]

359

368

360

iteritems = lambda x: x.items()

369

# TODO: handle shlex.shlex().

361

itervalues = lambda x: x.values()

370

def shlexsplit(s, comments=False, posix=True):

371

"""

372

Takes bytes argument, convert it to str i.e. unicodes, pass that into

373

shlex.split(), convert the returned value to bytes and return that for

374

Python 3 compatibility as shelx.split() don't accept bytes on Python 3.

375

"""

376

ret = shlex.split(s.decode('latin-1'), comments, posix)

377

return [a.encode('latin-1') for a in ret]

362

378

363

# Python 3.5's json.load and json.loads require str. We polyfill its

364

# code for detecting encoding from bytes.

365

if sys.version_info[0:2] < (3, 6):

366

379

367

def _detect_encoding(b):

380

iteritems = lambda x: x.items()

368

bstartswith = b.startswith

381

itervalues = lambda x: x.values()

369

if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):

382

370

return 'utf-32'

383

# Python 3.5's json.load and json.loads require str. We polyfill its

371

if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):

384

# code for detecting encoding from bytes.

372

return 'utf-16'

385

if sys.version_info[0:2] < (3, 6):

373

if bstartswith(codecs.BOM_UTF8):

386

374

return 'utf-8-sig'

387

def _detect_encoding(b):

388

bstartswith = b.startswith

389

if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):

390

return 'utf-32'

391

if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):

392

return 'utf-16'

393

if bstartswith(codecs.BOM_UTF8):

394

return 'utf-8-sig'

375

395

376

if len(b) >= 4:

396

if len(b) >= 4:

377

if not b[0]:

397

if not b[0]:

378

# 00 00 -- -- - utf-32-be

398

# 00 00 -- -- - utf-32-be

379

# 00 XX -- -- - utf-16-be

399

# 00 XX -- -- - utf-16-be

380

return 'utf-16-be' if b[1] else 'utf-32-be'

400

return 'utf-16-be' if b[1] else 'utf-32-be'

381

if not b[1]:

401

if not b[1]:

382

# XX 00 00 00 - utf-32-le

402

# XX 00 00 00 - utf-32-le

383

# XX 00 00 XX - utf-16-le

403

# XX 00 00 XX - utf-16-le

384

# XX 00 XX -- - utf-16-le

404

# XX 00 XX -- - utf-16-le

385

return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'

405

return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'

386

elif len(b) == 2:

406

elif len(b) == 2:

387

if not b[0]:

407

if not b[0]:

388

# 00 XX - utf-16-be

408

# 00 XX - utf-16-be

389

return 'utf-16-be'

409

return 'utf-16-be'

390

if not b[1]:

410

if not b[1]:

391

# XX 00 - utf-16-le

411

# XX 00 - utf-16-le

392

return 'utf-16-le'

412

return 'utf-16-le'

393

# default

413

# default

394

return 'utf-8'

414

return 'utf-8'

395

415

396

def json_loads(s, *args, **kwargs):

416

def json_loads(s, *args, **kwargs):

397

if isinstance(s, (bytes, bytearray)):

417

if isinstance(s, (bytes, bytearray)):

398

s = s.decode(_detect_encoding(s), 'surrogatepass')

418

s = s.decode(_detect_encoding(s), 'surrogatepass')

399

419

400

return json.loads(s, *args, **kwargs)

420

return json.loads(s, *args, **kwargs)

401

421

402

else:

403

json_loads = json.loads

404

422

405

else:

423

else:

406

import cStringIO

407

408

xrange = xrange

409

unicode = unicode

410

bytechr = chr

411

byterepr = repr

412

bytestr = str

413

iterbytestr = iter

414

maybebytestr = identity

415

sysbytes = identity

416

sysstr = identity

417

strurl = identity

418

bytesurl = identity

419

open = open

420

delattr = delattr

421

getattr = getattr

422

hasattr = hasattr

423

setattr = setattr

424

425

# this can't be parsed on Python 3

426

exec(b'def raisewithtb(exc, tb):\n raise exc, None, tb\n')

427

428

def fsencode(filename):

429

"""

430

Partial backport from os.py in Python 3, which only accepts bytes.

431

In Python 2, our paths should only ever be bytes, a unicode path

432

indicates a bug.

433

"""

434

if isinstance(filename, str):

435

return filename

436

else:

437

raise TypeError("expect str, not %s" % type(filename).__name__)

438

439

# In Python 2, fsdecode() has a very chance to receive bytes. So it's

440

# better not to touch Python 2 part as it's already working fine.

441

fsdecode = identity

442

443

def getdoc(obj):

444

return getattr(obj, '__doc__', None)

445

446

_notset = object()

447

448

def safehasattr(thing, attr):

449

return getattr(thing, attr, _notset) is not _notset

450

451

def _getoptbwrapper(orig, args, shortlist, namelist):

452

return orig(args, shortlist, namelist)

453

454

strkwargs = identity

455

byteskwargs = identity

456

457

oscurdir = os.curdir

458

oslinesep = os.linesep

459

osname = os.name

460

ospathsep = os.pathsep

461

ospardir = os.pardir

462

ossep = os.sep

463

osaltsep = os.altsep

464

osdevnull = os.devnull

465

long = long

466

if getattr(sys, 'argv', None) is not None:

467

sysargv = sys.argv

468

sysplatform = sys.platform

469

sysexecutable = sys.executable

470

shlexsplit = shlex.split

471

bytesio = cStringIO.StringIO

472

stringio = bytesio

473

maplist = map

474

rangelist = range

475

ziplist = zip

476

rawinput = raw_input

477

getargspec = inspect.getargspec

478

iteritems = lambda x: x.iteritems()

479

itervalues = lambda x: x.itervalues()

480

json_loads = json.loads

424

json_loads = json.loads

481

425

482

isjython = sysplatform.startswith(b'java')

426

isjython = sysplatform.startswith(b'java')

483

427

484

isdarwin = sysplatform.startswith(b'darwin')

428

isdarwin = sysplatform.startswith(b'darwin')

485

islinux = sysplatform.startswith(b'linux')

429

islinux = sysplatform.startswith(b'linux')

486

isposix = osname == b'posix'

430

isposix = osname == b'posix'

487

iswindows = osname == b'nt'

431

iswindows = osname == b'nt'

488

432

489

433

490

def getoptb(args, shortlist, namelist):

434

def getoptb(args, shortlist, namelist):

491

return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)

435

return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)

492

436

493

437

494

def gnugetoptb(args, shortlist, namelist):

438

def gnugetoptb(args, shortlist, namelist):

495

return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)

439

return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)

496

440

497

441

498

def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):

442

def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):

499

return tempfile.mkdtemp(suffix, prefix, dir)

443

return tempfile.mkdtemp(suffix, prefix, dir)

500

444

501

445

502

# text=True is not supported; use util.from/tonativeeol() instead

446

# text=True is not supported; use util.from/tonativeeol() instead

503

def mkstemp(suffix=b'', prefix=b'tmp', dir=None):

447

def mkstemp(suffix=b'', prefix=b'tmp', dir=None):

504

return tempfile.mkstemp(suffix, prefix, dir)

448

return tempfile.mkstemp(suffix, prefix, dir)

505

449

506

450

507

# TemporaryFile does not support an "encoding=" argument on python2.

451

# TemporaryFile does not support an "encoding=" argument on python2.

508

# This wrapper file are always open in byte mode.

452

# This wrapper file are always open in byte mode.

509

def unnamedtempfile(mode=None, *args, **kwargs):

453

def unnamedtempfile(mode=None, *args, **kwargs):

510

if mode is None:

454

if mode is None:

511

mode = 'w+b'

455

mode = 'w+b'

512

else:

456

else:

513

mode = sysstr(mode)

457

mode = sysstr(mode)

514

assert 'b' in mode

458

assert 'b' in mode

515

return tempfile.TemporaryFile(mode, *args, **kwargs)

459

return tempfile.TemporaryFile(mode, *args, **kwargs)

516

460

517

461

518

# NamedTemporaryFile does not support an "encoding=" argument on python2.

462

# NamedTemporaryFile does not support an "encoding=" argument on python2.

519

# This wrapper file are always open in byte mode.

463

# This wrapper file are always open in byte mode.

520

def namedtempfile(

464

def namedtempfile(

521

mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True

465

mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True

522

):

466

):

523

mode = sysstr(mode)

467

mode = sysstr(mode)

524

assert 'b' in mode

468

assert 'b' in mode

525

return tempfile.NamedTemporaryFile(

469

return tempfile.NamedTemporaryFile(

526

mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete

470

mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete

527

)

471

)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages