upstream/mercurial-mirror Commit - r32373:57008258

1

# __init__.py - Startup and module loading logic for Mercurial.

1

# __init__.py - Startup and module loading logic for Mercurial.

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import imp

11

import os

12

import sys

10

import sys

13

import zipimport

14

15

from . import (

16

policy

17

)

18

11

19

__all__ = []

12

__all__ = []

20

13

21

modulepolicy = policy.policy

22

23

# Modules that have both Python and C implementations. See also the

24

# set of .py files under mercurial/pure/.

25

_dualmodules = {

26

}

27

28

class hgimporter(object):

29

"""Object that conforms to import hook interface defined in PEP-302."""

30

def find_module(self, name, path=None):

31

# We only care about modules that have both C and pure implementations.

32

if name in _dualmodules:

33

return self

34

return None

35

36

def load_module(self, name):

37

mod = sys.modules.get(name, None)

38

if mod:

39

return mod

40

41

mercurial = sys.modules['mercurial']

42

43

# The zip importer behaves sufficiently differently from the default

44

# importer to warrant its own code path.

45

loader = getattr(mercurial, '__loader__', None)

46

if isinstance(loader, zipimport.zipimporter):

47

def ziploader(*paths):

48

"""Obtain a zipimporter for a directory under the main zip."""

49

path = os.path.join(loader.archive, *paths)

50

zl = sys.path_importer_cache.get(path)

51

if not zl:

52

zl = zipimport.zipimporter(path)

53

return zl

54

55

try:

56

if modulepolicy in policy.policynoc:

57

raise ImportError()

58

59

zl = ziploader('mercurial')

60

mod = zl.load_module(name)

61

# Unlike imp, ziploader doesn't expose module metadata that

62

# indicates the type of module. So just assume what we found

63

# is OK (even though it could be a pure Python module).

64

except ImportError:

65

if modulepolicy == b'c':

66

raise

67

zl = ziploader('mercurial', 'pure')

68

mod = zl.load_module(name)

69

70

sys.modules[name] = mod

71

return mod

72

73

# Unlike the default importer which searches special locations and

74

# sys.path, we only look in the directory where "mercurial" was

75

# imported from.

76

77

# imp.find_module doesn't support submodules (modules with ".").

78

# Instead you have to pass the parent package's __path__ attribute

79

# as the path argument.

80

stem = name.split('.')[-1]

81

82

try:

83

if modulepolicy in policy.policynoc:

84

raise ImportError()

85

86

modinfo = imp.find_module(stem, mercurial.__path__)

87

88

# The Mercurial installer used to copy files from

89

# mercurial/pure/*.py to mercurial/*.py. Therefore, it's possible

90

# for some installations to have .py files under mercurial/*.

91

# Loading Python modules when we expected C versions could result

92

# in a) poor performance b) loading a version from a previous

93

# Mercurial version, potentially leading to incompatibility. Either

94

# scenario is bad. So we verify that modules loaded from

95

# mercurial/* are C extensions. If the current policy allows the

96

# loading of .py modules, the module will be re-imported from

97

# mercurial/pure/* below.

98

if modinfo[2][2] != imp.C_EXTENSION:

99

raise ImportError('.py version of %s found where C '

100

'version should exist' % name)

101

102

except ImportError:

103

if modulepolicy == b'c':

104

raise

105

106

# Could not load the C extension and pure Python is allowed. So

107

# try to load them.

108

from . import pure

109

modinfo = imp.find_module(stem, pure.__path__)

110

if not modinfo:

111

raise ImportError('could not find mercurial module %s' %

112

name)

113

114

mod = imp.load_module(name, *modinfo)

115

sys.modules[name] = mod

116

return mod

117

118

# Python 3 uses a custom module loader that transforms source code between

14

# Python 3 uses a custom module loader that transforms source code between

119

# source file reading and compilation. This is done by registering a custom

15

# source file reading and compilation. This is done by registering a custom

120

# finder that changes the spec for Mercurial modules to use a custom loader.

16

# finder that changes the spec for Mercurial modules to use a custom loader.

121

if sys.version_info[0] >= 3:

17

if sys.version_info[0] >= 3:

122

from . import pure

123

import importlib

18

import importlib

19

import importlib.abc

124

import io

20

import io

125

import token

21

import token

126

import tokenize

22

import tokenize

127

23

128

class hgpathentryfinder(importlib.abc.MetaPathFinder):

24

class hgpathentryfinder(importlib.abc.MetaPathFinder):

129

"""A sys.meta_path finder that uses a custom module loader."""

25

"""A sys.meta_path finder that uses a custom module loader."""

130

def find_spec(self, fullname, path, target=None):

26

def find_spec(self, fullname, path, target=None):

131

# Only handle Mercurial-related modules.

27

# Only handle Mercurial-related modules.

132

if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):

28

if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):

133

return None

29

return None

134

# zstd is already dual-version clean, don't try and mangle it

30

# zstd is already dual-version clean, don't try and mangle it

135

if fullname.startswith('mercurial.zstd'):

31

if fullname.startswith('mercurial.zstd'):

136

return None

32

return None

137

33

138

# This assumes Python 3 doesn't support loading C modules.

139

if fullname in _dualmodules:

140

stem = fullname.split('.')[-1]

141

fullname = 'mercurial.pure.%s' % stem

142

target = pure

143

assert len(path) == 1

144

path = [os.path.join(path[0], 'pure')]

145

146

# Try to find the module using other registered finders.

34

# Try to find the module using other registered finders.

147

spec = None

35

spec = None

148

for finder in sys.meta_path:

36

for finder in sys.meta_path:

149

if finder == self:

37

if finder == self:

150

continue

38

continue

151

39

152

spec = finder.find_spec(fullname, path, target=target)

40

spec = finder.find_spec(fullname, path, target=target)

153

if spec:

41

if spec:

154

break

42

break

155

43

156

# This is a Mercurial-related module but we couldn't find it

44

# This is a Mercurial-related module but we couldn't find it

157

# using the previously-registered finders. This likely means

45

# using the previously-registered finders. This likely means

158

# the module doesn't exist.

46

# the module doesn't exist.

159

if not spec:

47

if not spec:

160

return None

48

return None

161

49

162

if (fullname.startswith('mercurial.pure.')

163

and fullname.replace('.pure.', '.') in _dualmodules):

164

spec.name = spec.name.replace('.pure.', '.')

165

166

# TODO need to support loaders from alternate specs, like zip

50

# TODO need to support loaders from alternate specs, like zip

167

# loaders.

51

# loaders.

168

spec.loader = hgloader(spec.name, spec.origin)

52

spec.loader = hgloader(spec.name, spec.origin)

169

return spec

53

return spec

170

54

171

def replacetokens(tokens, fullname):

55

def replacetokens(tokens, fullname):

172

"""Transform a stream of tokens from raw to Python 3.

56

"""Transform a stream of tokens from raw to Python 3.

173

57

174

It is called by the custom module loading machinery to rewrite

58

It is called by the custom module loading machinery to rewrite

175

source/tokens between source decoding and compilation.

59

source/tokens between source decoding and compilation.

176

60

177

Returns a generator of possibly rewritten tokens.

61

Returns a generator of possibly rewritten tokens.

178

62

179

The input token list may be mutated as part of processing. However,

63

The input token list may be mutated as part of processing. However,

180

its changes do not necessarily match the output token stream.

64

its changes do not necessarily match the output token stream.

181

65

182

REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION

66

REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION

183

OR CACHED FILES WON'T GET INVALIDATED PROPERLY.

67

OR CACHED FILES WON'T GET INVALIDATED PROPERLY.

184

"""

68

"""

185

futureimpline = False

69

futureimpline = False

186

70

187

# The following utility functions access the tokens list and i index of

71

# The following utility functions access the tokens list and i index of

188

# the for i, t enumerate(tokens) loop below

72

# the for i, t enumerate(tokens) loop below

189

def _isop(j, *o):

73

def _isop(j, *o):

190

"""Assert that tokens[j] is an OP with one of the given values"""

74

"""Assert that tokens[j] is an OP with one of the given values"""

191

try:

75

try:

192

return tokens[j].type == token.OP and tokens[j].string in o

76

return tokens[j].type == token.OP and tokens[j].string in o

193

except IndexError:

77

except IndexError:

194

return False

78

return False

195

79

196

def _findargnofcall(n):

80

def _findargnofcall(n):

197

"""Find arg n of a call expression (start at 0)

81

"""Find arg n of a call expression (start at 0)

198

82

199

Returns index of the first token of that argument, or None if

83

Returns index of the first token of that argument, or None if

200

there is not that many arguments.

84

there is not that many arguments.

201

85

202

Assumes that token[i + 1] is '('.

86

Assumes that token[i + 1] is '('.

203

87

204

"""

88

"""

205

nested = 0

89

nested = 0

206

for j in range(i + 2, len(tokens)):

90

for j in range(i + 2, len(tokens)):

207

if _isop(j, ')', ']', '}'):

91

if _isop(j, ')', ']', '}'):

208

# end of call, tuple, subscription or dict / set

92

# end of call, tuple, subscription or dict / set

209

nested -= 1

93

nested -= 1

210

if nested < 0:

94

if nested < 0:

211

return None

95

return None

212

elif n == 0:

96

elif n == 0:

213

# this is the starting position of arg

97

# this is the starting position of arg

214

return j

98

return j

215

elif _isop(j, '(', '[', '{'):

99

elif _isop(j, '(', '[', '{'):

216

nested += 1

100

nested += 1

217

elif _isop(j, ',') and nested == 0:

101

elif _isop(j, ',') and nested == 0:

218

n -= 1

102

n -= 1

219

103

220

return None

104

return None

221

105

222

def _ensureunicode(j):

106

def _ensureunicode(j):

223

"""Make sure the token at j is a unicode string

107

"""Make sure the token at j is a unicode string

224

108

225

This rewrites a string token to include the unicode literal prefix

109

This rewrites a string token to include the unicode literal prefix

226

so the string transformer won't add the byte prefix.

110

so the string transformer won't add the byte prefix.

227

111

228

Ignores tokens that are not strings. Assumes bounds checking has

112

Ignores tokens that are not strings. Assumes bounds checking has

229

already been done.

113

already been done.

230

114

231

"""

115

"""

232

st = tokens[j]

116

st = tokens[j]

233

if st.type == token.STRING and st.string.startswith(("'", '"')):

117

if st.type == token.STRING and st.string.startswith(("'", '"')):

234

tokens[j] = st._replace(string='u%s' % st.string)

118

tokens[j] = st._replace(string='u%s' % st.string)

235

119

236

for i, t in enumerate(tokens):

120

for i, t in enumerate(tokens):

237

# Convert most string literals to byte literals. String literals

121

# Convert most string literals to byte literals. String literals

238

# in Python 2 are bytes. String literals in Python 3 are unicode.

122

# in Python 2 are bytes. String literals in Python 3 are unicode.

239

# Most strings in Mercurial are bytes and unicode strings are rare.

123

# Most strings in Mercurial are bytes and unicode strings are rare.

240

# Rather than rewrite all string literals to use ``b''`` to indicate

124

# Rather than rewrite all string literals to use ``b''`` to indicate

241

# byte strings, we apply this token transformer to insert the ``b``

125

# byte strings, we apply this token transformer to insert the ``b``

242

# prefix nearly everywhere.

126

# prefix nearly everywhere.

243

if t.type == token.STRING:

127

if t.type == token.STRING:

244

s = t.string

128

s = t.string

245

129

246

# Preserve docstrings as string literals. This is inconsistent

130

# Preserve docstrings as string literals. This is inconsistent

247

# with regular unprefixed strings. However, the

131

# with regular unprefixed strings. However, the

248

# "from __future__" parsing (which allows a module docstring to

132

# "from __future__" parsing (which allows a module docstring to

249

# exist before it) doesn't properly handle the docstring if it

133

# exist before it) doesn't properly handle the docstring if it

250

# is b''' prefixed, leading to a SyntaxError. We leave all

134

# is b''' prefixed, leading to a SyntaxError. We leave all

251

# docstrings as unprefixed to avoid this. This means Mercurial

135

# docstrings as unprefixed to avoid this. This means Mercurial

252

# components touching docstrings need to handle unicode,

136

# components touching docstrings need to handle unicode,

253

# unfortunately.

137

# unfortunately.

254

if s[0:3] in ("'''", '"""'):

138

if s[0:3] in ("'''", '"""'):

255

yield t

139

yield t

256

continue

140

continue

257

141

258

# If the first character isn't a quote, it is likely a string

142

# If the first character isn't a quote, it is likely a string

259

# prefixing character (such as 'b', 'u', or 'r'. Ignore.

143

# prefixing character (such as 'b', 'u', or 'r'. Ignore.

260

if s[0] not in ("'", '"'):

144

if s[0] not in ("'", '"'):

261

yield t

145

yield t

262

continue

146

continue

263

147

264

# String literal. Prefix to make a b'' string.

148

# String literal. Prefix to make a b'' string.

265

yield t._replace(string='b%s' % t.string)

149

yield t._replace(string='b%s' % t.string)

266

continue

150

continue

267

151

268

# Insert compatibility imports at "from __future__ import" line.

152

# Insert compatibility imports at "from __future__ import" line.

269

# No '\n' should be added to preserve line numbers.

153

# No '\n' should be added to preserve line numbers.

270

if (t.type == token.NAME and t.string == 'import' and

154

if (t.type == token.NAME and t.string == 'import' and

271

all(u.type == token.NAME for u in tokens[i - 2:i]) and

155

all(u.type == token.NAME for u in tokens[i - 2:i]) and

272

[u.string for u in tokens[i - 2:i]] == ['from', '__future__']):

156

[u.string for u in tokens[i - 2:i]] == ['from', '__future__']):

273

futureimpline = True

157

futureimpline = True

274

if t.type == token.NEWLINE and futureimpline:

158

if t.type == token.NEWLINE and futureimpline:

275

futureimpline = False

159

futureimpline = False

276

if fullname == 'mercurial.pycompat':

160

if fullname == 'mercurial.pycompat':

277

yield t

161

yield t

278

continue

162

continue

279

r, c = t.start

163

r, c = t.start

280

l = (b'; from mercurial.pycompat import '

164

l = (b'; from mercurial.pycompat import '

281

b'delattr, getattr, hasattr, setattr, xrange, '

165

b'delattr, getattr, hasattr, setattr, xrange, '

282

b'open, unicode\n')

166

b'open, unicode\n')

283

for u in tokenize.tokenize(io.BytesIO(l).readline):

167

for u in tokenize.tokenize(io.BytesIO(l).readline):

284

if u.type in (tokenize.ENCODING, token.ENDMARKER):

168

if u.type in (tokenize.ENCODING, token.ENDMARKER):

285

continue

169

continue

286

yield u._replace(

170

yield u._replace(

287

start=(r, c + u.start[1]), end=(r, c + u.end[1]))

171

start=(r, c + u.start[1]), end=(r, c + u.end[1]))

288

continue

172

continue

289

173

290

# This looks like a function call.

174

# This looks like a function call.

291

if t.type == token.NAME and _isop(i + 1, '('):

175

if t.type == token.NAME and _isop(i + 1, '('):

292

fn = t.string

176

fn = t.string

293

177

294

# *attr() builtins don't accept byte strings to 2nd argument.

178

# *attr() builtins don't accept byte strings to 2nd argument.

295

if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and

179

if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and

296

not _isop(i - 1, '.')):

180

not _isop(i - 1, '.')):

297

arg1idx = _findargnofcall(1)

181

arg1idx = _findargnofcall(1)

298

if arg1idx is not None:

182

if arg1idx is not None:

299

_ensureunicode(arg1idx)

183

_ensureunicode(arg1idx)

300

184

301

# .encode() and .decode() on str/bytes/unicode don't accept

185

# .encode() and .decode() on str/bytes/unicode don't accept

302

# byte strings on Python 3.

186

# byte strings on Python 3.

303

elif fn in ('encode', 'decode') and _isop(i - 1, '.'):

187

elif fn in ('encode', 'decode') and _isop(i - 1, '.'):

304

for argn in range(2):

188

for argn in range(2):

305

argidx = _findargnofcall(argn)

189

argidx = _findargnofcall(argn)

306

if argidx is not None:

190

if argidx is not None:

307

_ensureunicode(argidx)

191

_ensureunicode(argidx)

308

192

309

# It changes iteritems/values to items/values as they are not

193

# It changes iteritems/values to items/values as they are not

310

# present in Python 3 world.

194

# present in Python 3 world.

311

elif fn in ('iteritems', 'itervalues'):

195

elif fn in ('iteritems', 'itervalues'):

312

yield t._replace(string=fn[4:])

196

yield t._replace(string=fn[4:])

313

continue

197

continue

314

198

315

# Emit unmodified token.

199

# Emit unmodified token.

316

yield t

200

yield t

317

201

318

# Header to add to bytecode files. This MUST be changed when

202

# Header to add to bytecode files. This MUST be changed when

319

# ``replacetoken`` or any mechanism that changes semantics of module

203

# ``replacetoken`` or any mechanism that changes semantics of module

320

# loading is changed. Otherwise cached bytecode may get loaded without

204

# loading is changed. Otherwise cached bytecode may get loaded without

321

# the new transformation mechanisms applied.

205

# the new transformation mechanisms applied.

322

BYTECODEHEADER = b'HG\x00\x0a'

206

BYTECODEHEADER = b'HG\x00\x0a'

323

207

324

class hgloader(importlib.machinery.SourceFileLoader):

208

class hgloader(importlib.machinery.SourceFileLoader):

325

"""Custom module loader that transforms source code.

209

"""Custom module loader that transforms source code.

326

210

327

When the source code is converted to a code object, we transform

211

When the source code is converted to a code object, we transform

328

certain patterns to be Python 3 compatible. This allows us to write code

212

certain patterns to be Python 3 compatible. This allows us to write code

329

that is natively Python 2 and compatible with Python 3 without

213

that is natively Python 2 and compatible with Python 3 without

330

making the code excessively ugly.

214

making the code excessively ugly.

331

215

332

We do this by transforming the token stream between parse and compile.

216

We do this by transforming the token stream between parse and compile.

333

217

334

Implementing transformations invalidates caching assumptions made

218

Implementing transformations invalidates caching assumptions made

335

by the built-in importer. The built-in importer stores a header on

219

by the built-in importer. The built-in importer stores a header on

336

saved bytecode files indicating the Python/bytecode version. If the

220

saved bytecode files indicating the Python/bytecode version. If the

337

version changes, the cached bytecode is ignored. The Mercurial

221

version changes, the cached bytecode is ignored. The Mercurial

338

transformations could change at any time. This means we need to check

222

transformations could change at any time. This means we need to check

339

that cached bytecode was generated with the current transformation

223

that cached bytecode was generated with the current transformation

340

code or there could be a mismatch between cached bytecode and what

224

code or there could be a mismatch between cached bytecode and what

341

would be generated from this class.

225

would be generated from this class.

342

226

343

We supplement the bytecode caching layer by wrapping ``get_data``

227

We supplement the bytecode caching layer by wrapping ``get_data``

344

and ``set_data``. These functions are called when the

228

and ``set_data``. These functions are called when the

345

``SourceFileLoader`` retrieves and saves bytecode cache files,

229

``SourceFileLoader`` retrieves and saves bytecode cache files,

346

respectively. We simply add an additional header on the file. As

230

respectively. We simply add an additional header on the file. As

347

long as the version in this file is changed when semantics change,

231

long as the version in this file is changed when semantics change,

348

cached bytecode should be invalidated when transformations change.

232

cached bytecode should be invalidated when transformations change.

349

233

350

The added header has the form ``HG<VERSION>``. That is a literal

234

The added header has the form ``HG<VERSION>``. That is a literal

351

``HG`` with 2 binary bytes indicating the transformation version.

235

``HG`` with 2 binary bytes indicating the transformation version.

352

"""

236

"""

353

def get_data(self, path):

237

def get_data(self, path):

354

data = super(hgloader, self).get_data(path)

238

data = super(hgloader, self).get_data(path)

355

239

356

if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):

240

if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):

357

return data

241

return data

358

242

359

# There should be a header indicating the Mercurial transformation

243

# There should be a header indicating the Mercurial transformation

360

# version. If it doesn't exist or doesn't match the current version,

244

# version. If it doesn't exist or doesn't match the current version,

361

# we raise an OSError because that is what

245

# we raise an OSError because that is what

362

# ``SourceFileLoader.get_code()`` expects when loading bytecode

246

# ``SourceFileLoader.get_code()`` expects when loading bytecode

363

# paths to indicate the cached file is "bad."

247

# paths to indicate the cached file is "bad."

364

if data[0:2] != b'HG':

248

if data[0:2] != b'HG':

365

raise OSError('no hg header')

249

raise OSError('no hg header')

366

if data[0:4] != BYTECODEHEADER:

250

if data[0:4] != BYTECODEHEADER:

367

raise OSError('hg header version mismatch')

251

raise OSError('hg header version mismatch')

368

252

369

return data[4:]

253

return data[4:]

370

254

371

def set_data(self, path, data, *args, **kwargs):

255

def set_data(self, path, data, *args, **kwargs):

372

if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):

256

if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):

373

data = BYTECODEHEADER + data

257

data = BYTECODEHEADER + data

374

258

375

return super(hgloader, self).set_data(path, data, *args, **kwargs)

259

return super(hgloader, self).set_data(path, data, *args, **kwargs)

376

260

377

def source_to_code(self, data, path):

261

def source_to_code(self, data, path):

378

"""Perform token transformation before compilation."""

262

"""Perform token transformation before compilation."""

379

buf = io.BytesIO(data)

263

buf = io.BytesIO(data)

380

tokens = tokenize.tokenize(buf.readline)

264

tokens = tokenize.tokenize(buf.readline)

381

data = tokenize.untokenize(replacetokens(list(tokens), self.name))

265

data = tokenize.untokenize(replacetokens(list(tokens), self.name))

382

# Python's built-in importer strips frames from exceptions raised

266

# Python's built-in importer strips frames from exceptions raised

383

# for this code. Unfortunately, that mechanism isn't extensible

267

# for this code. Unfortunately, that mechanism isn't extensible

384

# and our frame will be blamed for the import failure. There

268

# and our frame will be blamed for the import failure. There

385

# are extremely hacky ways to do frame stripping. We haven't

269

# are extremely hacky ways to do frame stripping. We haven't

386

# implemented them because they are very ugly.

270

# implemented them because they are very ugly.

387

return super(hgloader, self).source_to_code(data, path)

271

return super(hgloader, self).source_to_code(data, path)

388

272

389

# We automagically register our custom importer as a side-effect of ~~loading.~~

273

# We automagically register our custom importer as a side-effect of

390

# This is necessary to ensure that any entry points are able ~~to import~~

274

# loading. This is necessary to ensure that any entry points are able

391

# mercurial.* modules without having to perform this ~~registration themselves.~~

275

# to import mercurial.* modules without having to perform this

392

if sys.version_info[0] >= 3:

276

# registration themselves.

393

_importercls = hgpathentryfinder

277

if not any(isinstance(x, hgpathentryfinder) for x in sys.meta_path):

394

else:

278

# meta_path is used before any implicit finders and before sys.path.

395

_importercls = hgimporter

279

sys.meta_path.insert(0, hgpathentryfinder())

396

if not any(isinstance(x, _importercls) for x in sys.meta_path):

397

# meta_path is used before any implicit finders and before sys.path.

398

sys.meta_path.insert(0, _importercls())

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # __init__.py - Startup and module loading logic for Mercurial.
             #
             # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
-            import imp
-            import os
             import sys
-            import zipimport
-            from . import (
-                policy
             __all__ = []
-            modulepolicy = policy.policy
-            # Modules that have both Python and C implementations. See also the
-            # set of .py files under mercurial/pure/.
-            _dualmodules = {
-            class hgimporter(object):
-                """Object that conforms to import hook interface defined in PEP-302."""
-                def find_module(self, name, path=None):
-                    # We only care about modules that have both C and pure implementations.
-                    if name in _dualmodules:
-                        return self
-                    return None
-                def load_module(self, name):
-                    mod = sys.modules.get(name, None)
-                    if mod:
-                        return mod
-                    mercurial = sys.modules['mercurial']
-                    # The zip importer behaves sufficiently differently from the default
-                    # importer to warrant its own code path.
-                    loader = getattr(mercurial, '__loader__', None)
-                    if isinstance(loader, zipimport.zipimporter):
-                        def ziploader(*paths):
-                            """Obtain a zipimporter for a directory under the main zip."""
-                            path = os.path.join(loader.archive, *paths)
-                            zl = sys.path_importer_cache.get(path)
-                            if not zl:
-                                zl = zipimport.zipimporter(path)
-                            return zl
-                        try:
-                            if modulepolicy in policy.policynoc:
-                                raise ImportError()
-                            zl = ziploader('mercurial')
-                            mod = zl.load_module(name)
-                            # Unlike imp, ziploader doesn't expose module metadata that
-                            # indicates the type of module. So just assume what we found
-                            # is OK (even though it could be a pure Python module).
-                        except ImportError:
-                            if modulepolicy == b'c':
-                                raise
-                            zl = ziploader('mercurial', 'pure')
-                            mod = zl.load_module(name)
-                        sys.modules[name] = mod
-                        return mod
-                    # Unlike the default importer which searches special locations and
-                    # sys.path, we only look in the directory where "mercurial" was
-                    # imported from.
-                    # imp.find_module doesn't support submodules (modules with ".").
-                    # Instead you have to pass the parent package's __path__ attribute
-                    # as the path argument.
-                    stem = name.split('.')[-1]
-                    try:
-                        if modulepolicy in policy.policynoc:
-                            raise ImportError()
-                        modinfo = imp.find_module(stem, mercurial.__path__)
-                        # The Mercurial installer used to copy files from
-                        # mercurial/pure/*.py to mercurial/*.py. Therefore, it's possible
-                        # for some installations to have .py files under mercurial/*.
-                        # Loading Python modules when we expected C versions could result
-                        # in a) poor performance b) loading a version from a previous
-                        # Mercurial version, potentially leading to incompatibility. Either
-                        # scenario is bad. So we verify that modules loaded from
-                        # mercurial/* are C extensions. If the current policy allows the
-                        # loading of .py modules, the module will be re-imported from
-                        # mercurial/pure/* below.
-                        if modinfo[2][2] != imp.C_EXTENSION:
-                            raise ImportError('.py version of %s found where C '
-                                              'version should exist' % name)
-                    except ImportError:
-                        if modulepolicy == b'c':
-                            raise
-                        # Could not load the C extension and pure Python is allowed. So
-                        # try to load them.
-                        from . import pure
-                        modinfo = imp.find_module(stem, pure.__path__)
-                        if not modinfo:
-                            raise ImportError('could not find mercurial module %s' %
-                                              name)
-                    mod = imp.load_module(name, *modinfo)
-                    sys.modules[name] = mod
-                    return mod
             # Python 3 uses a custom module loader that transforms source code between
             # source file reading and compilation. This is done by registering a custom
             # finder that changes the spec for Mercurial modules to use a custom loader.
             if sys.version_info[0] >= 3:
-                from . import pure
                 import importlib
+                import importlib.abc
                 import io
                 import token
                 import tokenize
                 class hgpathentryfinder(importlib.abc.MetaPathFinder):
                     """A sys.meta_path finder that uses a custom module loader.

                     It does not locate modules itself. It asks the other finders on
                     ``sys.meta_path`` for a spec and, for Mercurial-related modules,
                     replaces the loader on that spec with ``hgloader``.
                     """
                     def find_spec(self, fullname, path, target=None):
                         """Return a spec for ``fullname`` that loads via ``hgloader``.

                         Returns None (deferring to the other finders) when ``fullname``
                         is not under ``mercurial.``, ``hgext.`` or ``hgext3rd.``, when it
                         starts with ``mercurial.zstd``, or when no other finder on
                         ``sys.meta_path`` produces a spec for it.
                         """
                         # Only handle Mercurial-related modules.
                         if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
                             return None
                         # zstd is already dual-version clean, don't try and mangle it
                         if fullname.startswith('mercurial.zstd'):
                             return None
-                        # This assumes Python 3 doesn't support loading C modules.
-                        if fullname in _dualmodules:
-                            stem = fullname.split('.')[-1]
-                            fullname = 'mercurial.pure.%s' % stem
-                            target = pure
-                            assert len(path) == 1
-                            path = [os.path.join(path[0], 'pure')]
                         # Try to find the module using other registered finders.
                         spec = None
                         for finder in sys.meta_path:
                             # Skip ourselves, otherwise we would recurse forever.
                             if finder == self:
                                 continue
                             # NOTE(review): find_spec is looked up unconditionally, so a
                             # meta_path entry that only implements the legacy
                             # find_module() API would raise AttributeError here --
                             # confirm whether such finders need to be tolerated.
                             spec = finder.find_spec(fullname, path, target=target)
                             if spec:
                                 break
                         # This is a Mercurial-related module but we couldn't find it
                         # using the previously-registered finders. This likely means
                         # the module doesn't exist.
                         if not spec:
                             return None
-                        if (fullname.startswith('mercurial.pure.')
-                            and fullname.replace('.pure.', '.') in _dualmodules):
-                            spec.name = spec.name.replace('.pure.', '.')
                         # TODO need to support loaders from alternate specs, like zip
                         # loaders.
                         # The loader chosen by the other finder is discarded and replaced
                         # with the source-transforming loader.
                         spec.loader = hgloader(spec.name, spec.origin)
                         return spec
                 def replacetokens(tokens, fullname):
                     """Transform a stream of tokens from raw to Python 3.

                     It is called by the custom module loading machinery to rewrite
                     source/tokens between source decoding and compilation.

                     ``tokens`` is a list of tokens as produced by
                     ``tokenize.tokenize()``. It must be indexable because the
                     transformer looks ahead of and behind the current token.
                     ``fullname`` is the dotted name of the module being transformed; it
                     is only used to skip the pycompat import injection when the module
                     is ``mercurial.pycompat`` itself.

                     Returns a generator of possibly rewritten tokens. The rewrites are:

                     * string literals with no prefix that are not triple-quoted gain a
                       ``b`` prefix;
                     * the logical line containing ``from __future__ import`` is
                       extended with a ``from mercurial.pycompat import ...`` statement;
                     * a string literal that starts the 2nd argument of a bare
                       ``getattr``/``setattr``/``hasattr``/``safehasattr`` call, or one
                       of the first two arguments of ``.encode()``/``.decode()``, gains
                       a ``u`` prefix instead of ``b``;
                     * ``iteritems(`` and ``itervalues(`` become ``items(`` and
                       ``values(``.

                     The input token list may be mutated as part of processing. However,
                     its changes do not necessarily match the output token stream.

                     REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
                     OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
                     """
                     # True between a "from __future__ import" and the NEWLINE token that
                     # ends that logical line.
                     futureimpline = False
                     # The following utility functions access the tokens list and i index of
                     # the "for i, t in enumerate(tokens)" loop below
                     def _isop(j, *o):
                         """Return whether tokens[j] is an OP with one of the given values

                         Returns False instead of raising when j is out of range.
                         """
                         try:
                             return tokens[j].type == token.OP and tokens[j].string in o
                         except IndexError:
                             return False
                     def _findargnofcall(n):
                         """Find arg n of a call expression (start at 0)

                         Returns index of the first token of that argument, or None if
                         there are not that many arguments.

                         Assumes that token[i + 1] is '('.
                         """
                         # Depth of brackets opened inside the call's own parentheses.
                         nested = 0
                         for j in range(i + 2, len(tokens)):
                             if _isop(j, ')', ']', '}'):
                                 # end of call, tuple, subscription or dict / set
                                 nested -= 1
                                 if nested < 0:
                                     # This closes the call itself: ran out of arguments.
                                     return None
                             elif n == 0:
                                 # this is the starting position of arg
                                 return j
                             elif _isop(j, '(', '[', '{'):
                                 nested += 1
                             elif _isop(j, ',') and nested == 0:
                                 # A top-level comma: one fewer argument left to skip.
                                 n -= 1
                         return None
                     def _ensureunicode(j):
                         """Make sure the token at j is a unicode string

                         This rewrites a string token to include the unicode literal prefix
                         so the string transformer won't add the byte prefix.

                         Ignores tokens that are not strings. Assumes bounds checking has
                         already been done.
                         """
                         st = tokens[j]
                         if st.type == token.STRING and st.string.startswith(("'", '"')):
                             # Mutates the list being iterated below, so the loop sees the
                             # prefixed token when it reaches index j.
                             tokens[j] = st._replace(string='u%s' % st.string)
                     for i, t in enumerate(tokens):
                         # Convert most string literals to byte literals. String literals
                         # in Python 2 are bytes. String literals in Python 3 are unicode.
                         # Most strings in Mercurial are bytes and unicode strings are rare.
                         # Rather than rewrite all string literals to use ``b''`` to indicate
                         # byte strings, we apply this token transformer to insert the ``b``
                         # prefix nearly everywhere.
                         if t.type == token.STRING:
                             s = t.string
                             # Preserve docstrings as string literals. This is inconsistent
                             # with regular unprefixed strings. However, the
                             # "from __future__" parsing (which allows a module docstring to
                             # exist before it) doesn't properly handle the docstring if it
                             # is b''' prefixed, leading to a SyntaxError. We leave all
                             # docstrings as unprefixed to avoid this. This means Mercurial
                             # components touching docstrings need to handle unicode,
                             # unfortunately.
                             # (The test is purely lexical: every unprefixed triple-quoted
                             # string is left alone, whether or not it is a docstring.)
                             if s[0:3] in ("'''", '"""'):
                                 yield t
                                 continue
                             # If the first character isn't a quote, it is likely a string
                             # prefixing character (such as 'b', 'u', or 'r'). Ignore.
                             if s[0] not in ("'", '"'):
                                 yield t
                                 continue
                             # String literal. Prefix to make a b'' string.
                             yield t._replace(string='b%s' % t.string)
                             continue
                         # Insert compatibility imports at "from __future__ import" line.
                         # No '\n' should be added to preserve line numbers.
                         if (t.type == token.NAME and t.string == 'import' and
                             all(u.type == token.NAME for u in tokens[i - 2:i]) and
                             [u.string for u in tokens[i - 2:i]] == ['from', '__future__']):
                             futureimpline = True
                         if t.type == token.NEWLINE and futureimpline:
                             futureimpline = False
                             # pycompat cannot import from itself; leave its line untouched.
                             if fullname == 'mercurial.pycompat':
                                 yield t
                                 continue
                             r, c = t.start
                             l = (b'; from mercurial.pycompat import '
                                  b'delattr, getattr, hasattr, setattr, xrange, '
                                  b'open, unicode\n')
                             for u in tokenize.tokenize(io.BytesIO(l).readline):
                                 if u.type in (tokenize.ENCODING, token.ENDMARKER):
                                     continue
                                 # Re-position each injected token onto row r, shifted
                                 # right by the column where the original NEWLINE began.
                                 yield u._replace(
                                     start=(r, c + u.start[1]), end=(r, c + u.end[1]))
                             # The original NEWLINE token is not emitted; the injected
                             # source ends in '\n' and so supplies its own.
                             continue
                         # This looks like a function call.
                         if t.type == token.NAME and _isop(i + 1, '('):
                             fn = t.string
                             # *attr() builtins don't accept byte strings to 2nd argument.
                             if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
                                     not _isop(i - 1, '.')):
                                 arg1idx = _findargnofcall(1)
                                 if arg1idx is not None:
                                     _ensureunicode(arg1idx)
                             # .encode() and .decode() on str/bytes/unicode don't accept
                             # byte strings on Python 3.
                             elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
                                 for argn in range(2):
                                     argidx = _findargnofcall(argn)
                                     if argidx is not None:
                                         _ensureunicode(argidx)
                             # Rewrite iteritems/itervalues to items/values, as the iter*
                             # variants do not exist on Python 3.
                             elif fn in ('iteritems', 'itervalues'):
                                 # fn[4:] drops the leading "iter".
                                 yield t._replace(string=fn[4:])
                                 continue
                         # Emit unmodified token.
                         yield t
                 # Header to add to bytecode files. This MUST be changed when
                 # ``replacetokens`` or any mechanism that changes semantics of module
                 # loading is changed. Otherwise cached bytecode may get loaded without
                 # the new transformation mechanisms applied.
                 BYTECODEHEADER = b'HG\x00\x0a'
                 class hgloader(importlib.machinery.SourceFileLoader):
                     """Source file loader that rewrites Mercurial code for Python 3.

                     Module source is run through ``replacetokens`` before it is
                     compiled, which lets the code base stay natively Python 2 while
                     still being importable on Python 3.

                     Because the rewrite rules can change independently of the Python
                     version, the interpreter's own bytecode cache validation is not
                     sufficient: a cache file written under older rules would otherwise
                     be reused as-is. To close that gap, ``get_data`` and ``set_data``
                     (which ``SourceFileLoader`` uses to read and write bytecode cache
                     files) are wrapped so that every cache file carries an extra
                     leading header.

                     The header is ``HG<VERSION>``: the literal bytes ``HG`` followed by
                     2 binary bytes identifying the transformation version. It is
                     stored in ``BYTECODEHEADER`` and must be bumped whenever the
                     transformation semantics change.
                     """

                     def get_data(self, path):
                         """Read ``path``, validating and stripping the hg cache header.

                         Non-bytecode paths are returned untouched. For bytecode paths an
                         OSError is raised when the header is absent or stale, which is
                         how ``SourceFileLoader.get_code()`` expects a "bad" cache file
                         to be reported.
                         """
                         raw = super(hgloader, self).get_data(path)
                         bytecodesuffixes = tuple(importlib.machinery.BYTECODE_SUFFIXES)
                         if path.endswith(bytecodesuffixes):
                             magic = raw[:4]
                             if magic[:2] != b'HG':
                                 raise OSError('no hg header')
                             if magic != BYTECODEHEADER:
                                 raise OSError('hg header version mismatch')
                             return raw[4:]
                         return raw

                     def set_data(self, path, data, *args, **kwargs):
                         """Write ``data`` to ``path``, prefixing bytecode with the header."""
                         bytecodesuffixes = tuple(importlib.machinery.BYTECODE_SUFFIXES)
                         payload = data
                         if path.endswith(bytecodesuffixes):
                             payload = BYTECODEHEADER + data
                         parent = super(hgloader, self)
                         return parent.set_data(path, payload, *args, **kwargs)

                     def source_to_code(self, data, path):
                         """Perform token transformation before compilation."""
                         readline = io.BytesIO(data).readline
                         rawtokens = list(tokenize.tokenize(readline))
                         rewritten = replacetokens(rawtokens, self.name)
                         newsource = tokenize.untokenize(rewritten)
                         # The built-in importer hides its own frames from tracebacks
                         # raised while importing, but offers no hook to do the same
                         # for ours, so this frame shows up in import failures. Frame
                         # stripping is possible only through very ugly hacks, which
                         # have deliberately not been implemented.
                         return super(hgloader, self).source_to_code(newsource, path)
-            # We automagically register our custom importer as a side-effect of loading.
+                # We automagically register our custom importer as a side-effect of
-            # This is necessary to ensure that any entry points are able to import
+                # loading. This is necessary to ensure that any entry points are able
-            # mercurial.* modules without having to perform this registration themselves.
+                # to import mercurial.* modules without having to perform this
-            if sys.version_info[0] >= 3:
+                # registration themselves.
-                _importercls = hgpathentryfinder
+                if not any(isinstance(x, hgpathentryfinder) for x in sys.meta_path):
-            else:
+                    # meta_path is used before any implicit finders and before sys.path.
-                _importercls = hgimporter
+                    sys.meta_path.insert(0, hgpathentryfinder())
-            if not any(isinstance(x, _importercls) for x in sys.meta_path):
-                # meta_path is used before any implicit finders and before sys.path.
-                sys.meta_path.insert(0, _importercls())