# fncachestore: add typing information
# Raphaël Gomès — changeset r53067:9fbdf355 (default branch)
# (web-viewer residue removed; diff hunk header was "@@ -1,1262 +1,1262")
1 # store.py - repository store handling for Mercurial)
1 # store.py - repository store handling for Mercurial)
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import annotations
8 from __future__ import annotations
9
9
10 import collections
10 import collections
11 import functools
11 import functools
12 import os
12 import os
13 import re
13 import re
14 import stat
14 import stat
15 import typing
15 import typing
16
16
17 from typing import (
17 from typing import (
18 Generator,
18 Generator,
19 List,
19 List,
20 Optional,
20 Optional,
21 )
21 )
22
22
23 from .i18n import _
23 from .i18n import _
24 from .thirdparty import attr
24 from .thirdparty import attr
25
25
26 # Force pytype to use the non-vendored package
26 # Force pytype to use the non-vendored package
27 if typing.TYPE_CHECKING:
27 if typing.TYPE_CHECKING:
28 # noinspection PyPackageRequirements
28 # noinspection PyPackageRequirements
29 import attr
29 import attr
30
30
31 from .node import hex
31 from .node import hex
32 from .revlogutils.constants import (
32 from .revlogutils.constants import (
33 INDEX_HEADER,
33 INDEX_HEADER,
34 KIND_CHANGELOG,
34 KIND_CHANGELOG,
35 KIND_FILELOG,
35 KIND_FILELOG,
36 KIND_MANIFESTLOG,
36 KIND_MANIFESTLOG,
37 )
37 )
38 from . import (
38 from . import (
39 changelog,
39 changelog,
40 error,
40 error,
41 filelog,
41 filelog,
42 manifest,
42 manifest,
43 policy,
43 policy,
44 pycompat,
44 pycompat,
45 revlog as revlogmod,
45 revlog as revlogmod,
46 util,
46 util,
47 vfs as vfsmod,
47 vfs as vfsmod,
48 )
48 )
49 from .utils import hashutil
49 from .utils import hashutil
50
50
# C implementation of the encode/decode helpers when available; the
# pure-Python fallbacks defined below are used otherwise.
parsers = policy.importmod('parsers')

# how much bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10**6
55
55
56
56
57 def _match_tracked_entry(entry: "BaseStoreEntry", matcher):
57 def _match_tracked_entry(entry: "BaseStoreEntry", matcher):
58 """parses a fncache entry and returns whether the entry is tracking a path
58 """parses a fncache entry and returns whether the entry is tracking a path
59 matched by matcher or not.
59 matched by matcher or not.
60
60
61 If matcher is None, returns True"""
61 If matcher is None, returns True"""
62
62
63 if matcher is None:
63 if matcher is None:
64 return True
64 return True
65
65
66 # TODO: make this safe for other entry types. Currently, the various
66 # TODO: make this safe for other entry types. Currently, the various
67 # store.data_entry generators only yield RevlogStoreEntry, so the
67 # store.data_entry generators only yield RevlogStoreEntry, so the
68 # attributes do exist on `entry`.
68 # attributes do exist on `entry`.
69 # pytype: disable=attribute-error
69 # pytype: disable=attribute-error
70 if entry.is_filelog:
70 if entry.is_filelog:
71 return matcher(entry.target_id)
71 return matcher(entry.target_id)
72 elif entry.is_manifestlog:
72 elif entry.is_manifestlog:
73 return matcher.visitdir(entry.target_id.rstrip(b'/'))
73 return matcher.visitdir(entry.target_id.rstrip(b'/'))
74 # pytype: enable=attribute-error
74 # pytype: enable=attribute-error
75 raise error.ProgrammingError(b"cannot process entry %r" % entry)
75 raise error.ProgrammingError(b"cannot process entry %r" % entry)
76
76
77
77
78 # This avoids a collision between a file named foo and a dir named
78 # This avoids a collision between a file named foo and a dir named
79 # foo.i or foo.d
79 # foo.i or foo.d
80 def _encodedir(path):
80 def _encodedir(path):
81 """
81 """
82 >>> _encodedir(b'data/foo.i')
82 >>> _encodedir(b'data/foo.i')
83 'data/foo.i'
83 'data/foo.i'
84 >>> _encodedir(b'data/foo.i/bla.i')
84 >>> _encodedir(b'data/foo.i/bla.i')
85 'data/foo.i.hg/bla.i'
85 'data/foo.i.hg/bla.i'
86 >>> _encodedir(b'data/foo.i.hg/bla.i')
86 >>> _encodedir(b'data/foo.i.hg/bla.i')
87 'data/foo.i.hg.hg/bla.i'
87 'data/foo.i.hg.hg/bla.i'
88 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
88 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
89 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
89 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
90 """
90 """
91 return (
91 return (
92 path.replace(b".hg/", b".hg.hg/")
92 path.replace(b".hg/", b".hg.hg/")
93 .replace(b".i/", b".i.hg/")
93 .replace(b".i/", b".i.hg/")
94 .replace(b".d/", b".d.hg/")
94 .replace(b".d/", b".d.hg/")
95 )
95 )
96
96
97
97
# prefer the C implementation of encodedir when available
encodedir = getattr(parsers, 'encodedir', _encodedir)
99
99
100
100
def decodedir(path):
    """Undo the ``.hg`` directory suffixing performed by ``encodedir``.

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: nothing was escaped in the first place
    if b".hg/" not in path:
        return path
    # decode in the reverse order of _encodedir's replacements
    for escaped, plain in (
        (b".d.hg/", b".d/"),
        (b".i.hg/", b".i/"),
        (b".hg.hg/", b".hg/"),
    ):
        path = path.replace(escaped, plain)
    return path
117
117
118
118
119 def _reserved():
119 def _reserved():
120 """characters that are problematic for filesystems
120 """characters that are problematic for filesystems
121
121
122 * ascii escapes (0..31)
122 * ascii escapes (0..31)
123 * ascii hi (126..255)
123 * ascii hi (126..255)
124 * windows specials
124 * windows specials
125
125
126 these characters will be escaped by encodefunctions
126 these characters will be escaped by encodefunctions
127 """
127 """
128 winreserved = [ord(x) for x in u'\\:*?"<>|']
128 winreserved = [ord(x) for x in u'\\:*?"<>|']
129 for x in range(32):
129 for x in range(32):
130 yield x
130 yield x
131 for x in range(126, 256):
131 for x in range(126, 256):
132 yield x
132 yield x
133 for x in winreserved:
133 for x in winreserved:
134 yield x
134 yield x
135
135
136
136
def _buildencodefun():
    """Build the reversible (encode, decode) filename codec pair.

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    escape = b'_'
    xchr = pycompat.bytechr
    # by default every plain ascii byte maps to itself
    cmap = {xchr(x): xchr(x) for x in range(127)}
    # reserved bytes become '~xx' two-digit hex escapes
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # capitals (and the escape character itself) become '_' + lowercase
    for x in list(range(ord(b"A"), ord(b"Z") + 1)) + [ord(escape)]:
        cmap[xchr(x)] = escape + xchr(x).lower()

    # decoding is the exact inverse mapping
    dmap = {v: k for k, v in cmap.items()}

    def decode(s):
        i = 0
        while i < len(s):
            # encoded tokens are 1, 2 ('_x') or 3 ('~xx') bytes long;
            # try the shortest first
            for width in range(1, 4):
                try:
                    yield dmap[s[i : i + width]]
                    i += width
                    break
                except KeyError:
                    pass
            else:
                # no token matched: the input is not valid encoder output
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )
193
193
194
194
# the default (reversible) filename codec pair used by the store
_encodefname, _decodefname = _buildencodefun()
196
196
197
197
def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    # protect directory names first, then escape individual characters
    dir_safe = encodedir(s)
    return _encodefname(dir_safe)
204
204
205
205
def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    # undo the character escaping first, then the directory suffixing
    name = _decodefname(s)
    return decodedir(name)
212
212
213
213
def _buildlowerencodefun():
    """Build the lossy lowercasing encoder (no '_'-escaping of capitals).

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    table = {xchr(x): xchr(x) for x in range(127)}
    # reserved bytes get '~xx' hex escapes
    for x in _reserved():
        table[xchr(x)] = b"~%02x" % x
    # capitals are simply lowercased (not reversible)
    for x in range(ord(b"A"), ord(b"Z") + 1):
        table[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([table[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
237
237
238
238
# prefer the C implementation of lowerencode when available
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)
244
244
245
245
def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    for idx, seg in enumerate(path):
        if not seg:
            continue
        if dotencode and seg[0] in b'. ':
            # escape a leading period or space
            seg = b"~%02x" % ord(seg[0:1]) + seg[1:]
            path[idx] = seg
        else:
            # the "basename" before the first dot decides whether the
            # segment collides with a Windows reserved device name
            dot = seg.find(b'.')
            if dot == -1:
                dot = len(seg)
            reserved = (dot == 3 and seg[:3] in _winres3) or (
                dot == 4
                and b'1' <= seg[3:4] <= b'9'
                and seg[:3] in _winres4
            )
            if reserved:
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(seg[2:3])
                seg = seg[0:2] + ec + seg[3:]
                path[idx] = seg
        if seg[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[idx] = seg[:-1] + b"~%02x" % ord(seg[-1:])
    return path
292
292
293
293
# longest encoded store path before the hashed fallback kicks in
_maxstorepathlen = 120
# how many leading characters of each directory level survive hashing
_dirprefixlen = 8
# total budget for the shortened directory prefix in a hashed path
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
297
297
298
298
def _hashencode(path, dotencode):
    """Return the non-reversible hashed store path for ``path``.

    Used when the default encoding would exceed ``_maxstorepathlen``;
    see ``_hybridencode`` for a description of the full scheme.
    """
    digest = hex(hashutil.sha1(path).digest())
    # skips prefix 'data/' or 'meta/' before lower-encoding
    segments = _auxencode(lowerencode(path[5:]).split(b'/'), dotencode)
    basename = segments[-1]
    _root, ext = os.path.splitext(basename)
    shortdirs = []
    used = 0
    for seg in segments[:-1]:
        prefix = seg[:_dirprefixlen]
        if prefix[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            prefix = prefix[:-1] + b'_'
        # account for the '/' separator between kept directory levels
        needed = len(prefix) if not used else used + 1 + len(prefix)
        if needed > _maxshortdirslen:
            break
        shortdirs.append(prefix)
        used = needed
    dirs = b'/'.join(shortdirs)
    if dirs:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    # pad with as much of the basename as still fits in the budget
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        res = b'dh/' + dirs + basename[:spaceleft] + digest + ext
    return res
329
329
330
330
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    segments = _encodefname(path).split(b'/')
    encoded = b'/'.join(_auxencode(segments, dotencode))
    if len(encoded) > _maxstorepathlen:
        # result is over budget: fall back to the hashed encoding
        encoded = _hashencode(path, dotencode)
    return encoded
368
368
369
369
def _pathencode(path):
    """Encode ``path`` with dotencode enabled, hashing when over-long.

    Pure-Python fallback for the C ``parsers.pathencode``.
    """
    dir_safe = encodedir(path)
    # even before character escaping, an over-long path must be hashed
    if len(path) > _maxstorepathlen:
        return _hashencode(dir_safe, True)
    segments = _encodefname(dir_safe).split(b'/')
    encoded = b'/'.join(_auxencode(segments, True))
    if len(encoded) > _maxstorepathlen:
        return _hashencode(dir_safe, True)
    return encoded
379
379
380
380
# prefer the C implementation of pathencode when available
_pathencode = getattr(parsers, 'pathencode', _pathencode)
382
382
383
383
def _plainhybridencode(f):
    """Hybrid-encode ``f`` with dot-encoding disabled."""
    return _hybridencode(f, dotencode=False)
386
386
387
387
388 def _calcmode(vfs):
388 def _calcmode(vfs):
389 try:
389 try:
390 # files in .hg/ will be created using this mode
390 # files in .hg/ will be created using this mode
391 mode = vfs.stat().st_mode
391 mode = vfs.stat().st_mode
392 # avoid some useless chmods
392 # avoid some useless chmods
393 if (0o777 & ~util.umask) == (0o777 & mode):
393 if (0o777 & ~util.umask) == (0o777 & mode):
394 mode = None
394 mode = None
395 except OSError:
395 except OSError:
396 mode = None
396 mode = None
397 return mode
397 return mode
398
398
399
399
# store-level file and directory names
# NOTE(review): the consumers of `_data` live outside this chunk — verify
# against the rest of store.py before relying on ordering.
_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]

# extensions that mark a file as part of a revlog (see is_revlog_file)
REVLOG_FILES_EXT = (
    b'.i',
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
440
440
441
441
def is_revlog(f, kind, st):
    """Return True when directory entry ``f`` is a regular file with a
    revlog extension (``st`` is accepted for signature compatibility but
    unused)."""
    return kind == stat.S_IFREG and f.endswith(REVLOG_FILES_EXT)
448
448
449
449
def is_revlog_file(f):
    """Return True when filename ``f`` carries a revlog file extension."""
    return f.endswith(REVLOG_FILES_EXT)
454
454
455
455
@attr.s(slots=True)
class StoreFile:
    """a file matching a store entry"""

    unencoded_path = attr.ib()
    _file_size = attr.ib(default=None)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """Return (and cache) this file's size.

        ``vfs`` may only be None when the size is already cached,
        otherwise a ProgrammingError is raised. A missing file counts
        as size 0.
        """
        if self._file_size is not None:
            return self._file_size
        if vfs is None:
            msg = b"calling vfs-less file_size without prior call: %s"
            msg %= self.unencoded_path
            raise error.ProgrammingError(msg)
        try:
            self._file_size = vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            # treat a vanished file as empty
            self._file_size = 0
        return self._file_size

    @property
    def has_size(self):
        """True when the size is already known without touching a vfs."""
        return self._file_size is not None

    def get_stream(self, vfs, volatiles):
        """return data "stream" information for this file

        (unencoded_file_path, content_iterator, content_size)
        """
        size = self.file_size(None)

        def reader():
            full_path = vfs.join(self.unencoded_path)
            with volatiles.open(full_path) as fp:
                yield None  # ready to stream
                if size > 65536:
                    yield from util.filechunkiter(fp, limit=size)
                else:
                    # small file: one read call is enough
                    yield fp.read(size)

        stream = reader()
        # prime the generator so the file is opened before we return
        next(stream)
        return (self.unencoded_path, stream, size)
499
499
500
500
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    # subclasses refine this; True means the entry's files may change or
    # disappear between listing and streaming
    maybe_volatile = True

    def files(self) -> List[StoreFile]:
        """Return the StoreFile objects backing this entry (abstract)."""
        raise NotImplementedError

    def get_streams(
        self,
        repo=None,
        vfs=None,
        volatiles=None,
        max_changeset=None,
        preserve_file_count=False,
    ):
        """return a list of data stream associated to files for this entry

        return [(unencoded_file_path, content_iterator, content_size), …]
        """
        assert vfs is not None
        return [sf.get_stream(vfs, volatiles) for sf in self.files()]
526
526
527
527
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    is_revlog = False

    maybe_volatile = attr.ib()
    _entry_path = attr.ib()
    _is_volatile = attr.ib(default=False)
    _file_size = attr.ib(default=None)
    _files = attr.ib(default=None)

    def __init__(
        self,
        entry_path,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__()
        self._entry_path = entry_path
        self._is_volatile = is_volatile
        self._file_size = file_size
        self._files = None
        # a simple entry is volatile exactly when its single file is
        self.maybe_volatile = is_volatile

    def files(self) -> List[StoreFile]:
        """Return (building lazily) the single StoreFile of this entry."""
        if self._files is None:
            store_file = StoreFile(
                unencoded_path=self._entry_path,
                file_size=self._file_size,
                is_volatile=self._is_volatile,
            )
            self._files = [store_file]
        return self._files
563
563
564
564
565 @attr.s(slots=True, init=False)
565 @attr.s(slots=True, init=False)
566 class RevlogStoreEntry(BaseStoreEntry):
566 class RevlogStoreEntry(BaseStoreEntry):
567 """A revlog entry in the store"""
567 """A revlog entry in the store"""
568
568
569 is_revlog = True
569 is_revlog = True
570
570
571 revlog_type = attr.ib(default=None)
571 revlog_type = attr.ib(default=None)
572 target_id = attr.ib(default=None)
572 target_id = attr.ib(default=None)
573 maybe_volatile = attr.ib(default=True)
573 maybe_volatile = attr.ib(default=True)
574 _path_prefix = attr.ib(default=None)
574 _path_prefix = attr.ib(default=None)
575 _details = attr.ib(default=None)
575 _details = attr.ib(default=None)
576 _files = attr.ib(default=None)
576 _files = attr.ib(default=None)
577
577
    def __init__(
        self,
        revlog_type,
        path_prefix,
        target_id,
        details,
    ):
        """Build a revlog entry.

        ``details`` maps file extensions (b'.i', b'.d', ...) to sizes and
        must at least contain the b'.i' index entry.
        """
        super().__init__()
        self.revlog_type = revlog_type
        self.target_id = target_id
        self._path_prefix = path_prefix
        assert b'.i' in details, (path_prefix, details)
        # the entry may be volatile when any of its files carries a
        # volatile extension (e.g. nodemap files, which can be deleted)
        for ext in details:
            if ext.endswith(REVLOG_FILES_VOLATILE_EXT):
                self.maybe_volatile = True
                break
        else:
            self.maybe_volatile = False
        self._details = details
        self._files = None
598
598
    @property
    def is_changelog(self):
        # this revlog holds the changelog
        return self.revlog_type == KIND_CHANGELOG

    @property
    def is_manifestlog(self):
        # this revlog holds a manifest
        return self.revlog_type == KIND_MANIFESTLOG

    @property
    def is_filelog(self):
        # this revlog holds a filelog
        return self.revlog_type == KIND_FILELOG
610
610
611 def main_file_path(self):
611 def main_file_path(self):
612 """unencoded path of the main revlog file"""
612 """unencoded path of the main revlog file"""
613 return self._path_prefix + b'.i'
613 return self._path_prefix + b'.i'
614
614
615 def files(self) -> List[StoreFile]:
615 def files(self) -> List[StoreFile]:
616 if self._files is None:
616 if self._files is None:
617 self._files = []
617 self._files = []
618 for ext in sorted(self._details, key=_ext_key):
618 for ext in sorted(self._details, key=_ext_key):
619 path = self._path_prefix + ext
619 path = self._path_prefix + ext
620 file_size = self._details[ext]
620 file_size = self._details[ext]
621 # files that are "volatile" and might change between
621 # files that are "volatile" and might change between
622 # listing and streaming
622 # listing and streaming
623 #
623 #
624 # note: the ".nd" file are nodemap data and won't "change"
624 # note: the ".nd" file are nodemap data and won't "change"
625 # but they might be deleted.
625 # but they might be deleted.
626 volatile = ext.endswith(REVLOG_FILES_VOLATILE_EXT)
626 volatile = ext.endswith(REVLOG_FILES_VOLATILE_EXT)
627 f = StoreFile(path, file_size, volatile)
627 f = StoreFile(path, file_size, volatile)
628 self._files.append(f)
628 self._files.append(f)
629 return self._files
629 return self._files
630
630
631 def get_streams(
631 def get_streams(
632 self,
632 self,
633 repo=None,
633 repo=None,
634 vfs=None,
634 vfs=None,
635 volatiles=None,
635 volatiles=None,
636 max_changeset=None,
636 max_changeset=None,
637 preserve_file_count=False,
637 preserve_file_count=False,
638 ):
638 ):
639 pre_sized = all(f.has_size for f in self.files())
639 pre_sized = all(f.has_size for f in self.files())
640 if pre_sized and (
640 if pre_sized and (
641 repo is None
641 repo is None
642 or max_changeset is None
642 or max_changeset is None
643 # This use revlog-v2, ignore for now
643 # This use revlog-v2, ignore for now
644 or any(k.endswith(b'.idx') for k in self._details.keys())
644 or any(k.endswith(b'.idx') for k in self._details.keys())
645 # This is not inline, no race expected
645 # This is not inline, no race expected
646 or b'.d' in self._details
646 or b'.d' in self._details
647 ):
647 ):
648 return super().get_streams(
648 return super().get_streams(
649 repo=repo,
649 repo=repo,
650 vfs=vfs,
650 vfs=vfs,
651 volatiles=volatiles,
651 volatiles=volatiles,
652 max_changeset=max_changeset,
652 max_changeset=max_changeset,
653 preserve_file_count=preserve_file_count,
653 preserve_file_count=preserve_file_count,
654 )
654 )
655 elif not preserve_file_count:
655 elif not preserve_file_count:
656 stream = [
656 stream = [
657 f.get_stream(vfs, volatiles)
657 f.get_stream(vfs, volatiles)
658 for f in self.files()
658 for f in self.files()
659 if not f.unencoded_path.endswith((b'.i', b'.d'))
659 if not f.unencoded_path.endswith((b'.i', b'.d'))
660 ]
660 ]
661 rl = self.get_revlog_instance(repo).get_revlog()
661 rl = self.get_revlog_instance(repo).get_revlog()
662 rl_stream = rl.get_streams(max_changeset)
662 rl_stream = rl.get_streams(max_changeset)
663 stream.extend(rl_stream)
663 stream.extend(rl_stream)
664 return stream
664 return stream
665
665
666 name_to_size = {}
666 name_to_size = {}
667 for f in self.files():
667 for f in self.files():
668 name_to_size[f.unencoded_path] = f.file_size(None)
668 name_to_size[f.unencoded_path] = f.file_size(None)
669
669
670 stream = [
670 stream = [
671 f.get_stream(vfs, volatiles)
671 f.get_stream(vfs, volatiles)
672 for f in self.files()
672 for f in self.files()
673 if not f.unencoded_path.endswith(b'.i')
673 if not f.unencoded_path.endswith(b'.i')
674 ]
674 ]
675
675
676 index_path = self._path_prefix + b'.i'
676 index_path = self._path_prefix + b'.i'
677
677
678 index_file = None
678 index_file = None
679 try:
679 try:
680 index_file = vfs(index_path)
680 index_file = vfs(index_path)
681 header = index_file.read(INDEX_HEADER.size)
681 header = index_file.read(INDEX_HEADER.size)
682 if revlogmod.revlog.is_inline_index(header):
682 if revlogmod.revlog.is_inline_index(header):
683 size = name_to_size[index_path]
683 size = name_to_size[index_path]
684
684
685 # no split underneath, just return the stream
685 # no split underneath, just return the stream
686 def get_stream():
686 def get_stream():
687 fp = index_file
687 fp = index_file
688 try:
688 try:
689 fp.seek(0)
689 fp.seek(0)
690 yield None
690 yield None
691 if size <= 65536:
691 if size <= 65536:
692 yield fp.read(size)
692 yield fp.read(size)
693 else:
693 else:
694 yield from util.filechunkiter(fp, limit=size)
694 yield from util.filechunkiter(fp, limit=size)
695 finally:
695 finally:
696 fp.close()
696 fp.close()
697
697
698 s = get_stream()
698 s = get_stream()
699 next(s)
699 next(s)
700 index_file = None
700 index_file = None
701 stream.append((index_path, s, size))
701 stream.append((index_path, s, size))
702 else:
702 else:
703 rl = self.get_revlog_instance(repo).get_revlog()
703 rl = self.get_revlog_instance(repo).get_revlog()
704 rl_stream = rl.get_streams(max_changeset, force_inline=True)
704 rl_stream = rl.get_streams(max_changeset, force_inline=True)
705 for name, s, size in rl_stream:
705 for name, s, size in rl_stream:
706 if name_to_size.get(name, 0) != size:
706 if name_to_size.get(name, 0) != size:
707 msg = _(b"expected %d bytes but %d provided for %s")
707 msg = _(b"expected %d bytes but %d provided for %s")
708 msg %= name_to_size.get(name, 0), size, name
708 msg %= name_to_size.get(name, 0), size, name
709 raise error.Abort(msg)
709 raise error.Abort(msg)
710 stream.extend(rl_stream)
710 stream.extend(rl_stream)
711 finally:
711 finally:
712 if index_file is not None:
712 if index_file is not None:
713 index_file.close()
713 index_file.close()
714
714
715 files = self.files()
715 files = self.files()
716 assert len(stream) == len(files), (
716 assert len(stream) == len(files), (
717 stream,
717 stream,
718 files,
718 files,
719 self._path_prefix,
719 self._path_prefix,
720 self.target_id,
720 self.target_id,
721 )
721 )
722 return stream
722 return stream
723
723
724 def get_revlog_instance(self, repo):
724 def get_revlog_instance(self, repo):
725 """Obtain a revlog instance from this store entry
725 """Obtain a revlog instance from this store entry
726
726
727 An instance of the appropriate class is returned.
727 An instance of the appropriate class is returned.
728 """
728 """
729 if self.is_changelog:
729 if self.is_changelog:
730 return changelog.changelog(repo.svfs)
730 return changelog.changelog(repo.svfs)
731 elif self.is_manifestlog:
731 elif self.is_manifestlog:
732 mandir = self.target_id
732 mandir = self.target_id
733 return manifest.manifestrevlog(
733 return manifest.manifestrevlog(
734 repo.nodeconstants, repo.svfs, tree=mandir
734 repo.nodeconstants, repo.svfs, tree=mandir
735 )
735 )
736 else:
736 else:
737 return filelog.filelog(repo.svfs, self.target_id)
737 return filelog.filelog(repo.svfs, self.target_id)
738
738
739
739
def _gather_revlog(files_data):
    """Group revlog-related files by their shared revlog prefix.

    ``files_data`` yields ``(filename, data)`` pairs.  Returns a sorted
    list of ``(prefix, {extension: data})`` tuples, one per revlog, where
    ``prefix`` is the filename stripped of its variable extension.
    """
    grouped = collections.defaultdict(dict)
    for filename, data in files_data:
        prefix, suffix = _split_revlog_ext(filename)
        grouped[prefix][suffix] = data
    return sorted(grouped.items())
752
752
753
753
def _split_revlog_ext(filename):
    """Split ``filename`` into (revlog prefix, variable extension).

    Long extensions use b'-' as separator, all others use b'.'.
    """
    sep = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    split_at = filename.rfind(sep)
    return filename[:split_at], filename[split_at:]
762
762
763
763
764 def _ext_key(ext):
764 def _ext_key(ext):
765 """a key to order revlog suffix
765 """a key to order revlog suffix
766
766
767 important to issue .i after other entry."""
767 important to issue .i after other entry."""
768 # the only important part of this order is to keep the `.i` last.
768 # the only important part of this order is to keep the `.i` last.
769 if ext.endswith(b'.n'):
769 if ext.endswith(b'.n'):
770 return (0, ext)
770 return (0, ext)
771 elif ext.endswith(b'.nd'):
771 elif ext.endswith(b'.nd'):
772 return (10, ext)
772 return (10, ext)
773 elif ext.endswith(b'.d'):
773 elif ext.endswith(b'.d'):
774 return (20, ext)
774 return (20, ext)
775 elif ext.endswith(b'.i'):
775 elif ext.endswith(b'.i'):
776 return (50, ext)
776 return (50, ext)
777 else:
777 else:
778 return (40, ext)
778 return (40, ext)
779
779
780
780
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        """Create a store rooted at ``path``.

        File names go through ``encodedir`` (directory-name encoding only)
        before hitting the raw vfs.
        """
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        """Return the absolute path of store file ``f`` (dir-encoded)."""
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        '''return a sorted list of (unencoded_path, size) pairs

        Only files recognized by ``is_revlog`` are reported; directories
        are descended into when ``recurse`` is True.
        '''
        path = self.path
        if relpath:
            path += b'/' + relpath
        # length of the root prefix to strip from reported paths
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    if is_revlog(f, kind, st):
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        """Return the changelog instance for this store."""
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch) -> manifest.manifestlog:
        """Return the manifestlog instance for this store."""
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed.

        Note: ``matcher`` is accepted for interface compatibility but is
        not applied by this base implementation.
        """
        dirs = [
            (b'data', KIND_FILELOG, False),
            (b'meta', KIND_MANIFESTLOG, True),
        ]
        for base_dir, rl_type, strip_filename in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            for revlog, details in _gather_revlog(files):
                # drop the leading b'data/' or b'meta/' directory
                revlog_target_id = revlog.split(b'/', 1)[1]
                if strip_filename and b'/' in revlog:
                    # tree manifests: keep only the directory part
                    revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
                    revlog_target_id += b'/'
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    details=details,
                )

    def top_entries(
        self, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield store entries living at the root of the store.

        This covers the changelog, root manifest, and (when requested)
        phase and obsolescence data.
        """
        if phase and self.vfs.exists(b'phaseroots'):
            yield SimpleStoreEntry(
                entry_path=b'phaseroots',
                is_volatile=True,
            )

        if obsolescence and self.vfs.exists(b'obsstore'):
            # XXX if we had the file size it could be non-volatile
            yield SimpleStoreEntry(
                entry_path=b'obsstore',
                is_volatile=True,
            )

        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, s in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = s
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = s
            else:
                yield SimpleStoreEntry(
                    entry_path=u,
                    is_volatile=False,
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, KIND_MANIFESTLOG),
            (changelogs, KIND_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=revlog_type,
                    target_id=b'',
                    details=details,
                )

    def walk(
        self, matcher=None, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields instance from BaseStoreEntry subclasses

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        """
        # yield data files first
        for x in self.data_entries(matcher):
            yield x
        for x in self.top_entries(phase=phase, obsolescence=obsolescence):
            yield x

    def copylist(self):
        """Return the list of files to copy when cloning this store."""
        return _data

    def write(self, tr):
        # nothing to persist for the base store
        pass

    def invalidatecaches(self):
        # no caches to invalidate in the base store
        pass

    def markremoved(self, fn):
        # removal tracking is only meaningful for fncache-based stores
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
945
945
946
946
class encodedstore(basicstore):
    """A store whose on-disk file names go through ``encodefilename``."""

    def __init__(self, path, vfstype):
        store_vfs = vfstype(path + b'/store')
        self.path = store_vfs.base
        self.createmode = _calcmode(store_vfs)
        store_vfs.createmode = self.createmode
        self.rawvfs = store_vfs
        self.vfs = vfsmod.filtervfs(store_vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        """Walk on-disk files, decoding their names on the way out.

        An undecodable name raises ``StorageError`` unless ``undecodable``
        is a list, in which case the raw name is collected there instead.
        """
        decoded = []
        for raw_name, value in super()._walk(relpath, recurse):
            try:
                clear_name = decodefilename(raw_name)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % raw_name
                    raise error.StorageError(msg)
                undecodable.append(raw_name)
                continue
            decoded.append((clear_name, value))
        return decoded

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield data entries whose tracked path matches ``matcher``."""
        for entry in super().data_entries(undecodable=undecodable):
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        """Return the absolute, filename-encoded path of store file ``f``."""
        return b'/'.join((self.path, encodefilename(f)))

    def copylist(self):
        """Return the list of files to copy when cloning this store."""
        store_files = [b'store/' + f for f in _data]
        return [b'requires', b'00changelog.i'] + store_files
988
988
989
989
class fncache:
    """In-memory view of the store's `fncache` file.

    The fncache lists the unencoded name of every revlog file in the
    store, so they can be enumerated without decoding the on-disk tree.
    """

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # names deliberately kept out of the cache (see addignore)
        self._ignores = set()
        # entries read from disk; None until _load() has run
        self.entries = None
        # True when entries were removed and the file must be rewritten
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    @property
    def is_loaded(self):
        # whether the on-disk fncache content has been read into memory
        return self.entries is not None

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if not self.is_loaded:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        # read in fixed-size chunks, splitting on the last newline of each
        # chunk and carrying the remainder over to the next iteration
        chunk = b''
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        # leftover bytes mean the file did not end with a newline: corrupt
        if chunk:
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            # re-scan the file line by line to report the offending line
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """Persist pending changes within transaction ``tr``.

        Removals (``_dirty``) force a full rewrite; additions alone are
        appended to the existing file.
        """
        if self._dirty:
            assert self.is_loaded
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # drop the in-memory copy; it will be re-read on next use
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """Exclude ``fn`` from being added to the cache by add()."""
        self._ignores.add(fn)

    def add(self, fn):
        """Record ``fn`` as part of the store (no-op if ignored/known)."""
        if fn in self._ignores:
            return
        if not self.is_loaded:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """Forget ``fn``; marks the cache dirty if it was persisted."""
        if not self.is_loaded:
            self._load()
        if fn in self.addls:
            # never persisted: just drop the pending addition
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if not self.is_loaded:
            self._load()
        return fn in self.entries

    def __iter__(self):
        # iterate persisted and pending entries alike
        if not self.is_loaded:
            self._load()
        return iter(self.entries | self.addls)
1118
1118
1119
1119
class _fncachevfs(vfsmod.proxyvfs):
    """A vfs proxy registering newly-written revlog files in the fncache.

    File names are passed through ``encode`` before reaching the wrapped
    vfs.
    """

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache: fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        """Open ``path`` (encoded); record new writable revlog files."""
        encoded = self.encode(path)
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and is_revlog_file(path)
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = not self.fncache.is_loaded and (
                # if the file has size zero, it should be considered as missing.
                # Such zero-size files are the result of truncation when a
                # transaction is aborted.
                self.vfs.exists(encoded)
                and self.vfs.stat(encoded).st_size
            )
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path: Optional[bytes], *insidef: bytes) -> bytes:
        # encode every path fragment before delegating to the wrapped vfs
        insidef = (self.encode(f) for f in insidef)

        if path:
            return self.vfs.join(self.encode(path), *insidef)
        else:
            return self.vfs.join(path, *insidef)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
1158
1158
1159
1159
class fncachestore(basicstore):
    """A store that keeps the list of data/meta file names in an fncache.

    On-disk file names are stored through a hybrid encoding (optionally
    the "dotencode" variant), while the plain names are recorded in the
    ``fncache`` file, so the store can be enumerated without decoding
    directory listings.
    """

    def __init__(self, path, vfstype, dotencode):
        # `dotencode` selects the path-encoding scheme used on disk;
        # both are hybrid encodings, differing in how some leading
        # characters are escaped — presumably per repo requirements
        # negotiated elsewhere (TODO confirm with caller).
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # `rawvfs` accesses encoded paths directly; `vfs` wraps it so
        # that newly created revlog files are registered in the fncache.
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f: bytes) -> bytes:
        """Return the absolute on-disk (encoded) path for store file *f*."""
        return self.pathsep + self.encode(f)

    def getsize(self, path: bytes) -> int:
        """Return the size in bytes of the already-encoded *path*."""
        return self.rawvfs.stat(path).st_size

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield one ``RevlogStoreEntry`` per revlog listed in the fncache.

        Entries under ``data/`` are filelogs; entries under ``meta/``
        are (tree) manifest logs.  Entries not matching *matcher* are
        filtered out.  *undecodable* is accepted for interface
        compatibility but unused here — fncache names need no decoding.
        """
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        files = ((f, None) for f in self.fncache if is_revlog_file(f))
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            if revlog.startswith(b'data/'):
                rl_type = KIND_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = KIND_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            entry = RevlogStoreEntry(
                path_prefix=revlog,
                revlog_type=rl_type,
                target_id=revlog_target_id,
                details=details,
            )
            if _match_tracked_entry(entry, matcher):
                yield entry

    def copylist(self) -> List[bytes]:
        """Return the list of files/directories to copy when cloning."""
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr) -> None:
        """Flush pending fncache additions as part of transaction *tr*."""
        self.fncache.write(tr)

    def invalidatecaches(self) -> None:
        """Drop the in-memory fncache state so it is reloaded from disk."""
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn: bytes) -> None:
        """Forget *fn* (a plain store name) from the fncache."""
        self.fncache.remove(fn)

    def _exists(self, f: bytes) -> bool:
        # Existence is probed via stat() on the encoded name; any result
        # other than FileNotFoundError counts as "exists".
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path: bytes) -> bool:
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now