manifest: use `read_any_fast_delta` during remotefilelog's repack...
Author: marmoute
Changeset: r52671:7e5ea2a0 (branch: default)
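
The hunk below changes keepset(): instead of calling manifestctx().readdelta() when a revision's parent was already processed, or reading and diffing full manifests otherwise, it asks the manifestctx for read_any_fast_delta(processed) and only falls back to a manual diff when no cheap delta is available. Judging from the new code, the call returns a (delta_from, m) pair, where delta_from names the base the delta was built against (one of the nodes passed in) or None, in which case m is a full manifest. Below is a minimal sketch of consuming such a pair, mirroring the new loop; the helper name, and any return-value semantics beyond what the hunk itself shows, are assumptions for illustration only:

def _file_nodes_for(ctx, processed, lastmanifest=None):
    # Sketch only: mirrors the new keepset() loop in the diff below.
    # delta_from is assumed to be None when no fast delta was available,
    # in which case m is a full manifest rather than a delta/diff dict.
    delta_from, m = ctx.manifestctx().read_any_fast_delta(processed)
    if delta_from is None and lastmanifest is not None:
        # no cheap delta: diff against the previously seen full manifest
        m = m.diff(lastmanifest)
    if type(m) is dict:
        # manifest diff: filename -> ((newnode, newflag), (oldnode, oldflag))
        return [(f, d[0][0]) for f, d in m.items() if d[0][0] is not None]
    # full manifest: filename -> filenode
    return list(m.items())
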
@@ -1,906 +1,902 @@
import os
import time

from mercurial.i18n import _
from mercurial.node import short
from mercurial import (
    encoding,
    error,
    lock as lockmod,
    mdiff,
    policy,
    scmutil,
    util,
    vfs,
)
from mercurial.utils import procutil
from . import (
    constants,
    contentstore,
    datapack,
    historypack,
    metadatastore,
    shallowutil,
)

osutil = policy.importmod('osutil')


class RepackAlreadyRunning(error.Abort):
    pass


def backgroundrepack(repo, incremental=True, packsonly=False):
    cmd = [procutil.hgexecutable(), b'-R', repo.origroot, b'repack']
    msg = _(b"(running background repack)\n")
    if incremental:
        cmd.append(b'--incremental')
        msg = _(b"(running background incremental repack)\n")
    if packsonly:
        cmd.append(b'--packsonly')
    repo.ui.warn(msg)
    # We know this command will find a binary, so don't block on it starting.
    kwargs = {}
    if repo.ui.configbool(b'devel', b'remotefilelog.bg-wait'):
        kwargs['record_wait'] = repo.ui.atexit

    procutil.runbgcommand(cmd, encoding.environ, ensurestart=False, **kwargs)


def fullrepack(repo, options=None):
    """If ``packsonly`` is True, stores creating only loose objects are skipped."""
    if hasattr(repo, 'shareddatastores'):
        datasource = contentstore.unioncontentstore(*repo.shareddatastores)
        historysource = metadatastore.unionmetadatastore(
            *repo.sharedhistorystores, allowincomplete=True
        )

        packpath = shallowutil.getcachepackpath(
            repo, constants.FILEPACK_CATEGORY
        )
        _runrepack(
            repo,
            datasource,
            historysource,
            packpath,
            constants.FILEPACK_CATEGORY,
            options=options,
        )

    if hasattr(repo.manifestlog, 'datastore'):
        localdata, shareddata = _getmanifeststores(repo)
        lpackpath, ldstores, lhstores = localdata
        spackpath, sdstores, shstores = shareddata

        # Repack the shared manifest store
        datasource = contentstore.unioncontentstore(*sdstores)
        historysource = metadatastore.unionmetadatastore(
            *shstores, allowincomplete=True
        )
        _runrepack(
            repo,
            datasource,
            historysource,
            spackpath,
            constants.TREEPACK_CATEGORY,
            options=options,
        )

        # Repack the local manifest store
        datasource = contentstore.unioncontentstore(
            *ldstores, allowincomplete=True
        )
        historysource = metadatastore.unionmetadatastore(
            *lhstores, allowincomplete=True
        )
        _runrepack(
            repo,
            datasource,
            historysource,
            lpackpath,
            constants.TREEPACK_CATEGORY,
            options=options,
        )


def incrementalrepack(repo, options=None):
    """This repacks the repo by looking at the distribution of pack files in the
    repo and performing the most minimal repack to keep the repo in good shape.
    """
    if hasattr(repo, 'shareddatastores'):
        packpath = shallowutil.getcachepackpath(
            repo, constants.FILEPACK_CATEGORY
        )
        _incrementalrepack(
            repo,
            repo.shareddatastores,
            repo.sharedhistorystores,
            packpath,
            constants.FILEPACK_CATEGORY,
            options=options,
        )

    if hasattr(repo.manifestlog, 'datastore'):
        localdata, shareddata = _getmanifeststores(repo)
        lpackpath, ldstores, lhstores = localdata
        spackpath, sdstores, shstores = shareddata

        # Repack the shared manifest store
        _incrementalrepack(
            repo,
            sdstores,
            shstores,
            spackpath,
            constants.TREEPACK_CATEGORY,
            options=options,
        )

        # Repack the local manifest store
        _incrementalrepack(
            repo,
            ldstores,
            lhstores,
            lpackpath,
            constants.TREEPACK_CATEGORY,
            allowincompletedata=True,
            options=options,
        )


def _getmanifeststores(repo):
    shareddatastores = repo.manifestlog.shareddatastores
    localdatastores = repo.manifestlog.localdatastores
    sharedhistorystores = repo.manifestlog.sharedhistorystores
    localhistorystores = repo.manifestlog.localhistorystores

    sharedpackpath = shallowutil.getcachepackpath(
        repo, constants.TREEPACK_CATEGORY
    )
    localpackpath = shallowutil.getlocalpackpath(
        repo.svfs.vfs.base, constants.TREEPACK_CATEGORY
    )

    return (
        (localpackpath, localdatastores, localhistorystores),
        (sharedpackpath, shareddatastores, sharedhistorystores),
    )


def _topacks(packpath, files, constructor):
    paths = list(os.path.join(packpath, p) for p in files)
    packs = list(constructor(p) for p in paths)
    return packs


def _deletebigpacks(repo, folder, files):
    """Deletes packfiles that are bigger than ``packs.maxpacksize``.

    Returns ``files` with the removed files omitted."""
    maxsize = repo.ui.configbytes(b"packs", b"maxpacksize")
    if maxsize <= 0:
        return files

    # This only considers datapacks today, but we could broaden it to include
    # historypacks.
    VALIDEXTS = [b".datapack", b".dataidx"]

    # Either an oversize index or datapack will trigger cleanup of the whole
    # pack:
    oversized = {
        os.path.splitext(path)[0]
        for path, ftype, stat in files
        if (stat.st_size > maxsize and (os.path.splitext(path)[1] in VALIDEXTS))
    }

    for rootfname in oversized:
        rootpath = os.path.join(folder, rootfname)
        for ext in VALIDEXTS:
            path = rootpath + ext
            repo.ui.debug(
                b'removing oversize packfile %s (%s)\n'
                % (path, util.bytecount(os.stat(path).st_size))
            )
            os.unlink(path)
    return [row for row in files if os.path.basename(row[0]) not in oversized]


def _incrementalrepack(
    repo,
    datastore,
    historystore,
    packpath,
    category,
    allowincompletedata=False,
    options=None,
):
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    files = osutil.listdir(packpath, stat=True)
    files = _deletebigpacks(repo, packpath, files)
    datapacks = _topacks(
        packpath, _computeincrementaldatapack(repo.ui, files), datapack.datapack
    )
    datapacks.extend(
        s for s in datastore if not isinstance(s, datapack.datapackstore)
    )

    historypacks = _topacks(
        packpath,
        _computeincrementalhistorypack(repo.ui, files),
        historypack.historypack,
    )
    historypacks.extend(
        s
        for s in historystore
        if not isinstance(s, historypack.historypackstore)
    )

    # ``allhistory{files,packs}`` contains all known history packs, even ones we
    # don't plan to repack. They are used during the datapack repack to ensure
    # good ordering of nodes.
    allhistoryfiles = _allpackfileswithsuffix(
        files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX
    )
    allhistorypacks = _topacks(
        packpath,
        (f for f, mode, stat in allhistoryfiles),
        historypack.historypack,
    )
    allhistorypacks.extend(
        s
        for s in historystore
        if not isinstance(s, historypack.historypackstore)
    )
    _runrepack(
        repo,
        contentstore.unioncontentstore(
            *datapacks, allowincomplete=allowincompletedata
        ),
        metadatastore.unionmetadatastore(*historypacks, allowincomplete=True),
        packpath,
        category,
        fullhistory=metadatastore.unionmetadatastore(
            *allhistorypacks, allowincomplete=True
        ),
        options=options,
    )


def _computeincrementaldatapack(ui, files):
    opts = {
        b'gencountlimit': ui.configint(b'remotefilelog', b'data.gencountlimit'),
        b'generations': ui.configlist(b'remotefilelog', b'data.generations'),
        b'maxrepackpacks': ui.configint(
            b'remotefilelog', b'data.maxrepackpacks'
        ),
        b'repackmaxpacksize': ui.configbytes(
            b'remotefilelog', b'data.repackmaxpacksize'
        ),
        b'repacksizelimit': ui.configbytes(
            b'remotefilelog', b'data.repacksizelimit'
        ),
    }

    packfiles = _allpackfileswithsuffix(
        files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX
    )
    return _computeincrementalpack(packfiles, opts)


def _computeincrementalhistorypack(ui, files):
    opts = {
        b'gencountlimit': ui.configint(
            b'remotefilelog', b'history.gencountlimit'
        ),
        b'generations': ui.configlist(
            b'remotefilelog', b'history.generations', [b'100MB']
        ),
        b'maxrepackpacks': ui.configint(
            b'remotefilelog', b'history.maxrepackpacks'
        ),
        b'repackmaxpacksize': ui.configbytes(
            b'remotefilelog', b'history.repackmaxpacksize', b'400MB'
        ),
        b'repacksizelimit': ui.configbytes(
            b'remotefilelog', b'history.repacksizelimit'
        ),
    }

    packfiles = _allpackfileswithsuffix(
        files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX
    )
    return _computeincrementalpack(packfiles, opts)


def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
    result = []
    fileset = {fn for fn, mode, stat in files}
    for filename, mode, stat in files:
        if not filename.endswith(packsuffix):
            continue

        prefix = filename[: -len(packsuffix)]

        # Don't process a pack if it doesn't have an index.
        if (prefix + indexsuffix) not in fileset:
            continue
        result.append((prefix, mode, stat))

    return result


def _computeincrementalpack(files, opts):
    """Given a set of pack files along with the configuration options, this
    function computes the list of files that should be packed as part of an
    incremental repack.

    It tries to strike a balance between keeping incremental repacks cheap (i.e.
    packing small things when possible, and rolling the packs up to the big ones
    over time).
    """

    limits = list(
        sorted((util.sizetoint(s) for s in opts[b'generations']), reverse=True)
    )
    limits.append(0)

    # Group the packs by generation (i.e. by size)
    generations = []
    for i in range(len(limits)):
        generations.append([])

    sizes = {}
    for prefix, mode, stat in files:
        size = stat.st_size
        if size > opts[b'repackmaxpacksize']:
            continue

        sizes[prefix] = size
        for i, limit in enumerate(limits):
            if size > limit:
                generations[i].append(prefix)
                break

    # Steps for picking what packs to repack:
    # 1. Pick the largest generation with > gencountlimit pack files.
    # 2. Take the smallest three packs.
    # 3. While total-size-of-packs < repacksizelimit: add another pack

    # Find the largest generation with more than gencountlimit packs
    genpacks = []
    for i, limit in enumerate(limits):
        if len(generations[i]) > opts[b'gencountlimit']:
            # Sort to be smallest last, for easy popping later
            genpacks.extend(
                sorted(generations[i], reverse=True, key=lambda x: sizes[x])
            )
            break

    # Take as many packs from the generation as we can
    chosenpacks = genpacks[-3:]
    genpacks = genpacks[:-3]
    repacksize = sum(sizes[n] for n in chosenpacks)
    while (
        repacksize < opts[b'repacksizelimit']
        and genpacks
        and len(chosenpacks) < opts[b'maxrepackpacks']
    ):
        chosenpacks.append(genpacks.pop())
        repacksize += sizes[chosenpacks[-1]]

    return chosenpacks


def _runrepack(
    repo, data, history, packpath, category, fullhistory=None, options=None
):
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    def isold(repo, filename, node):
        """Check if the file node is older than a limit.
        Unless a limit is specified in the config the default limit is taken.
        """
        filectx = repo.filectx(filename, fileid=node)
        filetime = repo[filectx.linkrev()].date()

        ttl = repo.ui.configint(b'remotefilelog', b'nodettl')

        limit = time.time() - ttl
        return filetime[0] < limit

    garbagecollect = repo.ui.configbool(b'remotefilelog', b'gcrepack')
    if not fullhistory:
        fullhistory = history
    packer = repacker(
        repo,
        data,
        history,
        fullhistory,
        category,
        gc=garbagecollect,
        isold=isold,
        options=options,
    )

    with datapack.mutabledatapack(repo.ui, packpath) as dpack:
        with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
            try:
                packer.run(dpack, hpack)
            except error.LockHeld:
                raise RepackAlreadyRunning(
                    _(
                        b"skipping repack - another repack "
                        b"is already running"
                    )
                )


def keepset(repo, keyfn, lastkeepkeys=None):
    """Computes a keepset which is not garbage collected.
    'keyfn' is a function that maps filename, node to a unique key.
    'lastkeepkeys' is an optional argument and if provided the keepset
    function updates lastkeepkeys with more keys and returns the result.
    """
    if not lastkeepkeys:
        keepkeys = set()
    else:
        keepkeys = lastkeepkeys

    # We want to keep:
    # 1. Working copy parent
    # 2. Draft commits
    # 3. Parents of draft commits
    # 4. Pullprefetch and bgprefetchrevs revsets if specified
    revs = [b'.', b'draft()', b'parents(draft())']
    prefetchrevs = repo.ui.config(b'remotefilelog', b'pullprefetch', None)
    if prefetchrevs:
        revs.append(b'(%s)' % prefetchrevs)
    prefetchrevs = repo.ui.config(b'remotefilelog', b'bgprefetchrevs', None)
    if prefetchrevs:
        revs.append(b'(%s)' % prefetchrevs)
    revs = b'+'.join(revs)

    revs = [b'sort((%s), "topo")' % revs]
    keep = scmutil.revrange(repo, revs)

    processed = set()
    lastmanifest = None

    # process the commits in toposorted order starting from the oldest
    for r in reversed(keep._list):
-        if repo[r].p1().rev() in processed:
-            # if the direct parent has already been processed
-            # then we only need to process the delta
-            m = repo[r].manifestctx().readdelta()
-        else:
-            # otherwise take the manifest and diff it
-            # with the previous manifest if one exists
-            if lastmanifest:
-                m = repo[r].manifest().diff(lastmanifest)
-            else:
-                m = repo[r].manifest()
-            lastmanifest = repo[r].manifest()
+        delta_from, m = repo[r].manifestctx().read_any_fast_delta(processed)
+        if delta_from is None and lastmanifest is not None:
+            # could not find a delta, compute one.
+            # XXX (is this really faster?)
+            full = m
+            if lastmanifest:
+                m = m.diff(lastmanifest)
+            lastmanifest = full
        processed.add(r)

        # populate keepkeys with keys from the current manifest
        if type(m) is dict:
            # m is a result of diff of two manifests and is a dictionary that
            # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
            for filename, diff in m.items():
                if diff[0][0] is not None:
                    keepkeys.add(keyfn(filename, diff[0][0]))
        else:
            # m is a manifest object
            for filename, filenode in m.items():
                keepkeys.add(keyfn(filename, filenode))

    return keepkeys


class repacker:
    """Class for orchestrating the repack of data and history information into a
    new format.
    """

    def __init__(
        self,
        repo,
        data,
        history,
        fullhistory,
        category,
        gc=False,
        isold=None,
        options=None,
    ):
        self.repo = repo
        self.data = data
        self.history = history
        self.fullhistory = fullhistory
        self.unit = constants.getunits(category)
        self.garbagecollect = gc
        self.options = options
        if self.garbagecollect:
            if not isold:
                raise ValueError(b"Function 'isold' is not properly specified")
            # use (filename, node) tuple as a keepset key
            self.keepkeys = keepset(repo, lambda f, n: (f, n))
            self.isold = isold

    def run(self, targetdata, targethistory):
        ledger = repackledger()

        with lockmod.lock(
            repacklockvfs(self.repo), b"repacklock", desc=None, timeout=0
        ):
            self.repo.hook(b'prerepack')

            # Populate ledger from source
            self.data.markledger(ledger, options=self.options)
            self.history.markledger(ledger, options=self.options)

            # Run repack
            self.repackdata(ledger, targetdata)
            self.repackhistory(ledger, targethistory)

            # Call cleanup on each source
            for source in ledger.sources:
                source.cleanup(ledger)

    def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
        """Reorderes ``orphans`` into a single chain inside ``nodes`` and
        ``deltabases``.

        We often have orphan entries (nodes without a base that aren't
        referenced by other nodes -- i.e., part of a chain) due to gaps in
        history. Rather than store them as individual fulltexts, we prefer to
        insert them as one chain sorted by size.
        """
        if not orphans:
            return nodes

        def getsize(node, default=0):
            meta = self.data.getmeta(filename, node)
            if constants.METAKEYSIZE in meta:
                return meta[constants.METAKEYSIZE]
            else:
                return default

        # Sort orphans by size; biggest first is preferred, since it's more
        # likely to be the newest version assuming files grow over time.
        # (Sort by node first to ensure the sort is stable.)
        orphans = sorted(orphans)
        orphans = list(sorted(orphans, key=getsize, reverse=True))
        if ui.debugflag:
            ui.debug(
                b"%s: orphan chain: %s\n"
                % (filename, b", ".join([short(s) for s in orphans]))
            )

        # Create one contiguous chain and reassign deltabases.
        for i, node in enumerate(orphans):
            if i == 0:
                deltabases[node] = (self.repo.nullid, 0)
            else:
                parent = orphans[i - 1]
                deltabases[node] = (parent, deltabases[parent][1] + 1)
        nodes = [n for n in nodes if n not in orphans]
        nodes += orphans
        return nodes

    def repackdata(self, ledger, target):
        ui = self.repo.ui
        maxchainlen = ui.configint(b'packs', b'maxchainlen', 1000)

        byfile = {}
        for entry in ledger.entries.values():
            if entry.datasource:
                byfile.setdefault(entry.filename, {})[entry.node] = entry

        count = 0
        repackprogress = ui.makeprogress(
            _(b"repacking data"), unit=self.unit, total=len(byfile)
        )
        for filename, entries in sorted(byfile.items()):
            repackprogress.update(count)

            ancestors = {}
            nodes = list(node for node in entries)
            nohistory = []
            buildprogress = ui.makeprogress(
                _(b"building history"), unit=b'nodes', total=len(nodes)
            )
            for i, node in enumerate(nodes):
                if node in ancestors:
                    continue
                buildprogress.update(i)
                try:
                    ancestors.update(
                        self.fullhistory.getancestors(
                            filename, node, known=ancestors
                        )
                    )
                except KeyError:
                    # Since we're packing data entries, we may not have the
                    # corresponding history entries for them. It's not a big
                    # deal, but the entries won't be delta'd perfectly.
                    nohistory.append(node)
            buildprogress.complete()

            # Order the nodes children first, so we can produce reverse deltas
            orderednodes = list(reversed(self._toposort(ancestors)))
            if len(nohistory) > 0:
                ui.debug(
                    b'repackdata: %d nodes without history\n' % len(nohistory)
                )
            orderednodes.extend(sorted(nohistory))

            # Filter orderednodes to just the nodes we want to serialize (it
            # currently also has the edge nodes' ancestors).
            orderednodes = list(
                filter(lambda node: node in nodes, orderednodes)
            )

            # Garbage collect old nodes:
            if self.garbagecollect:
                neworderednodes = []
                for node in orderednodes:
                    # If the node is old and is not in the keepset, we skip it,
                    # and mark as garbage collected
                    if (filename, node) not in self.keepkeys and self.isold(
                        self.repo, filename, node
                    ):
                        entries[node].gced = True
                        continue
                    neworderednodes.append(node)
                orderednodes = neworderednodes

            # Compute delta bases for nodes:
            deltabases = {}
            nobase = set()
            referenced = set()
            nodes = set(nodes)
            processprogress = ui.makeprogress(
                _(b"processing nodes"), unit=b'nodes', total=len(orderednodes)
            )
            for i, node in enumerate(orderednodes):
                processprogress.update(i)
                # Find delta base
                # TODO: allow delta'ing against most recent descendant instead
                # of immediate child
                deltatuple = deltabases.get(node, None)
                if deltatuple is None:
                    deltabase, chainlen = self.repo.nullid, 0
                    deltabases[node] = (self.repo.nullid, 0)
                    nobase.add(node)
                else:
                    deltabase, chainlen = deltatuple
                    referenced.add(deltabase)

                # Use available ancestor information to inform our delta choices
                ancestorinfo = ancestors.get(node)
                if ancestorinfo:
                    p1, p2, linknode, copyfrom = ancestorinfo

                    # The presence of copyfrom means we're at a point where the
                    # file was copied from elsewhere. So don't attempt to do any
                    # deltas with the other file.
                    if copyfrom:
                        p1 = self.repo.nullid

                    if chainlen < maxchainlen:
                        # Record this child as the delta base for its parents.
                        # This may be non optimal, since the parents may have
                        # many children, and this will only choose the last one.
                        # TODO: record all children and try all deltas to find
                        # best
                        if p1 != self.repo.nullid:
                            deltabases[p1] = (node, chainlen + 1)
                        if p2 != self.repo.nullid:
                            deltabases[p2] = (node, chainlen + 1)

            # experimental config: repack.chainorphansbysize
            if ui.configbool(b'repack', b'chainorphansbysize'):
                orphans = nobase - referenced
                orderednodes = self._chainorphans(
                    ui, filename, orderednodes, orphans, deltabases
                )

            # Compute deltas and write to the pack
            for i, node in enumerate(orderednodes):
                deltabase, chainlen = deltabases[node]
                # Compute delta
                # TODO: Optimize the deltachain fetching. Since we're
                # iterating over the different version of the file, we may
                # be fetching the same deltachain over and over again.
                if deltabase != self.repo.nullid:
                    deltaentry = self.data.getdelta(filename, node)
                    delta, deltabasename, origdeltabase, meta = deltaentry
                    size = meta.get(constants.METAKEYSIZE)
                    if (
                        deltabasename != filename
                        or origdeltabase != deltabase
                        or size is None
                    ):
                        deltabasetext = self.data.get(filename, deltabase)
                        original = self.data.get(filename, node)
                        size = len(original)
                        delta = mdiff.textdiff(deltabasetext, original)
                else:
                    delta = self.data.get(filename, node)
                    size = len(delta)
                    meta = self.data.getmeta(filename, node)

                # TODO: don't use the delta if it's larger than the fulltext
                if constants.METAKEYSIZE not in meta:
                    meta[constants.METAKEYSIZE] = size
                target.add(filename, node, deltabase, delta, meta)

                entries[node].datarepacked = True

            processprogress.complete()
            count += 1

        repackprogress.complete()
        target.close(ledger=ledger)

    def repackhistory(self, ledger, target):
        ui = self.repo.ui

        byfile = {}
        for entry in ledger.entries.values():
            if entry.historysource:
                byfile.setdefault(entry.filename, {})[entry.node] = entry

        progress = ui.makeprogress(
            _(b"repacking history"), unit=self.unit, total=len(byfile)
        )
        for filename, entries in sorted(byfile.items()):
            ancestors = {}
            nodes = list(node for node in entries)

            for node in nodes:
                if node in ancestors:
                    continue
                ancestors.update(
                    self.history.getancestors(filename, node, known=ancestors)
                )

            # Order the nodes children first
            orderednodes = reversed(self._toposort(ancestors))

            # Write to the pack
            dontprocess = set()
            for node in orderednodes:
                p1, p2, linknode, copyfrom = ancestors[node]

                # If the node is marked dontprocess, but it's also in the
                # explicit entries set, that means the node exists both in this
                # file and in another file that was copied to this file.
                # Usually this happens if the file was copied to another file,
                # then the copy was deleted, then reintroduced without copy
                # metadata. The original add and the new add have the same hash
                # since the content is identical and the parents are null.
                if node in dontprocess and node not in entries:
                    # If copyfrom == filename, it means the copy history
                    # went to come other file, then came back to this one, so we
                    # should continue processing it.
                    if p1 != self.repo.nullid and copyfrom != filename:
                        dontprocess.add(p1)
                    if p2 != self.repo.nullid:
                        dontprocess.add(p2)
                    continue

                if copyfrom:
                    dontprocess.add(p1)

                target.add(filename, node, p1, p2, linknode, copyfrom)

                if node in entries:
                    entries[node].historyrepacked = True

            progress.increment()

        progress.complete()
        target.close(ledger=ledger)

    def _toposort(self, ancestors):
        def parentfunc(node):
            p1, p2, linknode, copyfrom = ancestors[node]
            parents = []
            if p1 != self.repo.nullid:
                parents.append(p1)
            if p2 != self.repo.nullid:
                parents.append(p2)
            return parents

        sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
        return sortednodes


class repackledger:
    """Storage for all the bookkeeping that happens during a repack. It contains
    the list of revisions being repacked, what happened to each revision, and
    which source store contained which revision originally (for later cleanup).
    """

    def __init__(self):
        self.entries = {}
        self.sources = {}
        self.created = set()

    def markdataentry(self, source, filename, node):
        """Mark the given filename+node revision as having a data rev in the
        given source.
        """
        entry = self._getorcreateentry(filename, node)
        entry.datasource = True
        entries = self.sources.get(source)
        if not entries:
            entries = set()
            self.sources[source] = entries
        entries.add(entry)

    def markhistoryentry(self, source, filename, node):
        """Mark the given filename+node revision as having a history rev in the
        given source.
        """
        entry = self._getorcreateentry(filename, node)
        entry.historysource = True
        entries = self.sources.get(source)
        if not entries:
            entries = set()
            self.sources[source] = entries
        entries.add(entry)

    def _getorcreateentry(self, filename, node):
        key = (filename, node)
        value = self.entries.get(key)
        if not value:
            value = repackentry(filename, node)
            self.entries[key] = value

        return value

    def addcreated(self, value):
        self.created.add(value)


class repackentry:
    """Simple class representing a single revision entry in the repackledger."""

    __slots__ = (
        'filename',
        'node',
        'datasource',
        'historysource',
        'datarepacked',
        'historyrepacked',
        'gced',
    )

    def __init__(self, filename, node):
        self.filename = filename
        self.node = node
        # If the revision has a data entry in the source
        self.datasource = False
        # If the revision has a history entry in the source
        self.historysource = False
        # If the revision's data entry was repacked into the repack target
        self.datarepacked = False
        # If the revision's history entry was repacked into the repack target
        self.historyrepacked = False
        # If garbage collected
        self.gced = False


def repacklockvfs(repo):
    if hasattr(repo, 'name'):
        # Lock in the shared cache so repacks across multiple copies of the same
        # repo are coordinated.
        sharedcachepath = shallowutil.getcachepackpath(
            repo, constants.FILEPACK_CATEGORY
        )
        return vfs.vfs(sharedcachepath)
    else:
        return repo.svfs
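
For context, this module is driven through fullrepack()/incrementalrepack() above, and the keepset()-based garbage collection only applies when remotefilelog.gcrepack is set (see repacker.__init__). A rough sketch of how an extension command might call into it follows; the wrapper function and import path are illustrative assumptions, not part of this change:

from hgext.remotefilelog import repack as repackmod


def repack_if_configured(ui, repo, incremental=True):
    # Illustrative wrapper, not part of the commit.
    # remotefilelog.gcrepack enables keepset()-based GC in repacker.__init__.
    if ui.configbool(b'remotefilelog', b'gcrepack'):
        ui.note(b'repack will garbage collect unreachable file revisions\n')
    if incremental:
        repackmod.incrementalrepack(repo)
    else:
        repackmod.fullrepack(repo)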