upstream/mercurial-mirror Commit - r41291:60b3edcc

1

from __future__ import absolute_import

1

from __future__ import absolute_import

2

3

import os

3

import os

4

import time

4

import time

5

6

from mercurial.i18n import _

6

from mercurial.i18n import _

7

from mercurial.node import (

7

from mercurial.node import (

8

nullid,

8

nullid,

9

short,

9

short,

10

)

10

)

11

from mercurial import (

11

from mercurial import (

12

encoding,

12

encoding,

13

error,

13

error,

14

mdiff,

14

mdiff,

15

policy,

15

policy,

16

pycompat,

16

pycompat,

17

scmutil,

17

scmutil,

18

util,

18

util,

19

vfs,

19

vfs,

20

)

20

)

21

from mercurial.utils import procutil

21

from mercurial.utils import procutil

22

from . import (

22

from . import (

23

constants,

23

constants,

24

contentstore,

24

contentstore,

25

datapack,

25

datapack,

26

extutil,

26

extutil,

27

historypack,

27

historypack,

28

metadatastore,

28

metadatastore,

29

shallowutil,

29

shallowutil,

30

)

30

)

31

32

# ``osutil`` is resolved through ``policy.importmod`` rather than a plain
# import; this module uses it for ``osutil.listdir`` in ``_incrementalrepack``.
osutil = policy.importmod(r'osutil')

32

osutil = policy.importmod(r'osutil')

33

34

class RepackAlreadyRunning(error.Abort):
    """Abort raised when a repack is skipped because another repack is
    already running (see the ``error.LockHeld`` handling in ``_runrepack``).
    """

36

37

def backgroundrepack(repo, incremental=True, packsonly=False):
    """Launch ``hg repack`` for ``repo`` through ``procutil.runbgcommand``.

    ``incremental`` appends ``--incremental`` to the command line and switches
    the notice text; ``packsonly`` appends ``--packsonly``. The notice is
    emitted with ``repo.ui.warn`` before the command is started.
    """
    argv = [procutil.hgexecutable(), '-R', repo.origroot, 'repack']
    notice = _("(running background repack)\n")
    if incremental:
        argv += ['--incremental']
        notice = _("(running background incremental repack)\n")
    if packsonly:
        argv += ['--packsonly']
    repo.ui.warn(notice)
    procutil.runbgcommand(argv, encoding.environ)

47

48

def fullrepack(repo, options=None):
    """Run a full repack of the file stores and the manifest stores.

    When ``repo`` has ``shareddatastores`` the shared file stores are repacked
    into the cache pack path for ``constants.FILEPACK_CATEGORY``. When
    ``repo.manifestlog`` has ``datastore`` the shared manifest stores and then
    the local manifest stores are repacked under
    ``constants.TREEPACK_CATEGORY``.

    ``options`` is handed to ``_runrepack`` untouched. Presumably it carries
    flags such as ``packsonly`` that let stores producing only loose objects
    be skipped -- confirm against the stores' ``markledger`` implementations.
    """
    if util.safehasattr(repo, 'shareddatastores'):
        filedata = contentstore.unioncontentstore(*repo.shareddatastores)
        filehistory = metadatastore.unionmetadatastore(
            *repo.sharedhistorystores, allowincomplete=True)
        filepackpath = shallowutil.getcachepackpath(
            repo, constants.FILEPACK_CATEGORY)
        _runrepack(repo, filedata, filehistory, filepackpath,
                   constants.FILEPACK_CATEGORY, options=options)

    if not util.safehasattr(repo.manifestlog, 'datastore'):
        return

    localinfo, sharedinfo = _getmanifeststores(repo)
    localpath, localdata, localhistory = localinfo
    sharedpath, shareddata, sharedhistory = sharedinfo

    # Shared manifest store: only the history union tolerates gaps.
    treedata = contentstore.unioncontentstore(*shareddata)
    treehistory = metadatastore.unionmetadatastore(
        *sharedhistory, allowincomplete=True)
    _runrepack(repo, treedata, treehistory, sharedpath,
               constants.TREEPACK_CATEGORY, options=options)

    # Local manifest store: both unions are built with allowincomplete=True.
    treedata = contentstore.unioncontentstore(
        *localdata, allowincomplete=True)
    treehistory = metadatastore.unionmetadatastore(
        *localhistory, allowincomplete=True)
    _runrepack(repo, treedata, treehistory, localpath,
               constants.TREEPACK_CATEGORY, options=options)

86

87

def incrementalrepack(repo, options=None):
    """Repack ``repo`` incrementally.

    The distribution of pack files is inspected and only the most minimal
    repack needed to keep the repo in good shape is performed. File stores are
    handled when ``repo`` has ``shareddatastores``; shared and then local
    manifest stores are handled when ``repo.manifestlog`` has ``datastore``.
    ``options`` is forwarded to every ``_incrementalrepack`` call.
    """
    if util.safehasattr(repo, 'shareddatastores'):
        filepackpath = shallowutil.getcachepackpath(
            repo, constants.FILEPACK_CATEGORY)
        _incrementalrepack(repo,
                           repo.shareddatastores,
                           repo.sharedhistorystores,
                           filepackpath,
                           constants.FILEPACK_CATEGORY,
                           options=options)

    if not util.safehasattr(repo.manifestlog, 'datastore'):
        return

    localinfo, sharedinfo = _getmanifeststores(repo)
    localpath, localdata, localhistory = localinfo
    sharedpath, shareddata, sharedhistory = sharedinfo

    # Shared manifest store first ...
    _incrementalrepack(repo, shareddata, sharedhistory, sharedpath,
                       constants.TREEPACK_CATEGORY, options=options)

    # ... then the local one, whose data store may be incomplete.
    _incrementalrepack(repo, localdata, localhistory, localpath,
                       constants.TREEPACK_CATEGORY,
                       allowincompletedata=True,
                       options=options)

123

124

def _getmanifeststores(repo):
    """Collect pack paths and stores for the manifest (tree) packs.

    Returns a pair ``(local, shared)`` where each element is a
    ``(packpath, datastores, historystores)`` tuple read from
    ``repo.manifestlog``.
    """
    mfl = repo.manifestlog
    shareddata = mfl.shareddatastores
    localdata = mfl.localdatastores
    sharedhistory = mfl.sharedhistorystores
    localhistory = mfl.localhistorystores

    category = constants.TREEPACK_CATEGORY
    sharedpath = shallowutil.getcachepackpath(repo, category)
    localpath = shallowutil.getlocalpackpath(repo.svfs.vfs.base, category)

    local = (localpath, localdata, localhistory)
    shared = (sharedpath, shareddata, sharedhistory)
    return local, shared

137

138

def _topacks(packpath, files, constructor):

138

def _topacks(packpath, files, constructor):

139

paths = list(os.path.join(packpath, p) for p in files)

139

paths = list(os.path.join(packpath, p) for p in files)

140

packs = list(constructor(p) for p in paths)

140

packs = list(constructor(p) for p in paths)

141

return packs

141

return packs

142

143

def _deletebigpacks(repo, folder, files):
    """Delete datapacks that are bigger than ``packs.maxpacksize``.

    ``files`` is a sequence of ``(name, type, stat)`` rows for the entries of
    ``folder`` (the caller passes the result of
    ``osutil.listdir(folder, stat=True)``). A pack is removed as a whole --
    both its ``.datapack`` and its ``.dataidx`` -- as soon as either file
    exceeds the limit. A limit of zero or less disables the cleanup.

    Returns ``files`` with the rows of the removed files omitted.
    """
    maxsize = repo.ui.configbytes("packs", "maxpacksize")
    if maxsize <= 0:
        return files

    # This only considers datapacks today, but we could broaden it to include
    # historypacks.
    VALIDEXTS = [".datapack", ".dataidx"]

    # Either an oversize index or datapack will trigger cleanup of the whole
    # pack, so remember the extension-less root name of each offender.
    oversized = set()
    for path, ftype, stat in files:
        root, ext = os.path.splitext(path)
        if stat.st_size > maxsize and ext in VALIDEXTS:
            oversized.add(root)

    for rootfname in oversized:
        rootpath = os.path.join(folder, rootfname)
        for ext in VALIDEXTS:
            path = rootpath + ext
            repo.ui.debug('removing oversize packfile %s (%s)\n' %
                          (path, util.bytecount(os.stat(path).st_size)))
            os.unlink(path)

    def wasremoved(name):
        # ``oversized`` holds root names without extension, so the row name
        # has to be split the same way before the membership test. Comparing
        # the full basename (extension included) never matches and would
        # leave the just-deleted files in the result.
        root, ext = os.path.splitext(name)
        return root in oversized and ext in VALIDEXTS

    return [row for row in files if not wasremoved(row[0])]

169

170

def _incrementalrepack(repo, datastore, historystore, packpath, category,
                       allowincompletedata=False, options=None):
    """Incrementally repack the packs found under ``packpath``.

    The directory is listed, oversize datapacks are pruned through
    ``_deletebigpacks``, and the data/history packs chosen by
    ``_computeincrementaldatapack`` / ``_computeincrementalhistorypack`` are
    opened. Members of ``datastore`` and ``historystore`` that are not pack
    stores are added as extra sources. ``allowincompletedata`` is passed as
    ``allowincomplete`` to the data union store; the history unions always
    allow incomplete data. The actual work is delegated to ``_runrepack``.
    """
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    entries = osutil.listdir(packpath, stat=True)
    entries = _deletebigpacks(repo, packpath, entries)

    # Data sources: the selected datapacks plus every non-pack data store.
    chosendata = _computeincrementaldatapack(repo.ui, entries)
    datasources = _topacks(packpath, chosendata, datapack.datapack)
    datasources.extend(store for store in datastore
                       if not isinstance(store, datapack.datapackstore))

    # History sources: the selected historypacks plus every non-pack store.
    chosenhistory = _computeincrementalhistorypack(repo.ui, entries)
    historysources = _topacks(packpath, chosenhistory,
                              historypack.historypack)
    historysources.extend(
        store for store in historystore
        if not isinstance(store, historypack.historypackstore))

    # ``allhistory*`` covers all known history packs, even ones we don't plan
    # to repack. They are used during the datapack repack to ensure good
    # ordering of nodes.
    allhistoryentries = _allpackfileswithsuffix(
        entries, historypack.PACKSUFFIX, historypack.INDEXSUFFIX)
    allhistorysources = _topacks(
        packpath,
        (name for name, mode, stat in allhistoryentries),
        historypack.historypack)
    allhistorysources.extend(
        store for store in historystore
        if not isinstance(store, historypack.historypackstore))

    dataunion = contentstore.unioncontentstore(
        *datasources, allowincomplete=allowincompletedata)
    historyunion = metadatastore.unionmetadatastore(
        *historysources, allowincomplete=True)
    fullhistoryunion = metadatastore.unionmetadatastore(
        *allhistorysources, allowincomplete=True)
    _runrepack(repo, dataunion, historyunion, packpath, category,
               fullhistory=fullhistoryunion, options=options)

210

211

def _computeincrementaldatapack(ui, files):
    """Pick the datapacks that an incremental repack should combine.

    The ``remotefilelog.data.*`` settings are read from ``ui`` and handed,
    together with the datapacks in ``files`` that have an index, to
    ``_computeincrementalpack``.
    """
    section = 'remotefilelog'
    gencountlimit = ui.configint(section, 'data.gencountlimit')
    generations = ui.configlist(section, 'data.generations')
    maxrepackpacks = ui.configint(section, 'data.maxrepackpacks')
    repackmaxpacksize = ui.configbytes(section, 'data.repackmaxpacksize')
    repacksizelimit = ui.configbytes(section, 'data.repacksizelimit')
    opts = {
        'gencountlimit': gencountlimit,
        'generations': generations,
        'maxrepackpacks': maxrepackpacks,
        'repackmaxpacksize': repackmaxpacksize,
        'repacksizelimit': repacksizelimit,
    }

    candidates = _allpackfileswithsuffix(
        files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX)
    return _computeincrementalpack(candidates, opts)

228

229

def _computeincrementalhistorypack(ui, files):
    """Pick the historypacks that an incremental repack should combine.

    The ``remotefilelog.history.*`` settings are read from ``ui`` (with
    explicit fallbacks of ``['100MB']`` for the generations and ``'400MB'``
    for the maximum pack size) and handed, together with the historypacks in
    ``files`` that have an index, to ``_computeincrementalpack``.
    """
    section = 'remotefilelog'
    gencountlimit = ui.configint(section, 'history.gencountlimit')
    generations = ui.configlist(section, 'history.generations', ['100MB'])
    maxrepackpacks = ui.configint(section, 'history.maxrepackpacks')
    repackmaxpacksize = ui.configbytes(
        section, 'history.repackmaxpacksize', '400MB')
    repacksizelimit = ui.configbytes(section, 'history.repacksizelimit')
    opts = {
        'gencountlimit': gencountlimit,
        'generations': generations,
        'maxrepackpacks': maxrepackpacks,
        'repackmaxpacksize': repackmaxpacksize,
        'repacksizelimit': repacksizelimit,
    }

    candidates = _allpackfileswithsuffix(
        files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX)
    return _computeincrementalpack(candidates, opts)

246

247

def _allpackfileswithsuffix(files, packsuffix, indexsuffix):

247

def _allpackfileswithsuffix(files, packsuffix, indexsuffix):

248

result = []

248

result = []

249

fileset = set(fn for fn, mode, stat in files)

249

fileset = set(fn for fn, mode, stat in files)

250

for filename, mode, stat in files:

250

for filename, mode, stat in files:

251

if not filename.endswith(packsuffix):

251

if not filename.endswith(packsuffix):

252

continue

252

continue

253

254

prefix = filename[:-len(packsuffix)]

254

prefix = filename[:-len(packsuffix)]

255

256

# Don't process a pack if it doesn't have an index.

256

# Don't process a pack if it doesn't have an index.

257

if (prefix + indexsuffix) not in fileset:

257

if (prefix + indexsuffix) not in fileset:

258

continue

258

continue

259

result.append((prefix, mode, stat))

259

result.append((prefix, mode, stat))

260

261

return result

261

return result

262

263

def _computeincrementalpack(files, opts):

263

def _computeincrementalpack(files, opts):

264

"""Given a set of pack files along with the configuration options, this

264

"""Given a set of pack files along with the configuration options, this

265

function computes the list of files that should be packed as part of an

265

function computes the list of files that should be packed as part of an

266

incremental repack.

266

incremental repack.

267

268

It tries to strike a balance between keeping incremental repacks cheap (i.e.

268

It tries to strike a balance between keeping incremental repacks cheap (i.e.

269

packing small things when possible, and rolling the packs up to the big ones

269

packing small things when possible, and rolling the packs up to the big ones

270

over time).

270

over time).

271

"""

271

"""

272

273

limits = list(sorted((util.sizetoint(s) for s in opts['generations']),

273

limits = list(sorted((util.sizetoint(s) for s in opts['generations']),

274

reverse=True))

274

reverse=True))

275

limits.append(0)

275

limits.append(0)

276

277

# Group the packs by generation (i.e. by size)

277

# Group the packs by generation (i.e. by size)

278

generations = []

278

generations = []

279

for i in pycompat.xrange(len(limits)):

279

for i in pycompat.xrange(len(limits)):

280

generations.append([])

280

generations.append([])

281

282

sizes = {}

282

sizes = {}

283

for prefix, mode, stat in files:

283

for prefix, mode, stat in files:

284

size = stat.st_size

284

size = stat.st_size

285

if size > opts['repackmaxpacksize']:

285

if size > opts['repackmaxpacksize']:

286

continue

286

continue

287

288

sizes[prefix] = size

288

sizes[prefix] = size

289

for i, limit in enumerate(limits):

289

for i, limit in enumerate(limits):

290

if size > limit:

290

if size > limit:

291

generations[i].append(prefix)

291

generations[i].append(prefix)

292

break

292

break

293

294

# Steps for picking what packs to repack:

294

# Steps for picking what packs to repack:

295

# 1. Pick the largest generation with > gencountlimit pack files.

295

# 1. Pick the largest generation with > gencountlimit pack files.

296

# 2. Take the smallest three packs.

296

# 2. Take the smallest three packs.

297

# 3. While total-size-of-packs < repacksizelimit: add another pack

297

# 3. While total-size-of-packs < repacksizelimit: add another pack

298

299

# Find the largest generation with more than gencountlimit packs

299

# Find the largest generation with more than gencountlimit packs

300

genpacks = []

300

genpacks = []

301

for i, limit in enumerate(limits):

301

for i, limit in enumerate(limits):

302

if len(generations[i]) > opts['gencountlimit']:

302

if len(generations[i]) > opts['gencountlimit']:

303

# Sort to be smallest last, for easy popping later

303

# Sort to be smallest last, for easy popping later

304

genpacks.extend(sorted(generations[i], reverse=True,

304

genpacks.extend(sorted(generations[i], reverse=True,

305

key=lambda x: sizes[x]))

305

key=lambda x: sizes[x]))

306

break

306

break

307

308

# Take as many packs from the generation as we can

308

# Take as many packs from the generation as we can

309

chosenpacks = genpacks[-3:]

309

chosenpacks = genpacks[-3:]

310

genpacks = genpacks[:-3]

310

genpacks = genpacks[:-3]

311

repacksize = sum(sizes[n] for n in chosenpacks)

311

repacksize = sum(sizes[n] for n in chosenpacks)

312

while (repacksize < opts['repacksizelimit'] and genpacks and

312

while (repacksize < opts['repacksizelimit'] and genpacks and

313

len(chosenpacks) < opts['maxrepackpacks']):

313

len(chosenpacks) < opts['maxrepackpacks']):

314

chosenpacks.append(genpacks.pop())

314

chosenpacks.append(genpacks.pop())

315

repacksize += sizes[chosenpacks[-1]]

315

repacksize += sizes[chosenpacks[-1]]

316

317

return chosenpacks

317

return chosenpacks

318

319

def _runrepack(repo, data, history, packpath, category, fullhistory=None,
               options=None):
    """Repack ``data`` and ``history`` into new packs under ``packpath``.

    A ``repacker`` is built for the given sources and run against a fresh
    mutable datapack and a fresh mutable historypack. ``fullhistory`` falls
    back to ``history`` when it is not given (or is falsy). Garbage collection
    is enabled through the ``remotefilelog.gcrepack`` setting.

    Raises ``RepackAlreadyRunning`` when the repacker reports
    ``error.LockHeld``.
    """
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    def isold(repo, filename, node):
        """Tell whether the file node is older than the configured limit.

        The node's age is taken from the date of the changeset its linkrev
        points to and compared against ``remotefilelog.nodettl`` seconds
        before now.
        """
        linkrev = repo.filectx(filename, fileid=node).linkrev()
        committed = repo[linkrev].date()

        ttl = repo.ui.configint('remotefilelog', 'nodettl')

        cutoff = time.time() - ttl
        return committed[0] < cutoff

    garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack')
    packer = repacker(repo, data, history, fullhistory or history, category,
                      gc=garbagecollect, isold=isold, options=options)

    with datapack.mutabledatapack(repo.ui, packpath, version=2) as dpack, \
            historypack.mutablehistorypack(repo.ui, packpath) as hpack:
        try:
            packer.run(dpack, hpack)
        except error.LockHeld:
            raise RepackAlreadyRunning(_("skipping repack - another repack "
                                         "is already running"))

348

349

def keepset(repo, keyfn, lastkeepkeys=None):
    """Computes a keepset which is not garbage collected.

    'keyfn' is a function that maps filename, node to a unique key.
    'lastkeepkeys' is an optional argument and if provided the keepset
    function updates lastkeepkeys with more keys and returns the result.

    Note that a falsy 'lastkeepkeys' (``None`` or an empty set) is not
    updated in place: a fresh set is created and returned instead.
    """
    if not lastkeepkeys:
        keepkeys = set()
    else:
        keepkeys = lastkeepkeys

    # We want to keep:
    # 1. Working copy parent
    # 2. Draft commits
    # 3. Parents of draft commits
    # 4. Pullprefetch and bgprefetchrevs revsets if specified
    revs = ['.', 'draft()', 'parents(draft())']
    prefetchrevs = repo.ui.config('remotefilelog', 'pullprefetch', None)
    if prefetchrevs:
        revs.append('(%s)' % prefetchrevs)
    prefetchrevs = repo.ui.config('remotefilelog', 'bgprefetchrevs', None)
    if prefetchrevs:
        revs.append('(%s)' % prefetchrevs)
    # Union all the pieces into one revset expression ...
    revs = '+'.join(revs)

    # ... and ask for it in topological order.
    revs = ['sort((%s), "topo")' % revs]
    keep = scmutil.revrange(repo, revs)

    processed = set()
    lastmanifest = None

    # process the commits in toposorted order starting from the oldest
    # NOTE(review): ``keep._list`` is a private attribute of the object
    # returned by ``scmutil.revrange`` -- presumably its materialized list of
    # revisions; confirm it is always populated for this revset.
    for r in reversed(keep._list):
        if repo[r].p1().rev() in processed:
            # if the direct parent has already been processed
            # then we only need to process the delta
            m = repo[r].manifestctx().readdelta()
        else:
            # otherwise take the manifest and diff it
            # with the previous manifest if one exists
            if lastmanifest:
                m = repo[r].manifest().diff(lastmanifest)
            else:
                m = repo[r].manifest()
        lastmanifest = repo[r].manifest()
        processed.add(r)

        # populate keepkeys with keys from the current manifest
        if type(m) is dict:
            # m is a result of diff of two manifests and is a dictionary that
            # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
            for filename, diff in m.iteritems():
                # a new node of None is skipped: there is nothing to keep
                if diff[0][0] is not None:
                    keepkeys.add(keyfn(filename, diff[0][0]))
        else:
            # m is a manifest object
            for filename, filenode in m.iteritems():
                keepkeys.add(keyfn(filename, filenode))

    return keepkeys

409

410

class repacker(object):

410

class repacker(object):

411

"""Class for orchestrating the repack of data and history information into a

411

"""Class for orchestrating the repack of data and history information into a

412

new format.

412

new format.

413

"""

413

"""

414

def __init__(self, repo, data, history, fullhistory, category, gc=False,
             isold=None, options=None):
    """Remember the repack sources and, when ``gc`` is set, prepare
    garbage collection.

    ``category`` is translated into ``self.unit`` through
    ``constants.getunits``. With ``gc`` enabled an ``isold`` callable is
    mandatory (``ValueError`` otherwise) and the keepset of
    ``(filename, node)`` keys is computed up front.
    """
    self.repo = repo
    self.data = data
    self.history = history
    self.fullhistory = fullhistory
    self.unit = constants.getunits(category)
    self.garbagecollect = gc
    self.options = options
    if not self.garbagecollect:
        return

    if not isold:
        raise ValueError("Function 'isold' is not properly specified")
    # keepset keys are plain (filename, node) tuples
    self.keepkeys = keepset(repo, lambda filename, node: (filename, node))
    self.isold = isold

429

430

def run(self, targetdata, targethistory):
    """Repack the sources into ``targetdata`` and ``targethistory``.

    Everything happens while holding the ``repacklock`` file lock, which is
    requested with ``timeout=0``: the ``prerepack`` hook is fired, the ledger
    is filled from the data and history sources, data and then history are
    repacked, and finally every source recorded in the ledger is asked to
    clean up.
    """
    ledger = repackledger()

    lockpath = repacklockvfs(self.repo).join("repacklock")
    lockdesc = _('repacking %s') % self.repo.origroot
    with extutil.flock(lockpath, lockdesc, timeout=0):
        self.repo.hook('prerepack')

        # Let every source record its entries in the ledger.
        for store in (self.data, self.history):
            store.markledger(ledger, options=self.options)

        # Write the new packs: data first, then history.
        self.repackdata(ledger, targetdata)
        self.repackhistory(ledger, targethistory)

        # Give each source the chance to drop what has been repacked.
        for origin in ledger.sources:
            origin.cleanup(ledger)

448

449

def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
    """Reorder ``orphans`` into a single chain inside ``nodes`` and
    ``deltabases``.

    We often have orphan entries (nodes without a base that aren't
    referenced by other nodes -- i.e., part of a chain) due to gaps in
    history. Rather than store them as individual fulltexts, we prefer to
    insert them as one chain sorted by size.

    ``deltabases`` is updated in place: the first orphan of the chain gets
    ``(nullid, 0)`` and every following orphan is based on the previous
    one. When ``orphans`` is empty ``nodes`` is returned as is; otherwise a
    new list is returned in which the orphans have been moved to the end,
    in chain order.
    """
    if not orphans:
        return nodes

    def getsize(node, default=0):
        # Size recorded in the node's metadata, or ``default`` if absent.
        meta = self.data.getmeta(filename, node)
        if constants.METAKEYSIZE in meta:
            return meta[constants.METAKEYSIZE]
        return default

    # Sort orphans by size; biggest first is preferred, since it's more
    # likely to be the newest version assuming files grow over time.
    # (Sort by node first so that equally sized orphans end up in a
    # deterministic order; the second sort is stable.)
    orphans = sorted(sorted(orphans), key=getsize, reverse=True)
    if ui.debugflag:
        ui.debug("%s: orphan chain: %s\n" % (filename,
            ", ".join([short(s) for s in orphans])))

    # Create one contiguous chain and reassign deltabases.
    for i, node in enumerate(orphans):
        if i == 0:
            deltabases[node] = (nullid, 0)
        else:
            parent = orphans[i - 1]
            deltabases[node] = (parent, deltabases[parent][1] + 1)

    # ``orphans`` is a list by now; test membership against a set so the
    # filter stays linear in the number of nodes.
    orphanset = set(orphans)
    nodes = [n for n in nodes if n not in orphanset]
    nodes += orphans
    return nodes

487

488

def repackdata(self, ledger, target):
    """Write every data entry recorded in ``ledger`` to ``target``.

    Ledger entries flagged ``datasource`` are grouped by filename. For each
    file the nodes are ordered children first, optionally garbage collected
    (when ``self.garbagecollect`` is set), assigned delta bases, and then
    added to ``target`` together with their metadata. Entries that were
    written get ``datarepacked`` set; entries that were dropped by garbage
    collection get ``gced`` set. ``target`` is closed at the end.
    """
    ui = self.repo.ui
    maxchainlen = ui.configint('packs', 'maxchainlen', 1000)

    # filename -> {node: ledger entry}, data-bearing entries only
    byfile = {}
    for entry in ledger.entries.itervalues():
        if entry.datasource:
            byfile.setdefault(entry.filename, {})[entry.node] = entry

    count = 0
    repackprogress = ui.makeprogress(_("repacking data"), unit=self.unit,
                                     total=len(byfile))
    for filename, entries in sorted(byfile.iteritems()):
        repackprogress.update(count)

        ancestors = {}
        nodes = list(entries)
        nohistory = []
        buildprogress = ui.makeprogress(_("building history"), unit='nodes',
                                        total=len(nodes))
        for i, node in enumerate(nodes):
            if node in ancestors:
                continue
            buildprogress.update(i)
            try:
                ancestors.update(self.fullhistory.getancestors(filename,
                    node, known=ancestors))
            except KeyError:
                # Since we're packing data entries, we may not have the
                # corresponding history entries for them. It's not a big
                # deal, but the entries won't be delta'd perfectly.
                nohistory.append(node)
        buildprogress.complete()

        # Order the nodes children first, so we can produce reverse deltas
        orderednodes = list(reversed(self._toposort(ancestors)))
        if len(nohistory) > 0:
            ui.debug('repackdata: %d nodes without history\n' %
                     len(nohistory))
        orderednodes.extend(sorted(nohistory))

        # Filter orderednodes to just the nodes we want to serialize (it
        # currently also has the edge nodes' ancestors). ``entries`` is
        # keyed by exactly those nodes, so test against the dict instead of
        # scanning the ``nodes`` list for every candidate.
        orderednodes = [node for node in orderednodes if node in entries]

        # Garbage collect old nodes:
        if self.garbagecollect:
            neworderednodes = []
            for node in orderednodes:
                # If the node is old and is not in the keepset, we skip it,
                # and mark as garbage collected
                if ((filename, node) not in self.keepkeys and
                    self.isold(self.repo, filename, node)):
                    entries[node].gced = True
                    continue
                neworderednodes.append(node)
            orderednodes = neworderednodes

        # Compute delta bases for nodes:
        deltabases = {}
        nobase = set()
        referenced = set()
        processprogress = ui.makeprogress(_("processing nodes"),
                                          unit='nodes',
                                          total=len(orderednodes))
        for i, node in enumerate(orderednodes):
            processprogress.update(i)
            # Find delta base
            # TODO: allow delta'ing against most recent descendant instead
            # of immediate child
            deltatuple = deltabases.get(node, None)
            if deltatuple is None:
                deltabase, chainlen = nullid, 0
                deltabases[node] = (nullid, 0)
                nobase.add(node)
            else:
                deltabase, chainlen = deltatuple
                referenced.add(deltabase)

            # Use available ancestor information to inform our delta choices
            ancestorinfo = ancestors.get(node)
            if ancestorinfo:
                p1, p2, linknode, copyfrom = ancestorinfo

                # The presence of copyfrom means we're at a point where the
                # file was copied from elsewhere. So don't attempt to do any
                # deltas with the other file.
                if copyfrom:
                    p1 = nullid

                if chainlen < maxchainlen:
                    # Record this child as the delta base for its parents.
                    # This may be non optimal, since the parents may have
                    # many children, and this will only choose the last one.
                    # TODO: record all children and try all deltas to find
                    # best
                    if p1 != nullid:
                        deltabases[p1] = (node, chainlen + 1)
                    if p2 != nullid:
                        deltabases[p2] = (node, chainlen + 1)

        # experimental config: repack.chainorphansbysize
        if ui.configbool('repack', 'chainorphansbysize'):
            orphans = nobase - referenced
            orderednodes = self._chainorphans(ui, filename, orderednodes,
                orphans, deltabases)

        # Compute deltas and write to the pack
        for i, node in enumerate(orderednodes):
            deltabase, chainlen = deltabases[node]
            # Compute delta
            # TODO: Optimize the deltachain fetching. Since we're
            # iterating over the different version of the file, we may
            # be fetching the same deltachain over and over again.
            meta = None
            if deltabase != nullid:
                deltaentry = self.data.getdelta(filename, node)
                delta, deltabasename, origdeltabase, meta = deltaentry
                size = meta.get(constants.METAKEYSIZE)
                # The stored delta is reused only if it is already against
                # the base chosen above and its size is known; otherwise
                # both fulltexts are fetched and diffed again.
                if (deltabasename != filename or origdeltabase != deltabase
                    or size is None):
                    deltabasetext = self.data.get(filename, deltabase)
                    original = self.data.get(filename, node)
                    size = len(original)
                    delta = mdiff.textdiff(deltabasetext, original)
            else:
                # No delta base: store what the data store returns for the
                # node itself.
                delta = self.data.get(filename, node)
                size = len(delta)
                meta = self.data.getmeta(filename, node)

            # TODO: don't use the delta if it's larger than the fulltext
            if constants.METAKEYSIZE not in meta:
                meta[constants.METAKEYSIZE] = size
            target.add(filename, node, deltabase, delta, meta)

            entries[node].datarepacked = True

        processprogress.complete()
        count += 1

    repackprogress.complete()
    target.close(ledger=ledger)

632

633

def repackhistory(self, ledger, target):
    """Write every history entry recorded in ``ledger`` to ``target``.

    Ledger entries flagged ``historysource`` are grouped by filename. For
    each file the known ancestry is collected, walked children first and
    added to ``target``; entries of the ledger that were written get
    ``historyrepacked`` set. ``target`` is closed at the end.
    """
    ui = self.repo.ui

    # filename -> {node: ledger entry}, history-bearing entries only
    perfile = {}
    for entry in ledger.entries.itervalues():
        if not entry.historysource:
            continue
        perfile.setdefault(entry.filename, {})[entry.node] = entry

    progress = ui.makeprogress(_("repacking history"), unit=self.unit,
                               total=len(perfile))
    for filename, entries in sorted(perfile.iteritems()):
        # Gather the ancestry of every node of this file, skipping nodes
        # already covered by an earlier lookup.
        ancestors = {}
        for node in list(entries):
            if node not in ancestors:
                ancestors.update(self.history.getancestors(
                    filename, node, known=ancestors))

        # Write to the pack, children first
        skipped = set()
        for node in reversed(self._toposort(ancestors)):
            p1, p2, linknode, copyfrom = ancestors[node]

            # If the node is marked as skipped, but it's also in the
            # explicit entries set, that means the node exists both in this
            # file and in another file that was copied to this file.
            # Usually this happens if the file was copied to another file,
            # then the copy was deleted, then reintroduced without copy
            # metadata. The original add and the new add have the same hash
            # since the content is identical and the parents are null.
            if node in skipped and node not in entries:
                # If copyfrom == filename, it means the copy history
                # went to some other file, then came back to this one, so
                # we should continue processing it.
                if p1 != nullid and copyfrom != filename:
                    skipped.add(p1)
                if p2 != nullid:
                    skipped.add(p2)
                continue

            if copyfrom:
                skipped.add(p1)

            target.add(filename, node, p1, p2, linknode, copyfrom)

            if node in entries:
                entries[node].historyrepacked = True

        progress.increment()

    progress.complete()
    target.close(ledger=ledger)

690

691

def _toposort(self, ancestors):
    """Return the keys of ``ancestors`` as ordered by
    ``shallowutil.sortnodes``.

    ``ancestors`` maps a node to a ``(p1, p2, linknode, copyfrom)`` tuple;
    only the non-null parents are reported to the sort.
    """
    def parentfunc(node):
        # Unpack all four fields so a malformed entry still fails loudly.
        p1, p2, linknode, copyfrom = ancestors[node]
        return [parent for parent in (p1, p2) if parent != nullid]

    return shallowutil.sortnodes(ancestors.keys(), parentfunc)

703

704

class repackledger(object):
    """Storage for all the bookkeeping that happens during a repack. It contains
    the list of revisions being repacked, what happened to each revision, and
    which source store contained which revision originally (for later cleanup).
    """
    def __init__(self):
        # (filename, node) -> repackentry
        self.entries = {}
        # source store -> set of the entries it reported
        self.sources = {}
        # values registered through addcreated()
        self.created = set()

    def markdataentry(self, source, filename, node):
        """Mark the given filename+node revision as having a data rev in the
        given source.
        """
        entry = self._getorcreateentry(filename, node)
        entry.datasource = True
        self.sources.setdefault(source, set()).add(entry)

    def markhistoryentry(self, source, filename, node):
        """Mark the given filename+node revision as having a history rev in the
        given source.
        """
        entry = self._getorcreateentry(filename, node)
        entry.historysource = True
        self.sources.setdefault(source, set()).add(entry)

    def _getorcreateentry(self, filename, node):
        """Return the entry stored for (filename, node), creating and
        registering a new repackentry when there is none yet.
        """
        key = (filename, node)
        value = self.entries.get(key)
        if value is None:
            value = repackentry(filename, node)
            self.entries[key] = value

        return value

    def addcreated(self, value):
        """Add ``value`` to the ``created`` set."""
        self.created.add(value)

749

750

class repackentry(object):
    """A single (filename, node) revision tracked by the repackledger,
    together with the flags describing what happened to it.
    """
    __slots__ = (
        r'filename',
        r'node',
        r'datasource',
        r'historysource',
        r'datarepacked',
        r'historyrepacked',
        r'gced',
    )

    def __init__(self, filename, node):
        self.filename = filename
        self.node = node
        # Whether the revision has a data / history entry in the source.
        self.datasource = self.historysource = False
        # Whether the revision's data / history entry was repacked into the
        # repack target.
        self.datarepacked = self.historyrepacked = False
        # Whether the revision was garbage collected.
        self.gced = False

768

769

def repacklockvfs(repo):
    """Return the vfs in which the repack lock file is kept.

    Repos without a ``name`` attribute use their own ``svfs``; all others
    use a vfs rooted at the cache pack path of the file pack category.
    """
    if not util.safehasattr(repo, 'name'):
        return repo.svfs

    # Lock in the shared cache so repacks across multiple copies of the same
    # repo are coordinated.
    packpath = shallowutil.getcachepackpath(repo, constants.FILEPACK_CATEGORY)
    return vfs.vfs(packpath)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             from __future__ import absolute_import
             import os
             import time
             from mercurial.i18n import _
             from mercurial.node import (
                 nullid,
                 short,
             )
             from mercurial import (
                 encoding,
                 error,
                 mdiff,
                 policy,
                 pycompat,
                 scmutil,
                 util,
                 vfs,
             )
             from mercurial.utils import procutil
             from . import (
                 constants,
                 contentstore,
                 datapack,
                 extutil,
                 historypack,
                 metadatastore,
                 shallowutil,
             )
             osutil = policy.importmod(r'osutil')
             class RepackAlreadyRunning(error.Abort):
                 """Raised when another repack is already running."""
             def backgroundrepack(repo, incremental=True, packsonly=False):
                 """Launch ``hg repack`` for ``repo`` as a background command.

                 ``incremental`` adds ``--incremental`` to the command line and
                 switches the notice shown to the user; ``packsonly`` adds
                 ``--packsonly``. The notice is emitted through ``repo.ui.warn``
                 before the command is started.
                 """
                 argv = [procutil.hgexecutable(), '-R', repo.origroot, 'repack']
                 notice = _("(running background repack)\n")
                 if incremental:
                     argv += ['--incremental']
                     notice = _("(running background incremental repack)\n")
                 if packsonly:
                     argv += ['--packsonly']
                 repo.ui.warn(notice)
                 procutil.runbgcommand(argv, encoding.environ)
             def fullrepack(repo, options=None):
                 """Repack all file and manifest stores of ``repo``.

                 The shared file stores are repacked when ``repo`` has
                 ``shareddatastores``. When ``repo.manifestlog`` has a ``datastore``
                 the shared manifest stores are repacked first, then the local ones.
                 ``options`` is forwarded unchanged to every repack run.

                 NOTE(review): the earlier docstring said that stores creating only
                 loose objects are skipped when ``packsonly`` is True; presumably that
                 flag travels inside ``options`` -- confirm against the callers.
                 """
                 if util.safehasattr(repo, 'shareddatastores'):
                     filedata = contentstore.unioncontentstore(
                         *repo.shareddatastores)
                     filehistory = metadatastore.unionmetadatastore(
                         *repo.sharedhistorystores, allowincomplete=True)
                     filepackpath = shallowutil.getcachepackpath(
                         repo, constants.FILEPACK_CATEGORY)
                     _runrepack(repo, filedata, filehistory, filepackpath,
                                constants.FILEPACK_CATEGORY, options=options)
                 if util.safehasattr(repo.manifestlog, 'datastore'):
                     localstores, sharedstores = _getmanifeststores(repo)
                     localpath, localdata, localhistory = localstores
                     sharedpath, shareddata, sharedhistory = sharedstores
                     # Shared manifest store first ...
                     treedata = contentstore.unioncontentstore(*shareddata)
                     treehistory = metadatastore.unionmetadatastore(
                         *sharedhistory, allowincomplete=True)
                     _runrepack(repo, treedata, treehistory, sharedpath,
                                constants.TREEPACK_CATEGORY, options=options)
                     # ... then the local manifest store, whose data may be incomplete.
                     treedata = contentstore.unioncontentstore(
                         *localdata, allowincomplete=True)
                     treehistory = metadatastore.unionmetadatastore(
                         *localhistory, allowincomplete=True)
                     _runrepack(repo, treedata, treehistory, localpath,
                                constants.TREEPACK_CATEGORY, options=options)
             def incrementalrepack(repo, options=None):
                 """Perform the most minimal repack that keeps the repo in good shape.

                 The choice of what to repack is driven by the distribution of pack
                 files already present. File packs are handled when ``repo`` has
                 ``shareddatastores``; shared and then local manifest packs are handled
                 when ``repo.manifestlog`` has a ``datastore``.
                 """
                 if util.safehasattr(repo, 'shareddatastores'):
                     filepackpath = shallowutil.getcachepackpath(
                         repo, constants.FILEPACK_CATEGORY)
                     _incrementalrepack(repo, repo.shareddatastores,
                                        repo.sharedhistorystores, filepackpath,
                                        constants.FILEPACK_CATEGORY, options=options)
                 if util.safehasattr(repo.manifestlog, 'datastore'):
                     localstores, sharedstores = _getmanifeststores(repo)
                     localpath, localdata, localhistory = localstores
                     sharedpath, shareddata, sharedhistory = sharedstores
                     # Shared manifest store
                     _incrementalrepack(repo, shareddata, sharedhistory, sharedpath,
                                        constants.TREEPACK_CATEGORY, options=options)
                     # Local manifest store; its data is allowed to be incomplete
                     _incrementalrepack(repo, localdata, localhistory, localpath,
                                        constants.TREEPACK_CATEGORY,
                                        allowincompletedata=True, options=options)
             def _getmanifeststores(repo):
                 """Collect the manifest stores of ``repo`` and their pack paths.

                 Returns a pair ``(local, shared)`` where each element is a
                 ``(packpath, datastores, historystores)`` triple.
                 """
                 shareddata = repo.manifestlog.shareddatastores
                 localdata = repo.manifestlog.localdatastores
                 sharedhistory = repo.manifestlog.sharedhistorystores
                 localhistory = repo.manifestlog.localhistorystores
                 sharedpath = shallowutil.getcachepackpath(
                     repo, constants.TREEPACK_CATEGORY)
                 localpath = shallowutil.getlocalpackpath(
                     repo.svfs.vfs.base, constants.TREEPACK_CATEGORY)
                 local = (localpath, localdata, localhistory)
                 shared = (sharedpath, shareddata, sharedhistory)
                 return (local, shared)
             def _topacks(packpath, files, constructor):
                 paths = list(os.path.join(packpath, p) for p in files)
                 packs = list(constructor(p) for p in paths)
                 return packs
             def _deletebigpacks(repo, folder, files):
                 """Deletes packfiles that are bigger than ``packs.maxpacksize``.

                 ``files`` is a list of ``(name, type, stat)`` rows describing the
                 contents of ``folder``. When the configured limit is zero or negative
                 nothing is deleted and ``files`` is returned unchanged.

                 Returns ``files`` with the rows of the removed files omitted.
                 """
                 maxsize = repo.ui.configbytes("packs", "maxpacksize")
                 if maxsize <= 0:
                     return files
                 # This only considers datapacks today, but we could broaden it to include
                 # historypacks.
                 VALIDEXTS = [".datapack", ".dataidx"]
                 # Either an oversize index or datapack will trigger cleanup of the whole
                 # pack:
                 oversized = set()
                 for path, ftype, stat in files:
                     root, ext = os.path.splitext(path)
                     if ext in VALIDEXTS and stat.st_size > maxsize:
                         oversized.add(root)
                 for rootfname in oversized:
                     rootpath = os.path.join(folder, rootfname)
                     for ext in VALIDEXTS:
                         path = rootpath + ext
                         repo.ui.debug('removing oversize packfile %s (%s)\n' %
                                       (path, util.bytecount(os.stat(path).st_size)))
                         os.unlink(path)

                 def wasremoved(row):
                     # ``oversized`` holds extension-less roots, so the extension has
                     # to be split off the row's name before comparing. Comparing the
                     # full file name (as was done before) never matches and leaves
                     # the deleted packs in the returned list.
                     root, ext = os.path.splitext(row[0])
                     return ext in VALIDEXTS and root in oversized
                 return [row for row in files if not wasremoved(row)]
             def _incrementalrepack(repo, datastore, historystore, packpath, category,
                                    allowincompletedata=False, options=None):
                 """Run one incremental repack over the packs found in ``packpath``.

                 Oversize datapacks are deleted first. The data and history packs
                 chosen by the incremental policy are then combined with every entry
                 of ``datastore`` / ``historystore`` that is not itself a pack store,
                 and the result is handed to ``_runrepack``.
                 """
                 ui = repo.ui
                 shallowutil.mkstickygroupdir(ui, packpath)
                 listing = osutil.listdir(packpath, stat=True)
                 listing = _deletebigpacks(repo, packpath, listing)

                 chosendata = _computeincrementaldatapack(ui, listing)
                 datapacks = _topacks(packpath, chosendata, datapack.datapack)
                 datapacks += [store for store in datastore
                               if not isinstance(store, datapack.datapackstore)]

                 chosenhistory = _computeincrementalhistorypack(ui, listing)
                 historypacks = _topacks(packpath, chosenhistory,
                                         historypack.historypack)
                 historypacks += [store for store in historystore
                                  if not isinstance(store,
                                                    historypack.historypackstore)]

                 # ``allhistory{files,packs}`` contains all known history packs, even ones we
                 # don't plan to repack. They are used during the datapack repack to ensure
                 # good ordering of nodes.
                 allhistoryfiles = _allpackfileswithsuffix(
                     listing, historypack.PACKSUFFIX, historypack.INDEXSUFFIX)
                 allhistorynames = (name for name, mode, stat in allhistoryfiles)
                 allhistorypacks = _topacks(packpath, allhistorynames,
                                            historypack.historypack)
                 allhistorypacks += [store for store in historystore
                                     if not isinstance(store,
                                                       historypack.historypackstore)]

                 datasource = contentstore.unioncontentstore(
                     *datapacks, allowincomplete=allowincompletedata)
                 historysource = metadatastore.unionmetadatastore(
                     *historypacks, allowincomplete=True)
                 everyhistory = metadatastore.unionmetadatastore(
                     *allhistorypacks, allowincomplete=True)
                 _runrepack(repo, datasource, historysource, packpath, category,
                            fullhistory=everyhistory, options=options)
             def _computeincrementaldatapack(ui, files):
                 """Choose the datapacks to include in an incremental repack.

                 The limits are read from the ``remotefilelog`` ``data.*`` settings and
                 applied to the datapacks in ``files`` that have a matching index.
                 """
                 section = 'remotefilelog'
                 gencountlimit = ui.configint(section, 'data.gencountlimit')
                 generations = ui.configlist(section, 'data.generations')
                 maxrepackpacks = ui.configint(section, 'data.maxrepackpacks')
                 repackmaxpacksize = ui.configbytes(section, 'data.repackmaxpacksize')
                 repacksizelimit = ui.configbytes(section, 'data.repacksizelimit')
                 opts = {
                     'gencountlimit': gencountlimit,
                     'generations': generations,
                     'maxrepackpacks': maxrepackpacks,
                     'repackmaxpacksize': repackmaxpacksize,
                     'repacksizelimit': repacksizelimit,
                 }
                 candidates = _allpackfileswithsuffix(
                     files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX)
                 return _computeincrementalpack(candidates, opts)
             def _computeincrementalhistorypack(ui, files):
                 """Choose the history packs to include in an incremental repack.

                 The limits are read from the ``remotefilelog`` ``history.*`` settings
                 (``history.generations`` and ``history.repackmaxpacksize`` are given
                 explicit fallbacks here) and applied to the history packs in ``files``
                 that have a matching index.
                 """
                 section = 'remotefilelog'
                 gencountlimit = ui.configint(section, 'history.gencountlimit')
                 generations = ui.configlist(section, 'history.generations',
                                             ['100MB'])
                 maxrepackpacks = ui.configint(section, 'history.maxrepackpacks')
                 repackmaxpacksize = ui.configbytes(
                     section, 'history.repackmaxpacksize', '400MB')
                 repacksizelimit = ui.configbytes(section, 'history.repacksizelimit')
                 opts = {
                     'gencountlimit': gencountlimit,
                     'generations': generations,
                     'maxrepackpacks': maxrepackpacks,
                     'repackmaxpacksize': repackmaxpacksize,
                     'repacksizelimit': repacksizelimit,
                 }
                 candidates = _allpackfileswithsuffix(
                     files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX)
                 return _computeincrementalpack(candidates, opts)
             def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
                 result = []
                 fileset = set(fn for fn, mode, stat in files)
                 for filename, mode, stat in files:
                     if not filename.endswith(packsuffix):
                         continue
                     prefix = filename[:-len(packsuffix)]
                     # Don't process a pack if it doesn't have an index.
                     if (prefix + indexsuffix) not in fileset:
                         continue
                     result.append((prefix, mode, stat))
                 return result
             def _computeincrementalpack(files, opts):
                 """Compute which packs an incremental repack should combine.

                 ``files`` holds ``(prefix, mode, stat)`` rows and ``opts`` the
                 configuration values. The selection tries to keep incremental repacks
                 cheap (packing small things when possible) while still rolling packs
                 up into the big ones over time. Returns the list of chosen prefixes.
                 """
                 limits = sorted((util.sizetoint(s) for s in opts['generations']),
                                 reverse=True)
                 limits.append(0)
                 # Group the packs by generation (i.e. by size): one bucket per limit.
                 generations = [[] for _limit in limits]
                 sizes = {}
                 for prefix, mode, stat in files:
                     size = stat.st_size
                     if size > opts['repackmaxpacksize']:
                         continue
                     sizes[prefix] = size
                     for limit, bucket in zip(limits, generations):
                         if size > limit:
                             bucket.append(prefix)
                             break
                 # Steps for picking what packs to repack:
                 # 1. Pick the largest generation with > gencountlimit pack files.
                 # 2. Take the smallest three packs.
                 # 3. While total-size-of-packs < repacksizelimit: add another pack
                 candidates = []
                 for bucket in generations:
                     if len(bucket) > opts['gencountlimit']:
                         # Smallest last, so the cheapest pack is always popped next.
                         candidates.extend(sorted(bucket, reverse=True,
                                                  key=sizes.__getitem__))
                         break
                 # Take as many packs from the generation as we can
                 chosen, remaining = candidates[-3:], candidates[:-3]
                 total = sum(sizes[name] for name in chosen)
                 while (total < opts['repacksizelimit'] and remaining
                        and len(chosen) < opts['maxrepackpacks']):
                     nextpack = remaining.pop()
                     chosen.append(nextpack)
                     total += sizes[nextpack]
                 return chosen
             def _runrepack(repo, data, history, packpath, category, fullhistory=None,
                            options=None):
                 """Repack ``data`` and ``history`` into new packs under ``packpath``.

                 ``fullhistory`` falls back to ``history`` when it is not supplied.
                 Garbage collection is enabled through ``remotefilelog.gcrepack``.
                 Raises ``RepackAlreadyRunning`` when the run hits ``error.LockHeld``.
                 """
                 shallowutil.mkstickygroupdir(repo.ui, packpath)

                 def isold(repo, filename, node):
                     """Tell whether the file node is older than ``remotefilelog.nodettl``.

                     The age is taken from the date of the changeset the file node is
                     linked to.
                     """
                     linkrev = repo.filectx(filename, fileid=node).linkrev()
                     committed = repo[linkrev].date()
                     ttl = repo.ui.configint('remotefilelog', 'nodettl')
                     return committed[0] < time.time() - ttl

                 usegc = repo.ui.configbool('remotefilelog', 'gcrepack')
                 fullhistory = fullhistory or history
                 packer = repacker(repo, data, history, fullhistory, category,
                                   gc=usegc, isold=isold, options=options)
                 with datapack.mutabledatapack(repo.ui, packpath, version=2) as dpack, \
                         historypack.mutablehistorypack(repo.ui, packpath) as hpack:
                     try:
                         packer.run(dpack, hpack)
                     except error.LockHeld:
                         raise RepackAlreadyRunning(_("skipping repack - another repack "
                                                      "is already running"))
             def keepset(repo, keyfn, lastkeepkeys=None):
                 """Computes a keepset which is not garbage collected.
                 'keyfn' is a function that maps filename, node to a unique key.
                 'lastkeepkeys' is an optional argument and if provided the keepset
                 function updates lastkeepkeys with more keys and returns the result.

                 Returns the set of keys produced by 'keyfn' for every file node that
                 should be kept.
                 """
                 # Note that an empty 'lastkeepkeys' is falsy as well, so in that case a
                 # fresh set is returned rather than the caller's object being updated.
                 if not lastkeepkeys:
                     keepkeys = set()
                 else:
                     keepkeys = lastkeepkeys
                 # We want to keep:
                 # 1. Working copy parent
                 # 2. Draft commits
                 # 3. Parents of draft commits
                 # 4. Pullprefetch and bgprefetchrevs revsets if specified
                 revs = ['.', 'draft()', 'parents(draft())']
                 prefetchrevs = repo.ui.config('remotefilelog', 'pullprefetch', None)
                 if prefetchrevs:
                     revs.append('(%s)' % prefetchrevs)
                 prefetchrevs = repo.ui.config('remotefilelog', 'bgprefetchrevs', None)
                 if prefetchrevs:
                     revs.append('(%s)' % prefetchrevs)
                 # Combine everything into a single revset expression, sorted
                 # topologically.
                 revs = '+'.join(revs)
                 revs = ['sort((%s), "topo")' % revs]
                 keep = scmutil.revrange(repo, revs)
                 # Revisions whose manifest has already been folded into keepkeys.
                 processed = set()
                 lastmanifest = None
                 # process the commits in toposorted order starting from the oldest
                 # NOTE(review): this reads the private ``_list`` attribute of the
                 # revrange result -- confirm it is always populated for this revset.
                 for r in reversed(keep._list):
                     if repo[r].p1().rev() in processed:
                         # if the direct parent has already been processed
                         # then we only need to process the delta
                         m = repo[r].manifestctx().readdelta()
                     else:
                         # otherwise take the manifest and diff it
                         # with the previous manifest if one exists
                         if lastmanifest:
                             m = repo[r].manifest().diff(lastmanifest)
                         else:
                             m = repo[r].manifest()
                     lastmanifest = repo[r].manifest()
                     processed.add(r)
                     # populate keepkeys with keys from the current manifest
                     if type(m) is dict:
                         # m is a result of diff of two manifests and is a dictionary that
                         # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
                         for filename, diff in m.iteritems():
                             # Only the new side of the diff is kept; a new node of
                             # None is skipped.
                             if diff[0][0] is not None:
                                 keepkeys.add(keyfn(filename, diff[0][0]))
                     else:
                         # m is a manifest object
                         for filename, filenode in m.iteritems():
                             keepkeys.add(keyfn(filename, filenode))
                 return keepkeys
             class repacker(object):
                 """Class for orchestrating the repack of data and history information into a
                 new format.
                 """
                 def __init__(self, repo, data, history, fullhistory, category, gc=False,
                              isold=None, options=None):
                     """Store the repack sources and, when ``gc`` is set, the GC helpers.

                     With ``gc`` enabled an ``isold`` callable is mandatory; a
                     ``ValueError`` is raised when it is missing. The keepset is then
                     computed up front and stored together with ``isold``.
                     """
                     self.repo = repo
                     self.data = data
                     self.history = history
                     self.fullhistory = fullhistory
                     self.unit = constants.getunits(category)
                     self.garbagecollect = gc
                     self.options = options
                     if not gc:
                         return
                     if not isold:
                         raise ValueError("Function 'isold' is not properly specified")
                     # use (filename, node) tuple as a keepset key
                     self.keepkeys = keepset(repo,
                                             lambda filename, node: (filename, node))
                     self.isold = isold
                 def run(self, targetdata, targethistory):
                     """Repack data into ``targetdata`` and history into ``targethistory``.

                     Everything after the ledger is created happens while holding the
                     ``repacklock`` file lock, which is requested with ``timeout=0``.
                     NOTE(review): presumably a held lock surfaces as
                     ``error.LockHeld`` (the caller in this file catches it) -- confirm
                     against ``extutil.flock``.
                     """
                     ledger = repackledger()
                     lockpath = repacklockvfs(self.repo).join("repacklock")
                     lockdesc = _('repacking %s') % self.repo.origroot
                     with extutil.flock(lockpath, lockdesc, timeout=0):
                         self.repo.hook('prerepack')
                         # Populate ledger from source
                         self.data.markledger(ledger, options=self.options)
                         self.history.markledger(ledger, options=self.options)
                         # Run repack
                         self.repackdata(ledger, targetdata)
                         self.repackhistory(ledger, targethistory)
                         # Let every source recorded in the ledger clean up after itself
                         for store in ledger.sources:
                             store.cleanup(ledger)
                 def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
                     """Reorders ``orphans`` into a single chain inside ``nodes`` and
                     ``deltabases``.

                     We often have orphan entries (nodes without a base that aren't
                     referenced by other nodes -- i.e., part of a chain) due to gaps in
                     history. Rather than store them as individual fulltexts, we prefer to
                     insert them as one chain sorted by size.

                     ``deltabases`` is updated in place. Returns the node list with the
                     orphans moved to the end in chain order; when there are no orphans
                     ``nodes`` is returned untouched.
                     """
                     if not orphans:
                         return nodes

                     def getsize(node, default=0):
                         meta = self.data.getmeta(filename, node)
                         if constants.METAKEYSIZE in meta:
                             return meta[constants.METAKEYSIZE]
                         else:
                             return default

                     # Sort orphans by size; biggest first is preferred, since it's more
                     # likely to be the newest version assuming files grow over time.
                     # (Sort by node first to ensure the sort is stable.)
                     orphans = sorted(orphans)
                     orphans = list(sorted(orphans, key=getsize, reverse=True))
                     if ui.debugflag:
                         ui.debug("%s: orphan chain: %s\n" % (filename,
                             ", ".join([short(s) for s in orphans])))
                     # Create one contiguous chain and reassign deltabases.
                     for i, node in enumerate(orphans):
                         if i == 0:
                             deltabases[node] = (nullid, 0)
                         else:
                             parent = orphans[i - 1]
                             deltabases[node] = (parent, deltabases[parent][1] + 1)
                     # Build a real list: on Python 3 ``filter`` returns an iterator,
                     # which does not support the ``+=`` below. A set keeps the
                     # membership test cheap.
                     orphanset = set(orphans)
                     nodes = [n for n in nodes if n not in orphanset]
                     nodes += orphans
                     return nodes
                 def repackdata(self, ledger, target):
                     """Write the data entries recorded in ``ledger`` into ``target``.

                     Ledger entries flagged ``datasource`` are grouped by filename. For
                     each file the nodes are ordered children first, optionally garbage
                     collected, assigned delta bases and then added to ``target``.
                     Entries that were written get ``datarepacked`` set; entries that
                     were garbage collected get ``gced`` set. ``target`` is closed with
                     the ledger once every file has been processed.
                     """
                     ui = self.repo.ui
                     maxchainlen = ui.configint('packs', 'maxchainlen', 1000)
                     # filename -> {node: ledger entry}, restricted to data entries.
                     byfile = {}
                     for entry in ledger.entries.itervalues():
                         if entry.datasource:
                             byfile.setdefault(entry.filename, {})[entry.node] = entry
                     count = 0
                     repackprogress = ui.makeprogress(_("repacking data"), unit=self.unit,
                                                         total=len(byfile))
                     for filename, entries in sorted(byfile.iteritems()):
                         repackprogress.update(count)
                         ancestors = {}
                         nodes = list(node for node in entries)
                         # Nodes for which no history could be found.
                         nohistory = []
                         buildprogress = ui.makeprogress(_("building history"), unit='nodes',
                                                         total=len(nodes))
                         for i, node in enumerate(nodes):
                             # Already covered by the ancestry of an earlier node.
                             if node in ancestors:
                                 continue
                             buildprogress.update(i)
                             try:
                                 ancestors.update(self.fullhistory.getancestors(filename,
                                     node, known=ancestors))
                             except KeyError:
                                 # Since we're packing data entries, we may not have the
                                 # corresponding history entries for them. It's not a big
                                 # deal, but the entries won't be delta'd perfectly.
                                 nohistory.append(node)
                         buildprogress.complete()
                         # Order the nodes children first, so we can produce reverse deltas
                         orderednodes = list(reversed(self._toposort(ancestors)))
                         if len(nohistory) > 0:
                             ui.debug('repackdata: %d nodes without history\n' %
                                      len(nohistory))
                         orderednodes.extend(sorted(nohistory))
                         # Filter orderednodes to just the nodes we want to serialize (it
                         # currently also has the edge nodes' ancestors).
                         # NOTE(review): ``nodes`` is still a list here, so each
                         # membership test is a linear scan.
                         orderednodes = list(filter(lambda node: node in nodes,
                                             orderednodes))
                         # Garbage collect old nodes:
                         if self.garbagecollect:
                             neworderednodes = []
                             for node in orderednodes:
                                 # If the node is old and is not in the keepset, we skip it,
                                 # and mark as garbage collected
                                 if ((filename, node) not in self.keepkeys and
                                     self.isold(self.repo, filename, node)):
                                     entries[node].gced = True
                                     continue
                                 neworderednodes.append(node)
                             orderednodes = neworderednodes
                         # Compute delta bases for nodes:
                         # deltabases maps node -> (delta base node, chain length).
                         deltabases = {}
                         # Nodes that ended up without a delta base.
                         nobase = set()
                         # Nodes that serve as delta base for another node.
                         referenced = set()
                         # NOTE(review): ``nodes`` is not read again in the rest of this
                         # loop body.
                         nodes = set(nodes)
                         processprogress = ui.makeprogress(_("processing nodes"),
                                                           unit='nodes',
                                                           total=len(orderednodes))
                         for i, node in enumerate(orderednodes):
                             processprogress.update(i)
                             # Find delta base
                             # TODO: allow delta'ing against most recent descendant instead
                             # of immediate child
                             deltatuple = deltabases.get(node, None)
                             if deltatuple is None:
                                 deltabase, chainlen = nullid, 0
                                 deltabases[node] = (nullid, 0)
                                 nobase.add(node)
                             else:
                                 deltabase, chainlen = deltatuple
                                 referenced.add(deltabase)
                             # Use available ancestor information to inform our delta choices
                             ancestorinfo = ancestors.get(node)
                             if ancestorinfo:
                                 p1, p2, linknode, copyfrom = ancestorinfo
                                 # The presence of copyfrom means we're at a point where the
                                 # file was copied from elsewhere. So don't attempt to do any
                                 # deltas with the other file.
                                 if copyfrom:
                                     p1 = nullid
                                 if chainlen < maxchainlen:
                                     # Record this child as the delta base for its parents.
                                     # This may be non optimal, since the parents may have
                                     # many children, and this will only choose the last one.
                                     # TODO: record all children and try all deltas to find
                                     # best
                                     if p1 != nullid:
                                         deltabases[p1] = (node, chainlen + 1)
                                     if p2 != nullid:
                                         deltabases[p2] = (node, chainlen + 1)
                         # experimental config: repack.chainorphansbysize
                         if ui.configbool('repack', 'chainorphansbysize'):
                             # Orphans: nodes with no base that no other node deltas against.
                             orphans = nobase - referenced
                             orderednodes = self._chainorphans(ui, filename, orderednodes,
                                 orphans, deltabases)
                         # Compute deltas and write to the pack
                         for i, node in enumerate(orderednodes):
                             deltabase, chainlen = deltabases[node]
                             # Compute delta
                             # TODO: Optimize the deltachain fetching. Since we're
                             # iterating over the different version of the file, we may
                             # be fetching the same deltachain over and over again.
                             meta = None
                             if deltabase != nullid:
                                 deltaentry = self.data.getdelta(filename, node)
                                 delta, deltabasename, origdeltabase, meta = deltaentry
                                 size = meta.get(constants.METAKEYSIZE)
                                 # Reuse the stored delta only when it was computed against
                                 # the base chosen above and its size is known; otherwise
                                 # compute a new delta from the two full texts.
                                 if (deltabasename != filename or origdeltabase != deltabase
                                     or size is None):
                                     deltabasetext = self.data.get(filename, deltabase)
                                     original = self.data.get(filename, node)
                                     size = len(original)
                                     delta = mdiff.textdiff(deltabasetext, original)
                             else:
                                 # No delta base: the full text is passed as the delta.
                                 delta = self.data.get(filename, node)
                                 size = len(delta)
                                 meta = self.data.getmeta(filename, node)
                             # TODO: don't use the delta if it's larger than the fulltext
                             if constants.METAKEYSIZE not in meta:
                                 meta[constants.METAKEYSIZE] = size
                             target.add(filename, node, deltabase, delta, meta)
                             entries[node].datarepacked = True
                         processprogress.complete()
                         count += 1
                     repackprogress.complete()
                     target.close(ledger=ledger)
                 def repackhistory(self, ledger, target):
                     ui = self.repo.ui
                     byfile = {}
                     for entry in ledger.entries.itervalues():
                         if entry.historysource:
                             byfile.setdefault(entry.filename, {})[entry.node] = entry
                     progress = ui.makeprogress(_("repacking history"), unit=self.unit,
                                                total=len(byfile))
                     for filename, entries in sorted(byfile.iteritems()):
                         ancestors = {}
                         nodes = list(node for node in entries)
                         for node in nodes:
                             if node in ancestors:
                                 continue
                             ancestors.update(self.history.getancestors(filename, node,
                                                                        known=ancestors))
                         # Order the nodes children first
                         orderednodes = reversed(self._toposort(ancestors))
                         # Write to the pack
                         dontprocess = set()
                         for node in orderednodes:
                             p1, p2, linknode, copyfrom = ancestors[node]
                             # If the node is marked dontprocess, but it's also in the
                             # explicit entries set, that means the node exists both in this
                             # file and in another file that was copied to this file.
                             # Usually this happens if the file was copied to another file,
                             # then the copy was deleted, then reintroduced without copy
                             # metadata. The original add and the new add have the same hash
                             # since the content is identical and the parents are null.
                             if node in dontprocess and node not in entries:
                                 # If copyfrom == filename, it means the copy history
                                 # went to come other file, then came back to this one, so we
                                 # should continue processing it.
                                 if p1 != nullid and copyfrom != filename:
                                     dontprocess.add(p1)
                                 if p2 != nullid:
                                     dontprocess.add(p2)
                                 continue
                             if copyfrom:
                                 dontprocess.add(p1)
                             target.add(filename, node, p1, p2, linknode, copyfrom)
                             if node in entries:
                                 entries[node].historyrepacked = True
                         progress.increment()
                     progress.complete()
                     target.close(ledger=ledger)
                 def _toposort(self, ancestors):
                     def parentfunc(node):
                         p1, p2, linknode, copyfrom = ancestors[node]
                         parents = []
                         if p1 != nullid:
                             parents.append(p1)
                         if p2 != nullid:
                             parents.append(p2)
                         return parents
                     sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
                     return sortednodes
             class repackledger(object):
                 """Storage for all the bookkeeping that happens during a repack. It contains
                 the list of revisions being repacked, what happened to each revision, and
                 which source store contained which revision originally (for later cleanup).
                 """
                 def __init__(self):
                     self.entries = {}
                     self.sources = {}
                     self.created = set()
                 def markdataentry(self, source, filename, node):
                     """Mark the given filename+node revision as having a data rev in the
                     given source.
                     """
                     entry = self._getorcreateentry(filename, node)
                     entry.datasource = True
                     entries = self.sources.get(source)
                     if not entries:
                         entries = set()
                         self.sources[source] = entries
                     entries.add(entry)
                 def markhistoryentry(self, source, filename, node):
                     """Mark the given filename+node revision as having a history rev in the
                     given source.
                     """
                     entry = self._getorcreateentry(filename, node)
                     entry.historysource = True
                     entries = self.sources.get(source)
                     if not entries:
                         entries = set()
                         self.sources[source] = entries
                     entries.add(entry)
                 def _getorcreateentry(self, filename, node):
                     key = (filename, node)
                     value = self.entries.get(key)
                     if not value:
                         value = repackentry(filename, node)
                         self.entries[key] = value
                     return value
                 def addcreated(self, value):
                     self.created.add(value)
             class repackentry(object):
                 """Simple class representing a single revision entry in the repackledger.
                 """
                 __slots__ = (r'filename', r'node', r'datasource', r'historysource',
                              r'datarepacked', r'historyrepacked', r'gced')
                 def __init__(self, filename, node):
                     self.filename = filename
                     self.node = node
                     # If the revision has a data entry in the source
                     self.datasource = False
                     # If the revision has a history entry in the source
                     self.historysource = False
                     # If the revision's data entry was repacked into the repack target
                     self.datarepacked = False
                     # If the revision's history entry was repacked into the repack target
                     self.historyrepacked = False
                     # If garbage collected
                     self.gced = False
             def repacklockvfs(repo):
                 if util.safehasattr(repo, 'name'):
                     # Lock in the shared cache so repacks across multiple copies of the same
                     # repo are coordinated.
                     sharedcachepath = shallowutil.getcachepackpath(
                         repo,
                         constants.FILEPACK_CATEGORY)
                     return vfs.vfs(sharedcachepath)
                 else:
                     return repo.svfs