py3: listify filter() to call len() on it...
Pulkit Goyal
r40840:bad0053e default
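
Background for the change: under Python 2, filter() returns a list, while under Python 3 it returns a lazy iterator that does not support len(). The repack code later calls len() on the filtered orderednodes (for progress reporting), so the result has to be materialized with list(). A minimal illustrative sketch of the difference (not part of the commit; the names below are made up for the example):

    nodes = ['a', 'b', 'c']
    wanted = {'a', 'c'}

    result = filter(lambda n: n in wanted, nodes)
    # Python 2: result is a list, so len(result) == 2
    # Python 3: result is a lazy filter object; len(result) raises TypeError

    result = list(filter(lambda n: n in wanted, nodes))
    len(result)  # == 2 on both Python 2 and Python 3
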
@@ -1,774 +1,775 @@
from __future__ import absolute_import

import os
import time

from mercurial.i18n import _
from mercurial.node import (
    nullid,
    short,
)
from mercurial import (
    encoding,
    error,
    mdiff,
    policy,
    pycompat,
    scmutil,
    util,
    vfs,
)
from mercurial.utils import procutil
from . import (
    constants,
    contentstore,
    datapack,
    extutil,
    historypack,
    metadatastore,
    shallowutil,
)

osutil = policy.importmod(r'osutil')

class RepackAlreadyRunning(error.Abort):
    pass

def backgroundrepack(repo, incremental=True, packsonly=False):
    cmd = [procutil.hgexecutable(), '-R', repo.origroot, 'repack']
    msg = _("(running background repack)\n")
    if incremental:
        cmd.append('--incremental')
        msg = _("(running background incremental repack)\n")
    if packsonly:
        cmd.append('--packsonly')
    repo.ui.warn(msg)
    procutil.runbgcommand(cmd, encoding.environ)

def fullrepack(repo, options=None):
    """If ``packsonly`` is True, stores creating only loose objects are skipped.
    """
    if util.safehasattr(repo, 'shareddatastores'):
        datasource = contentstore.unioncontentstore(
            *repo.shareddatastores)
        historysource = metadatastore.unionmetadatastore(
            *repo.sharedhistorystores,
            allowincomplete=True)

        packpath = shallowutil.getcachepackpath(
            repo,
            constants.FILEPACK_CATEGORY)
        _runrepack(repo, datasource, historysource, packpath,
                   constants.FILEPACK_CATEGORY, options=options)

    if util.safehasattr(repo.manifestlog, 'datastore'):
        localdata, shareddata = _getmanifeststores(repo)
        lpackpath, ldstores, lhstores = localdata
        spackpath, sdstores, shstores = shareddata

        # Repack the shared manifest store
        datasource = contentstore.unioncontentstore(*sdstores)
        historysource = metadatastore.unionmetadatastore(
            *shstores,
            allowincomplete=True)
        _runrepack(repo, datasource, historysource, spackpath,
                   constants.TREEPACK_CATEGORY, options=options)

        # Repack the local manifest store
        datasource = contentstore.unioncontentstore(
            *ldstores,
            allowincomplete=True)
        historysource = metadatastore.unionmetadatastore(
            *lhstores,
            allowincomplete=True)
        _runrepack(repo, datasource, historysource, lpackpath,
                   constants.TREEPACK_CATEGORY, options=options)

def incrementalrepack(repo, options=None):
    """This repacks the repo by looking at the distribution of pack files in the
    repo and performing the most minimal repack to keep the repo in good shape.
    """
    if util.safehasattr(repo, 'shareddatastores'):
        packpath = shallowutil.getcachepackpath(
            repo,
            constants.FILEPACK_CATEGORY)
        _incrementalrepack(repo,
                           repo.shareddatastores,
                           repo.sharedhistorystores,
                           packpath,
                           constants.FILEPACK_CATEGORY,
                           options=options)

    if util.safehasattr(repo.manifestlog, 'datastore'):
        localdata, shareddata = _getmanifeststores(repo)
        lpackpath, ldstores, lhstores = localdata
        spackpath, sdstores, shstores = shareddata

        # Repack the shared manifest store
        _incrementalrepack(repo,
                           sdstores,
                           shstores,
                           spackpath,
                           constants.TREEPACK_CATEGORY,
                           options=options)

        # Repack the local manifest store
        _incrementalrepack(repo,
                           ldstores,
                           lhstores,
                           lpackpath,
                           constants.TREEPACK_CATEGORY,
                           allowincompletedata=True,
                           options=options)

def _getmanifeststores(repo):
    shareddatastores = repo.manifestlog.shareddatastores
    localdatastores = repo.manifestlog.localdatastores
    sharedhistorystores = repo.manifestlog.sharedhistorystores
    localhistorystores = repo.manifestlog.localhistorystores

    sharedpackpath = shallowutil.getcachepackpath(repo,
                                                  constants.TREEPACK_CATEGORY)
    localpackpath = shallowutil.getlocalpackpath(repo.svfs.vfs.base,
                                                 constants.TREEPACK_CATEGORY)

    return ((localpackpath, localdatastores, localhistorystores),
            (sharedpackpath, shareddatastores, sharedhistorystores))

def _topacks(packpath, files, constructor):
    paths = list(os.path.join(packpath, p) for p in files)
    packs = list(constructor(p) for p in paths)
    return packs

def _deletebigpacks(repo, folder, files):
    """Deletes packfiles that are bigger than ``packs.maxpacksize``.

    Returns ``files` with the removed files omitted."""
    maxsize = repo.ui.configbytes("packs", "maxpacksize")
    if maxsize <= 0:
        return files

    # This only considers datapacks today, but we could broaden it to include
    # historypacks.
    VALIDEXTS = [".datapack", ".dataidx"]

    # Either an oversize index or datapack will trigger cleanup of the whole
    # pack:
    oversized = set([os.path.splitext(path)[0] for path, ftype, stat in files
                     if (stat.st_size > maxsize and (os.path.splitext(path)[1]
                                                     in VALIDEXTS))])

    for rootfname in oversized:
        rootpath = os.path.join(folder, rootfname)
        for ext in VALIDEXTS:
            path = rootpath + ext
            repo.ui.debug('removing oversize packfile %s (%s)\n' %
                          (path, util.bytecount(os.stat(path).st_size)))
            os.unlink(path)
    return [row for row in files if os.path.basename(row[0]) not in oversized]

def _incrementalrepack(repo, datastore, historystore, packpath, category,
                       allowincompletedata=False, options=None):
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    files = osutil.listdir(packpath, stat=True)
    files = _deletebigpacks(repo, packpath, files)
    datapacks = _topacks(packpath,
                         _computeincrementaldatapack(repo.ui, files),
                         datapack.datapack)
    datapacks.extend(s for s in datastore
                     if not isinstance(s, datapack.datapackstore))

    historypacks = _topacks(packpath,
                            _computeincrementalhistorypack(repo.ui, files),
                            historypack.historypack)
    historypacks.extend(s for s in historystore
                        if not isinstance(s, historypack.historypackstore))

    # ``allhistory{files,packs}`` contains all known history packs, even ones we
    # don't plan to repack. They are used during the datapack repack to ensure
    # good ordering of nodes.
    allhistoryfiles = _allpackfileswithsuffix(files, historypack.PACKSUFFIX,
                                              historypack.INDEXSUFFIX)
    allhistorypacks = _topacks(packpath,
                               (f for f, mode, stat in allhistoryfiles),
                               historypack.historypack)
    allhistorypacks.extend(s for s in historystore
                           if not isinstance(s, historypack.historypackstore))
    _runrepack(repo,
               contentstore.unioncontentstore(
                   *datapacks,
                   allowincomplete=allowincompletedata),
               metadatastore.unionmetadatastore(
                   *historypacks,
                   allowincomplete=True),
               packpath, category,
               fullhistory=metadatastore.unionmetadatastore(
                   *allhistorypacks,
                   allowincomplete=True),
               options=options)

def _computeincrementaldatapack(ui, files):
    opts = {
        'gencountlimit' : ui.configint(
            'remotefilelog', 'data.gencountlimit'),
        'generations' : ui.configlist(
            'remotefilelog', 'data.generations'),
        'maxrepackpacks' : ui.configint(
            'remotefilelog', 'data.maxrepackpacks'),
        'repackmaxpacksize' : ui.configbytes(
            'remotefilelog', 'data.repackmaxpacksize'),
        'repacksizelimit' : ui.configbytes(
            'remotefilelog', 'data.repacksizelimit'),
    }

    packfiles = _allpackfileswithsuffix(
        files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX)
    return _computeincrementalpack(packfiles, opts)

def _computeincrementalhistorypack(ui, files):
    opts = {
        'gencountlimit' : ui.configint(
            'remotefilelog', 'history.gencountlimit'),
        'generations' : ui.configlist(
            'remotefilelog', 'history.generations', ['100MB']),
        'maxrepackpacks' : ui.configint(
            'remotefilelog', 'history.maxrepackpacks'),
        'repackmaxpacksize' : ui.configbytes(
            'remotefilelog', 'history.repackmaxpacksize', '400MB'),
        'repacksizelimit' : ui.configbytes(
            'remotefilelog', 'history.repacksizelimit'),
    }

    packfiles = _allpackfileswithsuffix(
        files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX)
    return _computeincrementalpack(packfiles, opts)

def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
    result = []
    fileset = set(fn for fn, mode, stat in files)
    for filename, mode, stat in files:
        if not filename.endswith(packsuffix):
            continue

        prefix = filename[:-len(packsuffix)]

        # Don't process a pack if it doesn't have an index.
        if (prefix + indexsuffix) not in fileset:
            continue
        result.append((prefix, mode, stat))

    return result

def _computeincrementalpack(files, opts):
    """Given a set of pack files along with the configuration options, this
    function computes the list of files that should be packed as part of an
    incremental repack.

    It tries to strike a balance between keeping incremental repacks cheap (i.e.
    packing small things when possible, and rolling the packs up to the big ones
    over time).
    """

    limits = list(sorted((util.sizetoint(s) for s in opts['generations']),
                         reverse=True))
    limits.append(0)

    # Group the packs by generation (i.e. by size)
    generations = []
    for i in pycompat.xrange(len(limits)):
        generations.append([])

    sizes = {}
    for prefix, mode, stat in files:
        size = stat.st_size
        if size > opts['repackmaxpacksize']:
            continue

        sizes[prefix] = size
        for i, limit in enumerate(limits):
            if size > limit:
                generations[i].append(prefix)
                break

    # Steps for picking what packs to repack:
    # 1. Pick the largest generation with > gencountlimit pack files.
    # 2. Take the smallest three packs.
    # 3. While total-size-of-packs < repacksizelimit: add another pack

    # Find the largest generation with more than gencountlimit packs
    genpacks = []
    for i, limit in enumerate(limits):
        if len(generations[i]) > opts['gencountlimit']:
            # Sort to be smallest last, for easy popping later
            genpacks.extend(sorted(generations[i], reverse=True,
                                   key=lambda x: sizes[x]))
            break

    # Take as many packs from the generation as we can
    chosenpacks = genpacks[-3:]
    genpacks = genpacks[:-3]
    repacksize = sum(sizes[n] for n in chosenpacks)
    while (repacksize < opts['repacksizelimit'] and genpacks and
           len(chosenpacks) < opts['maxrepackpacks']):
        chosenpacks.append(genpacks.pop())
        repacksize += sizes[chosenpacks[-1]]

    return chosenpacks

def _runrepack(repo, data, history, packpath, category, fullhistory=None,
               options=None):
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    def isold(repo, filename, node):
        """Check if the file node is older than a limit.
        Unless a limit is specified in the config the default limit is taken.
        """
        filectx = repo.filectx(filename, fileid=node)
        filetime = repo[filectx.linkrev()].date()

        ttl = repo.ui.configint('remotefilelog', 'nodettl')

        limit = time.time() - ttl
        return filetime[0] < limit

    garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack')
    if not fullhistory:
        fullhistory = history
    packer = repacker(repo, data, history, fullhistory, category,
                      gc=garbagecollect, isold=isold, options=options)

    with datapack.mutabledatapack(repo.ui, packpath, version=2) as dpack:
        with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
            try:
                packer.run(dpack, hpack)
            except error.LockHeld:
                raise RepackAlreadyRunning(_("skipping repack - another repack "
                                             "is already running"))

def keepset(repo, keyfn, lastkeepkeys=None):
    """Computes a keepset which is not garbage collected.
    'keyfn' is a function that maps filename, node to a unique key.
    'lastkeepkeys' is an optional argument and if provided the keepset
    function updates lastkeepkeys with more keys and returns the result.
    """
    if not lastkeepkeys:
        keepkeys = set()
    else:
        keepkeys = lastkeepkeys

    # We want to keep:
    # 1. Working copy parent
    # 2. Draft commits
    # 3. Parents of draft commits
    # 4. Pullprefetch and bgprefetchrevs revsets if specified
    revs = ['.', 'draft()', 'parents(draft())']
    prefetchrevs = repo.ui.config('remotefilelog', 'pullprefetch', None)
    if prefetchrevs:
        revs.append('(%s)' % prefetchrevs)
    prefetchrevs = repo.ui.config('remotefilelog', 'bgprefetchrevs', None)
    if prefetchrevs:
        revs.append('(%s)' % prefetchrevs)
    revs = '+'.join(revs)

    revs = ['sort((%s), "topo")' % revs]
    keep = scmutil.revrange(repo, revs)

    processed = set()
    lastmanifest = None

    # process the commits in toposorted order starting from the oldest
    for r in reversed(keep._list):
        if repo[r].p1().rev() in processed:
            # if the direct parent has already been processed
            # then we only need to process the delta
            m = repo[r].manifestctx().readdelta()
        else:
            # otherwise take the manifest and diff it
            # with the previous manifest if one exists
            if lastmanifest:
                m = repo[r].manifest().diff(lastmanifest)
            else:
                m = repo[r].manifest()
        lastmanifest = repo[r].manifest()
        processed.add(r)

        # populate keepkeys with keys from the current manifest
        if type(m) is dict:
            # m is a result of diff of two manifests and is a dictionary that
            # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
            for filename, diff in m.iteritems():
                if diff[0][0] is not None:
                    keepkeys.add(keyfn(filename, diff[0][0]))
        else:
            # m is a manifest object
            for filename, filenode in m.iteritems():
                keepkeys.add(keyfn(filename, filenode))

    return keepkeys

class repacker(object):
    """Class for orchestrating the repack of data and history information into a
    new format.
    """
    def __init__(self, repo, data, history, fullhistory, category, gc=False,
                 isold=None, options=None):
        self.repo = repo
        self.data = data
        self.history = history
        self.fullhistory = fullhistory
        self.unit = constants.getunits(category)
        self.garbagecollect = gc
        self.options = options
        if self.garbagecollect:
            if not isold:
                raise ValueError("Function 'isold' is not properly specified")
            # use (filename, node) tuple as a keepset key
            self.keepkeys = keepset(repo, lambda f, n : (f, n))
            self.isold = isold

    def run(self, targetdata, targethistory):
        ledger = repackledger()

        with extutil.flock(repacklockvfs(self.repo).join("repacklock"),
                           _('repacking %s') % self.repo.origroot, timeout=0):
            self.repo.hook('prerepack')

            # Populate ledger from source
            self.data.markledger(ledger, options=self.options)
            self.history.markledger(ledger, options=self.options)

            # Run repack
            self.repackdata(ledger, targetdata)
            self.repackhistory(ledger, targethistory)

            # Call cleanup on each source
            for source in ledger.sources:
                source.cleanup(ledger)

    def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
        """Reorderes ``orphans`` into a single chain inside ``nodes`` and
        ``deltabases``.

        We often have orphan entries (nodes without a base that aren't
        referenced by other nodes -- i.e., part of a chain) due to gaps in
        history. Rather than store them as individual fulltexts, we prefer to
        insert them as one chain sorted by size.
        """
        if not orphans:
            return nodes

        def getsize(node, default=0):
            meta = self.data.getmeta(filename, node)
            if constants.METAKEYSIZE in meta:
                return meta[constants.METAKEYSIZE]
            else:
                return default

        # Sort orphans by size; biggest first is preferred, since it's more
        # likely to be the newest version assuming files grow over time.
        # (Sort by node first to ensure the sort is stable.)
        orphans = sorted(orphans)
        orphans = list(sorted(orphans, key=getsize, reverse=True))
        if ui.debugflag:
            ui.debug("%s: orphan chain: %s\n" % (filename,
                ", ".join([short(s) for s in orphans])))

        # Create one contiguous chain and reassign deltabases.
        for i, node in enumerate(orphans):
            if i == 0:
                deltabases[node] = (nullid, 0)
            else:
                parent = orphans[i - 1]
                deltabases[node] = (parent, deltabases[parent][1] + 1)
        nodes = filter(lambda node: node not in orphans, nodes)
        nodes += orphans
        return nodes

    def repackdata(self, ledger, target):
        ui = self.repo.ui
        maxchainlen = ui.configint('packs', 'maxchainlen', 1000)

        byfile = {}
        for entry in ledger.entries.itervalues():
            if entry.datasource:
                byfile.setdefault(entry.filename, {})[entry.node] = entry

        count = 0
        for filename, entries in sorted(byfile.iteritems()):
            ui.progress(_("repacking data"), count, unit=self.unit,
                        total=len(byfile))

            ancestors = {}
            nodes = list(node for node in entries)
            nohistory = []
            for i, node in enumerate(nodes):
                if node in ancestors:
                    continue
                ui.progress(_("building history"), i, unit='nodes',
                            total=len(nodes))
                try:
                    ancestors.update(self.fullhistory.getancestors(filename,
                        node, known=ancestors))
                except KeyError:
                    # Since we're packing data entries, we may not have the
                    # corresponding history entries for them. It's not a big
                    # deal, but the entries won't be delta'd perfectly.
                    nohistory.append(node)
            ui.progress(_("building history"), None)

            # Order the nodes children first, so we can produce reverse deltas
            orderednodes = list(reversed(self._toposort(ancestors)))
            if len(nohistory) > 0:
                ui.debug('repackdata: %d nodes without history\n' %
                         len(nohistory))
            orderednodes.extend(sorted(nohistory))

            # Filter orderednodes to just the nodes we want to serialize (it
            # currently also has the edge nodes' ancestors).
-            orderednodes = filter(lambda node: node in nodes, orderednodes)
+            orderednodes = list(filter(lambda node: node in nodes,
+                                       orderednodes))

            # Garbage collect old nodes:
            if self.garbagecollect:
                neworderednodes = []
                for node in orderednodes:
                    # If the node is old and is not in the keepset, we skip it,
                    # and mark as garbage collected
                    if ((filename, node) not in self.keepkeys and
                        self.isold(self.repo, filename, node)):
                        entries[node].gced = True
                        continue
                    neworderednodes.append(node)
                orderednodes = neworderednodes

            # Compute delta bases for nodes:
            deltabases = {}
            nobase = set()
            referenced = set()
            nodes = set(nodes)
            for i, node in enumerate(orderednodes):
                ui.progress(_("processing nodes"), i, unit='nodes',
                            total=len(orderednodes))
                # Find delta base
                # TODO: allow delta'ing against most recent descendant instead
                # of immediate child
                deltatuple = deltabases.get(node, None)
                if deltatuple is None:
                    deltabase, chainlen = nullid, 0
                    deltabases[node] = (nullid, 0)
                    nobase.add(node)
                else:
                    deltabase, chainlen = deltatuple
                    referenced.add(deltabase)

                # Use available ancestor information to inform our delta choices
                ancestorinfo = ancestors.get(node)
                if ancestorinfo:
                    p1, p2, linknode, copyfrom = ancestorinfo

                    # The presence of copyfrom means we're at a point where the
                    # file was copied from elsewhere. So don't attempt to do any
                    # deltas with the other file.
                    if copyfrom:
                        p1 = nullid

                    if chainlen < maxchainlen:
                        # Record this child as the delta base for its parents.
                        # This may be non optimal, since the parents may have
                        # many children, and this will only choose the last one.
                        # TODO: record all children and try all deltas to find
                        # best
                        if p1 != nullid:
                            deltabases[p1] = (node, chainlen + 1)
                        if p2 != nullid:
                            deltabases[p2] = (node, chainlen + 1)

            # experimental config: repack.chainorphansbysize
            if ui.configbool('repack', 'chainorphansbysize'):
                orphans = nobase - referenced
                orderednodes = self._chainorphans(ui, filename, orderednodes,
                                                  orphans, deltabases)

            # Compute deltas and write to the pack
            for i, node in enumerate(orderednodes):
                deltabase, chainlen = deltabases[node]
                # Compute delta
                # TODO: Optimize the deltachain fetching. Since we're
                # iterating over the different version of the file, we may
                # be fetching the same deltachain over and over again.
                meta = None
                if deltabase != nullid:
                    deltaentry = self.data.getdelta(filename, node)
                    delta, deltabasename, origdeltabase, meta = deltaentry
                    size = meta.get(constants.METAKEYSIZE)
                    if (deltabasename != filename or origdeltabase != deltabase
                        or size is None):
                        deltabasetext = self.data.get(filename, deltabase)
                        original = self.data.get(filename, node)
                        size = len(original)
                        delta = mdiff.textdiff(deltabasetext, original)
                else:
                    delta = self.data.get(filename, node)
                    size = len(delta)
                    meta = self.data.getmeta(filename, node)

                # TODO: don't use the delta if it's larger than the fulltext
                if constants.METAKEYSIZE not in meta:
                    meta[constants.METAKEYSIZE] = size
                target.add(filename, node, deltabase, delta, meta)

                entries[node].datarepacked = True

            ui.progress(_("processing nodes"), None)
            count += 1

        ui.progress(_("repacking data"), None)
        target.close(ledger=ledger)

    def repackhistory(self, ledger, target):
        ui = self.repo.ui

        byfile = {}
        for entry in ledger.entries.itervalues():
            if entry.historysource:
                byfile.setdefault(entry.filename, {})[entry.node] = entry

        count = 0
        for filename, entries in sorted(byfile.iteritems()):
            ancestors = {}
            nodes = list(node for node in entries)

            for node in nodes:
                if node in ancestors:
                    continue
                ancestors.update(self.history.getancestors(filename, node,
                                                           known=ancestors))

            # Order the nodes children first
            orderednodes = reversed(self._toposort(ancestors))

            # Write to the pack
            dontprocess = set()
            for node in orderednodes:
                p1, p2, linknode, copyfrom = ancestors[node]

                # If the node is marked dontprocess, but it's also in the
                # explicit entries set, that means the node exists both in this
                # file and in another file that was copied to this file.
                # Usually this happens if the file was copied to another file,
                # then the copy was deleted, then reintroduced without copy
                # metadata. The original add and the new add have the same hash
                # since the content is identical and the parents are null.
                if node in dontprocess and node not in entries:
                    # If copyfrom == filename, it means the copy history
                    # went to come other file, then came back to this one, so we
                    # should continue processing it.
                    if p1 != nullid and copyfrom != filename:
                        dontprocess.add(p1)
                    if p2 != nullid:
                        dontprocess.add(p2)
                    continue

                if copyfrom:
                    dontprocess.add(p1)

                target.add(filename, node, p1, p2, linknode, copyfrom)

                if node in entries:
                    entries[node].historyrepacked = True

            count += 1
            ui.progress(_("repacking history"), count, unit=self.unit,
                        total=len(byfile))

        ui.progress(_("repacking history"), None)
        target.close(ledger=ledger)

    def _toposort(self, ancestors):
        def parentfunc(node):
            p1, p2, linknode, copyfrom = ancestors[node]
            parents = []
            if p1 != nullid:
                parents.append(p1)
            if p2 != nullid:
                parents.append(p2)
            return parents

        sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
        return sortednodes

class repackledger(object):
    """Storage for all the bookkeeping that happens during a repack. It contains
    the list of revisions being repacked, what happened to each revision, and
    which source store contained which revision originally (for later cleanup).
    """
    def __init__(self):
        self.entries = {}
        self.sources = {}
        self.created = set()

    def markdataentry(self, source, filename, node):
        """Mark the given filename+node revision as having a data rev in the
        given source.
        """
        entry = self._getorcreateentry(filename, node)
        entry.datasource = True
        entries = self.sources.get(source)
        if not entries:
            entries = set()
            self.sources[source] = entries
        entries.add(entry)

    def markhistoryentry(self, source, filename, node):
        """Mark the given filename+node revision as having a history rev in the
        given source.
        """
        entry = self._getorcreateentry(filename, node)
        entry.historysource = True
        entries = self.sources.get(source)
        if not entries:
            entries = set()
            self.sources[source] = entries
        entries.add(entry)

    def _getorcreateentry(self, filename, node):
        key = (filename, node)
        value = self.entries.get(key)
        if not value:
            value = repackentry(filename, node)
            self.entries[key] = value

        return value

    def addcreated(self, value):
        self.created.add(value)

class repackentry(object):
    """Simple class representing a single revision entry in the repackledger.
    """
    __slots__ = (r'filename', r'node', r'datasource', r'historysource',
                 r'datarepacked', r'historyrepacked', r'gced')
    def __init__(self, filename, node):
        self.filename = filename
        self.node = node
        # If the revision has a data entry in the source
        self.datasource = False
        # If the revision has a history entry in the source
        self.historysource = False
        # If the revision's data entry was repacked into the repack target
        self.datarepacked = False
        # If the revision's history entry was repacked into the repack target
        self.historyrepacked = False
        # If garbage collected
        self.gced = False

def repacklockvfs(repo):
    if util.safehasattr(repo, 'name'):
        # Lock in the shared cache so repacks across multiple copies of the same
        # repo are coordinated.
        sharedcachepath = shallowutil.getcachepackpath(
            repo,
            constants.FILEPACK_CATEGORY)
        return vfs.vfs(sharedcachepath)
    else:
        return repo.svfs