remotefilelog: drop some compatibility cruft for finding the hg executable
Matt Harbison
r40747:72d88a97 default
@@ -1,781 +1,774 @@
 from __future__ import absolute_import

 import os
 import time

 from mercurial.i18n import _
 from mercurial.node import (
     nullid,
     short,
 )
 from mercurial import (
     encoding,
     error,
     mdiff,
     policy,
     pycompat,
     scmutil,
     util,
     vfs,
 )
 from mercurial.utils import procutil
 from . import (
     constants,
     contentstore,
     datapack,
     extutil,
     historypack,
     metadatastore,
     shallowutil,
 )

 osutil = policy.importmod(r'osutil')

 class RepackAlreadyRunning(error.Abort):
     pass

-if util.safehasattr(util, '_hgexecutable'):
-    # Before 5be286db
-    _hgexecutable = util.hgexecutable
-else:
-    from mercurial.utils import procutil
-    _hgexecutable = procutil.hgexecutable
-
 def backgroundrepack(repo, incremental=True, packsonly=False):
-    cmd = [_hgexecutable(), '-R', repo.origroot, 'repack']
+    cmd = [procutil.hgexecutable(), '-R', repo.origroot, 'repack']
     msg = _("(running background repack)\n")
     if incremental:
         cmd.append('--incremental')
         msg = _("(running background incremental repack)\n")
     if packsonly:
         cmd.append('--packsonly')
     repo.ui.warn(msg)
     procutil.runbgcommand(cmd, encoding.environ)

 def fullrepack(repo, options=None):
     """If ``packsonly`` is True, stores creating only loose objects are skipped.
     """
     if util.safehasattr(repo, 'shareddatastores'):
         datasource = contentstore.unioncontentstore(
             *repo.shareddatastores)
         historysource = metadatastore.unionmetadatastore(
             *repo.sharedhistorystores,
             allowincomplete=True)

         packpath = shallowutil.getcachepackpath(
             repo,
             constants.FILEPACK_CATEGORY)
         _runrepack(repo, datasource, historysource, packpath,
                    constants.FILEPACK_CATEGORY, options=options)

     if util.safehasattr(repo.manifestlog, 'datastore'):
         localdata, shareddata = _getmanifeststores(repo)
         lpackpath, ldstores, lhstores = localdata
         spackpath, sdstores, shstores = shareddata

         # Repack the shared manifest store
         datasource = contentstore.unioncontentstore(*sdstores)
         historysource = metadatastore.unionmetadatastore(
             *shstores,
             allowincomplete=True)
         _runrepack(repo, datasource, historysource, spackpath,
                    constants.TREEPACK_CATEGORY, options=options)

         # Repack the local manifest store
         datasource = contentstore.unioncontentstore(
             *ldstores,
             allowincomplete=True)
         historysource = metadatastore.unionmetadatastore(
             *lhstores,
             allowincomplete=True)
         _runrepack(repo, datasource, historysource, lpackpath,
                    constants.TREEPACK_CATEGORY, options=options)

 def incrementalrepack(repo, options=None):
     """This repacks the repo by looking at the distribution of pack files in the
     repo and performing the most minimal repack to keep the repo in good shape.
     """
     if util.safehasattr(repo, 'shareddatastores'):
         packpath = shallowutil.getcachepackpath(
             repo,
             constants.FILEPACK_CATEGORY)
         _incrementalrepack(repo,
                            repo.shareddatastores,
                            repo.sharedhistorystores,
                            packpath,
                            constants.FILEPACK_CATEGORY,
                            options=options)

     if util.safehasattr(repo.manifestlog, 'datastore'):
         localdata, shareddata = _getmanifeststores(repo)
         lpackpath, ldstores, lhstores = localdata
         spackpath, sdstores, shstores = shareddata

         # Repack the shared manifest store
         _incrementalrepack(repo,
                            sdstores,
                            shstores,
                            spackpath,
                            constants.TREEPACK_CATEGORY,
                            options=options)

         # Repack the local manifest store
         _incrementalrepack(repo,
                            ldstores,
                            lhstores,
                            lpackpath,
                            constants.TREEPACK_CATEGORY,
                            allowincompletedata=True,
                            options=options)

 def _getmanifeststores(repo):
     shareddatastores = repo.manifestlog.shareddatastores
     localdatastores = repo.manifestlog.localdatastores
     sharedhistorystores = repo.manifestlog.sharedhistorystores
     localhistorystores = repo.manifestlog.localhistorystores

     sharedpackpath = shallowutil.getcachepackpath(repo,
                                                   constants.TREEPACK_CATEGORY)
     localpackpath = shallowutil.getlocalpackpath(repo.svfs.vfs.base,
                                                  constants.TREEPACK_CATEGORY)

     return ((localpackpath, localdatastores, localhistorystores),
             (sharedpackpath, shareddatastores, sharedhistorystores))

 def _topacks(packpath, files, constructor):
     paths = list(os.path.join(packpath, p) for p in files)
     packs = list(constructor(p) for p in paths)
     return packs

 def _deletebigpacks(repo, folder, files):
     """Deletes packfiles that are bigger than ``packs.maxpacksize``.

     Returns ``files`` with the removed files omitted."""
     maxsize = repo.ui.configbytes("packs", "maxpacksize")
     if maxsize <= 0:
         return files

     # This only considers datapacks today, but we could broaden it to include
     # historypacks.
     VALIDEXTS = [".datapack", ".dataidx"]

     # Either an oversize index or datapack will trigger cleanup of the whole
     # pack:
     oversized = set([os.path.splitext(path)[0] for path, ftype, stat in files
         if (stat.st_size > maxsize and (os.path.splitext(path)[1]
                                         in VALIDEXTS))])

     for rootfname in oversized:
         rootpath = os.path.join(folder, rootfname)
         for ext in VALIDEXTS:
             path = rootpath + ext
             repo.ui.debug('removing oversize packfile %s (%s)\n' %
                           (path, util.bytecount(os.stat(path).st_size)))
             os.unlink(path)
     return [row for row in files if os.path.basename(row[0]) not in oversized]

 def _incrementalrepack(repo, datastore, historystore, packpath, category,
         allowincompletedata=False, options=None):
     shallowutil.mkstickygroupdir(repo.ui, packpath)

     files = osutil.listdir(packpath, stat=True)
     files = _deletebigpacks(repo, packpath, files)
     datapacks = _topacks(packpath,
         _computeincrementaldatapack(repo.ui, files),
         datapack.datapack)
     datapacks.extend(s for s in datastore
                      if not isinstance(s, datapack.datapackstore))

     historypacks = _topacks(packpath,
         _computeincrementalhistorypack(repo.ui, files),
         historypack.historypack)
     historypacks.extend(s for s in historystore
                         if not isinstance(s, historypack.historypackstore))

     # ``allhistory{files,packs}`` contains all known history packs, even ones we
     # don't plan to repack. They are used during the datapack repack to ensure
     # good ordering of nodes.
     allhistoryfiles = _allpackfileswithsuffix(files, historypack.PACKSUFFIX,
                                               historypack.INDEXSUFFIX)
     allhistorypacks = _topacks(packpath,
         (f for f, mode, stat in allhistoryfiles),
         historypack.historypack)
     allhistorypacks.extend(s for s in historystore
                            if not isinstance(s, historypack.historypackstore))
     _runrepack(repo,
                contentstore.unioncontentstore(
                    *datapacks,
                    allowincomplete=allowincompletedata),
                metadatastore.unionmetadatastore(
                    *historypacks,
                    allowincomplete=True),
                packpath, category,
                fullhistory=metadatastore.unionmetadatastore(
                    *allhistorypacks,
                    allowincomplete=True),
                options=options)

 def _computeincrementaldatapack(ui, files):
     opts = {
         'gencountlimit' : ui.configint(
             'remotefilelog', 'data.gencountlimit'),
         'generations' : ui.configlist(
             'remotefilelog', 'data.generations'),
         'maxrepackpacks' : ui.configint(
             'remotefilelog', 'data.maxrepackpacks'),
         'repackmaxpacksize' : ui.configbytes(
             'remotefilelog', 'data.repackmaxpacksize'),
         'repacksizelimit' : ui.configbytes(
             'remotefilelog', 'data.repacksizelimit'),
     }

     packfiles = _allpackfileswithsuffix(
         files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX)
     return _computeincrementalpack(packfiles, opts)

 def _computeincrementalhistorypack(ui, files):
     opts = {
         'gencountlimit' : ui.configint(
             'remotefilelog', 'history.gencountlimit'),
         'generations' : ui.configlist(
             'remotefilelog', 'history.generations', ['100MB']),
         'maxrepackpacks' : ui.configint(
             'remotefilelog', 'history.maxrepackpacks'),
         'repackmaxpacksize' : ui.configbytes(
             'remotefilelog', 'history.repackmaxpacksize', '400MB'),
         'repacksizelimit' : ui.configbytes(
             'remotefilelog', 'history.repacksizelimit'),
     }

     packfiles = _allpackfileswithsuffix(
         files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX)
     return _computeincrementalpack(packfiles, opts)

 def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
     result = []
     fileset = set(fn for fn, mode, stat in files)
     for filename, mode, stat in files:
         if not filename.endswith(packsuffix):
             continue

         prefix = filename[:-len(packsuffix)]

         # Don't process a pack if it doesn't have an index.
         if (prefix + indexsuffix) not in fileset:
             continue
         result.append((prefix, mode, stat))

     return result

 def _computeincrementalpack(files, opts):
     """Given a set of pack files along with the configuration options, this
     function computes the list of files that should be packed as part of an
     incremental repack.

     It tries to strike a balance between keeping incremental repacks cheap (i.e.
     packing small things when possible, and rolling the packs up to the big ones
     over time).
     """

     limits = list(sorted((util.sizetoint(s) for s in opts['generations']),
                          reverse=True))
     limits.append(0)

     # Group the packs by generation (i.e. by size)
     generations = []
     for i in pycompat.xrange(len(limits)):
         generations.append([])

     sizes = {}
     for prefix, mode, stat in files:
         size = stat.st_size
         if size > opts['repackmaxpacksize']:
             continue

         sizes[prefix] = size
         for i, limit in enumerate(limits):
             if size > limit:
                 generations[i].append(prefix)
                 break

     # Steps for picking what packs to repack:
     # 1. Pick the largest generation with > gencountlimit pack files.
     # 2. Take the smallest three packs.
     # 3. While total-size-of-packs < repacksizelimit: add another pack

     # Find the largest generation with more than gencountlimit packs
     genpacks = []
     for i, limit in enumerate(limits):
         if len(generations[i]) > opts['gencountlimit']:
             # Sort to be smallest last, for easy popping later
             genpacks.extend(sorted(generations[i], reverse=True,
                                    key=lambda x: sizes[x]))
             break

     # Take as many packs from the generation as we can
     chosenpacks = genpacks[-3:]
     genpacks = genpacks[:-3]
     repacksize = sum(sizes[n] for n in chosenpacks)
     while (repacksize < opts['repacksizelimit'] and genpacks and
            len(chosenpacks) < opts['maxrepackpacks']):
         chosenpacks.append(genpacks.pop())
         repacksize += sizes[chosenpacks[-1]]

     return chosenpacks

 def _runrepack(repo, data, history, packpath, category, fullhistory=None,
                options=None):
     shallowutil.mkstickygroupdir(repo.ui, packpath)

     def isold(repo, filename, node):
         """Check if the file node is older than a limit.
         Unless a limit is specified in the config the default limit is taken.
         """
         filectx = repo.filectx(filename, fileid=node)
         filetime = repo[filectx.linkrev()].date()

         ttl = repo.ui.configint('remotefilelog', 'nodettl')

         limit = time.time() - ttl
         return filetime[0] < limit

     garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack')
     if not fullhistory:
         fullhistory = history
     packer = repacker(repo, data, history, fullhistory, category,
                       gc=garbagecollect, isold=isold, options=options)

     with datapack.mutabledatapack(repo.ui, packpath, version=2) as dpack:
         with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
             try:
                 packer.run(dpack, hpack)
             except error.LockHeld:
                 raise RepackAlreadyRunning(_("skipping repack - another repack "
                                              "is already running"))

 def keepset(repo, keyfn, lastkeepkeys=None):
     """Computes a keepset which is not garbage collected.
     'keyfn' is a function that maps filename, node to a unique key.
     'lastkeepkeys' is an optional argument and if provided the keepset
     function updates lastkeepkeys with more keys and returns the result.
     """
     if not lastkeepkeys:
         keepkeys = set()
     else:
         keepkeys = lastkeepkeys

     # We want to keep:
     # 1. Working copy parent
     # 2. Draft commits
     # 3. Parents of draft commits
     # 4. Pullprefetch and bgprefetchrevs revsets if specified
     revs = ['.', 'draft()', 'parents(draft())']
     prefetchrevs = repo.ui.config('remotefilelog', 'pullprefetch', None)
     if prefetchrevs:
         revs.append('(%s)' % prefetchrevs)
     prefetchrevs = repo.ui.config('remotefilelog', 'bgprefetchrevs', None)
     if prefetchrevs:
         revs.append('(%s)' % prefetchrevs)
     revs = '+'.join(revs)

     revs = ['sort((%s), "topo")' % revs]
     keep = scmutil.revrange(repo, revs)

     processed = set()
     lastmanifest = None

     # process the commits in toposorted order starting from the oldest
     for r in reversed(keep._list):
         if repo[r].p1().rev() in processed:
             # if the direct parent has already been processed
             # then we only need to process the delta
             m = repo[r].manifestctx().readdelta()
         else:
             # otherwise take the manifest and diff it
             # with the previous manifest if one exists
             if lastmanifest:
                 m = repo[r].manifest().diff(lastmanifest)
             else:
                 m = repo[r].manifest()
             lastmanifest = repo[r].manifest()
         processed.add(r)

         # populate keepkeys with keys from the current manifest
         if type(m) is dict:
             # m is a result of diff of two manifests and is a dictionary that
             # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
             for filename, diff in m.iteritems():
                 if diff[0][0] is not None:
                     keepkeys.add(keyfn(filename, diff[0][0]))
         else:
             # m is a manifest object
             for filename, filenode in m.iteritems():
                 keepkeys.add(keyfn(filename, filenode))

     return keepkeys

 class repacker(object):
     """Class for orchestrating the repack of data and history information into a
     new format.
     """
     def __init__(self, repo, data, history, fullhistory, category, gc=False,
                  isold=None, options=None):
         self.repo = repo
         self.data = data
         self.history = history
         self.fullhistory = fullhistory
         self.unit = constants.getunits(category)
         self.garbagecollect = gc
         self.options = options
         if self.garbagecollect:
             if not isold:
                 raise ValueError("Function 'isold' is not properly specified")
             # use (filename, node) tuple as a keepset key
             self.keepkeys = keepset(repo, lambda f, n : (f, n))
             self.isold = isold

     def run(self, targetdata, targethistory):
         ledger = repackledger()

         with extutil.flock(repacklockvfs(self.repo).join("repacklock"),
                            _('repacking %s') % self.repo.origroot, timeout=0):
             self.repo.hook('prerepack')

             # Populate ledger from source
             self.data.markledger(ledger, options=self.options)
             self.history.markledger(ledger, options=self.options)

             # Run repack
             self.repackdata(ledger, targetdata)
             self.repackhistory(ledger, targethistory)

             # Call cleanup on each source
             for source in ledger.sources:
                 source.cleanup(ledger)

     def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
         """Reorders ``orphans`` into a single chain inside ``nodes`` and
         ``deltabases``.

         We often have orphan entries (nodes without a base that aren't
         referenced by other nodes -- i.e., part of a chain) due to gaps in
         history. Rather than store them as individual fulltexts, we prefer to
         insert them as one chain sorted by size.
         """
         if not orphans:
             return nodes

         def getsize(node, default=0):
             meta = self.data.getmeta(filename, node)
             if constants.METAKEYSIZE in meta:
                 return meta[constants.METAKEYSIZE]
             else:
                 return default

         # Sort orphans by size; biggest first is preferred, since it's more
         # likely to be the newest version assuming files grow over time.
         # (Sort by node first to ensure the sort is stable.)
         orphans = sorted(orphans)
         orphans = list(sorted(orphans, key=getsize, reverse=True))
         if ui.debugflag:
             ui.debug("%s: orphan chain: %s\n" % (filename,
                 ", ".join([short(s) for s in orphans])))

         # Create one contiguous chain and reassign deltabases.
         for i, node in enumerate(orphans):
             if i == 0:
                 deltabases[node] = (nullid, 0)
             else:
                 parent = orphans[i - 1]
                 deltabases[node] = (parent, deltabases[parent][1] + 1)
         nodes = filter(lambda node: node not in orphans, nodes)
         nodes += orphans
         return nodes

     def repackdata(self, ledger, target):
         ui = self.repo.ui
         maxchainlen = ui.configint('packs', 'maxchainlen', 1000)

         byfile = {}
         for entry in ledger.entries.itervalues():
             if entry.datasource:
                 byfile.setdefault(entry.filename, {})[entry.node] = entry

         count = 0
         for filename, entries in sorted(byfile.iteritems()):
             ui.progress(_("repacking data"), count, unit=self.unit,
                         total=len(byfile))

             ancestors = {}
             nodes = list(node for node in entries)
             nohistory = []
             for i, node in enumerate(nodes):
                 if node in ancestors:
                     continue
                 ui.progress(_("building history"), i, unit='nodes',
                             total=len(nodes))
                 try:
                     ancestors.update(self.fullhistory.getancestors(filename,
                         node, known=ancestors))
                 except KeyError:
                     # Since we're packing data entries, we may not have the
                     # corresponding history entries for them. It's not a big
                     # deal, but the entries won't be delta'd perfectly.
                     nohistory.append(node)
             ui.progress(_("building history"), None)

             # Order the nodes children first, so we can produce reverse deltas
             orderednodes = list(reversed(self._toposort(ancestors)))
             if len(nohistory) > 0:
                 ui.debug('repackdata: %d nodes without history\n' %
                          len(nohistory))
             orderednodes.extend(sorted(nohistory))

             # Filter orderednodes to just the nodes we want to serialize (it
             # currently also has the edge nodes' ancestors).
             orderednodes = filter(lambda node: node in nodes, orderednodes)

             # Garbage collect old nodes:
             if self.garbagecollect:
                 neworderednodes = []
                 for node in orderednodes:
                     # If the node is old and is not in the keepset, we skip it,
                     # and mark as garbage collected
                     if ((filename, node) not in self.keepkeys and
                         self.isold(self.repo, filename, node)):
                         entries[node].gced = True
                         continue
                     neworderednodes.append(node)
                 orderednodes = neworderednodes

             # Compute delta bases for nodes:
             deltabases = {}
             nobase = set()
             referenced = set()
             nodes = set(nodes)
             for i, node in enumerate(orderednodes):
                 ui.progress(_("processing nodes"), i, unit='nodes',
                             total=len(orderednodes))
                 # Find delta base
                 # TODO: allow delta'ing against most recent descendant instead
                 # of immediate child
                 deltatuple = deltabases.get(node, None)
                 if deltatuple is None:
                     deltabase, chainlen = nullid, 0
                     deltabases[node] = (nullid, 0)
                     nobase.add(node)
                 else:
                     deltabase, chainlen = deltatuple
                     referenced.add(deltabase)

                 # Use available ancestor information to inform our delta choices
                 ancestorinfo = ancestors.get(node)
                 if ancestorinfo:
                     p1, p2, linknode, copyfrom = ancestorinfo

                     # The presence of copyfrom means we're at a point where the
                     # file was copied from elsewhere. So don't attempt to do any
                     # deltas with the other file.
                     if copyfrom:
                         p1 = nullid

                     if chainlen < maxchainlen:
                         # Record this child as the delta base for its parents.
                         # This may be non optimal, since the parents may have
                         # many children, and this will only choose the last one.
                         # TODO: record all children and try all deltas to find
                         # best
                         if p1 != nullid:
                             deltabases[p1] = (node, chainlen + 1)
                         if p2 != nullid:
                             deltabases[p2] = (node, chainlen + 1)

             # experimental config: repack.chainorphansbysize
             if ui.configbool('repack', 'chainorphansbysize'):
                 orphans = nobase - referenced
                 orderednodes = self._chainorphans(ui, filename, orderednodes,
                     orphans, deltabases)

             # Compute deltas and write to the pack
             for i, node in enumerate(orderednodes):
                 deltabase, chainlen = deltabases[node]
                 # Compute delta
                 # TODO: Optimize the deltachain fetching. Since we're
                 # iterating over the different version of the file, we may
                 # be fetching the same deltachain over and over again.
                 meta = None
                 if deltabase != nullid:
                     deltaentry = self.data.getdelta(filename, node)
                     delta, deltabasename, origdeltabase, meta = deltaentry
                     size = meta.get(constants.METAKEYSIZE)
                     if (deltabasename != filename or origdeltabase != deltabase
                         or size is None):
                         deltabasetext = self.data.get(filename, deltabase)
                         original = self.data.get(filename, node)
                         size = len(original)
                         delta = mdiff.textdiff(deltabasetext, original)
                 else:
                     delta = self.data.get(filename, node)
                     size = len(delta)
                     meta = self.data.getmeta(filename, node)

                 # TODO: don't use the delta if it's larger than the fulltext
                 if constants.METAKEYSIZE not in meta:
                     meta[constants.METAKEYSIZE] = size
                 target.add(filename, node, deltabase, delta, meta)

                 entries[node].datarepacked = True

             ui.progress(_("processing nodes"), None)
             count += 1

         ui.progress(_("repacking data"), None)
         target.close(ledger=ledger)

     def repackhistory(self, ledger, target):
         ui = self.repo.ui

         byfile = {}
         for entry in ledger.entries.itervalues():
             if entry.historysource:
                 byfile.setdefault(entry.filename, {})[entry.node] = entry

         count = 0
         for filename, entries in sorted(byfile.iteritems()):
             ancestors = {}
             nodes = list(node for node in entries)

             for node in nodes:
                 if node in ancestors:
                     continue
                 ancestors.update(self.history.getancestors(filename, node,
                                                            known=ancestors))

             # Order the nodes children first
             orderednodes = reversed(self._toposort(ancestors))

             # Write to the pack
             dontprocess = set()
             for node in orderednodes:
                 p1, p2, linknode, copyfrom = ancestors[node]

                 # If the node is marked dontprocess, but it's also in the
                 # explicit entries set, that means the node exists both in this
                 # file and in another file that was copied to this file.
                 # Usually this happens if the file was copied to another file,
                 # then the copy was deleted, then reintroduced without copy
                 # metadata. The original add and the new add have the same hash
                 # since the content is identical and the parents are null.
                 if node in dontprocess and node not in entries:
                     # If copyfrom == filename, it means the copy history
                     # went to some other file, then came back to this one, so we
                     # should continue processing it.
                     if p1 != nullid and copyfrom != filename:
                         dontprocess.add(p1)
                     if p2 != nullid:
                         dontprocess.add(p2)
                     continue

                 if copyfrom:
                     dontprocess.add(p1)

                 target.add(filename, node, p1, p2, linknode, copyfrom)

                 if node in entries:
                     entries[node].historyrepacked = True

             count += 1
             ui.progress(_("repacking history"), count, unit=self.unit,
                         total=len(byfile))

         ui.progress(_("repacking history"), None)
         target.close(ledger=ledger)

     def _toposort(self, ancestors):
         def parentfunc(node):
             p1, p2, linknode, copyfrom = ancestors[node]
             parents = []
             if p1 != nullid:
                 parents.append(p1)
             if p2 != nullid:
                 parents.append(p2)
             return parents

         sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
         return sortednodes

 class repackledger(object):
     """Storage for all the bookkeeping that happens during a repack. It contains
     the list of revisions being repacked, what happened to each revision, and
     which source store contained which revision originally (for later cleanup).
     """
     def __init__(self):
         self.entries = {}
         self.sources = {}
         self.created = set()

     def markdataentry(self, source, filename, node):
         """Mark the given filename+node revision as having a data rev in the
         given source.
         """
         entry = self._getorcreateentry(filename, node)
         entry.datasource = True
         entries = self.sources.get(source)
         if not entries:
             entries = set()
             self.sources[source] = entries
         entries.add(entry)

     def markhistoryentry(self, source, filename, node):
         """Mark the given filename+node revision as having a history rev in the
         given source.
         """
         entry = self._getorcreateentry(filename, node)
         entry.historysource = True
         entries = self.sources.get(source)
         if not entries:
             entries = set()
             self.sources[source] = entries
         entries.add(entry)

     def _getorcreateentry(self, filename, node):
         key = (filename, node)
         value = self.entries.get(key)
         if not value:
             value = repackentry(filename, node)
             self.entries[key] = value

         return value

     def addcreated(self, value):
         self.created.add(value)

 class repackentry(object):
     """Simple class representing a single revision entry in the repackledger.
     """
     __slots__ = (r'filename', r'node', r'datasource', r'historysource',
                  r'datarepacked', r'historyrepacked', r'gced')
     def __init__(self, filename, node):
         self.filename = filename
         self.node = node
         # If the revision has a data entry in the source
         self.datasource = False
         # If the revision has a history entry in the source
         self.historysource = False
         # If the revision's data entry was repacked into the repack target
         self.datarepacked = False
         # If the revision's history entry was repacked into the repack target
         self.historyrepacked = False
         # If garbage collected
         self.gced = False

 def repacklockvfs(repo):
     if util.safehasattr(repo, 'name'):
         # Lock in the shared cache so repacks across multiple copies of the same
         # repo are coordinated.
         sharedcachepath = shallowutil.getcachepackpath(
             repo,
             constants.FILEPACK_CATEGORY)
         return vfs.vfs(sharedcachepath)
     else:
         return repo.svfs