upstream/mercurial-mirror Commit - r25823:2406e2ba

1

# changegroup.py - Mercurial changegroup manipulation functions

1

# changegroup.py - Mercurial changegroup manipulation functions

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

import weakref

8

import weakref

9

from i18n import _

9

from i18n import _

10

from node import nullrev, nullid, hex, short

10

from node import nullrev, nullid, hex, short

11

import mdiff, util, dagutil

11

import mdiff, util, dagutil

12

import struct, os, bz2, zlib, tempfile

12

import struct, os, bz2, zlib, tempfile

13

import discovery, error, phases, branchmap

13

import discovery, error, phases, branchmap

14

15

# struct formats of the fixed-size header that precedes every delta.
# Each "20s" field is one 20-byte string. The unpackers in this file read
# the version 1 header as (node, p1, p2, cs) and the version 2 header as
# (node, p1, p2, deltabase, cs).
_CHANGEGROUPV1_DELTA_HEADER = "20s" * 4
_CHANGEGROUPV2_DELTA_HEADER = "20s" * 5

17

18

def readexactly(stream, n):
    """Read n bytes with stream.read and abort if fewer were available.

    Returns the data read. Raises util.Abort when stream.read(n) returns
    less than n bytes.
    """
    data = stream.read(n)
    got = len(data)
    if got >= n:
        return data
    msg = _("stream ended unexpectedly"
            " (got %d bytes, expected %d)")
    raise util.Abort(msg % (got, n))

26

27

def getchunk(stream):
    """Return the next chunk from stream as a string.

    A chunk starts with a 4-byte big-endian signed length that counts the
    length field itself. A length of 0 marks an empty chunk, for which ""
    is returned. Any other length of 4 or less raises util.Abort.
    """
    (length,) = struct.unpack(">l", readexactly(stream, 4))
    if length > 4:
        # the payload is whatever follows the 4-byte length field
        return readexactly(stream, length - 4)
    if length != 0:
        raise util.Abort(_("invalid chunk length %d") % length)
    return ""

36

37

def chunkheader(length):
    """Return the changegroup chunk header (string) for a payload of
    the given length.

    The encoded value is length + 4 because it includes the 4-byte header.
    """
    framed = length + 4
    return struct.pack(">l", framed)

40

41

def closechunk():
    """Return the changegroup chunk header (string) of a zero-length chunk.

    This is a packed length of 0, which getchunk() reports as an empty
    chunk.
    """
    terminator = struct.pack(">l", 0)
    return terminator

44

45

def combineresults(results):
    """Combine 0 or more addchangegroup results into one.

    results is an iterable of integers, interpreted as follows:

    - 0: if any result is 0, the combined result is 0;
    - values greater than 1 contribute (value - 1) to a running head-count
      change, values smaller than -1 contribute (value + 1);
    - 1 and -1 contribute nothing.

    Returns 0 as described above; otherwise 1 + change when the accumulated
    change is positive, -1 + change when it is negative, and 1 when it is
    zero (including when results is empty).
    """
    changedheads = 0
    for ret in results:
        # If any changegroup result is 0, return 0. Returning right away
        # keeps head changes accumulated from earlier results from
        # overriding the 0.
        if ret == 0:
            return 0
        if ret < -1:
            changedheads += ret + 1
        elif ret > 1:
            changedheads += ret - 1
    if changedheads > 0:
        return 1 + changedheads
    if changedheads < 0:
        return -1 + changedheads
    return 1

63

64

class nocompress(object):
    """Pass-through object with the compress()/flush() methods that
    writebundle() calls on a compressor; it leaves the data unchanged."""

    def compress(self, x):
        # identity: the data is emitted exactly as given
        return x

    def flush(self):
        # nothing is ever buffered, so there is nothing left to emit
        return ""

69

70

# Maps a bundle type name to a (header, compressor factory) pair:
# writebundle() writes the header and then calls the factory to obtain an
# object with compress() and flush() methods.
bundletypes = {
    # No header: only when using unbundle on ssh and old http servers.
    # Since the unification ssh accepts a header, but there is no
    # capability signaling it.
    "": ("", nocompress),
    # special-cased in writebundle(), which never looks this entry up
    "HG20": (),
    "HG10UN": ("HG10UN", nocompress),
    # NOTE(review): the header is "HG10" rather than "HG10BZ"; presumably
    # the bz2 stream itself supplies the leading "BZ" (decompressor() feeds
    # "BZ" back to the bz2 decompressor before the file data) -- confirm.
    "HG10BZ": ("HG10", lambda: bz2.BZ2Compressor()),
    "HG10GZ": ("HG10GZ", lambda: zlib.compressobj()),
}

# hgweb uses this list to communicate its preferred type
bundlepriority = ['HG10GZ', 'HG10BZ', 'HG10UN']

82

83

def writebundle(ui, cg, filename, bundletype, vfs=None):
    """Write a bundle file and return its filename.

    If no filename is specified, a temporary file is created with
    tempfile.mkstemp(). Otherwise filename is opened in "wb" mode, through
    vfs when one is given.

    For bundletype "HG20" the changegroup is wrapped in a 'changegroup'
    part of a bundle2 container and written uncompressed. For any other
    bundletype the header and compressor are looked up in bundletypes, and
    only version '01' changegroups are accepted (util.Abort otherwise).

    The bundle file will be deleted in case of errors.
    """
    fh = None
    cleanup = None
    try:
        if not filename:
            fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, "wb")
        elif vfs:
            fh = vfs.open(filename, "wb")
        else:
            fh = open(filename, "wb")
        # from here on, a failure must remove the partially written file
        cleanup = filename

        if bundletype != "HG20":
            if cg.version != '01':
                raise util.Abort(_('old bundle types only supports v1 '
                                   'changegroups'))
            header, compressor = bundletypes[bundletype]
            fh.write(header)
            z = compressor()
            chunkiter = cg.getchunks()
        else:
            import bundle2
            bundle = bundle2.bundle20(ui)
            part = bundle.newpart('changegroup', data=cg.getchunks())
            part.addparam('version', cg.version)
            z = nocompress()
            chunkiter = bundle.getchunks()

        # The changegroup data is consumed chunk by chunk (getchunks parses
        # it); otherwise we would block in case of sshrepo because we don't
        # know the end of the stream.
        for chunk in chunkiter:
            fh.write(z.compress(chunk))
        fh.write(z.flush())

        # everything was written: keep the file
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                remove = vfs.unlink
            else:
                remove = os.unlink
            remove(cleanup)

140

141

def decompressor(fh, alg):
    """Return a readable object giving the decompressed content of fh.

    alg selects the compression of the data in fh:

    - 'UN': no compression, fh itself is returned;
    - 'GZ': zlib;
    - 'BZ': bz2.

    For 'GZ' and 'BZ' the result is a util.chunkbuffer over a generator
    that decompresses fh piece by piece. Any other value of alg raises
    util.Abort.
    """
    if alg == 'UN':
        return fh
    elif alg == 'GZ':
        def generator(f):
            zd = zlib.decompressobj()
            for chunk in util.filechunkiter(f):
                yield zd.decompress(chunk)
    elif alg == 'BZ':
        def generator(f):
            zd = bz2.BZ2Decompressor()
            # Feed the "BZ" magic first. NOTE(review): presumably the
            # caller already consumed those two bytes from fh while reading
            # the bundle header -- confirm against the callers.
            zd.decompress("BZ")
            for chunk in util.filechunkiter(f, 4096):
                yield zd.decompress(chunk)
    else:
        # wrapped in _() like every other abort message in this file
        raise util.Abort(_("unknown bundle compression '%s'") % alg)
    return util.chunkbuffer(generator(fh))

158

159

class cg1unpacker(object):
    """Reader for a version '01' changegroup stream.

    The input file object is wrapped with decompressor() according to the
    compression code given at construction time. The object exposes read(),
    so it can itself be passed to getchunk().
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '01'

    def __init__(self, fh, alg):
        self._stream = decompressor(fh, alg)
        self._type = alg
        # optional callable, invoked by chunklength() for non-empty chunks
        self.callback = None

    def compressed(self):
        """Tell whether the underlying data is compressed (alg is not 'UN')."""
        return self._type != 'UN'

    # thin delegations to the (possibly decompressing) stream

    def read(self, l):
        return self._stream.read(l)

    def seek(self, pos):
        return self._stream.seek(pos)

    def tell(self):
        return self._stream.tell()

    def close(self):
        return self._stream.close()

    def chunklength(self):
        """Read a chunk length field and return the payload length.

        Returns 0 for an empty chunk (length field of 0). Any other length
        field of 4 or less raises util.Abort. self.callback, when set, is
        called once for every non-empty chunk.
        """
        (framed,) = struct.unpack(">l", readexactly(self._stream, 4))
        if framed > 4:
            if self.callback:
                self.callback()
            # the length field counts its own 4 bytes
            return framed - 4
        if framed:
            raise util.Abort(_("invalid chunk length %d") % framed)
        return 0

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename

        An empty dict is returned when the chunk is empty.
        """
        size = self.chunklength()
        if not size:
            return {}
        return {'filename': readexactly(self._stream, size)}

    def _deltaheader(self, headertuple, prevnode):
        """Expand an unpacked delta header to (node, p1, p2, deltabase, cs).

        The v1 header carries no delta base: it is p1 when prevnode is
        None, and prevnode otherwise.
        """
        node, p1, p2, cs = headertuple
        deltabase = p1 if prevnode is None else prevnode
        return node, p1, p2, deltabase, cs

    def deltachunk(self, prevnode):
        """Read the next delta chunk and return it as a dict.

        Returns {} for an empty chunk. Otherwise the dict has the keys
        'node', 'p1', 'p2', 'cs', 'deltabase' and 'delta'.
        """
        size = self.chunklength()
        if not size:
            return {}
        rawheader = readexactly(self._stream, self.deltaheadersize)
        fields = struct.unpack(self.deltaheader, rawheader)
        # the rest of the chunk, after the fixed-size header, is the delta
        delta = readexactly(self._stream, size - self.deltaheadersize)
        node, p1, p2, deltabase, cs = self._deltaheader(fields, prevnode)
        return {
            'node': node,
            'p1': p1,
            'p2': p2,
            'cs': cs,
            'deltabase': deltabase,
            'delta': delta,
        }

    def getchunks(self):
        """Return all the chunks contained in the bundle.

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parses the changegroup data; otherwise it
        would block in case of sshrepo because it doesn't know the end of
        the stream.
        """
        # an empty chunkgroup is the end of the changegroup
        # a changegroup has at least 2 chunkgroups (changelog and manifest).
        # after that, an empty chunkgroup is the end of the changegroup
        sawdata = True
        groupsseen = 0
        while sawdata or groupsseen <= 2:
            sawdata = False
            groupsseen += 1
            while True:
                # self provides read(), so it serves as getchunk's stream
                chunk = getchunk(self)
                if not chunk:
                    break
                sawdata = True
                yield chunkheader(len(chunk))
                # emit the payload in pieces of at most 2**20 bytes
                for start in xrange(0, len(chunk), 2**20):
                    yield chunk[start:start + 2**20]
            yield closechunk()

251

252

class cg2unpacker(cg1unpacker):
    """Reader for a version '02' changegroup stream.

    It differs from cg1unpacker only in the delta header, which carries
    the delta base explicitly.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # The header already holds all five fields; prevnode is not needed.
        # Unpacking still checks that exactly five values are present.
        (node, p1, p2, deltabase, cs) = headertuple
        return (node, p1, p2, deltabase, cs)

260

261

class headerlessfixup(object):
    """File-like reader that serves the string h first, then reads from fh.

    read(n) returns exactly n bytes: leftover bytes of h come first and
    the remainder is taken from fh with readexactly().
    """

    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        if not self._h:
            # the prefix is exhausted: read straight from the file
            return readexactly(self._fh, n)
        head = self._h[:n]
        self._h = self._h[n:]
        missing = n - len(head)
        if missing > 0:
            # the prefix ran out part-way: complete the read from the file
            head += readexactly(self._fh, missing)
        return head

272

273

class cg1packer(object):

273

class cg1packer(object):

274

deltaheader = _CHANGEGROUPV1_DELTA_HEADER

274

deltaheader = _CHANGEGROUPV1_DELTA_HEADER

275

version = '01'

275

version = '01'

276

def __init__(self, repo, bundlecaps=None):
    """Given a source repo, construct a bundler.

    bundlecaps is optional and can be used to specify the set of
    capabilities which can be used to build the bundle.

    The 'bundle.reorder' config value is read from repo.ui: 'auto' (the
    default) is stored as None, anything else is converted with
    util.parsebool().
    """
    ui = repo.ui

    # Set of capabilities we can use to build the bundle.
    if bundlecaps is None:
        bundlecaps = set()
    self._bundlecaps = bundlecaps

    setting = ui.config('bundle', 'reorder', 'auto')
    if setting != 'auto':
        reorder = util.parsebool(setting)
    else:
        reorder = None

    self._repo = repo
    self._reorder = reorder
    self._progress = ui.progress

    # size notes are only emitted in verbose mode, and not in debug mode
    if ui.verbose and not ui.debugflag:
        self._verbosenote = ui.note
    else:
        self._verbosenote = lambda s: None

298

299

def close(self):
    """Return the chunk that ends a chunk group (see closechunk())."""
    terminator = closechunk()
    return terminator

301

302

def fileheader(self, fname):
    """Return the chunk announcing the file fname: a chunk header for
    len(fname) followed by fname itself."""
    header = chunkheader(len(fname))
    return header + fname

304

305

def group(self, nodelist, revlog, lookup, units=None):
    """Calculate a delta group, yielding a sequence of changegroup chunks
    (strings).

    Given a list of nodes of revlog, yield the deltas and metadata for
    those nodes, followed by a closing chunk. The first delta is
    first parent(nodelist[0]) -> nodelist[0]; the receiver is
    guaranteed to have this parent as it has all history before
    these changesets. In the case firstparent is nullrev the
    changegroup starts with a full revision.

    lookup is called with each node and returns its linknode.

    If units is not None, progress detail will be generated; units
    specifies the type of revlog that is touched (changelog, manifest,
    etc.).
    """
    # if we don't have any revisions touched by these changesets, bail
    if len(nodelist) == 0:
        yield self.close()
        return

    # for generaldelta revlogs, we linearize the revs; this will both be
    # much quicker and generate a much smaller bundle
    if (revlog._generaldelta and self._reorder is None) or self._reorder:
        dag = dagutil.revlogdag(revlog)
        revs = dag.linearize(set(revlog.rev(n) for n in nodelist))
    else:
        revs = sorted([revlog.rev(n) for n in nodelist])

    # add the parent of the first rev
    firstparent = revlog.parentrevs(revs[0])[0]
    revs.insert(0, firstparent)

    # build deltas: one for each pair of consecutive entries of revs
    total = len(revs) - 1
    msgbundling = _('bundling')
    reportprogress = units is not None
    for idx in xrange(total):
        if reportprogress:
            self._progress(msgbundling, idx + 1, unit=units, total=total)
        prev = revs[idx]
        curr = revs[idx + 1]
        linknode = lookup(revlog.node(curr))
        for piece in self.revchunk(revlog, curr, prev, linknode):
            yield piece

    if reportprogress:
        self._progress(msgbundling, None)
    yield self.close()

351

352

# filter any nodes that claim to be part of the known set

352

# filter any nodes that claim to be part of the known set

353

def prune(self, revlog, missing, commonrevs):
    """Return the nodes of missing whose linkrev is not in commonrevs.

    The linkrev of a node n is revlog.linkrev(revlog.rev(n)). The result
    is a new list that keeps the iteration order of missing.
    """
    torev = revlog.rev
    linkrev = revlog.linkrev
    kept = []
    for node in missing:
        if linkrev(torev(node)) not in commonrevs:
            kept.append(node)
    return kept

356

357

def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
    '''yield a sequence of changegroup chunks (strings)

    The chunks come in this order: the changelog group for clnodes, the
    manifest group, the per-file chunks produced by generatefiles(), and a
    final closing chunk. Once everything has been yielded, the 'outgoing'
    hook is run if clnodes is not empty.

    commonrevs is used with prune() to drop manifest revisions whose
    linkrev is in it, and is passed on to generatefiles().

    fastpathlinkrev, when true and self._reorder is false, makes the file
    linknodes come from each filelog's own linkrevs instead of being
    collected while the manifests are read.

    source is passed to generatefiles() and to the hook.
    '''
    repo = self._repo
    cl = repo.changelog
    ml = repo.manifest

    # changelog node -> order in which lookupcl saw it
    clrevorder = {}
    mfs = {} # needed manifests
    fnodes = {} # needed file nodes
    changedfiles = set()

    # Callback for the changelog, used to collect changed files and manifest
    # nodes.
    # Returns the linkrev node (identity in the changelog case).
    def lookupcl(x):
        c = cl.read(x)
        clrevorder[x] = len(clrevorder)
        changedfiles.update(c[3])
        # record the first changeset introducing this manifest version
        mfs.setdefault(c[0], x)
        return x

    self._verbosenote(_('uncompressed size of bundle content:\n'))
    size = 0
    for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
        size += len(chunk)
        yield chunk
    self._verbosenote(_('%8.i (changelog)\n') % size)

    # We need to make sure that the linkrev in the changegroup refers to
    # the first changeset that introduced the manifest or file revision.
    # The fastpath is usually safer than the slowpath, because the filelogs
    # are walked in revlog order.
    #
    # When taking the slowpath with reorder=None and the manifest revlog
    # uses generaldelta, the manifest may be walked in the "wrong" order.
    # Without 'clrevorder', we would get an incorrect linkrev (see fix in
    # cc0ff93d0c0c).
    #
    # When taking the fastpath, we are only vulnerable to reordering
    # of the changelog itself. The changelog never uses generaldelta, so
    # it is only reordered when reorder=True. To handle this case, we
    # simply take the slowpath, which already has the 'clrevorder' logic.
    # This was also fixed in cc0ff93d0c0c.
    fastpathlinkrev = fastpathlinkrev and not self._reorder
    # Callback for the manifest, used to collect linkrevs for filelog
    # revisions.
    # Returns the linkrev node (collected in lookupcl).
    def lookupmf(x):
        clnode = mfs[x]
        if not fastpathlinkrev:
            mdata = ml.readfast(x)
            for f, n in mdata.iteritems():
                if f in changedfiles:
                    # record the first changeset introducing this filelog
                    # version
                    fclnodes = fnodes.setdefault(f, {})
                    fclnode = fclnodes.setdefault(n, clnode)
                    # keep whichever changeset lookupcl saw first
                    if clrevorder[clnode] < clrevorder[fclnode]:
                        fclnodes[n] = clnode
        return clnode

    mfnodes = self.prune(ml, mfs, commonrevs)
    size = 0
    for chunk in self.group(mfnodes, ml, lookupmf, units=_('manifests')):
        size += len(chunk)
        yield chunk
    self._verbosenote(_('%8.i (manifests)\n') % size)

    # the manifest -> changeset map is not used past this point
    mfs.clear()
    clrevs = set(cl.rev(x) for x in clnodes)

    # Returns a {file node: changelog node} mapping for one file, handed to
    # generatefiles() as its 'linknodes' callback.
    def linknodes(filerevlog, fname):
        if fastpathlinkrev:
            llr = filerevlog.linkrev
            def genfilenodes():
                for r in filerevlog:
                    linkrev = llr(r)
                    if linkrev in clrevs:
                        yield filerevlog.node(r), cl.node(linkrev)
            return dict(genfilenodes())
        # slowpath: use what lookupmf collected while reading manifests
        return fnodes.get(fname, {})

    for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                    source):
        yield chunk

    yield self.close()

    if clnodes:
        repo.hook('outgoing', node=hex(clnodes[0]), source=source)

448

449

# The 'source' parameter is useful for extensions

449

# The 'source' parameter is useful for extensions

450

def generatefiles(self, changedfiles, linknodes, commonrevs, source):
    """Yield the changegroup chunks for the files in changedfiles.

    Files are processed in sorted name order. For each file,
    linknodes(filerevlog, fname) supplies a mapping from file node to
    linknode. Its nodes are filtered with prune() against commonrevs, and
    a file with nodes left contributes a file header followed by its
    delta group.

    Raises util.Abort when the revlog of a file is empty or missing.
    The 'source' parameter is not used here; it is useful for extensions.
    """
    repo = self._repo
    progress = self._progress
    msgbundling = _('bundling')

    total = len(changedfiles)
    # for progress output
    msgfiles = _('files')
    for pos, fname in enumerate(sorted(changedfiles), 1):
        filerevlog = repo.file(fname)
        if not filerevlog:
            raise util.Abort(_("empty or missing revlog for %s") % fname)

        linkrevnodes = linknodes(filerevlog, fname)
        # Lookup for filenodes: the linkrev nodes come from the linknodes
        # callback supplied by the caller.
        def lookupfilelog(x):
            return linkrevnodes[x]

        filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
        if not filenodes:
            # nothing to send for this file
            continue

        progress(msgbundling, pos, item=fname, unit=msgfiles,
                 total=total)
        header = self.fileheader(fname)
        size = len(header)
        yield header
        for chunk in self.group(filenodes, filerevlog, lookupfilelog):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i %s\n') % (size, fname))
    progress(msgbundling, None)

481

482

def deltaparent(self, revlog, rev, p1, p2, prev):
    """Return the revision that ``rev`` is sent as a delta against.

    This packer always answers with ``prev``, the value handed in by
    the caller; ``revlog``, ``rev``, ``p1`` and ``p2`` are accepted but
    not consulted.
    """
    base = prev
    return base

484

485

def revchunk(self, revlog, rev, prev, linknode):
    """Generate the pieces of the chunk describing revision ``rev``.

    Yields, in order: the result of chunkheader() for the combined
    length, the delta header (with any diff prefix appended) and the
    delta payload itself.

    The delta base comes from self.deltaparent().  When either the base
    or ``rev`` is censored, the full revision text (or the tombstone
    carried by CensoredNodeError) is sent behind a diff header instead
    of a computed delta.  A null base likewise sends the full text.
    """
    node = revlog.node(rev)
    p1, p2 = revlog.parentrevs(rev)
    base = self.deltaparent(revlog, rev, p1, p2, prev)

    if revlog.iscensored(base) or revlog.iscensored(rev):
        try:
            delta = revlog.revision(node)
        except error.CensoredNodeError as e:
            delta = e.tombstone
        if base == nullrev:
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            # header sized from the raw length of the base revision
            baselen = revlog.rawsize(base)
            prefix = mdiff.replacediffheader(baselen, len(delta))
    elif base == nullrev:
        delta = revlog.revision(node)
        prefix = mdiff.trivialdiffheader(len(delta))
    else:
        delta = revlog.revdiff(base, rev)
        prefix = ''

    p1n, p2n = revlog.parents(node)
    basenode = revlog.node(base)
    meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode)
    meta += prefix
    yield chunkheader(len(meta) + len(delta))
    yield meta
    yield delta

514

def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
    """Pack the per-revision delta header using ``self.deltaheader``.

    ``basenode`` is deliberately left out of the packed data: in HG10
    the delta base is implicitly the previous revision.
    """
    fields = (node, p1n, p2n, linknode)
    return struct.pack(self.deltaheader, *fields)

517

518

class cg2packer(cg1packer):
    """Packer for changegroup version '02'.

    Unlike cg1packer, the delta base node is written explicitly into
    every delta header, so a base other than the previously sent
    revision can be used.
    """
    version = '02'
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER

    def __init__(self, repo, bundlecaps=None):
        super(cg2packer, self).__init__(repo, bundlecaps)
        # Since generaldelta is directly supported by cg2, reordering
        # generally doesn't help, so an unset value (bundle.reorder=auto)
        # is treated just like bundle.reorder=False.
        if self._reorder is None:
            self._reorder = False

    def deltaparent(self, revlog, rev, p1, p2, prev):
        """Return the revision ``rev`` is sent as a delta against.

        The revlog's own delta parent is reused only when it is one of
        ``p1``, ``p2`` or ``prev``; otherwise ``prev`` is returned.
        """
        dp = revlog.deltaparent(rev)
        # A null delta parent would mean sending a full revision, and a
        # delta parent outside (p1, p2, prev) is one we can't be sure
        # the remote has; both cases fall back to prev.
        if dp != nullrev and dp in (p1, p2, prev):
            return dp
        return prev

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
        """Pack the delta header, including the explicit ``basenode``."""
        fields = (node, p1n, p2n, basenode, linknode)
        return struct.pack(self.deltaheader, *fields)

540

541

# changegroup version string -> (packer class, unpacker class)
packermap = {
    '01': (cg1packer, cg1unpacker),
    '02': (cg2packer, cg2unpacker),
}

543

544

def _changegroupinfo(repo, nodes, source):

544

def _changegroupinfo(repo, nodes, source):

545

if repo.ui.verbose or source == 'bundle':

545

if repo.ui.verbose or source == 'bundle':

546

repo.ui.status(_("%d changesets found\n") % len(nodes))

546

repo.ui.status(_("%d changesets found\n") % len(nodes))

547

if repo.ui.debugflag:

547

if repo.ui.debugflag:

548

repo.ui.debug("list of changesets:\n")

548

repo.ui.debug("list of changesets:\n")

549

for node in nodes:

549

for node in nodes:

550

repo.ui.debug("%s\n" % hex(node))

550

repo.ui.debug("%s\n" % hex(node))

551

552

def getsubsetraw(repo, outgoing, bundler, source, fastpath=False):
    """Run ``bundler.generate()`` for the changesets in ``outgoing``.

    Works on the unfiltered repo, fires the 'preoutgoing' hook, reports
    the changesets via _changegroupinfo() and returns whatever
    ``bundler.generate()`` returns.

    Note that ``outgoing.missingheads`` is sorted in place.
    """
    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all
    # (unfiltered) heads have been requested, since we then know that
    # all linkrevs will be pulled by the client.
    heads.sort()
    fastpathlinkrev = fastpath
    if not fastpathlinkrev:
        fastpathlinkrev = (repo.filtername is None
                           and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)

567

568

def getsubset(repo, outgoing, bundler, source, fastpath=False, version='01'):
    """Like getsubsetraw(), but wrap the result in an unpacker.

    The raw output is buffered with util.chunkbuffer and handed, with
    the 'UN' marker, to the unpacker class registered in packermap for
    ``version``.
    """
    gengroup = getsubsetraw(repo, outgoing, bundler, source, fastpath)
    unpackercls = packermap[version][1]
    return unpackercls(util.chunkbuffer(gengroup), 'UN')

571

572

def changegroupsubset(repo, roots, heads, source, version='01'):
    """Compute a changegroup consisting of all the nodes that are
    descendants of any of the roots and ancestors of any of the heads.
    Return the result of getsubset() for those nodes, from which the
    successive changegroup chunks can be read.

    It is fairly complex as determining which filenodes and which
    manifest nodes need to be included for the changeset to be complete
    is non-trivial.

    Another wrinkle is doing the reverse, figuring out which changeset in
    the changegroup a particular filenode or manifestnode belongs to.
    """
    cl = repo.changelog
    if not roots:
        roots = [nullid]
    # non-null parents of the requested roots are the candidate bases
    parents = [p for n in roots for p in cl.parents(n) if p != nullid]
    # TODO: remove call to nodesbetween.
    csets, roots, heads = cl.nodesbetween(roots, heads)
    included = set(csets)
    # a parent that is itself part of the group cannot serve as a base
    discbases = [n for n in parents if n not in included]
    outgoing = discovery.outgoing(cl, discbases, heads)
    packercls = packermap[version][0]
    bundler = packercls(repo)
    return getsubset(repo, outgoing, bundler, source, version=version)

598

599

def getlocalchangegroupraw(repo, source, outgoing, bundlecaps=None,
                           version='01'):
    """Like getbundle, but taking a discovery.outgoing as an argument.

    This is only implemented for local repos and reuses potentially
    precomputed sets in outgoing.  Returns a raw changegroup generator,
    or None when ``outgoing.missing`` is empty."""
    if outgoing.missing:
        packercls = packermap[version][0]
        bundler = packercls(repo, bundlecaps)
        return getsubsetraw(repo, outgoing, bundler, source)
    return None

609

610

def getlocalchangegroup(repo, source, outgoing, bundlecaps=None,
                        version='01'):
    """Like getbundle, but taking a discovery.outgoing as an argument.

    This is only implemented for local repos and reuses potentially
    precomputed sets in outgoing.

    ``version`` selects the packer/unpacker pair from packermap, in the
    same way as getlocalchangegroupraw(); the default '01' keeps the
    previous behaviour of always using cg1packer.

    Returns None when ``outgoing.missing`` is empty, otherwise the
    result of getsubset()."""
    if not outgoing.missing:
        return None
    bundler = packermap[version][0](repo, bundlecaps)
    return getsubset(repo, outgoing, bundler, source, version=version)

619

620

def computeoutgoing(repo, heads, common):
    """Computes which revs are outgoing given a set of common
    and a set of heads.

    Nodes in ``common`` that the changelog does not know are dropped;
    an empty ``common`` becomes [nullid] and empty ``heads`` become the
    changelog heads.

    This is a separate function so extensions can have access to
    the logic.

    Returns a discovery.outgoing object.
    """
    cl = repo.changelog
    if not common:
        common = [nullid]
    else:
        known = cl.hasnode
        common = [n for n in common if known(n)]
    if not heads:
        heads = cl.heads()
    return discovery.outgoing(cl, common, heads)

638

639

def getchangegroup(repo, source, heads=None, common=None, bundlecaps=None):
    """Like changegroupsubset, but returns the set difference between the
    ancestors of heads and the ancestors common.

    If heads is None, use the local heads. If common is None, use [nullid].

    The nodes in common might not all be known locally due to the way the
    current discovery protocol works.
    """
    missing = computeoutgoing(repo, heads, common)
    return getlocalchangegroup(repo, source, missing, bundlecaps=bundlecaps)

650

651

def changegroup(repo, basenodes, source):
    """Build a changegroup from ``basenodes`` up to the current heads.

    Delegates to changegroupsubset() with ``repo.heads()``; going
    through changegroupsubset() avoids a race (issue1320).
    """
    currentheads = repo.heads()
    return changegroupsubset(repo, basenodes, currentheads, source)

654

655

def addchangegroupfiles(repo, source, revmap, trp, pr, needfiles):
    """Add every file revlog group read from ``source`` to ``repo``.

    ``pr`` is called once per file group.  ``needfiles`` maps file names
    to the set of file nodes that must arrive; nodes are removed as they
    are seen, a node that was not expected aborts, and any node still
    pending at the end must already exist in the file's revlog.

    Returns a ``(revisions, files)`` tuple: the number of revisions
    added across all files and the number of file groups processed.

    Raises util.Abort for an empty group, a censored delta base, a
    spurious revlog entry or missing file data.
    """
    revisions = 0
    files = 0
    while True:
        chunkdata = source.filelogheader()
        if not chunkdata:
            break
        fname = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % fname)
        pr()
        flog = repo.file(fname)
        oldlen = len(flog)
        try:
            added = flog.addgroup(source, revmap, trp)
        except error.CensoredBaseError as e:
            raise util.Abort(_("received delta base is censored: %s") % e)
        if not added:
            raise util.Abort(_("received file revlog group is empty"))
        revisions += len(flog) - oldlen
        files += 1
        if fname in needfiles:
            needs = needfiles[fname]
            # every revision just added must be one we were waiting for
            for rev in xrange(oldlen, len(flog)):
                node = flog.node(rev)
                if node not in needs:
                    raise util.Abort(
                        _("received spurious file revlog entry"))
                needs.remove(node)
            if not needs:
                del needfiles[fname]
    repo.ui.progress(_('files'), None)

    # whatever is still listed was not part of the group; it has to be
    # present in the repository already
    for fname, needs in needfiles.iteritems():
        flog = repo.file(fname)
        for node in needs:
            try:
                flog.rev(node)
            except error.LookupError:
                raise util.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (fname, hex(node)))

    return revisions, files

698

699

def addchangegroup(repo, source, srctype, url, emptyok=False,
                   targetphase=phases.draft, expectedtotal=None):
    """Add the changegroup returned by source.read() to this repo.

    srctype is a string like 'push', 'pull', or 'unbundle'.  url is
    the URL of the repo where this changegroup is coming from.

    The changelog group is applied first, then the manifest group, then
    the file groups, all inside one transaction.  Phase boundaries are
    adjusted afterwards and the 'changegroup'/'incoming' hooks are
    scheduled to run after the lock is released.

    Return an integer summarizing the change to this repo:
    - nothing changed or no source: 0
    - more heads than before: 1+added heads (2..n)
    - fewer heads than before: -1-removed heads (-2..-n)
    - number of heads stays the same: 1
    """
    repo = repo.unfiltered()

    # Both callbacks look ``cl`` up lazily; it is bound further down,
    # before either of them is handed to addgroup().
    def csmap(x):
        repo.ui.debug("add changeset %s\n" % short(x))
        return len(cl)

    def revmap(x):
        return cl.rev(x)

    if not source:
        return 0

    changesets = files = revisions = 0
    # names of the files touched by incoming changesets (see onchangelog)
    efiles = set()

    tr = repo.transaction("\n".join([srctype, util.hidepassword(url)]))
    # The transaction could have been created before and already carries
    # source information. In this case we use the top level data. We
    # overwrite the arguments because we need to use the top level values
    # (if they exist) in this function.
    srctype = tr.hookargs.setdefault('source', srctype)
    url = tr.hookargs.setdefault('url', url)

    # write changelog data to temp files so concurrent readers will not
    # see an inconsistent view
    cl = repo.changelog
    cl.delayupdate(tr)
    oldheads = cl.heads()
    try:
        repo.hook('prechangegroup', throw=True, **tr.hookargs)

        trp = weakref.proxy(tr)
        # pull off the changeset group
        repo.ui.status(_("adding changesets\n"))
        clstart = len(cl)

        class prog(object):
            """Progress callback counting the chunks of one step."""
            def __init__(self, step, total):
                self._step = step
                self._total = total
                self._count = 1
            def __call__(self):
                repo.ui.progress(self._step, self._count, unit=_('chunks'),
                                 total=self._total)
                self._count += 1
        source.callback = prog(_('changesets'), expectedtotal)

        def onchangelog(cl, node):
            # item 3 of a changelog entry is used as its list of files
            efiles.update(cl.read(node)[3])

        source.changelogheader()
        srccontent = cl.addgroup(source, csmap, trp,
                                 addrevisioncb=onchangelog)
        efilecount = len(efiles)

        if not (srccontent or emptyok):
            raise util.Abort(_("received changelog group is empty"))
        clend = len(cl)
        changesets = clend - clstart
        repo.ui.progress(_('changesets'), None)

        # pull off the manifest group
        repo.ui.status(_("adding manifests\n"))
        # manifests <= changesets
        source.callback = prog(_('manifests'), changesets)
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        source.manifestheader()
        repo.manifest.addgroup(source, revmap, trp)
        repo.ui.progress(_('manifests'), None)

        needfiles = {}
        if repo.ui.configbool('server', 'validate', default=False):
            # validate incoming csets have their manifests
            for cset in xrange(clstart, clend):
                mfnode = repo.changelog.read(repo.changelog.node(cset))[0]
                mfest = repo.manifest.readdelta(mfnode)
                # store file nodes we must see
                for f, n in mfest.iteritems():
                    needfiles.setdefault(f, set()).add(n)

        # process the files
        repo.ui.status(_("adding file changes\n"))
        source.callback = None
        pr = prog(_('files'), efilecount)
        newrevs, newfiles = addchangegroupfiles(repo, source, revmap, trp, pr,
                                                needfiles)
        revisions += newrevs
        files += newfiles

        # head delta; new heads that close their branch are not counted
        dh = 0
        if oldheads:
            heads = cl.heads()
            closed = sum(1 for h in heads
                         if h not in oldheads and repo[h].closesbranch())
            dh = len(heads) - len(oldheads) - closed
        if dh:
            htext = _(" (%+d heads)") % dh
        else:
            htext = ""

        repo.ui.status(_("added %d changesets"
                         " with %d changes to %d files%s\n")
                         % (changesets, revisions, files, htext))
        repo.invalidatevolatilesets()

        if changesets > 0:
            def pending():
                return tr.writepending() and repo.root or ""
            # The hooks always see the first added changeset as 'node';
            # the transaction only receives it if it has no 'node' yet.
            firstnode = hex(cl.node(clstart))
            tr.hookargs.setdefault('node', firstnode)
            hookargs = dict(tr.hookargs)
            hookargs['node'] = firstnode
            repo.hook('pretxnchangegroup', throw=True, pending=pending,
                      **hookargs)

        added = [cl.node(r) for r in xrange(clstart, clend)]
        publishing = repo.publishing()
        if srctype in ('push', 'serve'):
            # Old servers can not push the boundary themselves.
            # New servers won't push the boundary if changeset already
            # exists locally as secret
            #
            # We should not use added here but the list of all changes
            # in the bundle
            if publishing:
                phases.advanceboundary(repo, tr, phases.public, srccontent)
            else:
                # Those changesets have been pushed from the outside,
                # their phases are going to be pushed alongside.
                # Therefore `targetphase` is ignored.
                phases.advanceboundary(repo, tr, phases.draft, srccontent)
                phases.retractboundary(repo, tr, phases.draft, added)
        elif srctype != 'strip':
            # publishing only alters behavior during push
            #
            # strip should not touch boundary at all
            phases.retractboundary(repo, tr, targetphase, added)

        if changesets > 0:
            if srctype != 'strip':
                # During strip, branchcache is invalid but coming call to
                # `destroyed` will repair it.
                # In other case we can safely update cache on disk.
                branchmap.updatecache(repo.filtered('served'))

            def runhooks():
                # These hooks run when the lock releases, not when the
                # transaction closes. So it's possible for the changelog
                # to have changed since we last saw it.
                if clstart >= len(repo):
                    return

                repo.ui.debug("updating the branch cache\n")
                repo.hook("changegroup", **hookargs)

                # one 'incoming' hook call per added changeset
                for n in added:
                    args = hookargs.copy()
                    args['node'] = hex(n)
                    repo.hook("incoming", **args)

                newheads = [h for h in repo.heads() if h not in oldheads]
                repo.ui.log("incoming",
                            "%s incoming changes - new heads: %s\n",
                            len(added),
                            ', '.join([hex(c[:6]) for c in newheads]))

            tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                            lambda tr: repo._afterlock(runhooks))

        tr.close()

    finally:
        tr.release()
        repo.ui.flush()
    # never return 0 here:
    if dh < 0:
        return dh - 1
    return dh + 1

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # changegroup.py - Mercurial changegroup manipulation functions
             #
             #  Copyright 2006 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import weakref
             from i18n import _
             from node import nullrev, nullid, hex, short
             import mdiff, util, dagutil
             import struct, os, bz2, zlib, tempfile
             import discovery, error, phases, branchmap
             # struct formats of the per-delta header; every field is a 20-byte string.
             # V1 has four fields, unpacked as (node, p1, p2, cs) by
             # cg1unpacker._deltaheader.
             _CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
             # V2 has five fields, unpacked as (node, p1, p2, deltabase, cs) by
             # cg2unpacker._deltaheader.
             _CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
             def readexactly(stream, n):
                 """Return exactly n bytes obtained from stream.read(n).

                 Raises util.Abort when the stream hands back fewer than n bytes.
                 """
                 data = stream.read(n)
                 got = len(data)
                 if got >= n:
                     return data
                 raise util.Abort(_("stream ended unexpectedly"
                                    " (got %d bytes, expected %d)")
                                  % (got, n))
             def getchunk(stream):
                 """Read one length-prefixed chunk from stream and return its payload.

                 The 4-byte big-endian signed prefix counts itself, so the payload is
                 the encoded length minus 4.  An encoded length of 0 yields "";
                 any other length of 4 or less raises util.Abort.
                 """
                 (length,) = struct.unpack(">l", readexactly(stream, 4))
                 if length > 4:
                     return readexactly(stream, length - 4)
                 if length != 0:
                     raise util.Abort(_("invalid chunk length %d") % length)
                 return ""
             def chunkheader(length):
                 """Return the 4-byte prefix announcing a chunk with `length` payload bytes.

                 The encoded value is length + 4 because the prefix counts itself.
                 """
                 encoded = length + 4
                 return struct.pack(">l", encoded)
             def closechunk():
                 """Return the 4-byte prefix of a zero-length chunk (encoded length 0)."""
                 terminator = struct.pack(">l", 0)
                 return terminator
             def combineresults(results):
                 """Combine 0 or more addchangegroup results into one.

                 Each result is an integer: 0 marks a failed changegroup, a value
                 above 1 contributes (value - 1) to the head delta and a value below
                 -1 contributes (value + 1); 1 and -1 contribute nothing.

                 Returns 0 as soon as any result is 0.  Otherwise returns
                 1 + delta when the summed head delta is positive, -1 + delta when
                 it is negative, and 1 when it is zero (including for no results).
                 """
                 changedheads = 0
                 for ret in results:
                     # If any changegroup result is 0, return 0.  Returning right
                     # away keeps the head-delta adjustment below from overwriting
                     # the failure with a non-zero value.
                     if ret == 0:
                         return 0
                     if ret < -1:
                         changedheads += ret + 1
                     elif ret > 1:
                         changedheads += ret - 1
                 if changedheads > 0:
                     return 1 + changedheads
                 if changedheads < 0:
                     return -1 + changedheads
                 return 1
             class nocompress(object):
                 """Pass-through object offering the compress()/flush() pair."""
                 def compress(self, x):
                     """Hand x back untouched."""
                     passthrough = x
                     return passthrough
                 def flush(self):
                     """Return an empty string: compress() never holds data back."""
                     leftover = ""
                     return leftover
             # Maps a bundle type name to (header, compressor factory).  writebundle()
             # writes the header string to the file and then calls the factory with no
             # arguments to get an object with compress()/flush() methods.
             bundletypes = {
                 "": ("", nocompress), # only when using unbundle on ssh and old http servers
                                       # since the unification ssh accepts a header but there
                                       # is no capability signaling it.
                 "HG20": (), # special-cased below
                 "HG10UN": ("HG10UN", nocompress),
                 # NOTE(review): the header here is only "HG10"; presumably the bz2
                 # stream itself supplies the following "BZ" bytes (decompressor()
                 # feeds "BZ" back to the BZ2 decompressor) -- confirm.
                 "HG10BZ": ("HG10", lambda: bz2.BZ2Compressor()),
                 "HG10GZ": ("HG10GZ", lambda: zlib.compressobj()),
             }
             # hgweb uses this list to communicate its preferred type
             bundlepriority = ['HG10GZ', 'HG10BZ', 'HG10UN']
             def writebundle(ui, cg, filename, bundletype, vfs=None):
                 """Write a bundle file and return its filename.

                 ui is passed to bundle2.bundle20 for "HG20" bundles.
                 cg supplies the data through cg.getchunks() and its cg.version.
                 filename is the target path; if it is empty, a temporary file
                 (prefix "hg-bundle-", suffix ".hg") is created instead.
                 bundletype is "HG20" or a key of bundletypes.
                 vfs, when given together with a filename, is used to open the file
                 and to unlink it on failure.

                 Raises util.Abort when a non-"HG20" bundle type is requested for a
                 changegroup whose version is not '01'.

                 The bundle file will be deleted in case of errors.

                 NOTE(review): the original text said "Existing files will not be
                 overwritten", but the plain open(filename, "wb") path truncates an
                 existing file -- confirm what vfs.open does before relying on it.
                 """
                 fh = None
                 # path to remove in the finally block; reset to None on success
                 cleanup = None
                 try:
                     if filename:
                         if vfs:
                             fh = vfs.open(filename, "wb")
                         else:
                             fh = open(filename, "wb")
                     else:
                         fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
                         fh = os.fdopen(fd, "wb")
                     # from here on a failure must remove the (partial) file
                     cleanup = filename
                     if bundletype == "HG20":
                         import bundle2
                         bundle = bundle2.bundle20(ui)
                         part = bundle.newpart('changegroup', data=cg.getchunks())
                         part.addparam('version', cg.version)
                         # the bundle2 chunks are written as-is, without compression
                         z = nocompress()
                         chunkiter = bundle.getchunks()
                     else:
                         if cg.version != '01':
                             raise util.Abort(_('old bundle types only supports v1 '
                                                'changegroups'))
                         header, compressor = bundletypes[bundletype]
                         fh.write(header)
                         z = compressor()
                         chunkiter = cg.getchunks()
                     # parse the changegroup data, otherwise we will block
                     # in case of sshrepo because we don't know the end of the stream
                     # an empty chunkgroup is the end of the changegroup
                     # a changegroup has at least 2 chunkgroups (changelog and manifest).
                     # after that, an empty chunkgroup is the end of the changegroup
                     for chunk in chunkiter:
                         fh.write(z.compress(chunk))
                     fh.write(z.flush())
                     # everything was written: keep the file
                     cleanup = None
                     return filename
                 finally:
                     if fh is not None:
                         fh.close()
                     if cleanup is not None:
                         if filename and vfs:
                             vfs.unlink(cleanup)
                         else:
                             os.unlink(cleanup)
             def decompressor(fh, alg):
                 """Return a readable object giving the decompressed content of fh.

                 alg selects the compression: 'UN' returns fh itself, while 'GZ' and
                 'BZ' return a util.chunkbuffer fed by a generator that decompresses
                 the chunks produced by util.filechunkiter(fh).

                 Raises util.Abort for any other value of alg.
                 """
                 if alg == 'UN':
                     return fh
                 elif alg == 'GZ':
                     def generator(f):
                         zd = zlib.decompressobj()
                         for chunk in util.filechunkiter(f):
                             yield zd.decompress(chunk)
                 elif alg == 'BZ':
                     def generator(f):
                         zd = bz2.BZ2Decompressor()
                         # Prime the decompressor with the "BZ" magic; presumably
                         # those two bytes were already consumed from f as part of
                         # the bundle header -- confirm against the caller.
                         zd.decompress("BZ")
                         for chunk in util.filechunkiter(f, 4096):
                             yield zd.decompress(chunk)
                 else:
                     # marked for translation like the other Abort messages here
                     raise util.Abort(_("unknown bundle compression '%s'") % alg)
                 return util.chunkbuffer(generator(fh))
             class cg1unpacker(object):
                 """Reader for version '01' changegroup streams.

                 The file object given to the constructor is wrapped by decompressor()
                 according to `alg`; the other methods walk the chunk framing of the
                 resulting stream.
                 """
                 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
                 deltaheadersize = struct.calcsize(deltaheader)
                 version = '01'
                 def __init__(self, fh, alg):
                     stream = decompressor(fh, alg)
                     self._stream = stream
                     self._type = alg
                     # optional zero-argument hook; chunklength() calls it for
                     # every non-empty chunk
                     self.callback = None
                 def compressed(self):
                     """Tell whether the stream uses an algorithm other than 'UN'."""
                     return not self._type == 'UN'
                 def read(self, l):
                     """Delegate to the underlying stream's read()."""
                     stream = self._stream
                     return stream.read(l)
                 def seek(self, pos):
                     """Delegate to the underlying stream's seek()."""
                     stream = self._stream
                     return stream.seek(pos)
                 def tell(self):
                     """Delegate to the underlying stream's tell()."""
                     stream = self._stream
                     return stream.tell()
                 def close(self):
                     """Delegate to the underlying stream's close()."""
                     stream = self._stream
                     return stream.close()
                 def chunklength(self):
                     """Read a chunk length prefix and return the payload size.

                     Returns 0 for an encoded length of 0.  Any other encoded length
                     of 4 or less raises util.Abort.  For a real chunk the callback,
                     if set, is invoked before the size is returned.
                     """
                     (encoded,) = struct.unpack(">l", readexactly(self._stream, 4))
                     if encoded > 4:
                         if self.callback:
                             self.callback()
                         return encoded - 4
                     if encoded != 0:
                         raise util.Abort(_("invalid chunk length %d") % encoded)
                     return 0
                 def changelogheader(self):
                     """v10 does not have a changelog header chunk"""
                     nothing = {}
                     return nothing
                 def manifestheader(self):
                     """v10 does not have a manifest header chunk"""
                     nothing = {}
                     return nothing
                 def filelogheader(self):
                     """Return the filelog header as a dict; v10 only has the filename.

                     An empty dict is returned when the chunk is empty.
                     """
                     size = self.chunklength()
                     if size:
                         return {'filename': readexactly(self._stream, size)}
                     return {}
                 def _deltaheader(self, headertuple, prevnode):
                     """Expand an unpacked v1 header to (node, p1, p2, deltabase, cs).

                     v1 headers carry no delta base: it is p1 for the first delta
                     (prevnode is None) and the previous node afterwards.
                     """
                     node, p1, p2, cs = headertuple
                     deltabase = p1 if prevnode is None else prevnode
                     return node, p1, p2, deltabase, cs
                 def deltachunk(self, prevnode):
                     """Read the next delta chunk and return it as a dict.

                     The dict has the keys 'node', 'p1', 'p2', 'cs', 'deltabase' and
                     'delta'; an empty dict is returned for an empty chunk.
                     """
                     size = self.chunklength()
                     if not size:
                         return {}
                     rawheader = readexactly(self._stream, self.deltaheadersize)
                     fields = struct.unpack(self.deltaheader, rawheader)
                     payload = readexactly(self._stream, size - self.deltaheadersize)
                     node, p1, p2, deltabase, cs = self._deltaheader(fields, prevnode)
                     chunkdata = {'node': node, 'p1': p1, 'p2': p2, 'cs': cs}
                     chunkdata['deltabase'] = deltabase
                     chunkdata['delta'] = payload
                     return chunkdata
                 def getchunks(self):
                     """Yield every chunk of the changegroup, framing included.

                     Used when the binary stream must be forwarded to a file or to
                     another network API.  The data is parsed while forwarding because
                     the end of the stream is not otherwise known (reading on would
                     block, for instance with an sshrepo).
                     """
                     # A changegroup has at least 2 chunkgroups (changelog and
                     # manifest); after those, an empty chunkgroup ends it.
                     piece = 2**20
                     groups = 0
                     wasempty = False
                     while groups <= 2 or not wasempty:
                         wasempty = True
                         groups += 1
                         chunk = getchunk(self)
                         while chunk:
                             wasempty = False
                             yield chunkheader(len(chunk))
                             # hand the payload out in pieces of at most 1 MiB
                             start = 0
                             size = len(chunk)
                             while start < size:
                                 stop = start + piece
                                 yield chunk[start:stop]
                                 start = stop
                             chunk = getchunk(self)
                         yield closechunk()
             class cg2unpacker(cg1unpacker):
                 """Reader for version '02' changegroup streams.

                 Differs from cg1unpacker only in the delta header, which carries
                 the delta base explicitly.
                 """
                 version = '02'
                 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
                 deltaheadersize = struct.calcsize(deltaheader)
                 def _deltaheader(self, headertuple, prevnode):
                     """Return the five header fields unchanged; prevnode is unused."""
                     (node, parent1, parent2, base, linknode) = headertuple
                     return (node, parent1, parent2, base, linknode)
             class headerlessfixup(object):
                 """File-like reader that serves the bytes `h` before those of `fh`."""
                 def __init__(self, fh, h):
                     self._fh = fh
                     self._h = h
                 def read(self, n):
                     """Return n bytes, taken from the pending prefix first.

                     Once the prefix is used up (or too short), the remainder comes
                     from readexactly() on the wrapped file object.
                     """
                     if not self._h:
                         return readexactly(self._fh, n)
                     taken = self._h[:n]
                     self._h = self._h[n:]
                     shortfall = n - len(taken)
                     if shortfall > 0:
                         taken += readexactly(self._fh, shortfall)
                     return taken
             class cg1packer(object):
                 """Producer of version '01' changegroup chunks for a repository.

                 generate() is the entry point; it yields the changelog group, the
                 manifest group and one group per changed file as strings.
                 """
                 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
                 version = '01'
                 def __init__(self, repo, bundlecaps=None):
                     """Given a source repo, construct a bundler.
                     bundlecaps is optional and can be used to specify the set of
                     capabilities which can be used to build the bundle.
                     """
                     # Set of capabilities we can use to build the bundle.
                     if bundlecaps is None:
                         bundlecaps = set()
                     self._bundlecaps = bundlecaps
                     # bundle.reorder: 'auto' becomes None (group() then decides per
                     # revlog), any other value goes through util.parsebool
                     reorder = repo.ui.config('bundle', 'reorder', 'auto')
                     if reorder == 'auto':
                         reorder = None
                     else:
                         reorder = util.parsebool(reorder)
                     self._repo = repo
                     self._reorder = reorder
                     self._progress = repo.ui.progress
                     # size notes go to ui.note only when verbose and not in debug
                     # mode; otherwise they are discarded
                     if self._repo.ui.verbose and not self._repo.ui.debugflag:
                         self._verbosenote = self._repo.ui.note
                     else:
                         self._verbosenote = lambda s: None
                 def close(self):
                     """Return the zero-length chunk that terminates a group."""
                     return closechunk()
                 def fileheader(self, fname):
                     """Return the chunk announcing a file group: header plus fname."""
                     return chunkheader(len(fname)) + fname
                 def group(self, nodelist, revlog, lookup, units=None):
                     """Calculate a delta group, yielding a sequence of changegroup chunks
                     (strings).
                     Given a list of changeset revs, return a set of deltas and
                     metadata corresponding to nodes. The first delta is
                     first parent(nodelist[0]) -> nodelist[0], the receiver is
                     guaranteed to have this parent as it has all history before
                     these changesets. In the case firstparent is nullrev the
                     changegroup starts with a full revision.
                     If units is not None, progress detail will be generated, units specifies
                     the type of revlog that is touched (changelog, manifest, etc.).

                     lookup is called with each node and must return its linknode.
                     """
                     # if we don't have any revisions touched by these changesets, bail
                     if len(nodelist) == 0:
                         yield self.close()
                         return
                     # for generaldelta revlogs, we linearize the revs; this will both be
                     # much quicker and generate a much smaller bundle
                     if (revlog._generaldelta and self._reorder is None) or self._reorder:
                         dag = dagutil.revlogdag(revlog)
                         revs = set(revlog.rev(n) for n in nodelist)
                         revs = dag.linearize(revs)
                     else:
                         revs = sorted([revlog.rev(n) for n in nodelist])
                     # add the parent of the first rev
                     p = revlog.parentrevs(revs[0])[0]
                     revs.insert(0, p)
                     # build deltas
                     # revs[0] is only the base of the first delta, hence the "- 1"
                     total = len(revs) - 1
                     msgbundling = _('bundling')
                     for r in xrange(len(revs) - 1):
                         if units is not None:
                             self._progress(msgbundling, r + 1, unit=units, total=total)
                         prev, curr = revs[r], revs[r + 1]
                         linknode = lookup(revlog.node(curr))
                         for c in self.revchunk(revlog, curr, prev, linknode):
                             yield c
                     if units is not None:
                         # a position of None closes the progress topic
                         self._progress(msgbundling, None)
                     yield self.close()
                 # filter any nodes that claim to be part of the known set
                 def prune(self, revlog, missing, commonrevs):
                     """Return the nodes of missing whose linkrev is not in commonrevs."""
                     rr, rl = revlog.rev, revlog.linkrev
                     return [n for n in missing if rl(rr(n)) not in commonrevs]
                 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
                     '''yield a sequence of changegroup chunks (strings)

                     commonrevs is used to prune manifest and file nodes, clnodes are
                     the changelog nodes to send, fastpathlinkrev selects how file
                     linkrevs are found and source is passed to generatefiles() and to
                     the 'outgoing' hook.'''
                     repo = self._repo
                     cl = repo.changelog
                     ml = repo.manifest
                     # changelog node -> position in which lookupcl() saw it
                     clrevorder = {}
                     mfs = {} # needed manifests
                     fnodes = {} # needed file nodes
                     changedfiles = set()
                     # Callback for the changelog, used to collect changed files and manifest
                     # nodes.
                     # Returns the linkrev node (identity in the changelog case).
                     def lookupcl(x):
                         c = cl.read(x)
                         clrevorder[x] = len(clrevorder)
                         # c[3] is used as the list of files touched by the changeset
                         changedfiles.update(c[3])
                         # record the first changeset introducing this manifest version
                         mfs.setdefault(c[0], x)
                         return x
                     self._verbosenote(_('uncompressed size of bundle content:\n'))
                     size = 0
                     for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
                         size += len(chunk)
                         yield chunk
                     self._verbosenote(_('%8.i (changelog)\n') % size)
                     # We need to make sure that the linkrev in the changegroup refers to
                     # the first changeset that introduced the manifest or file revision.
                     # The fastpath is usually safer than the slowpath, because the filelogs
                     # are walked in revlog order.
                     #
                     # When taking the slowpath with reorder=None and the manifest revlog
                     # uses generaldelta, the manifest may be walked in the "wrong" order.
                     # Without 'clrevorder', we would get an incorrect linkrev (see fix in
                     # cc0ff93d0c0c).
                     #
                     # When taking the fastpath, we are only vulnerable to reordering
                     # of the changelog itself. The changelog never uses generaldelta, so
                     # it is only reordered when reorder=True. To handle this case, we
                     # simply take the slowpath, which already has the 'clrevorder' logic.
                     # This was also fixed in cc0ff93d0c0c.
                     fastpathlinkrev = fastpathlinkrev and not self._reorder
                     # Callback for the manifest, used to collect linkrevs for filelog
                     # revisions.
                     # Returns the linkrev node (collected in lookupcl).
                     def lookupmf(x):
                         clnode = mfs[x]
                         if not fastpathlinkrev:
                             mdata = ml.readfast(x)
                             for f, n in mdata.iteritems():
                                 if f in changedfiles:
                                     # record the first changeset introducing this filelog
                                     # version
                                     fclnodes = fnodes.setdefault(f, {})
                                     fclnode = fclnodes.setdefault(n, clnode)
                                     # keep whichever changeset lookupcl() saw first
                                     if clrevorder[clnode] < clrevorder[fclnode]:
                                         fclnodes[n] = clnode
                         return clnode
                     mfnodes = self.prune(ml, mfs, commonrevs)
                     size = 0
                     for chunk in self.group(mfnodes, ml, lookupmf, units=_('manifests')):
                         size += len(chunk)
                         yield chunk
                     self._verbosenote(_('%8.i (manifests)\n') % size)
                     # the manifest map is not read again below
                     mfs.clear()
                     clrevs = set(cl.rev(x) for x in clnodes)
                     # Returns a {filenode: changelog node} dict for one file.
                     def linknodes(filerevlog, fname):
                         if fastpathlinkrev:
                             llr = filerevlog.linkrev
                             def genfilenodes():
                                 for r in filerevlog:
                                     linkrev = llr(r)
                                     if linkrev in clrevs:
                                         yield filerevlog.node(r), cl.node(linkrev)
                             return dict(genfilenodes())
                         return fnodes.get(fname, {})
                     for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                                     source):
                         yield chunk
                     yield self.close()
                     # runs only once the consumer has pulled the final chunk
                     if clnodes:
                         repo.hook('outgoing', node=hex(clnodes[0]), source=source)
                 # The 'source' parameter is useful for extensions
                 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
                     """Yield the chunks of every file group, in sorted filename order.

                     linknodes(filerevlog, fname) must return a {filenode: linknode}
                     dict.  Files left with no node after pruning against commonrevs
                     produce no output.  Raises util.Abort when repo.file(fname) is
                     falsy.  source is not used by this implementation.
                     """
                     repo = self._repo
                     progress = self._progress
                     msgbundling = _('bundling')
                     total = len(changedfiles)
                     # for progress output
                     msgfiles = _('files')
                     for i, fname in enumerate(sorted(changedfiles)):
                         filerevlog = repo.file(fname)
                         if not filerevlog:
                             raise util.Abort(_("empty or missing revlog for %s") % fname)
                         linkrevnodes = linknodes(filerevlog, fname)
                         # Lookup for filenodes, we collected the linkrev nodes above in the
                         # fastpath case and with lookupmf in the slowpath case.
                         def lookupfilelog(x):
                             return linkrevnodes[x]
                         filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
                         if filenodes:
                             progress(msgbundling, i + 1, item=fname, unit=msgfiles,
                                      total=total)
                             h = self.fileheader(fname)
                             size = len(h)
                             yield h
                             for chunk in self.group(filenodes, filerevlog, lookupfilelog):
                                 size += len(chunk)
                                 yield chunk
                             self._verbosenote(_('%8.i  %s\n') % (size, fname))
                     progress(msgbundling, None)
                 def deltaparent(self, revlog, rev, p1, p2, prev):
                     """Return the rev to delta against: always prev in this version."""
                     return prev
                 def revchunk(self, revlog, rev, prev, linknode):
                     """Yield the chunk header, the metadata and the delta for rev.

                     The metadata is the delta header from builddeltaheader(),
                     followed by a diff header when the payload is a full text
                     rather than a real delta.
                     """
                     node = revlog.node(rev)
                     p1, p2 = revlog.parentrevs(rev)
                     base = self.deltaparent(revlog, rev, p1, p2, prev)
                     prefix = ''
                     if revlog.iscensored(base) or revlog.iscensored(rev):
                         # send the full text (or the tombstone of a censored node)
                         # instead of a computed delta
                         try:
                             delta = revlog.revision(node)
                         except error.CensoredNodeError as e:
                             delta = e.tombstone
                         if base == nullrev:
                             prefix = mdiff.trivialdiffheader(len(delta))
                         else:
                             # presumably a header replacing all baselen bytes of the
                             # base with the new text -- see mdiff.replacediffheader
                             baselen = revlog.rawsize(base)
                             prefix = mdiff.replacediffheader(baselen, len(delta))
                     elif base == nullrev:
                         # no base to delta against: send the full revision
                         delta = revlog.revision(node)
                         prefix = mdiff.trivialdiffheader(len(delta))
                     else:
                         delta = revlog.revdiff(base, rev)
                     p1n, p2n = revlog.parents(node)
                     basenode = revlog.node(base)
                     meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode)
                     meta += prefix
                     l = len(meta) + len(delta)
                     yield chunkheader(l)
                     yield meta
                     yield delta
                 def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
                     """Pack node, p1n, p2n and linknode using self.deltaheader."""
                     # do nothing with basenode, it is implicitly the previous one in HG10
                     return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
             class cg2packer(cg1packer):
                 """Producer of version '02' changegroup chunks.

                 Unlike cg1packer it names the delta base in every delta header and
                 may therefore delta against a parent instead of the previous rev.
                 """
                 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
                 version = '02'
                 def __init__(self, repo, bundlecaps=None):
                     super(cg2packer, self).__init__(repo, bundlecaps)
                     # Since generaldelta is directly supported by cg2, reordering
                     # generally doesn't help, so it is disabled by default:
                     # bundle.reorder=auto is treated like bundle.reorder=False.
                     if self._reorder is None:
                         self._reorder = False
                 def deltaparent(self, revlog, rev, p1, p2, prev):
                     """Return the rev to delta against.

                     The revlog's own delta parent is reused when it is p1, p2 or
                     prev.  prev is picked instead when the stored base is nullrev
                     (to avoid sending a full revision) or when it is some other rev
                     the remote cannot be assumed to have.
                     """
                     stored = revlog.deltaparent(rev)
                     if stored != nullrev and stored in (p1, p2, prev):
                         return stored
                     return prev
                 def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
                     """Pack all five fields, basenode included, using self.deltaheader."""
                     fields = (node, p1n, p2n, basenode, linknode)
                     return struct.pack(self.deltaheader, *fields)
             # changegroup version string -> (packer class, unpacker class)
             packermap = {'01': (cg1packer, cg1unpacker),
                          '02': (cg2packer, cg2unpacker)}
             def _changegroupinfo(repo, nodes, source):
                 if repo.ui.verbose or source == 'bundle':
                     repo.ui.status(_("%d changesets found\n") % len(nodes))
                 if repo.ui.debugflag:
                     repo.ui.debug("list of changesets:\n")
                     for node in nodes:
                         repo.ui.debug("%s\n" % hex(node))
             def getsubsetraw(repo, outgoing, bundler, source, fastpath=False):
                 """Return the raw chunk generator for the changesets in outgoing.

                 repo is switched to its unfiltered view.  outgoing provides the
                 common, missing and missingheads attributes.  bundler is a packer
                 whose generate() builds the chunks.  source is passed to the
                 'preoutgoing' hook and to the bundler.  fastpath forces the fast
                 linkrev path.

                 The 'preoutgoing' hook runs with throw=True, so it may abort the
                 operation before anything is generated.
                 """
                 repo = repo.unfiltered()
                 commonrevs = outgoing.common
                 csets = outgoing.missing
                 # Work on a sorted copy: sorting in place would silently reorder
                 # the caller's outgoing.missingheads list.
                 heads = sorted(outgoing.missingheads)
                 # We go through the fast path if we get told to, or if all
                 # (unfiltered) heads have been requested (since we then know that
                 # all linkrevs will be pulled by the client).
                 fastpathlinkrev = fastpath or (
                         repo.filtername is None and heads == sorted(repo.heads()))
                 repo.hook('preoutgoing', throw=True, source=source)
                 _changegroupinfo(repo, csets, source)
                 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
             def getsubset(repo, outgoing, bundler, source, fastpath=False, version='01'):
                 """Return an unpacker of the given version over the outgoing chunks.

                 The raw generator from getsubsetraw() is buffered with
                 util.chunkbuffer and read as an uncompressed ('UN') stream.
                 """
                 rawchunks = getsubsetraw(repo, outgoing, bundler, source, fastpath)
                 unpackercls = packermap[version][1]
                 buffered = util.chunkbuffer(rawchunks)
                 return unpackercls(buffered, 'UN')
             def changegroupsubset(repo, roots, heads, source, version='01'):
                 """Compute a changegroup consisting of all the nodes that are
                 descendants of any of the roots and ancestors of any of the heads.

                 Return a chunkbuffer object whose read() method will return
                 successive changegroup chunks.

                 It is fairly complex as determining which filenodes and which
                 manifest nodes need to be included for the changeset to be complete
                 is non-trivial.

                 Another wrinkle is doing the reverse, figuring out which changeset in
                 the changegroup a particular filenode or manifestnode belongs to.
                 """
                 cl = repo.changelog
                 roots = roots or [nullid]
                 # non-null parents of the roots are the candidate discovery bases
                 rootparents = [p for root in roots
                                for p in cl.parents(root) if p != nullid]
                 # TODO: remove call to nodesbetween.
                 csets, roots, heads = cl.nodesbetween(roots, heads)
                 bundled = set(csets)
                 # a base that is itself part of the bundle cannot be common
                 discbases = [p for p in rootparents if p not in bundled]
                 outgoing = discovery.outgoing(cl, discbases, heads)
                 packercls = packermap[version][0]
                 return getsubset(repo, outgoing, packercls(repo), source,
                                  version=version)
             def getlocalchangegroupraw(repo, source, outgoing, bundlecaps=None,
                                        version='01'):
                 """Like getbundle, but taking a discovery.outgoing as an argument.

                 This is only implemented for local repos and reuses potentially
                 precomputed sets in outgoing. Returns a raw changegroup generator,
                 or None when outgoing.missing is empty."""
                 if outgoing.missing:
                     packercls = packermap[version][0]
                     return getsubsetraw(repo, outgoing, packercls(repo, bundlecaps),
                                         source)
                 return None
             def getlocalchangegroup(repo, source, outgoing, bundlecaps=None):
                 """Like getbundle, but taking a discovery.outgoing as an argument.

                 This is only implemented for local repos and reuses potentially
                 precomputed sets in outgoing. Always builds a version '01'
                 changegroup; returns None when outgoing.missing is empty."""
                 if outgoing.missing:
                     packer = cg1packer(repo, bundlecaps)
                     return getsubset(repo, outgoing, packer, source)
                 return None
             def computeoutgoing(repo, heads, common):
                 """Return the discovery.outgoing for the given heads and common nodes.

                 Nodes of common that the local changelog does not know are
                 dropped; a false common is replaced by [nullid]. A false heads is
                 replaced by the changelog heads.
                 Kept as a standalone function so that extensions can reuse the
                 logic.
                 """
                 changelog = repo.changelog
                 if not common:
                     common = [nullid]
                 else:
                     known = changelog.hasnode
                     common = [node for node in common if known(node)]
                 heads = heads or changelog.heads()
                 return discovery.outgoing(changelog, common, heads)
             def getchangegroup(repo, source, heads=None, common=None, bundlecaps=None):
                 """Return the changegroup for ancestors(heads) - ancestors(common).

                 Similar to changegroupsubset, but expressed as a set difference.
                 heads defaults to the local heads and common to [nullid]. Because
                 of how the current discovery protocol works, some nodes in common
                 may be unknown locally.
                 """
                 missing = computeoutgoing(repo, heads, common)
                 return getlocalchangegroup(repo, source, missing,
                                            bundlecaps=bundlecaps)
             def changegroup(repo, basenodes, source):
                 """Return changegroupsubset() of basenodes and the current repo heads."""
                 # going through changegroupsubset() avoids a race (issue1320)
                 currentheads = repo.heads()
                 return changegroupsubset(repo, basenodes, currentheads, source)
             def addchangegroupfiles(repo, source, revmap, trp, pr, needfiles):
                 """Add every file revlog group read from source to repo.

                 Groups are consumed until source.filelogheader() returns a false
                 value; pr() is called once per group to report progress.
                 needfiles maps a filename to the set of file nodes that must be
                 received; it is updated in place as nodes show up. A received
                 node that is not expected for a file listed in needfiles aborts,
                 and so does any expected node still unknown to its filelog once
                 all groups have been read.
                 Returns a (revisions, files) pair: the number of file revisions
                 added and the number of groups processed.
                 """
                 totalrevs = 0
                 totalfiles = 0
                 chunkdata = source.filelogheader()
                 while chunkdata:
                     fname = chunkdata["filename"]
                     repo.ui.debug("adding %s revisions\n" % fname)
                     pr()
                     filelog = repo.file(fname)
                     before = len(filelog)
                     try:
                         added = filelog.addgroup(source, revmap, trp)
                         if not added:
                             raise util.Abort(_("received file revlog group is empty"))
                     except error.CensoredBaseError as e:
                         raise util.Abort(_("received delta base is censored: %s") % e)
                     totalrevs += len(filelog) - before
                     totalfiles += 1
                     if fname in needfiles:
                         expected = needfiles[fname]
                         # every revision just added must be one we were waiting for
                         for rev in xrange(before, len(filelog)):
                             node = filelog.node(rev)
                             if node not in expected:
                                 raise util.Abort(
                                     _("received spurious file revlog entry"))
                             expected.remove(node)
                         if not expected:
                             del needfiles[fname]
                     chunkdata = source.filelogheader()
                 repo.ui.progress(_('files'), None)
                 # whatever is still expected must already be present in the filelog
                 for fname, expected in needfiles.iteritems():
                     filelog = repo.file(fname)
                     for node in expected:
                         try:
                             filelog.rev(node)
                         except error.LookupError:
                             raise util.Abort(
                                 _('missing file data for %s:%s - run hg verify') %
                                 (fname, hex(node)))
                 return totalrevs, totalfiles
             def addchangegroup(repo, source, srctype, url, emptyok=False,
                                targetphase=phases.draft, expectedtotal=None):
                 """Add the changegroup returned by source.read() to this repo.
                 srctype is a string like 'push', 'pull', or 'unbundle'.  url is
                 the URL of the repo where this changegroup is coming from.
                 Unless emptyok is true, an empty changelog group aborts with
                 util.Abort. targetphase is handed to phases.retractboundary for the
                 added changesets when srctype is not 'push', 'serve' or 'strip'.
                 expectedtotal is used as the total of the 'changesets' progress.
                 The changelog, manifest and file groups are read from source, in
                 that order, inside a single transaction. The 'prechangegroup' and
                 'pretxnchangegroup' hooks run before the transaction is closed; the
                 'changegroup' and 'incoming' hooks are scheduled through
                 repo._afterlock.
                 Return an integer summarizing the change to this repo:
                 - nothing changed or no source: 0
                 - more heads than before: 1+added heads (2..n)
                 - fewer heads than before: -1-removed heads (-2..-n)
                 - number of heads stays the same: 1
                 """
                 repo = repo.unfiltered()
                 # passed to cl.addgroup below: logs the node and returns the current
                 # changelog length
                 def csmap(x):
                     repo.ui.debug("add changeset %s\n" % short(x))
                     return len(cl)
                 # node -> changelog revision number; passed to the manifest and file
                 # addgroup calls
                 def revmap(x):
                     return cl.rev(x)
                 if not source:
                     return 0
                 changesets = files = revisions = 0
                 # NOTE(review): lines starting with '-' / '+' in this function are
                 # diff markers (removed / added lines of the change being shown),
                 # not Python syntax.
-                efiles = set()
                 tr = repo.transaction("\n".join([srctype, util.hidepassword(url)]))
                 # The transaction could have been created before and already carries source
                 # information. In this case we use the top level data. We overwrite the
                 # argument because we need to use the top level value (if they exist) in
                 # this function.
                 srctype = tr.hookargs.setdefault('source', srctype)
                 url = tr.hookargs.setdefault('url', url)
                 # write changelog data to temp files so concurrent readers will not see
                 # inconsistent view
                 cl = repo.changelog
                 cl.delayupdate(tr)
                 # heads before anything is added; used for the head delta and by the
                 # 'incoming' log entry
                 oldheads = cl.heads()
                 try:
                     repo.hook('prechangegroup', throw=True, **tr.hookargs)
                     trp = weakref.proxy(tr)
                     # pull off the changeset group
                     repo.ui.status(_("adding changesets\n"))
                     clstart = len(cl)
                     # callable progress reporter: each call reports the current count
                     # for `step` (out of `total`) and then increments the count
                     class prog(object):
                         def __init__(self, step, total):
                             self._step = step
                             self._total = total
                             self._count = 1
                         def __call__(self):
                             repo.ui.progress(self._step, self._count, unit=_('chunks'),
                                              total=self._total)
                             self._count += 1
                     source.callback = prog(_('changesets'), expectedtotal)
                     # efiles gathers cl.read(node)[3] of every added changeset
                     # (presumably its list of touched files - TODO confirm); its size
                     # becomes the total of the 'files' progress further down
+                    efiles = set()
+                    def onchangelog(cl, node):
+                        efiles.update(cl.read(node)[3])
                     source.changelogheader()
-                    srccontent = cl.addgroup(source, csmap, trp)
+                    srccontent = cl.addgroup(source, csmap, trp,
+                                             addrevisioncb=onchangelog)
+                    efiles = len(efiles)
                     if not (srccontent or emptyok):
                         raise util.Abort(_("received changelog group is empty"))
                     clend = len(cl)
                     # number of changesets actually added to the changelog
                     changesets = clend - clstart
-                    for c in xrange(clstart, clend):
-                        efiles.update(repo[c].files())
-                    efiles = len(efiles)
                     repo.ui.progress(_('changesets'), None)
                     # pull off the manifest group
                     repo.ui.status(_("adding manifests\n"))
                     # manifests <= changesets
                     source.callback = prog(_('manifests'), changesets)
                     # no need to check for empty manifest group here:
                     # if the result of the merge of 1 and 2 is the same in 3 and 4,
                     # no new manifest will be created and the manifest group will
                     # be empty during the pull
                     source.manifestheader()
                     repo.manifest.addgroup(source, revmap, trp)
                     repo.ui.progress(_('manifests'), None)
                     # filename -> set of file nodes that addchangegroupfiles must see;
                     # only filled in when server.validate is enabled
                     needfiles = {}
                     if repo.ui.configbool('server', 'validate', default=False):
                         # validate incoming csets have their manifests
                         for cset in xrange(clstart, clend):
                             mfnode = repo.changelog.read(repo.changelog.node(cset))[0]
                             mfest = repo.manifest.readdelta(mfnode)
                             # store file nodes we must see
                             for f, n in mfest.iteritems():
                                 needfiles.setdefault(f, set()).add(n)
                     # process the files
                     repo.ui.status(_("adding file changes\n"))
                     # progress for files is driven explicitly through pr rather than
                     # through source.callback
                     source.callback = None
                     pr = prog(_('files'), efiles)
                     newrevs, newfiles = addchangegroupfiles(repo, source, revmap, trp, pr,
                                                             needfiles)
                     revisions += newrevs
                     files += newfiles
                     # dh: change in the number of heads; a new head that closes its
                     # branch is not counted
                     dh = 0
                     if oldheads:
                         heads = cl.heads()
                         dh = len(heads) - len(oldheads)
                         for h in heads:
                             if h not in oldheads and repo[h].closesbranch():
                                 dh -= 1
                     htext = ""
                     if dh:
                         htext = _(" (%+d heads)") % dh
                     repo.ui.status(_("added %d changesets"
                                      " with %d changes to %d files%s\n")
                                      % (changesets, revisions, files, htext))
                     repo.invalidatevolatilesets()
                     if changesets > 0:
                         p = lambda: tr.writepending() and repo.root or ""
                         # 'node' is stored on the transaction only if none is set yet;
                         # the hooks run for this call always receive the first
                         # changeset added by this changegroup
                         if 'node' not in tr.hookargs:
                             tr.hookargs['node'] = hex(cl.node(clstart))
                             hookargs = dict(tr.hookargs)
                         else:
                             hookargs = dict(tr.hookargs)
                             hookargs['node'] = hex(cl.node(clstart))
                         repo.hook('pretxnchangegroup', throw=True, pending=p, **hookargs)
                     # nodes of the changesets added by this call
                     added = [cl.node(r) for r in xrange(clstart, clend)]
                     publishing = repo.publishing()
                     if srctype in ('push', 'serve'):
                         # Old servers can not push the boundary themselves.
                         # New servers won't push the boundary if changeset already
                         # exists locally as secret
                         #
                         # We should not use added here but the list of all change in
                         # the bundle
                         if publishing:
                             phases.advanceboundary(repo, tr, phases.public, srccontent)
                         else:
                             # Those changesets have been pushed from the outside, their
                             # phases are going to be pushed alongside. Therefore
                             # `targetphase` is ignored.
                             phases.advanceboundary(repo, tr, phases.draft, srccontent)
                             phases.retractboundary(repo, tr, phases.draft, added)
                     elif srctype != 'strip':
                         # publishing only alter behavior during push
                         #
                         # strip should not touch boundary at all
                         phases.retractboundary(repo, tr, targetphase, added)
                     if changesets > 0:
                         if srctype != 'strip':
                             # During strip, branchcache is invalid but coming call to
                             # `destroyed` will repair it.
                             # In other case we can safely update cache on disk.
                             branchmap.updatecache(repo.filtered('served'))
                         def runhooks():
                             # These hooks run when the lock releases, not when the
                             # transaction closes. So it's possible for the changelog
                             # to have changed since we last saw it.
                             if clstart >= len(repo):
                                 return
                             # forcefully update the on-disk branch cache
                             repo.ui.debug("updating the branch cache\n")
                             repo.hook("changegroup", **hookargs)
                             # 'incoming' runs once per added changeset, with 'node'
                             # set to that changeset
                             for n in added:
                                 args = hookargs.copy()
                                 args['node'] = hex(n)
                                 repo.hook("incoming", **args)
                             newheads = [h for h in repo.heads() if h not in oldheads]
                             repo.ui.log("incoming",
                                         "%s incoming changes - new heads: %s\n",
                                         len(added),
                                         ', '.join([hex(c[:6]) for c in newheads]))
                         # the post-close callback hands runhooks to repo._afterlock
                         tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                         lambda tr: repo._afterlock(runhooks))
                     tr.close()
                 finally:
                     tr.release()
                     repo.ui.flush()
                 # never return 0 here:
                 if dh < 0:
                     return dh - 1
                 else:
                     return dh + 1