@@ -1,417 +1,416 @@
# exchangev2.py - repository exchange for wire protocol version 2
#
# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import weakref

from .i18n import _
from .node import (
    nullid,
    short,
)
from . import (
    bookmarks,
    error,
    mdiff,
    phases,
    pycompat,
    setdiscovery,
)
def pull(pullop):
    """Pull using wire protocol version 2."""
    repo = pullop.repo
    remote = pullop.remote
    tr = pullop.trmanager.transaction()

    # Figure out what needs to be fetched.
    common, fetch, remoteheads = _pullchangesetdiscovery(
        repo, remote, pullop.heads, abortwhenunrelated=pullop.force)

    # And fetch the data.
    pullheads = pullop.heads or remoteheads
    csetres = _fetchchangesets(repo, tr, remote, common, fetch, pullheads)

    # New revisions are written to the changelog. But all other updates
    # are deferred. Do those now.

    # Ensure all new changesets are draft by default. If the repo is
    # publishing, the phase will be adjusted by the loop below.
    if csetres['added']:
        phases.registernew(repo, tr, phases.draft, csetres['added'])

    # And adjust the phase of all changesets accordingly.
    for phase in phases.phasenames:
        if phase == b'secret' or not csetres['nodesbyphase'][phase]:
            continue

        phases.advanceboundary(repo, tr, phases.phasenames.index(phase),
                               csetres['nodesbyphase'][phase])

    # Write bookmark updates.
    bookmarks.updatefromremote(repo.ui, repo, csetres['bookmarks'],
                               remote.url(), pullop.gettransaction,
                               explicit=pullop.explicitbookmarks)

    manres = _fetchmanifests(repo, tr, remote, csetres['manifestnodes'])

    # Find all file nodes referenced by added manifests and fetch those
    # revisions.
    fnodes = _derivefilesfrommanifests(repo, manres['added'])
    _fetchfiles(repo, tr, remote, fnodes, manres['linkrevs'])
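Note: the helpers below communicate through plain dicts rather than objects.
As a reading aid, here is a sketch of the csetres contract consumed by pull().
The keys are taken from _processchangesetdata()'s return statement further
down; the empty placeholder values are illustrative only:

    csetres = {
        'added': [],         # changelog nodes added by this pull
        'nodesbyphase': {},  # phase name (bytes) -> set of nodes
        'bookmarks': {},     # bookmark name (bytes) -> remote node
        'manifestnodes': {}, # changelog rev (int) -> manifest node
    }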
def _pullchangesetdiscovery(repo, remote, heads, abortwhenunrelated=True):
    """Determine which changesets need to be pulled."""

    if heads:
        knownnode = repo.changelog.hasnode
        if all(knownnode(head) for head in heads):
            return heads, False, heads

    # TODO wire protocol version 2 is capable of more efficient discovery
    # than setdiscovery. Consider implementing something better.
    common, fetch, remoteheads = setdiscovery.findcommonheads(
        repo.ui, repo, remote, abortwhenunrelated=abortwhenunrelated)

    common = set(common)
    remoteheads = set(remoteheads)

    # If a remote head is filtered locally, put it back in the common set.
    # See the comment in exchange._pulldiscoverychangegroup() for more.

    if fetch and remoteheads:
        nodemap = repo.unfiltered().changelog.nodemap

        common |= {head for head in remoteheads if head in nodemap}

        if set(remoteheads).issubset(common):
            fetch = []

    common.discard(nullid)

    return common, fetch, remoteheads
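Note: the filtered-heads adjustment above works purely on sets. A runnable
miniature with short strings standing in for 20-byte nodes (all values here
are hypothetical):

    remoteheads = {b'h1', b'h2'}
    common = {b'h1'}
    nodemap = {b'h2': 5}  # stand-in for the unfiltered changelog's nodemap

    # A remote head that exists locally, even if filtered, counts as common.
    common |= {head for head in remoteheads if head in nodemap}

    # If that accounts for every remote head, there is nothing to fetch.
    fetch = [] if remoteheads.issubset(common) else sorted(remoteheads - common)
    assert fetch == []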
def _fetchchangesets(repo, tr, remote, common, fetch, remoteheads):
    # TODO consider adding a step here where we obtain the DAG shape first
    # (or ask the server to slice changesets into chunks for us) so that
    # we can perform multiple fetches in batches. This will facilitate
    # resuming interrupted clones, higher server-side cache hit rates due
    # to smaller segments, etc.
    with remote.commandexecutor() as e:
        objs = e.callcommand(b'changesetdata', {
            b'noderange': [sorted(common), sorted(remoteheads)],
            b'fields': {b'bookmarks', b'parents', b'phase', b'revision'},
        }).result()

        # The context manager waits on all response data when exiting. So
        # we need to remain in the context manager in order to stream data.
        return _processchangesetdata(repo, tr, objs)
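Note: the comment about staying inside the context manager is load-bearing.
A self-contained analogue (not the real peer API) of why the response
generator must be consumed before the executor exits:

    import contextlib

    @contextlib.contextmanager
    def commandexecutor():
        responses = iter([b'header', b'rev1', b'rev2'])
        yield responses
        # __exit__ waits on all response data, draining anything unread.
        for _unread in responses:
            pass

    with commandexecutor() as objs:
        meta = next(objs)        # stream while the executor is still open
        revisions = list(objs)

    assert meta == b'header' and revisions == [b'rev1', b'rev2']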
def _processchangesetdata(repo, tr, objs):
    repo.hook('prechangegroup', throw=True,
              **pycompat.strkwargs(tr.hookargs))

    urepo = repo.unfiltered()
    cl = urepo.changelog

    cl.delayupdate(tr)

    # The first emitted object is a header describing the data that
    # follows.
    meta = next(objs)

    progress = repo.ui.makeprogress(_('changesets'),
                                    unit=_('chunks'),
                                    total=meta.get(b'totalitems'))

    manifestnodes = {}

    def linkrev(node):
        repo.ui.debug('add changeset %s\n' % short(node))
        # Linkrev for changelog is always self.
        return len(cl)

    def onchangeset(cl, node):
        progress.increment()

        revision = cl.changelogrevision(node)

        # We need to preserve the mapping of changelog revision to node
        # so we can set the linkrev accordingly when manifests are added.
        manifestnodes[cl.rev(node)] = revision.manifest

    nodesbyphase = {phase: set() for phase in phases.phasenames}
    remotebookmarks = {}

    # addgroup() expects a 7-tuple describing revisions. This normalizes
    # the wire data to that format.
    #
    # This loop also aggregates non-revision metadata, such as phase
    # data.
    def iterrevisions():
        for cset in objs:
            node = cset[b'node']

            if b'phase' in cset:
                nodesbyphase[cset[b'phase']].add(node)

            for mark in cset.get(b'bookmarks', []):
                remotebookmarks[mark] = node

            # TODO add mechanism for extensions to examine records so they
            # can siphon off custom data fields.

            extrafields = {}

            for field, size in cset.get(b'fieldsfollowing', []):
                extrafields[field] = next(objs)

            # Some entries might be metadata-only updates.
            if b'revision' not in extrafields:
                continue

            data = extrafields[b'revision']

            yield (
                node,
                cset[b'parents'][0],
                cset[b'parents'][1],
                # Linknode is always itself for changesets.
                cset[b'node'],
                # We always send full revisions. So delta base is not set.
                nullid,
                mdiff.trivialdiffheader(len(data)) + data,
                # Flags not yet supported.
                0,
            )

    added = cl.addgroup(iterrevisions(), linkrev, weakref.proxy(tr),
                        addrevisioncb=onchangeset)

    progress.complete()

    return {
        'added': added,
        'nodesbyphase': nodesbyphase,
        'bookmarks': remotebookmarks,
        'manifestnodes': manifestnodes,
    }
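Note: two details of the yield above are easy to miss. First, the 7-tuple is
purely positional; a descriptive stand-in for its layout (the field names are
mine, the order comes from the yield):

    import collections

    revisiondelta = collections.namedtuple('revisiondelta', [
        'node',       # revision being added
        'p1node',     # first parent
        'p2node',     # second parent
        'linknode',   # changelog node this revision is tied to
        'deltabase',  # node the delta applies against (nullid = full text)
        'delta',      # binary delta data
        'flags',      # storage flags (always 0 here)
    ])

Second, a full revision travels through the delta slot as a "trivial" delta
against the null revision. Assuming the bdiff binary patch format used by
revlogs, mdiff.trivialdiffheader(n) is just a 12-byte header meaning
"replace the empty range [0, 0) with n bytes":

    import struct

    def trivialdiffheader(length):
        return struct.pack('>lll', 0, 0, length)

    data = b'full revision text'
    delta = trivialdiffheader(len(data)) + data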
def _fetchmanifests(repo, tr, remote, manifestnodes):
    rootmanifest = repo.manifestlog.getstorage(b'')

    # Some manifests can be shared between changesets. Filter out revisions
    # we already know about.
    fetchnodes = []
    linkrevs = {}
    seen = set()

    for clrev, node in sorted(manifestnodes.iteritems()):
        if node in seen:
            continue

        try:
            rootmanifest.rev(node)
        except error.LookupError:
            fetchnodes.append(node)
            linkrevs[node] = clrev

        seen.add(node)

    # TODO handle tree manifests

    # addgroup() expects a 7-tuple describing revisions. This normalizes
    # the wire data to that format.
    def iterrevisions(objs, progress):
        for manifest in objs:
            node = manifest[b'node']

            extrafields = {}

            for field, size in manifest.get(b'fieldsfollowing', []):
                extrafields[field] = next(objs)

            if b'delta' in extrafields:
                basenode = manifest[b'deltabasenode']
                delta = extrafields[b'delta']
            elif b'revision' in extrafields:
                basenode = nullid
                revision = extrafields[b'revision']
                delta = mdiff.trivialdiffheader(len(revision)) + revision
            else:
                continue

            yield (
                node,
                manifest[b'parents'][0],
                manifest[b'parents'][1],
                # This value is passed to the lookup function given to
                # addgroup(). We already have a map of manifest node to
                # changelog revision number, so we pass in the manifest
                # node here and use linkrevs.__getitem__ as the
                # resolution function.
                node,
                basenode,
                delta,
                # Flags not yet supported.
                0
            )

            progress.increment()

    progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                    total=len(fetchnodes))

-    # Fetch manifests 10,000 per command.
-    # TODO have server advertise preferences?
+    commandmeta = remote.apidescriptor[b'commands'][b'manifestdata']
+    batchsize = commandmeta.get(b'recommendedbatchsize', 10000)
     # TODO make size configurable on client?
-    batchsize = 10000

    # We send commands one at a time to the remote. This is not the most
    # efficient because we incur a round trip at the end of each batch.
    # However, the existing frame-based reactor keeps consuming server
    # data in the background, and this results in response data buffering
    # in memory. This can consume gigabytes of memory.
    # TODO send multiple commands in a request once background buffering
    # issues are resolved.

    added = []

    for i in pycompat.xrange(0, len(fetchnodes), batchsize):
        batch = [node for node in fetchnodes[i:i + batchsize]]
        if not batch:
            continue

        with remote.commandexecutor() as e:
            objs = e.callcommand(b'manifestdata', {
                b'tree': b'',
                b'nodes': batch,
                b'fields': {b'parents', b'revision'},
                b'haveparents': True,
            }).result()

            # Chomp off the header object.
            next(objs)

            added.extend(rootmanifest.addgroup(
                iterrevisions(objs, progress),
                linkrevs.__getitem__,
                weakref.proxy(tr)))

    progress.complete()

    return {
        'added': added,
        'linkrevs': linkrevs,
    }
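Note: the hunk above is the only functional change in this diff. Instead of a
hard-coded batch size, the client now honors a recommendedbatchsize advertised
in the server's manifestdata command descriptor, keeping 10,000 as the
fallback. A sketch of the lookup against a hypothetical descriptor (only the
key names used by the new code are assumed):

    apidescriptor = {
        b'commands': {
            b'manifestdata': {b'recommendedbatchsize': 100000},
        },
    }

    commandmeta = apidescriptor[b'commands'][b'manifestdata']
    batchsize = commandmeta.get(b'recommendedbatchsize', 10000)
    assert batchsize == 100000

A server that omits the key simply gets the old 10,000 default.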
def _derivefilesfrommanifests(repo, manifestnodes):
    """Determine which file nodes are relevant given a set of manifest nodes.

    Returns a dict mapping file paths to dicts of file node to first manifest
    node.
    """
    ml = repo.manifestlog
    fnodes = collections.defaultdict(dict)

    progress = repo.ui.makeprogress(
        _('scanning manifests'), total=len(manifestnodes))

    with progress:
        for manifestnode in manifestnodes:
            m = ml.get(b'', manifestnode)

            # TODO this will pull in unwanted nodes because it takes the
            # storage delta into consideration. What we really want is
            # something that takes the delta between the manifest's parents.
            # And ideally we would ignore file nodes that are known locally.
            # For now, ignore both of these limitations. This will result
            # in incremental fetches requesting data we already have, so it
            # is far from ideal.
            md = m.readfast()

            for path, fnode in md.items():
                fnodes[path].setdefault(fnode, manifestnode)

            progress.increment()

    return fnodes
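Note: setdefault() in the loop above keeps the first manifest that referenced
each file node, so later manifests sharing a node do not overwrite it. A
runnable miniature with hypothetical stand-in values:

    import collections

    fnodes = collections.defaultdict(dict)
    manifests = [
        (b'm1', {b'a.txt': b'f1'}),
        (b'm2', {b'a.txt': b'f1', b'b.txt': b'f2'}),
    ]
    for manifestnode, md in manifests:
        for path, fnode in md.items():
            fnodes[path].setdefault(fnode, manifestnode)

    assert fnodes[b'a.txt'] == {b'f1': b'm1'}  # first manifest wins
    assert fnodes[b'b.txt'] == {b'f2': b'm2'}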
def _fetchfiles(repo, tr, remote, fnodes, linkrevs):
    def iterrevisions(objs, progress):
        for filerevision in objs:
            node = filerevision[b'node']

            extrafields = {}

            for field, size in filerevision.get(b'fieldsfollowing', []):
                extrafields[field] = next(objs)

            if b'delta' in extrafields:
                basenode = filerevision[b'deltabasenode']
                delta = extrafields[b'delta']
            elif b'revision' in extrafields:
                basenode = nullid
                revision = extrafields[b'revision']
                delta = mdiff.trivialdiffheader(len(revision)) + revision
            else:
                continue

            yield (
                node,
                filerevision[b'parents'][0],
                filerevision[b'parents'][1],
                node,
                basenode,
                delta,
                # Flags not yet supported.
                0,
            )

            progress.increment()

    progress = repo.ui.makeprogress(
        _('files'), unit=_('chunks'),
        total=sum(len(v) for v in fnodes.itervalues()))

    # TODO make batch size configurable
    batchsize = 10000
    fnodeslist = [x for x in sorted(fnodes.items())]

    for i in pycompat.xrange(0, len(fnodeslist), batchsize):
        batch = [x for x in fnodeslist[i:i + batchsize]]
        if not batch:
            continue

        with remote.commandexecutor() as e:
            fs = []
            locallinkrevs = {}

            for path, nodes in batch:
                fs.append((path, e.callcommand(b'filedata', {
                    b'path': path,
                    b'nodes': sorted(nodes),
                    b'fields': {b'parents', b'revision'},
                    b'haveparents': True,
                })))

                locallinkrevs[path] = {
                    node: linkrevs[manifestnode]
                    for node, manifestnode in nodes.iteritems()}

            for path, f in fs:
                objs = f.result()

                # Chomp off header objects.
                next(objs)

                store = repo.file(path)
                store.addgroup(
                    iterrevisions(objs, progress),
                    locallinkrevs[path].__getitem__,
                    weakref.proxy(tr))
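Note: linkrev resolution for files composes two maps: file node -> first
manifest node (built by _derivefilesfrommanifests) and manifest node ->
changelog revision (the linkrevs dict from _fetchmanifests). A miniature of
the locallinkrevs construction above, with hypothetical nodes:

    linkrevs = {b'm1': 0, b'm2': 1}       # manifest node -> changelog rev
    nodes = {b'f1': b'm1', b'f2': b'm2'}  # file node -> manifest node

    locallinkrevs = {
        node: linkrevs[manifestnode]
        for node, manifestnode in nodes.items()}

    assert locallinkrevs == {b'f1': 0, b'f2': 1}

(The real code uses iteritems(); this module still targets Python 2, hence
also pycompat.xrange and itervalues().)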