setdiscovery: back out changeset 5cfdf6137af8 (issue5809)...
Martin von Zweigbergk
r36732:613954a1 default
@@ -1,525 +1,525 @@
1 1 # discovery.py - protocol changeset discovery functions
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import functools
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 hex,
15 15 nullid,
16 16 short,
17 17 )
18 18
19 19 from . import (
20 20 bookmarks,
21 21 branchmap,
22 22 error,
23 23 phases,
24 24 scmutil,
25 25 setdiscovery,
26 26 treediscovery,
27 27 util,
28 28 )
29 29
30 30 def findcommonincoming(repo, remote, heads=None, force=False, ancestorsof=None):
31 31 """Return a tuple (common, anyincoming, heads) used to identify the common
32 32 subset of nodes between repo and remote.
33 33
34 34 "common" is a list of (at least) the heads of the common subset.
35 35 "anyincoming" is testable as a boolean indicating if any nodes are missing
36 36 locally. If remote does not support getbundle, this actually is a list of
37 37 roots of the nodes that would be incoming, to be supplied to
38 38 changegroupsubset. No code except for pull should be relying on this fact
39 39 any longer.
40 40 "heads" is either the supplied heads, or else the remote's heads.
41 41 "ancestorsof", if not None, restricts the discovery to a subset defined
42 42 by these nodes. Changesets outside of this set won't be considered (and
43 43 won't appear in "common").
44 44
45 45 If you pass heads and they are all known locally, the response lists just
46 46 these heads in "common" and in "heads".
47 47
48 48 Please use findcommonoutgoing to compute the set of outgoing nodes to give
49 49 extensions a good hook into outgoing.
50 50 """
51 51
52 52 if not remote.capable('getbundle'):
53 53 return treediscovery.findcommonincoming(repo, remote, heads, force)
54 54
55 55 if heads:
56 56 knownnode = repo.changelog.hasnode # no nodemap until it is filtered
57 57 if all(knownnode(h) for h in heads):
58 58 return (heads, False, heads)
59 59
60 res = setdiscovery.findcommonheads(repo.ui, repo, remote, heads,
60 res = setdiscovery.findcommonheads(repo.ui, repo, remote,
61 61 abortwhenunrelated=not force,
62 62 ancestorsof=ancestorsof)
63 63 common, anyinc, srvheads = res
64 64 return (list(common), anyinc, heads or list(srvheads))
65 65
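# Editor's sketch (not Mercurial's API): a self-contained toy showing the
# early-exit contract documented above. If every requested head is already
# known locally, the remote is never contacted and the answer is
# (heads, False, heads); the common/anyincoming computation here is a
# deliberately naive stand-in.
def toy_findcommonincoming(localnodes, remoteheads, heads=None):
    if heads and all(h in localnodes for h in heads):
        return (heads, False, heads)  # nothing can be incoming
    common = sorted(set(localnodes) & set(remoteheads))
    anyincoming = bool(set(remoteheads) - set(localnodes))
    return (common, anyincoming, heads or list(remoteheads))

assert toy_findcommonincoming({"a", "b"}, ["c"], heads=["a"]) == (["a"], False, ["a"])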
66 66 class outgoing(object):
67 67 '''Represents the set of nodes present in a local repo but not in a
68 68 (possibly) remote one.
69 69
70 70 Members:
71 71
72 72 missing is a list of all nodes present in local but not in remote.
73 73 common is a list of all nodes shared between the two repos.
74 74 excluded is the list of missing changesets that shouldn't be sent remotely.
75 75 missingheads is the list of heads of missing.
76 76 commonheads is the list of heads of common.
77 77
78 78 The sets are computed on demand from the heads, unless provided upfront
79 79 by discovery.'''
80 80
81 81 def __init__(self, repo, commonheads=None, missingheads=None,
82 82 missingroots=None):
83 83 # at least one of them must not be set
84 84 assert None in (commonheads, missingroots)
85 85 cl = repo.changelog
86 86 if missingheads is None:
87 87 missingheads = cl.heads()
88 88 if missingroots:
89 89 discbases = []
90 90 for n in missingroots:
91 91 discbases.extend([p for p in cl.parents(n) if p != nullid])
92 92 # TODO remove call to nodesbetween.
93 93 # TODO populate attributes on outgoing instance instead of setting
94 94 # discbases.
95 95 csets, roots, heads = cl.nodesbetween(missingroots, missingheads)
96 96 included = set(csets)
97 97 missingheads = heads
98 98 commonheads = [n for n in discbases if n not in included]
99 99 elif not commonheads:
100 100 commonheads = [nullid]
101 101 self.commonheads = commonheads
102 102 self.missingheads = missingheads
103 103 self._revlog = cl
104 104 self._common = None
105 105 self._missing = None
106 106 self.excluded = []
107 107
108 108 def _computecommonmissing(self):
109 109 sets = self._revlog.findcommonmissing(self.commonheads,
110 110 self.missingheads)
111 111 self._common, self._missing = sets
112 112
113 113 @util.propertycache
114 114 def common(self):
115 115 if self._common is None:
116 116 self._computecommonmissing()
117 117 return self._common
118 118
119 119 @util.propertycache
120 120 def missing(self):
121 121 if self._missing is None:
122 122 self._computecommonmissing()
123 123 return self._missing
124 124
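# Editor's sketch of the lazy pattern above, distilled: heads are stored
# eagerly, while the expensive common/missing sets are computed once, on
# first access. functools.cached_property stands in for util.propertycache
# (assumption: Python 3.8+ for cached_property).
import functools

class ToyOutgoing(object):
    def __init__(self, commonheads, missingheads):
        self.commonheads = commonheads
        self.missingheads = missingheads

    @functools.cached_property
    def missing(self):
        # stand-in for the expensive findcommonmissing() graph walk
        return sorted(set(self.missingheads) - set(self.commonheads))

og = ToyOutgoing(["a"], ["a", "b"])
assert og.missing == ["b"]  # computed on first access, then cached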
125 125 def findcommonoutgoing(repo, other, onlyheads=None, force=False,
126 126 commoninc=None, portable=False):
127 127 '''Return an outgoing instance to identify the nodes present in repo but
128 128 not in other.
129 129
130 130 If onlyheads is given, only nodes ancestral to nodes in onlyheads
131 131 (inclusive) are included. If you already know the local repo's heads,
132 132 passing them in onlyheads is faster than letting them be recomputed here.
133 133
134 134 If commoninc is given, it must be the result of a prior call to
135 135 findcommonincoming(repo, other, force) to avoid recomputing it here.
136 136
137 137 If portable is given, compute more conservative common and missingheads,
138 138 to make bundles created from the instance more portable.'''
139 139 # declare an empty outgoing object to be filled later
140 140 og = outgoing(repo, None, None)
141 141
142 142 # get common set if not provided
143 143 if commoninc is None:
144 144 commoninc = findcommonincoming(repo, other, force=force,
145 145 ancestorsof=onlyheads)
146 146 og.commonheads, _any, _hds = commoninc
147 147
148 148 # compute outgoing
149 149 mayexclude = (repo._phasecache.phaseroots[phases.secret] or repo.obsstore)
150 150 if not mayexclude:
151 151 og.missingheads = onlyheads or repo.heads()
152 152 elif onlyheads is None:
153 153 # use visible heads as it should be cached
154 154 og.missingheads = repo.filtered("served").heads()
155 155 og.excluded = [ctx.node() for ctx in repo.set('secret() or extinct()')]
156 156 else:
157 157 # compute common, missing and exclude secret stuff
158 158 sets = repo.changelog.findcommonmissing(og.commonheads, onlyheads)
159 159 og._common, allmissing = sets
160 160 og._missing = missing = []
161 161 og.excluded = excluded = []
162 162 for node in allmissing:
163 163 ctx = repo[node]
164 164 if ctx.phase() >= phases.secret or ctx.extinct():
165 165 excluded.append(node)
166 166 else:
167 167 missing.append(node)
168 168 if len(missing) == len(allmissing):
169 169 missingheads = onlyheads
170 170 else: # update missing heads
171 171 missingheads = phases.newheads(repo, onlyheads, excluded)
172 172 og.missingheads = missingheads
173 173 if portable:
174 174 # recompute common and missingheads as if -r<rev> had been given for
175 175 # each head of missing, and --base <rev> for each head of the proper
176 176 # ancestors of missing
177 177 og._computecommonmissing()
178 178 cl = repo.changelog
179 179 missingrevs = set(cl.rev(n) for n in og._missing)
180 180 og._common = set(cl.ancestors(missingrevs)) - missingrevs
181 181 commonheads = set(og.commonheads)
182 182 og.missingheads = [h for h in og.missingheads if h not in commonheads]
183 183
184 184 return og
185 185
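# Editor's sketch of the exclusion pass above: missing changesets that are
# secret or extinct are diverted to `excluded` rather than being sent. The
# phase names and inputs are illustrative stand-ins, not Mercurial objects.
def split_missing(allmissing, phase_of, extinct):
    missing, excluded = [], []
    for node in allmissing:
        if phase_of(node) == "secret" or node in extinct:
            excluded.append(node)
        else:
            missing.append(node)
    return missing, excluded

m, e = split_missing(["n1", "n2"], {"n1": "draft", "n2": "secret"}.get, set())
assert m == ["n1"] and e == ["n2"]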
186 186 def _headssummary(pushop):
187 187 """compute a summary of branch and heads status before and after push
188 188
189 189 return {'branch': ([remoteheads], [newheads],
190 190 [unsyncedheads], [discardedheads])} mapping
191 191
192 192 - branch: the branch name,
193 193 - remoteheads: the list of remote heads known locally
194 194 None if the branch is new,
195 195 - newheads: the new remote heads (known locally) with outgoing pushed,
196 196 - unsyncedheads: the list of remote heads unknown locally,
197 197 - discardedheads: the list of heads made obsolete by the push.
198 198 """
199 199 repo = pushop.repo.unfiltered()
200 200 remote = pushop.remote
201 201 outgoing = pushop.outgoing
202 202 cl = repo.changelog
203 203 headssum = {}
204 204 # A. Create set of branches involved in the push.
205 205 branches = set(repo[n].branch() for n in outgoing.missing)
206 206 remotemap = remote.branchmap()
207 207 newbranches = branches - set(remotemap)
208 208 branches.difference_update(newbranches)
209 209
210 210 # B. Register remote heads.
211 211 remotebranches = set()
212 212 for branch, heads in remote.branchmap().iteritems():
213 213 remotebranches.add(branch)
214 214 known = []
215 215 unsynced = []
216 216 knownnode = cl.hasnode # do not use nodemap until it is filtered
217 217 for h in heads:
218 218 if knownnode(h):
219 219 known.append(h)
220 220 else:
221 221 unsynced.append(h)
222 222 headssum[branch] = (known, list(known), unsynced)
223 223 # C. Add new branch data.
224 224 missingctx = list(repo[n] for n in outgoing.missing)
225 225 touchedbranches = set()
226 226 for ctx in missingctx:
227 227 branch = ctx.branch()
228 228 touchedbranches.add(branch)
229 229 if branch not in headssum:
230 230 headssum[branch] = (None, [], [])
231 231
232 232 # D. Drop data about untouched branches:
233 233 for branch in remotebranches - touchedbranches:
234 234 del headssum[branch]
235 235
236 236 # E. Update newmap with outgoing changes.
237 237 # This will possibly add new heads and remove existing ones.
238 238 newmap = branchmap.branchcache((branch, heads[1])
239 239 for branch, heads in headssum.iteritems()
240 240 if heads[0] is not None)
241 241 newmap.update(repo, (ctx.rev() for ctx in missingctx))
242 242 for branch, newheads in newmap.iteritems():
243 243 headssum[branch][1][:] = newheads
244 244 for branch, items in headssum.iteritems():
245 245 for l in items:
246 246 if l is not None:
247 247 l.sort()
248 248 headssum[branch] = items + ([],)
249 249
250 250 # If there is no obsstore, no post-processing is needed.
251 251 if repo.obsstore:
252 252 torev = repo.changelog.rev
253 253 futureheads = set(torev(h) for h in outgoing.missingheads)
254 254 futureheads |= set(torev(h) for h in outgoing.commonheads)
255 255 allfuturecommon = repo.changelog.ancestors(futureheads, inclusive=True)
256 256 for branch, heads in sorted(headssum.iteritems()):
257 257 remoteheads, newheads, unsyncedheads, placeholder = heads
258 258 result = _postprocessobsolete(pushop, allfuturecommon, newheads)
259 259 headssum[branch] = (remoteheads, sorted(result[0]), unsyncedheads,
260 260 sorted(result[1]))
261 261 return headssum
262 262
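# Editor's example of the headssum shape documented above, with hypothetical
# 20-byte node ids. 'default' already exists remotely; 'feature' is new
# (remoteheads is None) and so would require --new-branch to push.
headssum = {
    'default': ([b'\x11' * 20],                # remoteheads known locally
                [b'\x11' * 20, b'\x22' * 20],  # newheads after the push
                [],                            # unsyncedheads
                []),                           # discardedheads
    'feature': (None, [b'\x33' * 20], [], []),
}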
263 263 def _oldheadssummary(repo, remoteheads, outgoing, inc=False):
264 264 """Compute the branch map summary for a repo without branchmap support"""
265 265
266 266 # 1-4b. old servers: Check for new topological heads.
267 267 # Construct {old,new}map with branch = None (topological branch).
268 268 # (code based on update)
269 269 knownnode = repo.changelog.hasnode # no nodemap until it is filtered
270 270 oldheads = sorted(h for h in remoteheads if knownnode(h))
271 271 # all nodes in outgoing.missing are children of either:
272 272 # - an element of oldheads
273 273 # - another element of outgoing.missing
274 274 # - nullrev
275 275 # This explains why the new heads are very simple to compute.
276 276 r = repo.set('heads(%ln + %ln)', oldheads, outgoing.missing)
277 277 newheads = sorted(c.node() for c in r)
278 278 # set some unsynced head to issue the "unsynced changes" warning
279 279 if inc:
280 280 unsynced = [None]
281 281 else:
282 282 unsynced = []
283 283 return {None: (oldheads, newheads, unsynced, [])}
284 284
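# Editor's sketch of the topological-heads point above: because every
# missing node descends from an old head, another missing node, or nullrev,
# the new heads are simply heads(oldheads + outgoing.missing). A head here
# is any node with no child inside the set.
def toy_heads(parents, nodes):
    nodes = set(nodes)
    nonheads = {p for n in nodes for p in parents.get(n, ()) if p in nodes}
    return sorted(nodes - nonheads)

parents = {"b": ("a",), "c": ("b",), "d": ("b",)}
assert toy_heads(parents, ["a", "b", "c", "d"]) == ["c", "d"]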
285 285 def _nowarnheads(pushop):
286 286 # Compute newly pushed bookmarks. We don't warn about bookmarked heads.
287 287 repo = pushop.repo.unfiltered()
288 288 remote = pushop.remote
289 289 localbookmarks = repo._bookmarks
290 290 remotebookmarks = remote.listkeys('bookmarks')
291 291 bookmarkedheads = set()
292 292
293 293 # internal config: bookmarks.pushing
294 294 newbookmarks = [localbookmarks.expandname(b)
295 295 for b in pushop.ui.configlist('bookmarks', 'pushing')]
296 296
297 297 for bm in localbookmarks:
298 298 rnode = remotebookmarks.get(bm)
299 299 if rnode and rnode in repo:
300 300 lctx, rctx = repo[bm], repo[rnode]
301 301 if bookmarks.validdest(repo, rctx, lctx):
302 302 bookmarkedheads.add(lctx.node())
303 303 else:
304 304 if bm in newbookmarks and bm not in remotebookmarks:
305 305 bookmarkedheads.add(repo[bm].node())
306 306
307 307 return bookmarkedheads
308 308
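# Editor's sketch of the rule above, heavily simplified: a bookmarked local
# head is exempt from the new-head warning when the remote's bookmark can
# validly fast-forward to it (is_valid_dest is a stand-in for
# bookmarks.validdest) or when the bookmark itself is being pushed for the
# first time.
def toy_nowarnheads(localbm, remotebm, pushing, is_valid_dest):
    nowarn = set()
    for bm, node in localbm.items():
        rnode = remotebm.get(bm)
        if rnode is not None and is_valid_dest(rnode, node):
            nowarn.add(node)
        elif bm in pushing and bm not in remotebm:
            nowarn.add(node)
    return nowarn

nowarn = toy_nowarnheads({"stable": "n1"}, {"stable": "n0"}, [],
                         lambda old, new: True)
assert nowarn == {"n1"}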
309 309 def checkheads(pushop):
310 310 """Check that a push won't add any outgoing head
311 311
312 312 Raises an Abort error and displays a ui message as needed.
313 313 """
314 314
315 315 repo = pushop.repo.unfiltered()
316 316 remote = pushop.remote
317 317 outgoing = pushop.outgoing
318 318 remoteheads = pushop.remoteheads
319 319 newbranch = pushop.newbranch
320 320 inc = bool(pushop.incoming)
321 321
322 322 # Check for each named branch if we're creating new remote heads.
323 323 # To be a remote head after push, node must be either:
324 324 # - unknown locally
325 325 # - a local outgoing head descended from update
326 326 # - a remote head that's known locally and not
327 327 # ancestral to an outgoing head
328 328 if remoteheads == [nullid]:
329 329 # remote is empty, nothing to check.
330 330 return
331 331
332 332 if remote.capable('branchmap'):
333 333 headssum = _headssummary(pushop)
334 334 else:
335 335 headssum = _oldheadssummary(repo, remoteheads, outgoing, inc)
336 336 pushop.pushbranchmap = headssum
337 337 newbranches = [branch for branch, heads in headssum.iteritems()
338 338 if heads[0] is None]
339 339 # 1. Check for new branches on the remote.
340 340 if newbranches and not newbranch: # new branch requires --new-branch
341 341 branchnames = ', '.join(sorted(newbranches))
342 342 raise error.Abort(_("push creates new remote branches: %s!")
343 343 % branchnames,
344 344 hint=_("use 'hg push --new-branch' to create"
345 345 " new remote branches"))
346 346
347 347 # 2. Find heads that we need not warn about
348 348 nowarnheads = _nowarnheads(pushop)
349 349
350 350 # 3. Check for new heads.
351 351 # If there are more heads after the push than before, a suitable
352 352 # error message, depending on unsynced status, is displayed.
353 353 errormsg = None
354 354 for branch, heads in sorted(headssum.iteritems()):
355 355 remoteheads, newheads, unsyncedheads, discardedheads = heads
356 356 # add unsynced data
357 357 if remoteheads is None:
358 358 oldhs = set()
359 359 else:
360 360 oldhs = set(remoteheads)
361 361 oldhs.update(unsyncedheads)
362 362 dhs = None # delta heads, the new heads on branch
363 363 newhs = set(newheads)
364 364 newhs.update(unsyncedheads)
365 365 if unsyncedheads:
366 366 if None in unsyncedheads:
367 367 # old remote, no heads data
368 368 heads = None
369 369 else:
370 370 heads = scmutil.nodesummaries(repo, unsyncedheads)
371 371 if heads is None:
372 372 repo.ui.status(_("remote has heads that are "
373 373 "not known locally\n"))
374 374 elif branch is None:
375 375 repo.ui.status(_("remote has heads that are "
376 376 "not known locally: %s\n") % heads)
377 377 else:
378 378 repo.ui.status(_("remote has heads on branch '%s' that are "
379 379 "not known locally: %s\n") % (branch, heads))
380 380 if remoteheads is None:
381 381 if len(newhs) > 1:
382 382 dhs = list(newhs)
383 383 if errormsg is None:
384 384 errormsg = (_("push creates new branch '%s' "
385 385 "with multiple heads") % (branch))
386 386 hint = _("merge or"
387 387 " see 'hg help push' for details about"
388 388 " pushing new heads")
389 389 elif len(newhs) > len(oldhs):
390 390 # remove bookmarked or existing remote heads from the new heads list
391 391 dhs = sorted(newhs - nowarnheads - oldhs)
392 392 if dhs:
393 393 if errormsg is None:
394 394 if branch not in ('default', None):
395 395 errormsg = _("push creates new remote head %s "
396 396 "on branch '%s'!") % (short(dhs[0]), branch)
397 397 elif repo[dhs[0]].bookmarks():
398 398 errormsg = _("push creates new remote head %s "
399 399 "with bookmark '%s'!") % (
400 400 short(dhs[0]), repo[dhs[0]].bookmarks()[0])
401 401 else:
402 402 errormsg = _("push creates new remote head %s!"
403 403 ) % short(dhs[0])
404 404 if unsyncedheads:
405 405 hint = _("pull and merge or"
406 406 " see 'hg help push' for details about"
407 407 " pushing new heads")
408 408 else:
409 409 hint = _("merge or"
410 410 " see 'hg help push' for details about"
411 411 " pushing new heads")
412 412 if branch is None:
413 413 repo.ui.note(_("new remote heads:\n"))
414 414 else:
415 415 repo.ui.note(_("new remote heads on branch '%s':\n") % branch)
416 416 for h in dhs:
417 417 repo.ui.note((" %s\n") % short(h))
418 418 if errormsg:
419 419 raise error.Abort(errormsg, hint=hint)
420 420
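# Editor's distillation of the core check above: a push is refused when a
# branch would end up with more heads than before, unless every extra head
# is bookmarked (nowarn) or already present remotely.
def push_creates_new_head(oldheads, newheads, nowarn=()):
    oldhs, newhs = set(oldheads), set(newheads)
    dhs = newhs - set(nowarn) - oldhs  # the genuinely new "delta heads"
    return len(newhs) > len(oldhs) and bool(dhs)

assert push_creates_new_head({"h1"}, {"h1", "h2"})
assert not push_creates_new_head({"h1"}, {"h2"})  # head replaced, not added
assert not push_creates_new_head({"h1"}, {"h1", "h2"}, nowarn={"h2"})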
421 421 def _postprocessobsolete(pushop, futurecommon, candidate_newhs):
422 422 """post process the list of new heads with obsolescence information
423 423
424 424 Exists as a sub-function to contain the complexity and allow extensions to
425 425 experiment with smarter logic.
426 426
427 427 Returns (newheads, discarded_heads) tuple
428 428 """
429 429 # known issues
430 430 #
431 431 # * We "silently" skip processing on all changesets unknown locally
432 432 #
433 433 # * if <nh> is public on the remote, it won't be affected by obsolescence
434 434 # markers and a new head is created
435 435
436 436 # define various utilities and containers
437 437 repo = pushop.repo
438 438 unfi = repo.unfiltered()
439 439 tonode = unfi.changelog.node
440 440 torev = unfi.changelog.nodemap.get
441 441 public = phases.public
442 442 getphase = unfi._phasecache.phase
443 443 ispublic = (lambda r: getphase(unfi, r) == public)
444 444 ispushed = (lambda n: torev(n) in futurecommon)
445 445 hasoutmarker = functools.partial(pushingmarkerfor, unfi.obsstore, ispushed)
446 446 successorsmarkers = unfi.obsstore.successors
447 447 newhs = set() # final set of new heads
448 448 discarded = set() # new head of fully replaced branch
449 449
450 450 localcandidate = set() # candidate heads known locally
451 451 unknownheads = set() # candidate heads unknown locally
452 452 for h in candidate_newhs:
453 453 if h in unfi:
454 454 localcandidate.add(h)
455 455 else:
456 456 if successorsmarkers.get(h) is not None:
457 457 msg = ('checkheads: remote head unknown locally has'
458 458 ' local marker: %s\n')
459 459 repo.ui.debug(msg % hex(h))
460 460 unknownheads.add(h)
461 461
462 462 # fast path the simple case
463 463 if len(localcandidate) == 1:
464 464 return unknownheads | set(candidate_newhs), set()
465 465
466 466 # actually process branch replacement
467 467 while localcandidate:
468 468 nh = localcandidate.pop()
469 469 # run this check early to skip the evaluation of the whole branch
470 470 if (torev(nh) in futurecommon or ispublic(torev(nh))):
471 471 newhs.add(nh)
472 472 continue
473 473
474 474 # Get all revs/nodes on the branch exclusive to this head
475 475 # (already filtered heads are "ignored")
476 476 branchrevs = unfi.revs('only(%n, (%ln+%ln))',
477 477 nh, localcandidate, newhs)
478 478 branchnodes = [tonode(r) for r in branchrevs]
479 479
480 480 # The branch won't be hidden on the remote if
481 481 # * any part of it is public,
482 482 # * any part of it is considered part of the result by previous logic,
483 483 # * if we have no markers to push to obsolete it.
484 484 if (any(ispublic(r) for r in branchrevs)
485 485 or any(torev(n) in futurecommon for n in branchnodes)
486 486 or any(not hasoutmarker(n) for n in branchnodes)):
487 487 newhs.add(nh)
488 488 else:
489 489 # note: there is a corner case if there is a merge in the branch.
490 490 # we might end up with -more- heads. However, these heads are not
491 491 # "added" by the push, but rather by the "removal" on the remote, so I
492 492 # think it is okay to ignore them.
493 493 discarded.add(nh)
494 494 newhs |= unknownheads
495 495 return newhs, discarded
496 496
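# Editor's sketch of the three-way test above: a candidate head's exclusive
# branch stays visible on the remote if any node is public, any node is
# already part of the push result, or any node lacks an outgoing marker.
def branch_is_hidden(branchnodes, ispublic, incommon, hasoutmarker):
    return not (any(ispublic(n) for n in branchnodes)
                or any(incommon(n) for n in branchnodes)
                or any(not hasoutmarker(n) for n in branchnodes))

# one fully obsoleted draft node with a marker to push => head is discarded
assert branch_is_hidden(["n1"], lambda n: False, lambda n: False,
                        lambda n: True)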
497 497 def pushingmarkerfor(obsstore, ispushed, node):
498 498 """true if some markers are to be pushed for node
499 499
500 500 We cannot just look into the pushed obsmarkers from the pushop because
501 501 discovery might have filtered relevant markers. In addition, listing all
502 502 markers relevant to all changesets in the pushed set would be too expensive
503 503 (O(len(repo))).
504 504
505 505 (note: there are caching opportunities in this function, but they would
506 506 require a two-dimensional stack.)
507 507 """
508 508 successorsmarkers = obsstore.successors
509 509 stack = [node]
510 510 seen = set(stack)
511 511 while stack:
512 512 current = stack.pop()
513 513 if ispushed(current):
514 514 return True
515 515 markers = successorsmarkers.get(current, ())
516 516 # markers fields = ('prec', 'succs', 'flag', 'meta', 'date', 'parents')
517 517 for m in markers:
518 518 nexts = m[1] # successors
519 519 if not nexts: # this is a prune marker
520 520 nexts = m[5] or () # parents
521 521 for n in nexts:
522 522 if n not in seen:
523 523 seen.add(n)
524 524 stack.append(n)
525 525 return False
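
# Editor's standalone version of the walk above: follow successors (or
# parents, for prune markers) from `node` and report whether any reachable
# node is being pushed. `markers` maps node -> marker tuples with the field
# layout documented in the loop above.
def toy_pushingmarkerfor(markers, ispushed, node):
    stack, seen = [node], {node}
    while stack:
        current = stack.pop()
        if ispushed(current):
            return True
        for m in markers.get(current, ()):
            nexts = m[1] or (m[5] or ())  # successors, else parents (prune)
            for n in nexts:
                if n not in seen:
                    seen.add(n)
                    stack.append(n)
    return False

markers = {"x": [("x", ("y",), 0, (), None, None)]}
assert toy_pushingmarkerfor(markers, lambda n: n == "y", "x")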
@@ -1,267 +1,263 @@
1 1 # setdiscovery.py - improved discovery of common nodeset for mercurial
2 2 #
3 3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
4 4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """
9 9 The algorithm works in the following way. You have two repositories: local
10 10 and remote. They both contain a DAG of changelists.
11 11
12 12 The goal of the discovery protocol is to find one set of nodes, *common*,
13 13 the set of nodes shared by local and remote.
14 14
15 15 One of the issues with the original protocol was latency: it could
16 16 potentially require lots of roundtrips to discover that the local repo was a
17 17 subset of remote (which is a very common case: you usually have few changes
18 18 compared to upstream, while upstream probably has had lots of development).
19 19
20 20 The new protocol only requires one interface for the remote repo: `known()`,
21 21 which, given a set of changelists, tells you if they are present in the DAG.
22 22
23 23 The algorithm then works as follows:
24 24
25 25 - We will be using three sets, `common`, `missing`, `unknown`. Originally
26 26 all nodes are in `unknown`.
27 27 - Take a sample from `unknown`, call `remote.known(sample)`
28 28 - For each node that remote knows, move it and all its ancestors to `common`
29 29 - For each node that remote doesn't know, move it and all its descendants
30 30 to `missing`
31 31 - Iterate until `unknown` is empty
32 32
33 33 There are a couple of optimizations. First, instead of starting with a
34 34 random sample of missing, start by sending all heads; in the case where the
35 35 local repo is a subset, you compute the answer in one round trip.
36 36
37 37 Then you can do something similar to the bisecting strategy used when
38 38 finding faulty changesets. Instead of random samples, you can try picking
39 39 nodes that will maximize the number of nodes that get classified along
40 40 with them (since all ancestors or descendants will be marked as well).
41 41 """
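
# Editor's toy simulation of the loop described above, on integers standing
# in for nodes; `ancestors`/`descendants` are closures over a toy linear DAG
# and each loop iteration stands for one remote.known() roundtrip.
import random

def toy_discover(unknown, known_on_remote, ancestors, descendants, samplesize=2):
    common, missing = set(), set()
    roundtrips = 0
    while unknown:
        sample = random.sample(sorted(unknown), min(samplesize, len(unknown)))
        roundtrips += 1
        for n in sample:
            if n in known_on_remote:
                common |= ancestors(n) | {n}
            else:
                missing |= descendants(n) | {n}
        unknown -= common | missing
    return common, missing, roundtrips

# linear history 0..9; the remote only has 0..4
anc = lambda n: set(range(n))
desc = lambda n: set(range(n + 1, 10))
common, missing, _ = toy_discover(set(range(10)), set(range(5)), anc, desc)
assert common == set(range(5)) and missing == set(range(5, 10))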
42 42
43 43 from __future__ import absolute_import
44 44
45 45 import collections
46 46 import random
47 47
48 48 from .i18n import _
49 49 from .node import (
50 50 nullid,
51 51 nullrev,
52 52 )
53 53 from . import (
54 54 dagutil,
55 55 error,
56 56 util,
57 57 )
58 58
59 59 def _updatesample(dag, nodes, sample, quicksamplesize=0):
60 60 """update an existing sample to match the expected size
61 61
62 62 The sample is updated with nodes exponentially distant from each head of the
64 64 <nodes> set (H~1, H~2, H~4, H~8, etc.).
64 64
65 65 If a target size is specified, the sampling will stop once this size is
66 66 reached. Otherwise sampling will happen until roots of the <nodes> set are
67 67 reached.
68 68
69 69 :dag: a dag object from dagutil
70 70 :nodes: set of nodes we want to discover (if None, assume the whole dag)
71 71 :sample: a sample to update
72 72 :quicksamplesize: optional target size of the sample"""
73 73 # if nodes is empty we scan the entire graph
74 74 if nodes:
75 75 heads = dag.headsetofconnecteds(nodes)
76 76 else:
77 77 heads = dag.heads()
78 78 dist = {}
79 79 visit = collections.deque(heads)
80 80 seen = set()
81 81 factor = 1
82 82 while visit:
83 83 curr = visit.popleft()
84 84 if curr in seen:
85 85 continue
86 86 d = dist.setdefault(curr, 1)
87 87 if d > factor:
88 88 factor *= 2
89 89 if d == factor:
90 90 sample.add(curr)
91 91 if quicksamplesize and (len(sample) >= quicksamplesize):
92 92 return
93 93 seen.add(curr)
94 94 for p in dag.parents(curr):
95 95 if not nodes or p in nodes:
96 96 dist.setdefault(p, d + 1)
97 97 visit.append(p)
98 98
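# Editor's toy of the exponential spacing above, on a single linear chain:
# starting from a head, the sample keeps the nodes at distance 1, 2, 4, 8...
# using the same distance/factor bookkeeping as the loop above.
def toy_exponential_sample(chainlength):
    sample, factor = set(), 1
    for d in range(1, chainlength + 1):  # d = distance from the head
        if d > factor:
            factor *= 2
        if d == factor:
            sample.add(d)
    return sample

assert sorted(toy_exponential_sample(20)) == [1, 2, 4, 8, 16]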
99 99 def _takequicksample(dag, nodes, size):
100 100 """takes a quick sample of size <size>
101 101
102 102 It is meant for initial sampling and focuses on querying heads and close
103 103 ancestors of heads.
104 104
105 105 :dag: a dag object
106 106 :nodes: set of nodes to discover
107 107 :size: the maximum size of the sample"""
108 108 sample = dag.headsetofconnecteds(nodes)
109 109 if size <= len(sample):
110 110 return _limitsample(sample, size)
111 111 _updatesample(dag, None, sample, quicksamplesize=size)
112 112 return sample
113 113
114 114 def _takefullsample(dag, nodes, size):
115 115 sample = dag.headsetofconnecteds(nodes)
116 116 # update from heads
117 117 _updatesample(dag, nodes, sample)
118 118 # update from roots
119 119 _updatesample(dag.inverse(), nodes, sample)
120 120 assert sample
121 121 sample = _limitsample(sample, size)
122 122 if len(sample) < size:
123 123 more = size - len(sample)
124 124 sample.update(random.sample(list(nodes - sample), more))
125 125 return sample
126 126
127 127 def _limitsample(sample, desiredlen):
128 128 """return a random subset of sample of at most desiredlen items"""
129 129 if len(sample) > desiredlen:
130 130 sample = set(random.sample(sample, desiredlen))
131 131 return sample
132 132
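# Editor's sketch of how the sampling helpers above compose: cap a set to a
# target size with a random subset, then pad from the remaining undecided
# nodes when the structural sample came up short (mirroring _takefullsample).
import random

def toy_limitsample(sample, desiredlen):
    if len(sample) > desiredlen:
        sample = set(random.sample(sorted(sample), desiredlen))
    return sample

def toy_fullsample(structural, undecided, size):
    sample = toy_limitsample(set(structural), size)
    if len(sample) < size:
        sample |= set(random.sample(sorted(set(undecided) - sample),
                                    size - len(sample)))
    return sample

s = toy_fullsample({1, 2}, {1, 2, 3, 4, 5}, 4)
assert len(s) == 4 and {1, 2} <= s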
133 def findcommonheads(ui, local, remote, heads=None,
133 def findcommonheads(ui, local, remote,
134 134 initialsamplesize=100,
135 135 fullsamplesize=200,
136 136 abortwhenunrelated=True,
137 137 ancestorsof=None):
138 138 '''Return a tuple (common, anyincoming, remoteheads) used to identify
139 139 missing nodes from or in remote.
140 140 '''
141 141 start = util.timer()
142 142
143 143 roundtrips = 0
144 144 cl = local.changelog
145 145 localsubset = None
146 146 if ancestorsof is not None:
147 147 rev = local.changelog.rev
148 148 localsubset = [rev(n) for n in ancestorsof]
149 149 dag = dagutil.revlogdag(cl, localsubset=localsubset)
150 150
151 151 # early exit if we know all the specified remote heads already
152 152 ui.debug("query 1; heads\n")
153 153 roundtrips += 1
154 154 ownheads = dag.heads()
155 155 sample = _limitsample(ownheads, initialsamplesize)
156 156 # indices between sample and externalized version must match
157 157 sample = list(sample)
158 if heads:
159 srvheadhashes = heads
160 yesno = remote.known(dag.externalizeall(sample))
161 else:
162 batch = remote.iterbatch()
163 batch.heads()
164 batch.known(dag.externalizeall(sample))
165 batch.submit()
166 srvheadhashes, yesno = batch.results()
158 batch = remote.iterbatch()
159 batch.heads()
160 batch.known(dag.externalizeall(sample))
161 batch.submit()
162 srvheadhashes, yesno = batch.results()
167 163
168 164 if cl.tip() == nullid:
169 165 if srvheadhashes != [nullid]:
170 166 return [nullid], True, srvheadhashes
171 167 return [nullid], False, []
172 168
173 169 # start actual discovery (we note this before the next "if" for
174 170 # compatibility reasons)
175 171 ui.status(_("searching for changes\n"))
176 172
177 173 srvheads = dag.internalizeall(srvheadhashes, filterunknown=True)
178 174 if len(srvheads) == len(srvheadhashes):
179 175 ui.debug("all remote heads known locally\n")
180 176 return (srvheadhashes, False, srvheadhashes,)
181 177
182 178 if sample and len(ownheads) <= initialsamplesize and all(yesno):
183 179 ui.note(_("all local heads known remotely\n"))
184 180 ownheadhashes = dag.externalizeall(ownheads)
185 181 return (ownheadhashes, True, srvheadhashes,)
186 182
187 183 # full blown discovery
188 184
189 185 # own nodes I know we both know
190 186 # treat remote heads (and maybe own heads) as a first implicit sample
191 187 # response
192 188 common = cl.incrementalmissingrevs(srvheads)
193 189 commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
194 190 common.addbases(commoninsample)
195 191 # own nodes where I don't know if remote knows them
196 192 undecided = set(common.missingancestors(ownheads))
197 193 # own nodes I know remote lacks
198 194 missing = set()
199 195
200 196 full = False
201 197 while undecided:
202 198
203 199 if sample:
204 200 missinginsample = [n for i, n in enumerate(sample) if not yesno[i]]
205 201 missing.update(dag.descendantset(missinginsample, missing))
206 202
207 203 undecided.difference_update(missing)
208 204
209 205 if not undecided:
210 206 break
211 207
212 208 if full or common.hasbases():
213 209 if full:
214 210 ui.note(_("sampling from both directions\n"))
215 211 else:
216 212 ui.debug("taking initial sample\n")
217 213 samplefunc = _takefullsample
218 214 targetsize = fullsamplesize
219 215 else:
220 216 # use even cheaper initial sample
221 217 ui.debug("taking quick initial sample\n")
222 218 samplefunc = _takequicksample
223 219 targetsize = initialsamplesize
224 220 if len(undecided) < targetsize:
225 221 sample = list(undecided)
226 222 else:
227 223 sample = samplefunc(dag, undecided, targetsize)
228 224 sample = _limitsample(sample, targetsize)
229 225
230 226 roundtrips += 1
231 227 ui.progress(_('searching'), roundtrips, unit=_('queries'))
232 228 ui.debug("query %i; still undecided: %i, sample size is: %i\n"
233 229 % (roundtrips, len(undecided), len(sample)))
234 230 # indices between sample and externalized version must match
235 231 sample = list(sample)
236 232 yesno = remote.known(dag.externalizeall(sample))
237 233 full = True
238 234
239 235 if sample:
240 236 commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
241 237 common.addbases(commoninsample)
242 238 common.removeancestorsfrom(undecided)
243 239
244 240 # heads(common) == heads(common.bases) since common represents common.bases
245 241 # and all its ancestors
246 242 result = dag.headsetofconnecteds(common.bases)
247 243 # common.bases can include nullrev, but our contract requires us to not
248 244 # return any heads in that case, so discard that
249 245 result.discard(nullrev)
250 246 elapsed = util.timer() - start
251 247 ui.progress(_('searching'), None)
252 248 ui.debug("%d total queries in %.4fs\n" % (roundtrips, elapsed))
253 249 msg = ('found %d common and %d unknown server heads,'
254 250 ' %d roundtrips in %.4fs\n')
255 251 missing = set(result) - set(srvheads)
256 252 ui.log('discovery', msg, len(result), len(missing), roundtrips,
257 253 elapsed)
258 254
259 255 if not result and srvheadhashes != [nullid]:
260 256 if abortwhenunrelated:
261 257 raise error.Abort(_("repository is unrelated"))
262 258 else:
263 259 ui.warn(_("warning: repository is unrelated\n"))
264 260 return ({nullid}, True, srvheadhashes,)
265 261
266 262 anyincoming = (srvheadhashes != [nullid])
267 263 return dag.externalizeall(result), anyincoming, srvheadhashes
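
# Editor's sketch of the batching idea restored by this backout: heads() and
# known() are queued together and submitted as a single roundtrip, instead of
# trusting caller-supplied heads. ToyBatch/FakePeer are stand-ins, not
# Mercurial's peer API.
class ToyBatch(object):
    def __init__(self, peer):
        self.peer = peer
        self.calls = []
        self._results = None
    def heads(self):
        self.calls.append(("heads", ()))
    def known(self, nodes):
        self.calls.append(("known", (nodes,)))
    def submit(self):
        # stand-in for one wire exchange answering every queued call
        self._results = [getattr(self.peer, name)(*args)
                         for name, args in self.calls]
    def results(self):
        return self._results

class FakePeer(object):
    def heads(self):
        return ["h1"]
    def known(self, nodes):
        return [n == "h1" for n in nodes]

batch = ToyBatch(FakePeer())
batch.heads()
batch.known(["h1", "n2"])
batch.submit()
srvheadhashes, yesno = batch.results()
assert srvheadhashes == ["h1"] and yesno == [True, False]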