##// END OF EJS Templates
narrow: filter copies in core...
Martin von Zweigbergk -
r38057:ee7b6fa5 default
parent child Browse files
Show More
@@ -1,97 +1,95
1 1 # __init__.py - narrowhg extension
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 '''create clones which fetch history data for subset of files (EXPERIMENTAL)'''
8 8
9 9 from __future__ import absolute_import
10 10
11 11 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
12 12 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
13 13 # be specifying the version(s) of Mercurial they are tested with, or
14 14 # leave the attribute unspecified.
15 15 testedwith = 'ships-with-hg-core'
16 16
17 17 from mercurial import (
18 18 changegroup,
19 19 extensions,
20 20 hg,
21 21 localrepo,
22 22 registrar,
23 23 verify as verifymod,
24 24 )
25 25
26 26 from . import (
27 27 narrowbundle2,
28 28 narrowchangegroup,
29 29 narrowcommands,
30 30 narrowcopies,
31 31 narrowdirstate,
32 narrowmerge,
33 32 narrowpatch,
34 33 narrowrepo,
35 34 narrowrevlog,
36 35 narrowtemplates,
37 36 narrowwirepeer,
38 37 )
39 38
configtable = {}
configitem = registrar.configitem(configtable)
# Narrowhg *has* support for serving ellipsis nodes (which are used at
# least by Google's internal server), but that support is pretty
# fragile and has a lot of problems on real-world repositories that
# have complex graph topologies. This could probably be corrected, but
# absent someone needing the full support for ellipsis nodes in
# repositories with merges, it's unlikely this work will get done. As
# of this writing in late 2017, all repositories large enough for
# ellipsis nodes to be a hard requirement also enforce strictly linear
# history for other scaling reasons.
configitem('experimental', 'narrowservebrokenellipses',
           default=False,
           alias=[('narrow', 'serveellipses')],
)

# Export the commands table for Mercurial to see.
cmdtable = narrowcommands.table
58 57
def featuresetup(ui, features):
    """Advertise the narrow-clone requirement as a supported repo feature."""
    features.add(changegroup.NARROW_REQUIREMENT)
61 60
def uisetup(ui):
    """Wraps user-facing mercurial commands with narrow-aware versions."""
    # register featuresetup so the narrow requirement is recognized
    localrepo.featuresetupfuncs.add(featuresetup)
    # install process-wide wrappers in each narrow submodule
    narrowrevlog.setup()
    narrowbundle2.setup()
    narrowcommands.setup()
    narrowchangegroup.setup()
    narrowwirepeer.uisetup()
71 69
def reposetup(ui, repo):
    """Wraps local repositories with narrow repo support."""
    if not repo.local():
        return

    narrowrepo.wraprepo(repo)
    # the per-repo wrappers only apply once the repo actually carries the
    # narrow requirement
    if changegroup.NARROW_REQUIREMENT in repo.requirements:
        narrowcopies.setup(repo)
        narrowdirstate.setup(repo)
        narrowpatch.setup(repo)
        narrowwirepeer.reposetup(repo)
83 81
def _verifierinit(orig, self, repo, matcher=None):
    """Force verify to run against the repo's narrow matcher."""
    # The verifier's matcher argument was designed for narrowhg, so it should
    # be None from core. If another extension passes a matcher (unlikely),
    # we'll have to fail until matchers can be composed more easily.
    assert matcher is None
    orig(self, repo, repo.narrowmatch())
90 88
def extsetup(ui):
    """Install narrow-aware wrappers around core verify/share functions."""
    extensions.wrapfunction(verifymod.verifier, '__init__', _verifierinit)
    extensions.wrapfunction(hg, 'postshare', narrowrepo.wrappostshare)
    extensions.wrapfunction(hg, 'copystore', narrowrepo.unsharenarrowspec)
95 93
# Re-export template keywords and revset predicates so Mercurial's
# registrar machinery picks them up from this extension module.
templatekeyword = narrowtemplates.templatekeyword
revsetpredicate = narrowtemplates.revsetpredicate
@@ -1,878 +1,883
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 scmutil,
21 21 util,
22 22 )
23 23
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct
    descendant of the other, in which case we can return the minimum revnum
    of a and b.
    None if no such revision exists.

    a and b are revision numbers; None stands for the working directory.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    # max-heap of pending revisions, encoded as negated revnums
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendant of b or vice-versa.
    return min(limit, a, b)
108 108
109 109 def _chain(src, dst, a, b):
110 110 """chain two sets of copies a->b"""
111 111 t = a.copy()
112 112 for k, v in b.iteritems():
113 113 if v in t:
114 114 # found a chain
115 115 if t[v] != k:
116 116 # file wasn't renamed back to itself
117 117 t[k] = t[v]
118 118 if v not in dst:
119 119 # chain was a rename, not a copy
120 120 del t[v]
121 121 if v in src:
122 122 # file is a copy of an existing file
123 123 t[k] = v
124 124
125 125 # remove criss-crossed copies
126 126 for k, v in list(t.items()):
127 127 if k in src and v in dst:
128 128 del t[k]
129 129
130 130 return t
131 131
def _tracefile(fctx, am, limit=-1):
    """walk fctx's ancestry looking for the version present in manifest am

    Returns the matching ancestor file context, or None once both the
    linkrev and rev of an ancestor fall below a non-negative limit.
    """
    for ancestor in fctx.ancestors():
        # does the ancestor manifest record exactly this file revision?
        if am.get(ancestor.path(), None) == ancestor.filenode():
            return ancestor
        if 0 <= limit and ancestor.linkrev() < limit and ancestor.rev() < limit:
            return None
141 141
def _dirstatecopies(d, match=None):
    """return the dirstate copy map of d, pruned by state and matcher

    Keeps only destinations that are tracked (added/normal/merged) and,
    when a matcher is given, accepted by it.
    """
    dirstate = d._repo.dirstate
    copies = dirstate.copies().copy()
    for dst in list(copies):
        keep = dirstate[dst] in 'anm' and (not match or match(dst))
        if not keep:
            del copies[dst]
    return copies
149 149
def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    return b.manifest().filesnotin(a.manifest(), match=match)
158 158
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst@b: src@a} mapping for files that appeared between a and b.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files so each _tracefile
    # call reuses the same ancestor set instead of recomputing it
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()
    return cm
193 193
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    # committed revision: delegate directly
    if b.rev() is not None:
        return _committedforwardcopies(a, b, match)

    # b is the working copy
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b, match)

    # trace up to b's parent, then layer the dirstate copies on top
    committed = _committedforwardcopies(a, b.p1(), match)
    return _chain(a, b, committed, _dirstatecopies(b, match))
207 207
def _backwardrenames(a, b):
    """find renames from b back to a, dropping pure copies

    Returns an empty mapping when copytracing is disabled.
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    renames = {}
    for dst, src in sorted(_forwardcopies(b, a).iteritems()):
        if src in a:
            # the source still exists in a: this was a copy, not a rename
            continue
        renames[src] = dst
    return renames
223 223
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    if not x or not y or x == y:
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: simple forward trace
        return _forwardcopies(x, y, match=match)
    if anc == y:
        # y is an ancestor of x: walk renames backwards
        return _backwardrenames(x, y)
    # otherwise go back to the common ancestor and chain both directions
    return _chain(x, y, _backwardrenames(x, anc),
                  _forwardcopies(anc, y, match=match))
235 235
def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
    """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
    and c2. This is its own function so extensions can easily wrap this call
    to see what files mergecopies is about to process.

    Even though c1 and c2 are not used in this function, they are useful in
    other extensions for being able to read the file nodes of the changed files.

    "baselabel" can be passed to help distinguish the multiple computations
    done in the graft case.
    """
    exclusive1 = sorted(addedinm1 - addedinm2)
    exclusive2 = sorted(addedinm2 - addedinm1)

    header = " unmatched files in %s"
    if baselabel:
        header += ' (from %s)' % baselabel
    for label, files in (('local', exclusive1), ('other', exclusive2)):
        if files:
            repo.ui.debug("%s:\n %s\n" % (header % label, "\n ".join(files)))

    # drop files outside the narrowspec; no-op for non-narrow repos
    narrowmatch = repo.narrowmatch()
    if not narrowmatch.always():
        exclusive1 = [f for f in exclusive1 if narrowmatch(f)]
        exclusive2 = [f for f in exclusive2 if narrowmatch(f)]
    return exclusive1, exclusive2
258 263
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # compute the ancestor set once and cache it on ctx for reuse
        revs = [rev]
        if rev is None:
            # working directory: use its parents' ancestry instead
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # the same (f, n) pair may be requested repeatedly; memoize lookups
    return util.lrucachefunc(makectx)
293 298
294 299 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
295 300 """combine partial copy paths"""
296 301 remainder = {}
297 302 for f in copyfrom:
298 303 if f in copyto:
299 304 finalcopy[copyto[f]] = copyfrom[f]
300 305 del copyto[f]
301 306 for f in incompletediverge:
302 307 assert f not in diverge
303 308 ic = incompletediverge[f]
304 309 if ic[0] in copyto:
305 310 diverge[f] = [copyto[ic[0]], ic[1]]
306 311 else:
307 312 remainder[f] = ic
308 313 return remainder
309 314
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, it prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'off':
        return {}, {}, {}, {}, {}
    elif copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
379 384
def _isfullcopytraceable(repo, c1, base):
    """decide whether full copytracing is affordable for this operation

    Full copytracing is used when base and (the committed form of) c1 are
    both mutable and the number of changesets between them stays under
    `experimental.copytrace.sourcecommitlimit`.
    """
    if c1.rev() is None:
        # working directory: judge by its first parent instead
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        return False
    maxcommits = repo.ui.configint('experimental',
                                   'copytrace.sourcecommitlimit')
    changesets = len(repo.revs('%d::%d', base.rev(), c1.rev()))
    return changesets < maxcommits
397 402
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the same five dicts as mergecopies().
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden. We still need to know a real common ancestor in this case.
    # We can't just compute _c1.ancestor(_c2) and compare it to ca, because
    # there can be multiple common ancestors, e.g. in case of bidmerge.
    # Because our caller may not know if the revision passed in lieu of the CA
    # is a genuine common ancestor or not without explicitly checking it, it's
    # better to determine that here.
    #
    # base.descendant(wc) and base.descendant(base) are False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not (base == _c1 or base.descendant(_c1))
    dirtyc2 = not (base == _c2 or base.descendant(_c2))
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
                                      m2.filesnotin(mta),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    # incomplete copies from the clean side complete those from the dirty side
    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc + "/")
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc + "/")
        elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
            # files from the same directory moved to two different places
            invalid.add(dsrc + "/")
        else:
            # looks good so far
            dirmove[dsrc + "/"] = ddst + "/"

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
629 634
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # work on committed revisions only
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # collect files touched anywhere on the linear base..c2 branch
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only copies whose source also exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index c1-only files by basename and by dirname for candidate lookup
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
730 735
731 736 def _related(f1, f2):
732 737 """return True if f1 and f2 filectx have a common ancestor
733 738
734 739 Walk back to common ancestor to see if the two files originate
735 740 from the same file. Since workingfilectx's rev() is None it messes
736 741 up the integer comparison logic, hence the pre-step check for
737 742 None (f1 and f2 can only be workingfilectx's initially).
738 743 """
739 744
740 745 if f1 == f2:
741 746 return f1 # a match
742 747
743 748 g1, g2 = f1.ancestors(), f2.ancestors()
744 749 try:
745 750 f1r, f2r = f1.linkrev(), f2.linkrev()
746 751
747 752 if f1r is None:
748 753 f1 = next(g1)
749 754 if f2r is None:
750 755 f2 = next(g2)
751 756
752 757 while True:
753 758 f1r, f2r = f1.linkrev(), f2.linkrev()
754 759 if f1r > f2r:
755 760 f1 = next(g1)
756 761 elif f2r > f1r:
757 762 f2 = next(g2)
758 763 else: # f1 and f2 point to files in the same linkrev
759 764 return f1 == f2 # true if they point to the same file
760 765 except StopIteration:
761 766 return False
762 767
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    Results are recorded by mutating the sub-dictionaries of 'data'
    ('copy', 'fullcopy', 'diverge', 'incomplete', 'incompletediverge');
    nothing is returned.

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    # 'seen' accumulates every name f has had along the rename chain,
    # so we skip names already visited and can report divergence on them
    seen = {f}
    # walk the rename/copy chain of f backwards through its ancestors
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        # related but renamed on both sides: record as incomplete so the
        # caller (mergecopies) can combine both halves of the rename
        if of in mta:
            if backwards or remotebase:
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return
856 861
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    skipped = {}
    if skiprev is not None:
        copytrace = repo.ui.config('experimental', 'copytrace')
        if copytrace != 'off':
            # copytrace='off' skips this line, but not the entire function
            # because the line below is O(size of the repo) during a rebase,
            # while the rest of the function is much faster (and is required
            # for carrying copy metadata across the rebase anyway).
            skipped = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst not in skipped:
            wctx[dst].markcopied(src)
1 NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now