copies: extract function for finding directory renames...
Martin von Zweigbergk
r44624:45192589 default
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import multiprocessing
import os

from .i18n import _


from .revlogutils.flagutil import REVIDX_SIDEDATA

from . import (
    error,
    match as matchmod,
    node,
    pathutil,
    pycompat,
    util,
)

from .revlogutils import sidedata as sidedatamod

from .utils import stringutil


def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case  src  mid  dst  result
    #   1    x    y    -     -
    #   2    x    y    y    x->y
    #   3    x    y    x     -
    #   4    x    y    z    x->z
    #   5    -    x    y     -
    #   6    x    x    y    x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]
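
# A minimal illustration of _filter() (editor's sketch, not part of the
# original module). _filter() only needs membership tests on 'src' and
# 'dst', so plain sets of file names can stand in for contexts here:
#
#     src = {b'a', b'x'}
#     dst = {b'a', b'b', b'x'}
#     t = {b'b': b'a', b'a': b'x', b'c': b'a'}
#     _filter(src, dst, t)
#     # b'a': b'x' is criss-crossed (b'a' in src, b'x' in dst) and
#     # b'c': b'a' points to a destination missing from dst, so both
#     # entries are removed, leaving t == {b'b': b'a'}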


def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result
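
# A quick sketch of what _chain() computes (editor's example, not in the
# original file). Both arguments map {destination: source}:
#
#     prefix = {b'b': b'a'}    # a was copied to b between src and mid
#     suffix = {b'c': b'b'}    # b was copied to c between mid and dst
#     assert _chain(prefix, suffix) == {b'b': b'a', b'c': b'a'}
#
# The chained mapping may still contain entries that are invalid for the
# overall src -> dst comparison; those are cleaned up by _filter() above.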


def _tracefile(fctx, am, basemf):
    """return the path of the ancestor of fctx that is present in ancestor
    manifest am

    Note: we used to try and stop after a given limit, however checking if that
    limit is reached turned out to be very expensive. We are better off
    disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path


def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c


def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)


def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource
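
# The option consulted above is set in hgrc (editor's note; the section
# and option names are taken verbatim from the config lookup in this
# function):
#
#     [experimental]
#     copies.read-from = compatibility   # or changeset-only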


def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm


def _revinfogetter(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask it for the other. So it makes sense
        # to keep the information around when reaching the first parent of a
        # merge and dropping it after it was provided for the second parent.
        #
        # There exist cases where only one parent of the merge will be walked.
        # It happens when the "destination" of the copy tracing is a
        # descendant of a new root, not common with the "source". In that
        # case, we will only walk through merge parents that are descendants
        # of changesets common between "source" and "destination".
        #
        # With the current implementation, if such changesets have copy
        # information, we'll keep them in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matter.
        #
        # In addition, it would be possible to reach a pathological case, where
        # many first parents are met before any second parent is reached. In
        # that case the cache could grow. If this ever becomes an issue one can
        # safely introduce a maximum cache size. This would trade extra CPU/IO
        # time to save memory.
        merge_caches = {}

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & REVIDX_SIDEDATA:
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX in some cases we over-cache, IGNORE
                    merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
            else:
                p1copies = {}
                p2copies = {}
                removed = []
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo
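
# Shape of the values produced by the returned closure (editor's note;
# 'rev' stands for any revision number valid for the repo):
#
#     revinfo = _revinfogetter(repo)
#     p1, p2, p1copies, p2copies, removed = revinfo(rev)
#     # p1/p2 are parent revision numbers (node.nullrev when absent),
#     # p1copies/p2copies map {newname: oldname}, and removed lists the
#     # files deleted in 'rev'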


def _changesetforwardcopies(a, b, match):
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)
    return _combinechangesetcopies(revs, children, b.rev(), revinfo, match)


def _combinechangesetcopies(revs, children, targetrev, revinfo, match):
    """combine the copies information for each item of `revs`

    revs: sorted iterable of revisions to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in `revs`)
    revinfo(rev): a function that returns (p1, p2, p1copies, p2copies, removed)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
                # _chain makes a copy, which we could avoid in some
                # simple/linear cases.
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    return all_copies[targetrev]
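
# A tiny walkthrough of the merge rule above (editor's sketch, not part
# of the original file). If both parents of a merge changeset 'c'
# propagated copy information for the same destination:
#
#     via p1: {b'dst': b'src1'}
#     via p2: {b'dst': b'src2'}
#
# the combined result keeps {b'dst': b'src1'}, because parent 1 wins on
# conflict regardless of which parent happens to be processed first.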


def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies


def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
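
# What the reversal above yields (editor's example). Suppose that going
# forward from b to a the file b'old' became b'new', so
# _forwardcopies(b, a) == {b'new': b'old'}. If b'old' is gone from 'a'
# (a true rename, not a copy), _backwardrenames(a, b, ...) returns
# {b'old': b'new'}; if b'old' still exists in 'a', the entry is skipped,
# since reversing a copy would be ambiguous.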


def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
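
# Typical entry-point usage (editor's sketch; 'repo', 'rev1' and 'rev2'
# are hypothetical placeholders for a repository object and two
# revisions):
#
#     from mercurial import copies as copiesmod
#     renames = copiesmod.pathcopies(repo[rev1], repo[rev2])
#     # -> {destination-name: source-name}, computed forward, backward,
#     # or via the common ancestor as the debug output above indicates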


def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    | /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on in such
        # cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)


def _isfullcopytraceable(repo, c1, base):
    """ Checks if base, source and destination are all non-public branches;
    if yes, use the full copytrace algorithm for increased capabilities, since
    it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    the number of changesets from c1 to base such that if the number of
    changesets is more than the limit, the full copytracing algorithm won't be
    used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False
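
# The limit consulted above comes from hgrc (editor's note; the option
# name is taken from the configint() lookup, and the value shown is
# only illustrative):
#
#     [experimental]
#     copytrace.sourcecommitlimit = 100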


def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif m2[src] != mb[src]:
        if not _related(c2[src], base[src]):
            return
        # modified on side 2
        for dst in dsts1:
            if dst not in m2:
                # dst not added on side 2 (handle as regular
                # "both created" case in manifestmerge otherwise)
                copy[dst] = src


def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and, for each file, checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return {}, {}, {}, {}, {}

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a', then 'c' is treated as a copy of
                # 'a' and the rename is not marked divergent.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)

    if repo.ui.debugflag:
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove, movewithdir = _dir_renames(repo, c1, c2, copy, fullcopy, u1, u2)

    return copy, movewithdir, diverge, renamedelete, dirmove


def _dir_renames(repo, c1, c2, copy, fullcopy, u1, u2):
    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return {}, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return dirmove, movewithdir
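
# A concrete shape for the two mappings returned above (editor's
# illustration, not from the original file). If every file of b'a/' was
# moved to b'b/':
#
#     fullcopy == {b'b/x': b'a/x', b'b/y': b'a/y'}
#     # -> dirmove == {b'a/': b'b/'}
#
# and an unmatched new file b'a/z' (in u1 or u2) then yields
#
#     movewithdir == {b'a/z': b'b/z'}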


def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    The source branch is the commits from base up to c2, not including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
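
# How the candidate sets above pair files up (editor's sketch). If
# b'dir/file.c' is missing from c1 but present in base and c2, the
# heuristic considers as move candidates every file new in c1 that
# shares its basename (e.g. b'newdir/file.c') or its directory (e.g.
# b'dir/other.c'), then keeps those whose history _related() accepts:
#
#     basenametofilename[b'file.c'] == [b'newdir/file.c']
#     dirnametofilename[b'dir'] == [b'dir/other.c']
#     movecandidates == [b'newdir/file.c', b'dir/other.c']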


def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        return False
859
865
860
866
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    _filter(wctx.p1(), wctx, new_copies)
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)


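# Hypothetical usage sketch (names assumed, not taken from the original
# module): during a graft of `ctx` onto the working copy, a caller would do
# something along these lines to re-mark the renames merge.update() missed:
#
#   wctx = repo[None]
#   graftcopies(wctx, ctx, ctx.p1())  # base == ctx.p1() is the cheap case

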
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    added = []
    for f in ctx.files():
        if not any(f in p for p in ctx.parents()):
            added.append(f)
    return added


def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    removed = []
    for f in ctx.files():
        if f not in ctx:
            removed.append(f)
    return removed


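# Worked example (illustrative, hypothetical file names): for a changeset that
# adds `new.txt` and removes `old.txt`, ctx.files() lists both. `new.txt`
# appears in no parent, so computechangesetfilesadded() returns [b'new.txt'];
# `old.txt` is no longer in ctx itself, so computechangesetfilesremoved()
# returns [b'old.txt'].

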
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies


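# Shape of the result (illustrative, hypothetical names): if `dst.txt` was
# copied from `src.txt` relative to the first parent and nothing was copied
# relative to the second parent, the call would return:
#
#   p1copies, p2copies = computechangesetcopies(ctx)
#   # p1copies == {b'dst.txt': b'src.txt'}
#   # p2copies == {}

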
def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)


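# Worked example (illustrative values): each copy is recorded as
# "<index-in-files>\0<source>" and the entries are joined with newlines:
#
#   files = [b'a.txt', b'b.txt', b'c.txt']
#   copies = {b'b.txt': b'a.txt', b'c.txt': b'a.txt'}
#   encodecopies(files, copies) == b'1\x00a.txt\n2\x00a.txt'

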
def decodecopies(files, data):
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None


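# Round trip with the encodecopies() example above: decoding restores the
# mapping as long as `files` is the same sorted list used for encoding, and
# any parse failure yields None instead of raising:
#
#   decodecopies([b'a.txt', b'b.txt', b'c.txt'], b'1\x00a.txt\n2\x00a.txt')
#   # == {b'b.txt': b'a.txt', b'c.txt': b'a.txt'}

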
def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append(b'%d' % i)
    return b'\n'.join(indices)


def decodefileindices(files, data):
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None


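# Worked example (illustrative values): a subset is stored as newline-separated
# indices into the file list, so the two helpers round-trip:
#
#   files = [b'a.txt', b'b.txt', b'c.txt']
#   encodefileindices(files, [b'a.txt', b'c.txt']) == b'0\n2'
#   decodefileindices(files, b'0\n2') == [b'a.txt', b'c.txt']

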
def _getsidedata(srcrepo, rev):
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata


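# Illustrative result (hypothetical changeset): for a revision that renamed
# `a.txt` to `b.txt` relative to p1, sortedfiles is [b'a.txt', b'b.txt'] and
# the returned map holds only the non-empty encoded entries:
#
#   _getsidedata(srcrepo, rev)
#   # == {sidedatamod.SD_P1COPIES: b'1\x00a.txt',  # b.txt copied from a.txt
#   #     sidedatamod.SD_FILESADDED: b'1',         # b.txt was added
#   #     sidedatamod.SD_FILESREMOVED: b'0'}       # a.txt was removed

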
def getsidedataadder(srcrepo, destrepo):
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if pycompat.iswindows or not use_w:
        return _get_simple_sidedata_adder(srcrepo, destrepo)
    else:
        return _get_worker_sidedata_adder(srcrepo, destrepo)


def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by a worker to precompute sidedata

    It reads revision numbers from an input queue and writes
    (rev, <sidedata-map>) pairs to an output queue.

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. Each worker needs to acquire one token before fetching a task;
    the tokens are released by the consumer of the produced data.
    """
    tokens.acquire()
    rev = revs_queue.get()
    while rev is not None:
        data = _getsidedata(srcrepo, rev)
        sidedata_queue.put((rev, data))
        tokens.acquire()
        rev = revs_queue.get()
    # processing of `None` is completed, release the token.
    tokens.release()


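# Protocol sketch (illustrative, not part of the original module): the
# semaphore bounds how many computed-but-unconsumed results may exist at any
# time. A minimal standalone version of the same back-pressure idea, with a
# hypothetical `compute` function:
#
#   def producer(tasks, results, tokens, compute):
#       tokens.acquire()  # wait for room in the buffer
#       task = tasks.get()
#       while task is not None:
#           results.put((task, compute(task)))
#           tokens.acquire()
#           task = tasks.get()
#       tokens.release()  # give back the token taken for `None`
#
#   # The consumer calls tokens.release() once per result it takes off
#   # `results`, capping the buffer at the semaphore's initial value.

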
BUFF_PER_WORKER = 50


def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need them"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand; revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not relevant. In practice, most computations
    # are fast but some are very expensive and dominate all the other smaller
    # costs.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedata map for 42 and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # Was the data previously shelved?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued results until we find the one we are
                # looking for (shelving the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion


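# Reordering sketch (illustrative, standalone): the staging dict above turns
# an out-of-order stream of (rev, data) pairs back into the order the caller
# asks for. The same logic with a plain iterator standing in for the queue:
#
#   results = iter([(2, b'two'), (1, b'one'), (3, b'three')])
#   staging = {}
#
#   def get_for(rev):
#       data = staging.pop(rev, None)
#       if data is None:
#           r, data = next(results)
#           while r != rev:
#               staging[r] = data
#               r, data = next(results)
#       return data
#
#   get_for(1)  # -> b'one' (rev 2 gets shelved on the way)
#   get_for(2)  # -> b'two' (served from staging)

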
def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just computes it in the same thread on request"""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion


def getsidedataremover(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        f = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                f = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, f, {}

    return sidedatacompanion