flags: account for flag change when tracking rename relevant to merge...
marmoute
r45396:d452acc8 stable
@@ -1,1171 +1,1171 @@
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import multiprocessing
import os

from .i18n import _


from .revlogutils.flagutil import REVIDX_SIDEDATA

from . import (
    error,
    match as matchmod,
    node,
    pathutil,
    pycompat,
    util,
)

from .revlogutils import sidedata as sidedatamod

from .utils import stringutil


def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case src mid dst result
    #   1   x   y   -    -
    #   2   x   y   y   x->y
    #   3   x   y   x    -
    #   4   x   y   z   x->z
    #   5   -   x   y    -
    #   6   x   x   y   x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]

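# Illustrative example for _filter (not part of the original file): with
# src = {'x'}, dst = {'y', 'z'} and t = {'y': 'x', 'z': 'w'}, the entry
# 'z' -> 'w' is dropped because 'w' did not exist in src, leaving
# t = {'y': 'x'} (case 2 in the table above).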

def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result

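# Illustrative example for _chain (not part of the original file): with
# prefix = {'b': 'a'} and suffix = {'c': 'b'}, the result is
# {'b': 'a', 'c': 'a'}: the copy 'b' -> 'c' is traced back through
# 'a' -> 'b' to the original source 'a'.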

def _tracefile(fctx, am, basemf):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am

    Note: we used to try and stop after a given limit; however, checking if
    that limit was reached turned out to be very expensive, so we are better
    off disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path


def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c


def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)


def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource

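# Example (illustrative, values taken from `changesetsource` above):
# either of the following configurations makes usechangesetcentricalgo()
# return True:
#
#   [experimental]
#   copies.read-from = changeset-only
#
#   [experimental]
#   copies.read-from = compatibility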

def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm


def _revinfogetter(repo):
    """returns a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask it for the other. So it makes sense
        # to keep the information around when reaching the first parent of a
        # merge and dropping it after it was provided for the second parent.
        #
        # There are cases where only one parent of the merge will be walked. It
        # happens when the "destination" of the copy tracing is a descendant of
        # a new root, not common with the "source". In that case, we will only
        # walk through merge parents that are descendants of changesets common
        # between "source" and "destination".
        #
        # With the current implementation, if such changesets have copy
        # information, we'll keep them in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matter.
        #
        # In addition, it would be possible to reach a pathological case, where
        # many first parents are met before any second parent is reached. In
        # that case the cache could grow. If this ever becomes an issue one can
        # safely introduce a maximum cache size. This would trade extra CPU/IO
        # time to save memory.
        merge_caches = {}
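        # Illustrative note (not part of the original file): for a merge
        # revision m, the revinfo(m) call reached via one parent computes
        # and stores the tuple in merge_caches; the call reached via the
        # other parent pops and reuses it, so changelogrevision(m) is only
        # parsed once.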

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & REVIDX_SIDEDATA:
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX in some cases we over-cache, IGNORE
                    merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
            else:
                p1copies = {}
                p2copies = {}
                removed = []
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo


def _changesetforwardcopies(a, b, match):
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)
    return _combinechangesetcopies(revs, children, b.rev(), revinfo, match)

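# Illustrative example for _changesetforwardcopies (not part of the
# original file): tracing copies from a = rev 1 to b = rev 4 in
#
#   1 --- 2 --- 4
#    \         /
#     3 ------'
#
# gives missingrevs = {2, 3, 4}, roots = {1} and
# children = {1: [2, 3], 2: [4], 3: [4]}; revs visits [1, 2, 3], and the
# copy information from both parents is merged when reaching rev 4.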

def _combinechangesetcopies(revs, children, targetrev, revinfo, match):
    """combine the copies information for each item of `revs`

    revs: sorted iterable of revisions to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in revs)
    revinfo(rev): a function that returns (p1, p2, p1copies, p2copies, removed)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
                # _chain makes a copy; we could avoid doing so in some
                # simple/linear cases.
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog-related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    return all_copies[targetrev]

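# Illustrative note on the merge above (not part of the original file):
# if the walk from parent 1 of a merge contributes {'dst': 'a'} and the
# walk from parent 2 contributes {'dst': 'b'}, the merged result keeps
# {'dst': 'a'}, since parent 1 takes precedence in case of conflict.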

def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies


def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r


def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies


def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    | /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" is an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we keep copytracing enabled in such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)


def _isfullcopytraceable(repo, c1, base):
    """Checks whether base, source and destination are all non-public;
    if yes, we use the full copytrace algorithm for increased capabilities,
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    the number of changesets from c1 to base; if the number of changesets is
    more than the limit, the full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False

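# Example (illustrative, not part of the original file): with
#
#   [experimental]
#   copytrace.sourcecommitlimit = 100
#
# _isfullcopytraceable() returns True only when `base::c1` contains fewer
# than 100 changesets (and both c1 and base are mutable).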

def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
-    elif m2[src] != mb[src]:
-        if not _related(c2[src], base[src]):
-            return
+    elif mb[src] != m2[src] and not _related(c2[src], base[src]):
+        return
+    elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src

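# Note on the change above: previously the copy was recorded only when the
# file content differed between the merge base (mb) and side 2 (m2) and the
# two contents were related. The unrelated-content bail-out is now folded
# into its own `elif`, and the second `elif` also fires when only the flags
# differ (mb.flags(src) != m2.flags(src)), so a flag-only change (e.g. the
# executable bit) on side 2 is now enough to consider the rename relevant
# to the merge.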

class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source, present in one context but not the other,
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        self.copy = {} if copy is None else copy
        self.renamedelete = {} if renamedelete is None else renamedelete
        self.dirmove = {} if dirmove is None else dirmove
        self.movewithdir = {} if movewithdir is None else movewithdir


def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)
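    # Illustrative note (not part of the original file): the mappings are
    # inverted so that each source maps to all of its destinations, e.g.
    # copies1 = {'b': 'a', 'c': 'a'} becomes
    # inversecopies1 = {'a': ['b', 'c']}.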

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a', then 'c' is the common destination
                # and the rename is not treated as divergent.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    if repo.ui.debugflag:
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge


def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)
    """
    # generate a directory move map
    d = ctx.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d and ddst in d:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d, invalid

    if not dirmove:
        return {}, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return dirmove, movewithdir

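# Illustrative example for _dir_renames (not part of the original file):
# if fullcopy is {'b/a.txt': 'a/a.txt', 'b/c.txt': 'a/c.txt'} and the
# directory 'a/' no longer exists in ctx, dirmove becomes {'a/': 'b/'};
# a file 'a/new.txt' added on the other side is then scheduled to move
# to 'b/new.txt' via movewithdir.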

def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    The source branch is the set of commits from base up to c2, not
    including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very
    large in number and that will make the algorithm slow. The number of
    possible candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
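            # Illustrative note (not part of the original file): if
            # 'dir/a.txt' is missing, any file new in c1 with the same
            # basename (e.g. 'otherdir/a.txt') or in the same directory
            # (e.g. 'dir/b.txt') is a move candidate.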
842 # f is guaranteed to be present in c2, that's why
842 # f is guaranteed to be present in c2, that's why
843 # c2.filectx(f) won't fail
843 # c2.filectx(f) won't fail
844 f2 = c2.filectx(f)
844 f2 = c2.filectx(f)
845 # we can have a lot of candidates which can slow down the heuristics
845 # we can have a lot of candidates which can slow down the heuristics
846 # config value to limit the number of candidates moves to check
846 # config value to limit the number of candidates moves to check
847 maxcandidates = repo.ui.configint(
847 maxcandidates = repo.ui.configint(
848 b'experimental', b'copytrace.movecandidateslimit'
848 b'experimental', b'copytrace.movecandidateslimit'
849 )
849 )
850
850
851 if len(movecandidates) > maxcandidates:
851 if len(movecandidates) > maxcandidates:
852 repo.ui.status(
852 repo.ui.status(
853 _(
853 _(
854 b"skipping copytracing for '%s', more "
854 b"skipping copytracing for '%s', more "
855 b"candidates than the limit: %d\n"
855 b"candidates than the limit: %d\n"
856 )
856 )
857 % (f, len(movecandidates))
857 % (f, len(movecandidates))
858 )
858 )
859 continue
859 continue
860
860
861 for candidate in movecandidates:
861 for candidate in movecandidates:
862 f1 = c1.filectx(candidate)
862 f1 = c1.filectx(candidate)
863 if _related(f1, f2):
863 if _related(f1, f2):
864 # if there are a few related copies then we'll merge
864 # if there are a few related copies then we'll merge
865 # changes into all of them. This matches the behaviour
865 # changes into all of them. This matches the behaviour
866 # of upstream copytracing
866 # of upstream copytracing
867 copies1[candidate] = f
867 copies1[candidate] = f
868
868
869 return branch_copies(copies1), branch_copies(copies2), {}
869 return branch_copies(copies1), branch_copies(copies2), {}
870
870
871
871
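# Illustration (not part of the original module): the candidate grouping
# above in miniature, with plain path strings standing in for manifest
# entries. All paths below are invented for the demo.
import collections as _collections
import os as _os

_newfiles = [b'src/util.py', b'docs/util.py', b'src/helpers.py']
_bybasename = _collections.defaultdict(list)
_bydirname = _collections.defaultdict(list)
for _f in _newfiles:
    _bybasename[_os.path.basename(_f)].append(_f)
    _bydirname[_os.path.dirname(_f)].append(_f)

# a file present in base and c2 but gone from the local manifest:
_missing = b'lib/util.py'
_candidates = (
    _bybasename[_os.path.basename(_missing)]
    + _bydirname[_os.path.dirname(_missing)]
)
# the two files sharing the basename are the move candidates
assert _candidates == [b'src/util.py', b'docs/util.py']

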
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to the common ancestor to see if the two files originate
    from the same file. Since a workingfilectx's rev() is None, it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        return False


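# Illustration (not part of the original module): a toy model of the walk
# in _related() above. Each "file" is a chain of (linkrev, origin) pairs in
# decreasing linkrev order; we step whichever side has the larger linkrev
# until both sit on the same revision. The data below is invented.
def _toyrelated(chain1, chain2):
    g1, g2 = iter(chain1), iter(chain2)
    try:
        f1, f2 = next(g1), next(g2)
        while True:
            if f1[0] > f2[0]:
                f1 = next(g1)
            elif f2[0] > f1[0]:
                f2 = next(g2)
            else:
                return f1 == f2  # same linkrev: same origin?
    except StopIteration:
        return False


# 'b' was copied from 'a' at rev 3, so both chains meet at (3, 'a'):
assert _toyrelated(
    [(5, b'a'), (3, b'a'), (1, b'a')], [(4, b'b'), (3, b'a'), (1, b'a')]
)

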
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between
    base and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy
    (instead of just returning information about the copies). That makes it
    cheaper (especially in the common case of base==ctx.p1()) and useful
    also when experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't
    modified on the local side. This function adds the copies that were
    "missed" by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    _filter(wctx.p1(), wctx, new_copies)
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)


def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset"""
    added = []
    for f in ctx.files():
        if not any(f in p for p in ctx.parents()):
            added.append(f)
    return added


def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset"""
    removed = []
    for f in ctx.files():
        if f not in ctx:
            removed.append(f)
    return removed


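# Illustration (not part of the original module): a toy check of the
# added/removed logic above with stand-in contexts. A file counts as added
# when no parent contains it, and as removed when the changeset itself no
# longer contains it. The fake class below is purely for demonstration.
class _fakectx(object):
    def __init__(self, files, present, parents=()):
        self._files = files
        self._present = present
        self._parents = parents

    def files(self):
        return self._files

    def parents(self):
        return self._parents

    def __contains__(self, f):
        return f in self._present


_p1 = _fakectx([], {b'old.txt'})
_ctx = _fakectx([b'new.txt', b'old.txt'], {b'new.txt'}, parents=[_p1])
assert computechangesetfilesadded(_ctx) == [b'new.txt']
assert computechangesetfilesremoved(_ctx) == [b'old.txt']

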
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies


def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)


def decodecopies(files, data):
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None


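# Illustration (not part of the original module): a round-trip through the
# two helpers above. The file list and copy mapping are invented; the file
# list must be in the same sorted order used when encoding.
_files = [b'a.txt', b'b.txt', b'c.txt']
_copies = {b'b.txt': b'a.txt'}  # b.txt was copied from a.txt
_data = encodecopies(_files, _copies)  # -> b'1\x00a.txt'
assert decodecopies(_files, _data) == _copies

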
def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append(b'%d' % i)
    return b'\n'.join(indices)


def decodefileindices(files, data):
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None


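# Illustration (not part of the original module): the index encoding is a
# newline-separated list of positions into the file list. Values below are
# invented; note that decoding returns files in file-list order.
_files = [b'a.txt', b'b.txt', b'c.txt']
_data = encodefileindices(_files, [b'c.txt', b'a.txt'])  # -> b'0\n2'
assert decodefileindices(_files, _data) == [b'a.txt', b'c.txt']

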
def _getsidedata(srcrepo, rev):
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata


def getsidedataadder(srcrepo, destrepo):
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if pycompat.iswindows or not use_w:
        return _get_simple_sidedata_adder(srcrepo, destrepo)
    else:
        return _get_worker_sidedata_adder(srcrepo, destrepo)


def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by worker processes to precompute sidedata

    It reads revision numbers from an input queue and writes
    (rev, <sidedata-map>) pairs to an output queue.

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. Each worker needs to acquire one token before fetching a task.
    Tokens will be released by the consumer of the produced data.
    """
    tokens.acquire()
    rev = revs_queue.get()
    while rev is not None:
        data = _getsidedata(srcrepo, rev)
        sidedata_queue.put((rev, data))
        tokens.acquire()
        rev = revs_queue.get()
    # processing of `None` is completed, release the token.
    tokens.release()


BUFF_PER_WORKER = 50


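# Illustration (not part of the original module): the same token-based
# backpressure pattern in miniature. A BoundedSemaphore caps how many
# unconsumed results a worker may buffer; the consumer releases one token
# per result it drains. All names below are made up for the demo, and the
# driver only runs when this sketch is executed as a script.
import multiprocessing as _mp


def _demo_worker(inq, outq, tokens):
    tokens.acquire()  # one token per task, acquired before fetching
    task = inq.get()
    while task is not None:
        outq.put((task, task * task))  # stand-in for _getsidedata()
        tokens.acquire()
        task = inq.get()
    tokens.release()  # give back the token taken for the stop signal


if __name__ == '__main__':
    tokens = _mp.BoundedSemaphore(4)  # at most 4 buffered, unconsumed results
    inq, outq = _mp.Queue(), _mp.Queue()
    for t in range(10):
        inq.put(t)
    inq.put(None)  # the stop signal, as above
    w = _mp.Process(target=_demo_worker, args=(inq, outq, tokens))
    w.start()
    results = {}
    for _i in range(10):
        task, result = outq.get()
        tokens.release()  # consuming a result frees one slot in the buffer
        results[task] = result
    w.join()
    assert results == {t: t * t for t in range(10)}

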
def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need it"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand; revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the
    # overhead of using a single queue is not relevant. In practice, most
    # computations are fast but some are very expensive and dominate all the
    # other smaller costs.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedata map for 42 and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # Was the data previously shelved?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued results until we find the one we are
                # looking for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion


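# Illustration (not part of the original module): the shelving trick above,
# draining an out-of-order stream in order by staging early arrivals.
# Everything here is invented demo data.
def _inorder(results, expected):
    staging = {}
    for want in expected:
        data = staging.pop(want, None)
        while data is None:
            r, d = next(results)
            if r == want:
                data = d
            else:
                staging[r] = d  # shelve for later
        yield want, data


_stream = iter([(2, b'c'), (0, b'a'), (1, b'b')])
assert list(_inorder(_stream, [0, 1, 2])) == [(0, b'a'), (1, b'b'), (2, b'c')]

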
def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just computes the sidedata in the same thread, on request"""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion


def getsidedataremover(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        f = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                f = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, f, {}

    return sidedatacompanion
@@ -1,139 +1,135 b''
===============================================
Testing merge involving change to the exec flag
===============================================

#require execbit


Initial setup
==============


  $ hg init base-repo
  $ cd base-repo
  $ cat << EOF > a
  > 1
  > 2
  > 3
  > 4
  > 5
  > 6
  > 7
  > 8
  > 9
  > EOF
  $ touch b
  $ hg add a b
  $ hg commit -m "initial commit"
  $ cd ..

Testing merging mode change
===========================

setup

Change on one side, executable bit on the other

  $ hg clone base-repo simple-merge-repo
  updating to branch default
  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cd simple-merge-repo
  $ chmod +x a
  $ hg ci -m "make a executable, no change"
  $ [ -x a ] || echo "executable bit not recorded"
  $ hg up ".^"
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cat << EOF > a
  > 1
  > 2
  > 3
  > 4
  > 5
  > 6
  > 7
  > x
  > 9
  > EOF
  $ hg commit -m "edit end of file"
  created new head

merge them (from the update side)

  $ hg merge 'desc("make a executable, no change")'
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  (branch merge, don't forget to commit)
  $ hg st
  M a
  $ [ -x a ] || echo "executable bit lost"

merge them (from the chmod side)

  $ hg up -C 'desc("make a executable, no change")'
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ hg merge 'desc("edit end of file")'
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  (branch merge, don't forget to commit)
  $ hg st
  M a
  $ [ -x a ] || echo "executable bit lost"


  $ cd ..

Testing merging mode change with rename
=======================================

  $ hg clone base-repo rename-merge-repo
  updating to branch default
  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cd rename-merge-repo

make "a" executable on one side

  $ chmod +x a
  $ hg status
  M a
  $ hg ci -m "make a executable"
  $ [ -x a ] || echo "executable bit not recorded"
  $ hg up ".^"
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved

make "a" renamed on the other side

  $ hg mv a z
  $ hg st --copies
  A z
    a
  R a
  $ hg ci -m "rename a to z"
  created new head

merge them (from the rename side)

  $ hg merge 'desc("make a executable")'
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  (branch merge, don't forget to commit)
  $ hg st --copies
  M z
    a
  $ [ -x z ] || echo "executable bit lost"

merge them (from the chmod side)

  $ hg up -C 'desc("make a executable")'
  1 files updated, 0 files merged, 1 files removed, 0 files unresolved
  $ hg merge 'desc("rename a to z")'
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  (branch merge, don't forget to commit)
  $ hg st --copies
  M z
    a
  R a
  $ [ -x z ] || echo "executable bit lost"


  $ cd ..