copies: avoid filtering by short-circuit dirstate-only copies earlier...
Martin von Zweigbergk
r44749:30862e22 default
@@ -1,1169 +1,1171 @@
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import multiprocessing
import os

from .i18n import _


from .revlogutils.flagutil import REVIDX_SIDEDATA

from . import (
    error,
    match as matchmod,
    node,
    pathutil,
    pycompat,
    util,
)

from .revlogutils import sidedata as sidedatamod

from .utils import stringutil

def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case src mid dst result
    #   1   x   y   -    -
    #   2   x   y   y   x->y
    #   3   x   y   x    -
    #   4   x   y   z   x->z
    #   5   -   x   y    -
    #   6   x   x   y   x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]

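# Illustrative sketch (hypothetical data, not part of the original file):
# with a chained map t = {b'c': b'a', b'b': b'a'}, where b'a' exists in
# src and only b'c' exists in dst, _filter(src, dst, t) drops the entry
# for b'b' (its destination no longer exists in dst, case 1) and keeps
# {b'c': b'a'} (a plain rename, case 4).
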
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result

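# Illustrative sketch (hypothetical data): chaining a rename a->b with a
# later rename b->c resolves transitively to the original source:
#
#   _chain({b'b': b'a'}, {b'c': b'b'}) == {b'b': b'a', b'c': b'a'}
#
# Stale entries such as b'b' here are left in place; _filter() is
# responsible for pruning them afterwards.
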
def _tracefile(fctx, am, basemf):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am

    Note: we used to try to stop after a given limit, but checking if that
    limit was reached turned out to be very expensive. We are better off
    disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path

def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c

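# Illustrative sketch (hypothetical repo state): after "hg cp a b" the
# dirstate typically records {b'b': b'a'} with b'b' in state b'a' (added),
# so the copy is kept; a destination in state b'r' (removed), or one
# rejected by the matcher, is dropped from the returned mapping.
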
def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)

def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource

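# Example configuration (a sketch; values taken from the checks above)
# that makes usechangesetcentricalgo() return True even without the
# b'changeset-sidedata' file-copies mode:
#
#   [experimental]
#   copies.read-from = compatibility
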
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
            if debug:
                dbg(
                    b'debug.copies: time: %f seconds\n'
                    % (util.timer() - start)
                )
    return cm

def _revinfogetter(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask it for the other. So it makes sense
        # to keep the information around when reaching the first parent of a
        # merge and dropping it after it was provided for the second parent.
        #
        # There are cases where only one parent of the merge will be walked.
        # This happens when the "destination" of the copy tracing is a
        # descendant of a new root, not common with the "source". In that
        # case, we will only walk through merge parents that are descendants
        # of changesets common between "source" and "destination".
        #
        # With the current implementation, if such changesets have copy
        # information, we'll keep them in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matter.
        #
        # In addition, it would be possible to reach pathological cases where
        # many first parents are met before any second parent is reached. In
        # that case the cache could grow. If this ever becomes an issue one can
        # safely introduce a maximum cache size. This would trade extra CPU/IO
        # time to save memory.
        merge_caches = {}

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & REVIDX_SIDEDATA:
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX in some cases we over-cache, IGNORE
                    merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
            else:
                p1copies = {}
                p2copies = {}
                removed = []
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo

def _changesetforwardcopies(a, b, match):
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)
    return _combinechangesetcopies(revs, children, b.rev(), revinfo, match)

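# Illustrative sketch (hypothetical revision numbers): tracing from a = 0
# to b = 3 where revisions 2 and 3 both have parent 1, and 1's parent is
# 0, gives missingrevs == {1, 2, 3}, children == {0: [1], 1: [2, 3]} and
# roots == {0}, since rev 0 is the only parent outside the missing set.
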
def _combinechangesetcopies(revs, children, targetrev, revinfo, match):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns (p1, p2, p1copies, p2copies, removed)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
                # _chain makes a copy, which we could avoid in some
                # simple/linear cases.
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset-based copies. It was made without regard to
                # potential filelog-related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    return all_copies[targetrev]

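# Worked sketch (hypothetical inputs): with revs == [0, 1], children ==
# {0: [1], 1: [2]}, targetrev == 2, an always-matching matcher, and
# revinfo() reporting no copies for rev 1 and p1copies == {b'b': b'a'}
# for rev 2, the walk produces all_copies == {2: {b'b': b'a'}} and the
# function returns {b'b': b'a'}.
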
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies

def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r

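# Illustrative sketch (hypothetical data): if walking forward from b to a
# found f == {b'new': b'old'} and b'old' no longer exists in a, the loop
# reverses it into r == {b'old': b'new'}; had b'old' survived in a (a copy
# rather than a rename), the entry would have been skipped.
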
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
+    if y.rev() is None and x == y.p1():
+        if debug:
+            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
+        # short-circuit to avoid issues with merge states
+        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
-        if y.rev() is None and x == y.p1():
-            # short-circuit to avoid issues with merge states
-            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies

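# Usage sketch (hypothetical contexts): right after "hg cp a b",
# pathcopies(repo[b'.'], repo[None]) matches the dirstate short-circuit
# above (y is the working context, x its first parent) and returns
# {b'b': b'a'} without computing y.ancestor(x).
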
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
    files that were moved or copied in one merge parent and modified in
    another. For example:

    o       ---> 4 another commit
    |
    |   o   ---> 3 commit that modifies a.txt
    |  /
    o /     ---> 2 commit that moves a.txt to b.txt
    |/
    o       ---> 1 merge base

    If we try to rebase revision 3 onto revision 4 and the user has copytrace
    disabled, then, since there is no a.txt in revision 4, we print the
    following message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" is an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on in such
        # cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)

def _isfullcopytraceable(repo, c1, base):
    """ Checks whether base, source and destination are all non-public
    branches; if yes, we can use the full copytrace algorithm for increased
    capabilities, since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit on
    the number of changesets from c1 to base; if the number of changesets is
    more than the limit, the full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False

def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
    elif m2[src] != mb[src]:
        if not _related(c2[src], base[src]):
            return
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src

class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source, present in one context but not the other,
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        self.copy = {} if copy is None else copy
        self.renamedelete = {} if renamedelete is None else renamedelete
        self.dirmove = {} if dirmove is None else dirmove
        self.movewithdir = {} if movewithdir is None else movewithdir

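# Illustrative sketch (hypothetical values): a side that renamed b'a' to
# b'b' and moved everything from b'dir/' to b'dir2/' could be described as
#
#   branch_copies(
#       copy={b'b': b'a'},
#       dirmove={b'dir/': b'dir2/'},
#       movewithdir={b'dir/new': b'dir2/new'},
#   )
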
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a', the common destination 'c' means
                # the rename is not treated as divergent.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    if repo.ui.debugflag:
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"   on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge

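# Illustrative sketch (hypothetical data): if pathcopies(base, c1) returned
# copies1 == {b'b': b'a', b'c': b'a'}, the inversion above yields
# inversecopies1 == {b'a': [b'b', b'c']}; if side 2 copied b'a' to b'c' and
# b'd' and both sides deleted b'a', the shared destination b'c' is recorded
# in copy1/copy2 and the rename is not reported as divergent.
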
def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)
    """
    # generate a directory move map
    d = ctx.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d and ddst in d:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d, invalid

    if not dirmove:
        return {}, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                        break

    return dirmove, movewithdir

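# Illustrative sketch (hypothetical paths): if every file under b'old/' was
# moved to b'new/' on ctx's side, the scan yields dirmove ==
# {b'old/': b'new/'}; a file b'old/f' added on the other side then gets
# scheduled to follow the directory: movewithdir == {b'old/f': b'new/f'}.
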
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    The source branch is the commits from base up to c2, not including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

    [experimental]
    copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}

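# Illustrative sketch (hypothetical paths): if b'dir1/foo.py' is present in
# base and c2 but missing from c1, while c1 newly contains b'dir2/foo.py',
# the shared basename b'foo.py' makes b'dir2/foo.py' a move candidate; it
# is recorded in copies1 only if _related() finds a common file ancestor.
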
870 def _related(f1, f2):
872 def _related(f1, f2):
871 """return True if f1 and f2 filectx have a common ancestor
873 """return True if f1 and f2 filectx have a common ancestor
872
874
873 Walk back to common ancestor to see if the two files originate
875 Walk back to common ancestor to see if the two files originate
874 from the same file. Since workingfilectx's rev() is None it messes
876 from the same file. Since workingfilectx's rev() is None it messes
875 up the integer comparison logic, hence the pre-step check for
877 up the integer comparison logic, hence the pre-step check for
876 None (f1 and f2 can only be workingfilectx's initially).
878 None (f1 and f2 can only be workingfilectx's initially).
877 """
879 """
878
880
879 if f1 == f2:
881 if f1 == f2:
880 return True # a match
882 return True # a match
881
883
882 g1, g2 = f1.ancestors(), f2.ancestors()
884 g1, g2 = f1.ancestors(), f2.ancestors()
883 try:
885 try:
884 f1r, f2r = f1.linkrev(), f2.linkrev()
886 f1r, f2r = f1.linkrev(), f2.linkrev()
885
887
886 if f1r is None:
888 if f1r is None:
887 f1 = next(g1)
889 f1 = next(g1)
888 if f2r is None:
890 if f2r is None:
889 f2 = next(g2)
891 f2 = next(g2)
890
892
891 while True:
893 while True:
892 f1r, f2r = f1.linkrev(), f2.linkrev()
894 f1r, f2r = f1.linkrev(), f2.linkrev()
893 if f1r > f2r:
895 if f1r > f2r:
894 f1 = next(g1)
896 f1 = next(g1)
895 elif f2r > f1r:
897 elif f2r > f1r:
896 f2 = next(g2)
898 f2 = next(g2)
897 else: # f1 and f2 point to files in the same linkrev
899 else: # f1 and f2 point to files in the same linkrev
898 return f1 == f2 # true if they point to the same file
900 return f1 == f2 # true if they point to the same file
899 except StopIteration:
901 except StopIteration:
900 return False
902 return False
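# A minimal sketch of the same two-pointer descent with plain integers in
# place of linkrevs (hypothetical data, not the Mercurial API): each list is
# a file's ancestor linkrevs in decreasing order; we repeatedly advance the
# side with the higher value until both sides meet or one runs out. The real
# code additionally checks f1 == f2 once the linkrevs match.
def _descend_to_common(revs1, revs2):
    g1, g2 = iter(revs1), iter(revs2)
    try:
        r1, r2 = next(g1), next(g2)
        while True:
            if r1 > r2:
                r1 = next(g1)
            elif r2 > r1:
                r2 = next(g2)
            else:
                return True  # both walks reached the same linkrev
    except StopIteration:
        return False

# _descend_to_common([10, 7, 3], [8, 3]) -> True (both descend to 3)
# _descend_to_common([10, 7], [8, 4]) -> False (one side is exhausted)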
901
903
902
904
903 def graftcopies(wctx, ctx, base):
905 def graftcopies(wctx, ctx, base):
904 """reproduce copies between base and ctx in the wctx
906 """reproduce copies between base and ctx in the wctx
905
907
906 Unlike mergecopies(), this function will only consider copies between base
908 Unlike mergecopies(), this function will only consider copies between base
907 and ctx; it will ignore copies between base and wctx. Also unlike
909 and ctx; it will ignore copies between base and wctx. Also unlike
908 mergecopies(), this function will apply copies to the working copy (instead
910 mergecopies(), this function will apply copies to the working copy (instead
909 of just returning information about the copies). That makes it cheaper
911 of just returning information about the copies). That makes it cheaper
910 (especially in the common case of base==ctx.p1()) and useful also when
912 (especially in the common case of base==ctx.p1()) and useful also when
911 experimental.copytrace=off.
913 experimental.copytrace=off.
912
914
913 merge.update() will have already marked most copies, but it will only
915 merge.update() will have already marked most copies, but it will only
914 mark copies if it thinks the source files are related (see
916 mark copies if it thinks the source files are related (see
915 merge._related()). It will also not mark copies if the file wasn't modified
917 merge._related()). It will also not mark copies if the file wasn't modified
916 on the local side. This function adds the copies that were "missed"
918 on the local side. This function adds the copies that were "missed"
917 by merge.update().
919 by merge.update().
918 """
920 """
919 new_copies = pathcopies(base, ctx)
921 new_copies = pathcopies(base, ctx)
920 _filter(wctx.p1(), wctx, new_copies)
922 _filter(wctx.p1(), wctx, new_copies)
921 for dst, src in pycompat.iteritems(new_copies):
923 for dst, src in pycompat.iteritems(new_copies):
922 wctx[dst].markcopied(src)
924 wctx[dst].markcopied(src)
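# Typical (hypothetical) call site, matching the docstring above: wctx is
# the working context repo[None], ctx is the changeset being grafted, and
# base is usually ctx.p1():
#
#   graftcopies(repo[None], repo[rev], repo[rev].p1())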
923
925
924
926
925 def computechangesetfilesadded(ctx):
927 def computechangesetfilesadded(ctx):
926 """return the list of files added in a changeset
928 """return the list of files added in a changeset
927 """
929 """
928 added = []
930 added = []
929 for f in ctx.files():
931 for f in ctx.files():
930 if not any(f in p for p in ctx.parents()):
932 if not any(f in p for p in ctx.parents()):
931 added.append(f)
933 added.append(f)
932 return added
934 return added
933
935
934
936
935 def computechangesetfilesremoved(ctx):
937 def computechangesetfilesremoved(ctx):
936 """return the list of files removed in a changeset
938 """return the list of files removed in a changeset
937 """
939 """
938 removed = []
940 removed = []
939 for f in ctx.files():
941 for f in ctx.files():
940 if f not in ctx:
942 if f not in ctx:
941 removed.append(f)
943 removed.append(f)
942 return removed
944 return removed
943
945
944
946
945 def computechangesetcopies(ctx):
947 def computechangesetcopies(ctx):
946 """return the copies data for a changeset
948 """return the copies data for a changeset
947
949
948 The copies data are returned as a pair of dictionaries (p1copies, p2copies).
950 The copies data are returned as a pair of dictionaries (p1copies, p2copies).
949 
951 
950 Each dictionary is of the form `{newname: oldname}`.
952 Each dictionary is of the form `{newname: oldname}`.
951 """
953 """
952 p1copies = {}
954 p1copies = {}
953 p2copies = {}
955 p2copies = {}
954 p1 = ctx.p1()
956 p1 = ctx.p1()
955 p2 = ctx.p2()
957 p2 = ctx.p2()
956 narrowmatch = ctx._repo.narrowmatch()
958 narrowmatch = ctx._repo.narrowmatch()
957 for dst in ctx.files():
959 for dst in ctx.files():
958 if not narrowmatch(dst) or dst not in ctx:
960 if not narrowmatch(dst) or dst not in ctx:
959 continue
961 continue
960 copied = ctx[dst].renamed()
962 copied = ctx[dst].renamed()
961 if not copied:
963 if not copied:
962 continue
964 continue
963 src, srcnode = copied
965 src, srcnode = copied
964 if src in p1 and p1[src].filenode() == srcnode:
966 if src in p1 and p1[src].filenode() == srcnode:
965 p1copies[dst] = src
967 p1copies[dst] = src
966 elif src in p2 and p2[src].filenode() == srcnode:
968 elif src in p2 and p2[src].filenode() == srcnode:
967 p2copies[dst] = src
969 p2copies[dst] = src
968 return p1copies, p2copies
970 return p1copies, p2copies
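# Sketch of the returned shape for a hypothetical changeset whose only
# change is a rename recorded against p1:
#
#   p1copies, p2copies = computechangesetcopies(ctx)
#   p1copies -> {b'new.txt': b'old.txt'}
#   p2copies -> {}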
969
971
970
972
971 def encodecopies(files, copies):
973 def encodecopies(files, copies):
972 items = []
974 items = []
973 for i, dst in enumerate(files):
975 for i, dst in enumerate(files):
974 if dst in copies:
976 if dst in copies:
975 items.append(b'%d\0%s' % (i, copies[dst]))
977 items.append(b'%d\0%s' % (i, copies[dst]))
976 if len(items) != len(copies):
978 if len(items) != len(copies):
977 raise error.ProgrammingError(
979 raise error.ProgrammingError(
978 b'some copy targets missing from file list'
980 b'some copy targets missing from file list'
979 )
981 )
980 return b"\n".join(items)
982 return b"\n".join(items)
981
983
982
984
983 def decodecopies(files, data):
985 def decodecopies(files, data):
984 try:
986 try:
985 copies = {}
987 copies = {}
986 if not data:
988 if not data:
987 return copies
989 return copies
988 for l in data.split(b'\n'):
990 for l in data.split(b'\n'):
989 strindex, src = l.split(b'\0')
991 strindex, src = l.split(b'\0')
990 i = int(strindex)
992 i = int(strindex)
991 dst = files[i]
993 dst = files[i]
992 copies[dst] = src
994 copies[dst] = src
993 return copies
995 return copies
994 except (ValueError, IndexError):
996 except (ValueError, IndexError):
995 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
997 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
996 # used different syntax for the value.
998 # used different syntax for the value.
997 return None
999 return None
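# A minimal round-trip sketch for the copy encoding above (hypothetical
# data): each copy is stored as "<index of dst in files>\0<src>", joined by
# newlines, so destination names never need to be repeated.
#
#   files = [b'dir/new.txt', b'other.txt']  # sorted files of the changeset
#   copies = {b'dir/new.txt': b'dir/old.txt'}
#   data = encodecopies(files, copies)      # b'0\x00dir/old.txt'
#   decodecopies(files, data) == copies     # True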
998
1000
999
1001
1000 def encodefileindices(files, subset):
1002 def encodefileindices(files, subset):
1001 subset = set(subset)
1003 subset = set(subset)
1002 indices = []
1004 indices = []
1003 for i, f in enumerate(files):
1005 for i, f in enumerate(files):
1004 if f in subset:
1006 if f in subset:
1005 indices.append(b'%d' % i)
1007 indices.append(b'%d' % i)
1006 return b'\n'.join(indices)
1008 return b'\n'.join(indices)
1007
1009
1008
1010
1009 def decodefileindices(files, data):
1011 def decodefileindices(files, data):
1010 try:
1012 try:
1011 subset = []
1013 subset = []
1012 if not data:
1014 if not data:
1013 return subset
1015 return subset
1014 for strindex in data.split(b'\n'):
1016 for strindex in data.split(b'\n'):
1015 i = int(strindex)
1017 i = int(strindex)
1016 if i < 0 or i >= len(files):
1018 if i < 0 or i >= len(files):
1017 return None
1019 return None
1018 subset.append(files[i])
1020 subset.append(files[i])
1019 return subset
1021 return subset
1020 except (ValueError, IndexError):
1022 except (ValueError, IndexError):
1021 # Perhaps someone had chosen the same key name (e.g. "added") and
1023 # Perhaps someone had chosen the same key name (e.g. "added") and
1022 # used different syntax for the value.
1024 # used different syntax for the value.
1023 return None
1025 return None
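# The same idea for file indices (hypothetical data): the subset is stored
# as newline-separated indices into the sorted file list, and decoding
# rejects out-of-range indices by returning None.
#
#   files = [b'a', b'b', b'c']
#   encodefileindices(files, [b'a', b'c'])  # b'0\n2'
#   decodefileindices(files, b'0\n2')       # [b'a', b'c']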
1024
1026
1025
1027
1026 def _getsidedata(srcrepo, rev):
1028 def _getsidedata(srcrepo, rev):
1027 ctx = srcrepo[rev]
1029 ctx = srcrepo[rev]
1028 filescopies = computechangesetcopies(ctx)
1030 filescopies = computechangesetcopies(ctx)
1029 filesadded = computechangesetfilesadded(ctx)
1031 filesadded = computechangesetfilesadded(ctx)
1030 filesremoved = computechangesetfilesremoved(ctx)
1032 filesremoved = computechangesetfilesremoved(ctx)
1031 sidedata = {}
1033 sidedata = {}
1032 if any([filescopies, filesadded, filesremoved]):
1034 if any([filescopies, filesadded, filesremoved]):
1033 sortedfiles = sorted(ctx.files())
1035 sortedfiles = sorted(ctx.files())
1034 p1copies, p2copies = filescopies
1036 p1copies, p2copies = filescopies
1035 p1copies = encodecopies(sortedfiles, p1copies)
1037 p1copies = encodecopies(sortedfiles, p1copies)
1036 p2copies = encodecopies(sortedfiles, p2copies)
1038 p2copies = encodecopies(sortedfiles, p2copies)
1037 filesadded = encodefileindices(sortedfiles, filesadded)
1039 filesadded = encodefileindices(sortedfiles, filesadded)
1038 filesremoved = encodefileindices(sortedfiles, filesremoved)
1040 filesremoved = encodefileindices(sortedfiles, filesremoved)
1039 if p1copies:
1041 if p1copies:
1040 sidedata[sidedatamod.SD_P1COPIES] = p1copies
1042 sidedata[sidedatamod.SD_P1COPIES] = p1copies
1041 if p2copies:
1043 if p2copies:
1042 sidedata[sidedatamod.SD_P2COPIES] = p2copies
1044 sidedata[sidedatamod.SD_P2COPIES] = p2copies
1043 if filesadded:
1045 if filesadded:
1044 sidedata[sidedatamod.SD_FILESADDED] = filesadded
1046 sidedata[sidedatamod.SD_FILESADDED] = filesadded
1045 if filesremoved:
1047 if filesremoved:
1046 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
1048 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
1047 return sidedata
1049 return sidedata
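# Example of the resulting sidedata mapping for a hypothetical changeset
# renaming old.txt to new.txt (sorted files: [b'new.txt', b'old.txt']); the
# keys are shown by name but are numeric block identifiers in sidedatamod:
#
#   {sidedatamod.SD_P1COPIES: b'0\x00old.txt',  # new.txt copied from old.txt
#    sidedatamod.SD_FILESADDED: b'0',           # index of new.txt
#    sidedatamod.SD_FILESREMOVED: b'1'}         # index of old.txt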
1048
1050
1049
1051
1050 def getsidedataadder(srcrepo, destrepo):
1052 def getsidedataadder(srcrepo, destrepo):
1051 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
1053 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
1052 if pycompat.iswindows or not use_w:
1054 if pycompat.iswindows or not use_w:
1053 return _get_simple_sidedata_adder(srcrepo, destrepo)
1055 return _get_simple_sidedata_adder(srcrepo, destrepo)
1054 else:
1056 else:
1055 return _get_worker_sidedata_adder(srcrepo, destrepo)
1057 return _get_worker_sidedata_adder(srcrepo, destrepo)
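# Note: on Windows the simple adder is always used, regardless of the
# experimental.worker.repository-upgrade setting; the worker variant below
# relies on multiprocessing behaviour exercised only on fork-capable
# platforms (the config gate is explicit above; the rationale is an
# inference, not stated in this module).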
1056
1058
1057
1059
1058 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
1060 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
1059 """The function used by worker precomputing sidedata
1061 """The function used by worker precomputing sidedata
1060
1062
1061 It read an input queue containing revision numbers
1063 It read an input queue containing revision numbers
1062 It write in an output queue containing (rev, <sidedata-map>)
1064 It write in an output queue containing (rev, <sidedata-map>)
1063
1065
1064 The `None` input value is used as a stop signal.
1066 The `None` input value is used as a stop signal.
1065
1067
1066 The `tokens` semaphore is user to avoid having too many unprocessed
1068 The `tokens` semaphore is user to avoid having too many unprocessed
1067 entries. The workers needs to acquire one token before fetching a task.
1069 entries. The workers needs to acquire one token before fetching a task.
1068 They will be released by the consumer of the produced data.
1070 They will be released by the consumer of the produced data.
1069 """
1071 """
1070 tokens.acquire()
1072 tokens.acquire()
1071 rev = revs_queue.get()
1073 rev = revs_queue.get()
1072 while rev is not None:
1074 while rev is not None:
1073 data = _getsidedata(srcrepo, rev)
1075 data = _getsidedata(srcrepo, rev)
1074 sidedata_queue.put((rev, data))
1076 sidedata_queue.put((rev, data))
1075 tokens.acquire()
1077 tokens.acquire()
1076 rev = revs_queue.get()
1078 rev = revs_queue.get()
1077 # processing of `None` is completed, release the token.
1079 # processing of `None` is completed, release the token.
1078 tokens.release()
1080 tokens.release()
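# Rough invariant of the token protocol above (a reading aid, not new
# behaviour): a worker must acquire a token before producing a result, and
# the consumer releases one token per result it consumes, so at most
# nbworkers * BUFF_PER_WORKER results are ever buffered unconsumed.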
1079
1081
1080
1082
1081 BUFF_PER_WORKER = 50
1083 BUFF_PER_WORKER = 50
1082
1084
1083
1085
1084 def _get_worker_sidedata_adder(srcrepo, destrepo):
1086 def _get_worker_sidedata_adder(srcrepo, destrepo):
1085 """The parallel version of the sidedata computation
1087 """The parallel version of the sidedata computation
1086
1088
1087 This code spawns a pool of workers that precompute a buffer of sidedata
1089 This code spawns a pool of workers that precompute a buffer of sidedata
1088 before we actually need it"""
1090 before we actually need it"""
1089 # avoid circular import copies -> scmutil -> worker -> copies
1091 # avoid circular import copies -> scmutil -> worker -> copies
1090 from . import worker
1092 from . import worker
1091
1093
1092 nbworkers = worker._numworkers(srcrepo.ui)
1094 nbworkers = worker._numworkers(srcrepo.ui)
1093
1095
1094 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
1096 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
1095 revsq = multiprocessing.Queue()
1097 revsq = multiprocessing.Queue()
1096 sidedataq = multiprocessing.Queue()
1098 sidedataq = multiprocessing.Queue()
1097
1099
1098 assert srcrepo.filtername is None
1100 assert srcrepo.filtername is None
1099 # queue all tasks beforehand; revision numbers are small and it makes
1101 # queue all tasks beforehand; revision numbers are small and it makes
1100 # synchronisation simpler
1102 # synchronisation simpler
1101 #
1103 #
1102 # Since the computation for each node can be quite expensive, the overhead
1104 # Since the computation for each node can be quite expensive, the overhead
1103 # of using a single queue is not relevant. In practice, most computations
1105 # of using a single queue is not relevant. In practice, most computations
1104 # are fast but some are very expensive and dominate all the other smaller
1106 # are fast but some are very expensive and dominate all the other smaller
1105 # costs.
1107 # costs.
1106 for r in srcrepo.changelog.revs():
1108 for r in srcrepo.changelog.revs():
1107 revsq.put(r)
1109 revsq.put(r)
1108 # queue the "no more tasks" markers
1110 # queue the "no more tasks" markers
1109 for i in range(nbworkers):
1111 for i in range(nbworkers):
1110 revsq.put(None)
1112 revsq.put(None)
1111
1113
1112 allworkers = []
1114 allworkers = []
1113 for i in range(nbworkers):
1115 for i in range(nbworkers):
1114 args = (srcrepo, revsq, sidedataq, tokens)
1116 args = (srcrepo, revsq, sidedataq, tokens)
1115 w = multiprocessing.Process(target=_sidedata_worker, args=args)
1117 w = multiprocessing.Process(target=_sidedata_worker, args=args)
1116 allworkers.append(w)
1118 allworkers.append(w)
1117 w.start()
1119 w.start()
1118
1120
1119 # dictionary to store results for revisions higher than the one we are
1121 # dictionary to store results for revisions higher than the one we are
1120 # looking for. For example, if we need the sidedata map for 42, and 43 is
1122 # looking for. For example, if we need the sidedata map for 42, and 43 is
1121 # received, we shelve 43 for later use.
1123 # received, we shelve 43 for later use.
1122 staging = {}
1124 staging = {}
1123
1125
1124 def sidedata_companion(revlog, rev):
1126 def sidedata_companion(revlog, rev):
1125 sidedata = {}
1127 sidedata = {}
1126 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1128 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1127 # Is the data previously shelved?
1129 # Is the data previously shelved?
1128 sidedata = staging.pop(rev, None)
1130 sidedata = staging.pop(rev, None)
1129 if sidedata is None:
1131 if sidedata is None:
1130 # look at the queued results until we find the one we are looking
1132 # look at the queued results until we find the one we are looking
1131 # for (shelve the other ones)
1133 # for (shelve the other ones)
1132 r, sidedata = sidedataq.get()
1134 r, sidedata = sidedataq.get()
1133 while r != rev:
1135 while r != rev:
1134 staging[r] = sidedata
1136 staging[r] = sidedata
1135 r, sidedata = sidedataq.get()
1137 r, sidedata = sidedataq.get()
1136 tokens.release()
1138 tokens.release()
1137 return False, (), sidedata
1139 return False, (), sidedata
1138
1140
1139 return sidedata_companion
1141 return sidedata_companion
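# A minimal, self-contained sketch of the shelving logic above (hypothetical
# helper, not part of this module): results arrive out of order; anything
# that is not the wanted revision is parked in a staging dict until its turn.
def _drain_in_order(results, wanted_order):
    queue = collections.deque(results)  # stand-in for sidedataq
    staging = {}
    for wanted in wanted_order:
        data = staging.pop(wanted, None)
        while data is None:  # assumes real payloads are never None
            r, d = queue.popleft()
            if r == wanted:
                data = d
            else:
                staging[r] = d
        yield wanted, data

# list(_drain_in_order([(43, b'x'), (42, b'y')], [42, 43]))
# -> [(42, b'y'), (43, b'x')]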
1140
1142
1141
1143
1142 def _get_simple_sidedata_adder(srcrepo, destrepo):
1144 def _get_simple_sidedata_adder(srcrepo, destrepo):
1143 """The simple version of the sidedata computation
1145 """The simple version of the sidedata computation
1144
1146
1145 It just computes it in the same thread on request"""
1147 It just computes it in the same thread on request"""
1146
1148
1147 def sidedatacompanion(revlog, rev):
1149 def sidedatacompanion(revlog, rev):
1148 sidedata = {}
1150 sidedata = {}
1149 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1151 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1150 sidedata = _getsidedata(srcrepo, rev)
1152 sidedata = _getsidedata(srcrepo, rev)
1151 return False, (), sidedata
1153 return False, (), sidedata
1152
1154
1153 return sidedatacompanion
1155 return sidedatacompanion
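# Reading aid (an inference from the three companions in this module, not a
# documented contract): a sidedata companion returns a 3-tuple of
# (<skip this rev?>, <sidedata keys to drop>, <sidedata to add>); the adders
# above never skip or drop, while the remover below never adds.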
1154
1156
1155
1157
1156 def getsidedataremover(srcrepo, destrepo):
1158 def getsidedataremover(srcrepo, destrepo):
1157 def sidedatacompanion(revlog, rev):
1159 def sidedatacompanion(revlog, rev):
1158 f = ()
1160 f = ()
1159 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1161 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1160 if revlog.flags(rev) & REVIDX_SIDEDATA:
1162 if revlog.flags(rev) & REVIDX_SIDEDATA:
1161 f = (
1163 f = (
1162 sidedatamod.SD_P1COPIES,
1164 sidedatamod.SD_P1COPIES,
1163 sidedatamod.SD_P2COPIES,
1165 sidedatamod.SD_P2COPIES,
1164 sidedatamod.SD_FILESADDED,
1166 sidedatamod.SD_FILESADDED,
1165 sidedatamod.SD_FILESREMOVED,
1167 sidedatamod.SD_FILESREMOVED,
1166 )
1168 )
1167 return False, f, {}
1169 return False, f, {}
1168
1170
1169 return sidedatacompanion
1171 return sidedatacompanion
@@ -1,153 +1,152 b''
1 ===================================
1 ===================================
2 Test repository filtering avoidance
2 Test repository filtering avoidance
3 ===================================
3 ===================================
4
4
5 This test file is a bit special, as it does not check features but performance-related internal code paths.
5 This test file is a bit special, as it does not check features but performance-related internal code paths.
6
6
7 Right now, filtering a repository comes with a cost that might be significant.
7 Right now, filtering a repository comes with a cost that might be significant.
8 Until this gets better, there are various operations that try hard not to trigger
8 Until this gets better, there are various operations that try hard not to trigger
9 a filtering computation. This test file makes sure we don't reintroduce code that triggers the filtering for these operations:
9 a filtering computation. This test file makes sure we don't reintroduce code that triggers the filtering for these operations:
10
10
11 Setup
11 Setup
12 -----
12 -----
13 $ hg init test-repo
13 $ hg init test-repo
14 $ cd test-repo
14 $ cd test-repo
15 $ echo "some line" > z
15 $ echo "some line" > z
16 $ echo a > a
16 $ echo a > a
17 $ hg commit -Am a
17 $ hg commit -Am a
18 adding a
18 adding a
19 adding z
19 adding z
20 $ echo "in a" >> z
20 $ echo "in a" >> z
21 $ echo b > b
21 $ echo b > b
22 $ hg commit -Am b
22 $ hg commit -Am b
23 adding b
23 adding b
24 $ echo "file" >> z
24 $ echo "file" >> z
25 $ echo c > c
25 $ echo c > c
26 $ hg commit -Am c
26 $ hg commit -Am c
27 adding c
27 adding c
28 $ hg rm a
28 $ hg rm a
29 $ echo c1 > c
29 $ echo c1 > c
30 $ hg add c
30 $ hg add c
31 c already tracked!
31 c already tracked!
32 $ echo d > d
32 $ echo d > d
33 $ hg add d
33 $ hg add d
34 $ rm b
34 $ rm b
35
35
36 $ cat << EOF >> $HGRCPATH
36 $ cat << EOF >> $HGRCPATH
37 > [devel]
37 > [devel]
38 > debug.repo-filters = yes
38 > debug.repo-filters = yes
39 > [ui]
39 > [ui]
40 > debug = yes
40 > debug = yes
41 > EOF
41 > EOF
42
42
43
43
44 tests
44 tests
45 -----
45 -----
46
46
47 Getting the node of `null`
47 Getting the node of `null`
48
48
49 $ hg log -r null -T "{node}\n"
49 $ hg log -r null -T "{node}\n"
50 0000000000000000000000000000000000000000
50 0000000000000000000000000000000000000000
51
51
52 Getting basic changeset information about `null`
52 Getting basic changeset information about `null`
53
53
54 $ hg log -r null -T "{node}\n{date}\n"
54 $ hg log -r null -T "{node}\n{date}\n"
55 0000000000000000000000000000000000000000
55 0000000000000000000000000000000000000000
56 0.00
56 0.00
57
57
58 Getting status of null
58 Getting status of null
59
59
60 $ hg status --change null
60 $ hg status --change null
61
61
62 Getting status of working copy
62 Getting status of working copy
63
63
64 $ hg status
64 $ hg status
65 M c
65 M c
66 A d
66 A d
67 R a
67 R a
68 ! b
68 ! b
69
69
70 $ hg status --copies
70 $ hg status --copies
71 debug.filters: computing revision filter for "visible"
72 M c
71 M c
73 A d
72 A d
74 R a
73 R a
75 ! b
74 ! b
76
75
77 Getting data about the working copy parent
76 Getting data about the working copy parent
78
77
79 $ hg log -r '.' -T "{node}\n{date}\n"
78 $ hg log -r '.' -T "{node}\n{date}\n"
80 c2932ca7786be30b67154d541a8764fae5532261
79 c2932ca7786be30b67154d541a8764fae5532261
81 0.00
80 0.00
82
81
83 Getting working copy diff
82 Getting working copy diff
84
83
85 $ hg diff
84 $ hg diff
86 diff -r c2932ca7786be30b67154d541a8764fae5532261 a
85 diff -r c2932ca7786be30b67154d541a8764fae5532261 a
87 --- a/a Thu Jan 01 00:00:00 1970 +0000
86 --- a/a Thu Jan 01 00:00:00 1970 +0000
88 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
87 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
89 @@ -1,1 +0,0 @@
88 @@ -1,1 +0,0 @@
90 -a
89 -a
91 diff -r c2932ca7786be30b67154d541a8764fae5532261 c
90 diff -r c2932ca7786be30b67154d541a8764fae5532261 c
92 --- a/c Thu Jan 01 00:00:00 1970 +0000
91 --- a/c Thu Jan 01 00:00:00 1970 +0000
93 +++ b/c Thu Jan 01 00:00:00 1970 +0000
92 +++ b/c Thu Jan 01 00:00:00 1970 +0000
94 @@ -1,1 +1,1 @@
93 @@ -1,1 +1,1 @@
95 -c
94 -c
96 +c1
95 +c1
97 diff -r c2932ca7786be30b67154d541a8764fae5532261 d
96 diff -r c2932ca7786be30b67154d541a8764fae5532261 d
98 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
97 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
99 +++ b/d Thu Jan 01 00:00:00 1970 +0000
98 +++ b/d Thu Jan 01 00:00:00 1970 +0000
100 @@ -0,0 +1,1 @@
99 @@ -0,0 +1,1 @@
101 +d
100 +d
102 $ hg diff --change .
101 $ hg diff --change .
103 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 c
102 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 c
104 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
103 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
105 +++ b/c Thu Jan 01 00:00:00 1970 +0000
104 +++ b/c Thu Jan 01 00:00:00 1970 +0000
106 @@ -0,0 +1,1 @@
105 @@ -0,0 +1,1 @@
107 +c
106 +c
108 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 z
107 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 z
109 --- a/z Thu Jan 01 00:00:00 1970 +0000
108 --- a/z Thu Jan 01 00:00:00 1970 +0000
110 +++ b/z Thu Jan 01 00:00:00 1970 +0000
109 +++ b/z Thu Jan 01 00:00:00 1970 +0000
111 @@ -1,2 +1,3 @@
110 @@ -1,2 +1,3 @@
112 some line
111 some line
113 in a
112 in a
114 +file
113 +file
115
114
116 exporting the current changeset
115 exporting the current changeset
117
116
118 $ hg export
117 $ hg export
119 exporting patch:
118 exporting patch:
120 # HG changeset patch
119 # HG changeset patch
121 # User test
120 # User test
122 # Date 0 0
121 # Date 0 0
123 # Thu Jan 01 00:00:00 1970 +0000
122 # Thu Jan 01 00:00:00 1970 +0000
124 # Node ID c2932ca7786be30b67154d541a8764fae5532261
123 # Node ID c2932ca7786be30b67154d541a8764fae5532261
125 # Parent 05293e5dd8d1ae4f84a8520a11c6f97cad26deca
124 # Parent 05293e5dd8d1ae4f84a8520a11c6f97cad26deca
126 c
125 c
127
126
128 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 c
127 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 c
129 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
128 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
130 +++ b/c Thu Jan 01 00:00:00 1970 +0000
129 +++ b/c Thu Jan 01 00:00:00 1970 +0000
131 @@ -0,0 +1,1 @@
130 @@ -0,0 +1,1 @@
132 +c
131 +c
133 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 z
132 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 z
134 --- a/z Thu Jan 01 00:00:00 1970 +0000
133 --- a/z Thu Jan 01 00:00:00 1970 +0000
135 +++ b/z Thu Jan 01 00:00:00 1970 +0000
134 +++ b/z Thu Jan 01 00:00:00 1970 +0000
136 @@ -1,2 +1,3 @@
135 @@ -1,2 +1,3 @@
137 some line
136 some line
138 in a
137 in a
139 +file
138 +file
140
139
141 using annotate
140 using annotate
142
141
143 - file with a single change
142 - file with a single change
144
143
145 $ hg annotate a
144 $ hg annotate a
146 0: a
145 0: a
147
146
148 - file with multiple change
147 - file with multiple change
149
148
150 $ hg annotate z
149 $ hg annotate z
151 0: some line
150 0: some line
152 1: in a
151 1: in a
153 2: file
152 2: file