copies-rust: parse the changed-file sidedata directly in rust...
marmoute -
r46674:e0313b0a default
@@ -1,1153 +1,1178 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from .i18n import _
14 14
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 policy,
21 21 pycompat,
22 22 util,
23 23 )
24 24
25 25
26 26 from .utils import stringutil
27 27
28 from .revlogutils import flagutil
28 from .revlogutils import (
29 flagutil,
30 sidedata as sidedatamod,
31 )
29 32
30 33 rustmod = policy.importrust("copy_tracing")
31 34
32 35
33 36 def _filter(src, dst, t):
34 37 """filters out invalid copies after chaining"""
35 38
36 39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 41 # in the following table (not including trivial cases). For example, case 2
39 42 # is where a file existed in 'src' and remained under that name in 'mid' and
40 43 # then was renamed between 'mid' and 'dst'.
41 44 #
42 45 # case src mid dst result
43 46 # 1 x y - -
44 47 # 2 x y y x->y
45 48 # 3 x y x -
46 49 # 4 x y z x->z
47 50 # 5 - x y -
48 51 # 6 x x y x->y
49 52 #
50 53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 55 # between 5 and 6, so it includes all cases in its result.
53 56 # Cases 1, 3, and 5 are then removed by _filter().
54 57
55 58 for k, v in list(t.items()):
56 59 # remove copies from files that didn't exist
57 60 if v not in src:
58 61 del t[k]
59 62 # remove criss-crossed copies
60 63 elif k in src and v in dst:
61 64 del t[k]
62 65 # remove copies to files that were then removed
63 66 elif k not in dst:
64 67 del t[k]
65 68
66 69
67 70 def _chain(prefix, suffix):
68 71 """chain two sets of copies 'prefix' and 'suffix'"""
69 72 result = prefix.copy()
70 73 for key, value in pycompat.iteritems(suffix):
71 74 result[key] = prefix.get(value, value)
72 75 return result
73 76
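To make the chaining rule concrete, here is a small self-contained sketch (hypothetical paths, plain dicts instead of real copy maps) of what `_chain()` produces when a rename is itself renamed, and which entry `_filter()` would then drop:

```python
# Sketch of _chain()'s composition rule, using plain dicts and made-up paths.
# prefix maps {dst@mid: src@src}, suffix maps {dst@dst: src@mid}.
prefix = {b'b': b'a'}  # 'a' was renamed to 'b' between src and mid
suffix = {b'c': b'b'}  # 'b' was renamed to 'c' between mid and dst

result = dict(prefix)
for key, value in suffix.items():
    # follow the copy one more step back when the intermediate name is known
    result[key] = prefix.get(value, value)

assert result == {b'b': b'a', b'c': b'a'}
# _filter() would then remove the b'b' -> b'a' entry, because 'b' no longer
# exists in dst (case 1 in the table above).
```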
74 77
75 78 def _tracefile(fctx, am, basemf):
76 79 """return file context that is the ancestor of fctx present in ancestor
77 80 manifest am
78 81
79 82 Note: we used to try to stop after a given limit; however, checking whether
80 83 that limit was reached turned out to be very expensive, so we are better off
81 84 disabling that feature.
82 85
83 86 for f in fctx.ancestors():
84 87 path = f.path()
85 88 if am.get(path, None) == f.filenode():
86 89 return path
87 90 if basemf and basemf.get(path, None) == f.filenode():
88 91 return path
89 92
90 93
91 94 def _dirstatecopies(repo, match=None):
92 95 ds = repo.dirstate
93 96 c = ds.copies().copy()
94 97 for k in list(c):
95 98 if ds[k] not in b'anm' or (match and not match(k)):
96 99 del c[k]
97 100 return c
98 101
99 102
100 103 def _computeforwardmissing(a, b, match=None):
101 104 """Computes which files are in b but not a.
102 105 This is its own function so extensions can easily wrap this call to see what
103 106 files _forwardcopies is about to process.
104 107 """
105 108 ma = a.manifest()
106 109 mb = b.manifest()
107 110 return mb.filesnotin(ma, match=match)
108 111
109 112
110 113 def usechangesetcentricalgo(repo):
111 114 """Checks if we should use changeset-centric copy algorithms"""
112 115 if repo.filecopiesmode == b'changeset-sidedata':
113 116 return True
114 117 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
115 118 changesetsource = (b'changeset-only', b'compatibility')
116 119 return readfrom in changesetsource
117 120
118 121
119 122 def _committedforwardcopies(a, b, base, match):
120 123 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
121 124 # files might have to be traced back to the fctx parent of the last
122 125 # one-side-only changeset, but not further back than that
123 126 repo = a._repo
124 127
125 128 if usechangesetcentricalgo(repo):
126 129 return _changesetforwardcopies(a, b, match)
127 130
128 131 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
129 132 dbg = repo.ui.debug
130 133 if debug:
131 134 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
132 135 am = a.manifest()
133 136 basemf = None if base is None else base.manifest()
134 137
135 138 # find where new files came from
136 139 # we currently don't try to find where old files went, too expensive
137 140 # this means we can miss a case like 'hg rm b; hg cp a b'
138 141 cm = {}
139 142
140 143 # Computing the forward missing is quite expensive on large manifests, since
141 144 # it compares the entire manifests. We can optimize it in the common use
142 145 # case of computing what copies are in a commit versus its parent (like
143 146 # during a rebase or histedit). Note, we exclude merge commits from this
144 147 # optimization, since the ctx.files() for a merge commit is not correct for
145 148 # this comparison.
146 149 forwardmissingmatch = match
147 150 if b.p1() == a and b.p2().node() == node.nullid:
148 151 filesmatcher = matchmod.exact(b.files())
149 152 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
150 153 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
151 154
152 155 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
153 156
154 157 if debug:
155 158 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
156 159
157 160 for f in sorted(missing):
158 161 if debug:
159 162 dbg(b'debug.copies: tracing file: %s\n' % f)
160 163 fctx = b[f]
161 164 fctx._ancestrycontext = ancestrycontext
162 165
163 166 if debug:
164 167 start = util.timer()
165 168 opath = _tracefile(fctx, am, basemf)
166 169 if opath:
167 170 if debug:
168 171 dbg(b'debug.copies: rename of: %s\n' % opath)
169 172 cm[f] = opath
170 173 if debug:
171 174 dbg(
172 175 b'debug.copies: time: %f seconds\n'
173 176 % (util.timer() - start)
174 177 )
175 178 return cm
176 179
177 180
178 def _revinfo_getter(repo):
181 def _revinfo_getter(repo, match):
179 182 """returns a function that returns the following data given a <rev>"
180 183
181 184 * p1: revision number of first parent
182 185 * p2: revision number of second parent
183 186 * changes: a ChangingFiles object
184 187 """
185 188 cl = repo.changelog
186 189 parents = cl.parentrevs
187 190 flags = cl.flags
188 191
189 192 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
190 193
191 194 changelogrevision = cl.changelogrevision
192 195
193 196 # A small cache to avoid doing the work twice for merges
194 197 #
195 198 # In the vast majority of cases, if we ask for information about a revision
196 199 # from one parent, we'll later ask for it from the other. So it makes sense
197 200 # to keep the information around when reaching the first parent of a merge
198 201 # and to drop it after it has been provided for the second parent.
199 202 #
200 203 # There are cases where only one parent of a merge will be walked. This
201 204 # happens when the "destination" of the copy tracing is a descendant of a
202 205 # new root that is not common with the "source". In that case, we will only
203 206 # walk through merge parents that are descendants of changesets common
204 207 # between the "source" and "destination".
205 208 #
206 209 # With the current implementation, if such changesets carry copy
207 210 # information, we keep it in memory until the end of
208 211 # _changesetforwardcopies. We don't expect this case to be frequent
209 212 # enough to matter.
210 213 #
211 214 # In addition, it is possible to reach a pathological case where many
212 215 # first parents are met before any second parent is reached. In that
213 216 # case the cache could grow. If this ever becomes an issue, one can
214 217 # safely introduce a maximum cache size. This would trade extra CPU/IO
215 218 # time to save memory.
216 219 merge_caches = {}
217 220
218 def revinfo(rev):
219 p1, p2 = parents(rev)
220 value = None
221 e = merge_caches.pop(rev, None)
222 if e is not None:
223 return e
224 changes = None
225 if flags(rev) & HASCOPIESINFO:
226 changes = changelogrevision(rev).changes
227 value = (p1, p2, changes)
228 if p1 != node.nullrev and p2 != node.nullrev:
229 # XXX in some cases we over-cache, IGNORE
230 merge_caches[rev] = value
231 return value
221 alwaysmatch = match.always()
222
223 if rustmod is not None and alwaysmatch:
224
225 def revinfo(rev):
226 p1, p2 = parents(rev)
227 value = None
228 e = merge_caches.pop(rev, None)
229 if e is not None:
230 return e
231 if flags(rev) & HASCOPIESINFO:
232 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
233 else:
234 raw = None
235 value = (p1, p2, raw)
236 if p1 != node.nullrev and p2 != node.nullrev:
237 # XXX in some cases we over-cache, IGNORE
238 merge_caches[rev] = value
239 return value
240
241 else:
242
243 def revinfo(rev):
244 p1, p2 = parents(rev)
245 value = None
246 e = merge_caches.pop(rev, None)
247 if e is not None:
248 return e
249 changes = None
250 if flags(rev) & HASCOPIESINFO:
251 changes = changelogrevision(rev).changes
252 value = (p1, p2, changes)
253 if p1 != node.nullrev and p2 != node.nullrev:
254 # XXX in some cases we over-cache, IGNORE
255 merge_caches[rev] = value
256 return value
232 257
233 258 return revinfo
234 259
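The pop-once cache used by both `revinfo` closures above can be illustrated in isolation. This is a minimal sketch with a hypothetical revision number and payload: the first parent's visit computes and stores the tuple, the second parent's visit pops it, so entries for merge children do not accumulate beyond the walk:

```python
# Minimal sketch of the merge_caches pattern used by revinfo() above.
merge_caches = {}
calls = []

def lookup(rev):
    cached = merge_caches.pop(rev, None)
    if cached is not None:
        return cached
    calls.append(rev)           # the expensive changelog access happens here
    value = (1, 2, None)        # (p1, p2, payload), made-up values
    merge_caches[rev] = value   # only actually useful for merge children
    return value

first = lookup(7)               # asked about from the first parent: computed
second = lookup(7)              # asked about from the second parent: popped
assert first == second and calls == [7]
```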
235 260
236 261 def cached_is_ancestor(is_ancestor):
237 262 """return a cached version of is_ancestor"""
238 263 cache = {}
239 264
240 265 def _is_ancestor(anc, desc):
241 266 if anc > desc:
242 267 return False
243 268 elif anc == desc:
244 269 return True
245 270 key = (anc, desc)
246 271 ret = cache.get(key)
247 272 if ret is None:
248 273 ret = cache[key] = is_ancestor(anc, desc)
249 274 return ret
250 275
251 276 return _is_ancestor
252 277
253 278
254 279 def _changesetforwardcopies(a, b, match):
255 280 if a.rev() in (node.nullrev, b.rev()):
256 281 return {}
257 282
258 283 repo = a.repo().unfiltered()
259 284 children = {}
260 285
261 286 cl = repo.changelog
262 287 isancestor = cl.isancestorrev
263 288 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
264 289 mrset = set(missingrevs)
265 290 roots = set()
266 291 for r in missingrevs:
267 292 for p in cl.parentrevs(r):
268 293 if p == node.nullrev:
269 294 continue
270 295 if p not in children:
271 296 children[p] = [r]
272 297 else:
273 298 children[p].append(r)
274 299 if p not in mrset:
275 300 roots.add(p)
276 301 if not roots:
277 302 # no common revision to track copies from
278 303 return {}
279 304 min_root = min(roots)
280 305
281 306 from_head = set(
282 307 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
283 308 )
284 309
285 310 iterrevs = set(from_head)
286 311 iterrevs &= mrset
287 312 iterrevs.update(roots)
288 313 iterrevs.remove(b.rev())
289 314 revs = sorted(iterrevs)
290 315
291 316 if repo.filecopiesmode == b'changeset-sidedata':
292 revinfo = _revinfo_getter(repo)
317 revinfo = _revinfo_getter(repo, match)
293 318 return _combine_changeset_copies(
294 319 revs, children, b.rev(), revinfo, match, isancestor
295 320 )
296 321 else:
297 322 revinfo = _revinfo_getter_extra(repo)
298 323 return _combine_changeset_copies_extra(
299 324 revs, children, b.rev(), revinfo, match, isancestor
300 325 )
301 326
302 327
303 328 def _combine_changeset_copies(
304 329 revs, children, targetrev, revinfo, match, isancestor
305 330 ):
306 331 """combine the copies information for each item of iterrevs
307 332
308 333 revs: sorted iterable of revisions to visit
309 334 children: a {parent: [children]} mapping.
310 335 targetrev: the final copies destination revision (not in iterrevs)
311 336 revinfo(rev): a function that returns (p1, p2, changes)
312 337 match: a matcher
313 338
314 339 It returns the aggregated copies information for `targetrev`.
315 340 """
316 341
317 342 alwaysmatch = match.always()
318 343
319 344 if rustmod is not None and alwaysmatch:
320 345 return rustmod.combine_changeset_copies(
321 346 list(revs), children, targetrev, revinfo, isancestor
322 347 )
323 348
324 349 isancestor = cached_is_ancestor(isancestor)
325 350
326 351 all_copies = {}
327 352 for r in revs:
328 353 copies = all_copies.pop(r, None)
329 354 if copies is None:
330 355 # this is a root
331 356 copies = {}
332 357 for i, c in enumerate(children[r]):
333 358 p1, p2, changes = revinfo(c)
334 359 childcopies = {}
335 360 if r == p1:
336 361 parent = 1
337 362 if changes is not None:
338 363 childcopies = changes.copied_from_p1
339 364 else:
340 365 assert r == p2
341 366 parent = 2
342 367 if changes is not None:
343 368 childcopies = changes.copied_from_p2
344 369 if not alwaysmatch:
345 370 childcopies = {
346 371 dst: src for dst, src in childcopies.items() if match(dst)
347 372 }
348 373 newcopies = copies
349 374 if childcopies:
350 375 newcopies = copies.copy()
351 376 for dest, source in pycompat.iteritems(childcopies):
352 377 prev = copies.get(source)
353 378 if prev is not None and prev[1] is not None:
354 379 source = prev[1]
355 380 newcopies[dest] = (c, source)
356 381 assert newcopies is not copies
357 382 if changes is not None and changes.removed:
358 383 if newcopies is copies:
359 384 newcopies = copies.copy()
360 385 for f in changes.removed:
361 386 if f in newcopies:
362 387 if newcopies is copies:
363 388 # copy on write to avoid affecting potential other
364 389 # branches. when there are no other branches, this
365 390 # could be avoided.
366 391 newcopies = copies.copy()
367 392 newcopies[f] = (c, None)
368 393 othercopies = all_copies.get(c)
369 394 if othercopies is None:
370 395 all_copies[c] = newcopies
371 396 elif newcopies is othercopies:
372 397 # nothing to merge:
373 398 pass
374 399 else:
375 400 # we are the second parent to work on c, we need to merge our
376 401 # work with the other.
377 402 #
378 403 # In case of conflict, parent 1 takes precedence over parent 2.
379 404 # This is an arbitrary choice made anew when implementing
380 405 # changeset-based copies. It was made without regard to
381 406 # potential filelog-related behavior.
382 407 if parent == 1:
383 408 if newcopies is copies:
384 409 newcopies = copies.copy()
385 410 minor, major = othercopies, newcopies
386 411 else:
387 412 # we do not know if the other dict is a copy or not, so we
388 413 # need to blindly copy it. Future change should make this
389 414 # unnecessary.
390 415 minor, major = newcopies, othercopies.copy()
391 416 copies = _merge_copies_dict(minor, major, isancestor, changes)
392 417 all_copies[c] = copies
393 418
394 419 final_copies = {}
395 420 for dest, (tt, source) in all_copies[targetrev].items():
396 421 if source is not None:
397 422 final_copies[dest] = source
398 423 return final_copies
399 424
400 425
401 426 def _merge_copies_dict(minor, major, isancestor, changes):
402 427 """merge two copies-mapping together, minor and major
403 428
404 429 In case of conflict, value from "major" will be picked.
405 430
406 431 - `isancestors(low_rev, high_rev)`: callable returning True if `low_rev` is an
407 432 ancestor of `high_rev`,
408 433
409 434 - `ismerged(path)`: callable returning True if `path` has been merged in the
410 435 current revision,
411 436
412 437 return the resulting dict (in practice, the "minor" object, updated)
413 438 """
414 439 for dest, value in major.items():
415 440 other = minor.get(dest)
416 441 if other is None:
417 442 minor[dest] = value
418 443 else:
419 444 new_tt = value[0]
420 445 other_tt = other[0]
421 446 if value[1] == other[1]:
422 447 continue
423 448 # content from "major" wins, unless it is older
424 449 # than the branch point or there is a merge
425 450 if new_tt == other_tt:
426 451 minor[dest] = value
427 452 elif (
428 453 changes is not None
429 454 and value[1] is None
430 455 and dest in changes.salvaged
431 456 ):
432 457 pass
433 458 elif (
434 459 changes is not None
435 460 and other[1] is None
436 461 and dest in changes.salvaged
437 462 ):
438 463 minor[dest] = value
439 464 elif changes is not None and dest in changes.merged:
440 465 minor[dest] = value
441 466 elif not isancestor(new_tt, other_tt):
442 467 if value[1] is not None:
443 468 minor[dest] = value
444 469 elif isancestor(other_tt, new_tt):
445 470 minor[dest] = value
446 471 return minor
447 472
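A condensed, standalone model of the precedence rule above may help; it deliberately ignores the `salvaged`/`merged` special cases and uses hypothetical revision numbers. Entries are `(rev_that_recorded_the_copy, source_or_None)`, and the "major" entry wins unless it was recorded in an ancestor of the revision behind the "minor" entry:

```python
# Simplified model of _merge_copies_dict()'s conflict rule (no salvaged/merged
# handling); this is an illustration, not the function used by Mercurial.
def pick(minor_entry, major_entry, isancestor):
    major_rev, major_src = major_entry
    minor_rev, _minor_src = minor_entry
    if major_rev == minor_rev:
        return major_entry
    if not isancestor(major_rev, minor_rev):
        # major was not recorded in an ancestor of minor's revision: it wins,
        # unless it is only a removal marker (source is None)
        return major_entry if major_src is not None else minor_entry
    if isancestor(minor_rev, major_rev):
        return major_entry
    # major's information predates the branch point: keep minor
    return minor_entry

isancestor = lambda anc, desc: anc <= desc  # linear history, for illustration
assert pick((5, b'x'), (3, b'y'), isancestor) == (5, b'x')
assert pick((3, b'y'), (5, b'x'), isancestor) == (5, b'x')
```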
448 473
449 474 def _revinfo_getter_extra(repo):
450 475 """return a function that return multiple data given a <rev>"i
451 476
452 477 * p1: revision number of first parent
453 478 * p2: revision number of second parent
454 479 * p1copies: mapping of copies from p1
455 480 * p2copies: mapping of copies from p2
456 481 * removed: a list of removed files
457 482 * ismerged: a callback to know if file was merged in that revision
458 483 """
459 484 cl = repo.changelog
460 485 parents = cl.parentrevs
461 486
462 487 def get_ismerged(rev):
463 488 ctx = repo[rev]
464 489
465 490 def ismerged(path):
466 491 if path not in ctx.files():
467 492 return False
468 493 fctx = ctx[path]
469 494 parents = fctx._filelog.parents(fctx._filenode)
470 495 nb_parents = 0
471 496 for n in parents:
472 497 if n != node.nullid:
473 498 nb_parents += 1
474 499 return nb_parents >= 2
475 500
476 501 return ismerged
477 502
478 503 def revinfo(rev):
479 504 p1, p2 = parents(rev)
480 505 ctx = repo[rev]
481 506 p1copies, p2copies = ctx._copies
482 507 removed = ctx.filesremoved()
483 508 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
484 509
485 510 return revinfo
486 511
487 512
488 513 def _combine_changeset_copies_extra(
489 514 revs, children, targetrev, revinfo, match, isancestor
490 515 ):
491 516 """version of `_combine_changeset_copies` that works with the Google
492 517 specific "extra" based storage for copy information"""
493 518 all_copies = {}
494 519 alwaysmatch = match.always()
495 520 for r in revs:
496 521 copies = all_copies.pop(r, None)
497 522 if copies is None:
498 523 # this is a root
499 524 copies = {}
500 525 for i, c in enumerate(children[r]):
501 526 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
502 527 if r == p1:
503 528 parent = 1
504 529 childcopies = p1copies
505 530 else:
506 531 assert r == p2
507 532 parent = 2
508 533 childcopies = p2copies
509 534 if not alwaysmatch:
510 535 childcopies = {
511 536 dst: src for dst, src in childcopies.items() if match(dst)
512 537 }
513 538 newcopies = copies
514 539 if childcopies:
515 540 newcopies = copies.copy()
516 541 for dest, source in pycompat.iteritems(childcopies):
517 542 prev = copies.get(source)
518 543 if prev is not None and prev[1] is not None:
519 544 source = prev[1]
520 545 newcopies[dest] = (c, source)
521 546 assert newcopies is not copies
522 547 for f in removed:
523 548 if f in newcopies:
524 549 if newcopies is copies:
525 550 # copy on write to avoid affecting potential other
526 551 # branches. when there are no other branches, this
527 552 # could be avoided.
528 553 newcopies = copies.copy()
529 554 newcopies[f] = (c, None)
530 555 othercopies = all_copies.get(c)
531 556 if othercopies is None:
532 557 all_copies[c] = newcopies
533 558 else:
534 559 # we are the second parent to work on c, we need to merge our
535 560 # work with the other.
536 561 #
537 562 # In case of conflict, parent 1 takes precedence over parent 2.
538 563 # This is an arbitrary choice made anew when implementing
539 564 # changeset-based copies. It was made without regard to
540 565 # potential filelog-related behavior.
541 566 if parent == 1:
542 567 _merge_copies_dict_extra(
543 568 othercopies, newcopies, isancestor, ismerged
544 569 )
545 570 else:
546 571 _merge_copies_dict_extra(
547 572 newcopies, othercopies, isancestor, ismerged
548 573 )
549 574 all_copies[c] = newcopies
550 575
551 576 final_copies = {}
552 577 for dest, (tt, source) in all_copies[targetrev].items():
553 578 if source is not None:
554 579 final_copies[dest] = source
555 580 return final_copies
556 581
557 582
558 583 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
559 584 """version of `_merge_copies_dict` that works with the Google
560 585 specific "extra" based storage for copy information"""
561 586 for dest, value in major.items():
562 587 other = minor.get(dest)
563 588 if other is None:
564 589 minor[dest] = value
565 590 else:
566 591 new_tt = value[0]
567 592 other_tt = other[0]
568 593 if value[1] == other[1]:
569 594 continue
570 595 # content from "major" wins, unless it is older
571 596 # than the branch point or there is a merge
572 597 if (
573 598 new_tt == other_tt
574 599 or not isancestor(new_tt, other_tt)
575 600 or ismerged(dest)
576 601 ):
577 602 minor[dest] = value
578 603
579 604
580 605 def _forwardcopies(a, b, base=None, match=None):
581 606 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
582 607
583 608 if base is None:
584 609 base = a
585 610 match = a.repo().narrowmatch(match)
586 611 # check for working copy
587 612 if b.rev() is None:
588 613 cm = _committedforwardcopies(a, b.p1(), base, match)
589 614 # combine copies from dirstate if necessary
590 615 copies = _chain(cm, _dirstatecopies(b._repo, match))
591 616 else:
592 617 copies = _committedforwardcopies(a, b, base, match)
593 618 return copies
594 619
595 620
596 621 def _backwardrenames(a, b, match):
597 622 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
598 623 return {}
599 624
600 625 # Even though we're not taking copies into account, 1:n rename situations
601 626 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
602 627 # arbitrarily pick one of the renames.
603 628 # We don't want to pass in "match" here, since that would filter
604 629 # the destination by it. Since we're reversing the copies, we want
605 630 # to filter the source instead.
606 631 f = _forwardcopies(b, a)
607 632 r = {}
608 633 for k, v in sorted(pycompat.iteritems(f)):
609 634 if match and not match(v):
610 635 continue
611 636 # remove copies
612 637 if v in a:
613 638 continue
614 639 r[v] = k
615 640 return r
616 641
617 642
618 643 def pathcopies(x, y, match=None):
619 644 """find {dst@y: src@x} copy mapping for directed compare"""
620 645 repo = x._repo
621 646 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
622 647 if debug:
623 648 repo.ui.debug(
624 649 b'debug.copies: searching copies from %s to %s\n' % (x, y)
625 650 )
626 651 if x == y or not x or not y:
627 652 return {}
628 653 if y.rev() is None and x == y.p1():
629 654 if debug:
630 655 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
631 656 # short-circuit to avoid issues with merge states
632 657 return _dirstatecopies(repo, match)
633 658 a = y.ancestor(x)
634 659 if a == x:
635 660 if debug:
636 661 repo.ui.debug(b'debug.copies: search mode: forward\n')
637 662 copies = _forwardcopies(x, y, match=match)
638 663 elif a == y:
639 664 if debug:
640 665 repo.ui.debug(b'debug.copies: search mode: backward\n')
641 666 copies = _backwardrenames(x, y, match=match)
642 667 else:
643 668 if debug:
644 669 repo.ui.debug(b'debug.copies: search mode: combined\n')
645 670 base = None
646 671 if a.rev() != node.nullrev:
647 672 base = x
648 673 copies = _chain(
649 674 _backwardrenames(x, a, match=match),
650 675 _forwardcopies(a, y, base, match=match),
651 676 )
652 677 _filter(x, y, copies)
653 678 return copies
654 679
655 680
656 681 def mergecopies(repo, c1, c2, base):
657 682 """
658 683 Finds moves and copies between context c1 and c2 that are relevant for
659 684 merging. 'base' will be used as the merge base.
660 685
661 686 Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
662 687 files that were moved/copied in one merge parent and modified in another.
663 688 For example:
664 689
665 690 o ---> 4 another commit
666 691 |
667 692 | o ---> 3 commit that modifies a.txt
668 693 | /
669 694 o / ---> 2 commit that moves a.txt to b.txt
670 695 |/
671 696 o ---> 1 merge base
672 697
673 698 If we try to rebase revision 3 on revision 4, since there is no a.txt in
674 699 revision 4, and if the user has copytrace disabled, we print the following
675 700 message:
676 701
677 702 ```other changed <file> which local deleted```
678 703
679 704 Returns a tuple where:
680 705
681 706 "branch_copies" an instance of branch_copies.
682 707
683 708 "diverge" is a mapping of source name -> list of destination names
684 709 for divergent renames.
685 710
686 711 This function calls different copytracing algorithms based on config.
687 712 """
688 713 # avoid silly behavior for update from empty dir
689 714 if not c1 or not c2 or c1 == c2:
690 715 return branch_copies(), branch_copies(), {}
691 716
692 717 narrowmatch = c1.repo().narrowmatch()
693 718
694 719 # avoid silly behavior for parent -> working dir
695 720 if c2.node() is None and c1.node() == repo.dirstate.p1():
696 721 return (
697 722 branch_copies(_dirstatecopies(repo, narrowmatch)),
698 723 branch_copies(),
699 724 {},
700 725 )
701 726
702 727 copytracing = repo.ui.config(b'experimental', b'copytrace')
703 728 if stringutil.parsebool(copytracing) is False:
704 729 # stringutil.parsebool() returns None when it is unable to parse the
705 730 # value, so in such cases we fall back to leaving copytracing enabled
706 731 return branch_copies(), branch_copies(), {}
707 732
708 733 if usechangesetcentricalgo(repo):
709 734 # The heuristics don't make sense when we need changeset-centric algos
710 735 return _fullcopytracing(repo, c1, c2, base)
711 736
712 737 # Copy trace disabling is explicitly below the node == p1 logic above
713 738 # because the logic above is required for a simple copy to be kept across a
714 739 # rebase.
715 740 if copytracing == b'heuristics':
716 741 # Do full copytracing if only non-public revisions are involved as
717 742 # that will be fast enough and will also cover the copies which could
718 743 # be missed by heuristics
719 744 if _isfullcopytraceable(repo, c1, base):
720 745 return _fullcopytracing(repo, c1, c2, base)
721 746 return _heuristicscopytracing(repo, c1, c2, base)
722 747 else:
723 748 return _fullcopytracing(repo, c1, c2, base)
724 749
725 750
726 751 def _isfullcopytraceable(repo, c1, base):
727 752 """Checks that if base, source and destination are all no-public branches,
728 753 if yes let's use the full copytrace algorithm for increased capabilities
729 754 since it will be fast enough.
730 755
731 756 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
732 757 the number of changesets from c1 to base; if the number of changesets is
733 758 more than the limit, the full copytracing algorithm won't be used.
734 759 """
735 760 if c1.rev() is None:
736 761 c1 = c1.p1()
737 762 if c1.mutable() and base.mutable():
738 763 sourcecommitlimit = repo.ui.configint(
739 764 b'experimental', b'copytrace.sourcecommitlimit'
740 765 )
741 766 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
742 767 return commits < sourcecommitlimit
743 768 return False
744 769
745 770
746 771 def _checksinglesidecopies(
747 772 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
748 773 ):
749 774 if src not in m2:
750 775 # deleted on side 2
751 776 if src not in m1:
752 777 # renamed on side 1, deleted on side 2
753 778 renamedelete[src] = dsts1
754 779 elif src not in mb:
755 780 # Work around the "short-circuit to avoid issues with merge states"
756 781 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
757 782 # destination doesn't exist in y.
758 783 pass
759 784 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
760 785 return
761 786 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
762 787 # modified on side 2
763 788 for dst in dsts1:
764 789 copy[dst] = src
765 790
766 791
767 792 class branch_copies(object):
768 793 """Information about copies made on one side of a merge/graft.
769 794
770 795 "copy" is a mapping from destination name -> source name,
771 796 where source is in c1 and destination is in c2 or vice-versa.
772 797
773 798 "movewithdir" is a mapping from source name -> destination name,
774 799 where the file at source, present in one context but not the other,
775 800 needs to be moved to destination by the merge process, because the
776 801 other context moved the directory it is in.
777 802
778 803 "renamedelete" is a mapping of source name -> list of destination
779 804 names for files deleted in c1 that were renamed in c2 or vice-versa.
780 805
781 806 "dirmove" is a mapping of detected source dir -> destination dir renames.
782 807 This is needed for handling changes to new files previously grafted into
783 808 renamed directories.
784 809 """
785 810
786 811 def __init__(
787 812 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
788 813 ):
789 814 self.copy = {} if copy is None else copy
790 815 self.renamedelete = {} if renamedelete is None else renamedelete
791 816 self.dirmove = {} if dirmove is None else dirmove
792 817 self.movewithdir = {} if movewithdir is None else movewithdir
793 818
794 819 def __repr__(self):
795 820 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
796 821 self.copy,
797 822 self.renamedelete,
798 823 self.dirmove,
799 824 self.movewithdir,
800 825 )
801 826
802 827
803 828 def _fullcopytracing(repo, c1, c2, base):
804 829 """The full copytracing algorithm which finds all the new files that were
805 830 added from merge base up to the top commit and for each file it checks if
806 831 this file was copied from another file.
807 832
808 833 This is pretty slow when a lot of changesets are involved but will track all
809 834 the copies.
810 835 """
811 836 m1 = c1.manifest()
812 837 m2 = c2.manifest()
813 838 mb = base.manifest()
814 839
815 840 copies1 = pathcopies(base, c1)
816 841 copies2 = pathcopies(base, c2)
817 842
818 843 if not (copies1 or copies2):
819 844 return branch_copies(), branch_copies(), {}
820 845
821 846 inversecopies1 = {}
822 847 inversecopies2 = {}
823 848 for dst, src in copies1.items():
824 849 inversecopies1.setdefault(src, []).append(dst)
825 850 for dst, src in copies2.items():
826 851 inversecopies2.setdefault(src, []).append(dst)
827 852
828 853 copy1 = {}
829 854 copy2 = {}
830 855 diverge = {}
831 856 renamedelete1 = {}
832 857 renamedelete2 = {}
833 858 allsources = set(inversecopies1) | set(inversecopies2)
834 859 for src in allsources:
835 860 dsts1 = inversecopies1.get(src)
836 861 dsts2 = inversecopies2.get(src)
837 862 if dsts1 and dsts2:
838 863 # copied/renamed on both sides
839 864 if src not in m1 and src not in m2:
840 865 # renamed on both sides
841 866 dsts1 = set(dsts1)
842 867 dsts2 = set(dsts2)
843 868 # If there's some overlap in the rename destinations, we
844 869 # consider it not divergent. For example, if side 1 copies 'a'
845 870 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
846 871 # and 'd' and deletes 'a'.
847 872 if dsts1 & dsts2:
848 873 for dst in dsts1 & dsts2:
849 874 copy1[dst] = src
850 875 copy2[dst] = src
851 876 else:
852 877 diverge[src] = sorted(dsts1 | dsts2)
853 878 elif src in m1 and src in m2:
854 879 # copied on both sides
855 880 dsts1 = set(dsts1)
856 881 dsts2 = set(dsts2)
857 882 for dst in dsts1 & dsts2:
858 883 copy1[dst] = src
859 884 copy2[dst] = src
860 885 # TODO: Handle cases where it was renamed on one side and copied
861 886 # on the other side
862 887 elif dsts1:
863 888 # copied/renamed only on side 1
864 889 _checksinglesidecopies(
865 890 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
866 891 )
867 892 elif dsts2:
868 893 # copied/renamed only on side 2
869 894 _checksinglesidecopies(
870 895 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
871 896 )
872 897
873 898 # find interesting file sets from manifests
874 899 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
875 900 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
876 901 u1 = sorted(addedinm1 - addedinm2)
877 902 u2 = sorted(addedinm2 - addedinm1)
878 903
879 904 header = b" unmatched files in %s"
880 905 if u1:
881 906 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
882 907 if u2:
883 908 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
884 909
885 910 if repo.ui.debugflag:
886 911 renamedeleteset = set()
887 912 divergeset = set()
888 913 for dsts in diverge.values():
889 914 divergeset.update(dsts)
890 915 for dsts in renamedelete1.values():
891 916 renamedeleteset.update(dsts)
892 917 for dsts in renamedelete2.values():
893 918 renamedeleteset.update(dsts)
894 919
895 920 repo.ui.debug(
896 921 b" all copies found (* = to merge, ! = divergent, "
897 922 b"% = renamed and deleted):\n"
898 923 )
899 924 for side, copies in ((b"local", copies1), (b"remote", copies2)):
900 925 if not copies:
901 926 continue
902 927 repo.ui.debug(b" on %s side:\n" % side)
903 928 for f in sorted(copies):
904 929 note = b""
905 930 if f in copy1 or f in copy2:
906 931 note += b"*"
907 932 if f in divergeset:
908 933 note += b"!"
909 934 if f in renamedeleteset:
910 935 note += b"%"
911 936 repo.ui.debug(
912 937 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
913 938 )
914 939 del renamedeleteset
915 940 del divergeset
916 941
917 942 repo.ui.debug(b" checking for directory renames\n")
918 943
919 944 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
920 945 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
921 946
922 947 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
923 948 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
924 949
925 950 return branch_copies1, branch_copies2, diverge
926 951
927 952
928 953 def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
929 954 """Finds moved directories and files that should move with them.
930 955
931 956 ctx: the context for one of the sides
932 957 copy: files copied on the same side (as ctx)
933 958 fullcopy: files copied on the same side (as ctx), including those that
934 959 merge.manifestmerge() won't care about
935 960 addedfiles: added files on the other side (compared to ctx)
936 961 """
937 962 # generate a directory move map
938 963 invalid = set()
939 964 dirmove = {}
940 965
941 966 # examine each file copy for a potential directory move, which is
942 967 # when all the files in a directory are moved to a new directory
943 968 for dst, src in pycompat.iteritems(fullcopy):
944 969 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
945 970 if dsrc in invalid:
946 971 # already seen to be uninteresting
947 972 continue
948 973 elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
949 974 # directory wasn't entirely moved locally
950 975 invalid.add(dsrc)
951 976 elif dsrc in dirmove and dirmove[dsrc] != ddst:
952 977 # files from the same directory moved to two different places
953 978 invalid.add(dsrc)
954 979 else:
955 980 # looks good so far
956 981 dirmove[dsrc] = ddst
957 982
958 983 for i in invalid:
959 984 if i in dirmove:
960 985 del dirmove[i]
961 986 del invalid
962 987
963 988 if not dirmove:
964 989 return {}, {}
965 990
966 991 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
967 992
968 993 for d in dirmove:
969 994 repo.ui.debug(
970 995 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
971 996 )
972 997
973 998 movewithdir = {}
974 999 # check unaccounted nonoverlapping files against directory moves
975 1000 for f in addedfiles:
976 1001 if f not in fullcopy:
977 1002 for d in dirmove:
978 1003 if f.startswith(d):
979 1004 # new file added in a directory that was moved, move it
980 1005 df = dirmove[d] + f[len(d) :]
981 1006 if df not in copy:
982 1007 movewithdir[f] = df
983 1008 repo.ui.debug(
984 1009 b" pending file src: '%s' -> dst: '%s'\n"
985 1010 % (f, df)
986 1011 )
987 1012 break
988 1013
989 1014 return dirmove, movewithdir
990 1015
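The directory-move detection above can be illustrated with a short standalone sketch. Paths are hypothetical, and the real code additionally consults `ctx.hasdir()` and an `invalid` set to reject partially-moved or conflicting directories:

```python
import posixpath

# Sketch of _dir_renames(): per-file copies that empty a directory become a
# directory move, which then relocates files added on the other side.
fullcopy = {b'newdir/a': b'olddir/a', b'newdir/b': b'olddir/b'}

dirmove = {}
for dst, src in fullcopy.items():
    dsrc = posixpath.dirname(src) + b'/'
    ddst = posixpath.dirname(dst) + b'/'
    dirmove.setdefault(dsrc, ddst)      # the real code also tracks conflicts

added_on_other_side = b'olddir/c'       # new file in the moved directory
for d, target in dirmove.items():
    if added_on_other_side.startswith(d):
        moved = target + added_on_other_side[len(d):]
        assert moved == b'newdir/c'     # becomes a movewithdir candidate
```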
991 1016
992 1017 def _heuristicscopytracing(repo, c1, c2, base):
993 1018 """Fast copytracing using filename heuristics
994 1019
995 1020 Assumes that moves or renames are of the following two types:
996 1021
997 1022 1) Inside a directory only (same directory name but different filenames)
998 1023 2) Move from one directory to another
999 1024 (same filenames but different directory names)
1000 1025
1001 1026 Works only when there are no merge commits in the "source branch".
1002 1027 The source branch is the set of commits from base up to c2, not including base.
1003 1028
1004 1029 If a merge is involved it falls back to _fullcopytracing().
1005 1030
1006 1031 Can be used by setting the following config:
1007 1032
1008 1033 [experimental]
1009 1034 copytrace = heuristics
1010 1035
1011 1036 In some cases the copy/move candidates found by heuristics can be very large
1012 1037 in number and that will make the algorithm slow. The number of possible
1013 1038 candidates to check can be limited by using the config
1014 1039 `experimental.copytrace.movecandidateslimit` which defaults to 100.
1015 1040 """
1016 1041
1017 1042 if c1.rev() is None:
1018 1043 c1 = c1.p1()
1019 1044 if c2.rev() is None:
1020 1045 c2 = c2.p1()
1021 1046
1022 1047 changedfiles = set()
1023 1048 m1 = c1.manifest()
1024 1049 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
1025 1050 # If base is not in c2 branch, we switch to fullcopytracing
1026 1051 repo.ui.debug(
1027 1052 b"switching to full copytracing as base is not "
1028 1053 b"an ancestor of c2\n"
1029 1054 )
1030 1055 return _fullcopytracing(repo, c1, c2, base)
1031 1056
1032 1057 ctx = c2
1033 1058 while ctx != base:
1034 1059 if len(ctx.parents()) == 2:
1035 1060 # To keep things simple let's not handle merges
1036 1061 repo.ui.debug(b"switching to full copytracing because of merges\n")
1037 1062 return _fullcopytracing(repo, c1, c2, base)
1038 1063 changedfiles.update(ctx.files())
1039 1064 ctx = ctx.p1()
1040 1065
1041 1066 copies2 = {}
1042 1067 cp = _forwardcopies(base, c2)
1043 1068 for dst, src in pycompat.iteritems(cp):
1044 1069 if src in m1:
1045 1070 copies2[dst] = src
1046 1071
1047 1072 # file is missing if it isn't present in the destination, but is present in
1048 1073 # the base and present in the source.
1049 1074 # Presence in the base is important to exclude added files, presence in the
1050 1075 # source is important to exclude removed files.
1051 1076 filt = lambda f: f not in m1 and f in base and f in c2
1052 1077 missingfiles = [f for f in changedfiles if filt(f)]
1053 1078
1054 1079 copies1 = {}
1055 1080 if missingfiles:
1056 1081 basenametofilename = collections.defaultdict(list)
1057 1082 dirnametofilename = collections.defaultdict(list)
1058 1083
1059 1084 for f in m1.filesnotin(base.manifest()):
1060 1085 basename = os.path.basename(f)
1061 1086 dirname = os.path.dirname(f)
1062 1087 basenametofilename[basename].append(f)
1063 1088 dirnametofilename[dirname].append(f)
1064 1089
1065 1090 for f in missingfiles:
1066 1091 basename = os.path.basename(f)
1067 1092 dirname = os.path.dirname(f)
1068 1093 samebasename = basenametofilename[basename]
1069 1094 samedirname = dirnametofilename[dirname]
1070 1095 movecandidates = samebasename + samedirname
1071 1096 # f is guaranteed to be present in c2, that's why
1072 1097 # c2.filectx(f) won't fail
1073 1098 f2 = c2.filectx(f)
1074 1099 # we can have a lot of candidates which can slow down the heuristics
1075 1100 # config value to limit the number of candidates moves to check
1076 1101 maxcandidates = repo.ui.configint(
1077 1102 b'experimental', b'copytrace.movecandidateslimit'
1078 1103 )
1079 1104
1080 1105 if len(movecandidates) > maxcandidates:
1081 1106 repo.ui.status(
1082 1107 _(
1083 1108 b"skipping copytracing for '%s', more "
1084 1109 b"candidates than the limit: %d\n"
1085 1110 )
1086 1111 % (f, len(movecandidates))
1087 1112 )
1088 1113 continue
1089 1114
1090 1115 for candidate in movecandidates:
1091 1116 f1 = c1.filectx(candidate)
1092 1117 if _related(f1, f2):
1093 1118 # if there are a few related copies then we'll merge
1094 1119 # changes into all of them. This matches the behaviour
1095 1120 # of upstream copytracing
1096 1121 copies1[candidate] = f
1097 1122
1098 1123 return branch_copies(copies1), branch_copies(copies2), {}
1099 1124
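As a rough illustration of the candidate generation used by the heuristics above (file names are hypothetical), a file missing from c1 is matched against files added in c1 that share either its basename or its directory, and each candidate is then vetted with `_related()`:

```python
import collections
import os

# Sketch of the heuristics' candidate lookup, with made-up file names.
added_in_c1 = [b'lib/renamed.py', b'docs/a.txt']
missing = b'src/renamed.py'             # present in base and c2, gone from c1

basenametofilename = collections.defaultdict(list)
dirnametofilename = collections.defaultdict(list)
for f in added_in_c1:
    basenametofilename[os.path.basename(f)].append(f)
    dirnametofilename[os.path.dirname(f)].append(f)

movecandidates = (
    basenametofilename[os.path.basename(missing)]
    + dirnametofilename[os.path.dirname(missing)]
)
assert movecandidates == [b'lib/renamed.py']
```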
1100 1125
1101 1126 def _related(f1, f2):
1102 1127 """return True if f1 and f2 filectx have a common ancestor
1103 1128
1104 1129 Walk back to common ancestor to see if the two files originate
1105 1130 from the same file. Since workingfilectx's rev() is None it messes
1106 1131 up the integer comparison logic, hence the pre-step check for
1107 1132 None (f1 and f2 can only be workingfilectx's initially).
1108 1133 """
1109 1134
1110 1135 if f1 == f2:
1111 1136 return True # a match
1112 1137
1113 1138 g1, g2 = f1.ancestors(), f2.ancestors()
1114 1139 try:
1115 1140 f1r, f2r = f1.linkrev(), f2.linkrev()
1116 1141
1117 1142 if f1r is None:
1118 1143 f1 = next(g1)
1119 1144 if f2r is None:
1120 1145 f2 = next(g2)
1121 1146
1122 1147 while True:
1123 1148 f1r, f2r = f1.linkrev(), f2.linkrev()
1124 1149 if f1r > f2r:
1125 1150 f1 = next(g1)
1126 1151 elif f2r > f1r:
1127 1152 f2 = next(g2)
1128 1153 else: # f1 and f2 point to files in the same linkrev
1129 1154 return f1 == f2 # true if they point to the same file
1130 1155 except StopIteration:
1131 1156 return False
1132 1157
1133 1158
1134 1159 def graftcopies(wctx, ctx, base):
1135 1160 """reproduce copies between base and ctx in the wctx
1136 1161
1137 1162 Unlike mergecopies(), this function will only consider copies between base
1138 1163 and ctx; it will ignore copies between base and wctx. Also unlike
1139 1164 mergecopies(), this function will apply copies to the working copy (instead
1140 1165 of just returning information about the copies). That makes it cheaper
1141 1166 (especially in the common case of base==ctx.p1()) and useful also when
1142 1167 experimental.copytrace=off.
1143 1168
1144 1169 merge.update() will have already marked most copies, but it will only
1145 1170 mark copies if it thinks the source files are related (see
1146 1171 merge._related()). It will also not mark copies if the file wasn't modified
1147 1172 on the local side. This function adds the copies that were "missed"
1148 1173 by merge.update().
1149 1174 """
1150 1175 new_copies = pathcopies(base, ctx)
1151 1176 _filter(wctx.p1(), wctx, new_copies)
1152 1177 for dst, src in pycompat.iteritems(new_copies):
1153 1178 wctx[dst].markcopied(src)
@@ -1,415 +1,567 b''
1 1 use crate::utils::hg_path::HgPath;
2 2 use crate::utils::hg_path::HgPathBuf;
3 3 use crate::Revision;
4 4
5 5 use im_rc::ordmap::DiffItem;
6 6 use im_rc::ordmap::OrdMap;
7 7
8 use std::cmp::Ordering;
8 9 use std::collections::HashMap;
9 use std::collections::HashSet;
10 use std::convert::TryInto;
10 11
11 12 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
12 13
13 14 #[derive(Clone, Debug, PartialEq)]
14 15 struct TimeStampedPathCopy {
15 16 /// revision at which the copy information was added
16 17 rev: Revision,
17 18 /// the copy source, (Set to None in case of deletion of the associated
18 19 /// key)
19 20 path: Option<HgPathBuf>,
20 21 }
21 22
22 23 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
23 24 type TimeStampedPathCopies = OrdMap<HgPathBuf, TimeStampedPathCopy>;
24 25
25 26 /// holds parent 1, parent 2 and the relevant file actions.
26 pub type RevInfo = (Revision, Revision, ChangedFiles);
27 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
27 28
28 29 /// represents the files affected by a changeset
29 30 ///
30 31 /// This holds a subset of mercurial.metadata.ChangingFiles as we do not need
31 32 /// all the data categories tracked by it.
32 pub struct ChangedFiles {
33 removed: HashSet<HgPathBuf>,
34 merged: HashSet<HgPathBuf>,
35 salvaged: HashSet<HgPathBuf>,
36 copied_from_p1: PathCopies,
37 copied_from_p2: PathCopies,
33 /// This holds a subset of mercurial.metadata.ChangingFiles as we do not need
34 /// all the data categories tracked by it.
35 pub struct ChangedFiles<'a> {
36 nb_items: u32,
37 index: &'a [u8],
38 data: &'a [u8],
38 39 }
39 40
40 41 /// Represent active changes that affect the copy tracing.
41 42 enum Action<'a> {
42 43 /// The parent → children edge is removing a file
43 44 ///
44 45 /// (actually, this could be the edge from the other parent, but it does
45 46 /// not matter)
46 47 Removed(&'a HgPath),
47 48 /// The parent → children edge introduces copy information between (dest,
48 49 /// source)
49 50 Copied(&'a HgPath, &'a HgPath),
50 51 }
51 52
52 53 /// This expresses the possible "special" cases we can get in a merge
53 54 ///
54 55 /// See mercurial/metadata.py for details on these values.
55 56 #[derive(PartialEq)]
56 57 enum MergeCase {
57 58 /// Merged: file had history on both sides that needed to be merged
58 59 Merged,
59 60 /// Salvaged: file was candidate for deletion, but survived the merge
60 61 Salvaged,
61 62 /// Normal: Not one of the two cases above
62 63 Normal,
63 64 }
64 65
65 impl ChangedFiles {
66 pub fn new(
67 removed: HashSet<HgPathBuf>,
68 merged: HashSet<HgPathBuf>,
69 salvaged: HashSet<HgPathBuf>,
70 copied_from_p1: PathCopies,
71 copied_from_p2: PathCopies,
72 ) -> Self {
73 ChangedFiles {
74 removed,
75 merged,
76 salvaged,
77 copied_from_p1,
78 copied_from_p2,
79 }
66 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
67
68 const EMPTY: &[u8] = b"";
69 const COPY_MASK: u8 = 3;
70 const P1_COPY: u8 = 2;
71 const P2_COPY: u8 = 3;
72 const ACTION_MASK: u8 = 28;
73 const REMOVED: u8 = 12;
74 const MERGED: u8 = 8;
75 const SALVAGED: u8 = 16;
76
77 impl<'a> ChangedFiles<'a> {
78 const INDEX_START: usize = 4;
79 const ENTRY_SIZE: u32 = 9;
80 const FILENAME_START: u32 = 1;
81 const COPY_SOURCE_START: u32 = 5;
82
83 pub fn new(data: &'a [u8]) -> Self {
84 assert!(
85 data.len() >= 4,
86 "data size ({}) is too small to contain the header (4)",
87 data.len()
88 );
89 let nb_items_raw: [u8; 4] = (&data[0..=3])
90 .try_into()
91 .expect("failed to turn 4 bytes into 4 bytes");
92 let nb_items = u32::from_be_bytes(nb_items_raw);
93
94 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
95 let index_end = Self::INDEX_START + index_size;
96
97 assert!(
98 data.len() >= index_end,
99 "data size ({}) is too small to fit the index_data ({})",
100 data.len(),
101 index_end
102 );
103
104 let ret = ChangedFiles {
105 nb_items,
106 index: &data[Self::INDEX_START..index_end],
107 data: &data[index_end..],
108 };
109 let max_data = ret.filename_end(nb_items - 1) as usize;
110 assert!(
111 ret.data.len() >= max_data,
112 "data size ({}) is too small to fit all data ({})",
113 data.len(),
114 index_end + max_data
115 );
116 ret
80 117 }
81 118
82 119 pub fn new_empty() -> Self {
83 120 ChangedFiles {
84 removed: HashSet::new(),
85 merged: HashSet::new(),
86 salvaged: HashSet::new(),
87 copied_from_p1: PathCopies::new(),
88 copied_from_p2: PathCopies::new(),
121 nb_items: 0,
122 index: EMPTY,
123 data: EMPTY,
124 }
125 }
126
127 /// internal function to return an individual entry at a given index
128 fn entry(&'a self, idx: u32) -> FileChange<'a> {
129 if idx >= self.nb_items {
130 panic!(
131 "index for entry is higher that the number of file {} >= {}",
132 idx, self.nb_items
133 )
134 }
135 let flags = self.flags(idx);
136 let filename = self.filename(idx);
137 let copy_idx = self.copy_idx(idx);
138 let copy_source = self.filename(copy_idx);
139 (flags, filename, copy_source)
140 }
141
142 /// internal function to return the filename of the entry at a given index
143 fn filename(&self, idx: u32) -> &HgPath {
144 let filename_start;
145 if idx == 0 {
146 filename_start = 0;
147 } else {
148 filename_start = self.filename_end(idx - 1)
89 149 }
150 let filename_end = self.filename_end(idx);
151 let filename_start = filename_start as usize;
152 let filename_end = filename_end as usize;
153 HgPath::new(&self.data[filename_start..filename_end])
154 }
155
156 /// internal function to return the flag field of the entry at a given
157 /// index
158 fn flags(&self, idx: u32) -> u8 {
159 let idx = idx as usize;
160 self.index[idx * (Self::ENTRY_SIZE as usize)]
161 }
162
163 /// internal function to return the end of a filename part at a given index
164 fn filename_end(&self, idx: u32) -> u32 {
165 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
166 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
167 let start = start as usize;
168 let end = end as usize;
169 let raw = (&self.index[start..end])
170 .try_into()
171 .expect("failed to turn 4 bytes into 4 bytes");
172 u32::from_be_bytes(raw)
173 }
174
175 /// internal function to return index of the copy source of the entry at a
176 /// given index
177 fn copy_idx(&self, idx: u32) -> u32 {
178 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
179 let end = (idx + 1) * Self::ENTRY_SIZE;
180 let start = start as usize;
181 let end = end as usize;
182 let raw = (&self.index[start..end])
183 .try_into()
184 .expect("failed to turn 4 bytes into 4 bytes");
185 u32::from_be_bytes(raw)
90 186 }
91 187
92 188 /// Return an iterator over all the `Action` in this instance.
93 fn iter_actions(&self, parent: usize) -> impl Iterator<Item = Action> {
94 let copies_iter = match parent {
95 1 => self.copied_from_p1.iter(),
96 2 => self.copied_from_p2.iter(),
97 _ => unreachable!(),
98 };
99 let remove_iter = self.removed.iter();
100 let copies_iter = copies_iter.map(|(x, y)| Action::Copied(x, y));
101 let remove_iter = remove_iter.map(|x| Action::Removed(x));
102 copies_iter.chain(remove_iter)
189 fn iter_actions(&self, parent: usize) -> ActionsIterator {
190 ActionsIterator {
191 changes: &self,
192 parent: parent,
193 current: 0,
194 }
103 195 }
104 196
105 197 /// return the MergeCase value associated with a filename
106 198 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
107 if self.salvaged.contains(path) {
108 return MergeCase::Salvaged;
109 } else if self.merged.contains(path) {
110 return MergeCase::Merged;
111 } else {
199 if self.nb_items == 0 {
112 200 return MergeCase::Normal;
113 201 }
202 let mut low_part = 0;
203 let mut high_part = self.nb_items;
204
205 while low_part < high_part {
206 let cursor = (low_part + high_part - 1) / 2;
207 let (flags, filename, _source) = self.entry(cursor);
208 match path.cmp(filename) {
209 Ordering::Less => low_part = cursor + 1,
210 Ordering::Greater => high_part = cursor,
211 Ordering::Equal => {
212 return match flags & ACTION_MASK {
213 MERGED => MergeCase::Merged,
214 SALVAGED => MergeCase::Salvaged,
215 _ => MergeCase::Normal,
216 };
217 }
218 }
219 }
220 MergeCase::Normal
114 221 }
115 222 }
116 223
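The byte layout that `ChangedFiles::new` expects can be pieced together from the constants above (`INDEX_START` = 4, `ENTRY_SIZE` = 9, `FILENAME_START` = 1, `COPY_SOURCE_START` = 5). The following Python sketch builds a minimal, hypothetical blob in that shape; the authoritative encoder lives on the Python side (mercurial/metadata.py, not shown here), so treat this only as an illustration of the offsets the parser reads, with made-up file names and flag values taken from the constants above:

```python
import struct

# Hypothetical two-entry sidedata block in the layout parsed above:
# a big-endian u32 entry count, then one 9-byte index record per (sorted)
# filename (1 flag byte, u32 BE end offset of the filename in the data area,
# u32 BE index of the copy-source entry), then the concatenated filenames.
entries = [
    (b'copied.txt', 2, 1),   # flags 2 == P1_COPY, copy source is entry 1
    (b'source.txt', 0, 1),   # no copy/action flags set
]
index = b''
data = b''
for name, flags, copy_idx in entries:
    data += name
    index += struct.pack('>BII', flags, len(data), copy_idx)
blob = struct.pack('>I', len(entries)) + index + data

assert len(index) == 9 * len(entries)            # ENTRY_SIZE per entry
assert blob[4] == 2                              # flags byte of entry 0
filename_end_0 = struct.unpack('>I', blob[5:9])[0]
assert data[:filename_end_0] == b'copied.txt'    # what filename(0) would read
```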
117 224 /// A struct responsible for answering "is X ancestors of Y" quickly
118 225 ///
119 226 /// The structure will delegate ancestors call to a callback, and cache the
120 227 /// result.
121 228 #[derive(Debug)]
122 229 struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
123 230 inner: &'a A,
124 231 pairs: HashMap<(Revision, Revision), bool>,
125 232 }
126 233
127 234 impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
128 235 fn new(func: &'a A) -> Self {
129 236 Self {
130 237 inner: func,
131 238 pairs: HashMap::default(),
132 239 }
133 240 }
134 241
135 242 /// returns `true` if `anc` is an ancestors of `desc`, `false` otherwise
136 243 fn is_ancestor(&mut self, anc: Revision, desc: Revision) -> bool {
137 244 if anc > desc {
138 245 false
139 246 } else if anc == desc {
140 247 true
141 248 } else {
142 249 if let Some(b) = self.pairs.get(&(anc, desc)) {
143 250 *b
144 251 } else {
145 252 let b = (self.inner)(anc, desc);
146 253 self.pairs.insert((anc, desc), b);
147 254 b
148 255 }
149 256 }
150 257 }
151 258 }
152 259
260 struct ActionsIterator<'a> {
261 changes: &'a ChangedFiles<'a>,
262 parent: usize,
263 current: u32,
264 }
265
266 impl<'a> Iterator for ActionsIterator<'a> {
267 type Item = Action<'a>;
268
269 fn next(&mut self) -> Option<Action<'a>> {
270 while self.current < self.changes.nb_items {
271 let (flags, file, source) = self.changes.entry(self.current);
272 self.current += 1;
273 if (flags & ACTION_MASK) == REMOVED {
274 return Some(Action::Removed(file));
275 }
276 let copy = flags & COPY_MASK;
277 if self.parent == 1 && copy == P1_COPY {
278 return Some(Action::Copied(file, source));
279 }
280 if self.parent == 2 && copy == P2_COPY {
281 return Some(Action::Copied(file, source));
282 }
283 }
284 return None;
285 }
286 }
287
288 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
289 /// ChangedFiles
290 ///
291 /// It is passed to the RevInfoMaker callback, which can assign any necessary
292 /// content to the `data` attribute. The copy tracing code is responsible for
293 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
294 pub struct DataHolder<D> {
295 /// RevInfoMaker callback should assign data referenced by the
296 /// ChangedFiles struct it returns to this attribute. The DataHolder
297 /// lifetime will be at least as long as the ChangedFiles one.
298 pub data: Option<D>,
299 }
300
301 pub type RevInfoMaker<'a, D> =
302 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
303
153 304 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
154 305 ///
155 306 /// Arguments are:
156 307 ///
157 308 /// revs: all revisions to be considered
158 309 /// children: a {parent → [children]} mapping
159 310 /// target_rev: the final revision we are combining copies to
160 311 /// rev_info(rev): callback to get revision information:
161 312 /// * first parent
162 313 /// * second parent
163 314 /// * ChangedFiles
164 315 /// isancestors(low_rev, high_rev): callback to check if a revision is an
165 316 /// ancestor of another
166 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool>(
317 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
167 318 revs: Vec<Revision>,
168 319 children: HashMap<Revision, Vec<Revision>>,
169 320 target_rev: Revision,
170 rev_info: &impl Fn(Revision) -> RevInfo,
321 rev_info: RevInfoMaker<D>,
171 322 is_ancestor: &A,
172 323 ) -> PathCopies {
173 324 let mut all_copies = HashMap::new();
174 325 let mut oracle = AncestorOracle::new(is_ancestor);
175 326
176 327 for rev in revs {
177 328 // Retrieve data computed in a previous iteration
178 329 let copies = all_copies.remove(&rev);
179 330 let copies = match copies {
180 331 Some(c) => c,
181 332 None => TimeStampedPathCopies::default(), // root of the walked set
182 333 };
183 334
184 335 let current_children = match children.get(&rev) {
185 336 Some(c) => c,
186 337 None => panic!("inconsistent `revs` and `children`"),
187 338 };
188 339
189 340 for child in current_children {
190 341 // We will chain the copies information accumulated for `rev` with
191 342 // the individual copies information for each of its children.
192 // Creating a new PathCopies for each `rev` → `children` vertex.
193 let (p1, p2, changes) = rev_info(*child);
343 // Creating a new PathCopies for each `rev` → `children` vertex.
344 let mut d: DataHolder<D> = DataHolder { data: None };
345 let (p1, p2, changes) = rev_info(*child, &mut d);
194 346
195 347 let parent = if rev == p1 {
196 348 1
197 349 } else {
198 350 assert_eq!(rev, p2);
199 351 2
200 352 };
201 353 let mut new_copies = copies.clone();
202 354
203 355 for action in changes.iter_actions(parent) {
204 356 match action {
205 357 Action::Copied(dest, source) => {
206 358 let entry;
207 359 if let Some(v) = copies.get(source) {
208 360 entry = match &v.path {
209 361 Some(path) => Some((*(path)).to_owned()),
210 362 None => Some(source.to_owned()),
211 363 }
212 364 } else {
213 365 entry = Some(source.to_owned());
214 366 }
215 367 // Each new entry is introduced by the children, we
216 368 // record this information as we will need it to take
217 369 // the right decision when merging conflicting copy
218 370 // information. See merge_copies_dict for details.
219 371 let ttpc = TimeStampedPathCopy {
220 372 rev: *child,
221 373 path: entry,
222 374 };
223 375 new_copies.insert(dest.to_owned(), ttpc);
224 376 }
225 377 Action::Removed(f) => {
226 378 // We must drop copy information for removed file.
227 379 //
228 380 // We need to explicitly record them as dropped to
229 381 // propagate this information when merging two
230 382 // TimeStampedPathCopies object.
231 383 if new_copies.contains_key(f.as_ref()) {
232 384 let ttpc = TimeStampedPathCopy {
233 385 rev: *child,
234 386 path: None,
235 387 };
236 388 new_copies.insert(f.to_owned(), ttpc);
237 389 }
238 390 }
239 391 }
240 392 }
241 393
242 394 // A merge has two parents and needs to combine their copy information.
243 395 //
244 396 // If the vertex from the other parent was already processed, we
245 397 // will have a value for the child ready to be used. We need to
246 398 // grab it and combine it with the one we already
247 399 computed. If not, we can simply store the newly
248 400 // computed data. The processing happening at
249 401 // the time of the second parent will take care of combining the
250 402 // two TimeStampedPathCopies instances.
251 403 match all_copies.remove(child) {
252 404 None => {
253 405 all_copies.insert(child, new_copies);
254 406 }
255 407 Some(other_copies) => {
256 408 let (minor, major) = match parent {
257 409 1 => (other_copies, new_copies),
258 410 2 => (new_copies, other_copies),
259 411 _ => unreachable!(),
260 412 };
261 413 let merged_copies =
262 414 merge_copies_dict(minor, major, &changes, &mut oracle);
263 415 all_copies.insert(child, merged_copies);
264 416 }
265 417 };
266 418 }
267 419 }
268 420
269 421 // Drop internal information (like the timestamp) and return the final
270 422 // mapping.
271 423 let tt_result = all_copies
272 424 .remove(&target_rev)
273 425 .expect("target revision was not processed");
274 426 let mut result = PathCopies::default();
275 427 for (dest, tt_source) in tt_result {
276 428 if let Some(path) = tt_source.path {
277 429 result.insert(dest, path);
278 430 }
279 431 }
280 432 result
281 433 }
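
For illustration, here is a minimal pure-Rust sketch of how a caller could drive the new callback-based API. The `trace_copies` helper, the `parents` and `sidedata` maps and the trivial `is_ancestor` closure are made up for this example; only the signatures visible in this hunk (`combine_changeset_copies`, `RevInfoMaker`, `DataHolder`, `ChangedFiles::new` / `new_empty`) are assumed.

    use std::collections::HashMap;

    use hg::copy_tracing::{
        combine_changeset_copies, ChangedFiles, DataHolder, RevInfo, RevInfoMaker,
    };
    use hg::Revision;

    fn trace_copies(
        revs: Vec<Revision>,
        children: HashMap<Revision, Vec<Revision>>,
        target_rev: Revision,
        parents: HashMap<Revision, (Revision, Revision)>, // hypothetical storage
        sidedata: HashMap<Revision, Vec<u8>>,              // hypothetical storage
    ) -> usize {
        let rev_info_maker: RevInfoMaker<Vec<u8>> = Box::new(
            move |rev: Revision, d: &mut DataHolder<Vec<u8>>| -> RevInfo {
                let (p1, p2) = parents[&rev];
                let files = match sidedata.get(&rev) {
                    Some(raw) => {
                        // Park the raw bytes in the holder so the returned
                        // ChangedFiles can borrow them for long enough.
                        d.data = Some(raw.clone());
                        ChangedFiles::new(d.data.as_deref().unwrap())
                    }
                    None => ChangedFiles::new_empty(),
                };
                (p1, p2, files)
            },
        );
        // A real caller would consult the changelog index here.
        let is_ancestor = |_anc: Revision, _desc: Revision| -> bool { false };
        let copies = combine_changeset_copies(
            revs,
            children,
            target_rev,
            rev_info_maker,
            &is_ancestor,
        );
        copies.len()
    }

The same pattern appears in the Python wrapper further down: the closure parks the raw sidedata in the DataHolder so that ChangedFiles can borrow from it instead of copying it.
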
282 434
283 435 /// merge two copies mappings together, minor and major
284 436 ///
285 437 /// In case of conflict, the value from "major" will be picked, except in
286 438 /// some cases. See inline documentation for details.
287 439 #[allow(clippy::if_same_then_else)]
288 440 fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
289 441 minor: TimeStampedPathCopies,
290 442 major: TimeStampedPathCopies,
291 443 changes: &ChangedFiles,
292 444 oracle: &mut AncestorOracle<A>,
293 445 ) -> TimeStampedPathCopies {
294 446 if minor.is_empty() {
295 447 return major;
296 448 } else if major.is_empty() {
297 449 return minor;
298 450 }
299 451 let mut override_minor = Vec::new();
300 452 let mut override_major = Vec::new();
301 453
302 454 let mut to_major = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
303 455 override_major.push((k.clone(), v.clone()))
304 456 };
305 457 let mut to_minor = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
306 458 override_minor.push((k.clone(), v.clone()))
307 459 };
308 460
309 461 // The diff function leverages detection of identical subparts when minor
310 462 // and major have some common ancestors. This makes it very fast in most
311 463 // cases.
312 464 //
313 465 // In cases where the two maps are vastly different in size, the current
314 466 // approach is still slowish because the iteration will go over
315 467 // all the "exclusive" content of the larger one. This situation can be
316 468 // frequent when the subgraph of revisions we are processing has a lot
317 469 // of roots, each root adding its own fully new map to the mix (and
318 470 // likely a small map, if the path from the root to the "main path" is
319 471 // small).
320 472 //
321 473 // We could do better by detecting such situations and processing them
322 474 // differently.
323 475 for d in minor.diff(&major) {
324 476 match d {
325 477 DiffItem::Add(k, v) => to_minor(k, v),
326 478 DiffItem::Remove(k, v) => to_major(k, v),
327 479 DiffItem::Update { old, new } => {
328 480 let (dest, src_major) = new;
329 481 let (_, src_minor) = old;
330 482 let mut pick_minor = || (to_major(dest, src_minor));
331 483 let mut pick_major = || (to_minor(dest, src_major));
332 484 if src_major.path == src_minor.path {
333 485 // we have the same value, but from another source;
334 486 if src_major.rev == src_minor.rev {
335 487 // If the two entries are identical, no need to do
336 488 // anything (but diff should not have yielded them)
337 489 unreachable!();
338 490 } else if oracle.is_ancestor(src_major.rev, src_minor.rev)
339 491 {
340 492 pick_minor();
341 493 } else {
342 494 pick_major();
343 495 }
344 496 } else if src_major.rev == src_minor.rev {
345 497 // We cannot get copy information for both p1 and p2 in the
346 498 // same rev. So this is the same value.
347 499 unreachable!();
348 500 } else {
349 501 let action = changes.get_merge_case(&dest);
350 502 if src_major.path.is_none()
351 503 && action == MergeCase::Salvaged
352 504 {
353 505 // If the file is "deleted" in the major side but was
354 506 // salvaged by the merge, we keep the minor side alive
355 507 pick_minor();
356 508 } else if src_minor.path.is_none()
357 509 && action == MergeCase::Salvaged
358 510 {
359 511 // If the file is "deleted" in the minor side but was
360 512 // salvaged by the merge, unconditionally preserve the
361 513 // major side.
362 514 pick_major();
363 515 } else if action == MergeCase::Merged {
364 516 // If the file was actively merged, copy information
365 517 // from each side might conflict. The major side will
366 518 // win such conflicts.
367 519 pick_major();
368 520 } else if oracle.is_ancestor(src_major.rev, src_minor.rev)
369 521 {
370 522 // If the minor side is strictly newer than the major
371 523 // side, it should be kept.
372 524 pick_minor();
373 525 } else if src_major.path.is_some() {
374 526 // without any special case, the "major" value wins
375 527 // over the "minor" one.
376 528 pick_major();
377 529 } else if oracle.is_ancestor(src_minor.rev, src_major.rev)
378 530 {
379 531 // the "major" rev is a direct ancestors of "minor",
380 532 // any different value should
381 533 // overwrite
382 534 pick_major();
383 535 } else {
384 536 // major version is None (so the file was deleted on
385 537 // that branch) and that branch is independent (neither
386 538 // minor nor major is an ancestor of the other one).
387 539 // We preserve the new
388 540 // information about the new file.
389 541 pick_minor();
390 542 }
391 543 }
392 544 }
393 545 };
394 546 }
395 547
396 548 let updates;
397 549 let mut result;
398 550 if override_major.is_empty() {
399 551 result = major
400 552 } else if override_minor.is_empty() {
401 553 result = minor
402 554 } else {
403 555 if override_minor.len() < override_major.len() {
404 556 updates = override_minor;
405 557 result = minor;
406 558 } else {
407 559 updates = override_major;
408 560 result = major;
409 561 }
410 562 for (k, v) in updates {
411 563 result.insert(k, v);
412 564 }
413 565 }
414 566 result
415 567 }
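
The override-list strategy described in the comments above can be illustrated with a small, self-contained sketch using std's BTreeMap in place of the ordered map and diff() used by hg-core. merge_with_overrides and prefer_major are made-up names; prefer_major stands in for the salvage/merge/ancestry rules applied in merge_copies_dict().

    use std::collections::BTreeMap;

    fn merge_with_overrides(
        minor: BTreeMap<String, u32>,
        major: BTreeMap<String, u32>,
        prefer_major: impl Fn(&str, u32, u32) -> bool,
    ) -> BTreeMap<String, u32> {
        // Entries that would have to be inserted into each side for it to
        // become the merged result.
        let mut override_minor: Vec<(String, u32)> = Vec::new();
        let mut override_major: Vec<(String, u32)> = Vec::new();
        for (key, major_value) in &major {
            match minor.get(key) {
                // Only on the major side: minor would need it.
                None => override_minor.push((key.clone(), *major_value)),
                Some(minor_value) if minor_value != major_value => {
                    if prefer_major(key.as_str(), *minor_value, *major_value) {
                        override_minor.push((key.clone(), *major_value));
                    } else {
                        override_major.push((key.clone(), *minor_value));
                    }
                }
                // Identical entries need no work on either side.
                Some(_) => {}
            }
        }
        for (key, minor_value) in &minor {
            if !major.contains_key(key) {
                // Only on the minor side: major would need it.
                override_major.push((key.clone(), *minor_value));
            }
        }
        // Keep whichever side needs the fewer insertions and patch it up.
        let (mut result, updates) = if override_minor.len() < override_major.len() {
            (minor, override_minor)
        } else {
            (major, override_major)
        };
        for (key, value) in updates {
            result.insert(key, value);
        }
        result
    }

In merge_copies_dict() the role of prefer_major is played by the chain of checks above: salvage status, merge status and the ancestry relation between the two source revisions decide which side's entry lands in which override list.
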
@@ -1,295 +1,153 b''
1 1 use cpython::ObjectProtocol;
2 2 use cpython::PyBool;
3 3 use cpython::PyBytes;
4 4 use cpython::PyDict;
5 5 use cpython::PyList;
6 6 use cpython::PyModule;
7 7 use cpython::PyObject;
8 8 use cpython::PyResult;
9 9 use cpython::PyTuple;
10 10 use cpython::Python;
11 11
12 12 use hg::copy_tracing::combine_changeset_copies;
13 13 use hg::copy_tracing::ChangedFiles;
14 use hg::copy_tracing::DataHolder;
14 15 use hg::copy_tracing::RevInfo;
15 use hg::utils::hg_path::HgPathBuf;
16 use hg::copy_tracing::RevInfoMaker;
16 17 use hg::Revision;
17 18
18 19 /// Combines copies information contained into revision `revs` to build a copy
19 20 /// map.
20 21 ///
21 22 /// See mercurial/copies.py for details
22 23 pub fn combine_changeset_copies_wrapper(
23 24 py: Python,
24 25 revs: PyList,
25 26 children: PyDict,
26 27 target_rev: Revision,
27 28 rev_info: PyObject,
28 29 is_ancestor: PyObject,
29 30 ) -> PyResult<PyDict> {
30 31 let revs: PyResult<_> =
31 32 revs.iter(py).map(|r| Ok(r.extract(py)?)).collect();
32 33
33 34 // Wrap the `is_ancestor` python callback as a Rust closure
34 35 //
35 36 // No errors are expected from the Python side, and they should only
36 37 // happen in case of a programming error or severe data corruption. Such
37 38 // errors will raise a panic and the rust-cpython harness will turn them
38 39 // into Python exceptions.
39 40 let is_ancestor_wrap = |anc: Revision, desc: Revision| -> bool {
40 41 is_ancestor
41 42 .call(py, (anc, desc), None)
42 43 .expect(
43 44 "rust-copy-tracing: python call to `is_ancestor` \
44 45 failed",
45 46 )
46 47 .cast_into::<PyBool>(py)
47 48 .expect(
48 49 "rust-copy-tracing: python call to `is_ancestor` \
49 50 returned unexpected non-Bool value",
50 51 )
51 52 .is_true()
52 53 };
53 54
54 55 // Wrap the `rev_info_maker` python callback as a Rust closure
55 56 //
56 57 // No errors are expected from the Python side, and they should only
57 58 // happen in case of a programming error or severe data corruption. Such
58 59 // errors will raise a panic and the rust-cpython harness will turn them
59 60 // into Python exceptions.
60 let rev_info_maker = |rev: Revision| -> RevInfo {
61 let res: PyTuple = rev_info
62 .call(py, (rev,), None)
63 .expect("rust-copy-tracing: python call to `rev_info` failed")
64 .cast_into(py)
65 .expect(
66 "rust-copy_tracing: python call to `rev_info` returned \
67 unexpected non-Tuple value",
68 );
69 let p1 = res.get_item(py, 0).extract(py).expect(
70 "rust-copy-tracing: \
71 rev_info return is invalid, first item is a not a revision",
72 );
73 let p2 = res.get_item(py, 1).extract(py).expect(
74 "rust-copy-tracing: \
75 rev_info return is invalid, second item is a not a revision",
76 );
77
78 let changes = res.get_item(py, 2);
79
80 let files;
81 if !changes
82 .hasattr(py, "copied_from_p1")
83 .expect("rust-copy-tracing: python call to `hasattr` failed")
84 {
85 files = ChangedFiles::new_empty();
86 } else {
87 let p1_copies: PyDict = changes
88 .getattr(py, "copied_from_p1")
89 .expect(
90 "rust-copy-tracing: retrieval of python attribute \
91 `copied_from_p1` failed",
92 )
93 .cast_into(py)
94 .expect(
95 "rust-copy-tracing: failed to convert `copied_from_p1` \
96 to PyDict",
97 );
98 let p1_copies: PyResult<_> = p1_copies
99 .items(py)
100 .iter()
101 .map(|(key, value)| {
102 let key = key.extract::<PyBytes>(py).expect(
103 "rust-copy-tracing: conversion of copy destination to\
104 PyBytes failed",
105 );
106 let key = key.data(py);
107 let value = value.extract::<PyBytes>(py).expect(
108 "rust-copy-tracing: conversion of copy source to \
109 PyBytes failed",
110 );
111 let value = value.data(py);
112 Ok((
113 HgPathBuf::from_bytes(key),
114 HgPathBuf::from_bytes(value),
115 ))
116 })
117 .collect();
118
119 let p2_copies: PyDict = changes
120 .getattr(py, "copied_from_p2")
121 .expect(
122 "rust-copy-tracing: retrieval of python attribute \
123 `copied_from_p2` failed",
124 )
61 let rev_info_maker: RevInfoMaker<PyBytes> =
62 Box::new(|rev: Revision, d: &mut DataHolder<PyBytes>| -> RevInfo {
63 let res: PyTuple = rev_info
64 .call(py, (rev,), None)
65 .expect("rust-copy-tracing: python call to `rev_info` failed")
125 66 .cast_into(py)
126 67 .expect(
127 "rust-copy-tracing: failed to convert `copied_from_p2` \
128 to PyDict",
68 "rust-copy_tracing: python call to `rev_info` returned \
69 unexpected non-Tuple value",
129 70 );
130 let p2_copies: PyResult<_> = p2_copies
131 .items(py)
132 .iter()
133 .map(|(key, value)| {
134 let key = key.extract::<PyBytes>(py).expect(
135 "rust-copy-tracing: conversion of copy destination to \
136 PyBytes failed");
137 let key = key.data(py);
138 let value = value.extract::<PyBytes>(py).expect(
139 "rust-copy-tracing: conversion of copy source to \
140 PyBytes failed",
141 );
142 let value = value.data(py);
143 Ok((
144 HgPathBuf::from_bytes(key),
145 HgPathBuf::from_bytes(value),
146 ))
147 })
148 .collect();
149
150 let removed: PyObject = changes.getattr(py, "removed").expect(
151 "rust-copy-tracing: retrieval of python attribute \
152 `removed` failed",
71 let p1 = res.get_item(py, 0).extract(py).expect(
72 "rust-copy-tracing: rev_info return is invalid, first item \
73 is not a revision",
153 74 );
154 let removed: PyResult<_> = removed
155 .iter(py)
156 .expect(
157 "rust-copy-tracing: getting a python iterator over the \
158 `removed` set failed",
159 )
160 .map(|filename| {
161 let filename = filename
162 .expect(
163 "rust-copy-tracing: python iteration over the \
164 `removed` set failed",
165 )
166 .extract::<PyBytes>(py)
167 .expect(
168 "rust-copy-tracing: \
169 conversion of `removed` item to PyBytes failed",
170 );
171 let filename = filename.data(py);
172 Ok(HgPathBuf::from_bytes(filename))
173 })
174 .collect();
175
176 let merged: PyObject = changes.getattr(py, "merged").expect(
177 "rust-copy-tracing: retrieval of python attribute \
178 `merged` failed",
75 let p2 = res.get_item(py, 1).extract(py).expect(
76 "rust-copy-tracing: rev_info return is invalid, first item \
77 is a not a revision",
179 78 );
180 let merged: PyResult<_> = merged
181 .iter(py)
182 .expect(
183 "rust-copy-tracing: getting a python iterator over the \
184 `merged` set failed",
185 )
186 .map(|filename| {
187 let filename = filename
188 .expect(
189 "rust-copy-tracing: python iteration over the \
190 `merged` set failed",
191 )
192 .extract::<PyBytes>(py)
193 .expect(
194 "rust-copy-tracing: \
195 conversion of `merged` item to PyBytes failed",
196 );
197 let filename = filename.data(py);
198 Ok(HgPathBuf::from_bytes(filename))
199 })
200 .collect();
201 79
202 let salvaged: PyObject = changes.getattr(py, "salvaged").expect(
203 "rust-copy-tracing: retrieval of python attribute \
204 `salvaged` failed",
205 );
206 let salvaged: PyResult<_> = salvaged
207 .iter(py)
208 .expect(
209 "rust-copy-tracing: getting a python iterator over the \
210 `salvaged` set failed",
211 )
212 .map(|filename| {
213 let filename = filename
214 .expect(
215 "rust-copy-tracing: python iteration over the \
216 `salvaged` set failed",
217 )
218 .extract::<PyBytes>(py)
219 .expect(
220 "rust-copy-tracing: \
221 conversion of `salvaged` item to PyBytes failed",
222 );
223 let filename = filename.data(py);
224 Ok(HgPathBuf::from_bytes(filename))
225 })
226 .collect();
227 files = ChangedFiles::new(
228 removed.unwrap(),
229 merged.unwrap(),
230 salvaged.unwrap(),
231 p1_copies.unwrap(),
232 p2_copies.unwrap(),
233 );
234 }
80 let files = match res.get_item(py, 2).extract::<PyBytes>(py) {
81 Ok(raw) => {
82 // Give responsibility for the raw bytes' lifetime to
83 // hg-core
84 d.data = Some(raw);
85 let addrs = d.data.as_ref().expect(
86 "rust-copy-tracing: failed to get a reference to the \
87 raw bytes for copy data").data(py);
88 ChangedFiles::new(addrs)
89 }
90 // value was presumably None, meaning there was no copy data.
91 Err(_) => ChangedFiles::new_empty(),
92 };
235 93
236 (p1, p2, files)
237 };
94 (p1, p2, files)
95 });
238 96 let children: PyResult<_> = children
239 97 .items(py)
240 98 .iter()
241 99 .map(|(k, v)| {
242 100 let v: &PyList = v.cast_as(py)?;
243 101 let v: PyResult<_> =
244 102 v.iter(py).map(|child| Ok(child.extract(py)?)).collect();
245 103 Ok((k.extract(py)?, v?))
246 104 })
247 105 .collect();
248 106
249 107 let res = combine_changeset_copies(
250 108 revs?,
251 109 children?,
252 110 target_rev,
253 &rev_info_maker,
111 rev_info_maker,
254 112 &is_ancestor_wrap,
255 113 );
256 114 let out = PyDict::new(py);
257 115 for (dest, source) in res.into_iter() {
258 116 out.set_item(
259 117 py,
260 118 PyBytes::new(py, &dest.into_vec()),
261 119 PyBytes::new(py, &source.into_vec()),
262 120 )?;
263 121 }
264 122 Ok(out)
265 123 }
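
The DataHolder handling in the closure above exists so that the raw sidedata bytes outlive the ChangedFiles view that borrows from them. Below is a stripped-down sketch of the same ownership pattern, independent of the cpython types; Holder, View and make_view are made up for illustration.

    /// Owns a buffer on behalf of the caller; outlives any view built on it.
    struct Holder<D> {
        data: Option<D>,
    }

    /// A zero-copy "parsed" view over some raw bytes.
    struct View<'a> {
        raw: &'a [u8],
    }

    fn make_view<'r>(holder: &'r mut Holder<Vec<u8>>, raw: Vec<u8>) -> View<'r> {
        // Transfer ownership of the buffer to the holder, then borrow it back:
        // the view now lives as long as the holder, not as long as `raw`.
        holder.data = Some(raw);
        View {
            raw: holder.data.as_deref().unwrap(),
        }
    }

    fn main() {
        let mut holder = Holder { data: None };
        let view = make_view(&mut holder, b"changed-files sidedata".to_vec());
        assert_eq!(view.raw.len(), 22);
    }

This mirrors `d.data = Some(raw)` in the wrapper: the PyBytes object is parked in the holder supplied by hg-core and ChangedFiles::new() borrows the underlying buffer, instead of converting everything into Rust-owned structures as the deleted code did.
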
266 124
267 125 /// Create the module, with `__package__` given from parent
268 126 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
269 127 let dotted_name = &format!("{}.copy_tracing", package);
270 128 let m = PyModule::new(py, dotted_name)?;
271 129
272 130 m.add(py, "__package__", package)?;
273 131 m.add(py, "__doc__", "Copy tracing - Rust implementation")?;
274 132
275 133 m.add(
276 134 py,
277 135 "combine_changeset_copies",
278 136 py_fn!(
279 137 py,
280 138 combine_changeset_copies_wrapper(
281 139 revs: PyList,
282 140 children: PyDict,
283 141 target_rev: Revision,
284 142 rev_info: PyObject,
285 143 is_ancestor: PyObject
286 144 )
287 145 ),
288 146 )?;
289 147
290 148 let sys = PyModule::import(py, "sys")?;
291 149 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
292 150 sys_modules.set_item(py, dotted_name, &m)?;
293 151
294 152 Ok(m)
295 153 }