fix: reduce number of tool executions...
Danny Hooper
r48992:f12a19d0 default
@@ -284,20 +284,29 @@ def fix(ui, repo, *pats, **opts):
         # There are no data dependencies between the workers fixing each file
         # revision, so we can use all available parallelism.
         def getfixes(items):
-            for rev, path in items:
-                ctx = repo[rev]
+            for srcrev, path, dstrevs in items:
+                ctx = repo[srcrev]
                 olddata = ctx[path].data()
                 metadata, newdata = fixfile(
-                    ui, repo, opts, fixers, ctx, path, basepaths, basectxs[rev]
+                    ui,
+                    repo,
+                    opts,
+                    fixers,
+                    ctx,
+                    path,
+                    basepaths,
+                    basectxs[srcrev],
                 )
-                # Don't waste memory/time passing unchanged content back, but
-                # produce one result per item either way.
-                yield (
-                    rev,
-                    path,
-                    metadata,
-                    newdata if newdata != olddata else None,
-                )
+                # We ungroup the work items now, because the code that consumes
+                # these results has to handle each dstrev separately, and in
+                # topological order. Because these are handled in topological
+                # order, it's important that we pass around references to
+                # "newdata" instead of copying it. Otherwise, we would be
+                # keeping more copies of file content in memory at a time than
+                # if we hadn't bothered to group/deduplicate the work items.
+                data = newdata if newdata != olddata else None
+                for dstrev in dstrevs:
+                    yield (dstrev, path, metadata, data)

         results = worker.worker(
             ui, 1.0, getfixes, tuple(), workqueue, threadsafe=False
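
The comment about passing references is the crux of the memory win here. Below is a minimal standalone sketch of the same ungrouping pattern; the stub fix_content callable, the ungroupfixes name, and the simplified result tuples are illustrative assumptions, not fix.py's actual API. Each work item runs the fixer tools once, then fans the single result object out to every dstrev.

# Sketch only; names and tuple shapes are illustrative, not from fix.py.
def ungroupfixes(items, fix_content):
    """Run fixers once per work item, then yield one result per dstrev,
    all sharing the same content object rather than copies of it."""
    for srcrev, path, dstrevs in items:
        newdata = fix_content(srcrev, path)  # one tool execution per item
        for dstrev in dstrevs:
            # Every yielded tuple references the same object, so n dstrevs
            # cost one content buffer in memory, not n.
            yield (dstrev, path, newdata)

items = [(1, b"foo/bar.txt", (1, 2, 3))]
results = list(ungroupfixes(items, lambda rev, path: b"fixed\n"))
assert all(r[2] is results[0][2] for r in results)  # shared, not copied
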
@@ -377,23 +386,32 @@ def cleanup(repo, replacements, wdirwrit


 def getworkqueue(ui, repo, pats, opts, revstofix, basectxs):
-    """Constructs the list of files to be fixed at specific revisions
+    """Constructs a list of files to fix and which revisions each fix applies to

-    It is up to the caller how to consume the work items, and the only
-    dependence between them is that replacement revisions must be committed in
-    topological order. Each work item represents a file in the working copy or
-    in some revision that should be fixed and written back to the working copy
-    or into a replacement revision.
+    To avoid duplicating work, there is usually only one work item for each file
+    revision that might need to be fixed. There can be multiple work items per
+    file revision if the same file needs to be fixed in multiple changesets with
+    different baserevs. Each work item also contains a list of changesets where
+    the file's data should be replaced with the fixed data. The work items for
+    earlier changesets come earlier in the work queue, to improve pipelining by
+    allowing the first changeset to be replaced while fixes are still being
+    computed for later changesets.

-    Work items for the same revision are grouped together, so that a worker
-    pool starting with the first N items in parallel is likely to finish the
-    first revision's work before other revisions. This can allow us to write
-    the result to disk and reduce memory footprint. At time of writing, the
-    partition strategy in worker.py seems favorable to this. We also sort the
-    items by ascending revision number to match the order in which we commit
-    the fixes later.
+    Also returned is a map from changesets to the count of work items that might
+    affect each changeset. This is used later to count when all of a changeset's
+    work items have been finished, without having to inspect the remaining work
+    queue in each worker subprocess.
+
+    The example work item (1, "foo/bar.txt", (1, 2, 3)) means that the data of
+    bar.txt should be read from revision 1, then fixed, and written back to
+    revisions 1, 2 and 3. Revision 1 is called the "srcrev" and the list of
+    revisions is called the "dstrevs". In practice the srcrev is always one of
+    the dstrevs, and we make that choice when constructing the work item so that
+    the choice can't be made inconsistently later on. The dstrevs should all
+    have the same file revision for the given path, so the choice of srcrev is
+    arbitrary. The wdirrev can be a dstrev and a srcrev.
     """
-    workqueue = []
+    dstrevmap = collections.defaultdict(list)
     numitems = collections.defaultdict(int)
     maxfilesize = ui.configbytes(b'fix', b'maxfilesize')
     for rev in sorted(revstofix):
@@ -411,8 +429,21 @@ def getworkqueue(ui, repo, pats, opts, r
                     % (util.bytecount(maxfilesize), path)
                 )
                 continue
-            workqueue.append((rev, path))
+            baserevs = tuple(ctx.rev() for ctx in basectxs[rev])
+            dstrevmap[(fctx.filerev(), baserevs, path)].append(rev)
             numitems[rev] += 1
+    workqueue = [
+        (min(dstrevs), path, dstrevs)
+        for (filerev, baserevs, path), dstrevs in dstrevmap.items()
+    ]
+    # Move work items for earlier changesets to the front of the queue, so we
+    # might be able to replace those changesets (in topological order) while
+    # we're still processing later work items. Note the min() in the previous
+    # expression, which means we don't need a custom comparator here. The path
+    # is also important in the sort order to make the output order stable. There
+    # are some situations where this doesn't help much, but some situations
+    # where it lets us buffer O(1) files instead of O(n) files.
+    workqueue.sort()
     return workqueue, numitems


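
Seen in isolation, the new getworkqueue logic is a group-by followed by a stable sort. Here is a self-contained sketch of that grouping under assumed inputs: plain (rev, path, filerev, baserevs) tuples stand in for the repo contexts, and the group_work_items name and candidate shape are invented for illustration.

import collections

def group_work_items(candidates):
    """Group (rev, path, filerev, baserevs) candidates so that identical
    file revisions with identical baserevs are fixed only once.

    Returns work items shaped like fix.py's: (srcrev, path, dstrevs).
    """
    dstrevmap = collections.defaultdict(list)
    for rev, path, filerev, baserevs in candidates:
        dstrevmap[(filerev, baserevs, path)].append(rev)
    workqueue = [
        (min(dstrevs), path, tuple(dstrevs))
        for (filerev, baserevs, path), dstrevs in dstrevmap.items()
    ]
    # Sorting by (srcrev, path) keeps earlier changesets at the front so
    # they can be replaced while later fixes are still being computed.
    workqueue.sort()
    return workqueue

# Revisions 1, 2 and 3 share one file revision of foo/bar.txt with one
# baserev, so they collapse into a single execution of the fixer tools.
candidates = [
    (1, "foo/bar.txt", "abc123", (0,)),
    (2, "foo/bar.txt", "abc123", (0,)),
    (3, "foo/bar.txt", "abc123", (0,)),
    (3, "baz.txt", "def456", (0,)),
]
print(group_work_items(candidates))
# [(1, 'foo/bar.txt', (1, 2, 3)), (3, 'baz.txt', (3,))]
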
@@ -517,9 +548,9 @@ def getbasepaths(repo, opts, workqueue, 
         return {}

     basepaths = {}
-    for rev, path in workqueue:
-        fixctx = repo[rev]
-        for basectx in basectxs[rev]:
+    for srcrev, path, _dstrevs in workqueue:
+        fixctx = repo[srcrev]
+        for basectx in basectxs[srcrev]:
             basepath = copies.pathcopies(basectx, fixctx).get(path, path)
             if basepath in basectx:
                 basepaths[(basectx.rev(), fixctx.rev(), path)] = basepath
@@ -642,10 +673,10 @@ def _prefetchfiles(repo, workqueue, base
     toprefetch = set()

     # Prefetch the files that will be fixed.
-    for rev, path in workqueue:
-        if rev == wdirrev:
+    for srcrev, path, _dstrevs in workqueue:
+        if srcrev == wdirrev:
             continue
-        toprefetch.add((rev, path))
+        toprefetch.add((srcrev, path))

     # Prefetch the base contents for lineranges().
@@ -1797,7 +1797,56 @@ fixed.
   $ cat $LOGFILE | sort | uniq -c
      4 bar.log
      4 baz.log
-     4 foo.log
-     4 qux.log
+     3 foo.log
+     2 qux.log

   $ cd ..
+
+For tools that support line ranges, it's wrong to blindly re-use fixed file
+content for the same file revision if it appears twice with different baserevs,
+because the line ranges could be different. Since computing line ranges is
+ambiguous, this isn't a matter of correctness, but it affects the usability of
+this extension. It could maybe be simpler if baserevs were computed on a
+per-file basis to make this situation impossible to construct.
+
+In the following example, we construct two subgraphs with the same file
+revisions, and fix different sub-subgraphs to get different baserevs and
+different changed line ranges. The key precondition is that revisions 1 and 4
+have the same file revision, and the key result is that their successors don't
+have the same file content, because we want to fix different areas of that same
+file revision's content.
+
+  $ hg init differentlineranges
+  $ cd differentlineranges
+
+  $ printf "a\nb\n" > file.changed
+  $ hg commit -Aqm "0 ab"
+  $ printf "a\nx\n" > file.changed
+  $ hg commit -Aqm "1 ax"
+  $ hg remove file.changed
+  $ hg commit -Aqm "2 removed"
+  $ hg revert file.changed -r 0
+  $ hg commit -Aqm "3 ab (reverted)"
+  $ hg revert file.changed -r 1
+  $ hg commit -Aqm "4 ax (reverted)"
+
+  $ hg manifest --debug --template "{hash}\n" -r 0; \
+  > hg manifest --debug --template "{hash}\n" -r 3
+  418f692145676128d2fb518b027ddbac624be76e
+  418f692145676128d2fb518b027ddbac624be76e
+  $ hg manifest --debug --template "{hash}\n" -r 1; \
+  > hg manifest --debug --template "{hash}\n" -r 4
+  09b8b3ce5a507caaa282f7262679e6d04091426c
+  09b8b3ce5a507caaa282f7262679e6d04091426c
+
+  $ hg fix --working-dir -r 1+3+4
+  3 new orphan changesets
+
+  $ hg cat file.changed -r "successors(1)" --hidden
+  a
+  X
+  $ hg cat file.changed -r "successors(4)" --hidden
+  A
+  X
+
+  $ cd ..
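
The two different outcomes for one file revision come down to changed line ranges depending on the base content: against base "a\nb" only line 2 of "a\nx" differs, while against a base where the file is absent (rev 1's successor vs. rev 4's, whose nearest unfixed ancestor removed the file) every line counts as changed. A rough standalone illustration of that dependence using difflib; the changed_lines helper is hypothetical and is not the extension's actual lineranges() machinery.

import difflib

def changed_lines(base, target):
    """Return 1-based line numbers of target that differ from base.
    If there is no base version at all, every line counts as changed."""
    if base is None:
        return set(range(1, len(target) + 1))
    matcher = difflib.SequenceMatcher(None, base, target)
    changed = set()
    for tag, _i1, _i2, j1, j2 in matcher.get_opcodes():
        if tag != 'equal':
            changed.update(range(j1 + 1, j2 + 1))
    return changed

content = ["a", "x"]  # the file revision shared by revisions 1 and 4
print(changed_lines(["a", "b"], content))  # base has "a\nb" -> {2}: fix yields a, X
print(changed_lines(None, content))        # no base       -> {1, 2}: fix yields A, X
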