fix: reduce number of tool executions...
Danny Hooper
r48992:f12a19d0 default
@@ -284,20 +284,29 @@ def fix(ui, repo, *pats, **opts):
         # There are no data dependencies between the workers fixing each file
         # revision, so we can use all available parallelism.
         def getfixes(items):
-            for rev, path in items:
-                ctx = repo[rev]
+            for srcrev, path, dstrevs in items:
+                ctx = repo[srcrev]
                 olddata = ctx[path].data()
                 metadata, newdata = fixfile(
-                    ui, repo, opts, fixers, ctx, path, basepaths, basectxs[rev]
+                    ui,
+                    repo,
+                    opts,
+                    fixers,
+                    ctx,
+                    path,
+                    basepaths,
+                    basectxs[srcrev],
                 )
-                # Don't waste memory/time passing unchanged content back, but
-                # produce one result per item either way.
-                yield (
-                    rev,
-                    path,
-                    metadata,
-                    newdata if newdata != olddata else None,
-                )
+                # We ungroup the work items now, because the code that consumes
+                # these results has to handle each dstrev separately, and in
+                # topological order. Because these are handled in topological
+                # order, it's important that we pass around references to
+                # "newdata" instead of copying it. Otherwise, we would be
+                # keeping more copies of file content in memory at a time than
+                # if we hadn't bothered to group/deduplicate the work items.
+                data = newdata if newdata != olddata else None
+                for dstrev in dstrevs:
+                    yield (dstrev, path, metadata, data)
 
         results = worker.worker(
             ui, 1.0, getfixes, tuple(), workqueue, threadsafe=False
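
The memory point made in the new comment above can be illustrated standalone. The following is a minimal sketch of the "ungroup, but share the data" pattern, assuming hypothetical names (ungroup, fixdata) that are not part of fix.py:

    # Hypothetical sketch of getfixes()'s fan-out; not fix.py code.
    def ungroup(items, fixdata):
        for srcrev, path, dstrevs in items:
            data = fixdata(srcrev, path)  # fixed content, computed once
            for dstrev in dstrevs:
                # Each dstrev gets a reference to the same object, not a
                # copy, so deduplicating work never raises peak memory use.
                yield (dstrev, path, data)

    items = [(1, b"foo/bar.txt", (1, 2, 3))]
    results = list(ungroup(items, lambda rev, path: b"fixed\n"))
    assert results[0][2] is results[1][2]  # one buffer, three references
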
@@ -377,23 +386,32 @@ def cleanup(repo, replacements, wdirwrit
 
 
 def getworkqueue(ui, repo, pats, opts, revstofix, basectxs):
-    """Constructs the list of files to be fixed at specific revisions
+    """Constructs a list of files to fix and which revisions each fix applies to
 
-    It is up to the caller how to consume the work items, and the only
-    dependence between them is that replacement revisions must be committed in
-    topological order. Each work item represents a file in the working copy or
-    in some revision that should be fixed and written back to the working copy
-    or into a replacement revision.
+    To avoid duplicating work, there is usually only one work item for each file
+    revision that might need to be fixed. There can be multiple work items per
+    file revision if the same file needs to be fixed in multiple changesets with
+    different baserevs. Each work item also contains a list of changesets where
+    the file's data should be replaced with the fixed data. The work items for
+    earlier changesets come earlier in the work queue, to improve pipelining by
+    allowing the first changeset to be replaced while fixes are still being
+    computed for later changesets.
 
-    Work items for the same revision are grouped together, so that a worker
-    pool starting with the first N items in parallel is likely to finish the
-    first revision's work before other revisions. This can allow us to write
-    the result to disk and reduce memory footprint. At time of writing, the
-    partition strategy in worker.py seems favorable to this. We also sort the
-    items by ascending revision number to match the order in which we commit
-    the fixes later.
+    Also returned is a map from changesets to the count of work items that might
+    affect each changeset. This is used later to count when all of a changeset's
+    work items have been finished, without having to inspect the remaining work
+    queue in each worker subprocess.
+
+    The example work item (1, "foo/bar.txt", (1, 2, 3)) means that the data of
+    bar.txt should be read from revision 1, then fixed, and written back to
+    revisions 1, 2 and 3. Revision 1 is called the "srcrev" and the list of
+    revisions is called the "dstrevs". In practice the srcrev is always one of
+    the dstrevs, and we make that choice when constructing the work item so that
+    the choice can't be made inconsistently later on. The dstrevs should all
+    have the same file revision for the given path, so the choice of srcrev is
+    arbitrary. The wdirrev can be a dstrev and a srcrev.
     """
-    workqueue = []
+    dstrevmap = collections.defaultdict(list)
     numitems = collections.defaultdict(int)
     maxfilesize = ui.configbytes(b'fix', b'maxfilesize')
     for rev in sorted(revstofix):
@@ -411,8 +429,21 @@ def getworkqueue(ui, repo, pats, opts, r
                     % (util.bytecount(maxfilesize), path)
                 )
                 continue
-            workqueue.append((rev, path))
+            baserevs = tuple(ctx.rev() for ctx in basectxs[rev])
+            dstrevmap[(fctx.filerev(), baserevs, path)].append(rev)
             numitems[rev] += 1
+    workqueue = [
+        (min(dstrevs), path, dstrevs)
+        for (filerev, baserevs, path), dstrevs in dstrevmap.items()
+    ]
+    # Move work items for earlier changesets to the front of the queue, so we
+    # might be able to replace those changesets (in topological order) while
+    # we're still processing later work items. Note the min() in the previous
+    # expression, which means we don't need a custom comparator here. The path
+    # is also important in the sort order to make the output order stable. There
+    # are some situations where this doesn't help much, but some situations
+    # where it lets us buffer O(1) files instead of O(n) files.
+    workqueue.sort()
     return workqueue, numitems
 
 
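
The grouping performed by getworkqueue() is self-contained enough to sketch outside the extension. Below, plain tuples stand in for repository and context objects; groupworkitems and its input row format are illustrative assumptions, not fix.py APIs:

    import collections

    def groupworkitems(rows):
        # Rows agreeing on (filerev, baserevs, path) would produce
        # identical fixed content, so they collapse into one work item
        # whose dstrevs lists every changeset to write the result to.
        dstrevmap = collections.defaultdict(list)
        numitems = collections.defaultdict(int)
        for rev, path, filerev, baserevs in rows:
            dstrevmap[(filerev, baserevs, path)].append(rev)
            numitems[rev] += 1  # work items that can affect each changeset
        workqueue = [
            # min(dstrevs) is the srcrev; picking the smallest means a
            # plain tuple sort gives ascending (topological) rev order.
            (min(dstrevs), path, dstrevs)
            for (filerev, baserevs, path), dstrevs in dstrevmap.items()
        ]
        workqueue.sort()
        return workqueue, numitems

    # Revisions 1, 2 and 3 share a file revision of foo/bar.txt and have
    # the same baserevs, so the fixer tool runs once instead of thrice:
    rows = [
        (1, "foo/bar.txt", 0, (0,)),
        (2, "foo/bar.txt", 0, (0,)),
        (3, "foo/bar.txt", 0, (0,)),
        (3, "baz.txt", 5, (0,)),
    ]
    workqueue, numitems = groupworkitems(rows)
    print(workqueue)       # [(1, 'foo/bar.txt', [1, 2, 3]), (3, 'baz.txt', [3])]
    print(dict(numitems))  # {1: 1, 2: 1, 3: 2}

The returned numitems map is what lets the committing side detect that all of a changeset's work items are finished without scanning the remaining queue, as the new docstring describes.
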
@@ -517,9 +548,9 @@ def getbasepaths(repo, opts, workqueue,
         return {}
 
     basepaths = {}
-    for rev, path in workqueue:
-        fixctx = repo[rev]
-        for basectx in basectxs[rev]:
+    for srcrev, path, _dstrevs in workqueue:
+        fixctx = repo[srcrev]
+        for basectx in basectxs[srcrev]:
             basepath = copies.pathcopies(basectx, fixctx).get(path, path)
             if basepath in basectx:
                 basepaths[(basectx.rev(), fixctx.rev(), path)] = basepath
@@ -642,10 +673,10 @@ def _prefetchfiles(repo, workqueue, base
     toprefetch = set()
 
     # Prefetch the files that will be fixed.
-    for rev, path in workqueue:
-        if rev == wdirrev:
+    for srcrev, path, _dstrevs in workqueue:
+        if srcrev == wdirrev:
             continue
-        toprefetch.add((rev, path))
+        toprefetch.add((srcrev, path))
 
     # Prefetch the base contents for lineranges().
     for (baserev, fixrev, path), basepath in basepaths.items():
@@ -1797,7 +1797,56 @@ fixed.
   $ cat $LOGFILE | sort | uniq -c
         4 bar.log
         4 baz.log
-        4 foo.log
-        4 qux.log
+        3 foo.log
+        2 qux.log
 
   $ cd ..
+
+For tools that support line ranges, it's wrong to blindly re-use fixed file
+content for the same file revision if it appears twice with different baserevs,
+because the line ranges could be different. Since computing line ranges is
+ambiguous, this isn't a matter of correctness, but it affects the usability of
+this extension. It could maybe be simpler if baserevs were computed on a
+per-file basis to make this situation impossible to construct.
+
+In the following example, we construct two subgraphs with the same file
+revisions, and fix different sub-subgraphs to get different baserevs and
+different changed line ranges. The key precondition is that revisions 1 and 4
+have the same file revision, and the key result is that their successors don't
+have the same file content, because we want to fix different areas of that same
+file revision's content.
+
+  $ hg init differentlineranges
+  $ cd differentlineranges
+
+  $ printf "a\nb\n" > file.changed
+  $ hg commit -Aqm "0 ab"
+  $ printf "a\nx\n" > file.changed
+  $ hg commit -Aqm "1 ax"
+  $ hg remove file.changed
+  $ hg commit -Aqm "2 removed"
+  $ hg revert file.changed -r 0
+  $ hg commit -Aqm "3 ab (reverted)"
+  $ hg revert file.changed -r 1
+  $ hg commit -Aqm "4 ax (reverted)"
+
+  $ hg manifest --debug --template "{hash}\n" -r 0; \
+  > hg manifest --debug --template "{hash}\n" -r 3
+  418f692145676128d2fb518b027ddbac624be76e
+  418f692145676128d2fb518b027ddbac624be76e
+  $ hg manifest --debug --template "{hash}\n" -r 1; \
+  > hg manifest --debug --template "{hash}\n" -r 4
+  09b8b3ce5a507caaa282f7262679e6d04091426c
+  09b8b3ce5a507caaa282f7262679e6d04091426c
+
+  $ hg fix --working-dir -r 1+3+4
+  3 new orphan changesets
+
+  $ hg cat file.changed -r "successors(1)" --hidden
+  a
+  X
+  $ hg cat file.changed -r "successors(4)" --hidden
+  A
+  X
+
+  $ cd ..
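
The test above also shows why baserevs is part of the work-item grouping key, not just the file revision. With hg fix -r 1+3+4, revision 1 is fixed against its parent, revision 0; revision 4's parent (revision 3) is itself being fixed, so revision 4 inherits revision 3's base, revision 2, where the file is absent and every line counts as changed. A hypothetical sketch of that bookkeeping (the revision numbers and abbreviated filerev hash come from the test; the code itself is illustrative):

    import collections

    dstrevmap = collections.defaultdict(list)
    # Revisions 1 and 4 share file revision 09b8b3ce of file.changed:
    #   rev 1's base is rev 0, so only line 2 ("x") is a changed range;
    #   rev 4's base is rev 2, where the file is absent, so all lines are.
    for rev, filerev, baserevs in [
        (1, "09b8b3ce", (0,)),
        (4, "09b8b3ce", (2,)),
    ]:
        dstrevmap[(filerev, baserevs, "file.changed")].append(rev)

    # Different baserevs keep these as two work items, which is why the
    # successors end up as "a\nX\n" and "A\nX\n" respectively.
    assert len(dstrevmap) == 2
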