@@ -284,20 +284,29 @@ def fix(ui, repo, *pats, **opts):
         # There are no data dependencies between the workers fixing each file
         # revision, so we can use all available parallelism.
         def getfixes(items):
-            for rev, path in items:
-                ctx = repo[rev]
+            for srcrev, path, dstrevs in items:
+                ctx = repo[srcrev]
                 olddata = ctx[path].data()
                 metadata, newdata = fixfile(
-                    ui, repo, opts, fixers, ctx, path, basepaths, basectxs[rev]
+                    ui,
+                    repo,
+                    opts,
+                    fixers,
+                    ctx,
+                    path,
+                    basepaths,
+                    basectxs[srcrev],
                 )
-                # Don't waste memory/time passing unchanged content back, but
-                # produce one result per item either way.
-                yield (
-                    rev,
-                    path,
-                    metadata,
-                    newdata if newdata != olddata else None,
-                )
+                # We ungroup the work items now, because the code that consumes
+                # these results has to handle each dstrev separately, and in
+                # topological order. Because these are handled in topological
+                # order, it's important that we pass around references to
+                # "newdata" instead of copying it. Otherwise, we would be
+                # keeping more copies of file content in memory at a time than
+                # if we hadn't bothered to group/deduplicate the work items.
+                data = newdata if newdata != olddata else None
+                for dstrev in dstrevs:
+                    yield (dstrev, path, metadata, data)

         results = worker.worker(
             ui, 1.0, getfixes, tuple(), workqueue, threadsafe=False
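To make the new work-item flow concrete, here is a minimal, self-contained sketch of the fan-out pattern that getfixes() uses above. Everything except the (srcrev, path, dstrevs) item shape is invented for illustration: fake_fix_bytes stands in for running the fixer tools, the contents dict stands in for repo[rev][path].data(), and the metadata field is omitted for brevity.

    def fake_fix_bytes(data):
        # Stand-in for running the configured fixer tools on file content.
        return data.upper()

    def getfixes(items, contents):
        """Yield one (dstrev, path, data) result per destination revision."""
        for srcrev, path, dstrevs in items:
            olddata = contents[(srcrev, path)]
            newdata = fake_fix_bytes(olddata)
            # Yield a reference to the same bytes object for every dstrev
            # instead of copying it, so memory stays proportional to the
            # number of distinct file revisions, not destination revisions.
            data = newdata if newdata != olddata else None
            for dstrev in dstrevs:
                yield (dstrev, path, data)

    contents = {(1, "foo/bar.txt"): b"a\nx\n"}
    results = list(getfixes([(1, "foo/bar.txt", (1, 2, 3))], contents))
    assert len(results) == 3               # one result per dstrev
    assert results[0][2] is results[2][2]  # fixed once, shared by reference

The point of yielding the shared object, as the new comment explains, is that grouping would otherwise increase peak memory instead of just reducing tool executions.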
@@ -377,23 +386,32 @@ def cleanup(repo, replacements, wdirwrit


 def getworkqueue(ui, repo, pats, opts, revstofix, basectxs):
-    """Constructs the list of files to be fixed at given revisions
+    """Constructs a list of files to fix and which revisions each fix applies to

-    It is up to the caller how to consume the work items, and the only
-    dependence between them is that replacement revisions must be committed in
-    topological order. Each work item represents a file in the working copy or
-    in some revision that should be fixed and written back to the working copy
-    or into a replacement revision.
+    To avoid duplicating work, there is usually only one work item for each file
+    revision that might need to be fixed. There can be multiple work items per
+    file revision if the same file needs to be fixed in multiple changesets with
+    different baserevs. Each work item also contains a list of changesets where
+    the file's data should be replaced with the fixed data. The work items for
+    earlier changesets come earlier in the work queue, to improve pipelining by
+    allowing the first changeset to be replaced while fixes are still being
+    computed for later changesets.

-    Work items for the same revision are grouped together, so that a worker
-    pool starting with the first N items in parallel is likely to finish the
-    first revision's work before other revisions. This can allow us to write
-    the result to disk and reduce memory footprint. At time of writing, the
-    partition strategy in worker.py seems favorable to this. We also sort the
-    items by ascending revision number to match the order in which we commit
-    the fixes later.
+    Also returned is a map from changesets to the count of work items that might
+    affect each changeset. This is used later to count when all of a changeset's
+    work items have been finished, without having to inspect the remaining work
+    queue in each worker subprocess.
+
+    The example work item (1, "foo/bar.txt", (1, 2, 3)) means that the data of
+    bar.txt should be read from revision 1, then fixed, and written back to
+    revisions 1, 2 and 3. Revision 1 is called the "srcrev" and the list of
+    revisions is called the "dstrevs". In practice the srcrev is always one of
+    the dstrevs, and we make that choice when constructing the work item so that
+    the choice can't be made inconsistently later on. The dstrevs should all
+    have the same file revision for the given path, so the choice of srcrev is
+    arbitrary. The wdirrev can be a dstrev and a srcrev.
     """
-    workqueue = []
+    dstrevmap = collections.defaultdict(list)
     numitems = collections.defaultdict(int)
     maxfilesize = ui.configbytes(b'fix', b'maxfilesize')
     for rev in sorted(revstofix):
@@ -411,8 +429,21 @@ def getworkqueue(ui, repo, pats, opts, r
                 % (util.bytecount(maxfilesize), path)
             )
             continue
-        workqueue.append((rev, path))
+        baserevs = tuple(ctx.rev() for ctx in basectxs[rev])
+        dstrevmap[(fctx.filerev(), baserevs, path)].append(rev)
         numitems[rev] += 1
+    workqueue = [
+        (min(dstrevs), path, dstrevs)
+        for (filerev, baserevs, path), dstrevs in dstrevmap.items()
+    ]
+    # Move work items for earlier changesets to the front of the queue, so we
+    # might be able to replace those changesets (in topological order) while
+    # we're still processing later work items. Note the min() in the previous
+    # expression, which means we don't need a custom comparator here. The path
+    # is also important in the sort order to make the output order stable. There
+    # are some situations where this doesn't help much, but some situations
+    # where it lets us buffer O(1) files instead of O(n) files.
+    workqueue.sort()
     return workqueue, numitems


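The grouping the new docstring describes can be demonstrated standalone. In this sketch the request tuples are invented example data; only the dstrevmap grouping, the min() choice of srcrev, the numitems counts, and the plain workqueue.sort() mirror the actual change.

    import collections

    # Each request is (rev, filerev, baserevs, path): `rev` wants `path`
    # fixed, `filerev` identifies the file content, and `baserevs` are the
    # revisions the changed line ranges are computed against.
    requests = [
        (1, 7, (0,), b"foo/bar.txt"),
        (3, 7, (0,), b"foo/bar.txt"),  # same content and bases: deduplicated
        (2, 7, (1,), b"foo/bar.txt"),  # different baserevs: separate item
    ]

    dstrevmap = collections.defaultdict(list)
    numitems = collections.defaultdict(int)
    for rev, filerev, baserevs, path in requests:
        dstrevmap[(filerev, baserevs, path)].append(rev)
        numitems[rev] += 1

    workqueue = [
        (min(dstrevs), path, tuple(dstrevs))
        for (filerev, baserevs, path), dstrevs in dstrevmap.items()
    ]
    # Plain tuple comparison sorts by srcrev first (already the min of its
    # dstrevs), then by path, so no custom comparator is needed and the
    # output order is stable.
    workqueue.sort()

    print(workqueue)       # [(1, b'foo/bar.txt', (1, 3)), (2, b'foo/bar.txt', (2,))]
    print(dict(numitems))  # {1: 1, 3: 1, 2: 1}

Note how the second request produces no new work item, yet still increments numitems for revision 3, which is exactly what lets the consumer know when all of a changeset's results have arrived.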
@@ -517,9 +548,9 @@ def getbasepaths(repo, opts, workqueue,
         return {}

     basepaths = {}
-    for rev, path in workqueue:
-        fixctx = repo[rev]
-        for basectx in basectxs[rev]:
+    for srcrev, path, _dstrevs in workqueue:
+        fixctx = repo[srcrev]
+        for basectx in basectxs[srcrev]:
             basepath = copies.pathcopies(basectx, fixctx).get(path, path)
             if basepath in basectx:
                 basepaths[(basectx.rev(), fixctx.rev(), path)] = basepath
@@ -642,10 +673,10 @@ def _prefetchfiles(repo, workqueue, base
     toprefetch = set()

     # Prefetch the files that will be fixed.
-    for rev, path in workqueue:
-        if rev == wdirrev:
+    for srcrev, path, _dstrevs in workqueue:
+        if srcrev == wdirrev:
             continue
-        toprefetch.add((rev, path))
+        toprefetch.add((srcrev, path))

     # Prefetch the base contents for lineranges().
     for (baserev, fixrev, path), basepath in basepaths.items():
@@ -1797,7 +1797,56 @@ fixed.
   $ cat $LOGFILE | sort | uniq -c
         4 bar.log
         4 baz.log
-        4 foo.log
-        4 qux.log
+        3 foo.log
+        2 qux.log

   $ cd ..
+
+For tools that support line ranges, it's wrong to blindly re-use fixed file
+content for the same file revision if it appears twice with different baserevs,
+because the line ranges could be different. Since computing line ranges is
+ambiguous, this isn't a matter of correctness, but it affects the usability of
+this extension. It could maybe be simpler if baserevs were computed on a
+per-file basis to make this situation impossible to construct.
+
+In the following example, we construct two subgraphs with the same file
+revisions, and fix different sub-subgraphs to get different baserevs and
+different changed line ranges. The key precondition is that revisions 1 and 4
+have the same file revision, and the key result is that their successors don't
+have the same file content, because we want to fix different areas of that same
+file revision's content.
+
+  $ hg init differentlineranges
+  $ cd differentlineranges
+
+  $ printf "a\nb\n" > file.changed
+  $ hg commit -Aqm "0 ab"
+  $ printf "a\nx\n" > file.changed
+  $ hg commit -Aqm "1 ax"
+  $ hg remove file.changed
+  $ hg commit -Aqm "2 removed"
+  $ hg revert file.changed -r 0
+  $ hg commit -Aqm "3 ab (reverted)"
+  $ hg revert file.changed -r 1
+  $ hg commit -Aqm "4 ax (reverted)"
+
+  $ hg manifest --debug --template "{hash}\n" -r 0; \
+  > hg manifest --debug --template "{hash}\n" -r 3
+  418f692145676128d2fb518b027ddbac624be76e
+  418f692145676128d2fb518b027ddbac624be76e
+  $ hg manifest --debug --template "{hash}\n" -r 1; \
+  > hg manifest --debug --template "{hash}\n" -r 4
+  09b8b3ce5a507caaa282f7262679e6d04091426c
+  09b8b3ce5a507caaa282f7262679e6d04091426c
+
+  $ hg fix --working-dir -r 1+3+4
+  3 new orphan changesets
+
+  $ hg cat file.changed -r "successors(1)" --hidden
+  a
+  X
+  $ hg cat file.changed -r "successors(4)" --hidden
+  A
+  X
+
+  $ cd ..
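A toy demonstration of why the two fixed results above differ, and hence why the dedup key includes baserevs. The changedlines() helper here is an invented difflib-based approximation of the extension's line-range computation, not its real implementation: the same file content ("a\nx\n", shared by revisions 1 and 4) diffed against two different bases yields different changed line ranges, so two separate work items are required.

    import difflib

    def changedlines(base, new):
        """Return 1-based (start, end) ranges of lines in `new` that differ."""
        matcher = difflib.SequenceMatcher(
            None, base.splitlines(), new.splitlines()
        )
        return [
            (j1 + 1, j2)
            for tag, i1, i2, j1, j2 in matcher.get_opcodes()
            if tag != "equal"
        ]

    content = "a\nx\n"    # shared by revisions 1 and 4 above
    base_rev1 = "a\nb\n"  # revision 1's base is revision 0
    base_rev4 = ""        # revision 4's base chain reaches revision 2,
                          # where the file was removed
    print(changedlines(base_rev1, content))  # [(2, 2)] -> fix line 2 only
    print(changedlines(base_rev4, content))  # [(1, 2)] -> fix lines 1-2

This matches the test output: successors(1) keeps "a" and gets "X", while successors(4) gets both lines uppercased.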