@@ -284,20 +284,29 @@ def fix(ui, repo, *pats, **opts):
     # There are no data dependencies between the workers fixing each file
     # revision, so we can use all available parallelism.
     def getfixes(items):
-        for rev, path in items:
-            ctx = repo[rev]
+        for srcrev, path, dstrevs in items:
+            ctx = repo[srcrev]
             olddata = ctx[path].data()
             metadata, newdata = fixfile(
-                ui, repo, opts, fixers, ctx, path, basepaths, basectxs[rev]
+                ui,
+                repo,
+                opts,
+                fixers,
+                ctx,
+                path,
+                basepaths,
+                basectxs[srcrev],
             )
-            # Don't waste memory/time passing unchanged content back, but
-            # produce one result per item either way.
-            yield (
-                rev,
-                path,
-                metadata,
-                newdata if newdata != olddata else None,
-            )
+            # We ungroup the work items now, because the code that consumes
+            # these results has to handle each dstrev separately, and in
+            # topological order. Because these are handled in topological
+            # order, it's important that we pass around references to
+            # "newdata" instead of copying it. Otherwise, we would be
+            # keeping more copies of file content in memory at a time than
+            # if we hadn't bothered to group/deduplicate the work items.
+            data = newdata if newdata != olddata else None
+            for dstrev in dstrevs:
+                yield (dstrev, path, metadata, data)
 
     results = worker.worker(
         ui, 1.0, getfixes, tuple(), workqueue, threadsafe=False
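
The comment block above carries the memory argument: the fixer tools run once per grouped work item, and the resulting "newdata" object is fanned out by reference to every dstrev. A minimal standalone sketch of that fan-out, with illustrative names rather than fix.py's real ones:

    def ungroup(fixedresults):
        # fixedresults: (path, data, dstrevs) tuples, one per grouped work item,
        # where the fixer tools ran exactly once on the srcrev's content.
        for path, data, dstrevs in fixedresults:
            for dstrev in dstrevs:
                # Yielding "data" again shares one bytes object; no per-dstrev copy.
                yield (dstrev, path, data)

    results = list(ungroup([(b"foo/bar.txt", b"fixed\n", (1, 2, 3))]))
    assert results[0][2] is results[1][2]  # the same object, not a copy
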
@@ -377,23 +386,32 @@ def cleanup(repo, replacements, wdirwrit
 
 
 def getworkqueue(ui, repo, pats, opts, revstofix, basectxs):
-    """Constructs the list of files to be fixed at specific revisions
+    """Constructs a list of files to fix and which revisions each fix applies to
 
-    It is up to the caller how to consume the work items, and the only
-    dependence between them is that replacement revisions must be committed in
-    topological order. Each work item represents a file in the working copy or
-    in some revision that should be fixed and written back to the working copy
-    or into a replacement revision.
+    To avoid duplicating work, there is usually only one work item for each file
+    revision that might need to be fixed. There can be multiple work items per
+    file revision if the same file needs to be fixed in multiple changesets with
+    different baserevs. Each work item also contains a list of changesets where
+    the file's data should be replaced with the fixed data. The work items for
+    earlier changesets come earlier in the work queue, to improve pipelining by
+    allowing the first changeset to be replaced while fixes are still being
+    computed for later changesets.
 
-    Work items for the same revision are grouped together, so that a worker
-    pool starting with the first N items in parallel is likely to finish the
-    first revision's work before other revisions. This can allow us to write
-    the result to disk and reduce memory footprint. At time of writing, the
-    partition strategy in worker.py seems favorable to this. We also sort the
-    items by ascending revision number to match the order in which we commit
-    the fixes later.
+    Also returned is a map from changesets to the count of work items that might
+    affect each changeset. This is used later to count when all of a changeset's
+    work items have been finished, without having to inspect the remaining work
+    queue in each worker subprocess.
+
+    The example work item (1, "foo/bar.txt", (1, 2, 3)) means that the data of
+    bar.txt should be read from revision 1, then fixed, and written back to
+    revisions 1, 2 and 3. Revision 1 is called the "srcrev" and the list of
+    revisions is called the "dstrevs". In practice the srcrev is always one of
+    the dstrevs, and we make that choice when constructing the work item so that
+    the choice can't be made inconsistently later on. The dstrevs should all
+    have the same file revision for the given path, so the choice of srcrev is
+    arbitrary. The wdirrev can be a dstrev and a srcrev.
     """
-    workqueue = []
+    dstrevmap = collections.defaultdict(list)
     numitems = collections.defaultdict(int)
     maxfilesize = ui.configbytes(b'fix', b'maxfilesize')
     for rev in sorted(revstofix):
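
To make the new docstring concrete, here is a hypothetical consumer of the (srcrev, path, dstrevs) work items and of the numitems map; the queue contents and the commit step are placeholders, not the extension's actual call sites:

    # Read bar.txt once from rev 1 and apply the result to revs 1, 2 and 3.
    workqueue = [(1, b"foo/bar.txt", [1, 2, 3]), (4, b"baz.txt", [4])]
    numitems = {1: 1, 2: 1, 3: 1, 4: 1}  # pending work items per changeset

    fixeddata = {}  # (dstrev, path) -> fixed content, filled as results arrive
    for srcrev, path, dstrevs in workqueue:
        data = b"..."  # stand-in for running the fixer tools on srcrev's content
        for dstrev in dstrevs:
            fixeddata[(dstrev, path)] = data
            numitems[dstrev] -= 1
            if numitems[dstrev] == 0:
                # Every item affecting dstrev is done; this is the point where
                # the real extension can commit the replacement changeset, in
                # topological order, without scanning the remaining queue.
                pass
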
@@ -411,8 +429,21 @@ def getworkqueue(ui, repo, pats, opts, r
                 % (util.bytecount(maxfilesize), path)
             )
             continue
-        workqueue.append((rev, path))
+        baserevs = tuple(ctx.rev() for ctx in basectxs[rev])
+        dstrevmap[(fctx.filerev(), baserevs, path)].append(rev)
         numitems[rev] += 1
+    workqueue = [
+        (min(dstrevs), path, dstrevs)
+        for (filerev, baserevs, path), dstrevs in dstrevmap.items()
+    ]
+    # Move work items for earlier changesets to the front of the queue, so we
+    # might be able to replace those changesets (in topological order) while
+    # we're still processing later work items. Note the min() in the previous
+    # expression, which means we don't need a custom comparator here. The path
+    # is also important in the sort order to make the output order stable. There
+    # are some situations where this doesn't help much, but some situations
+    # where it lets us buffer O(1) files instead of O(n) files.
+    workqueue.sort()
     return workqueue, numitems
 
 
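
A self-contained sketch of the grouping performed above: keying on (filerev, baserevs, path) merges changesets that share both the file revision and the bases, while the same file revision seen with different baserevs stays in separate work items. The input rows are invented for illustration:

    import collections

    # Hypothetical (rev, path, filerev, baserevs) rows for files that may need fixing.
    rows = [
        (1, b"foo/bar.txt", 7, (0,)),
        (2, b"foo/bar.txt", 7, (0,)),  # same filerev and bases as rev 1: grouped
        (3, b"foo/bar.txt", 7, (2,)),  # same filerev, different bases: kept apart
    ]

    dstrevmap = collections.defaultdict(list)
    for rev, path, filerev, baserevs in rows:
        dstrevmap[(filerev, baserevs, path)].append(rev)

    workqueue = sorted(
        (min(dstrevs), path, dstrevs)
        for (filerev, baserevs, path), dstrevs in dstrevmap.items()
    )
    # workqueue == [(1, b"foo/bar.txt", [1, 2]), (3, b"foo/bar.txt", [3])]
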
@@ -517,9 +548,9 @@ def getbasepaths(repo, opts, workqueue, 
         return {}
 
     basepaths = {}
-    for rev, path in workqueue:
-        fixctx = repo[rev]
-        for basectx in basectxs[rev]:
+    for srcrev, path, _dstrevs in workqueue:
+        fixctx = repo[srcrev]
+        for basectx in basectxs[srcrev]:
             basepath = copies.pathcopies(basectx, fixctx).get(path, path)
             if basepath in basectx:
                 basepaths[(basectx.rev(), fixctx.rev(), path)] = basepath
@@ -642,10 +673,10 @@ def _prefetchfiles(repo, workqueue, base
     toprefetch = set()
 
     # Prefetch the files that will be fixed.
-    for rev, path in workqueue:
-        if rev == wdirrev:
+    for srcrev, path, _dstrevs in workqueue:
+        if srcrev == wdirrev:
             continue
-        toprefetch.add((rev, path))
+        toprefetch.add((srcrev, path))
 
     # Prefetch the base contents for lineranges().
     for (baserev, fixrev, path), basepath in basepaths.items():
@@ -1797,7 +1797,56 @@ fixed.
   $ cat $LOGFILE | sort | uniq -c
     4 bar.log
     4 baz.log
-    4 foo.log
-    4 qux.log
+    3 foo.log
+    2 qux.log
 
   $ cd ..
+
+For tools that support line ranges, it's wrong to blindly re-use fixed file
+content for the same file revision if it appears twice with different baserevs,
+because the line ranges could be different. Since computing line ranges is
+ambiguous, this isn't a matter of correctness, but it affects the usability of
+this extension. It could maybe be simpler if baserevs were computed on a
+per-file basis to make this situation impossible to construct.
+
+In the following example, we construct two subgraphs with the same file
+revisions, and fix different sub-subgraphs to get different baserevs and
+different changed line ranges. The key precondition is that revisions 1 and 4
+have the same file revision, and the key result is that their successors don't
+have the same file content, because we want to fix different areas of that same
+file revision's content.
+
+  $ hg init differentlineranges
+  $ cd differentlineranges
+
+  $ printf "a\nb\n" > file.changed
+  $ hg commit -Aqm "0 ab"
+  $ printf "a\nx\n" > file.changed
+  $ hg commit -Aqm "1 ax"
+  $ hg remove file.changed
+  $ hg commit -Aqm "2 removed"
+  $ hg revert file.changed -r 0
+  $ hg commit -Aqm "3 ab (reverted)"
+  $ hg revert file.changed -r 1
+  $ hg commit -Aqm "4 ax (reverted)"
+
+  $ hg manifest --debug --template "{hash}\n" -r 0; \
+  > hg manifest --debug --template "{hash}\n" -r 3
+  418f692145676128d2fb518b027ddbac624be76e
+  418f692145676128d2fb518b027ddbac624be76e
+  $ hg manifest --debug --template "{hash}\n" -r 1; \
+  > hg manifest --debug --template "{hash}\n" -r 4
+  09b8b3ce5a507caaa282f7262679e6d04091426c
+  09b8b3ce5a507caaa282f7262679e6d04091426c
+
+  $ hg fix --working-dir -r 1+3+4
+  3 new orphan changesets
+
+  $ hg cat file.changed -r "successors(1)" --hidden
+  a
+  X
+  $ hg cat file.changed -r "successors(4)" --hidden
+  A
+  X
+
+  $ cd ..
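
The new test case rests on the fact that the same file content yields different changed-line ranges against different bases, which is why baserevs are part of the grouping key above. A standalone illustration using Python's difflib (fix.py computes its ranges with Mercurial's own diff code, so this only approximates the idea):

    import difflib

    content = ["a\n", "x\n"]  # the shared file revision ("1 ax" and "4 ax")
    base_ab = ["a\n", "b\n"]  # base content for one of the fix runs
    base_empty = []           # base content when the file was previously absent

    def changedlines(base, new):
        matcher = difflib.SequenceMatcher(None, base, new)
        return [
            (j1 + 1, j2)  # 1-based, inclusive line ranges in "new"
            for tag, i1, i2, j1, j2 in matcher.get_opcodes()
            if tag != "equal"
        ]

    print(changedlines(base_ab, content))     # [(2, 2)] -> only line 2 is fixed
    print(changedlines(base_empty, content))  # [(1, 2)] -> the whole file is fixed
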