fix: reduce number of tool executions...
Danny Hooper
r48992:f12a19d0 default
@@ -284,20 +284,29 @@ def fix(ui, repo, *pats, **opts):
         # There are no data dependencies between the workers fixing each file
         # revision, so we can use all available parallelism.
         def getfixes(items):
-            for rev, path in items:
-                ctx = repo[rev]
+            for srcrev, path, dstrevs in items:
+                ctx = repo[srcrev]
                 olddata = ctx[path].data()
                 metadata, newdata = fixfile(
-                    ui, repo, opts, fixers, ctx, path, basepaths, basectxs[rev]
+                    ui,
+                    repo,
+                    opts,
+                    fixers,
+                    ctx,
+                    path,
+                    basepaths,
+                    basectxs[srcrev],
                 )
-                # Don't waste memory/time passing unchanged content back, but
-                # produce one result per item either way.
-                yield (
-                    rev,
-                    path,
-                    metadata,
-                    newdata if newdata != olddata else None,
-                )
+                # We ungroup the work items now, because the code that consumes
+                # these results has to handle each dstrev separately, and in
+                # topological order. Because these are handled in topological
+                # order, it's important that we pass around references to
+                # "newdata" instead of copying it. Otherwise, we would be
+                # keeping more copies of file content in memory at a time than
+                # if we hadn't bothered to group/deduplicate the work items.
+                data = newdata if newdata != olddata else None
+                for dstrev in dstrevs:
+                    yield (dstrev, path, metadata, data)
 
         results = worker.worker(
             ui, 1.0, getfixes, tuple(), workqueue, threadsafe=False
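
The memory point made in the new comment above can be illustrated standalone. The following is a minimal sketch of the "ungroup, but share the data" pattern, assuming hypothetical names (ungroup, fixdata) that are not part of fix.py:

    # Hypothetical sketch of getfixes()'s fan-out; not fix.py code.
    def ungroup(items, fixdata):
        for srcrev, path, dstrevs in items:
            data = fixdata(srcrev, path)  # fixed content, computed once
            for dstrev in dstrevs:
                # Each dstrev gets a reference to the same object, not a
                # copy, so deduplicating work never raises peak memory use.
                yield (dstrev, path, data)

    items = [(1, b"foo/bar.txt", (1, 2, 3))]
    results = list(ungroup(items, lambda rev, path: b"fixed\n"))
    assert results[0][2] is results[1][2]  # one buffer, three references
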
@@ -377,23 +386,32 @@ def cleanup(repo, replacements, wdirwrit
 
 
 def getworkqueue(ui, repo, pats, opts, revstofix, basectxs):
-    """Constructs the list of files to be fixed at specific revisions
+    """Constructs a list of files to fix and which revisions each fix applies to
 
-    It is up to the caller how to consume the work items, and the only
-    dependence between them is that replacement revisions must be committed in
-    topological order. Each work item represents a file in the working copy or
-    in some revision that should be fixed and written back to the working copy
-    or into a replacement revision.
+    To avoid duplicating work, there is usually only one work item for each file
+    revision that might need to be fixed. There can be multiple work items per
+    file revision if the same file needs to be fixed in multiple changesets with
+    different baserevs. Each work item also contains a list of changesets where
+    the file's data should be replaced with the fixed data. The work items for
+    earlier changesets come earlier in the work queue, to improve pipelining by
+    allowing the first changeset to be replaced while fixes are still being
+    computed for later changesets.
 
-    Work items for the same revision are grouped together, so that a worker
-    pool starting with the first N items in parallel is likely to finish the
-    first revision's work before other revisions. This can allow us to write
-    the result to disk and reduce memory footprint. At time of writing, the
-    partition strategy in worker.py seems favorable to this. We also sort the
-    items by ascending revision number to match the order in which we commit
-    the fixes later.
+    Also returned is a map from changesets to the count of work items that might
+    affect each changeset. This is used later to count when all of a changeset's
+    work items have been finished, without having to inspect the remaining work
+    queue in each worker subprocess.
+
+    The example work item (1, "foo/bar.txt", (1, 2, 3)) means that the data of
+    bar.txt should be read from revision 1, then fixed, and written back to
+    revisions 1, 2 and 3. Revision 1 is called the "srcrev" and the list of
+    revisions is called the "dstrevs". In practice the srcrev is always one of
+    the dstrevs, and we make that choice when constructing the work item so that
+    the choice can't be made inconsistently later on. The dstrevs should all
+    have the same file revision for the given path, so the choice of srcrev is
+    arbitrary. The wdirrev can be a dstrev and a srcrev.
     """
-    workqueue = []
+    dstrevmap = collections.defaultdict(list)
     numitems = collections.defaultdict(int)
     maxfilesize = ui.configbytes(b'fix', b'maxfilesize')
     for rev in sorted(revstofix):
@@ -411,8 +429,21 @@ def getworkqueue(ui, repo, pats, opts, r
                     % (util.bytecount(maxfilesize), path)
                 )
                 continue
-            workqueue.append((rev, path))
+            baserevs = tuple(ctx.rev() for ctx in basectxs[rev])
+            dstrevmap[(fctx.filerev(), baserevs, path)].append(rev)
             numitems[rev] += 1
+    workqueue = [
+        (min(dstrevs), path, dstrevs)
+        for (filerev, baserevs, path), dstrevs in dstrevmap.items()
+    ]
+    # Move work items for earlier changesets to the front of the queue, so we
+    # might be able to replace those changesets (in topological order) while
+    # we're still processing later work items. Note the min() in the previous
+    # expression, which means we don't need a custom comparator here. The path
+    # is also important in the sort order to make the output order stable. There
+    # are some situations where this doesn't help much, but some situations
+    # where it lets us buffer O(1) files instead of O(n) files.
+    workqueue.sort()
     return workqueue, numitems
 
 
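
The grouping performed by getworkqueue() is self-contained enough to sketch outside the extension. Below, plain tuples stand in for repository and context objects; groupworkitems and its input row format are illustrative assumptions, not fix.py APIs:

    import collections

    def groupworkitems(rows):
        # Rows agreeing on (filerev, baserevs, path) would produce
        # identical fixed content, so they collapse into one work item
        # whose dstrevs lists every changeset to write the result to.
        dstrevmap = collections.defaultdict(list)
        numitems = collections.defaultdict(int)
        for rev, path, filerev, baserevs in rows:
            dstrevmap[(filerev, baserevs, path)].append(rev)
            numitems[rev] += 1  # work items that can affect each changeset
        workqueue = [
            # min(dstrevs) is the srcrev; picking the smallest means a
            # plain tuple sort gives ascending (topological) rev order.
            (min(dstrevs), path, dstrevs)
            for (filerev, baserevs, path), dstrevs in dstrevmap.items()
        ]
        workqueue.sort()
        return workqueue, numitems

    # Revisions 1, 2 and 3 share a file revision of foo/bar.txt and have
    # the same baserevs, so the fixer tool runs once instead of thrice:
    rows = [
        (1, "foo/bar.txt", 0, (0,)),
        (2, "foo/bar.txt", 0, (0,)),
        (3, "foo/bar.txt", 0, (0,)),
        (3, "baz.txt", 5, (0,)),
    ]
    workqueue, numitems = groupworkitems(rows)
    print(workqueue)       # [(1, 'foo/bar.txt', [1, 2, 3]), (3, 'baz.txt', [3])]
    print(dict(numitems))  # {1: 1, 2: 1, 3: 2}

The returned numitems map is what lets the committing side detect that all of a changeset's work items are finished without scanning the remaining queue, as the new docstring describes.
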
@@ -517,9 +548,9 @@ def getbasepaths(repo, opts, workqueue,
         return {}
 
     basepaths = {}
-    for rev, path in workqueue:
-        fixctx = repo[rev]
-        for basectx in basectxs[rev]:
+    for srcrev, path, _dstrevs in workqueue:
+        fixctx = repo[srcrev]
+        for basectx in basectxs[srcrev]:
             basepath = copies.pathcopies(basectx, fixctx).get(path, path)
             if basepath in basectx:
                 basepaths[(basectx.rev(), fixctx.rev(), path)] = basepath
@@ -642,10 +673,10 @@ def _prefetchfiles(repo, workqueue, base
     toprefetch = set()
 
     # Prefetch the files that will be fixed.
-    for rev, path in workqueue:
-        if rev == wdirrev:
+    for srcrev, path, _dstrevs in workqueue:
+        if srcrev == wdirrev:
             continue
-        toprefetch.add((rev, path))
+        toprefetch.add((srcrev, path))
 
     # Prefetch the base contents for lineranges().
     for (baserev, fixrev, path), basepath in basepaths.items():
@@ -1797,7 +1797,56 @@ fixed.
   $ cat $LOGFILE | sort | uniq -c
         4 bar.log
         4 baz.log
-        4 foo.log
-        4 qux.log
+        3 foo.log
+        2 qux.log
 
   $ cd ..
+
+For tools that support line ranges, it's wrong to blindly re-use fixed file
+content for the same file revision if it appears twice with different baserevs,
+because the line ranges could be different. Since computing line ranges is
+ambiguous, this isn't a matter of correctness, but it affects the usability of
+this extension. It could maybe be simpler if baserevs were computed on a
+per-file basis to make this situation impossible to construct.
+
+In the following example, we construct two subgraphs with the same file
+revisions, and fix different sub-subgraphs to get different baserevs and
+different changed line ranges. The key precondition is that revisions 1 and 4
+have the same file revision, and the key result is that their successors don't
+have the same file content, because we want to fix different areas of that same
+file revision's content.
+
+  $ hg init differentlineranges
+  $ cd differentlineranges
+
+  $ printf "a\nb\n" > file.changed
+  $ hg commit -Aqm "0 ab"
+  $ printf "a\nx\n" > file.changed
+  $ hg commit -Aqm "1 ax"
+  $ hg remove file.changed
+  $ hg commit -Aqm "2 removed"
+  $ hg revert file.changed -r 0
+  $ hg commit -Aqm "3 ab (reverted)"
+  $ hg revert file.changed -r 1
+  $ hg commit -Aqm "4 ax (reverted)"
+
+  $ hg manifest --debug --template "{hash}\n" -r 0; \
+  > hg manifest --debug --template "{hash}\n" -r 3
+  418f692145676128d2fb518b027ddbac624be76e
+  418f692145676128d2fb518b027ddbac624be76e
+  $ hg manifest --debug --template "{hash}\n" -r 1; \
+  > hg manifest --debug --template "{hash}\n" -r 4
+  09b8b3ce5a507caaa282f7262679e6d04091426c
+  09b8b3ce5a507caaa282f7262679e6d04091426c
+
+  $ hg fix --working-dir -r 1+3+4
+  3 new orphan changesets
+
+  $ hg cat file.changed -r "successors(1)" --hidden
+  a
+  X
+  $ hg cat file.changed -r "successors(4)" --hidden
+  A
+  X
+
+  $ cd ..
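
The test above also shows why baserevs is part of the work-item grouping key, not just the file revision. With hg fix -r 1+3+4, revision 1 is fixed against its parent, revision 0; revision 4's parent (revision 3) is itself being fixed, so revision 4 inherits revision 3's base, revision 2, where the file is absent and every line counts as changed. A hypothetical sketch of that bookkeeping (the revision numbers and abbreviated filerev hash come from the test; the code itself is illustrative):

    import collections

    dstrevmap = collections.defaultdict(list)
    # Revisions 1 and 4 share file revision 09b8b3ce of file.changed:
    #   rev 1's base is rev 0, so only line 2 ("x") is a changed range;
    #   rev 4's base is rev 2, where the file is absent, so all lines are.
    for rev, filerev, baserevs in [
        (1, "09b8b3ce", (0,)),
        (4, "09b8b3ce", (2,)),
    ]:
        dstrevmap[(filerev, baserevs, "file.changed")].append(rev)

    # Different baserevs keep these as two work items, which is why the
    # successors end up as "a\nX\n" and "A\nX\n" respectively.
    assert len(dstrevmap) == 2
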