# HG changeset patch
# User Siddharth Agarwal
# Date 2015-03-30 02:47:16
# Node ID e235b5dc5cf9b084a4ca5c3fb29e4ccc66fa9bab
# Parent 25c1d3ca5ff666229ecff794f317c79943c8571b
dirstate.walk: use the file foldmap to normalize

Computing the set of directories in the dirstate is expensive. It turns out
that it isn't necessary for operations like 'hg status' at all.

Why? Consider the file 'foo/bar' on disk, which is represented in the dirstate
as 'FOO/BAR'. On 'hg status', we'd walk down the directory tree, coming across
'foo' first.

Before: we'd normalize 'foo' to 'FOO', then add 'FOO' to our visited stack.
We'd then visit 'FOO', finding the file 'bar'. We'd normalize 'FOO/bar' to
'FOO/BAR', then add it to the results dict.

After: we wouldn't normalize 'foo' at all. We'd add it to our visited stack,
then visit 'foo', finding the file 'bar'. We'd normalize 'foo/bar' to
'FOO/BAR', then add it to the results dict.

So whether we normalize intermediate directories or not actually makes no
difference in most cases. The only case where normalization matters at all is
if a file is replaced with a directory with the same case-folded name. In that
case we can do a relatively cheap file normalization instead and still get
away with not computing the set of directories.

This is a nice boost in status performance. On OS X with case-insensitive
HFS+, for a large repo with over 200,000 files, this brings down 'hg status'
from 4.00 seconds to 3.62 seconds.

diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py --- a/mercurial/dirstate.py +++ b/mercurial/dirstate.py @@ -744,10 +744,10 @@ class dirstate(object): skipstep3 = True if not exact and self._checkcase: - normalize = self._normalize + normalizefile = self._normalizefile skipstep3 = False else: - normalize = None + normalizefile = None # step 1: find all explicit files results, work, dirsnotfound = self._walkexplicit(match, subrepos) @@ -772,8 +772,11 @@ class dirstate(object): continue raise for f, kind, st in entries: - if normalize: - nf = normalize(nd and (nd + "/" + f) or f, True, True) + if normalizefile: + # even though f might be a directory, we're only interested + # in comparing it to files currently in the dmap -- + # therefore normalizefile is enough + nf = normalizefile(nd and (nd + "/" + f) or f, True, True) else: nf = nd and (nd + "/" + f) or f if nf not in results: