# HG changeset patch
# User Petr Kodl
# Date 2008-10-09 14:29:47
# Node ID 619ebf82cef2b661972bf5bfe05bf0ec3311e239
# Parent ceb8aef03aa7934f6cacd95d7e1b86ef3cd9e1da
Take advantage of fstat call clustering per directory if the OS supports it.

The util module implements two versions of the statfiles function:

- _statfiles calls lstat once per file.
- _statfiles_clustered takes advantage of the optimizations in osutil.c: it
  stats all files in a directory at once when a new directory is hit and
  caches the results.

util.statfiles dispatches to the appropriate version during module loading.

The speedup on a directory tree with 2k directories and 63k files is about a
factor of 1.8 (1.3s -> 0.8s for hg diff - hg startup overhead about .2s).

At this point only Win32 benefits from this patch. The rest of the OSes use
the non-clustered implementation.

diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -522,17 +522,11 @@ class dirstate(object):
                         results[nf] = None
 
         # step 3: report unseen items in the dmap hash
-        visit = [f for f in dmap if f not in results and match(f)]
-        for nf in util.sort(visit):
-            results[nf] = None
-            try:
-                st = lstat(join(nf))
-                kind = getkind(st.st_mode)
-                if kind == regkind or kind == lnkkind:
-                    results[nf] = st
-            except OSError, inst:
-                if inst.errno not in (errno.ENOENT, errno.ENOTDIR):
-                    raise
+        visit = util.sort([f for f in dmap if f not in results and match(f)])
+        for nf, st in zip(visit, util.statfiles([join(i) for i in visit])):
+            if st is not None and getkind(st.st_mode) not in (regkind, lnkkind):
+                st = None
+            results[nf] = st
 
         del results['.hg']
         return results
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -826,6 +826,62 @@ def openhardlinks():
     '''return true if it is safe to hold open file handles to hardlinks'''
     return True
 
+def _statfiles(files):
+    '''Stat each file in files and yield its stat result, or None if the
+    file does not exist.
+
+    ENOENT and ENOTDIR errors from lstat yield None; any other OSError is
+    re-raised.'''
+    lstat = os.lstat
+    for nf in files:
+        try:
+            st = lstat(nf)
+        except OSError, err:
+            if err.errno not in (errno.ENOENT, errno.ENOTDIR):
+                raise
+            st = None
+        yield st
+
+def _statfiles_clustered(files):
+    '''Stat each file in files and yield its stat result, or None if the
+    file does not exist.
+
+    Cluster and cache stats per directory to minimize the number of OS stat
+    calls: the first time a directory is seen, all of its entries are read
+    with a single osutil.listdir call and reused for the following files.
+    Paths are passed through os.path.normcase before lookup.'''
+    ncase = os.path.normcase
+    sep = os.sep
+    # normcased dirname -> {normcased filename -> stat}; a directory that
+    # cannot be found is cached as an empty dict
+    dircache = {}
+    for nf in files:
+        nf = ncase(nf)
+        pos = nf.rfind(sep)
+        if pos == -1:
+            dirname, base = '.', nf
+        else:
+            dirname, base = nf[:pos], nf[pos+1:]
+        cache = dircache.get(dirname)
+        if cache is None:
+            try:
+                cache = dict([(ncase(n), s)
+                    for n, k, s in osutil.listdir(dirname, True)])
+            except OSError, err:
+                # handle directory not found in Python version prior to 2.5
+                # Python <= 2.4 returns native Windows code 3 in errno
+                # Python >= 2.5 returns ENOENT and adds winerror field
+                if err.errno not in (3, errno.ENOENT, errno.ENOTDIR):
+                    raise
+                cache = {}
+            dircache[dirname] = cache
+        yield cache.get(base)
+
+if sys.platform == 'win32':
+    statfiles = _statfiles_clustered
+else:
+    statfiles = _statfiles
+
 getuser_fallback = None
 
 def getuser():