# HG changeset patch # User Augie Fackler # Date 2015-09-26 02:54:46 # Node ID 05871262acd591b2cf926823f97223aa2b5f4d44 # Parent e93e12e2ff9a72f5fca1a85f393a3943233fe4f8 treemanifest: rework lazy-copying code (issue4840) The old lazy-copy code formed a chain of copied manifests with each copy. Under typical operation, the stack never got more than a couple of manifests deep and was fine. Under conditions like hgsubversion or convert, the stack could get hundreds of manifests deep, and eventually overflow the recursion limit for Python. I was able to consistently reproduce this by converting an hgsubversion clone of svn's history to treemanifests. This may result in fewer manifests staying in memory during operations like convert when treemanifests are in use, and should make those operations faster since there will be significantly fewer noop function calls going on. A previous attempt (never mailed) of mine to fix this problem tried to simply have all treemanifests only have a loadfunc - that caused somewhat weird problems because the gettext() callable passed into read() wasn't idempotent, so the easy solution is to have a loadfunc and a copyfunc. diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -442,13 +442,14 @@ def _splittopdir(f): else: return '', f -_noop = lambda: None +_noop = lambda s: None class treemanifest(object): def __init__(self, dir='', text=''): self._dir = dir self._node = revlog.nullid - self._load = _noop + self._loadfunc = _noop + self._copyfunc = _noop self._dirty = False self._dirs = {} # Using _lazymanifest here is a little slower than plain old dicts @@ -479,7 +480,7 @@ class treemanifest(object): def __repr__(self): return ('' % (self._dir, revlog.hex(self._node), - bool(self._load is _noop), + bool(self._loadfunc is _noop), self._dirty, id(self))) def dir(self): @@ -598,6 +599,14 @@ class treemanifest(object): self._files[f] = n[:21] # to match manifestdict's behavior self._dirty = True + def _load(self): + if self._loadfunc is not _noop: + lf, self._loadfunc = self._loadfunc, _noop + lf(self) + elif self._copyfunc is not _noop: + cf, self._copyfunc = self._copyfunc, _noop + cf(self) + def setflag(self, f, flags): """Set the flags (symlink, executable) for path f.""" assert 'd' not in flags @@ -615,19 +624,19 @@ class treemanifest(object): copy = treemanifest(self._dir) copy._node = self._node copy._dirty = self._dirty - def _load_for_copy(): - self._load() - for d in self._dirs: - copy._dirs[d] = self._dirs[d].copy() - copy._files = dict.copy(self._files) - copy._flags = dict.copy(self._flags) - copy._load = _noop - copy._load = _load_for_copy - if self._load == _noop: - # Chaining _load if it's _noop is functionally correct, but the - # chain may end up excessively long (stack overflow), and - # will prevent garbage collection of 'self'. - copy._load() + if self._copyfunc is _noop: + def _copyfunc(s): + self._load() + for d in self._dirs: + s._dirs[d] = self._dirs[d].copy() + s._files = dict.copy(self._files) + s._flags = dict.copy(self._flags) + if self._loadfunc is _noop: + _copyfunc(copy) + else: + copy._copyfunc = _copyfunc + else: + copy._copyfunc = self._copyfunc return copy def filesnotin(self, m2): @@ -834,13 +843,10 @@ class treemanifest(object): return _text(sorted(dirs + files), usemanifestv2) def read(self, gettext, readsubtree): - def _load_for_read(): - # Mark as loaded already here, so __setitem__ and setflag() don't - # cause infinite loops when they try to load. - self._load = _noop - self.parse(gettext(), readsubtree) - self._dirty = False - self._load = _load_for_read + def _load_for_read(s): + s.parse(gettext(), readsubtree) + s._dirty = False + self._loadfunc = _load_for_read def writesubtrees(self, m1, m2, writesubtree): self._load() # for consistency; should never have any effect here