# HG changeset patch # User Augie Fackler # Date 2015-12-04 15:34:58 # Node ID c08814b48ae52b69dc032318c95ce50fa6a5e791 # Parent b0d90fef16b616632bcc4d96eef0a4e778eb530b changegroup: avoid iterating the whole manifest The old code gathered the list of all files that changed anywhere in history and then collected the changed file nodes by walking the entirety of each manifest to be sent. That's going to be unfortunate for narrowhg, and it's already inefficient for medium-to-large repositories. Timings for bundle --all on my hg repo, tested with hgperf: Before: ! wall 23.442445 comb 23.440000 user 23.250000 sys 0.190000 (best of 3) After: ! wall 20.272187 comb 20.270000 user 20.190000 sys 0.080000 (best of 3) diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -613,7 +613,8 @@ class cg1packer(object): clrevorder = {} mfs = {} # needed manifests fnodes = {} # needed file nodes - changedfiles = set() + # maps manifest node id -> set(changed files) + mfchangedfiles = {} # Callback for the changelog, used to collect changed files and manifest # nodes. @@ -621,9 +622,12 @@ class cg1packer(object): def lookupcl(x): c = cl.read(x) clrevorder[x] = len(clrevorder) - changedfiles.update(c[3]) + n = c[0] # record the first changeset introducing this manifest version - mfs.setdefault(c[0], x) + mfs.setdefault(n, x) + # Record a complete list of potentially-changed files in + # this manifest.
+ mfchangedfiles.setdefault(n, set()).update(c[3]) return x self._verbosenote(_('uncompressed size of bundle content:\n')) @@ -668,8 +672,12 @@ class cg1packer(object): clnode = mfs[x] if not fastpathlinkrev: mdata = ml.readfast(x) - for f, n in mdata.iteritems(): - if f in changedfiles: + for f in mfchangedfiles[x]: + if True: + try: + n = mdata[f] + except KeyError: + continue # record the first changeset introducing this filelog # version fclnodes = fnodes.setdefault(f, {}) @@ -696,6 +704,9 @@ class cg1packer(object): return dict(genfilenodes()) return fnodes.get(fname, {}) + changedfiles = set() + for x in mfchangedfiles.itervalues(): + changedfiles.update(x) for chunk in self.generatefiles(changedfiles, linknodes, commonrevs, source): yield chunk