# HG changeset patch # User Joerg Sonnenberger # Date 2020-10-18 20:18:02 # Node ID a5206e71c536816869704182994120bbda1d80da # Parent 225e513c444ef742c16e7436936ed8854e9d23cf revlog: extend addgroup() with callback for duplicates The addgroup() interface currently doesn't allow the caller to keep track of duplicated nodes except by looking at the returned node list. Add an optional second callback for this purpose and change the return type to a boolean. This allows follow-up changes to use more efficient storage for the node list in places that are memory-sensitive. Differential Revision: https://phab.mercurial-scm.org/D9231 diff --git a/hgext/sqlitestore.py b/hgext/sqlitestore.py --- a/hgext/sqlitestore.py +++ b/hgext/sqlitestore.py @@ -674,9 +674,10 @@ class sqlitefilestore(object): linkmapper, transaction, addrevisioncb=None, + duplicaterevisioncb=None, maybemissingparents=False, ): - nodes = [] + empty = True for node, p1, p2, linknode, deltabase, delta, wireflags in deltas: storeflags = 0 @@ -715,8 +716,6 @@ class sqlitefilestore(object): linkrev = linkmapper(linknode) - nodes.append(node) - if node in self._revisions: # Possibly reset parents to make them proper. entry = self._revisions[node] @@ -741,6 +740,9 @@ class sqlitefilestore(object): (self._nodetorev[p1], entry.flags, entry.rid), ) + if duplicaterevisioncb: + duplicaterevisioncb(self, node) + empty = False continue if deltabase == nullid: @@ -763,8 +765,9 @@ class sqlitefilestore(object): if addrevisioncb: addrevisioncb(self, node) + empty = False - return nodes + return not empty def censorrevision(self, tr, censornode, tombstone=b''): tombstone = storageutil.packmeta({b'censored': tombstone}, b'') diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -316,20 +316,29 @@ class cg1unpacker(object): self.callback = progress.increment efilesset = set() + cgnodes = [] def onchangelog(cl, node): efilesset.update(cl.readfiles(node)) + cgnodes.append(node) + + def ondupchangelog(cl, node): + cgnodes.append(node) self.changelogheader() deltas = self.deltaiter() - cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog) - efiles = len(efilesset) - - if not cgnodes: + if not cl.addgroup( + deltas, + csmap, + trp, + addrevisioncb=onchangelog, + duplicaterevisioncb=ondupchangelog, + ): repo.ui.develwarn( b'applied empty changelog from changegroup', config=b'warn-empty-changegroup', ) + efiles = len(efilesset) clend = len(cl) changesets = clend - clstart progress.complete() diff --git a/mercurial/exchangev2.py b/mercurial/exchangev2.py --- a/mercurial/exchangev2.py +++ b/mercurial/exchangev2.py @@ -343,16 +343,21 @@ def _processchangesetdata(repo, tr, objs ) manifestnodes = {} + added = [] def linkrev(node): repo.ui.debug(b'add changeset %s\n' % short(node)) # Linkrev for changelog is always self. return len(cl) + def ondupchangeset(cl, node): + added.append(node) + def onchangeset(cl, node): progress.increment() revision = cl.changelogrevision(node) + added.append(node) # We need to preserve the mapping of changelog revision to node # so we can set the linkrev accordingly when manifests are added. @@ -403,8 +408,12 @@ def _processchangesetdata(repo, tr, objs 0, ) - added = cl.addgroup( - iterrevisions(), linkrev, weakref.proxy(tr), addrevisioncb=onchangeset + cl.addgroup( + iterrevisions(), + linkrev, + weakref.proxy(tr), + addrevisioncb=onchangeset, + duplicaterevisioncb=ondupchangeset, ) progress.complete() @@ -516,12 +525,15 @@ def _fetchmanifests(repo, tr, remote, ma # Chomp off header object. next(objs) - added.extend( - rootmanifest.addgroup( - iterrevisions(objs, progress), - linkrevs.__getitem__, - weakref.proxy(tr), - ) + def onchangeset(cl, node): + added.append(node) + + rootmanifest.addgroup( + iterrevisions(objs, progress), + linkrevs.__getitem__, + weakref.proxy(tr), + addrevisioncb=onchangeset, + duplicaterevisioncb=onchangeset, ) progress.complete() diff --git a/mercurial/filelog.py b/mercurial/filelog.py --- a/mercurial/filelog.py +++ b/mercurial/filelog.py @@ -139,6 +139,7 @@ class filelog(object): linkmapper, transaction, addrevisioncb=None, + duplicaterevisioncb=None, maybemissingparents=False, ): if maybemissingparents: @@ -150,7 +151,11 @@ class filelog(object): ) return self._revlog.addgroup( - deltas, linkmapper, transaction, addrevisioncb=addrevisioncb + deltas, + linkmapper, + transaction, + addrevisioncb=addrevisioncb, + duplicaterevisioncb=duplicaterevisioncb, ) def getstrippoint(self, minlink): diff --git a/mercurial/interfaces/repository.py b/mercurial/interfaces/repository.py --- a/mercurial/interfaces/repository.py +++ b/mercurial/interfaces/repository.py @@ -756,6 +756,7 @@ class ifilemutation(interfaceutil.Interf linkmapper, transaction, addrevisioncb=None, + duplicaterevisioncb=None, maybemissingparents=False, ): """Process a series of deltas for storage. @@ -1247,7 +1248,13 @@ class imanifeststorage(interfaceutil.Int See the documentation for ``ifiledata`` for more. """ - def addgroup(deltas, linkmapper, transaction, addrevisioncb=None): + def addgroup( + deltas, + linkmapper, + transaction, + addrevisioncb=None, + duplicaterevisioncb=None, + ): """Process a series of deltas for storage. See the documentation in ``ifilemutation`` for more. diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -1832,9 +1832,20 @@ class manifestrevlog(object): deltamode=deltamode, ) - def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None): + def addgroup( + self, + deltas, + linkmapper, + transaction, + addrevisioncb=None, + duplicaterevisioncb=None, + ): return self._revlog.addgroup( - deltas, linkmapper, transaction, addrevisioncb=addrevisioncb + deltas, + linkmapper, + transaction, + addrevisioncb=addrevisioncb, + duplicaterevisioncb=duplicaterevisioncb, ) def rawsize(self, rev): diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -2368,7 +2368,14 @@ class revlog(object): self._enforceinlinesize(transaction, ifh) nodemaputil.setup_persistent_nodemap(transaction, self) - def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None): + def addgroup( + self, + deltas, + linkmapper, + transaction, + addrevisioncb=None, + duplicaterevisioncb=None, + ): """ add a delta group @@ -2383,8 +2390,6 @@ class revlog(object): if self._writinghandles: raise error.ProgrammingError(b'cannot nest addgroup() calls') - nodes = [] - r = len(self) end = 0 if r: @@ -2405,6 +2410,7 @@ class revlog(object): ifh.flush() self._writinghandles = (ifh, dfh) + empty = True try: deltacomputer = deltautil.deltacomputer(self) @@ -2414,11 +2420,12 @@ class revlog(object): link = linkmapper(linknode) flags = flags or REVIDX_DEFAULT_FLAGS - nodes.append(node) - if self.index.has_node(node): + # this can happen if two branches make the same change self._nodeduplicatecallback(transaction, node) - # this can happen if two branches make the same change + if duplicaterevisioncb: + duplicaterevisioncb(self, node) + empty = False continue for p in (p1, p2): @@ -2472,6 +2479,7 @@ class revlog(object): if addrevisioncb: addrevisioncb(self, node) + empty = False if not dfh and not self._inline: # addrevision switched from inline to conventional @@ -2486,8 +2494,7 @@ class revlog(object): if dfh: dfh.close() ifh.close() - - return nodes + return not empty def iscensored(self, rev): """Check if a file revision is censored.""" diff --git a/mercurial/testing/storage.py b/mercurial/testing/storage.py --- a/mercurial/testing/storage.py +++ b/mercurial/testing/storage.py @@ -1117,7 +1117,22 @@ class ifilemutationtests(basetestcase): return 0 with self._maketransactionfn() as tr: - nodes = f.addgroup([], None, tr, addrevisioncb=cb) + nodes = [] + + def onchangeset(cl, node): + nodes.append(node) + cb(cl, node) + + def ondupchangeset(cl, node): + nodes.append(node) + + f.addgroup( + [], + None, + tr, + addrevisioncb=onchangeset, + duplicaterevisioncb=ondupchangeset, + ) self.assertEqual(nodes, []) self.assertEqual(callbackargs, []) @@ -1136,7 +1151,22 @@ class ifilemutationtests(basetestcase): ] with self._maketransactionfn() as tr: - nodes = f.addgroup(deltas, linkmapper, tr, addrevisioncb=cb) + nodes = [] + + def onchangeset(cl, node): + nodes.append(node) + cb(cl, node) + + def ondupchangeset(cl, node): + nodes.append(node) + + f.addgroup( + deltas, + linkmapper, + tr, + addrevisioncb=onchangeset, + duplicaterevisioncb=ondupchangeset, + ) self.assertEqual( nodes, @@ -1175,7 +1205,19 @@ class ifilemutationtests(basetestcase): deltas.append((nodes[i], nullid, nullid, nullid, nullid, delta, 0)) with self._maketransactionfn() as tr: - self.assertEqual(f.addgroup(deltas, lambda x: 0, tr), nodes) + newnodes = [] + + def onchangeset(cl, node): + newnodes.append(node) + + f.addgroup( + deltas, + lambda x: 0, + tr, + addrevisioncb=onchangeset, + duplicaterevisioncb=onchangeset, + ) + self.assertEqual(newnodes, nodes) self.assertEqual(len(f), len(deltas)) self.assertEqual(list(f.revs()), [0, 1, 2]) diff --git a/mercurial/unionrepo.py b/mercurial/unionrepo.py --- a/mercurial/unionrepo.py +++ b/mercurial/unionrepo.py @@ -129,6 +129,7 @@ class unionrevlog(revlog.revlog): linkmapper, transaction, addrevisioncb=None, + duplicaterevisioncb=None, maybemissingparents=False, ): raise NotImplementedError diff --git a/tests/simplestorerepo.py b/tests/simplestorerepo.py --- a/tests/simplestorerepo.py +++ b/tests/simplestorerepo.py @@ -532,6 +532,7 @@ class filestorage(object): linkmapper, transaction, addrevisioncb=None, + duplicaterevisioncb=None, maybemissingparents=False, ): if maybemissingparents: @@ -539,7 +540,7 @@ class filestorage(object): _('simple store does not support missing parents ' 'write mode') ) - nodes = [] + empty = True transaction.addbackup(self._indexpath) @@ -547,9 +548,10 @@ class filestorage(object): linkrev = linkmapper(linknode) flags = flags or revlog.REVIDX_DEFAULT_FLAGS - nodes.append(node) - if node in self._indexbynode: + if duplicaterevisioncb: + duplicaterevisioncb(self, node) + empty = False continue # Need to resolve the fulltext from the delta base. @@ -564,7 +566,8 @@ class filestorage(object): if addrevisioncb: addrevisioncb(self, node) - return nodes + empty = False + return not empty def _headrevs(self): # Assume all revisions are heads by default.