# HG changeset patch # User Pierre-Yves David # Date 2020-01-15 14:47:59 # Node ID 563dfdfd01a4e050b5ea01796843cf2fd2d33ef9 # Parent 2b72c4ff8ed11cad198bcd60c46a05d94292ce0b nodemap: delete older raw data file when creating a new one When we write a new full file, it replaces an older one with a different name. We add the associated cleanup for the older file to be removed after the transaction. We delete all files matching the expected pattern to give us an extra chance to delete orphan files we might have failed to delete earlier. Note: eventually we won't rewrite all data for each transaction. This is coming in later changesets. Differential Revision: https://phab.mercurial-scm.org/D7839 diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py --- a/mercurial/revlogutils/nodemap.py +++ b/mercurial/revlogutils/nodemap.py @@ -9,6 +9,7 @@ from __future__ import absolute_import import os +import re import struct from .. import ( @@ -71,6 +72,16 @@ def _persist_nodemap(tr, revlog): data = persistent_data(revlog.index) uid = _make_uid() datafile = _rawdata_filepath(revlog, uid) + olds = _other_rawdata_filepath(revlog, uid) + if olds: + realvfs = getattr(revlog, '_realopener', revlog.opener) + + def cleanup(tr): + for oldfile in olds: + realvfs.tryunlink(oldfile) + + callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file + tr.addpostclose(callback_id, cleanup) # EXP-TODO: if this is a cache, this should use a cache vfs, not a # store vfs with revlog.opener(datafile, b'w') as fd: @@ -136,6 +147,19 @@ def _rawdata_filepath(revlog, uid): return b"%s-%s.nd" % (prefix, uid) +def _other_rawdata_filepath(revlog, uid): + prefix = revlog.nodemap_file[:-2] + pattern = re.compile(b"(^|/)%s-[0-9a-f]+\.nd$" % prefix) + new_file_path = _rawdata_filepath(revlog, uid) + new_file_name = revlog.opener.basename(new_file_path) + dirpath = revlog.opener.dirname(new_file_path) + others = [] + for f in revlog.opener.listdir(dirpath): + if pattern.match(f) and f 
!= new_file_name: + others.append(f) + return others + + ### Nodemap Trie # # This is a simple reference implementation to compute and persist a nodemap diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t --- a/tests/test-persistent-nodemap.t +++ b/tests/test-persistent-nodemap.t @@ -12,6 +12,8 @@ Test the persistent on-disk nodemap $ hg debugbuilddag .+5000 $ f --size .hg/store/00changelog.n .hg/store/00changelog.n: size=18 + $ f --sha256 .hg/store/00changelog-*.nd + .hg/store/00changelog-????????????????.nd: sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7 (glob) $ hg debugnodemap --dump-new | f --sha256 --size size=122880, sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7 $ hg debugnodemap --dump-disk | f --sha256 --bytes=256 --hexdump --size @@ -32,3 +34,15 @@ Test the persistent on-disk nodemap 00d0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| 00e0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| 00f0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + +add a new commit + + $ hg up + 0 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ echo foo > foo + $ hg add foo + $ hg ci -m 'foo' + $ f --size .hg/store/00changelog.n + .hg/store/00changelog.n: size=18 + $ f --sha256 .hg/store/00changelog-*.nd --size + .hg/store/00changelog-????????????????.nd: size=122880, sha256=bfafebd751c4f6d116a76a37a1dee2a251747affe7efbcc4f4842ccc746d4db9 (glob)