# HG changeset patch # User Pierre-Yves David # Date 2020-01-15 14:50:24 # Node ID 8374b69aef7527c9e70d31e59a9b9f2be3f04c79 # Parent e41a164db7a98a957d5e88f4bd8b0d5a443e1714 nodemap: track the total and unused amount of data in the rawdata file We need to keep that information around: * total data will allow transactions to start appending new information without confusing other readers. * unused data will allow us to detect when we should regenerate a new rawdata file. Differential Revision: https://phab.mercurial-scm.org/D7889 diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -2138,6 +2138,8 @@ def debugnodemap(ui, repo, **opts): docket, data = nm_data ui.write((b"uid: %s\n") % docket.uid) ui.write((b"tip-rev: %d\n") % docket.tip_rev) + ui.write((b"data-length: %d\n") % docket.data_length) + ui.write((b"data-unused: %d\n") % docket.data_unused) @command( diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py --- a/mercurial/pure/parsers.py +++ b/mercurial/pure/parsers.py @@ -164,11 +164,11 @@ class PersistentNodeMapIndexObject(Index """ if self._nm_root is None: return None - data = nodemaputil.update_persistent_data( + changed, data = nodemaputil.update_persistent_data( self, self._nm_root, self._nm_max_idx, self._nm_rev ) self._nm_root = self._nm_max_idx = self._nm_rev = None - return data + return changed, data def update_nodemap_data(self, docket, nm_data): """provide full block of persisted binary data for a nodemap diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py --- a/mercurial/revlogutils/nodemap.py +++ b/mercurial/revlogutils/nodemap.py @@ -37,10 +37,12 @@ def persisted_data(revlog): return None offset += S_VERSION.size headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size]) - uid_size, tip_rev = headers + uid_size, tip_rev, data_length, data_unused = headers offset += S_HEADER.size docket = NodeMapDocket(pdata[offset : 
offset + uid_size]) docket.tip_rev = tip_rev + docket.data_length = data_length + docket.data_unused = data_unused filename = _rawdata_filepath(revlog, docket) return docket, revlog.opener.tryread(filename) @@ -78,12 +80,14 @@ def _persist_nodemap(tr, revlog): # first attemp an incremental update of the data if can_incremental and ondisk_docket is not None: target_docket = revlog._nodemap_docket.copy() - data = revlog.index.nodemap_data_incremental() + data_changed_count, data = revlog.index.nodemap_data_incremental() datafile = _rawdata_filepath(revlog, target_docket) # EXP-TODO: if this is a cache, this should use a cache vfs, not a # store vfs with revlog.opener(datafile, b'a') as fd: fd.write(data) + target_docket.data_length += len(data) + target_docket.data_unused += data_changed_count else: # otherwise fallback to a full new export target_docket = NodeMapDocket() @@ -96,6 +100,7 @@ def _persist_nodemap(tr, revlog): # store vfs with revlog.opener(datafile, b'w') as fd: fd.write(data) + target_docket.data_length = len(data) target_docket.tip_rev = revlog.tiprev() # EXP-TODO: if this is a cache, this should use a cache vfs, not a # store vfs @@ -143,9 +148,8 @@ def _persist_nodemap(tr, revlog): # version 0 is experimental, no BC garantee, do no use outside of tests. 
ONDISK_VERSION = 0 - S_VERSION = struct.Struct(">B") -S_HEADER = struct.Struct(">BQ") +S_HEADER = struct.Struct(">BQQQ") ID_SIZE = 8 @@ -168,17 +172,26 @@ class NodeMapDocket(object): uid = _make_uid() self.uid = uid self.tip_rev = None + self.data_length = None + self.data_unused = 0 def copy(self): new = NodeMapDocket(uid=self.uid) new.tip_rev = self.tip_rev + new.data_length = self.data_length + new.data_unused = self.data_unused return new def serialize(self): """return serialized bytes for a docket using the passed uid""" data = [] data.append(S_VERSION.pack(ONDISK_VERSION)) - headers = (len(self.uid), self.tip_rev) + headers = ( + len(self.uid), + self.tip_rev, + self.data_length, + self.data_unused, + ) data.append(S_HEADER.pack(*headers)) data.append(self.uid) return b''.join(data) @@ -236,8 +249,11 @@ def persistent_data(index): def update_persistent_data(index, root, max_idx, last_rev): """return the incremental update for persistent nodemap from a given index """ - trie = _update_trie(index, root, last_rev) - return _persist_trie(trie, existing_idx=max_idx) + changed_block, trie = _update_trie(index, root, last_rev) + return ( + changed_block * S_BLOCK.size, + _persist_trie(trie, existing_idx=max_idx), + ) S_BLOCK = struct.Struct(">" + ("l" * 16)) @@ -294,10 +310,11 @@ def _build_trie(index): def _update_trie(index, root, last_rev): """consume""" + changed = 0 for rev in range(last_rev + 1, len(index)): hex = nodemod.hex(index[rev][7]) - _insert_into_block(index, 0, root, rev, hex) - return root + changed += _insert_into_block(index, 0, root, rev, hex) + return changed, root def _insert_into_block(index, level, block, current_rev, current_hex): @@ -309,6 +326,7 @@ def _insert_into_block(index, level, blo current_rev: the revision number we are adding current_hex: the hexadecimal representation of the of that revision """ + changed = 1 if block.ondisk_id is not None: block.ondisk_id = None hex_digit = _to_int(current_hex[level : level + 1]) @@ -318,7 
+336,9 @@ def _insert_into_block(index, level, blo block[hex_digit] = current_rev elif isinstance(entry, dict): # need to recurse to an underlying block - _insert_into_block(index, level + 1, entry, current_rev, current_hex) + changed += _insert_into_block( + index, level + 1, entry, current_rev, current_hex + ) else: # collision with a previously unique prefix, inserting new # vertices to fit both entry. @@ -328,6 +348,7 @@ def _insert_into_block(index, level, blo block[hex_digit] = new _insert_into_block(index, level + 1, new, other_rev, other_hex) _insert_into_block(index, level + 1, new, current_rev, current_hex) + return changed def _persist_trie(root, existing_idx=None): diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t --- a/tests/test-persistent-nodemap.t +++ b/tests/test-persistent-nodemap.t @@ -15,8 +15,10 @@ Test the persistent on-disk nodemap $ hg debugnodemap --metadata uid: ???????????????? (glob) tip-rev: 5000 + data-length: 122880 + data-unused: 0 $ f --size .hg/store/00changelog.n - .hg/store/00changelog.n: size=26 + .hg/store/00changelog.n: size=42 $ f --sha256 .hg/store/00changelog-*.nd .hg/store/00changelog-????????????????.nd: sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7 (glob) $ hg debugnodemap --dump-new | f --sha256 --size @@ -50,11 +52,22 @@ add a new commit $ echo foo > foo $ hg add foo $ hg ci -m 'foo' + +#if pure $ hg debugnodemap --metadata uid: ???????????????? (glob) tip-rev: 5001 + data-length: 123072 + data-unused: 192 +#else + $ hg debugnodemap --metadata + uid: ???????????????? (glob) + tip-rev: 5001 + data-length: 122880 + data-unused: 0 +#endif $ f --size .hg/store/00changelog.n - .hg/store/00changelog.n: size=26 + .hg/store/00changelog.n: size=42 (The pure code use the debug code that perform incremental update, the C code reencode from scratch)