nodemap.py
204 lines
| 6.5 KiB
| text/x-python
|
PythonLexer
r44486 | # nodemap.py - nodemap related code and utilities | |||
# | ||||
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net> | ||||
# Copyright 2019 George Racinet <georges.racinet@octobus.net> | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
from __future__ import absolute_import | ||||
r44788 | ||||
import struct | ||||
from .. import ( | ||||
error, | ||||
node as nodemod, | ||||
pycompat, | ||||
) | ||||
r44486 | ||||
class NodeMap(dict): | ||||
def __missing__(self, x): | ||||
raise error.RevlogError(b'unknown node: %s' % x) | ||||
r44788 | ||||
r44790 | def persisted_data(revlog): | |||
"""read the nodemap for a revlog from disk""" | ||||
if revlog.nodemap_file is None: | ||||
return None | ||||
return revlog.opener.tryread(revlog.nodemap_file) | ||||
r44789 | def setup_persistent_nodemap(tr, revlog): | |||
"""Install whatever is needed transaction side to persist a nodemap on disk | ||||
(only actually persist the nodemap if this is relevant for this revlog) | ||||
""" | ||||
r44791 | if revlog._inline: | |||
return # inlined revlog are too small for this to be relevant | ||||
r44789 | if revlog.nodemap_file is None: | |||
return # we do not use persistent_nodemap on this revlog | ||||
callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file | ||||
if tr.hasfinalize(callback_id): | ||||
return # no need to register again | ||||
tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog)) | ||||
def _persist_nodemap(tr, revlog): | ||||
"""Write nodemap data on disk for a given revlog | ||||
""" | ||||
if getattr(revlog, 'filteredrevs', ()): | ||||
raise error.ProgrammingError( | ||||
"cannot persist nodemap of a filtered changelog" | ||||
) | ||||
if revlog.nodemap_file is None: | ||||
msg = "calling persist nodemap on a revlog without the feature enableb" | ||||
raise error.ProgrammingError(msg) | ||||
data = persistent_data(revlog.index) | ||||
# EXP-TODO: if this is a cache, this should use a cache vfs, not a | ||||
# store vfs | ||||
with revlog.opener(revlog.nodemap_file, b'w') as f: | ||||
f.write(data) | ||||
# EXP-TODO: if the transaction abort, we should remove the new data and | ||||
# reinstall the old one. (This will be simpler when the file format get a | ||||
# bit more advanced) | ||||
r44788 | ### Nodemap Trie | |||
# | ||||
# This is a simple reference implementation to compute and persist a nodemap | ||||
# trie. This reference implementation is write only. The python version of this | ||||
# is not expected to be actually used, since it wont provide performance | ||||
# improvement over existing non-persistent C implementation. | ||||
# | ||||
# The nodemap is persisted as Trie using 4bits-address/16-entries block. each | ||||
# revision can be adressed using its node shortest prefix. | ||||
# | ||||
# The trie is stored as a sequence of block. Each block contains 16 entries | ||||
# (signed 64bit integer, big endian). Each entry can be one of the following: | ||||
# | ||||
# * value >= 0 -> index of sub-block | ||||
# * value == -1 -> no value | ||||
# * value < -1 -> a revision value: rev = -(value+10) | ||||
# | ||||
# The implementation focus on simplicity, not on performance. A Rust | ||||
# implementation should provide a efficient version of the same binary | ||||
# persistence. This reference python implementation is never meant to be | ||||
# extensively use in production. | ||||
def persistent_data(index): | ||||
"""return the persistent binary form for a nodemap for a given index | ||||
""" | ||||
trie = _build_trie(index) | ||||
return _persist_trie(trie) | ||||
S_BLOCK = struct.Struct(">" + ("l" * 16)) | ||||
NO_ENTRY = -1 | ||||
# rev 0 need to be -2 because 0 is used by block, -1 is a special value. | ||||
REV_OFFSET = 2 | ||||
def _transform_rev(rev): | ||||
"""Return the number used to represent the rev in the tree. | ||||
(or retrieve a rev number from such representation) | ||||
Note that this is an involution, a function equal to its inverse (i.e. | ||||
which gives the identity when applied to itself). | ||||
""" | ||||
return -(rev + REV_OFFSET) | ||||
def _to_int(hex_digit): | ||||
"""turn an hexadecimal digit into a proper integer""" | ||||
return int(hex_digit, 16) | ||||
def _build_trie(index): | ||||
"""build a nodemap trie | ||||
The nodemap stores revision number for each unique prefix. | ||||
Each block is a dictionary with keys in `[0, 15]`. Values are either | ||||
another block or a revision number. | ||||
""" | ||||
root = {} | ||||
for rev in range(len(index)): | ||||
hex = nodemod.hex(index[rev][7]) | ||||
_insert_into_block(index, 0, root, rev, hex) | ||||
return root | ||||
def _insert_into_block(index, level, block, current_rev, current_hex): | ||||
"""insert a new revision in a block | ||||
index: the index we are adding revision for | ||||
level: the depth of the current block in the trie | ||||
block: the block currently being considered | ||||
current_rev: the revision number we are adding | ||||
current_hex: the hexadecimal representation of the of that revision | ||||
""" | ||||
hex_digit = _to_int(current_hex[level : level + 1]) | ||||
entry = block.get(hex_digit) | ||||
if entry is None: | ||||
# no entry, simply store the revision number | ||||
block[hex_digit] = current_rev | ||||
elif isinstance(entry, dict): | ||||
# need to recurse to an underlying block | ||||
_insert_into_block(index, level + 1, entry, current_rev, current_hex) | ||||
else: | ||||
# collision with a previously unique prefix, inserting new | ||||
# vertices to fit both entry. | ||||
other_hex = nodemod.hex(index[entry][7]) | ||||
other_rev = entry | ||||
new = {} | ||||
block[hex_digit] = new | ||||
_insert_into_block(index, level + 1, new, other_rev, other_hex) | ||||
_insert_into_block(index, level + 1, new, current_rev, current_hex) | ||||
def _persist_trie(root): | ||||
"""turn a nodemap trie into persistent binary data | ||||
See `_build_trie` for nodemap trie structure""" | ||||
block_map = {} | ||||
chunks = [] | ||||
for tn in _walk_trie(root): | ||||
block_map[id(tn)] = len(chunks) | ||||
chunks.append(_persist_block(tn, block_map)) | ||||
return b''.join(chunks) | ||||
def _walk_trie(block): | ||||
"""yield all the block in a trie | ||||
Children blocks are always yield before their parent block. | ||||
""" | ||||
for (_, item) in sorted(block.items()): | ||||
if isinstance(item, dict): | ||||
for sub_block in _walk_trie(item): | ||||
yield sub_block | ||||
yield block | ||||
def _persist_block(block_node, block_map): | ||||
"""produce persistent binary data for a single block | ||||
Children block are assumed to be already persisted and present in | ||||
block_map. | ||||
""" | ||||
data = tuple(_to_value(block_node.get(i), block_map) for i in range(16)) | ||||
return S_BLOCK.pack(*data) | ||||
def _to_value(item, block_map): | ||||
"""persist any value as an integer""" | ||||
if item is None: | ||||
return NO_ENTRY | ||||
elif isinstance(item, dict): | ||||
return block_map[id(item)] | ||||
else: | ||||
return _transform_rev(item) | ||||