nodemap.rs
526 lines
| 15.4 KiB
| application/rls-services+xml
|
RustLexer
Georges Racinet
|
r44600 | // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net> | ||
// and Mercurial contributors | ||||
// | ||||
// This software may be used and distributed according to the terms of the | ||||
// GNU General Public License version 2 or any later version. | ||||
//! Indexing facilities for fast retrieval of `Revision` from `Node` | ||||
//! | ||||
//! This provides a variation on the 16-ary radix tree that is | ||||
//! provided as "nodetree" in revlog.c, ready for append-only persistence | ||||
//! on disk. | ||||
//! | ||||
//! Following existing implicit conventions, the "nodemap" terminology | ||||
//! is used in a more abstract context. | ||||
Georges Racinet
|
r44644 | use super::{ | ||
Node, NodeError, NodePrefix, NodePrefixRef, Revision, RevlogIndex, | ||||
}; | ||||
Georges Racinet
|
r44600 | use std::fmt; | ||
Georges Racinet
|
r44644 | use std::ops::Deref; | ||
Georges Racinet
|
r44645 | use std::ops::Index; | ||
Georges Racinet
|
r44644 | |||
#[derive(Debug, PartialEq)] | ||||
pub enum NodeMapError { | ||||
MultipleResults, | ||||
InvalidNodePrefix(NodeError), | ||||
/// A `Revision` stored in the nodemap could not be found in the index | ||||
RevisionNotInIndex(Revision), | ||||
} | ||||
impl From<NodeError> for NodeMapError { | ||||
fn from(err: NodeError) -> Self { | ||||
NodeMapError::InvalidNodePrefix(err) | ||||
} | ||||
} | ||||
/// Mapping system from Mercurial nodes to revision numbers. | ||||
/// | ||||
/// ## `RevlogIndex` and `NodeMap` | ||||
/// | ||||
/// One way to think about their relationship is that | ||||
/// the `NodeMap` is a prefix-oriented reverse index of the `Node` information | ||||
/// carried by a [`RevlogIndex`]. | ||||
/// | ||||
/// Many of the methods in this trait take a `RevlogIndex` argument | ||||
/// which is used for validation of their results. This index must naturally | ||||
/// be the one the `NodeMap` is about, and it must be consistent. | ||||
/// | ||||
/// Notably, the `NodeMap` must not store | ||||
/// information about more `Revision` values than there are in the index. | ||||
/// In these methods, an encountered `Revision` is not in the index, a | ||||
/// [`RevisionNotInIndex`] error is returned. | ||||
/// | ||||
/// In insert operations, the rule is thus that the `NodeMap` must always | ||||
/// be updated after the `RevlogIndex` | ||||
/// be updated first, and the `NodeMap` second. | ||||
/// | ||||
/// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex | ||||
/// [`RevlogIndex`]: ../trait.RevlogIndex.html | ||||
pub trait NodeMap { | ||||
/// Find the unique `Revision` having the given `Node` | ||||
/// | ||||
/// If no Revision matches the given `Node`, `Ok(None)` is returned. | ||||
fn find_node( | ||||
&self, | ||||
index: &impl RevlogIndex, | ||||
node: &Node, | ||||
) -> Result<Option<Revision>, NodeMapError> { | ||||
self.find_bin(index, node.into()) | ||||
} | ||||
/// Find the unique Revision whose `Node` starts with a given binary prefix | ||||
/// | ||||
/// If no Revision matches the given prefix, `Ok(None)` is returned. | ||||
/// | ||||
/// If several Revisions match the given prefix, a [`MultipleResults`] | ||||
/// error is returned. | ||||
fn find_bin<'a>( | ||||
&self, | ||||
idx: &impl RevlogIndex, | ||||
prefix: NodePrefixRef<'a>, | ||||
) -> Result<Option<Revision>, NodeMapError>; | ||||
/// Find the unique Revision whose `Node` hexadecimal string representation | ||||
/// starts with a given prefix | ||||
/// | ||||
/// If no Revision matches the given prefix, `Ok(None)` is returned. | ||||
/// | ||||
/// If several Revisions match the given prefix, a [`MultipleResults`] | ||||
/// error is returned. | ||||
fn find_hex( | ||||
&self, | ||||
idx: &impl RevlogIndex, | ||||
prefix: &str, | ||||
) -> Result<Option<Revision>, NodeMapError> { | ||||
self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow()) | ||||
} | ||||
} | ||||
Georges Racinet
|
r44600 | |||
/// Low level NodeTree [`Blocks`] elements | ||||
/// | ||||
/// These are exactly as for instance on persistent storage. | ||||
type RawElement = i32; | ||||
/// High level representation of values in NodeTree | ||||
/// [`Blocks`](struct.Block.html) | ||||
/// | ||||
/// This is the high level representation that most algorithms should | ||||
/// use. | ||||
#[derive(Clone, Debug, Eq, PartialEq)] | ||||
enum Element { | ||||
Rev(Revision), | ||||
Block(usize), | ||||
None, | ||||
} | ||||
impl From<RawElement> for Element { | ||||
/// Conversion from low level representation, after endianness conversion. | ||||
/// | ||||
/// See [`Block`](struct.Block.html) for explanation about the encoding. | ||||
fn from(raw: RawElement) -> Element { | ||||
if raw >= 0 { | ||||
Element::Block(raw as usize) | ||||
} else if raw == -1 { | ||||
Element::None | ||||
} else { | ||||
Element::Rev(-raw - 2) | ||||
} | ||||
} | ||||
} | ||||
impl From<Element> for RawElement { | ||||
fn from(element: Element) -> RawElement { | ||||
match element { | ||||
Element::None => 0, | ||||
Element::Block(i) => i as RawElement, | ||||
Element::Rev(rev) => -rev - 2, | ||||
} | ||||
} | ||||
} | ||||
/// A logical block of the `NodeTree`, packed with a fixed size. | ||||
/// | ||||
/// These are always used in container types implementing `Index<Block>`, | ||||
/// such as `&Block` | ||||
/// | ||||
/// As an array of integers, its ith element encodes that the | ||||
/// ith potential edge from the block, representing the ith hexadecimal digit | ||||
/// (nybble) `i` is either: | ||||
/// | ||||
/// - absent (value -1) | ||||
/// - another `Block` in the same indexable container (value ≥ 0) | ||||
/// - a `Revision` leaf (value ≤ -2) | ||||
/// | ||||
/// Endianness has to be fixed for consistency on shared storage across | ||||
/// different architectures. | ||||
/// | ||||
/// A key difference with the C `nodetree` is that we need to be | ||||
/// able to represent the [`Block`] at index 0, hence -1 is the empty marker | ||||
/// rather than 0 and the `Revision` range upper limit of -2 instead of -1. | ||||
/// | ||||
/// Another related difference is that `NULL_REVISION` (-1) is not | ||||
/// represented at all, because we want an immutable empty nodetree | ||||
/// to be valid. | ||||
#[derive(Clone, PartialEq)] | ||||
pub struct Block([RawElement; 16]); | ||||
impl Block { | ||||
fn new() -> Self { | ||||
Block([-1; 16]) | ||||
} | ||||
fn get(&self, nybble: u8) -> Element { | ||||
Element::from(RawElement::from_be(self.0[nybble as usize])) | ||||
} | ||||
fn set(&mut self, nybble: u8, element: Element) { | ||||
self.0[nybble as usize] = RawElement::to_be(element.into()) | ||||
} | ||||
} | ||||
impl fmt::Debug for Block { | ||||
/// sparse representation for testing and debugging purposes | ||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
f.debug_map() | ||||
.entries((0..16).filter_map(|i| match self.get(i) { | ||||
Element::None => None, | ||||
element => Some((i, element)), | ||||
})) | ||||
.finish() | ||||
} | ||||
} | ||||
Georges Racinet
|
r44646 | /// A mutable 16-radix tree with the root block logically at the end | ||
/// | ||||
/// Because of the append only nature of our node trees, we need to | ||||
/// keep the original untouched and store new blocks separately. | ||||
/// | ||||
/// The mutable root `Block` is kept apart so that we don't have to rebump | ||||
/// it on each insertion. | ||||
Georges Racinet
|
r44644 | pub struct NodeTree { | ||
readonly: Box<dyn Deref<Target = [Block]> + Send>, | ||||
Georges Racinet
|
r44646 | growable: Vec<Block>, | ||
root: Block, | ||||
Georges Racinet
|
r44644 | } | ||
Georges Racinet
|
r44645 | impl Index<usize> for NodeTree { | ||
type Output = Block; | ||||
fn index(&self, i: usize) -> &Block { | ||||
Georges Racinet
|
r44646 | let ro_len = self.readonly.len(); | ||
if i < ro_len { | ||||
&self.readonly[i] | ||||
} else if i == ro_len + self.growable.len() { | ||||
&self.root | ||||
} else { | ||||
&self.growable[i - ro_len] | ||||
} | ||||
Georges Racinet
|
r44645 | } | ||
} | ||||
Georges Racinet
|
r44644 | /// Return `None` unless the `Node` for `rev` has given prefix in `index`. | ||
fn has_prefix_or_none<'p>( | ||||
idx: &impl RevlogIndex, | ||||
prefix: NodePrefixRef<'p>, | ||||
rev: Revision, | ||||
) -> Result<Option<Revision>, NodeMapError> { | ||||
idx.node(rev) | ||||
.ok_or_else(|| NodeMapError::RevisionNotInIndex(rev)) | ||||
.map(|node| { | ||||
if prefix.is_prefix_of(node) { | ||||
Some(rev) | ||||
} else { | ||||
None | ||||
} | ||||
}) | ||||
} | ||||
impl NodeTree { | ||||
Georges Racinet
|
r44646 | /// Initiate a NodeTree from an immutable slice-like of `Block` | ||
/// | ||||
/// We keep `readonly` and clone its root block if it isn't empty. | ||||
fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self { | ||||
let root = readonly | ||||
.last() | ||||
.map(|b| b.clone()) | ||||
.unwrap_or_else(|| Block::new()); | ||||
NodeTree { | ||||
readonly: readonly, | ||||
growable: Vec::new(), | ||||
root: root, | ||||
} | ||||
Georges Racinet
|
r44645 | } | ||
Georges Racinet
|
r44646 | /// Total number of blocks | ||
fn len(&self) -> usize { | ||||
self.readonly.len() + self.growable.len() + 1 | ||||
} | ||||
/// Implemented for completeness | ||||
/// | ||||
/// A `NodeTree` always has at least the mutable root block. | ||||
#[allow(dead_code)] | ||||
Georges Racinet
|
r44645 | fn is_empty(&self) -> bool { | ||
Georges Racinet
|
r44646 | false | ||
Georges Racinet
|
r44645 | } | ||
Georges Racinet
|
r44644 | /// Main working method for `NodeTree` searches | ||
/// | ||||
/// This partial implementation lacks special cases for NULL_REVISION | ||||
fn lookup<'p>( | ||||
&self, | ||||
prefix: NodePrefixRef<'p>, | ||||
) -> Result<Option<Revision>, NodeMapError> { | ||||
Georges Racinet
|
r44647 | for visit_item in self.visit(prefix) { | ||
if let Some(opt) = visit_item.final_revision() { | ||||
return Ok(opt); | ||||
Georges Racinet
|
r44644 | } | ||
} | ||||
Err(NodeMapError::MultipleResults) | ||||
} | ||||
Georges Racinet
|
r44647 | |||
fn visit<'n, 'p>( | ||||
&'n self, | ||||
prefix: NodePrefixRef<'p>, | ||||
) -> NodeTreeVisitor<'n, 'p> { | ||||
NodeTreeVisitor { | ||||
nt: self, | ||||
prefix: prefix, | ||||
visit: self.len() - 1, | ||||
nybble_idx: 0, | ||||
done: false, | ||||
} | ||||
} | ||||
} | ||||
struct NodeTreeVisitor<'n, 'p> { | ||||
nt: &'n NodeTree, | ||||
prefix: NodePrefixRef<'p>, | ||||
visit: usize, | ||||
nybble_idx: usize, | ||||
done: bool, | ||||
} | ||||
#[derive(Debug, PartialEq, Clone)] | ||||
struct NodeTreeVisitItem { | ||||
block_idx: usize, | ||||
nybble: u8, | ||||
element: Element, | ||||
} | ||||
impl<'n, 'p> Iterator for NodeTreeVisitor<'n, 'p> { | ||||
type Item = NodeTreeVisitItem; | ||||
fn next(&mut self) -> Option<Self::Item> { | ||||
if self.done || self.nybble_idx >= self.prefix.len() { | ||||
return None; | ||||
} | ||||
let nybble = self.prefix.get_nybble(self.nybble_idx); | ||||
self.nybble_idx += 1; | ||||
let visit = self.visit; | ||||
let element = self.nt[visit].get(nybble); | ||||
if let Element::Block(idx) = element { | ||||
self.visit = idx; | ||||
} else { | ||||
self.done = true; | ||||
} | ||||
Some(NodeTreeVisitItem { | ||||
block_idx: visit, | ||||
nybble: nybble, | ||||
element: element, | ||||
}) | ||||
} | ||||
} | ||||
impl NodeTreeVisitItem { | ||||
// Return `Some(opt)` if this item is final, with `opt` being the | ||||
// `Revision` that it may represent. | ||||
// | ||||
// If the item is not terminal, return `None` | ||||
fn final_revision(&self) -> Option<Option<Revision>> { | ||||
match self.element { | ||||
Element::Block(_) => None, | ||||
Element::Rev(r) => Some(Some(r)), | ||||
Element::None => Some(None), | ||||
} | ||||
} | ||||
Georges Racinet
|
r44644 | } | ||
impl From<Vec<Block>> for NodeTree { | ||||
fn from(vec: Vec<Block>) -> Self { | ||||
Georges Racinet
|
r44646 | Self::new(Box::new(vec)) | ||
Georges Racinet
|
r44644 | } | ||
} | ||||
impl fmt::Debug for NodeTree { | ||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
Georges Racinet
|
r44646 | let readonly: &[Block] = &*self.readonly; | ||
write!( | ||||
f, | ||||
"readonly: {:?}, growable: {:?}, root: {:?}", | ||||
readonly, self.growable, self.root | ||||
) | ||||
Georges Racinet
|
r44644 | } | ||
} | ||||
impl NodeMap for NodeTree { | ||||
fn find_bin<'a>( | ||||
&self, | ||||
idx: &impl RevlogIndex, | ||||
prefix: NodePrefixRef<'a>, | ||||
) -> Result<Option<Revision>, NodeMapError> { | ||||
self.lookup(prefix.clone()).and_then(|opt| { | ||||
opt.map_or(Ok(None), |rev| has_prefix_or_none(idx, prefix, rev)) | ||||
}) | ||||
} | ||||
} | ||||
Georges Racinet
|
r44600 | #[cfg(test)] | ||
mod tests { | ||||
Georges Racinet
|
r44644 | use super::NodeMapError::*; | ||
Georges Racinet
|
r44600 | use super::*; | ||
Georges Racinet
|
r44644 | use crate::revlog::node::{hex_pad_right, Node}; | ||
use std::collections::HashMap; | ||||
Georges Racinet
|
r44600 | |||
/// Creates a `Block` using a syntax close to the `Debug` output | ||||
macro_rules! block { | ||||
{$($nybble:tt : $variant:ident($val:tt)),*} => ( | ||||
{ | ||||
let mut block = Block::new(); | ||||
$(block.set($nybble, Element::$variant($val)));*; | ||||
block | ||||
} | ||||
) | ||||
} | ||||
#[test] | ||||
fn test_block_debug() { | ||||
let mut block = Block::new(); | ||||
block.set(1, Element::Rev(3)); | ||||
block.set(10, Element::Block(0)); | ||||
assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}"); | ||||
} | ||||
#[test] | ||||
fn test_block_macro() { | ||||
let block = block! {5: Block(2)}; | ||||
assert_eq!(format!("{:?}", block), "{5: Block(2)}"); | ||||
let block = block! {13: Rev(15), 5: Block(2)}; | ||||
assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}"); | ||||
} | ||||
#[test] | ||||
fn test_raw_block() { | ||||
let mut raw = [-1; 16]; | ||||
raw[0] = 0; | ||||
raw[1] = RawElement::to_be(15); | ||||
raw[2] = RawElement::to_be(-2); | ||||
raw[3] = RawElement::to_be(-1); | ||||
raw[4] = RawElement::to_be(-3); | ||||
let block = Block(raw); | ||||
assert_eq!(block.get(0), Element::Block(0)); | ||||
assert_eq!(block.get(1), Element::Block(15)); | ||||
assert_eq!(block.get(3), Element::None); | ||||
assert_eq!(block.get(2), Element::Rev(0)); | ||||
assert_eq!(block.get(4), Element::Rev(1)); | ||||
} | ||||
Georges Racinet
|
r44644 | |||
type TestIndex = HashMap<Revision, Node>; | ||||
impl RevlogIndex for TestIndex { | ||||
fn node(&self, rev: Revision) -> Option<&Node> { | ||||
self.get(&rev) | ||||
} | ||||
fn len(&self) -> usize { | ||||
self.len() | ||||
} | ||||
} | ||||
/// Pad hexadecimal Node prefix with zeros on the right, then insert | ||||
/// | ||||
/// This avoids having to repeatedly write very long hexadecimal | ||||
/// strings for test data, and brings actual hash size independency. | ||||
fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) { | ||||
idx.insert(rev, Node::from_hex(&hex_pad_right(hex)).unwrap()); | ||||
} | ||||
fn sample_nodetree() -> NodeTree { | ||||
NodeTree::from(vec![ | ||||
block![0: Rev(9)], | ||||
block![0: Rev(0), 1: Rev(9)], | ||||
block![0: Block(1), 1:Rev(1)], | ||||
]) | ||||
} | ||||
#[test] | ||||
fn test_nt_debug() { | ||||
let nt = sample_nodetree(); | ||||
assert_eq!( | ||||
format!("{:?}", nt), | ||||
"readonly: \ | ||||
Georges Racinet
|
r44646 | [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \ | ||
growable: [], \ | ||||
root: {0: Block(1), 1: Rev(1)}", | ||||
Georges Racinet
|
r44644 | ); | ||
} | ||||
#[test] | ||||
fn test_immutable_find_simplest() -> Result<(), NodeMapError> { | ||||
let mut idx: TestIndex = HashMap::new(); | ||||
pad_insert(&mut idx, 1, "1234deadcafe"); | ||||
Georges Racinet
|
r44646 | let nt = NodeTree::from(vec![block! {1: Rev(1)}]); | ||
Georges Racinet
|
r44644 | assert_eq!(nt.find_hex(&idx, "1")?, Some(1)); | ||
assert_eq!(nt.find_hex(&idx, "12")?, Some(1)); | ||||
assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1)); | ||||
assert_eq!(nt.find_hex(&idx, "1a")?, None); | ||||
assert_eq!(nt.find_hex(&idx, "ab")?, None); | ||||
// and with full binary Nodes | ||||
assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1)); | ||||
let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap(); | ||||
assert_eq!(nt.find_node(&idx, &unknown)?, None); | ||||
Ok(()) | ||||
} | ||||
#[test] | ||||
fn test_immutable_find_one_jump() { | ||||
let mut idx = TestIndex::new(); | ||||
pad_insert(&mut idx, 9, "012"); | ||||
pad_insert(&mut idx, 0, "00a"); | ||||
let nt = sample_nodetree(); | ||||
assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults)); | ||||
assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9))); | ||||
assert_eq!(nt.find_hex(&idx, "00"), Ok(Some(0))); | ||||
assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0))); | ||||
} | ||||
Georges Racinet
|
r44646 | |||
#[test] | ||||
fn test_mutated_find() -> Result<(), NodeMapError> { | ||||
let mut idx = TestIndex::new(); | ||||
pad_insert(&mut idx, 9, "012"); | ||||
pad_insert(&mut idx, 0, "00a"); | ||||
pad_insert(&mut idx, 2, "cafe"); | ||||
pad_insert(&mut idx, 3, "15"); | ||||
pad_insert(&mut idx, 1, "10"); | ||||
let nt = NodeTree { | ||||
readonly: sample_nodetree().readonly, | ||||
growable: vec![block![0: Rev(1), 5: Rev(3)]], | ||||
root: block![0: Block(1), 1:Block(3), 12: Rev(2)], | ||||
}; | ||||
assert_eq!(nt.find_hex(&idx, "10")?, Some(1)); | ||||
assert_eq!(nt.find_hex(&idx, "c")?, Some(2)); | ||||
assert_eq!(nt.find_hex(&idx, "00")?, Some(0)); | ||||
assert_eq!(nt.find_hex(&idx, "01")?, Some(9)); | ||||
Ok(()) | ||||
} | ||||
Georges Racinet
|
r44600 | } | ||