upstream/mercurial-mirror Commit - r47161:18a261b1

rust: Remove hex parsing from the nodemap...

Simon Sapin -

r47161:18a261b1 default

parent child

rust/hg-core/examples/nodemap/main.rs

0 +1 -1

             // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             use clap::*;
             use hg::revlog::node::*;
             use hg::revlog::nodemap::*;
             use hg::revlog::*;
             use memmap::MmapOptions;
             use rand::Rng;
             use std::fs::File;
             use std::io;
             use std::io::Write;
             use std::path::{Path, PathBuf};
             use std::str::FromStr;
             use std::time::Instant;
             mod index;
             use index::Index;
             /// Load the changelog index (`.hg/store/00changelog.i`) of the repository
             /// located at `repo_path`, through `Index::load_mmap`.
             fn mmap_index(repo_path: &Path) -> Index {
                 let index_path: PathBuf = repo_path
                     .join(".hg")
                     .join("store")
                     .join("00changelog.i");
                 Index::load_mmap(index_path)
             }
             /// Open the nodemap file at `path`, map it in memory and hand the mapping
             /// over to `NodeTree::load_bytes`, using its whole length.
             ///
             /// Panics if the file cannot be opened or mapped.
             fn mmap_nodemap(path: &Path) -> NodeTree {
                 let nodemap_file = File::open(path).unwrap();
                 let mapping =
                     unsafe { MmapOptions::new().map(&nodemap_file).unwrap() };
                 let byte_len = mapping.len();
                 NodeTree::load_bytes(Box::new(mapping), byte_len)
             }
             /// Scan the whole index and create the corresponding nodemap file at `path`
             ///
             /// Every revision of `index` is inserted in a fresh `NodeTree`, which is
             /// then serialized to the newly created file. Progress and timing are
             /// reported on stderr.
             ///
             /// Returns an error if the file cannot be created or fully written.
             /// Panics if a revision has no node in `index` or if an insertion fails.
             fn create(index: &Index, path: &Path) -> io::Result<()> {
                 let mut file = File::create(path)?;
                 let start = Instant::now();
                 let mut nm = NodeTree::default();
                 for rev in 0..index.len() {
                     let rev = rev as Revision;
                     nm.insert(index, index.node(rev).unwrap(), rev).unwrap();
                 }
                 eprintln!("Nodemap constructed in RAM in {:?}", start.elapsed());
                 // `write` is allowed to write only part of the buffer; `write_all`
                 // loops until everything is on disk or an error occurs.
                 file.write_all(&nm.into_readonly_and_added_bytes().1)?;
                 eprintln!("Nodemap written to disk");
                 Ok(())
             }
             /// Look up the hexadecimal node `prefix` in the nodemap `nm` and print
             /// the outcome, together with the time it took to obtain it.
             fn query(index: &Index, nm: &NodeTree, prefix: &str) {
                 let start = Instant::now();
-                let res = nm.find_hex(index, prefix);
+                let res = NodePrefix::from_hex(prefix).map(|p| nm.find_bin(index, p));
                 println!("Result found in {:?}: {:?}", start.elapsed(), res);
             }
             /// Time `queries` successful lookups of nodes picked at random in `index`
             ///
             /// The nodes are drawn from the index first, then looked up in `nm` with
             /// `find_bin`; only the lookups are timed. The drawn nodes are printed
             /// beforehand when there are fewer than 10 of them.
             ///
             /// Nothing is measured when `queries` is 0 or when the index is empty.
             /// Panics if a lookup returns an error.
             fn bench(index: &Index, nm: &NodeTree, queries: usize) {
                 let len = index.len() as u32;
                 if queries == 0 || len == 0 {
                     // Without this guard, `% len`, `elapsed / 0` and
                     // `nodes.last().unwrap()` below would panic.
                     println!("Nothing to benchmark (no query requested or empty index)");
                     return;
                 }
                 let mut rng = rand::thread_rng();
                 let nodes: Vec<Node> = (0..queries)
                     .map(|_| {
                         index
                             .node((rng.gen::<u32>() % len) as Revision)
                             .unwrap()
                             .clone()
                     })
                     .collect();
                 if queries < 10 {
                     let nodes_hex: Vec<String> =
                         nodes.iter().map(|n| format!("{:x}", n)).collect();
                     println!("Nodes: {:?}", nodes_hex);
                 }
                 let mut last: Option<Revision> = None;
                 let start = Instant::now();
                 for node in nodes.iter() {
                     last = nm.find_bin(index, node.into()).unwrap();
                 }
                 let elapsed = start.elapsed();
                 println!(
                     "Did {} queries in {:?} (mean {:?}), last was {:x} with result {:?}",
                     queries,
                     elapsed,
                     elapsed / (queries as u32),
                     // `nodes` holds `queries` (> 0) elements at this point
                     nodes.last().unwrap(),
                     last
                 );
             }
             /// Command line entry point
             ///
             /// Takes the `REPOSITORY` and `NODEMAP_FILE` positional arguments, then
             /// runs one of the `create`, `query` or `bench` subcommands.
             /// The repository index is loaded in all cases; the nodemap file is only
             /// read for `query` and `bench`, since `create` is the one producing it.
             fn main() {
                 let matches = App::new("Nodemap pure Rust example")
                     .arg(
                         Arg::with_name("REPOSITORY")
                             .help("Path to the repository, always necessary for its index")
                             .required(true),
                     )
                     .arg(
                         Arg::with_name("NODEMAP_FILE")
                             .help("Path to the nodemap file, independent of REPOSITORY")
                             .required(true),
                     )
                     .subcommand(
                         SubCommand::with_name("create")
                             .about("Create NODEMAP_FILE by scanning repository index"),
                     )
                     .subcommand(
                         SubCommand::with_name("query")
                             .about("Query NODEMAP_FILE for PREFIX")
                             .arg(Arg::with_name("PREFIX").required(true)),
                     )
                     .subcommand(
                         SubCommand::with_name("bench")
                             .about(
                                 "Perform #QUERIES random successful queries on NODEMAP_FILE")
                             .arg(Arg::with_name("QUERIES").required(true)),
                     )
                     .get_matches();
                 let repo = matches.value_of("REPOSITORY").unwrap();
                 let nm_path = matches.value_of("NODEMAP_FILE").unwrap();
                 // `Path::new` already returns a `&Path`: no extra borrow needed
                 let index = mmap_index(Path::new(repo));
                 if matches.subcommand_matches("create").is_some() {
                     println!("Creating nodemap file {} for repository {}", nm_path, repo);
                     create(&index, Path::new(nm_path)).unwrap();
                     // the nodemap file must not be mapped for reading in this case
                     return;
                 }
                 let nm = mmap_nodemap(Path::new(nm_path));
                 if let Some(sub_matches) = matches.subcommand_matches("query") {
                     let prefix = sub_matches.value_of("PREFIX").unwrap();
                     println!(
                         "Querying {} in nodemap file {} of repository {}",
                         prefix, nm_path, repo
                     );
                     query(&index, &nm, prefix);
                 }
                 if let Some(sub_matches) = matches.subcommand_matches("bench") {
                     let queries =
                         usize::from_str(sub_matches.value_of("QUERIES").unwrap())
                             .unwrap();
                     println!(
                         "Doing {} random queries in nodemap file {} of repository {}",
                         queries, nm_path, repo
                     );
                     bench(&index, &nm, queries);
                 }
             }

rust/hg-core/src/revlog/nodemap.rs

0 +25 -54

             // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
             //           and Mercurial contributors
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Indexing facilities for fast retrieval of `Revision` from `Node`
             //!
             //! This provides a variation on the 16-ary radix tree that is
             //! provided as "nodetree" in revlog.c, ready for append-only persistence
             //! on disk.
             //!
             //! Following existing implicit conventions, the "nodemap" terminology
             //! is used in a more abstract context.
             use super::{
-                node::NULL_NODE, FromHexError, Node, NodePrefix, Revision, RevlogIndex,
+                node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
-                NULL_REVISION,
             };
             use bytes_cast::{unaligned, BytesCast};
             use std::cmp::max;
             use std::fmt;
             use std::mem::{self, align_of, size_of};
             use std::ops::Deref;
             use std::ops::Index;
             /// Errors reported by nodemap lookups and insertions
             #[derive(Debug, PartialEq)]
             pub enum NodeMapError {
                 /// Several revisions match the given prefix
                 MultipleResults,
-                InvalidNodePrefix,
                 /// A `Revision` stored in the nodemap could not be found in the index
                 RevisionNotInIndex(Revision),
             }
-            impl From<FromHexError> for NodeMapError {
-                fn from(_: FromHexError) -> Self {
-                    NodeMapError::InvalidNodePrefix
             /// Mapping system from Mercurial nodes to revision numbers.
             ///
             /// ## `RevlogIndex` and `NodeMap`
             ///
             /// One way to think about their relationship is that
             /// the `NodeMap` is a prefix-oriented reverse index of the `Node` information
             /// carried by a [`RevlogIndex`].
             ///
             /// Many of the methods in this trait take a `RevlogIndex` argument
             /// which is used for validation of their results. This index must naturally
             /// be the one the `NodeMap` is about, and it must be consistent.
             ///
             /// Notably, the `NodeMap` must not store
             /// information about more `Revision` values than there are in the index.
             /// If, in these methods, an encountered `Revision` is not in the index, a
             /// [`RevisionNotInIndex`] error is returned.
             ///
             /// In insert operations, the rule is thus that the `RevlogIndex` must always
             /// be updated first, and the `NodeMap` second.
             ///
             /// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex
             /// [`RevlogIndex`]: ../trait.RevlogIndex.html
             pub trait NodeMap {
                 /// Find the unique `Revision` having the given `Node`
                 ///
                 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
                 fn find_node(
                     &self,
                     index: &impl RevlogIndex,
                     node: &Node,
                 ) -> Result<Option<Revision>, NodeMapError> {
                     self.find_bin(index, node.into())
                 }
                 /// Find the unique Revision whose `Node` starts with a given binary prefix
                 ///
                 /// If no Revision matches the given prefix, `Ok(None)` is returned.
                 ///
                 /// If several Revisions match the given prefix, a [`MultipleResults`]
                 /// error is returned.
                 fn find_bin<'a>(
                     &self,
                     idx: &impl RevlogIndex,
                     prefix: NodePrefix,
                 ) -> Result<Option<Revision>, NodeMapError>;
-                /// Find the unique Revision whose `Node` hexadecimal string representation
-                /// starts with a given prefix
-                ///
-                /// If no Revision matches the given prefix, `Ok(None)` is returned.
-                ///
-                /// If several Revisions match the given prefix, a [`MultipleResults`]
-                /// error is returned.
-                fn find_hex(
-                    &self,
-                    idx: &impl RevlogIndex,
-                    prefix: &str,
-                ) -> Result<Option<Revision>, NodeMapError> {
-                    self.find_bin(idx, NodePrefix::from_hex(prefix)?)
                 /// Give the size of the shortest node prefix that determines
                 /// the revision uniquely.
                 ///
                 /// From a binary node prefix, if it is matched in the node map, this
                 /// returns the number of hexadecimal digits that would have sufficed
                 /// to find the revision uniquely.
                 ///
                 /// Returns `None` if no `Revision` could be found for the prefix.
                 ///
                 /// If several Revisions match the given prefix, a [`MultipleResults`]
                 /// error is returned.
                 fn unique_prefix_len_bin<'a>(
                     &self,
                     idx: &impl RevlogIndex,
                     node_prefix: NodePrefix,
                 ) -> Result<Option<usize>, NodeMapError>;
-                /// Same as `unique_prefix_len_bin`, with the hexadecimal representation
-                /// of the prefix as input.
-                fn unique_prefix_len_hex(
-                    &self,
-                    idx: &impl RevlogIndex,
-                    prefix: &str,
-                ) -> Result<Option<usize>, NodeMapError> {
-                    self.unique_prefix_len_bin(idx, NodePrefix::from_hex(prefix)?)
                 /// Same as `unique_prefix_len_bin`, with a full `Node` as input
                 fn unique_prefix_len_node(
                     &self,
                     idx: &impl RevlogIndex,
                     node: &Node,
                 ) -> Result<Option<usize>, NodeMapError> {
                     self.unique_prefix_len_bin(idx, node.into())
                 }
             }
             /// A [`NodeMap`] that can also record new `Node` to `Revision` associations
             pub trait MutableNodeMap: NodeMap {
                 /// Record that `node` is the `Node` of revision `rev`
                 ///
                 /// NOTE(review): `index` is presumably the `RevlogIndex` this nodemap is
                 /// about, consulted to read the nodes of already recorded revisions, as
                 /// the inherent `NodeTree::insert` does; confirm for other implementors.
                 fn insert<I: RevlogIndex>(
                     &mut self,
                     index: &I,
                     node: &Node,
                     rev: Revision,
                 ) -> Result<(), NodeMapError>;
             }
             /// Low level representation of the elements of a NodeTree
             /// [`Block`](struct.Block.html)
             ///
             /// These have exactly the same representation as, for instance, on
             /// persistent storage.
             type RawElement = unaligned::I32Be;
             /// High level representation of values in NodeTree
             /// [`Blocks`](struct.Block.html)
             ///
             /// This is the high level representation that most algorithms should
             /// use.
             #[derive(Clone, Debug, Eq, PartialEq)]
             enum Element {
                 /// A `Revision` leaf (raw value ≤ -2)
                 Rev(Revision),
                 /// Index of another `Block` in the same container (raw value ≥ 0)
                 Block(usize),
                 /// Absent edge (raw value -1)
                 None,
             }
             impl From<RawElement> for Element {
                 /// Decode a low level element, once its integer value has been read
                 /// with the proper endianness.
                 ///
                 /// See [`Block`](struct.Block.html) for explanation about the encoding:
                 /// -1 is the empty marker, non-negative values are block indexes and
                 /// the remaining negative values are shifted revision numbers.
                 fn from(raw: RawElement) -> Element {
                     match raw.get() {
                         -1 => Element::None,
                         block_idx if block_idx >= 0 => {
                             Element::Block(block_idx as usize)
                         }
                         encoded_rev => Element::Rev(-encoded_rev - 2),
                     }
                 }
             }
             impl From<Element> for RawElement {
                 /// Conversion to the low level representation.
                 ///
                 /// This is the exact inverse of `Element::from(RawElement)`:
                 ///
                 /// - `Element::None` is encoded as -1
                 /// - `Element::Block(i)` is encoded as `i`
                 /// - `Element::Rev(rev)` is encoded as `-rev - 2`
                 fn from(element: Element) -> RawElement {
                     RawElement::from(match element {
                         // -1 is the empty marker (see `Block::new`); 0 would be read
                         // back as `Element::Block(0)`
                         Element::None => -1,
                         // block indexes are stored on 32 bits
                         Element::Block(i) => i as i32,
                         Element::Rev(rev) => -rev - 2,
                     })
                 }
             }
             /// Number of elements (potential edges) in a [`Block`](struct.Block.html)
             const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
             /// A logical block of the `NodeTree`, packed with a fixed size.
             ///
             /// These are used in container types that can be indexed to yield a
             /// `Block`, such as slices of `Block` or the `NodeTree` itself.
             ///
             /// As an array of integers, its ith element encodes that the
             /// ith potential edge from the block, representing the ith hexadecimal digit
             /// (nybble) `i` is either:
             ///
             /// - absent (value -1)
             /// - another `Block` in the same indexable container (value ≥ 0)
             /// - a `Revision` leaf (value ≤ -2)
             ///
             /// Endianness has to be fixed for consistency on shared storage across
             /// different architectures.
             ///
             /// A key difference with the C `nodetree` is that we need to be
             /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
             /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
             ///
             /// Another related difference is that `NULL_REVISION` (-1) is not
             /// represented at all, because we want an immutable empty nodetree
             /// to be valid.
             #[derive(Copy, Clone, BytesCast, PartialEq)]
             #[repr(transparent)]
             pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
             impl Block {
                 fn new() -> Self {
                     let absent_node = RawElement::from(-1);
                     Block([absent_node; ELEMENTS_PER_BLOCK])
                 }
                 fn get(&self, nybble: u8) -> Element {
                     self.0[nybble as usize].into()
                 }
                 fn set(&mut self, nybble: u8, element: Element) {
                     self.0[nybble as usize] = element.into()
                 }
             }
             impl fmt::Debug for Block {
                 /// Sparse map-like output, for testing and debugging purposes:
                 /// only the nybbles that have an element are displayed.
                 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                     let mut sparse = f.debug_map();
                     for nybble in 0..16 {
                         match self.get(nybble) {
                             Element::None => {}
                             element => {
                                 sparse.entry(&nybble, &element);
                             }
                         }
                     }
                     sparse.finish()
                 }
             }
             /// A mutable 16-radix tree with the root block logically at the end
             ///
             /// Because of the append only nature of our node trees, we need to
             /// keep the original untouched and store new blocks separately.
             ///
             /// The mutable root `Block` is kept apart so that we don't have to rebump
             /// it on each insertion.
             pub struct NodeTree {
                 /// The original blocks, never modified
                 readonly: Box<dyn Deref<Target = [Block]> + Send>,
                 /// Blocks created since, either new or mutable copies of `readonly`
                 /// blocks; the root is not part of them
                 growable: Vec<Block>,
                 /// The mutable root block, whose index is right after `growable`
                 root: Block,
                 /// Number of `readonly` blocks that were copied to `growable`, or the
                 /// full length of `readonly` after `invalidate_all()`
                 masked_inner_blocks: usize,
             }
             impl Index<usize> for NodeTree {
                 type Output = Block;
                 /// Give access to the `Block` at logical index `i`.
                 ///
                 /// Indexes are laid out as the readonly blocks first, then the growable
                 /// ones, and finally the root block.
                 fn index(&self, i: usize) -> &Block {
                     match i.checked_sub(self.readonly.len()) {
                         // `i` is lower than the readonly length
                         None => &self.readonly[i],
                         Some(offset) if offset == self.growable.len() => &self.root,
                         Some(offset) => &self.growable[offset],
                     }
                 }
             }
             /// Give back `rev` if its `Node` in `idx` starts with `prefix`,
             /// and `None` otherwise.
             ///
             /// A `RevisionNotInIndex` error is returned if `idx` has no node for `rev`.
             fn has_prefix_or_none(
                 idx: &impl RevlogIndex,
                 prefix: NodePrefix,
                 rev: Revision,
             ) -> Result<Option<Revision>, NodeMapError> {
                 let node = match idx.node(rev) {
                     Some(node) => node,
                     None => return Err(NodeMapError::RevisionNotInIndex(rev)),
                 };
                 if prefix.is_prefix_of(node) {
                     Ok(Some(rev))
                 } else {
                     Ok(None)
                 }
             }
             /// Check that the node of the candidate revision really starts with the
             /// given prefix, and resolve the ambiguities involving `NULL_REVISION`.
             ///
             /// This is needed because the data in the NodeTree only tells that some
             /// revision is the only one for a *subprefix* of the one being looked up.
             ///
             /// `candidate` is made of the revision found in the NodeTree, if any, and
             /// of the number of nybbles that were needed to find it.
             fn validate_candidate(
                 idx: &impl RevlogIndex,
                 prefix: NodePrefix,
                 candidate: (Option<Revision>, usize),
             ) -> Result<(Option<Revision>, usize), NodeMapError> {
                 let (rev, steps) = candidate;
                 match prefix.first_different_nybble(&NULL_NODE) {
                     Some(nz_nybble) => match rev {
                         None => Ok((None, steps)),
                         Some(r) => {
                             let checked = has_prefix_or_none(idx, prefix, r)?;
                             Ok((checked, max(steps, nz_nybble + 1)))
                         }
                     },
                     None => {
                         // the prefix is only made of zeros; NULL_REVISION always
                         // matches it and any other *valid* result is an ambiguity
                         let confirmed = match rev {
                             None => None,
                             Some(r) => has_prefix_or_none(idx, prefix, r)?,
                         };
                         if confirmed.is_some() {
                             Err(NodeMapError::MultipleResults)
                         } else {
                             Ok((Some(NULL_REVISION), steps + 1))
                         }
                     }
                 }
             }
             impl NodeTree {
                 /// Initiate a NodeTree from an immutable slice-like of `Block`
                 ///
                 /// We keep `readonly` and clone its root block if it isn't empty.
                 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
                     let root = readonly.last().cloned().unwrap_or_else(Block::new);
                     NodeTree {
                         readonly,
                         growable: Vec::new(),
                         root,
                         masked_inner_blocks: 0,
                     }
                 }
                 /// Create from an opaque bunch of bytes
                 ///
                 /// The `NodeTree` is built on a `NodeTreeBytes` wrapping `bytes`,
                 /// of which exactly `amount` bytes are used.
                 ///
                 /// - `bytes` could be derived from `PyBuffer` and `Mmap` objects.
                 /// - `amount` is expressed in bytes, and is not automatically derived from
                 ///   `bytes`, so that a caller that manages them atomically can perform
                 ///   temporary disk serializations and still rollback easily if needed.
                 ///   First use-case for this would be to support Mercurial shell hooks.
                 ///
                 /// NOTE(review): earlier documentation also described an `offset`
                 /// parameter, allowing the final file format to include fixed data
                 /// (generation number, behavioural flags); this function takes none.
                 ///
                 /// panics if `bytes` is smaller than `amount`
                 pub fn load_bytes(
                     bytes: Box<dyn Deref<Target = [u8]> + Send>,
                     amount: usize,
                 ) -> Self {
                     let blocks = NodeTreeBytes::new(bytes, amount);
                     NodeTree::new(Box::new(blocks))
                 }
                 /// Retrieve added `Block` and the original immutable data
                 pub fn into_readonly_and_added(
                     self,
                 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
                     let mut vec = self.growable;
                     let readonly = self.readonly;
                     if readonly.last() != Some(&self.root) {
                         vec.push(self.root);
                     }
                     (readonly, vec)
                 }
                 /// Retrieve added `Blocks` as bytes, ready to be written to persistent
                 /// storage
                 ///
                 /// The first returned value is the original immutable data, exactly as
                 /// returned by `into_readonly_and_added()`.
                 pub fn into_readonly_and_added_bytes(
                     self,
                 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
                     let (readonly, vec) = self.into_readonly_and_added();
                     // Prevent running `vec`'s destructor so we are in complete control
                     // of the allocation.
                     let vec = mem::ManuallyDrop::new(vec);
                     // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
                     // bytes, so this is perfectly safe.
                     //
                     // SAFETY: the pointer, length and capacity all come from the same
                     // `Vec<Block>`, whose destructor will not run; length and capacity
                     // are scaled by the size of `Block`, and the assertions below check
                     // that `Block` has the size and alignment this conversion relies on.
                     let bytes = unsafe {
                         // Check for compatible allocation layout.
                         // (Optimized away by constant-folding + dead code elimination.)
                         assert_eq!(size_of::<Block>(), 64);
                         assert_eq!(align_of::<Block>(), 1);
                         // /!\ Any use of `vec` after this is use-after-free.
                         // TODO: use `into_raw_parts` once stabilized
                         Vec::from_raw_parts(
                             vec.as_ptr() as *mut u8,
                             vec.len() * size_of::<Block>(),
                             vec.capacity() * size_of::<Block>(),
                         )
                     };
                     (readonly, bytes)
                 }
                 /// Total number of blocks: readonly, growable and the root
                 fn len(&self) -> usize {
                     let stored_blocks = self.readonly.len() + self.growable.len();
                     // the mutable root is kept apart from both containers
                     stored_blocks + 1
                 }
                 /// Implemented for completeness
                 ///
                 /// A `NodeTree` always has at least the mutable root block, hence this
                 /// always returns `false`.
                 // NOTE(review): `dead_code` is presumably allowed because nothing calls
                 // this method yet; confirm before removing the attribute.
                 #[allow(dead_code)]
                 fn is_empty(&self) -> bool {
                     false
                 }
                 /// Main working method for `NodeTree` searches
                 ///
                 /// The first returned value is what can be told from the `NodeTree` data
                 /// *alone*. `None` guarantees that the given prefix is absent
                 /// from the `NodeTree` data (but it still could match `NULL_NODE`).
                 /// `Some(rev)` means that `rev` is the unique `Revision`
                 /// that could match the prefix. Actually, all that can be inferred from
                 /// the `NodeTree` data is that `rev` is the revision with the longest
                 /// common node prefix with the given prefix.
                 ///
                 /// The second returned value is the size of the smallest subprefix
                 /// of `prefix` that would give the same result, i.e. not the
                 /// `MultipleResults` error variant (again, using only the data of the
                 /// `NodeTree`).
                 ///
                 /// The `MultipleResults` error is returned if the visit of `prefix`
                 /// ends without reaching a final revision.
                 fn lookup(
                     &self,
                     prefix: NodePrefix,
                 ) -> Result<(Option<Revision>, usize), NodeMapError> {
                     self.visit(prefix)
                         .enumerate()
                         .find_map(|(depth, item)| {
                             item.final_revision().map(|found| (found, depth + 1))
                         })
                         .ok_or(NodeMapError::MultipleResults)
                 }
                 /// Create a visitor for `prefix`, positioned on the root block and on
                 /// the first nybble.
                 fn visit<'n>(&'n self, prefix: NodePrefix) -> NodeTreeVisitor<'n> {
                     // the root block has the highest index of all
                     let root_idx = self.len() - 1;
                     NodeTreeVisitor {
                         nt: self,
                         prefix,
                         visit: root_idx,
                         nybble_idx: 0,
                         done: false,
                     }
                 }
                 /// Return a mutable reference for `Block` at index `idx`.
                 ///
                 /// If `idx` lies in the immutable area, then the reference is to
                 /// a newly appended copy.
                 ///
                 /// Returns (new_idx, mut_ref, glen) where
                 ///
                 /// - `new_idx` is the index of the mutable `Block`
                 /// - `mut_ref` is a mutable reference to the mutable Block.
                 /// - `glen` is the new length of `self.growable`
                 ///
                 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
                 /// itself because of the mutable borrow taken with the returned `Block`
                 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
                     let ro_blocks = &self.readonly;
                     let ro_len = ro_blocks.len();
                     let glen = self.growable.len();
                     if idx < ro_len {
                         // readonly block: work on a copy appended to the growable part
                         // and account for the readonly block being masked by it
                         self.masked_inner_blocks += 1;
                         self.growable.push(ro_blocks[idx]);
                         (glen + ro_len, &mut self.growable[glen], glen + 1)
                     } else if glen + ro_len == idx {
                         // the root block is stored apart, nothing to copy
                         (idx, &mut self.root, glen)
                     } else {
                         // already in the growable part
                         (idx, &mut self.growable[idx - ro_len], glen)
                     }
                 }
                 /// Main insertion method
                 ///
                 /// This will dive in the node tree to find the deepest `Block` for
                 /// `node`, split it as much as needed and record `node` in there.
                 /// The method then backtracks, updating references in all the visited
                 /// blocks from the root.
                 ///
                 /// All the mutated `Block` are copied first to the growable part if
                 /// needed. That happens for those in the immutable part except the root.
                 ///
                 /// `index` is used to read the node of the revision already recorded
                 /// in the slot where `node` should go, if any. A `RevisionNotInIndex`
                 /// error is returned if that revision is not in `index`.
                 pub fn insert<I: RevlogIndex>(
                     &mut self,
                     index: &I,
                     node: &Node,
                     rev: Revision,
                 ) -> Result<(), NodeMapError> {
                     let ro_len = &self.readonly.len();
                     let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
                     // one nybble of `node` is read for each visited block
                     let read_nybbles = visit_steps.len();
                     // visit_steps cannot be empty, since we always visit the root block
                     let deepest = visit_steps.pop().unwrap();
                     let (mut block_idx, mut block, mut glen) =
                         self.mutable_block(deepest.block_idx);
                     if let Element::Rev(old_rev) = deepest.element {
                         // The slot is already taken by a revision: both revisions
                         // have to be pushed down to the block where their nodes differ
                         let old_node = index
                             .node(old_rev)
                             .ok_or_else(|| NodeMapError::RevisionNotInIndex(old_rev))?;
                         if old_node == node {
                             return Ok(()); // avoid creating lots of useless blocks
                         }
                         // Looping over the tail of nybbles in both nodes, creating
                         // new blocks until we find the difference
                         // (`new_block_idx` is the index of the next block to be
                         // pushed onto `self.growable`)
                         let mut new_block_idx = ro_len + glen;
                         let mut nybble = deepest.nybble;
                         for nybble_pos in read_nybbles..node.nybbles_len() {
                             block.set(nybble, Element::Block(new_block_idx));
                             let new_nybble = node.get_nybble(nybble_pos);
                             let old_nybble = old_node.get_nybble(nybble_pos);
                             if old_nybble == new_nybble {
                                 // still a common prefix: chain an intermediate block
                                 self.growable.push(Block::new());
                                 block = &mut self.growable[glen];
                                 glen += 1;
                                 new_block_idx += 1;
                                 nybble = new_nybble;
                             } else {
                                 // first difference: both revisions become leaves of
                                 // the same new block
                                 let mut new_block = Block::new();
                                 new_block.set(old_nybble, Element::Rev(old_rev));
                                 new_block.set(new_nybble, Element::Rev(rev));
                                 self.growable.push(new_block);
                                 break;
                             }
                         }
                     } else {
                         // Free slot in the deepest block: no splitting has to be done
                         block.set(deepest.nybble, Element::Rev(rev));
                     }
                     // Backtrack over visit steps to update references
                     while let Some(visited) = visit_steps.pop() {
                         let to_write = Element::Block(block_idx);
                         if visit_steps.is_empty() {
                             // the first visited block is the root, which is mutated
                             // in place
                             self.root.set(visited.nybble, to_write);
                             break;
                         }
                         let (new_idx, block, _) = self.mutable_block(visited.block_idx);
                         if block.get(visited.nybble) == to_write {
                             // this reference is already correct: stop backtracking
                             break;
                         }
                         block.set(visited.nybble, to_write);
                         block_idx = new_idx;
                     }
                     Ok(())
                 }
                 /// Logically empty the whole `NodeTree`, leaving the immutable part
                 /// untouched.
                 ///
                 /// The root and the growable blocks are reset, and the masked counter
                 /// is set to the length of the readonly part.
                 pub fn invalidate_all(&mut self) {
                     self.masked_inner_blocks = self.readonly.len();
                     self.growable = Vec::new();
                     self.root = Block::new();
                 }
                 /// Number of blocks of the readonly part that are currently masked
                 /// by the mutable part.
                 ///
                 /// The `NodeTree` structure cannot efficiently tell how many blocks
                 /// of the readonly part were already unreachable.
                 ///
                 /// After `invalidate_all()` has been called, the result can exceed
                 /// the size of the readonly part: this is the conventional way to
                 /// state that all readonly blocks are masked, which is what callers
                 /// need, without having to know how many blocks were unreachable in
                 /// the first place.
                 pub fn masked_readonly_blocks(&self) -> usize {
                     match self.readonly.last() {
                         // The readonly root is no longer the current root: count it
                         // on top of the masked inner blocks.
                         Some(readonly_root) if readonly_root != &self.root => {
                             self.masked_inner_blocks + 1
                         }
                         // Empty readonly part, or root left unchanged
                         _ => 0,
                     }
                 }
             }
             /// Byte buffer exposed as a slice of `Block` (see the `Deref` impl).
             pub struct NodeTreeBytes {
                 /// Underlying byte storage
                 buffer: Box<dyn Deref<Target = [u8]> + Send>,
                 /// Number of whole `Block`s exposed from `buffer`
                 len_in_blocks: usize,
             }
             impl NodeTreeBytes {
                 fn new(
                     buffer: Box<dyn Deref<Target = [u8]> + Send>,
                     amount: usize,
                 ) -> Self {
                     assert!(buffer.len() >= amount);
                     let len_in_blocks = amount / size_of::<Block>();
                     NodeTreeBytes {
                         buffer,
                         len_in_blocks,
                     }
                 }
             }
             impl Deref for NodeTreeBytes {
                 type Target = [Block];
                 /// View the first `len_in_blocks` blocks of the buffer.
                 fn deref(&self) -> &[Block] {
                     let parsed =
                         Block::slice_from_bytes(&self.buffer, self.len_in_blocks);
                     // `NodeTreeBytes::new` already asserted that `self.buffer` is
                     // large enough.
                     parsed.unwrap().0
                 }
             }
             /// Iterator walking a `NodeTree` along the nybbles of a prefix.
             ///
             /// Each step yields a `NodeTreeVisitItem` (see the `Iterator` impl).
             struct NodeTreeVisitor<'n> {
                 /// Tree being visited
                 nt: &'n NodeTree,
                 /// Prefix whose nybbles drive the walk
                 prefix: NodePrefix,
                 /// Index of the block to read at the next step
                 visit: usize,
                 /// Position in `prefix` of the next nybble to use
                 nybble_idx: usize,
                 /// Set once an element that is not a `Block` has been yielded
                 done: bool,
             }
             /// One step of a `NodeTreeVisitor` walk.
             #[derive(Debug, PartialEq, Clone)]
             struct NodeTreeVisitItem {
                 /// Index of the block that was read
                 block_idx: usize,
                 /// Nybble that was looked up in that block
                 nybble: u8,
                 /// Element found in the block for `nybble`
                 element: Element,
             }
             impl<'n> Iterator for NodeTreeVisitor<'n> {
                 type Item = NodeTreeVisitItem;
                 /// Look up the next nybble of the prefix in the current block.
                 ///
                 /// Iteration stops once the prefix is exhausted, or right after
                 /// yielding an element that is not a `Block`.
                 fn next(&mut self) -> Option<Self::Item> {
                     if self.done {
                         return None;
                     }
                     if self.nybble_idx >= self.prefix.nybbles_len() {
                         return None;
                     }
                     let nybble = self.prefix.get_nybble(self.nybble_idx);
                     self.nybble_idx += 1;
                     let block_idx = self.visit;
                     let element = self.nt[block_idx].get(nybble);
                     match element {
                         // descend into the designated block at the next step
                         Element::Block(next_idx) => self.visit = next_idx,
                         // revision or empty slot: nothing left to visit
                         Element::Rev(_) | Element::None => self.done = true,
                     }
                     Some(NodeTreeVisitItem {
                         block_idx,
                         nybble,
                         element,
                     })
                 }
             }
             impl NodeTreeVisitItem {
                 /// Tell whether this item ends the walk.
                 ///
                 /// Returns `Some(opt)` for a final item, `opt` being the `Revision`
                 /// it may represent, and `None` for a non terminal item.
                 fn final_revision(&self) -> Option<Option<Revision>> {
                     let revision = match self.element {
                         Element::Rev(rev) => Some(rev),
                         Element::None => None,
                         // another block follows: not the end of the walk
                         Element::Block(_) => return None,
                     };
                     Some(revision)
                 }
             }
             impl From<Vec<Block>> for NodeTree {
                 fn from(vec: Vec<Block>) -> Self {
                     Self::new(Box::new(vec))
                 }
             }
             impl fmt::Debug for NodeTree {
                 /// Display the readonly blocks, the growable blocks and the root,
                 /// in that order.
                 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                     let readonly: &[Block] = &*self.readonly;
                     write!(f, "readonly: {:?}, ", readonly)?;
                     write!(f, "growable: {:?}, ", self.growable)?;
                     write!(f, "root: {:?}", self.root)
                 }
             }
             impl Default for NodeTree {
                 /// Create a fully mutable empty NodeTree
                 fn default() -> Self {
                     NodeTree::new(Box::new(Vec::new()))
                 }
             }
             impl NodeMap for NodeTree {
                 /// Find the revision matching `prefix`, after validation of the
                 /// tree lookup result against `idx`.
                 fn find_bin<'a>(
                     &self,
                     idx: &impl RevlogIndex,
                     prefix: NodePrefix,
                 ) -> Result<Option<Revision>, NodeMapError> {
                     let candidate = self.lookup(prefix)?;
                     let (found, _shortest) =
                         validate_candidate(idx, prefix, candidate)?;
                     Ok(found)
                 }
                 /// Same lookup as `find_bin`, but return the length reported by
                 /// the validation instead of the revision itself.
                 fn unique_prefix_len_bin<'a>(
                     &self,
                     idx: &impl RevlogIndex,
                     prefix: NodePrefix,
                 ) -> Result<Option<usize>, NodeMapError> {
                     let candidate = self.lookup(prefix)?;
                     let (found, shortest) =
                         validate_candidate(idx, prefix, candidate)?;
                     Ok(found.map(|_rev| shortest))
                 }
             }
             #[cfg(test)]
             mod tests {
                 use super::NodeMapError::*;
                 use super::*;
                 use crate::revlog::node::{hex_pad_right, Node};
                 use std::collections::HashMap;
                 /// Creates a `Block` using a syntax close to the `Debug` output
                 ///
                 /// Example: `block! {13: Rev(15), 5: Block(2)}`
                 macro_rules! block {
                     // zero or more comma-separated `nybble: Variant(value)` entries
                     {$($nybble:tt : $variant:ident($val:tt)),*} => (
                         {
                             let mut block = Block::new();
                             // one `set` call per entry
                             $(block.set($nybble, Element::$variant($val)));*;
                             block
                         }
                     )
                 }
                 #[test]
                 fn test_block_debug() {
                     // only the slots that were set are expected in the output
                     let mut block = Block::new();
                     block.set(1, Element::Rev(3));
                     block.set(10, Element::Block(0));
                     let rendered = format!("{:?}", block);
                     assert_eq!(rendered, "{1: Rev(3), 10: Block(0)}");
                 }
                 #[test]
                 fn test_block_macro() {
                     let single = block! {5: Block(2)};
                     assert_eq!(format!("{:?}", single), "{5: Block(2)}");
                     // entries given out of order are displayed by increasing nybble
                     let pair = block! {13: Rev(15), 5: Block(2)};
                     assert_eq!(format!("{:?}", pair), "{5: Block(2), 13: Rev(15)}");
                 }
                 #[test]
                 fn test_raw_block() {
                     // The first five 4-byte slots receive the big endian form of
                     // the values below, all other bytes stay at 255.
                     let values = [0_i32, 15, -2, -1, -3];
                     let mut raw = [255u8; 64];
                     for (slot, val) in raw.chunks_exact_mut(4).zip(values.iter()) {
                         slot.copy_from_slice(&val.to_be_bytes());
                     }
                     let (block, _) = Block::from_bytes(&raw).unwrap();
                     // 0 and 15 are read back as block indexes
                     assert_eq!(block.get(0), Element::Block(0));
                     assert_eq!(block.get(1), Element::Block(15));
                     // -2 and -3 are read back as revisions 0 and 1
                     assert_eq!(block.get(2), Element::Rev(0));
                     assert_eq!(block.get(4), Element::Rev(1));
                     // -1 is read back as an empty slot
                     assert_eq!(block.get(3), Element::None);
                 }
                 /// Minimal in-memory index for tests: revision number to `Node`
                 type TestIndex = HashMap<Revision, Node>;
                 impl RevlogIndex for TestIndex {
                     fn node(&self, rev: Revision) -> Option<&Node> {
                         HashMap::get(self, &rev)
                     }
                     fn len(&self) -> usize {
                         // explicit path to the inherent `HashMap` method
                         HashMap::len(self)
                     }
                 }
                 /// Build a full `Node` from a hexadecimal prefix padded with zeros
                 /// on the right
                 ///
                 /// Test data does not have to spell out very long hexadecimal
                 /// strings, and does not depend on the actual hash size.
                 #[cfg(test)]
                 fn pad_node(hex: &str) -> Node {
                     let padded = hex_pad_right(hex);
                     Node::from_hex(&padded).unwrap()
                 }
                 /// Register `rev` in `idx` with the node obtained by right-padding
                 /// `hex` with zeros
                 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
                     let node = pad_node(hex);
                     idx.insert(rev, node);
                 }
                 /// Small three-block tree shared by several tests
                 fn sample_nodetree() -> NodeTree {
                     let blocks = vec![
                         block![0: Rev(9)],
                         block![0: Rev(0), 1: Rev(9)],
                         block![0: Block(1), 1:Rev(1)],
                     ];
                     NodeTree::from(blocks)
                 }
+                fn hex(s: &str) -> NodePrefix {
                     // Test shortcut: panics (`unwrap`) if `NodePrefix::from_hex`
                     // rejects `s`
+                    NodePrefix::from_hex(s).unwrap()
+                }
                 #[test]
                 fn test_nt_debug() {
                     // the last readonly block is also displayed as the root
                     let expected = "readonly: \
                          [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
                          growable: [], \
                          root: {0: Block(1), 1: Rev(1)}";
                     assert_eq!(format!("{:?}", sample_nodetree()), expected);
                 }
                 #[test]
                 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
                     // Tree made of a single readonly block holding revision 1:
                     // prefixes of its node resolve to it, other prefixes to nothing
                     let mut idx: TestIndex = HashMap::new();
                     pad_insert(&mut idx, 1, "1234deadcafe");
                     let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
-                    assert_eq!(nt.find_hex(&idx, "1")?, Some(1));
+                    assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1));
-                    assert_eq!(nt.find_hex(&idx, "12")?, Some(1));
+                    assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1));
-                    assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1));
+                    assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1));
                     // "1a" reaches the same slot of the block, yet does not match
                     // the node recorded in the index
-                    assert_eq!(nt.find_hex(&idx, "1a")?, None);
+                    assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
-                    assert_eq!(nt.find_hex(&idx, "ab")?, None);
+                    assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
                     // and with full binary Nodes
                     assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
                     let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
                     assert_eq!(nt.find_node(&idx, &unknown)?, None);
                     Ok(())
                 }
                 #[test]
                 fn test_immutable_find_one_jump() {
                     // Lookups in the sample tree, whose root (last block) sends
                     // nybble 0 to block 1
                     let mut idx = TestIndex::new();
                     pad_insert(&mut idx, 9, "012");
                     pad_insert(&mut idx, 0, "00a");
                     let nt = sample_nodetree();
-                    assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults));
+                    assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
-                    assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
+                    assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9)));
-                    assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
+                    assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
-                    assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
+                    assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0)));
-                    assert_eq!(nt.unique_prefix_len_hex(&idx, "00a"), Ok(Some(3)));
+                    assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
                     // a prefix made of zeros only is expected to give the null
                     // revision
-                    assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
+                    assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
                 }
                 #[test]
                 fn test_mutated_find() -> Result<(), NodeMapError> {
                     // Lookups in a tree whose mutable part (growable block and
                     // root) is hand-built on top of the sample readonly blocks
                     let mut idx = TestIndex::new();
                     pad_insert(&mut idx, 9, "012");
                     pad_insert(&mut idx, 0, "00a");
                     pad_insert(&mut idx, 2, "cafe");
                     pad_insert(&mut idx, 3, "15");
                     pad_insert(&mut idx, 1, "10");
                     let nt = NodeTree {
                         readonly: sample_nodetree().readonly,
                         growable: vec![block![0: Rev(1), 5: Rev(3)]],
                         // NOTE(review): `Block(3)` presumably designates the
                         // growable block, numbered after the three readonly
                         // ones -- confirm against the indexing of `NodeTree`
                         root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
                         masked_inner_blocks: 1,
                     };
-                    assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
+                    assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1));
-                    assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
+                    assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2));
-                    assert_eq!(nt.unique_prefix_len_hex(&idx, "c")?, Some(1));
+                    assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
-                    assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
+                    assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
-                    assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
+                    assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
-                    assert_eq!(nt.unique_prefix_len_hex(&idx, "000")?, Some(3));
+                    assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
-                    assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
+                    assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9));
                     // `masked_inner_blocks` (1) plus one for the replaced root
                     assert_eq!(nt.masked_readonly_blocks(), 2);
                     Ok(())
                 }
                 /// Test fixture pairing a `TestIndex` with a `NodeTree`, both fed
                 /// by `TestNtIndex::insert`
                 struct TestNtIndex {
                     /// Revision to node mapping used to validate lookups
                     index: TestIndex,
                     /// Tree under test
                     nt: NodeTree,
                 }
                 impl TestNtIndex {
                     fn new() -> Self {
                         TestNtIndex {
                             index: HashMap::new(),
                             nt: NodeTree::default(),
                         }
                     }
                     /// Record `rev` with the node made of `hex` padded on the right,
                     /// first in the index, then in the tree
                     fn insert(
                         &mut self,
                         rev: Revision,
                         hex: &str,
                     ) -> Result<(), NodeMapError> {
                         let node = pad_node(hex);
                         let for_index = node.clone();
                         self.index.insert(rev, for_index);
                         self.nt.insert(&self.index, &node, rev)?;
                         Ok(())
                     }
                     fn find_hex(
                         &self,
                         prefix: &str,
                     ) -> Result<Option<Revision>, NodeMapError> {
                         // Look the hexadecimal `prefix` up in the tree, validated
                         // against the fixture's own index
-                        self.nt.find_hex(&self.index, prefix)
+                        self.nt.find_bin(&self.index, hex(prefix))
                     }
                     fn unique_prefix_len_hex(
                         &self,
                         prefix: &str,
                     ) -> Result<Option<usize>, NodeMapError> {
                         // Same as `find_hex`, for the unique prefix length query
-                        self.nt.unique_prefix_len_hex(&self.index, prefix)
+                        self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
                     }
                     /// Rebuild the fixture around a tree made of readonly blocks only
                     ///
                     /// The new tree is created from the former readonly blocks,
                     /// followed by the growable blocks, then by the root. The index
                     /// is carried over unchanged.
                     fn commit(self) -> Self {
                         let mut as_vec: Vec<Block> =
                             self.nt.readonly.iter().cloned().collect();
                         as_vec.extend(self.nt.growable);
                         // the root goes last, as expected from a readonly part
                         as_vec.push(self.nt.root);
                         Self {
                             index: self.index,
                             nt: NodeTree::from(as_vec),
                         }
                     }
                 }
                 #[test]
                 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
                     // Insertions in a tree without any readonly block, checking
                     // lookups and the number of growable blocks after each step
                     let mut idx = TestNtIndex::new();
                     idx.insert(0, "1234")?;
                     assert_eq!(idx.find_hex("1")?, Some(0));
                     assert_eq!(idx.find_hex("12")?, Some(0));
                     // let's trigger a simple split
                     idx.insert(1, "1a34")?;
                     assert_eq!(idx.nt.growable.len(), 1);
                     assert_eq!(idx.find_hex("12")?, Some(0));
                     assert_eq!(idx.find_hex("1a")?, Some(1));
                     // reinserting is a no_op
                     idx.insert(1, "1a34")?;
                     assert_eq!(idx.nt.growable.len(), 1);
                     assert_eq!(idx.find_hex("12")?, Some(0));
                     assert_eq!(idx.find_hex("1a")?, Some(1));
                     // a second node under "1a" makes that prefix ambiguous and
                     // adds one block
                     idx.insert(2, "1a01")?;
                     assert_eq!(idx.nt.growable.len(), 2);
                     assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
                     assert_eq!(idx.find_hex("12")?, Some(0));
                     assert_eq!(idx.find_hex("1a3")?, Some(1));
                     assert_eq!(idx.find_hex("1a0")?, Some(2));
                     assert_eq!(idx.find_hex("1a12")?, None);
                     // now let's make it split and create more than one additional block
                     idx.insert(3, "1a345")?;
                     assert_eq!(idx.nt.growable.len(), 4);
                     // "1a34" was padded with zeros, hence the "1a340" prefix
                     assert_eq!(idx.find_hex("1a340")?, Some(1));
                     assert_eq!(idx.find_hex("1a345")?, Some(3));
                     assert_eq!(idx.find_hex("1a341")?, None);
                     // there's no readonly block to mask
                     assert_eq!(idx.nt.masked_readonly_blocks(), 0);
                     Ok(())
                 }
                 #[test]
                 fn test_unique_prefix_len_zero_prefix() {
                     // Unique prefix length of nodes that start with zeros
                     let mut idx = TestNtIndex::new();
                     idx.insert(0, "00000abcd").unwrap();
                     assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
                     // in the nodetree proper, this will be found at the first nybble
                     // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
                     // but the first difference with `NULL_NODE`
                     assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
                     assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
                     // same with odd result
                     idx.insert(1, "00123").unwrap();
                     assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
                     assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
                     // these are unchanged of course
                     assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
                     assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
                 }
                 #[test]
                 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
                     // check that the splitting loop is long enough
                     let mut nt_idx = TestNtIndex::new();
                     let nt = &mut nt_idx.nt;
                     let idx = &mut nt_idx.index;
                     let node0_hex = hex_pad_right("444444");
                     // node1 only differs from node0 by its very last nybble
                     let mut node1_hex = node0_hex.clone();
                     node1_hex.pop();
                     node1_hex.push('5');
                     let node0 = Node::from_hex(&node0_hex).unwrap();
                     let node1 = Node::from_hex(&node1_hex).unwrap();
                     idx.insert(0, node0.clone());
                     nt.insert(idx, &node0, 0)?;
                     idx.insert(1, node1.clone());
                     nt.insert(idx, &node1, 1)?;
                     // both full nodes must still be told apart
                     assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
                     assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
                     Ok(())
                 }
                 #[test]
                 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
                     // Insertions on top of a committed tree, checking lookups and
                     // the count of masked readonly blocks after each step
                     let mut idx = TestNtIndex::new();
                     idx.insert(0, "1234")?;
                     idx.insert(1, "1235")?;
                     idx.insert(2, "131")?;
                     idx.insert(3, "cafe")?;
                     let mut idx = idx.commit();
                     assert_eq!(idx.find_hex("1234")?, Some(0));
                     assert_eq!(idx.find_hex("1235")?, Some(1));
                     assert_eq!(idx.find_hex("131")?, Some(2));
                     assert_eq!(idx.find_hex("cafe")?, Some(3));
                     // we did not add anything since init from readonly
                     assert_eq!(idx.nt.masked_readonly_blocks(), 0);
                     idx.insert(4, "123A")?;
                     assert_eq!(idx.find_hex("1234")?, Some(0));
                     assert_eq!(idx.find_hex("1235")?, Some(1));
                     assert_eq!(idx.find_hex("131")?, Some(2));
                     assert_eq!(idx.find_hex("cafe")?, Some(3));
                     assert_eq!(idx.find_hex("123A")?, Some(4));
                     // we masked blocks for all prefixes of "123", including the root
                     assert_eq!(idx.nt.masked_readonly_blocks(), 4);
                     // NOTE(review): looks like leftover debugging output
                     eprintln!("{:?}", idx.nt);
                     idx.insert(5, "c0")?;
                     assert_eq!(idx.find_hex("cafe")?, Some(3));
                     assert_eq!(idx.find_hex("c0")?, Some(5));
                     assert_eq!(idx.find_hex("c1")?, None);
                     assert_eq!(idx.find_hex("1234")?, Some(0));
                     // inserting "c0" is just splitting the 'c' slot of the mutable root,
                     // it doesn't mask anything
                     assert_eq!(idx.nt.masked_readonly_blocks(), 4);
                     Ok(())
                 }
                 #[test]
                 fn test_invalidate_all() -> Result<(), NodeMapError> {
                     let inserted = [(0, "1234"), (1, "1235"), (2, "131"), (3, "cafe")];
                     let mut idx = TestNtIndex::new();
                     for (rev, hex) in inserted.iter() {
                         idx.insert(*rev, hex)?;
                     }
                     let mut idx = idx.commit();
                     idx.nt.invalidate_all();
                     // nothing can be found any more
                     for (_rev, hex) in inserted.iter() {
                         assert_eq!(idx.find_hex(hex)?, None);
                     }
                     // all the readonly blocks have been masked, this is the
                     // conventional expected response
                     let expected_masked = idx.nt.readonly.len() + 1;
                     assert_eq!(idx.nt.masked_readonly_blocks(), expected_masked);
                     Ok(())
                 }
                 #[test]
                 fn test_into_added_empty() {
                     // a tree that was not modified has nothing to report as added
                     let added = sample_nodetree().into_readonly_and_added().1;
                     assert!(added.is_empty());
                     let added_bytes =
                         sample_nodetree().into_readonly_and_added_bytes().1;
                     assert!(added_bytes.is_empty());
                 }
                 #[test]
                 fn test_into_added_bytes() -> Result<(), NodeMapError> {
                     let mut idx = TestNtIndex::new();
                     idx.insert(0, "1234")?;
                     let mut idx = idx.commit();
                     idx.insert(4, "cafe")?;
                     let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
                     // only the root block has been changed
                     assert_eq!(bytes.len(), size_of::<Block>());
                     // slot of nybble 1: big endian for -2
                     let slot_1 = &bytes[4..2 * 4];
                     assert_eq!(slot_1, [255, 255, 255, 254]);
                     // slot of nybble 12 (`c`): big endian for -6
                     let slot_c = &bytes[12 * 4..13 * 4];
                     assert_eq!(slot_c, [255, 255, 255, 250]);
                     Ok(())
                 }
             }

rust/hg-cpython/src/revlog.rs

0 +4 -5

             // revlog.rs
             //
             // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             use crate::{
                 cindex,
                 utils::{node_from_py_bytes, node_from_py_object},
             };
             use cpython::{
                 buffer::{Element, PyBuffer},
                 exc::{IndexError, ValueError},
                 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyModule, PyObject,
                 PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
             };
             use hg::{
                 nodemap::{Block, NodeMapError, NodeTree},
-                revlog::{nodemap::NodeMap, RevlogIndex},
+                revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
                 Revision,
             };
             use std::cell::RefCell;
             /// Return a Struct implementing the Graph trait
             ///
             /// A `MixedIndex` object gives the result of its `clone_cindex`, any
             /// other object is handed over to `cindex::Index::new`.
             pub(crate) fn pyindex_to_graph(
                 py: Python,
                 index: PyObject,
             ) -> PyResult<cindex::Index> {
                 if let Ok(midx) = index.extract::<MixedIndex>(py) {
                     return Ok(midx.clone_cindex(py));
                 }
                 cindex::Index::new(py, index)
             }
             py_class!(pub class MixedIndex |py| {
                 data cindex: RefCell<cindex::Index>;
                 data nt: RefCell<Option<NodeTree>>;
                 data docket: RefCell<Option<PyObject>>;
                 // Holds a reference to the mmap'ed persistent nodemap data
                 data mmap: RefCell<Option<PyBuffer>>;
                 // Python-side constructor: delegates to `Self::new` with the given
                 // index object
                 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
                     Self::new(py, cindex)
                 }
                 /// Compatibility layer for Python consumers that need to access the
                 /// C index
                 ///
                 /// The only use case so far is `scmutil.shortesthexnodeidprefix`,
                 /// which may need to build a custom `nodetree`, based on a specified
                 /// revset.
                 /// With a Rust implementation of the nodemap, we will be able to get
                 /// rid of this, by exposing our own standalone nodemap class,
                 /// ready to accept `MixedIndex`.
                 def get_cindex(&self) -> PyResult<PyObject> {
                     let cindex = self.cindex(py).borrow();
                     Ok(cindex.inner().clone_ref(py))
                 }
                 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
                 /// Return the Revision of `node` if found, `None` otherwise
                 ///
                 /// Raises a bare `error.RevlogError` in case of ambiguity, same as
                 /// the C version does
                 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
                     let nt_ref = self.get_nodetree(py)?.borrow();
                     let nt = nt_ref.as_ref().unwrap();
                     let idx = &*self.cindex(py).borrow();
                     let node = node_from_py_bytes(py, &node)?;
                     match nt.find_bin(idx, node.into()) {
                         Ok(found) => Ok(found),
                         Err(e) => Err(nodemap_error(py, e)),
                     }
                 }
                 /// Same as `get_rev()`, except that a bare `error.RevlogError` is
                 /// raised if the node is not found.
                 ///
                 /// There is no need to repeat `node` in the exception:
                 /// `mercurial/revlog.py` will catch and rewrap with it
                 def rev(&self, node: PyBytes) -> PyResult<Revision> {
                     match self.get_rev(py, node)? {
                         Some(rev) => Ok(rev),
                         None => Err(revlog_error(py)),
                     }
                 }
                 /// return True if the node exists in the index
                 def has_node(&self, node: PyBytes) -> PyResult<bool> {
                     let found = self.get_rev(py, node)?;
                     Ok(found.is_some())
                 }
                 /// find length of shortest hex nodeid of a binary ID
                 ///
                 /// A node that is not found gives a bare revlog error
                 /// (`revlog_error`), a lookup failure is converted by
                 /// `nodemap_error`.
                 def shortest(&self, node: PyBytes) -> PyResult<usize> {
                     let nt_ref = self.get_nodetree(py)?.borrow();
                     let nt = nt_ref.as_ref().unwrap();
                     let idx = &*self.cindex(py).borrow();
                     let node = node_from_py_bytes(py, &node)?;
                     nt.unique_prefix_len_node(idx, &node)
                         .map_err(|e| nodemap_error(py, e))?
                         .ok_or_else(|| revlog_error(py))
                 }
                 /// Look up a hexadecimal node prefix, returning the full binary
                 /// node of the matching revision, if any
                 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
                     let opt = self.get_nodetree(py)?.borrow();
                     let nt = opt.as_ref().unwrap();
                     let idx = &*self.cindex(py).borrow();
                     // the prefix is read from a `PyString` when the `python3-sys`
                     // feature is enabled, from `PyBytes` otherwise
                     let node_as_string = if cfg!(feature = "python3-sys") {
                         node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
                     }
                     else {
                         let node = node.extract::<PyBytes>(py)?;
                         String::from_utf8_lossy(node.data(py)).to_string()
                     };
-                    nt.find_hex(idx, &node_as_string)
                     // a prefix that cannot be parsed raises a Python `ValueError`
+                    let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?;
+                    nt.find_bin(idx, prefix)
                         // TODO make an inner API returning the node directly
                         .map(|opt| opt.map(
                             |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
                         .map_err(|e| nodemap_error(py, e))
                 }
                 /// append an index entry
                 ///
                 /// The entry is appended to the C index, then its node (item 7 of
                 /// `tup`) is inserted in the nodetree with the new revision number.
                 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
                     if tup.len(py) < 8 {
                         // this is better than the panic promised by tup.get_item()
                         return Err(
                             PyErr::new::<IndexError, _>(py, "tuple index out of range"))
                     }
                     let node_bytes = tup.get_item(py, 7).extract(py)?;
                     let node = node_from_py_object(py, &node_bytes)?;
                     let mut idx = self.cindex(py).borrow_mut();
                     // the new revision number is the index length before appending
                     let rev = idx.len() as Revision;
                     idx.append(py, tup)?;
                     self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
                         .insert(&*idx, &node, rev)
                         .map_err(|e| nodemap_error(py, e))?;
                     Ok(py.None())
                 }
                 def __delitem__(&self, key: PyObject) -> PyResult<()> {
                     // Serves both `del idx[r]` and `del idx[r1:r2]`: the deletion
                     // itself is delegated to the wrapped index object.
                     self.cindex(py).borrow().inner().del_item(py, key)?;
                     // The node tree is then wiped and rebuilt from the index.
                     let mut tree_slot = self.get_nodetree(py)?.borrow_mut();
                     let tree = tree_slot.as_mut().unwrap();
                     tree.invalidate_all();
                     self.fill_nodemap(py, tree)?;
                     Ok(())
                 }
                 //
                 // Reforwarded C index API
                 //
                 // index_methods (tp_methods). Same ordering as in revlog.c
                 /// return the gca set of the given revs
                 ///
                 /// Positional and keyword arguments are passed through unchanged to
                 /// the `ancestors` method of the underlying C index.
                 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
                     self.call_cindex(py, "ancestors", args, kw)
                 }
                 /// return the heads of the common ancestors of the given revs
                 ///
                 /// Positional and keyword arguments are passed through unchanged to
                 /// the `commonancestorsheads` method of the underlying C index.
                 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
                     self.call_cindex(py, "commonancestorsheads", args, kw)
                 }
                 /// Clear the index caches and inner py_class data.
                 /// It is Python's responsibility to call `update_nodemap_data` again.
                 ///
                 /// The node tree, docket and mmap buffer slots are all emptied before
                 /// the call is forwarded to the `clearcaches` method of the underlying
                 /// C index.
                 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
                     // The node tree goes first: it may have been loaded from the
                     // buffer held in the mmap slot (see `inner_update_nodemap_data`).
                     self.nt(py).borrow_mut().take();
                     self.docket(py).borrow_mut().take();
                     self.mmap(py).borrow_mut().take();
                     self.call_cindex(py, "clearcaches", args, kw)
                 }
                 /// get an index entry
                 ///
                 /// Positional and keyword arguments are passed through unchanged to
                 /// the `get` method of the underlying C index.
                 def get(&self, *args, **kw) -> PyResult<PyObject> {
                     self.call_cindex(py, "get", args, kw)
                 }
                 /// compute phases
                 ///
                 /// Positional and keyword arguments are passed through unchanged to
                 /// the `computephasesmapsets` method of the underlying C index.
                 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
                     self.call_cindex(py, "computephasesmapsets", args, kw)
                 }
                 /// reachableroots
                 ///
                 /// Positional and keyword arguments are passed through unchanged to
                 /// the `reachableroots2` method of the underlying C index.
                 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
                     self.call_cindex(py, "reachableroots2", args, kw)
                 }
                 /// get head revisions
                 ///
                 /// Positional and keyword arguments are passed through unchanged to
                 /// the `headrevs` method of the underlying C index.
                 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
                     self.call_cindex(py, "headrevs", args, kw)
                 }
                 /// get filtered head revisions
                 ///
                 /// Positional and keyword arguments are passed through unchanged to
                 /// the `headrevsfiltered` method of the underlying C index.
                 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
                     self.call_cindex(py, "headrevsfiltered", args, kw)
                 }
                 /// True if the object is a snapshot
                 ///
                 /// Positional and keyword arguments are passed through unchanged to
                 /// the `issnapshot` method of the underlying C index.
                 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
                     self.call_cindex(py, "issnapshot", args, kw)
                 }
                 /// Gather snapshot data in a cache dict
                 ///
                 /// Positional and keyword arguments are passed through unchanged to
                 /// the `findsnapshots` method of the underlying C index.
                 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
                     self.call_cindex(py, "findsnapshots", args, kw)
                 }
                 /// determine revisions with deltas to reconstruct fulltext
                 ///
                 /// Positional and keyword arguments are passed through unchanged to
                 /// the `deltachain` method of the underlying C index.
                 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
                     self.call_cindex(py, "deltachain", args, kw)
                 }
                 /// slice planned chunk read to reach a density threshold
                 ///
                 /// Positional and keyword arguments are passed through unchanged to
                 /// the `slicechunktodensity` method of the underlying C index.
                 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
                     self.call_cindex(py, "slicechunktodensity", args, kw)
                 }
                 /// stats for the index
                 ///
                 /// Positional and keyword arguments are passed through unchanged to
                 /// the `stats` method of the underlying C index.
                 def stats(&self, *args, **kw) -> PyResult<PyObject> {
                     self.call_cindex(py, "stats", args, kw)
                 }
                 // index_sequence_methods and index_mapping_methods.
                 //
                 // Since we call back through the high level Python API,
                 // there's no point making a distinction between index_get
                 // and index_getitem.
                 def __len__(&self) -> PyResult<usize> {
                     // The length is the one reported by the wrapped index object
                     // through the Python-level API.
                     self.cindex(py).borrow().inner().len(py)
                 }
                 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
                     // Keys that can be read as a `Revision` are rebuilt as a fresh
                     // Python object first. This looks redundant, but `index_getitem`
                     // does not handle conversion from PyLong, which expressions such
                     // as [e for e in index] use internally.
                     // There does not seem to be a direct way to call
                     // PySequence_GetItem (which does the job), although that would
                     // possibly perform better.
                     let normalized = if let Ok(rev) = key.extract::<Revision>(py) {
                         rev.to_py_object(py).into_object()
                     } else {
                         // Anything else is handed over untouched.
                         key
                     };
                     self.cindex(py).borrow().inner().get_item(py, normalized)
                 }
                 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
                     // Item assignment is delegated as-is to the wrapped index object;
                     // the node tree is not touched here.
                     self.cindex(py).borrow().inner().set_item(py, key, value)
                 }
                 def __contains__(&self, item: PyObject) -> PyResult<bool> {
                     // ObjectProtocol does not seem to offer contains(), hence this
                     // equivalent of the index_contains() defined in revlog.c
                     let cindex = self.cindex(py).borrow();
                     // Revision numbers: valid from -1 up to (excluding) the length.
                     if let Ok(rev) = item.extract::<Revision>(py) {
                         return Ok(
                             rev >= -1 && rev < cindex.inner().len(py)? as Revision
                         );
                     }
                     // Anything else is checked through `has_node` on the wrapped
                     // index object.
                     let has_node_args = PyTuple::new(py, &[item]);
                     cindex
                         .inner()
                         .call_method(py, "has_node", has_node_args, None)?
                         .extract(py)
                 }
                 /// Return the full nodemap data as bytes.
                 ///
                 /// Python entry point for `inner_nodemap_data_all`.
                 def nodemap_data_all(&self) -> PyResult<PyBytes> {
                     self.inner_nodemap_data_all(py)
                 }
                 /// Return the data needed for an incremental nodemap update.
                 ///
                 /// Python entry point for `inner_nodemap_data_incremental`.
                 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
                     self.inner_nodemap_data_incremental(py)
                 }
                 /// Load the nodemap from the given docket and data buffer.
                 ///
                 /// Python entry point for `inner_update_nodemap_data`.
                 def update_nodemap_data(
                     &self,
                     docket: PyObject,
                     nm_data: PyObject
                 ) -> PyResult<PyObject> {
                     self.inner_update_nodemap_data(py, docket, nm_data)
                 }
             });
             impl MixedIndex {
                 /// Build a `MixedIndex` around the given Python index object.
                 ///
                 /// Besides the wrapped index, the three remaining data slots all
                 /// start out empty (`None`).
                 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
                     Self::create_instance(
                         py,
                         RefCell::new(cindex::Index::new(py, cindex)?),
                         RefCell::new(None),
                         RefCell::new(None),
                         RefCell::new(None),
                     )
                 }
                 /// This is scaffolding at this point, but it could also become
                 /// a way to start a persistent nodemap or perform a
                 /// vacuum / repack operation
                 ///
                 /// Inserts every revision of the wrapped index into `nt`, in
                 /// increasing revision order. Borrows the index immutably for the
                 /// duration of the call.
                 fn fill_nodemap(
                     &self,
                     py: Python,
                     nt: &mut NodeTree,
                 ) -> PyResult<PyObject> {
                     let index = self.cindex(py).borrow();
                     for r in 0..index.len() {
                         let rev = r as Revision;
                         // in this case node() won't ever return None
                         nt.insert(&*index, index.node(rev).unwrap(), rev)
                             .map_err(|e| nodemap_error(py, e))?
                     }
                     Ok(py.None())
                 }
                 /// Return the node tree slot, building the tree first if needed.
                 ///
                 /// When the slot is empty, a tree is created without any readonly
                 /// data and filled from the whole index (see `fill_nodemap`), so
                 /// the returned cell always holds `Some` on success.
                 fn get_nodetree<'a>(
                     &'a self,
                     py: Python<'a>,
                 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
                     if self.nt(py).borrow().is_none() {
                         let readonly = Box::new(Vec::new());
                         let mut nt = NodeTree::load_bytes(readonly, 0);
                         self.fill_nodemap(py, &mut nt)?;
                         self.nt(py).borrow_mut().replace(nt);
                     }
                     Ok(self.nt(py))
                 }
                 /// forward a method call to the underlying C index
                 fn call_cindex(
                     &self,
                     py: Python,
                     name: &str,
                     args: &PyTuple,
                     kwargs: Option<&PyDict>,
                 ) -> PyResult<PyObject> {
                     self.cindex(py)
                         .borrow()
                         .inner()
                         .call_method(py, name, args, kwargs)
                 }
                 /// Return a new reference to the wrapped index.
                 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
                     self.cindex(py).borrow().clone_ref(py)
                 }
                 /// Returns the full nodemap bytes to be written as-is to disk
                 ///
                 /// The node tree is taken out of its slot in the process, leaving
                 /// the slot empty.
                 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
                     let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
                     let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
                     // If there's anything readonly, we need to build the data again from
                     // scratch
                     let bytes = if readonly.len() > 0 {
                         let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
                         self.fill_nodemap(py, &mut nt)?;
                         let (readonly, bytes) = nt.into_readonly_and_added_bytes();
                         assert_eq!(readonly.len(), 0);
                         bytes
                     } else {
                         bytes
                     };
                     let bytes = PyBytes::new(py, &bytes);
                     Ok(bytes)
                 }
                 /// Returns the last saved docket along with the size of any changed data
                 /// (in number of blocks), and said data as bytes.
                 ///
                 /// Returns `None` when no docket has been saved. Otherwise the node
                 /// tree is taken out of its slot, leaving the slot empty.
                 fn inner_nodemap_data_incremental(
                     &self,
                     py: Python,
                 ) -> PyResult<PyObject> {
                     let docket = self.docket(py).borrow();
                     let docket = match docket.as_ref() {
                         Some(d) => d,
                         None => return Ok(py.None()),
                     };
                     let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
                     let masked_blocks = node_tree.masked_readonly_blocks();
                     let (_, data) = node_tree.into_readonly_and_added_bytes();
                     let changed = masked_blocks * std::mem::size_of::<Block>();
                     Ok((docket, changed, PyBytes::new(py, &data))
                         .to_py_object(py)
                         .into_object())
                 }
                 /// Update the nodemap from the new (mmaped) data.
                 /// The docket is kept as a reference for later incremental calls.
                 ///
                 /// Raises `ValueError` when `nm_data` does not expose a C-contiguous
                 /// buffer of bytes. Revisions of the index past the docket's
                 /// `tip_rev` are inserted on top of the loaded data.
                 fn inner_update_nodemap_data(
                     &self,
                     py: Python,
                     docket: PyObject,
                     nm_data: PyObject,
                 ) -> PyResult<PyObject> {
                     let buf = PyBuffer::get(py, &nm_data)?;
                     let len = buf.item_count();
                     // Build a slice from the mmap'ed buffer data
                     let cbuf = buf.buf_ptr();
                     let bytes = if std::mem::size_of::<u8>() == buf.item_size()
                         && buf.is_c_contiguous()
                         && u8::is_compatible_format(buf.format())
                     {
                         // SAFETY: the checks above establish a contiguous buffer of
                         // `len` one-byte items, and the buffer is kept alive by
                         // storing it in the mmap slot below.
                         unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
                     } else {
                         return Err(PyErr::new::<ValueError, _>(
                             py,
                             "Nodemap data buffer has an invalid memory representation"
                                 .to_string(),
                         ));
                     };
                     // A node tree loaded by a previous call may read from the buffer
                     // that is about to be released: drop it first, so that an early
                     // error return below cannot leave it pointing at released data.
                     self.nt(py).borrow_mut().take();
                     // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
                     // pointer.
                     self.mmap(py).borrow_mut().replace(buf);
                     let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
                     let data_tip =
                         docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
                     self.docket(py).borrow_mut().replace(docket.clone_ref(py));
                     let idx = self.cindex(py).borrow();
                     let current_tip = idx.len();
                     // Catch up with the revisions added since the data was written
                     for rev in (data_tip + 1)..current_tip as Revision {
                         // in this case node() won't ever return None
                         nt.insert(&*idx, idx.node(rev).unwrap(), rev)
                             .map_err(|e| nodemap_error(py, e))?
                     }
                     *self.nt(py).borrow_mut() = Some(nt);
                     Ok(py.None())
                 }
             }
             /// Build a Python error from `mercurial.error.RevlogError`.
             ///
             /// If importing the module or looking up the class fails, that failure
             /// is returned instead.
             fn revlog_error(py: Python) -> PyErr {
                 let exc_class = py
                     .import("mercurial.error")
                     .and_then(|module| module.get(py, "RevlogError"));
                 match exc_class {
                     Ok(cls) => PyErr::from_instance(py, cls),
                     Err(lookup_err) => lookup_err,
                 }
             }
             /// Build the `ValueError` reporting a revision that the nodemap knows
             /// about but the revlog index does not.
             fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
                 let message = format!(
                     "Inconsistency: Revision {} found in nodemap \
                      is not in revlog index",
                     rev
                 );
                 PyErr::new::<ValueError, _>(py, message)
             }
             /// Standard treatment of NodeMapError
             ///
             /// `MultipleResults` becomes the error built by `revlog_error`, and
             /// `RevisionNotInIndex` the one built by `rev_not_in_index`.
             fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
                 match err {
                     NodeMapError::MultipleResults => revlog_error(py),
                     NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
-                    NodeMapError::InvalidNodePrefix => {
-                        PyErr::new::<ValueError, _>(py, "Invalid node or prefix")
                 }
             }
             /// Create the `<package>.revlog` module, with `__package__` given from
             /// the parent, and register it in `sys.modules`.
             pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
                 let qualified_name = format!("{}.revlog", package);
                 let module = PyModule::new(py, &qualified_name)?;
                 module.add(py, "__package__", package)?;
                 module.add(py, "__doc__", "RevLog - Rust implementations")?;
                 module.add_class::<MixedIndex>(py)?;

                 // Register the module under its dotted name in `sys.modules`.
                 let sys = PyModule::import(py, "sys")?;
                 let modules: PyDict = sys.get(py, "modules")?.extract(py)?;
                 modules.set_item(py, &qualified_name, &module)?;
                 Ok(module)
             }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages