upstream/mercurial-mirror Commit - r51879:eccf7dc7

revlog: make the rust test for node hex prefix resolution exercise the nodemap

Arseniy Alekseyev -

r51879:eccf7dc7 stable

parent child

rust/hg-core/src/revlog/mod.rs

0 +19 -1

             // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
             //           and Mercurial contributors
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Mercurial concepts for handling revision history
             pub mod node;
             pub mod nodemap;
             mod nodemap_docket;
             pub mod path_encode;
             pub use node::{FromHexError, Node, NodePrefix};
             pub mod changelog;
             pub mod filelog;
             pub mod index;
             pub mod manifest;
             pub mod patch;
             use std::borrow::Cow;
             use std::io::Read;
             use std::ops::Deref;
             use std::path::Path;
             use flate2::read::ZlibDecoder;
             use sha1::{Digest, Sha1};
             use std::cell::RefCell;
             use zstd;
             use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
             use self::nodemap_docket::NodeMapDocket;
             use super::index::Index;
             use super::nodemap::{NodeMap, NodeMapError};
             use crate::errors::HgError;
             use crate::vfs::Vfs;
             /// Mercurial revision numbers
             ///
             /// As noted in revlog.c, revision numbers are actually encoded in
             /// 4 bytes, and are liberally converted to ints, whence the i32
             pub type Revision = i32;
             /// Marker expressing the absence of a parent
             ///
             /// Independently of the actual representation, `NULL_REVISION` is guaranteed
             /// to be smaller than all existing revisions.
             pub const NULL_REVISION: Revision = -1;
             /// Same as `mercurial.node.wdirrev`
             ///
             /// This is also equal to `i32::max_value()`, but it's better to spell
             /// it out explicitely, same as in `mercurial.node`
             #[allow(clippy::unreadable_literal)]
             pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
             pub const WORKING_DIRECTORY_HEX: &str =
                 "ffffffffffffffffffffffffffffffffffffffff";
             /// The simplest expression of what we need of Mercurial DAGs.
             pub trait Graph {
                 /// Return the two parents of the given `Revision`.
                 ///
                 /// Each of the parents can be independently `NULL_REVISION`
                 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
             }
             #[derive(Clone, Debug, PartialEq)]
             pub enum GraphError {
                 ParentOutOfRange(Revision),
                 WorkingDirectoryUnsupported,
             }
             /// The Mercurial Revlog Index
             ///
             /// This is currently limited to the minimal interface that is needed for
             /// the [`nodemap`](nodemap/index.html) module
             pub trait RevlogIndex {
                 /// Total number of Revisions referenced in this index
                 fn len(&self) -> usize;
                 fn is_empty(&self) -> bool {
                     self.len() == 0
                 }
                 /// Return a reference to the Node or `None` if rev is out of bounds
                 ///
                 /// `NULL_REVISION` is not considered to be out of bounds.
                 fn node(&self, rev: Revision) -> Option<&Node>;
             }
             const REVISION_FLAG_CENSORED: u16 = 1 << 15;
             const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
             const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
             const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
             // Keep this in sync with REVIDX_KNOWN_FLAGS in
             // mercurial/revlogutils/flagutil.py
             const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
                 | REVISION_FLAG_ELLIPSIS
                 | REVISION_FLAG_EXTSTORED
                 | REVISION_FLAG_HASCOPIESINFO;
             const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
             #[derive(Debug, derive_more::From)]
             pub enum RevlogError {
                 InvalidRevision,
                 /// Working directory is not supported
                 WDirUnsupported,
                 /// Found more than one entry whose ID match the requested prefix
                 AmbiguousPrefix,
                 #[from]
                 Other(HgError),
             }
             impl From<NodeMapError> for RevlogError {
                 fn from(error: NodeMapError) -> Self {
                     match error {
                         NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
                         NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
                             format!("nodemap point to revision {} not in index", rev),
                         ),
                     }
                 }
             }
             fn corrupted<S: AsRef<str>>(context: S) -> HgError {
                 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
             }
             impl RevlogError {
                 fn corrupted<S: AsRef<str>>(context: S) -> Self {
                     RevlogError::Other(corrupted(context))
                 }
             }
             /// Read only implementation of revlog.
             pub struct Revlog {
                 /// When index and data are not interleaved: bytes of the revlog index.
                 /// When index and data are interleaved: bytes of the revlog index and
                 /// data.
                 index: Index,
                 /// When index and data are not interleaved: bytes of the revlog data
                 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
                 /// When present on disk: the persistent nodemap for this revlog
                 nodemap: Option<nodemap::NodeTree>,
             }
             impl Revlog {
                 /// Open a revlog index file.
                 ///
                 /// It will also open the associated data file if index and data are not
                 /// interleaved.
                 pub fn open(
                     store_vfs: &Vfs,
                     index_path: impl AsRef<Path>,
                     data_path: Option<&Path>,
                     use_nodemap: bool,
                 ) -> Result<Self, HgError> {
+                    Self::open_gen(store_vfs, index_path, data_path, use_nodemap, None)
+                }
+                fn open_gen(
+                    store_vfs: &Vfs,
+                    index_path: impl AsRef<Path>,
+                    data_path: Option<&Path>,
+                    use_nodemap: bool,
+                    nodemap_for_test: Option<nodemap::NodeTree>,
+                ) -> Result<Self, HgError> {
                     let index_path = index_path.as_ref();
                     let index = {
                         match store_vfs.mmap_open_opt(&index_path)? {
                             None => Index::new(Box::new(vec![])),
                             Some(index_mmap) => {
                                 let index = Index::new(Box::new(index_mmap))?;
                                 Ok(index)
                             }
                         }
                     }?;
                     let default_data_path = index_path.with_extension("d");
                     // type annotation required
                     // won't recognize Mmap as Deref<Target = [u8]>
                     let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
                         if index.is_inline() {
                             None
                         } else {
                             let data_path = data_path.unwrap_or(&default_data_path);
                             let data_mmap = store_vfs.mmap_open(data_path)?;
                             Some(Box::new(data_mmap))
                         };
                     let nodemap = if index.is_inline() || !use_nodemap {
                         None
                     } else {
                         NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
                             |(docket, data)| {
                                 nodemap::NodeTree::load_bytes(
                                     Box::new(data),
                                     docket.data_length,
                                 )
                             },
                         )
                     };
+                    let nodemap = nodemap_for_test.or(nodemap);
                     Ok(Revlog {
                         index,
                         data_bytes,
                         nodemap,
                     })
                 }
                 /// Return number of entries of the `Revlog`.
                 pub fn len(&self) -> usize {
                     self.index.len()
                 }
                 /// Returns `true` if the `Revlog` has zero `entries`.
                 pub fn is_empty(&self) -> bool {
                     self.index.is_empty()
                 }
                 /// Returns the node ID for the given revision number, if it exists in this
                 /// revlog
                 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
                     if rev == NULL_REVISION {
                         return Some(&NULL_NODE);
                     }
                     Some(self.index.get_entry(rev)?.hash())
                 }
                 /// Return the revision number for the given node ID, if it exists in this
                 /// revlog
                 pub fn rev_from_node(
                     &self,
                     node: NodePrefix,
                 ) -> Result<Revision, RevlogError> {
                     if let Some(nodemap) = &self.nodemap {
                         nodemap
                             .find_bin(&self.index, node)?
                             .ok_or(RevlogError::InvalidRevision)
                     } else {
                         self.rev_from_node_no_persistent_nodemap(node)
                     }
                 }
                 /// Same as `rev_from_node`, without using a persistent nodemap
                 ///
                 /// This is used as fallback when a persistent nodemap is not present.
                 /// This happens when the persistent-nodemap experimental feature is not
                 /// enabled, or for small revlogs.
                 fn rev_from_node_no_persistent_nodemap(
                     &self,
                     node: NodePrefix,
                 ) -> Result<Revision, RevlogError> {
                     // Linear scan of the revlog
                     // TODO: consider building a non-persistent nodemap in memory to
                     // optimize these cases.
                     let mut found_by_prefix = None;
                     for rev in (-1..self.len() as Revision).rev() {
                         let candidate_node = if rev == -1 {
                             NULL_NODE
                         } else {
                             let index_entry =
                                 self.index.get_entry(rev).ok_or_else(|| {
                                     HgError::corrupted(
                                         "revlog references a revision not in the index",
                                     )
                                 })?;
                             *index_entry.hash()
                         };
                         if node == candidate_node {
                             return Ok(rev);
                         }
                         if node.is_prefix_of(&candidate_node) {
                             if found_by_prefix.is_some() {
                                 return Err(RevlogError::AmbiguousPrefix);
                             }
                             found_by_prefix = Some(rev)
                         }
                     }
                     found_by_prefix.ok_or(RevlogError::InvalidRevision)
                 }
                 /// Returns whether the given revision exists in this revlog.
                 pub fn has_rev(&self, rev: Revision) -> bool {
                     self.index.get_entry(rev).is_some()
                 }
                 /// Return the full data associated to a revision.
                 ///
                 /// All entries required to build the final data out of deltas will be
                 /// retrieved as needed, and the deltas will be applied to the inital
                 /// snapshot to rebuild the final data.
                 pub fn get_rev_data(
                     &self,
                     rev: Revision,
                 ) -> Result<Cow<[u8]>, RevlogError> {
                     if rev == NULL_REVISION {
                         return Ok(Cow::Borrowed(&[]));
                     };
                     Ok(self.get_entry(rev)?.data()?)
                 }
                 /// Check the hash of some given data against the recorded hash.
                 pub fn check_hash(
                     &self,
                     p1: Revision,
                     p2: Revision,
                     expected: &[u8],
                     data: &[u8],
                 ) -> bool {
                     let e1 = self.index.get_entry(p1);
                     let h1 = match e1 {
                         Some(ref entry) => entry.hash(),
                         None => &NULL_NODE,
                     };
                     let e2 = self.index.get_entry(p2);
                     let h2 = match e2 {
                         Some(ref entry) => entry.hash(),
                         None => &NULL_NODE,
                     };
                     hash(data, h1.as_bytes(), h2.as_bytes()) == expected
                 }
                 /// Build the full data of a revision out its snapshot
                 /// and its deltas.
                 fn build_data_from_deltas(
                     snapshot: RevlogEntry,
                     deltas: &[RevlogEntry],
                 ) -> Result<Vec<u8>, HgError> {
                     let snapshot = snapshot.data_chunk()?;
                     let deltas = deltas
                         .iter()
                         .rev()
                         .map(RevlogEntry::data_chunk)
                         .collect::<Result<Vec<_>, _>>()?;
                     let patches: Vec<_> =
                         deltas.iter().map(|d| patch::PatchList::new(d)).collect();
                     let patch = patch::fold_patch_lists(&patches);
                     Ok(patch.apply(&snapshot))
                 }
                 /// Return the revlog data.
                 fn data(&self) -> &[u8] {
                     match &self.data_bytes {
                         Some(data_bytes) => data_bytes,
                         None => panic!(
                             "forgot to load the data or trying to access inline data"
                         ),
                     }
                 }
                 pub fn make_null_entry(&self) -> RevlogEntry {
                     RevlogEntry {
                         revlog: self,
                         rev: NULL_REVISION,
                         bytes: b"",
                         compressed_len: 0,
                         uncompressed_len: 0,
                         base_rev_or_base_of_delta_chain: None,
                         p1: NULL_REVISION,
                         p2: NULL_REVISION,
                         flags: NULL_REVLOG_ENTRY_FLAGS,
                         hash: NULL_NODE,
                     }
                 }
                 /// Get an entry of the revlog.
                 pub fn get_entry(
                     &self,
                     rev: Revision,
                 ) -> Result<RevlogEntry, RevlogError> {
                     if rev == NULL_REVISION {
                         return Ok(self.make_null_entry());
                     }
                     let index_entry = self
                         .index
                         .get_entry(rev)
                         .ok_or(RevlogError::InvalidRevision)?;
                     let start = index_entry.offset();
                     let end = start + index_entry.compressed_len() as usize;
                     let data = if self.index.is_inline() {
                         self.index.data(start, end)
                     } else {
                         &self.data()[start..end]
                     };
                     let entry = RevlogEntry {
                         revlog: self,
                         rev,
                         bytes: data,
                         compressed_len: index_entry.compressed_len(),
                         uncompressed_len: index_entry.uncompressed_len(),
                         base_rev_or_base_of_delta_chain: if index_entry
                             .base_revision_or_base_of_delta_chain()
                             == rev
                         {
                             None
                         } else {
                             Some(index_entry.base_revision_or_base_of_delta_chain())
                         },
                         p1: index_entry.p1(),
                         p2: index_entry.p2(),
                         flags: index_entry.flags(),
                         hash: *index_entry.hash(),
                     };
                     Ok(entry)
                 }
                 /// when resolving internal references within revlog, any errors
                 /// should be reported as corruption, instead of e.g. "invalid revision"
                 fn get_entry_internal(
                     &self,
                     rev: Revision,
                 ) -> Result<RevlogEntry, HgError> {
                     self.get_entry(rev)
                         .map_err(|_| corrupted(format!("revision {} out of range", rev)))
                 }
             }
             /// The revlog entry's bytes and the necessary informations to extract
             /// the entry's data.
             #[derive(Clone)]
             pub struct RevlogEntry<'revlog> {
                 revlog: &'revlog Revlog,
                 rev: Revision,
                 bytes: &'revlog [u8],
                 compressed_len: u32,
                 uncompressed_len: i32,
                 base_rev_or_base_of_delta_chain: Option<Revision>,
                 p1: Revision,
                 p2: Revision,
                 flags: u16,
                 hash: Node,
             }
             thread_local! {
               // seems fine to [unwrap] here: this can only fail due to memory allocation
               // failing, and it's normal for that to cause panic.
               static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
                   RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
             }
             fn zstd_decompress_to_buffer(
                 bytes: &[u8],
                 buf: &mut Vec<u8>,
             ) -> Result<usize, std::io::Error> {
                 ZSTD_DECODER
                     .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
             }
             impl<'revlog> RevlogEntry<'revlog> {
                 pub fn revision(&self) -> Revision {
                     self.rev
                 }
                 pub fn node(&self) -> &Node {
                     &self.hash
                 }
                 pub fn uncompressed_len(&self) -> Option<u32> {
                     u32::try_from(self.uncompressed_len).ok()
                 }
                 pub fn has_p1(&self) -> bool {
                     self.p1 != NULL_REVISION
                 }
                 pub fn p1_entry(
                     &self,
                 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
                     if self.p1 == NULL_REVISION {
                         Ok(None)
                     } else {
                         Ok(Some(self.revlog.get_entry(self.p1)?))
                     }
                 }
                 pub fn p2_entry(
                     &self,
                 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
                     if self.p2 == NULL_REVISION {
                         Ok(None)
                     } else {
                         Ok(Some(self.revlog.get_entry(self.p2)?))
                     }
                 }
                 pub fn p1(&self) -> Option<Revision> {
                     if self.p1 == NULL_REVISION {
                         None
                     } else {
                         Some(self.p1)
                     }
                 }
                 pub fn p2(&self) -> Option<Revision> {
                     if self.p2 == NULL_REVISION {
                         None
                     } else {
                         Some(self.p2)
                     }
                 }
                 pub fn is_censored(&self) -> bool {
                     (self.flags & REVISION_FLAG_CENSORED) != 0
                 }
                 pub fn has_length_affecting_flag_processor(&self) -> bool {
                     // Relevant Python code: revlog.size()
                     // note: ELLIPSIS is known to not change the content
                     (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
                 }
                 /// The data for this entry, after resolving deltas if any.
                 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
                     let mut entry = self.clone();
                     let mut delta_chain = vec![];
                     // The meaning of `base_rev_or_base_of_delta_chain` depends on
                     // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
                     // `mercurial/revlogutils/constants.py` and the code in
                     // [_chaininfo] and in [index_deltachain].
                     let uses_generaldelta = self.revlog.index.uses_generaldelta();
                     while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
                         let base_rev = if uses_generaldelta {
                             base_rev
                         } else {
                             entry.rev - 1
                         };
                         delta_chain.push(entry);
                         entry = self.revlog.get_entry_internal(base_rev)?;
                     }
                     let data = if delta_chain.is_empty() {
                         entry.data_chunk()?
                     } else {
                         Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
                     };
                     Ok(data)
                 }
                 fn check_data(
                     &self,
                     data: Cow<'revlog, [u8]>,
                 ) -> Result<Cow<'revlog, [u8]>, HgError> {
                     if self.revlog.check_hash(
                         self.p1,
                         self.p2,
                         self.hash.as_bytes(),
                         &data,
                     ) {
                         Ok(data)
                     } else {
                         if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
                             return Err(HgError::unsupported(
                                 "ellipsis revisions are not supported by rhg",
                             ));
                         }
                         Err(corrupted(format!(
                             "hash check failed for revision {}",
                             self.rev
                         )))
                     }
                 }
                 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
                     let data = self.rawdata()?;
                     if self.rev == NULL_REVISION {
                         return Ok(data);
                     }
                     if self.is_censored() {
                         return Err(HgError::CensoredNodeError);
                     }
                     self.check_data(data)
                 }
                 /// Extract the data contained in the entry.
                 /// This may be a delta. (See `is_delta`.)
                 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
                     if self.bytes.is_empty() {
                         return Ok(Cow::Borrowed(&[]));
                     }
                     match self.bytes[0] {
                         // Revision data is the entirety of the entry, including this
                         // header.
                         b'\0' => Ok(Cow::Borrowed(self.bytes)),
                         // Raw revision data follows.
                         b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
                         // zlib (RFC 1950) data.
                         b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
                         // zstd data.
                         b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
                         // A proper new format should have had a repo/store requirement.
                         format_type => Err(corrupted(format!(
                             "unknown compression header '{}'",
                             format_type
                         ))),
                     }
                 }
                 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
                     let mut decoder = ZlibDecoder::new(self.bytes);
                     if self.is_delta() {
                         let mut buf = Vec::with_capacity(self.compressed_len as usize);
                         decoder
                             .read_to_end(&mut buf)
                             .map_err(|e| corrupted(e.to_string()))?;
                         Ok(buf)
                     } else {
                         let cap = self.uncompressed_len.max(0) as usize;
                         let mut buf = vec![0; cap];
                         decoder
                             .read_exact(&mut buf)
                             .map_err(|e| corrupted(e.to_string()))?;
                         Ok(buf)
                     }
                 }
                 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
                     let cap = self.uncompressed_len.max(0) as usize;
                     if self.is_delta() {
                         // [cap] is usually an over-estimate of the space needed because
                         // it's the length of delta-decoded data, but we're interested
                         // in the size of the delta.
                         // This means we have to [shrink_to_fit] to avoid holding on
                         // to a large chunk of memory, but it also means we must have a
                         // fallback branch, for the case when the delta is longer than
                         // the original data (surprisingly, this does happen in practice)
                         let mut buf = Vec::with_capacity(cap);
                         match zstd_decompress_to_buffer(self.bytes, &mut buf) {
                             Ok(_) => buf.shrink_to_fit(),
                             Err(_) => {
                                 buf.clear();
                                 zstd::stream::copy_decode(self.bytes, &mut buf)
                                     .map_err(|e| corrupted(e.to_string()))?;
                             }
                         };
                         Ok(buf)
                     } else {
                         let mut buf = Vec::with_capacity(cap);
                         let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
                             .map_err(|e| corrupted(e.to_string()))?;
                         if len != self.uncompressed_len as usize {
                             Err(corrupted("uncompressed length does not match"))
                         } else {
                             Ok(buf)
                         }
                     }
                 }
                 /// Tell if the entry is a snapshot or a delta
                 /// (influences on decompression).
                 fn is_delta(&self) -> bool {
                     self.base_rev_or_base_of_delta_chain.is_some()
                 }
             }
             /// Calculate the hash of a revision given its data and its parents.
             fn hash(
                 data: &[u8],
                 p1_hash: &[u8],
                 p2_hash: &[u8],
             ) -> [u8; NODE_BYTES_LENGTH] {
                 let mut hasher = Sha1::new();
                 let (a, b) = (p1_hash, p2_hash);
                 if a > b {
                     hasher.update(b);
                     hasher.update(a);
                 } else {
                     hasher.update(a);
                     hasher.update(b);
                 }
                 hasher.update(data);
                 *hasher.finalize().as_ref()
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
                 use itertools::Itertools;
                 #[test]
                 fn test_empty() {
                     let temp = tempfile::tempdir().unwrap();
                     let vfs = Vfs { base: temp.path() };
                     std::fs::write(temp.path().join("foo.i"), b"").unwrap();
                     let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
                     assert!(revlog.is_empty());
                     assert_eq!(revlog.len(), 0);
                     assert!(revlog.get_entry(0).is_err());
                     assert!(!revlog.has_rev(0));
                     assert_eq!(
                         revlog.rev_from_node(NULL_NODE.into()).unwrap(),
                         NULL_REVISION
                     );
                     let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap();
                     assert_eq!(null_entry.revision(), NULL_REVISION);
                     assert!(null_entry.data().unwrap().is_empty());
                 }
                 #[test]
                 fn test_inline() {
                     let temp = tempfile::tempdir().unwrap();
                     let vfs = Vfs { base: temp.path() };
                     let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
                         .unwrap();
                     let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
                         .unwrap();
                     let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
                         .unwrap();
                     let entry0_bytes = IndexEntryBuilder::new()
                         .is_first(true)
                         .with_version(1)
                         .with_inline(true)
                         .with_offset(INDEX_ENTRY_SIZE)
                         .with_node(node0)
                         .build();
                     let entry1_bytes = IndexEntryBuilder::new()
                         .with_offset(INDEX_ENTRY_SIZE)
                         .with_node(node1)
                         .build();
                     let entry2_bytes = IndexEntryBuilder::new()
                         .with_offset(INDEX_ENTRY_SIZE)
                         .with_p1(0)
                         .with_p2(1)
                         .with_node(node2)
                         .build();
                     let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
                         .into_iter()
                         .flatten()
                         .collect_vec();
                     std::fs::write(temp.path().join("foo.i"), contents).unwrap();
                     let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
                     let entry0 = revlog.get_entry(0).ok().unwrap();
                     assert_eq!(entry0.revision(), 0);
                     assert_eq!(*entry0.node(), node0);
                     assert!(!entry0.has_p1());
                     assert_eq!(entry0.p1(), None);
                     assert_eq!(entry0.p2(), None);
                     let p1_entry = entry0.p1_entry().unwrap();
                     assert!(p1_entry.is_none());
                     let p2_entry = entry0.p2_entry().unwrap();
                     assert!(p2_entry.is_none());
                     let entry1 = revlog.get_entry(1).ok().unwrap();
                     assert_eq!(entry1.revision(), 1);
                     assert_eq!(*entry1.node(), node1);
                     assert!(!entry1.has_p1());
                     assert_eq!(entry1.p1(), None);
                     assert_eq!(entry1.p2(), None);
                     let p1_entry = entry1.p1_entry().unwrap();
                     assert!(p1_entry.is_none());
                     let p2_entry = entry1.p2_entry().unwrap();
                     assert!(p2_entry.is_none());
                     let entry2 = revlog.get_entry(2).ok().unwrap();
                     assert_eq!(entry2.revision(), 2);
                     assert_eq!(*entry2.node(), node2);
                     assert!(entry2.has_p1());
                     assert_eq!(entry2.p1(), Some(0));
                     assert_eq!(entry2.p2(), Some(1));
                     let p1_entry = entry2.p1_entry().unwrap();
                     assert!(p1_entry.is_some());
                     assert_eq!(p1_entry.unwrap().revision(), 0);
                     let p2_entry = entry2.p2_entry().unwrap();
                     assert!(p2_entry.is_some());
                     assert_eq!(p2_entry.unwrap().revision(), 1);
                 }
                 #[test]
                 fn test_nodemap() {
                     let temp = tempfile::tempdir().unwrap();
                     let vfs = Vfs { base: temp.path() };
                     // building a revlog with a forced Node starting with zeros
                     // This is a corruption, but it does not preclude using the nodemap
                     // if we don't try and access the data
                     let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
                         .unwrap();
                     let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
                         .unwrap();
                     let entry0_bytes = IndexEntryBuilder::new()
                         .is_first(true)
                         .with_version(1)
                         .with_inline(true)
                         .with_offset(INDEX_ENTRY_SIZE)
                         .with_node(node0)
                         .build();
                     let entry1_bytes = IndexEntryBuilder::new()
                         .with_offset(INDEX_ENTRY_SIZE)
                         .with_node(node1)
                         .build();
                     let contents = vec![entry0_bytes, entry1_bytes]
                         .into_iter()
                         .flatten()
                         .collect_vec();
                     std::fs::write(temp.path().join("foo.i"), contents).unwrap();
-                    let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
+                    let mut idx = nodemap::tests::TestNtIndex::new();
+                    idx.insert_node(0, node0).unwrap();
+                    idx.insert_node(1, node1).unwrap();
+                    let revlog =
+                        Revlog::open_gen(&vfs, "foo.i", None, true, Some(idx.nt)).unwrap();
                     // accessing the data shows the corruption
                     revlog.get_entry(0).unwrap().data().unwrap_err();
                     assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
                     assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
                     assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
                     assert_eq!(
                         revlog
                             .rev_from_node(NodePrefix::from_hex("000").unwrap())
                             .unwrap(),
                         -1
                     );
                     assert_eq!(
                         revlog
                             .rev_from_node(NodePrefix::from_hex("b00").unwrap())
                             .unwrap(),
                     );
                     // RevlogError does not implement PartialEq
                     // (ultimately because io::Error does not)
                     match revlog
                         .rev_from_node(NodePrefix::from_hex("00").unwrap())
                         .expect_err("Expected to give AmbiguousPrefix error")
                     {
                         RevlogError::AmbiguousPrefix => (),
                         e => {
                             panic!("Got another error than AmbiguousPrefix: {:?}", e);
                         }
                     };
                 }
             }

rust/hg-core/src/revlog/nodemap.rs

0 +17 -10

             // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
             //           and Mercurial contributors
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Indexing facilities for fast retrieval of `Revision` from `Node`
             //!
             //! This provides a variation on the 16-ary radix tree that is
             //! provided as "nodetree" in revlog.c, ready for append-only persistence
             //! on disk.
             //!
             //! Following existing implicit conventions, the "nodemap" terminology
             //! is used in a more abstract context.
             use super::{
                 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
             };
             use bytes_cast::{unaligned, BytesCast};
             use std::cmp::max;
             use std::fmt;
             use std::mem::{self, align_of, size_of};
             use std::ops::Deref;
             use std::ops::Index;
             #[derive(Debug, PartialEq)]
             pub enum NodeMapError {
                 /// A `NodePrefix` matches several [`Revision`]s.
                 ///
                 /// This can be returned by methods meant for (at most) one match.
                 MultipleResults,
                 /// A `Revision` stored in the nodemap could not be found in the index
                 RevisionNotInIndex(Revision),
             }
             /// Mapping system from Mercurial nodes to revision numbers.
             ///
             /// ## `RevlogIndex` and `NodeMap`
             ///
             /// One way to think about their relationship is that
             /// the `NodeMap` is a prefix-oriented reverse index of the [`Node`]
             /// information carried by a [`RevlogIndex`].
             ///
             /// Many of the methods in this trait take a `RevlogIndex` argument
             /// which is used for validation of their results. This index must naturally
             /// be the one the `NodeMap` is about, and it must be consistent.
             ///
             /// Notably, the `NodeMap` must not store
             /// information about more `Revision` values than there are in the index.
             /// In these methods, an encountered `Revision` is not in the index, a
             /// [RevisionNotInIndex](NodeMapError) error is returned.
             ///
             /// In insert operations, the rule is thus that the `NodeMap` must always
             /// be updated after the `RevlogIndex` it is about.
             pub trait NodeMap {
                 /// Find the unique `Revision` having the given `Node`
                 ///
                 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
                 fn find_node(
                     &self,
                     index: &impl RevlogIndex,
                     node: &Node,
                 ) -> Result<Option<Revision>, NodeMapError> {
                     self.find_bin(index, node.into())
                 }
                 /// Find the unique Revision whose `Node` starts with a given binary prefix
                 ///
                 /// If no Revision matches the given prefix, `Ok(None)` is returned.
                 ///
                 /// If several Revisions match the given prefix, a
                 /// [MultipleResults](NodeMapError)  error is returned.
                 fn find_bin(
                     &self,
                     idx: &impl RevlogIndex,
                     prefix: NodePrefix,
                 ) -> Result<Option<Revision>, NodeMapError>;
                 /// Give the size of the shortest node prefix that determines
                 /// the revision uniquely.
                 ///
                 /// From a binary node prefix, if it is matched in the node map, this
                 /// returns the number of hexadecimal digits that would had sufficed
                 /// to find the revision uniquely.
                 ///
                 /// Returns `None` if no [`Revision`] could be found for the prefix.
                 ///
                 /// If several Revisions match the given prefix, a
                 /// [MultipleResults](NodeMapError)  error is returned.
                 fn unique_prefix_len_bin(
                     &self,
                     idx: &impl RevlogIndex,
                     node_prefix: NodePrefix,
                 ) -> Result<Option<usize>, NodeMapError>;
                 /// Same as [unique_prefix_len_bin](Self::unique_prefix_len_bin), with
                 /// a full [`Node`] as input
                 fn unique_prefix_len_node(
                     &self,
                     idx: &impl RevlogIndex,
                     node: &Node,
                 ) -> Result<Option<usize>, NodeMapError> {
                     self.unique_prefix_len_bin(idx, node.into())
                 }
             }
             pub trait MutableNodeMap: NodeMap {
                 fn insert<I: RevlogIndex>(
                     &mut self,
                     index: &I,
                     node: &Node,
                     rev: Revision,
                 ) -> Result<(), NodeMapError>;
             }
             /// Low level NodeTree [`Block`] elements
             ///
             /// These are exactly as for instance on persistent storage.
             type RawElement = unaligned::I32Be;
             /// High level representation of values in NodeTree
             /// [`Blocks`](struct.Block.html)
             ///
             /// This is the high level representation that most algorithms should
             /// use.
             #[derive(Clone, Debug, Eq, PartialEq)]
             enum Element {
                 Rev(Revision),
                 Block(usize),
                 None,
             }
             impl From<RawElement> for Element {
                 /// Conversion from low level representation, after endianness conversion.
                 ///
                 /// See [`Block`](struct.Block.html) for explanation about the encoding.
                 fn from(raw: RawElement) -> Element {
                     let int = raw.get();
                     if int >= 0 {
                         Element::Block(int as usize)
                     } else if int == -1 {
                         Element::None
                     } else {
                         Element::Rev(-int - 2)
                     }
                 }
             }
             impl From<Element> for RawElement {
                 fn from(element: Element) -> RawElement {
                     RawElement::from(match element {
                         Element::None => 0,
                         Element::Block(i) => i as i32,
                         Element::Rev(rev) => -rev - 2,
                     })
                 }
             }
             const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
             /// A logical block of the [`NodeTree`], packed with a fixed size.
             ///
             /// These are always used in container types implementing `Index<Block>`,
             /// such as `&Block`
             ///
             /// As an array of integers, its ith element encodes that the
             /// ith potential edge from the block, representing the ith hexadecimal digit
             /// (nybble) `i` is either:
             ///
             /// - absent (value -1)
             /// - another `Block` in the same indexable container (value ≥ 0)
             ///  - a [`Revision`] leaf (value ≤ -2)
             ///
             /// Endianness has to be fixed for consistency on shared storage across
             /// different architectures.
             ///
             /// A key difference with the C `nodetree` is that we need to be
             /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
             /// rather than 0 and the [`Revision`] range upper limit of -2 instead of -1.
             ///
             /// Another related difference is that `NULL_REVISION` (-1) is not
             /// represented at all, because we want an immutable empty nodetree
             /// to be valid.
             #[derive(Copy, Clone, BytesCast, PartialEq)]
             #[repr(transparent)]
             pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
             impl Block {
                 fn new() -> Self {
                     let absent_node = RawElement::from(-1);
                     Block([absent_node; ELEMENTS_PER_BLOCK])
                 }
                 fn get(&self, nybble: u8) -> Element {
                     self.0[nybble as usize].into()
                 }
                 fn set(&mut self, nybble: u8, element: Element) {
                     self.0[nybble as usize] = element.into()
                 }
             }
             impl fmt::Debug for Block {
                 /// sparse representation for testing and debugging purposes
                 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                     f.debug_map()
                         .entries((0..16).filter_map(|i| match self.get(i) {
                             Element::None => None,
                             element => Some((i, element)),
                         }))
                         .finish()
                 }
             }
             /// A mutable 16-radix tree with the root block logically at the end
             ///
             /// Because of the append only nature of our node trees, we need to
             /// keep the original untouched and store new blocks separately.
             ///
             /// The mutable root [`Block`] is kept apart so that we don't have to rebump
             /// it on each insertion.
             pub struct NodeTree {
                 readonly: Box<dyn Deref<Target = [Block]> + Send>,
                 growable: Vec<Block>,
                 root: Block,
                 masked_inner_blocks: usize,
             }
             impl Index<usize> for NodeTree {
                 type Output = Block;
                 fn index(&self, i: usize) -> &Block {
                     let ro_len = self.readonly.len();
                     if i < ro_len {
                         &self.readonly[i]
                     } else if i == ro_len + self.growable.len() {
                         &self.root
                     } else {
                         &self.growable[i - ro_len]
                     }
                 }
             }
             /// Return `None` unless the [`Node`] for `rev` has given prefix in `idx`.
             fn has_prefix_or_none(
                 idx: &impl RevlogIndex,
                 prefix: NodePrefix,
                 rev: Revision,
             ) -> Result<Option<Revision>, NodeMapError> {
                 idx.node(rev)
                     .ok_or(NodeMapError::RevisionNotInIndex(rev))
                     .map(|node| {
                         if prefix.is_prefix_of(node) {
                             Some(rev)
                         } else {
                             None
                         }
                     })
             }
             /// validate that the candidate's node starts indeed with given prefix,
             /// and treat ambiguities related to [`NULL_REVISION`].
             ///
             /// From the data in the NodeTree, one can only conclude that some
             /// revision is the only one for a *subprefix* of the one being looked up.
             fn validate_candidate(
                 idx: &impl RevlogIndex,
                 prefix: NodePrefix,
                 candidate: (Option<Revision>, usize),
             ) -> Result<(Option<Revision>, usize), NodeMapError> {
                 let (rev, steps) = candidate;
                 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
                     rev.map_or(Ok((None, steps)), |r| {
                         has_prefix_or_none(idx, prefix, r)
                             .map(|opt| (opt, max(steps, nz_nybble + 1)))
                     })
                 } else {
                     // the prefix is only made of zeros; NULL_REVISION always matches it
                     // and any other *valid* result is an ambiguity
                     match rev {
                         None => Ok((Some(NULL_REVISION), steps + 1)),
                         Some(r) => match has_prefix_or_none(idx, prefix, r)? {
                             None => Ok((Some(NULL_REVISION), steps + 1)),
                             _ => Err(NodeMapError::MultipleResults),
                         },
                     }
                 }
             }
             impl NodeTree {
                 /// Initiate a NodeTree from an immutable slice-like of `Block`
                 ///
                 /// We keep `readonly` and clone its root block if it isn't empty.
                 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
                     let root = readonly.last().cloned().unwrap_or_else(Block::new);
                     NodeTree {
                         readonly,
                         growable: Vec::new(),
                         root,
                         masked_inner_blocks: 0,
                     }
                 }
                 /// Create from an opaque bunch of bytes
                 ///
                 /// The created [`NodeTreeBytes`] from `bytes`,
                 /// of which exactly `amount` bytes are used.
                 ///
                 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
                 /// - `amount` is expressed in bytes, and is not automatically derived from
                 ///   `bytes`, so that a caller that manages them atomically can perform
                 ///   temporary disk serializations and still rollback easily if needed.
                 ///   First use-case for this would be to support Mercurial shell hooks.
                 ///
                 /// panics if `buffer` is smaller than `amount`
                 pub fn load_bytes(
                     bytes: Box<dyn Deref<Target = [u8]> + Send>,
                     amount: usize,
                 ) -> Self {
                     NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
                 }
                 /// Retrieve added [`Block`]s and the original immutable data
                 pub fn into_readonly_and_added(
                     self,
                 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
                     let mut vec = self.growable;
                     let readonly = self.readonly;
                     if readonly.last() != Some(&self.root) {
                         vec.push(self.root);
                     }
                     (readonly, vec)
                 }
                 /// Retrieve added [`Block]s as bytes, ready to be written to persistent
                 /// storage
                 pub fn into_readonly_and_added_bytes(
                     self,
                 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
                     let (readonly, vec) = self.into_readonly_and_added();
                     // Prevent running `v`'s destructor so we are in complete control
                     // of the allocation.
                     let vec = mem::ManuallyDrop::new(vec);
                     // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
                     // bytes, so this is perfectly safe.
                     let bytes = unsafe {
                         // Check for compatible allocation layout.
                         // (Optimized away by constant-folding + dead code elimination.)
                         assert_eq!(size_of::<Block>(), 64);
                         assert_eq!(align_of::<Block>(), 1);
                         // /!\ Any use of `vec` after this is use-after-free.
                         // TODO: use `into_raw_parts` once stabilized
                         Vec::from_raw_parts(
                             vec.as_ptr() as *mut u8,
                             vec.len() * size_of::<Block>(),
                             vec.capacity() * size_of::<Block>(),
                         )
                     };
                     (readonly, bytes)
                 }
                 /// Total number of blocks
                 fn len(&self) -> usize {
                     self.readonly.len() + self.growable.len() + 1
                 }
                 /// Implemented for completeness
                 ///
                 /// A `NodeTree` always has at least the mutable root block.
                 #[allow(dead_code)]
                 fn is_empty(&self) -> bool {
                     false
                 }
                 /// Main working method for `NodeTree` searches
                 ///
                 /// The first returned value is the result of analysing `NodeTree` data
                 /// *alone*: whereas `None` guarantees that the given prefix is absent
                 /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with
                 /// `Some(rev)`, it is to be understood that `rev` is the unique
                 /// [`Revision`] that could match the prefix. Actually, all that can
                 /// be inferred from
                 /// the `NodeTree` data is that `rev` is the revision with the longest
                 /// common node prefix with the given prefix.
                 ///
                 /// The second returned value is the size of the smallest subprefix
                 /// of `prefix` that would give the same result, i.e. not the
                 /// [MultipleResults](NodeMapError) error variant (again, using only the
                 /// data of the [`NodeTree`]).
                 fn lookup(
                     &self,
                     prefix: NodePrefix,
                 ) -> Result<(Option<Revision>, usize), NodeMapError> {
                     for (i, visit_item) in self.visit(prefix).enumerate() {
                         if let Some(opt) = visit_item.final_revision() {
                             return Ok((opt, i + 1));
                         }
                     }
                     Err(NodeMapError::MultipleResults)
                 }
                 fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
                     NodeTreeVisitor {
                         nt: self,
                         prefix,
                         visit: self.len() - 1,
                         nybble_idx: 0,
                         done: false,
                     }
                 }
                 /// Return a mutable reference for `Block` at index `idx`.
                 ///
                 /// If `idx` lies in the immutable area, then the reference is to
                 /// a newly appended copy.
                 ///
                 /// Returns (new_idx, glen, mut_ref) where
                 ///
                 /// - `new_idx` is the index of the mutable `Block`
                 /// - `mut_ref` is a mutable reference to the mutable Block.
                 /// - `glen` is the new length of `self.growable`
                 ///
                 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
                 /// itself because of the mutable borrow taken with the returned `Block`
                 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
                     let ro_blocks = &self.readonly;
                     let ro_len = ro_blocks.len();
                     let glen = self.growable.len();
                     if idx < ro_len {
                         self.masked_inner_blocks += 1;
                         self.growable.push(ro_blocks[idx]);
                         (glen + ro_len, &mut self.growable[glen], glen + 1)
                     } else if glen + ro_len == idx {
                         (idx, &mut self.root, glen)
                     } else {
                         (idx, &mut self.growable[idx - ro_len], glen)
                     }
                 }
                 /// Main insertion method
                 ///
                 /// This will dive in the node tree to find the deepest `Block` for
                 /// `node`, split it as much as needed and record `node` in there.
                 /// The method then backtracks, updating references in all the visited
                 /// blocks from the root.
                 ///
                 /// All the mutated `Block` are copied first to the growable part if
                 /// needed. That happens for those in the immutable part except the root.
                 pub fn insert<I: RevlogIndex>(
                     &mut self,
                     index: &I,
                     node: &Node,
                     rev: Revision,
                 ) -> Result<(), NodeMapError> {
                     let ro_len = &self.readonly.len();
                     let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
                     let read_nybbles = visit_steps.len();
                     // visit_steps cannot be empty, since we always visit the root block
                     let deepest = visit_steps.pop().unwrap();
                     let (mut block_idx, mut block, mut glen) =
                         self.mutable_block(deepest.block_idx);
                     if let Element::Rev(old_rev) = deepest.element {
                         let old_node = index
                             .node(old_rev)
                             .ok_or(NodeMapError::RevisionNotInIndex(old_rev))?;
                         if old_node == node {
                             return Ok(()); // avoid creating lots of useless blocks
                         }
                         // Looping over the tail of nybbles in both nodes, creating
                         // new blocks until we find the difference
                         let mut new_block_idx = ro_len + glen;
                         let mut nybble = deepest.nybble;
                         for nybble_pos in read_nybbles..node.nybbles_len() {
                             block.set(nybble, Element::Block(new_block_idx));
                             let new_nybble = node.get_nybble(nybble_pos);
                             let old_nybble = old_node.get_nybble(nybble_pos);
                             if old_nybble == new_nybble {
                                 self.growable.push(Block::new());
                                 block = &mut self.growable[glen];
                                 glen += 1;
                                 new_block_idx += 1;
                                 nybble = new_nybble;
                             } else {
                                 let mut new_block = Block::new();
                                 new_block.set(old_nybble, Element::Rev(old_rev));
                                 new_block.set(new_nybble, Element::Rev(rev));
                                 self.growable.push(new_block);
                                 break;
                             }
                         }
                     } else {
                         // Free slot in the deepest block: no splitting has to be done
                         block.set(deepest.nybble, Element::Rev(rev));
                     }
                     // Backtrack over visit steps to update references
                     while let Some(visited) = visit_steps.pop() {
                         let to_write = Element::Block(block_idx);
                         if visit_steps.is_empty() {
                             self.root.set(visited.nybble, to_write);
                             break;
                         }
                         let (new_idx, block, _) = self.mutable_block(visited.block_idx);
                         if block.get(visited.nybble) == to_write {
                             break;
                         }
                         block.set(visited.nybble, to_write);
                         block_idx = new_idx;
                     }
                     Ok(())
                 }
                 /// Make the whole `NodeTree` logically empty, without touching the
                 /// immutable part.
                 pub fn invalidate_all(&mut self) {
                     self.root = Block::new();
                     self.growable = Vec::new();
                     self.masked_inner_blocks = self.readonly.len();
                 }
                 /// Return the number of blocks in the readonly part that are currently
                 /// masked in the mutable part.
                 ///
                 /// The `NodeTree` structure has no efficient way to know how many blocks
                 /// are already unreachable in the readonly part.
                 ///
                 /// After a call to `invalidate_all()`, the returned number can be actually
                 /// bigger than the whole readonly part, a conventional way to mean that
                 /// all the readonly blocks have been masked. This is what is really
                 /// useful to the caller and does not require to know how many were
                 /// actually unreachable to begin with.
                 pub fn masked_readonly_blocks(&self) -> usize {
                     if let Some(readonly_root) = self.readonly.last() {
                         if readonly_root == &self.root {
                             return 0;
                         }
                     } else {
                         return 0;
                     }
                     self.masked_inner_blocks + 1
                 }
             }
             pub struct NodeTreeBytes {
                 buffer: Box<dyn Deref<Target = [u8]> + Send>,
                 len_in_blocks: usize,
             }
             impl NodeTreeBytes {
                 fn new(
                     buffer: Box<dyn Deref<Target = [u8]> + Send>,
                     amount: usize,
                 ) -> Self {
                     assert!(buffer.len() >= amount);
                     let len_in_blocks = amount / size_of::<Block>();
                     NodeTreeBytes {
                         buffer,
                         len_in_blocks,
                     }
                 }
             }
             impl Deref for NodeTreeBytes {
                 type Target = [Block];
                 fn deref(&self) -> &[Block] {
                     Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
                         // `NodeTreeBytes::new` already asserted that `self.buffer` is
                         // large enough.
                         .unwrap()
                         .0
                 }
             }
             struct NodeTreeVisitor<'n> {
                 nt: &'n NodeTree,
                 prefix: NodePrefix,
                 visit: usize,
                 nybble_idx: usize,
                 done: bool,
             }
             #[derive(Debug, PartialEq, Clone)]
             struct NodeTreeVisitItem {
                 block_idx: usize,
                 nybble: u8,
                 element: Element,
             }
             impl<'n> Iterator for NodeTreeVisitor<'n> {
                 type Item = NodeTreeVisitItem;
                 fn next(&mut self) -> Option<Self::Item> {
                     if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
                         return None;
                     }
                     let nybble = self.prefix.get_nybble(self.nybble_idx);
                     self.nybble_idx += 1;
                     let visit = self.visit;
                     let element = self.nt[visit].get(nybble);
                     if let Element::Block(idx) = element {
                         self.visit = idx;
                     } else {
                         self.done = true;
                     }
                     Some(NodeTreeVisitItem {
                         block_idx: visit,
                         nybble,
                         element,
                     })
                 }
             }
             impl NodeTreeVisitItem {
                 // Return `Some(opt)` if this item is final, with `opt` being the
                 // `Revision` that it may represent.
                 //
                 // If the item is not terminal, return `None`
                 fn final_revision(&self) -> Option<Option<Revision>> {
                     match self.element {
                         Element::Block(_) => None,
                         Element::Rev(r) => Some(Some(r)),
                         Element::None => Some(None),
                     }
                 }
             }
             impl From<Vec<Block>> for NodeTree {
                 fn from(vec: Vec<Block>) -> Self {
                     Self::new(Box::new(vec))
                 }
             }
             impl fmt::Debug for NodeTree {
                 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                     let readonly: &[Block] = &*self.readonly;
                     write!(
                         f,
                         "readonly: {:?}, growable: {:?}, root: {:?}",
                         readonly, self.growable, self.root
                     )
                 }
             }
             impl Default for NodeTree {
                 /// Create a fully mutable empty NodeTree
                 fn default() -> Self {
                     NodeTree::new(Box::new(Vec::new()))
                 }
             }
             impl NodeMap for NodeTree {
                 fn find_bin<'a>(
                     &self,
                     idx: &impl RevlogIndex,
                     prefix: NodePrefix,
                 ) -> Result<Option<Revision>, NodeMapError> {
                     validate_candidate(idx, prefix, self.lookup(prefix)?)
                         .map(|(opt, _shortest)| opt)
                 }
                 fn unique_prefix_len_bin<'a>(
                     &self,
                     idx: &impl RevlogIndex,
                     prefix: NodePrefix,
                 ) -> Result<Option<usize>, NodeMapError> {
                     validate_candidate(idx, prefix, self.lookup(prefix)?)
                         .map(|(opt, shortest)| opt.map(|_rev| shortest))
                 }
             }
             #[cfg(test)]
-            mod tests {
+            pub mod tests {
                 use super::NodeMapError::*;
                 use super::*;
                 use crate::revlog::node::{hex_pad_right, Node};
                 use std::collections::HashMap;
                 /// Creates a `Block` using a syntax close to the `Debug` output
                 macro_rules! block {
                     {$($nybble:tt : $variant:ident($val:tt)),*} => (
                         {
                             let mut block = Block::new();
                             $(block.set($nybble, Element::$variant($val)));*;
                             block
                         }
                     )
                 }
                 #[test]
                 fn test_block_debug() {
                     let mut block = Block::new();
                     block.set(1, Element::Rev(3));
                     block.set(10, Element::Block(0));
                     assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
                 }
                 #[test]
                 fn test_block_macro() {
                     let block = block! {5: Block(2)};
                     assert_eq!(format!("{:?}", block), "{5: Block(2)}");
                     let block = block! {13: Rev(15), 5: Block(2)};
                     assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
                 }
                 #[test]
                 fn test_raw_block() {
                     let mut raw = [255u8; 64];
                     let mut counter = 0;
                     for val in [0_i32, 15, -2, -1, -3].iter() {
                         for byte in val.to_be_bytes().iter() {
                             raw[counter] = *byte;
                             counter += 1;
                         }
                     }
                     let (block, _) = Block::from_bytes(&raw).unwrap();
                     assert_eq!(block.get(0), Element::Block(0));
                     assert_eq!(block.get(1), Element::Block(15));
                     assert_eq!(block.get(3), Element::None);
                     assert_eq!(block.get(2), Element::Rev(0));
                     assert_eq!(block.get(4), Element::Rev(1));
                 }
                 type TestIndex = HashMap<Revision, Node>;
                 impl RevlogIndex for TestIndex {
                     fn node(&self, rev: Revision) -> Option<&Node> {
                         self.get(&rev)
                     }
                     fn len(&self) -> usize {
                         self.len()
                     }
                 }
                 /// Pad hexadecimal Node prefix with zeros on the right
                 ///
                 /// This avoids having to repeatedly write very long hexadecimal
                 /// strings for test data, and brings actual hash size independency.
                 #[cfg(test)]
                 fn pad_node(hex: &str) -> Node {
                     Node::from_hex(&hex_pad_right(hex)).unwrap()
                 }
                 /// Pad hexadecimal Node prefix with zeros on the right, then insert
                 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
                     idx.insert(rev, pad_node(hex));
                 }
                 fn sample_nodetree() -> NodeTree {
                     NodeTree::from(vec![
                         block![0: Rev(9)],
                         block![0: Rev(0), 1: Rev(9)],
                         block![0: Block(1), 1:Rev(1)],
                     ])
                 }
                 fn hex(s: &str) -> NodePrefix {
                     NodePrefix::from_hex(s).unwrap()
                 }
                 #[test]
                 fn test_nt_debug() {
                     let nt = sample_nodetree();
                     assert_eq!(
                         format!("{:?}", nt),
                         "readonly: \
                          [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
                          growable: [], \
                          root: {0: Block(1), 1: Rev(1)}",
                     );
                 }
                 #[test]
                 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
                     let mut idx: TestIndex = HashMap::new();
                     pad_insert(&mut idx, 1, "1234deadcafe");
                     let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
                     assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1));
                     assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1));
                     assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1));
                     assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
                     assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
                     // and with full binary Nodes
                     assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
                     let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
                     assert_eq!(nt.find_node(&idx, &unknown)?, None);
                     Ok(())
                 }
                 #[test]
                 fn test_immutable_find_one_jump() {
                     let mut idx = TestIndex::new();
                     pad_insert(&mut idx, 9, "012");
                     pad_insert(&mut idx, 0, "00a");
                     let nt = sample_nodetree();
                     assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
                     assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9)));
                     assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
                     assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0)));
                     assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
                     assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
                 }
                 #[test]
                 fn test_mutated_find() -> Result<(), NodeMapError> {
                     let mut idx = TestIndex::new();
                     pad_insert(&mut idx, 9, "012");
                     pad_insert(&mut idx, 0, "00a");
                     pad_insert(&mut idx, 2, "cafe");
                     pad_insert(&mut idx, 3, "15");
                     pad_insert(&mut idx, 1, "10");
                     let nt = NodeTree {
                         readonly: sample_nodetree().readonly,
                         growable: vec![block![0: Rev(1), 5: Rev(3)]],
                         root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
                         masked_inner_blocks: 1,
                     };
                     assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1));
                     assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2));
                     assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
                     assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
                     assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
                     assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
                     assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9));
                     assert_eq!(nt.masked_readonly_blocks(), 2);
                     Ok(())
                 }
-                struct TestNtIndex {
+                pub struct TestNtIndex {
-                    index: TestIndex,
+                    pub index: TestIndex,
-                    nt: NodeTree,
+                    pub nt: NodeTree,
                 }
                 impl TestNtIndex {
-                    fn new() -> Self {
+                    pub fn new() -> Self {
                         TestNtIndex {
                             index: HashMap::new(),
                             nt: NodeTree::default(),
                         }
                     }
-                    fn insert(
+                    pub fn insert_node(
+                        &mut self,
+                        rev: Revision,
+                        node: Node,
+                    ) -> Result<(), NodeMapError> {
+                        self.index.insert(rev, node);
+                        self.nt.insert(&self.index, &node, rev)?;
+                        Ok(())
+                    }
+                    pub fn insert(
                         &mut self,
                         rev: Revision,
                         hex: &str,
                     ) -> Result<(), NodeMapError> {
-                        let node = pad_node(hex);
+                        return self.insert_node(rev, pad_node(hex));
-                        self.index.insert(rev, node);
-                        self.nt.insert(&self.index, &node, rev)?;
-                        Ok(())
                     }
                     fn find_hex(
                         &self,
                         prefix: &str,
                     ) -> Result<Option<Revision>, NodeMapError> {
                         self.nt.find_bin(&self.index, hex(prefix))
                     }
                     fn unique_prefix_len_hex(
                         &self,
                         prefix: &str,
                     ) -> Result<Option<usize>, NodeMapError> {
                         self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
                     }
                     /// Drain `added` and restart a new one
                     fn commit(self) -> Self {
                         let mut as_vec: Vec<Block> =
                             self.nt.readonly.iter().copied().collect();
                         as_vec.extend(self.nt.growable);
                         as_vec.push(self.nt.root);
                         Self {
                             index: self.index,
                             nt: NodeTree::from(as_vec),
                         }
                     }
                 }
                 #[test]
                 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
                     let mut idx = TestNtIndex::new();
                     idx.insert(0, "1234")?;
                     assert_eq!(idx.find_hex("1")?, Some(0));
                     assert_eq!(idx.find_hex("12")?, Some(0));
                     // let's trigger a simple split
                     idx.insert(1, "1a34")?;
                     assert_eq!(idx.nt.growable.len(), 1);
                     assert_eq!(idx.find_hex("12")?, Some(0));
                     assert_eq!(idx.find_hex("1a")?, Some(1));
                     // reinserting is a no_op
                     idx.insert(1, "1a34")?;
                     assert_eq!(idx.nt.growable.len(), 1);
                     assert_eq!(idx.find_hex("12")?, Some(0));
                     assert_eq!(idx.find_hex("1a")?, Some(1));
                     idx.insert(2, "1a01")?;
                     assert_eq!(idx.nt.growable.len(), 2);
                     assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
                     assert_eq!(idx.find_hex("12")?, Some(0));
                     assert_eq!(idx.find_hex("1a3")?, Some(1));
                     assert_eq!(idx.find_hex("1a0")?, Some(2));
                     assert_eq!(idx.find_hex("1a12")?, None);
                     // now let's make it split and create more than one additional block
                     idx.insert(3, "1a345")?;
                     assert_eq!(idx.nt.growable.len(), 4);
                     assert_eq!(idx.find_hex("1a340")?, Some(1));
                     assert_eq!(idx.find_hex("1a345")?, Some(3));
                     assert_eq!(idx.find_hex("1a341")?, None);
                     // there's no readonly block to mask
                     assert_eq!(idx.nt.masked_readonly_blocks(), 0);
                     Ok(())
                 }
                 #[test]
                 fn test_unique_prefix_len_zero_prefix() {
                     let mut idx = TestNtIndex::new();
                     idx.insert(0, "00000abcd").unwrap();
                     assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
                     // in the nodetree proper, this will be found at the first nybble
                     // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
                     // but the first difference with `NULL_NODE`
                     assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
                     assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
                     // same with odd result
                     idx.insert(1, "00123").unwrap();
                     assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
                     assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
                     // these are unchanged of course
                     assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
                     assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
                 }
                 #[test]
                 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
                     // check that the splitting loop is long enough
                     let mut nt_idx = TestNtIndex::new();
                     let nt = &mut nt_idx.nt;
                     let idx = &mut nt_idx.index;
                     let node0_hex = hex_pad_right("444444");
                     let mut node1_hex = hex_pad_right("444444");
                     node1_hex.pop();
                     node1_hex.push('5');
                     let node0 = Node::from_hex(&node0_hex).unwrap();
                     let node1 = Node::from_hex(&node1_hex).unwrap();
                     idx.insert(0, node0);
                     nt.insert(idx, &node0, 0)?;
                     idx.insert(1, node1);
                     nt.insert(idx, &node1, 1)?;
                     assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
                     assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
                     Ok(())
                 }
                 #[test]
                 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
                     let mut idx = TestNtIndex::new();
                     idx.insert(0, "1234")?;
                     idx.insert(1, "1235")?;
                     idx.insert(2, "131")?;
                     idx.insert(3, "cafe")?;
                     let mut idx = idx.commit();
                     assert_eq!(idx.find_hex("1234")?, Some(0));
                     assert_eq!(idx.find_hex("1235")?, Some(1));
                     assert_eq!(idx.find_hex("131")?, Some(2));
                     assert_eq!(idx.find_hex("cafe")?, Some(3));
                     // we did not add anything since init from readonly
                     assert_eq!(idx.nt.masked_readonly_blocks(), 0);
                     idx.insert(4, "123A")?;
                     assert_eq!(idx.find_hex("1234")?, Some(0));
                     assert_eq!(idx.find_hex("1235")?, Some(1));
                     assert_eq!(idx.find_hex("131")?, Some(2));
                     assert_eq!(idx.find_hex("cafe")?, Some(3));
                     assert_eq!(idx.find_hex("123A")?, Some(4));
                     // we masked blocks for all prefixes of "123", including the root
                     assert_eq!(idx.nt.masked_readonly_blocks(), 4);
                     eprintln!("{:?}", idx.nt);
                     idx.insert(5, "c0")?;
                     assert_eq!(idx.find_hex("cafe")?, Some(3));
                     assert_eq!(idx.find_hex("c0")?, Some(5));
                     assert_eq!(idx.find_hex("c1")?, None);
                     assert_eq!(idx.find_hex("1234")?, Some(0));
                     // inserting "c0" is just splitting the 'c' slot of the mutable root,
                     // it doesn't mask anything
                     assert_eq!(idx.nt.masked_readonly_blocks(), 4);
                     Ok(())
                 }
                 #[test]
                 fn test_invalidate_all() -> Result<(), NodeMapError> {
                     let mut idx = TestNtIndex::new();
                     idx.insert(0, "1234")?;
                     idx.insert(1, "1235")?;
                     idx.insert(2, "131")?;
                     idx.insert(3, "cafe")?;
                     let mut idx = idx.commit();
                     idx.nt.invalidate_all();
                     assert_eq!(idx.find_hex("1234")?, None);
                     assert_eq!(idx.find_hex("1235")?, None);
                     assert_eq!(idx.find_hex("131")?, None);
                     assert_eq!(idx.find_hex("cafe")?, None);
                     // all the readonly blocks have been masked, this is the
                     // conventional expected response
                     assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
                     Ok(())
                 }
                 #[test]
                 fn test_into_added_empty() {
                     assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
                     assert!(sample_nodetree()
                         .into_readonly_and_added_bytes()
                         .1
                         .is_empty());
                 }
                 #[test]
                 fn test_into_added_bytes() -> Result<(), NodeMapError> {
                     let mut idx = TestNtIndex::new();
                     idx.insert(0, "1234")?;
                     let mut idx = idx.commit();
                     idx.insert(4, "cafe")?;
                     let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
                     // only the root block has been changed
                     assert_eq!(bytes.len(), size_of::<Block>());
                     // big endian for -2
                     assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
                     // big endian for -6
                     assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
                     Ok(())
                 }
             }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages