##// END OF EJS Templates
revlog: make the rust test for node hex prefix resolution exercise the nodemap
Arseniy Alekseyev -
r51879:eccf7dc7 stable
parent child Browse files
Show More
@@ -1,825 +1,843 b''
1 1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 10 mod nodemap_docket;
11 11 pub mod path_encode;
12 12 pub use node::{FromHexError, Node, NodePrefix};
13 13 pub mod changelog;
14 14 pub mod filelog;
15 15 pub mod index;
16 16 pub mod manifest;
17 17 pub mod patch;
18 18
19 19 use std::borrow::Cow;
20 20 use std::io::Read;
21 21 use std::ops::Deref;
22 22 use std::path::Path;
23 23
24 24 use flate2::read::ZlibDecoder;
25 25 use sha1::{Digest, Sha1};
26 26 use std::cell::RefCell;
27 27 use zstd;
28 28
29 29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 30 use self::nodemap_docket::NodeMapDocket;
31 31 use super::index::Index;
32 32 use super::nodemap::{NodeMap, NodeMapError};
33 33 use crate::errors::HgError;
34 34 use crate::vfs::Vfs;
35 35
/// Mercurial revision numbers
///
/// As noted in revlog.c, revision numbers are actually encoded in
/// 4 bytes, and are liberally converted to ints, whence the i32
pub type Revision = i32;

/// Marker expressing the absence of a parent
///
/// Independently of the actual representation, `NULL_REVISION` is guaranteed
/// to be smaller than all existing revisions.
pub const NULL_REVISION: Revision = -1;

/// Same as `mercurial.node.wdirrev`
///
/// This is also equal to `i32::max_value()`, but it's better to spell
/// it out explicitly, same as in `mercurial.node`
#[allow(clippy::unreadable_literal)]
pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;

/// Hexadecimal node ID standing for the working directory: forty `f`
/// characters (cf. `WORKING_DIRECTORY_REVISION` above).
pub const WORKING_DIRECTORY_HEX: &str =
    "ffffffffffffffffffffffffffffffffffffffff";
/// The simplest expression of what we need of Mercurial DAGs.
pub trait Graph {
    /// Return the two parents of the given `Revision`.
    ///
    /// Each of the parents can be independently `NULL_REVISION`
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
}
65 65
/// Errors that can be returned by [`Graph`] implementations.
#[derive(Clone, Debug, PartialEq)]
pub enum GraphError {
    /// A parent revision number was outside the valid range.
    ParentOutOfRange(Revision),
    /// Asking for the parents of the working directory pseudo-revision
    /// is not supported.
    WorkingDirectoryUnsupported,
}
71 71
/// The Mercurial Revlog Index
///
/// This is currently limited to the minimal interface that is needed for
/// the [`nodemap`](nodemap/index.html) module
pub trait RevlogIndex {
    /// Total number of Revisions referenced in this index
    fn len(&self) -> usize;

    /// Whether the index references no revision at all.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Return a reference to the Node or `None` if rev is out of bounds
    ///
    /// `NULL_REVISION` is not considered to be out of bounds.
    fn node(&self, rev: Revision) -> Option<&Node>;
}
89 89
// Per-revision flags. Revisions flagged as censored have their data
// access refused (see `RevlogEntry::data`); ELLIPSIS is known to not
// change the content (see `has_length_affecting_flag_processor`).
const REVISION_FLAG_CENSORED: u16 = 1 << 15;
const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;

// Keep this in sync with REVIDX_KNOWN_FLAGS in
// mercurial/revlogutils/flagutil.py
const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_HASCOPIESINFO;

/// Flags carried by the synthetic entry for `NULL_REVISION`: none.
const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
103 103
/// Errors that can occur while reading from a revlog.
#[derive(Debug, derive_more::From)]
pub enum RevlogError {
    /// The requested revision or node does not exist in this revlog.
    InvalidRevision,
    /// Working directory is not supported
    WDirUnsupported,
    /// Found more than one entry whose ID match the requested prefix
    AmbiguousPrefix,
    /// Any lower-level error (I/O, corruption, ...).
    #[from]
    Other(HgError),
}
114 114
115 115 impl From<NodeMapError> for RevlogError {
116 116 fn from(error: NodeMapError) -> Self {
117 117 match error {
118 118 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
119 119 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
120 120 format!("nodemap point to revision {} not in index", rev),
121 121 ),
122 122 }
123 123 }
124 124 }
125 125
126 126 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
127 127 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
128 128 }
129 129
impl RevlogError {
    /// Convenience constructor: wrap a corruption error (see the
    /// free function [`corrupted`]) into [`RevlogError::Other`].
    fn corrupted<S: AsRef<str>>(context: S) -> Self {
        RevlogError::Other(corrupted(context))
    }
}
135 135
/// Read only implementation of revlog.
pub struct Revlog {
    /// When index and data are not interleaved: bytes of the revlog index.
    /// When index and data are interleaved: bytes of the revlog index and
    /// data.
    index: Index,
    /// When index and data are not interleaved: bytes of the revlog data
    data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
    /// When present on disk: the persistent nodemap for this revlog
    /// (used by `rev_from_node` to avoid a linear scan of the index)
    nodemap: Option<nodemap::NodeTree>,
}
147 147
impl Revlog {
    /// Open a revlog index file.
    ///
    /// It will also open the associated data file if index and data are not
    /// interleaved.
    pub fn open(
        store_vfs: &Vfs,
        index_path: impl AsRef<Path>,
        data_path: Option<&Path>,
        use_nodemap: bool,
    ) -> Result<Self, HgError> {
        Self::open_gen(store_vfs, index_path, data_path, use_nodemap, None)
    }

    /// Shared implementation of [`Revlog::open`], with an extra hook for
    /// tests.
    ///
    /// `nodemap_for_test`, when `Some`, takes precedence over any
    /// persistent nodemap found on disk. This lets tests exercise the
    /// nodemap code path even for inline/small revlogs that would
    /// normally not have one.
    fn open_gen(
        store_vfs: &Vfs,
        index_path: impl AsRef<Path>,
        data_path: Option<&Path>,
        use_nodemap: bool,
        nodemap_for_test: Option<nodemap::NodeTree>,
    ) -> Result<Self, HgError> {
        let index_path = index_path.as_ref();
        let index = {
            match store_vfs.mmap_open_opt(&index_path)? {
                // A missing index file is treated as an empty revlog.
                None => Index::new(Box::new(vec![])),
                Some(index_mmap) => {
                    let index = Index::new(Box::new(index_mmap))?;
                    Ok(index)
                }
            }
        }?;

        let default_data_path = index_path.with_extension("d");

        // type annotation required
        // won't recognize Mmap as Deref<Target = [u8]>
        let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
            if index.is_inline() {
                // Inline revlog: data lives inside the index file.
                None
            } else {
                let data_path = data_path.unwrap_or(&default_data_path);
                let data_mmap = store_vfs.mmap_open(data_path)?;
                Some(Box::new(data_mmap))
            };

        // An inline revlog never has a persistent nodemap.
        let nodemap = if index.is_inline() || !use_nodemap {
            None
        } else {
            NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
                |(docket, data)| {
                    nodemap::NodeTree::load_bytes(
                        Box::new(data),
                        docket.data_length,
                    )
                },
            )
        };

        // Test-injected nodemap wins over whatever was read from disk.
        let nodemap = nodemap_for_test.or(nodemap);

        Ok(Revlog {
            index,
            data_bytes,
            nodemap,
        })
    }

    /// Return number of entries of the `Revlog`.
    pub fn len(&self) -> usize {
        self.index.len()
    }

    /// Returns `true` if the `Revlog` has zero `entries`.
    pub fn is_empty(&self) -> bool {
        self.index.is_empty()
    }

    /// Returns the node ID for the given revision number, if it exists in
    /// this revlog
    pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
        if rev == NULL_REVISION {
            return Some(&NULL_NODE);
        }
        Some(self.index.get_entry(rev)?.hash())
    }

    /// Return the revision number for the given node ID, if it exists in
    /// this revlog
    pub fn rev_from_node(
        &self,
        node: NodePrefix,
    ) -> Result<Revision, RevlogError> {
        if let Some(nodemap) = &self.nodemap {
            // Fast path: prefix lookup through the persistent nodemap.
            nodemap
                .find_bin(&self.index, node)?
                .ok_or(RevlogError::InvalidRevision)
        } else {
            self.rev_from_node_no_persistent_nodemap(node)
        }
    }

    /// Same as `rev_from_node`, without using a persistent nodemap
    ///
    /// This is used as fallback when a persistent nodemap is not present.
    /// This happens when the persistent-nodemap experimental feature is not
    /// enabled, or for small revlogs.
    fn rev_from_node_no_persistent_nodemap(
        &self,
        node: NodePrefix,
    ) -> Result<Revision, RevlogError> {
        // Linear scan of the revlog
        // TODO: consider building a non-persistent nodemap in memory to
        // optimize these cases.
        let mut found_by_prefix = None;
        // Scan from the tip down to -1 so that the null node is also
        // considered as a prefix-match candidate.
        for rev in (-1..self.len() as Revision).rev() {
            let candidate_node = if rev == -1 {
                NULL_NODE
            } else {
                let index_entry =
                    self.index.get_entry(rev).ok_or_else(|| {
                        HgError::corrupted(
                            "revlog references a revision not in the index",
                        )
                    })?;
                *index_entry.hash()
            };
            // An exact match always wins, even over earlier prefix hits.
            if node == candidate_node {
                return Ok(rev);
            }
            if node.is_prefix_of(&candidate_node) {
                if found_by_prefix.is_some() {
                    return Err(RevlogError::AmbiguousPrefix);
                }
                found_by_prefix = Some(rev)
            }
        }
        found_by_prefix.ok_or(RevlogError::InvalidRevision)
    }

    /// Returns whether the given revision exists in this revlog.
    pub fn has_rev(&self, rev: Revision) -> bool {
        self.index.get_entry(rev).is_some()
    }

    /// Return the full data associated to a revision.
    ///
    /// All entries required to build the final data out of deltas will be
    /// retrieved as needed, and the deltas will be applied to the inital
    /// snapshot to rebuild the final data.
    pub fn get_rev_data(
        &self,
        rev: Revision,
    ) -> Result<Cow<[u8]>, RevlogError> {
        if rev == NULL_REVISION {
            return Ok(Cow::Borrowed(&[]));
        };
        Ok(self.get_entry(rev)?.data()?)
    }

    /// Check the hash of some given data against the recorded hash.
    ///
    /// The recorded hash covers the revision data together with the
    /// (sorted) hashes of both parents, see the free function [`hash`].
    pub fn check_hash(
        &self,
        p1: Revision,
        p2: Revision,
        expected: &[u8],
        data: &[u8],
    ) -> bool {
        let e1 = self.index.get_entry(p1);
        let h1 = match e1 {
            Some(ref entry) => entry.hash(),
            None => &NULL_NODE,
        };
        let e2 = self.index.get_entry(p2);
        let h2 = match e2 {
            Some(ref entry) => entry.hash(),
            None => &NULL_NODE,
        };

        hash(data, h1.as_bytes(), h2.as_bytes()) == expected
    }

    /// Build the full data of a revision out its snapshot
    /// and its deltas.
    fn build_data_from_deltas(
        snapshot: RevlogEntry,
        deltas: &[RevlogEntry],
    ) -> Result<Vec<u8>, HgError> {
        let snapshot = snapshot.data_chunk()?;
        // `deltas` was collected from entry to base, so apply in reverse
        // (oldest delta first).
        let deltas = deltas
            .iter()
            .rev()
            .map(RevlogEntry::data_chunk)
            .collect::<Result<Vec<_>, _>>()?;
        let patches: Vec<_> =
            deltas.iter().map(|d| patch::PatchList::new(d)).collect();
        let patch = patch::fold_patch_lists(&patches);
        Ok(patch.apply(&snapshot))
    }

    /// Return the revlog data.
    ///
    /// Panics if the revlog is inline or if the data was never loaded,
    /// which would be a programming error.
    fn data(&self) -> &[u8] {
        match &self.data_bytes {
            Some(data_bytes) => data_bytes,
            None => panic!(
                "forgot to load the data or trying to access inline data"
            ),
        }
    }

    /// Synthesize the entry for `NULL_REVISION` (empty data, no parents).
    pub fn make_null_entry(&self) -> RevlogEntry {
        RevlogEntry {
            revlog: self,
            rev: NULL_REVISION,
            bytes: b"",
            compressed_len: 0,
            uncompressed_len: 0,
            base_rev_or_base_of_delta_chain: None,
            p1: NULL_REVISION,
            p2: NULL_REVISION,
            flags: NULL_REVLOG_ENTRY_FLAGS,
            hash: NULL_NODE,
        }
    }

    /// Get an entry of the revlog.
    pub fn get_entry(
        &self,
        rev: Revision,
    ) -> Result<RevlogEntry, RevlogError> {
        if rev == NULL_REVISION {
            return Ok(self.make_null_entry());
        }
        let index_entry = self
            .index
            .get_entry(rev)
            .ok_or(RevlogError::InvalidRevision)?;
        let start = index_entry.offset();
        let end = start + index_entry.compressed_len() as usize;
        let data = if self.index.is_inline() {
            self.index.data(start, end)
        } else {
            &self.data()[start..end]
        };
        let entry = RevlogEntry {
            revlog: self,
            rev,
            bytes: data,
            compressed_len: index_entry.compressed_len(),
            uncompressed_len: index_entry.uncompressed_len(),
            // An entry whose recorded base is itself has no delta base:
            // it is a full snapshot (see `RevlogEntry::is_delta`).
            base_rev_or_base_of_delta_chain: if index_entry
                .base_revision_or_base_of_delta_chain()
                == rev
            {
                None
            } else {
                Some(index_entry.base_revision_or_base_of_delta_chain())
            },
            p1: index_entry.p1(),
            p2: index_entry.p2(),
            flags: index_entry.flags(),
            hash: *index_entry.hash(),
        };
        Ok(entry)
    }

    /// when resolving internal references within revlog, any errors
    /// should be reported as corruption, instead of e.g. "invalid revision"
    fn get_entry_internal(
        &self,
        rev: Revision,
    ) -> Result<RevlogEntry, HgError> {
        self.get_entry(rev)
            .map_err(|_| corrupted(format!("revision {} out of range", rev)))
    }
}
411 423
/// The revlog entry's bytes and the necessary informations to extract
/// the entry's data.
#[derive(Clone)]
pub struct RevlogEntry<'revlog> {
    /// The revlog this entry was read from (borrowed for the entry's life)
    revlog: &'revlog Revlog,
    /// The revision number of this entry
    rev: Revision,
    /// Raw on-disk bytes of the entry (possibly compressed, possibly a
    /// delta)
    bytes: &'revlog [u8],
    compressed_len: u32,
    uncompressed_len: i32,
    /// `None` when the entry is a full snapshot; `Some(base)` when its
    /// stored bytes are a delta against `base`
    base_rev_or_base_of_delta_chain: Option<Revision>,
    p1: Revision,
    p2: Revision,
    /// Bit combination of the `REVISION_FLAG_*` constants
    flags: u16,
    /// Recorded node ID, used for hash checking
    hash: Node,
}
427 439
428 440 thread_local! {
429 441 // seems fine to [unwrap] here: this can only fail due to memory allocation
430 442 // failing, and it's normal for that to cause panic.
431 443 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
432 444 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
433 445 }
434 446
/// Decompress zstd `bytes` into `buf` using the thread-local decoder,
/// returning the number of bytes written.
fn zstd_decompress_to_buffer(
    bytes: &[u8],
    buf: &mut Vec<u8>,
) -> Result<usize, std::io::Error> {
    ZSTD_DECODER
        .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
}
442 454
impl<'revlog> RevlogEntry<'revlog> {
    /// The revision number of this entry.
    pub fn revision(&self) -> Revision {
        self.rev
    }

    /// The recorded node ID of this entry.
    pub fn node(&self) -> &Node {
        &self.hash
    }

    /// The uncompressed length, or `None` when the stored value does not
    /// fit in a `u32` (e.g. it is negative).
    pub fn uncompressed_len(&self) -> Option<u32> {
        u32::try_from(self.uncompressed_len).ok()
    }

    /// Whether this entry has a non-null first parent.
    pub fn has_p1(&self) -> bool {
        self.p1 != NULL_REVISION
    }

    /// The entry for the first parent, or `Ok(None)` if it is null.
    pub fn p1_entry(
        &self,
    ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
        if self.p1 == NULL_REVISION {
            Ok(None)
        } else {
            Ok(Some(self.revlog.get_entry(self.p1)?))
        }
    }

    /// The entry for the second parent, or `Ok(None)` if it is null.
    pub fn p2_entry(
        &self,
    ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
        if self.p2 == NULL_REVISION {
            Ok(None)
        } else {
            Ok(Some(self.revlog.get_entry(self.p2)?))
        }
    }

    /// The first parent revision, with `None` standing for the null
    /// revision.
    pub fn p1(&self) -> Option<Revision> {
        if self.p1 == NULL_REVISION {
            None
        } else {
            Some(self.p1)
        }
    }

    /// The second parent revision, with `None` standing for the null
    /// revision.
    pub fn p2(&self) -> Option<Revision> {
        if self.p2 == NULL_REVISION {
            None
        } else {
            Some(self.p2)
        }
    }

    /// Whether this revision is censored (its data must not be accessed).
    pub fn is_censored(&self) -> bool {
        (self.flags & REVISION_FLAG_CENSORED) != 0
    }

    /// Whether any flag set on this entry may change the length of the
    /// revision data.
    pub fn has_length_affecting_flag_processor(&self) -> bool {
        // Relevant Python code: revlog.size()
        // note: ELLIPSIS is known to not change the content
        (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
    }

    /// The data for this entry, after resolving deltas if any.
    pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
        let mut entry = self.clone();
        let mut delta_chain = vec![];

        // The meaning of `base_rev_or_base_of_delta_chain` depends on
        // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
        // `mercurial/revlogutils/constants.py` and the code in
        // [_chaininfo] and in [index_deltachain].
        let uses_generaldelta = self.revlog.index.uses_generaldelta();
        while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
            let base_rev = if uses_generaldelta {
                base_rev
            } else {
                // Without generaldelta, deltas are always against the
                // previous revision.
                entry.rev - 1
            };
            delta_chain.push(entry);
            entry = self.revlog.get_entry_internal(base_rev)?;
        }

        let data = if delta_chain.is_empty() {
            entry.data_chunk()?
        } else {
            Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
        };

        Ok(data)
    }

    /// Verify `data` against this entry's recorded hash, passing it
    /// through on success.
    fn check_data(
        &self,
        data: Cow<'revlog, [u8]>,
    ) -> Result<Cow<'revlog, [u8]>, HgError> {
        if self.revlog.check_hash(
            self.p1,
            self.p2,
            self.hash.as_bytes(),
            &data,
        ) {
            Ok(data)
        } else {
            // Ellipsis revisions legitimately fail the plain hash check;
            // supporting them would need a flag processor, which rhg
            // does not have.
            if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
                return Err(HgError::unsupported(
                    "ellipsis revisions are not supported by rhg",
                ));
            }
            Err(corrupted(format!(
                "hash check failed for revision {}",
                self.rev
            )))
        }
    }

    /// The fully-resolved, hash-checked data for this entry.
    ///
    /// Errors out on censored revisions and on hash mismatches.
    pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
        let data = self.rawdata()?;
        if self.rev == NULL_REVISION {
            return Ok(data);
        }
        if self.is_censored() {
            return Err(HgError::CensoredNodeError);
        }
        self.check_data(data)
    }

    /// Extract the data contained in the entry.
    /// This may be a delta. (See `is_delta`.)
    fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
        if self.bytes.is_empty() {
            return Ok(Cow::Borrowed(&[]));
        }
        // The first byte of an entry identifies its compression scheme.
        match self.bytes[0] {
            // Revision data is the entirety of the entry, including this
            // header.
            b'\0' => Ok(Cow::Borrowed(self.bytes)),
            // Raw revision data follows.
            b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
            // zlib (RFC 1950) data.
            b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
            // zstd data.
            b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
            // A proper new format should have had a repo/store requirement.
            format_type => Err(corrupted(format!(
                "unknown compression header '{}'",
                format_type
            ))),
        }
    }

    /// Decompress the entry's zlib-compressed bytes.
    fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
        let mut decoder = ZlibDecoder::new(self.bytes);
        if self.is_delta() {
            // For a delta we only know an upper-ish bound (the compressed
            // length), so read until EOF.
            let mut buf = Vec::with_capacity(self.compressed_len as usize);
            decoder
                .read_to_end(&mut buf)
                .map_err(|e| corrupted(e.to_string()))?;
            Ok(buf)
        } else {
            // For a snapshot the uncompressed length is known exactly.
            let cap = self.uncompressed_len.max(0) as usize;
            let mut buf = vec![0; cap];
            decoder
                .read_exact(&mut buf)
                .map_err(|e| corrupted(e.to_string()))?;
            Ok(buf)
        }
    }

    /// Decompress the entry's zstd-compressed bytes.
    fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
        let cap = self.uncompressed_len.max(0) as usize;
        if self.is_delta() {
            // [cap] is usually an over-estimate of the space needed because
            // it's the length of delta-decoded data, but we're interested
            // in the size of the delta.
            // This means we have to [shrink_to_fit] to avoid holding on
            // to a large chunk of memory, but it also means we must have a
            // fallback branch, for the case when the delta is longer than
            // the original data (surprisingly, this does happen in practice)
            let mut buf = Vec::with_capacity(cap);
            match zstd_decompress_to_buffer(self.bytes, &mut buf) {
                Ok(_) => buf.shrink_to_fit(),
                Err(_) => {
                    buf.clear();
                    zstd::stream::copy_decode(self.bytes, &mut buf)
                        .map_err(|e| corrupted(e.to_string()))?;
                }
            };
            Ok(buf)
        } else {
            let mut buf = Vec::with_capacity(cap);
            let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
                .map_err(|e| corrupted(e.to_string()))?;
            if len != self.uncompressed_len as usize {
                Err(corrupted("uncompressed length does not match"))
            } else {
                Ok(buf)
            }
        }
    }

    /// Tell if the entry is a snapshot or a delta
    /// (influences on decompression).
    fn is_delta(&self) -> bool {
        self.base_rev_or_base_of_delta_chain.is_some()
    }
}
650 662
651 663 /// Calculate the hash of a revision given its data and its parents.
652 664 fn hash(
653 665 data: &[u8],
654 666 p1_hash: &[u8],
655 667 p2_hash: &[u8],
656 668 ) -> [u8; NODE_BYTES_LENGTH] {
657 669 let mut hasher = Sha1::new();
658 670 let (a, b) = (p1_hash, p2_hash);
659 671 if a > b {
660 672 hasher.update(b);
661 673 hasher.update(a);
662 674 } else {
663 675 hasher.update(a);
664 676 hasher.update(b);
665 677 }
666 678 hasher.update(data);
667 679 *hasher.finalize().as_ref()
668 680 }
669 681
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
    use itertools::Itertools;

    /// An empty index file yields an empty revlog whose only usable
    /// revision is the null revision.
    #[test]
    fn test_empty() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
        assert!(revlog.is_empty());
        assert_eq!(revlog.len(), 0);
        assert!(revlog.get_entry(0).is_err());
        assert!(!revlog.has_rev(0));
        assert_eq!(
            revlog.rev_from_node(NULL_NODE.into()).unwrap(),
            NULL_REVISION
        );
        let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap();
        assert_eq!(null_entry.revision(), NULL_REVISION);
        assert!(null_entry.data().unwrap().is_empty());
    }

    /// Basic entry/parent accessors on a hand-built 3-revision inline
    /// revlog.
    #[test]
    fn test_inline() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
            .unwrap();
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        let entry2_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_p1(0)
            .with_p2(1)
            .with_node(node2)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        let entry0 = revlog.get_entry(0).ok().unwrap();
        assert_eq!(entry0.revision(), 0);
        assert_eq!(*entry0.node(), node0);
        assert!(!entry0.has_p1());
        assert_eq!(entry0.p1(), None);
        assert_eq!(entry0.p2(), None);
        let p1_entry = entry0.p1_entry().unwrap();
        assert!(p1_entry.is_none());
        let p2_entry = entry0.p2_entry().unwrap();
        assert!(p2_entry.is_none());

        let entry1 = revlog.get_entry(1).ok().unwrap();
        assert_eq!(entry1.revision(), 1);
        assert_eq!(*entry1.node(), node1);
        assert!(!entry1.has_p1());
        assert_eq!(entry1.p1(), None);
        assert_eq!(entry1.p2(), None);
        let p1_entry = entry1.p1_entry().unwrap();
        assert!(p1_entry.is_none());
        let p2_entry = entry1.p2_entry().unwrap();
        assert!(p2_entry.is_none());

        let entry2 = revlog.get_entry(2).ok().unwrap();
        assert_eq!(entry2.revision(), 2);
        assert_eq!(*entry2.node(), node2);
        assert!(entry2.has_p1());
        assert_eq!(entry2.p1(), Some(0));
        assert_eq!(entry2.p2(), Some(1));
        let p1_entry = entry2.p1_entry().unwrap();
        assert!(p1_entry.is_some());
        assert_eq!(p1_entry.unwrap().revision(), 0);
        let p2_entry = entry2.p2_entry().unwrap();
        assert!(p2_entry.is_some());
        assert_eq!(p2_entry.unwrap().revision(), 1);
    }

    /// Node and node-prefix resolution, going through the nodemap code
    /// path: a `TestNtIndex` nodemap is injected via `open_gen` since this
    /// small inline revlog would not have a persistent one on disk.
    #[test]
    fn test_nodemap() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };

        // building a revlog with a forced Node starting with zeros
        // This is a corruption, but it does not preclude using the nodemap
        // if we don't try and access the data
        let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();

        let mut idx = nodemap::tests::TestNtIndex::new();
        idx.insert_node(0, node0).unwrap();
        idx.insert_node(1, node1).unwrap();

        let revlog =
            Revlog::open_gen(&vfs, "foo.i", None, true, Some(idx.nt)).unwrap();

        // accessing the data shows the corruption
        revlog.get_entry(0).unwrap().data().unwrap_err();

        assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
        assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
        assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
        // "000" is a prefix of the null node only (node0 starts with "00d")
        assert_eq!(
            revlog
                .rev_from_node(NodePrefix::from_hex("000").unwrap())
                .unwrap(),
            -1
        );
        assert_eq!(
            revlog
                .rev_from_node(NodePrefix::from_hex("b00").unwrap())
                .unwrap(),
            1
        );
        // "00" matches both the null node and node0, hence the ambiguity.
        // RevlogError does not implement PartialEq
        // (ultimately because io::Error does not)
        match revlog
            .rev_from_node(NodePrefix::from_hex("00").unwrap())
            .expect_err("Expected to give AmbiguousPrefix error")
        {
            RevlogError::AmbiguousPrefix => (),
            e => {
                panic!("Got another error than AmbiguousPrefix: {:?}", e);
            }
        };
    }
}
@@ -1,1067 +1,1074 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 7 //!
8 8 //! This provides a variation on the 16-ary radix tree that is
9 9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 10 //! on disk.
11 11 //!
12 12 //! Following existing implicit conventions, the "nodemap" terminology
13 13 //! is used in a more abstract context.
14 14
15 15 use super::{
16 16 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
17 17 };
18 18
19 19 use bytes_cast::{unaligned, BytesCast};
20 20 use std::cmp::max;
21 21 use std::fmt;
22 22 use std::mem::{self, align_of, size_of};
23 23 use std::ops::Deref;
24 24 use std::ops::Index;
25 25
/// Failure modes of [`NodeMap`] operations.
#[derive(Debug, PartialEq)]
pub enum NodeMapError {
    /// A `NodePrefix` matches several [`Revision`]s.
    ///
    /// This can be returned by methods meant for (at most) one match.
    MultipleResults,
    /// A `Revision` stored in the nodemap could not be found in the index
    RevisionNotInIndex(Revision),
}
35 35
/// Mapping system from Mercurial nodes to revision numbers.
///
/// ## `RevlogIndex` and `NodeMap`
///
/// One way to think about their relationship is that
/// the `NodeMap` is a prefix-oriented reverse index of the [`Node`]
/// information carried by a [`RevlogIndex`].
///
/// Many of the methods in this trait take a `RevlogIndex` argument
/// which is used for validation of their results. This index must naturally
/// be the one the `NodeMap` is about, and it must be consistent.
///
/// Notably, the `NodeMap` must not store
/// information about more `Revision` values than there are in the index.
/// In these methods, if an encountered `Revision` is not in the index, a
/// [RevisionNotInIndex](NodeMapError) error is returned.
///
/// In insert operations, the rule is thus that the `NodeMap` must always
/// be updated after the `RevlogIndex` it is about.
pub trait NodeMap {
    /// Find the unique `Revision` having the given `Node`
    ///
    /// If no Revision matches the given `Node`, `Ok(None)` is returned.
    fn find_node(
        &self,
        index: &impl RevlogIndex,
        node: &Node,
    ) -> Result<Option<Revision>, NodeMapError> {
        self.find_bin(index, node.into())
    }

    /// Find the unique Revision whose `Node` starts with a given binary prefix
    ///
    /// If no Revision matches the given prefix, `Ok(None)` is returned.
    ///
    /// If several Revisions match the given prefix, a
    /// [MultipleResults](NodeMapError) error is returned.
    fn find_bin(
        &self,
        idx: &impl RevlogIndex,
        prefix: NodePrefix,
    ) -> Result<Option<Revision>, NodeMapError>;

    /// Give the size of the shortest node prefix that determines
    /// the revision uniquely.
    ///
    /// From a binary node prefix, if it is matched in the node map, this
    /// returns the number of hexadecimal digits that would have sufficed
    /// to find the revision uniquely.
    ///
    /// Returns `None` if no [`Revision`] could be found for the prefix.
    ///
    /// If several Revisions match the given prefix, a
    /// [MultipleResults](NodeMapError) error is returned.
    fn unique_prefix_len_bin(
        &self,
        idx: &impl RevlogIndex,
        node_prefix: NodePrefix,
    ) -> Result<Option<usize>, NodeMapError>;

    /// Same as [unique_prefix_len_bin](Self::unique_prefix_len_bin), with
    /// a full [`Node`] as input
    fn unique_prefix_len_node(
        &self,
        idx: &impl RevlogIndex,
        node: &Node,
    ) -> Result<Option<usize>, NodeMapError> {
        self.unique_prefix_len_bin(idx, node.into())
    }
}
106 106
/// A [`NodeMap`] that can also be updated in place.
pub trait MutableNodeMap: NodeMap {
    /// Record that `node` is the [`Node`] of revision `rev`.
    ///
    /// Per the trait-level rule, `index` must already know `rev`:
    /// the index is updated before the nodemap.
    fn insert<I: RevlogIndex>(
        &mut self,
        index: &I,
        node: &Node,
        rev: Revision,
    ) -> Result<(), NodeMapError>;
}
115 115
/// Low level NodeTree [`Block`] elements
///
/// These are exactly as for instance on persistent storage.
// Big-endian on purpose: the serialized form must be stable across
// architectures (see the endianness note on `Block`).
type RawElement = unaligned::I32Be;
120 120
/// High level representation of values in NodeTree
/// [`Blocks`](struct.Block.html)
///
/// This is the high level representation that most algorithms should
/// use.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Element {
    /// A [`Revision`] leaf: the lookup concludes here.
    Rev(Revision),
    /// Index of a child [`Block`] to descend into.
    Block(usize),
    /// Absent entry: nothing recorded under this nybble.
    None,
}
132 132
133 133 impl From<RawElement> for Element {
134 134 /// Conversion from low level representation, after endianness conversion.
135 135 ///
136 136 /// See [`Block`](struct.Block.html) for explanation about the encoding.
137 137 fn from(raw: RawElement) -> Element {
138 138 let int = raw.get();
139 139 if int >= 0 {
140 140 Element::Block(int as usize)
141 141 } else if int == -1 {
142 142 Element::None
143 143 } else {
144 144 Element::Rev(-int - 2)
145 145 }
146 146 }
147 147 }
148 148
149 149 impl From<Element> for RawElement {
150 150 fn from(element: Element) -> RawElement {
151 151 RawElement::from(match element {
152 152 Element::None => 0,
153 153 Element::Block(i) => i as i32,
154 154 Element::Rev(rev) => -rev - 2,
155 155 })
156 156 }
157 157 }
158 158
const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble

/// A logical block of the [`NodeTree`], packed with a fixed size.
///
/// These are always used in container types implementing `Index<Block>`,
/// such as `&Block`
///
/// As an array of integers, its ith element encodes that the
/// ith potential edge from the block, representing the ith hexadecimal digit
/// (nybble) `i` is either:
///
/// - absent (value -1)
/// - another `Block` in the same indexable container (value ≥ 0)
/// - a [`Revision`] leaf (value ≤ -2)
///
/// Endianness has to be fixed for consistency on shared storage across
/// different architectures.
///
/// A key difference with the C `nodetree` is that we need to be
/// able to represent the [`Block`] at index 0, hence -1 is the empty marker
/// rather than 0 and the [`Revision`] range upper limit of -2 instead of -1.
///
/// Another related difference is that `NULL_REVISION` (-1) is not
/// represented at all, because we want an immutable empty nodetree
/// to be valid.
#[derive(Copy, Clone, BytesCast, PartialEq)]
#[repr(transparent)]
pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
187 187
188 188 impl Block {
189 189 fn new() -> Self {
190 190 let absent_node = RawElement::from(-1);
191 191 Block([absent_node; ELEMENTS_PER_BLOCK])
192 192 }
193 193
194 194 fn get(&self, nybble: u8) -> Element {
195 195 self.0[nybble as usize].into()
196 196 }
197 197
198 198 fn set(&mut self, nybble: u8, element: Element) {
199 199 self.0[nybble as usize] = element.into()
200 200 }
201 201 }
202 202
203 203 impl fmt::Debug for Block {
204 204 /// sparse representation for testing and debugging purposes
205 205 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
206 206 f.debug_map()
207 207 .entries((0..16).filter_map(|i| match self.get(i) {
208 208 Element::None => None,
209 209 element => Some((i, element)),
210 210 }))
211 211 .finish()
212 212 }
213 213 }
214 214
/// A mutable 16-radix tree with the root block logically at the end
///
/// Because of the append only nature of our node trees, we need to
/// keep the original untouched and store new blocks separately.
///
/// The mutable root [`Block`] is kept apart so that we don't have to rebump
/// it on each insertion.
pub struct NodeTree {
    /// Persisted blocks, never mutated (append-only storage on disk).
    readonly: Box<dyn Deref<Target = [Block]> + Send>,
    /// Blocks added (or copied-on-write from `readonly`) since loading.
    growable: Vec<Block>,
    /// The current root block, logically the last block of the tree.
    root: Block,
    /// Count of `readonly` blocks shadowed by mutable copies in `growable`.
    masked_inner_blocks: usize,
}
228 228
229 229 impl Index<usize> for NodeTree {
230 230 type Output = Block;
231 231
232 232 fn index(&self, i: usize) -> &Block {
233 233 let ro_len = self.readonly.len();
234 234 if i < ro_len {
235 235 &self.readonly[i]
236 236 } else if i == ro_len + self.growable.len() {
237 237 &self.root
238 238 } else {
239 239 &self.growable[i - ro_len]
240 240 }
241 241 }
242 242 }
243 243
244 244 /// Return `None` unless the [`Node`] for `rev` has given prefix in `idx`.
245 245 fn has_prefix_or_none(
246 246 idx: &impl RevlogIndex,
247 247 prefix: NodePrefix,
248 248 rev: Revision,
249 249 ) -> Result<Option<Revision>, NodeMapError> {
250 250 idx.node(rev)
251 251 .ok_or(NodeMapError::RevisionNotInIndex(rev))
252 252 .map(|node| {
253 253 if prefix.is_prefix_of(node) {
254 254 Some(rev)
255 255 } else {
256 256 None
257 257 }
258 258 })
259 259 }
260 260
/// validate that the candidate's node starts indeed with given prefix,
/// and treat ambiguities related to [`NULL_REVISION`].
///
/// From the data in the NodeTree, one can only conclude that some
/// revision is the only one for a *subprefix* of the one being looked up.
fn validate_candidate(
    idx: &impl RevlogIndex,
    prefix: NodePrefix,
    candidate: (Option<Revision>, usize),
) -> Result<(Option<Revision>, usize), NodeMapError> {
    let (rev, steps) = candidate;
    if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
        // The prefix has a non-zero nybble, so it cannot be a prefix of
        // NULL_NODE: checking the candidate against the index is enough.
        // The reported prefix length is at least up to that first
        // non-zero nybble.
        rev.map_or(Ok((None, steps)), |r| {
            has_prefix_or_none(idx, prefix, r)
                .map(|opt| (opt, max(steps, nz_nybble + 1)))
        })
    } else {
        // the prefix is only made of zeros; NULL_REVISION always matches it
        // and any other *valid* result is an ambiguity
        match rev {
            None => Ok((Some(NULL_REVISION), steps + 1)),
            Some(r) => match has_prefix_or_none(idx, prefix, r)? {
                None => Ok((Some(NULL_REVISION), steps + 1)),
                _ => Err(NodeMapError::MultipleResults),
            },
        }
    }
}
289 289
290 290 impl NodeTree {
291 291 /// Initiate a NodeTree from an immutable slice-like of `Block`
292 292 ///
293 293 /// We keep `readonly` and clone its root block if it isn't empty.
294 294 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
295 295 let root = readonly.last().cloned().unwrap_or_else(Block::new);
296 296 NodeTree {
297 297 readonly,
298 298 growable: Vec::new(),
299 299 root,
300 300 masked_inner_blocks: 0,
301 301 }
302 302 }
303 303
304 304 /// Create from an opaque bunch of bytes
305 305 ///
306 306 /// The created [`NodeTreeBytes`] from `bytes`,
307 307 /// of which exactly `amount` bytes are used.
308 308 ///
309 309 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
310 310 /// - `amount` is expressed in bytes, and is not automatically derived from
311 311 /// `bytes`, so that a caller that manages them atomically can perform
312 312 /// temporary disk serializations and still rollback easily if needed.
313 313 /// First use-case for this would be to support Mercurial shell hooks.
314 314 ///
315 315 /// panics if `buffer` is smaller than `amount`
316 316 pub fn load_bytes(
317 317 bytes: Box<dyn Deref<Target = [u8]> + Send>,
318 318 amount: usize,
319 319 ) -> Self {
320 320 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
321 321 }
322 322
323 323 /// Retrieve added [`Block`]s and the original immutable data
324 324 pub fn into_readonly_and_added(
325 325 self,
326 326 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
327 327 let mut vec = self.growable;
328 328 let readonly = self.readonly;
329 329 if readonly.last() != Some(&self.root) {
330 330 vec.push(self.root);
331 331 }
332 332 (readonly, vec)
333 333 }
334 334
335 335 /// Retrieve added [`Block]s as bytes, ready to be written to persistent
336 336 /// storage
337 337 pub fn into_readonly_and_added_bytes(
338 338 self,
339 339 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
340 340 let (readonly, vec) = self.into_readonly_and_added();
341 341 // Prevent running `v`'s destructor so we are in complete control
342 342 // of the allocation.
343 343 let vec = mem::ManuallyDrop::new(vec);
344 344
345 345 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
346 346 // bytes, so this is perfectly safe.
347 347 let bytes = unsafe {
348 348 // Check for compatible allocation layout.
349 349 // (Optimized away by constant-folding + dead code elimination.)
350 350 assert_eq!(size_of::<Block>(), 64);
351 351 assert_eq!(align_of::<Block>(), 1);
352 352
353 353 // /!\ Any use of `vec` after this is use-after-free.
354 354 // TODO: use `into_raw_parts` once stabilized
355 355 Vec::from_raw_parts(
356 356 vec.as_ptr() as *mut u8,
357 357 vec.len() * size_of::<Block>(),
358 358 vec.capacity() * size_of::<Block>(),
359 359 )
360 360 };
361 361 (readonly, bytes)
362 362 }
363 363
364 364 /// Total number of blocks
365 365 fn len(&self) -> usize {
366 366 self.readonly.len() + self.growable.len() + 1
367 367 }
368 368
369 369 /// Implemented for completeness
370 370 ///
371 371 /// A `NodeTree` always has at least the mutable root block.
372 372 #[allow(dead_code)]
373 373 fn is_empty(&self) -> bool {
374 374 false
375 375 }
376 376
377 377 /// Main working method for `NodeTree` searches
378 378 ///
379 379 /// The first returned value is the result of analysing `NodeTree` data
380 380 /// *alone*: whereas `None` guarantees that the given prefix is absent
381 381 /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with
382 382 /// `Some(rev)`, it is to be understood that `rev` is the unique
383 383 /// [`Revision`] that could match the prefix. Actually, all that can
384 384 /// be inferred from
385 385 /// the `NodeTree` data is that `rev` is the revision with the longest
386 386 /// common node prefix with the given prefix.
387 387 ///
388 388 /// The second returned value is the size of the smallest subprefix
389 389 /// of `prefix` that would give the same result, i.e. not the
390 390 /// [MultipleResults](NodeMapError) error variant (again, using only the
391 391 /// data of the [`NodeTree`]).
392 392 fn lookup(
393 393 &self,
394 394 prefix: NodePrefix,
395 395 ) -> Result<(Option<Revision>, usize), NodeMapError> {
396 396 for (i, visit_item) in self.visit(prefix).enumerate() {
397 397 if let Some(opt) = visit_item.final_revision() {
398 398 return Ok((opt, i + 1));
399 399 }
400 400 }
401 401 Err(NodeMapError::MultipleResults)
402 402 }
403 403
404 404 fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
405 405 NodeTreeVisitor {
406 406 nt: self,
407 407 prefix,
408 408 visit: self.len() - 1,
409 409 nybble_idx: 0,
410 410 done: false,
411 411 }
412 412 }
413 413 /// Return a mutable reference for `Block` at index `idx`.
414 414 ///
415 415 /// If `idx` lies in the immutable area, then the reference is to
416 416 /// a newly appended copy.
417 417 ///
418 418 /// Returns (new_idx, glen, mut_ref) where
419 419 ///
420 420 /// - `new_idx` is the index of the mutable `Block`
421 421 /// - `mut_ref` is a mutable reference to the mutable Block.
422 422 /// - `glen` is the new length of `self.growable`
423 423 ///
424 424 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
425 425 /// itself because of the mutable borrow taken with the returned `Block`
426 426 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
427 427 let ro_blocks = &self.readonly;
428 428 let ro_len = ro_blocks.len();
429 429 let glen = self.growable.len();
430 430 if idx < ro_len {
431 431 self.masked_inner_blocks += 1;
432 432 self.growable.push(ro_blocks[idx]);
433 433 (glen + ro_len, &mut self.growable[glen], glen + 1)
434 434 } else if glen + ro_len == idx {
435 435 (idx, &mut self.root, glen)
436 436 } else {
437 437 (idx, &mut self.growable[idx - ro_len], glen)
438 438 }
439 439 }
440 440
441 441 /// Main insertion method
442 442 ///
443 443 /// This will dive in the node tree to find the deepest `Block` for
444 444 /// `node`, split it as much as needed and record `node` in there.
445 445 /// The method then backtracks, updating references in all the visited
446 446 /// blocks from the root.
447 447 ///
448 448 /// All the mutated `Block` are copied first to the growable part if
449 449 /// needed. That happens for those in the immutable part except the root.
450 450 pub fn insert<I: RevlogIndex>(
451 451 &mut self,
452 452 index: &I,
453 453 node: &Node,
454 454 rev: Revision,
455 455 ) -> Result<(), NodeMapError> {
456 456 let ro_len = &self.readonly.len();
457 457
458 458 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
459 459 let read_nybbles = visit_steps.len();
460 460 // visit_steps cannot be empty, since we always visit the root block
461 461 let deepest = visit_steps.pop().unwrap();
462 462
463 463 let (mut block_idx, mut block, mut glen) =
464 464 self.mutable_block(deepest.block_idx);
465 465
466 466 if let Element::Rev(old_rev) = deepest.element {
467 467 let old_node = index
468 468 .node(old_rev)
469 469 .ok_or(NodeMapError::RevisionNotInIndex(old_rev))?;
470 470 if old_node == node {
471 471 return Ok(()); // avoid creating lots of useless blocks
472 472 }
473 473
474 474 // Looping over the tail of nybbles in both nodes, creating
475 475 // new blocks until we find the difference
476 476 let mut new_block_idx = ro_len + glen;
477 477 let mut nybble = deepest.nybble;
478 478 for nybble_pos in read_nybbles..node.nybbles_len() {
479 479 block.set(nybble, Element::Block(new_block_idx));
480 480
481 481 let new_nybble = node.get_nybble(nybble_pos);
482 482 let old_nybble = old_node.get_nybble(nybble_pos);
483 483
484 484 if old_nybble == new_nybble {
485 485 self.growable.push(Block::new());
486 486 block = &mut self.growable[glen];
487 487 glen += 1;
488 488 new_block_idx += 1;
489 489 nybble = new_nybble;
490 490 } else {
491 491 let mut new_block = Block::new();
492 492 new_block.set(old_nybble, Element::Rev(old_rev));
493 493 new_block.set(new_nybble, Element::Rev(rev));
494 494 self.growable.push(new_block);
495 495 break;
496 496 }
497 497 }
498 498 } else {
499 499 // Free slot in the deepest block: no splitting has to be done
500 500 block.set(deepest.nybble, Element::Rev(rev));
501 501 }
502 502
503 503 // Backtrack over visit steps to update references
504 504 while let Some(visited) = visit_steps.pop() {
505 505 let to_write = Element::Block(block_idx);
506 506 if visit_steps.is_empty() {
507 507 self.root.set(visited.nybble, to_write);
508 508 break;
509 509 }
510 510 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
511 511 if block.get(visited.nybble) == to_write {
512 512 break;
513 513 }
514 514 block.set(visited.nybble, to_write);
515 515 block_idx = new_idx;
516 516 }
517 517 Ok(())
518 518 }
519 519
520 520 /// Make the whole `NodeTree` logically empty, without touching the
521 521 /// immutable part.
522 522 pub fn invalidate_all(&mut self) {
523 523 self.root = Block::new();
524 524 self.growable = Vec::new();
525 525 self.masked_inner_blocks = self.readonly.len();
526 526 }
527 527
528 528 /// Return the number of blocks in the readonly part that are currently
529 529 /// masked in the mutable part.
530 530 ///
531 531 /// The `NodeTree` structure has no efficient way to know how many blocks
532 532 /// are already unreachable in the readonly part.
533 533 ///
534 534 /// After a call to `invalidate_all()`, the returned number can be actually
535 535 /// bigger than the whole readonly part, a conventional way to mean that
536 536 /// all the readonly blocks have been masked. This is what is really
537 537 /// useful to the caller and does not require to know how many were
538 538 /// actually unreachable to begin with.
539 539 pub fn masked_readonly_blocks(&self) -> usize {
540 540 if let Some(readonly_root) = self.readonly.last() {
541 541 if readonly_root == &self.root {
542 542 return 0;
543 543 }
544 544 } else {
545 545 return 0;
546 546 }
547 547 self.masked_inner_blocks + 1
548 548 }
549 549 }
550 550
/// Byte-buffer-backed, read-only storage of [`Block`]s, usable as `&[Block]`.
pub struct NodeTreeBytes {
    // Raw backing storage (e.g. an mmap); may be longer than needed.
    buffer: Box<dyn Deref<Target = [u8]> + Send>,
    // Number of whole `Block`s actually contained in `buffer`.
    len_in_blocks: usize,
}
555 555
556 556 impl NodeTreeBytes {
557 557 fn new(
558 558 buffer: Box<dyn Deref<Target = [u8]> + Send>,
559 559 amount: usize,
560 560 ) -> Self {
561 561 assert!(buffer.len() >= amount);
562 562 let len_in_blocks = amount / size_of::<Block>();
563 563 NodeTreeBytes {
564 564 buffer,
565 565 len_in_blocks,
566 566 }
567 567 }
568 568 }
569 569
570 570 impl Deref for NodeTreeBytes {
571 571 type Target = [Block];
572 572
573 573 fn deref(&self) -> &[Block] {
574 574 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
575 575 // `NodeTreeBytes::new` already asserted that `self.buffer` is
576 576 // large enough.
577 577 .unwrap()
578 578 .0
579 579 }
580 580 }
581 581
/// Iterator state for walking a [`NodeTree`] along the nybbles of a prefix.
struct NodeTreeVisitor<'n> {
    nt: &'n NodeTree,
    // The prefix whose nybbles drive the descent.
    prefix: NodePrefix,
    // Index of the block to read on the next step (starts at the root).
    visit: usize,
    // Position of the next nybble of `prefix` to consume.
    nybble_idx: usize,
    // Set once a non-block element was yielded: the walk cannot go deeper.
    done: bool,
}
589 589
/// One step of a [`NodeTreeVisitor`] walk: which block was read, with which
/// nybble, and the element that was found there.
#[derive(Debug, PartialEq, Clone)]
struct NodeTreeVisitItem {
    block_idx: usize,
    nybble: u8,
    element: Element,
}
596 596
597 597 impl<'n> Iterator for NodeTreeVisitor<'n> {
598 598 type Item = NodeTreeVisitItem;
599 599
600 600 fn next(&mut self) -> Option<Self::Item> {
601 601 if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
602 602 return None;
603 603 }
604 604
605 605 let nybble = self.prefix.get_nybble(self.nybble_idx);
606 606 self.nybble_idx += 1;
607 607
608 608 let visit = self.visit;
609 609 let element = self.nt[visit].get(nybble);
610 610 if let Element::Block(idx) = element {
611 611 self.visit = idx;
612 612 } else {
613 613 self.done = true;
614 614 }
615 615
616 616 Some(NodeTreeVisitItem {
617 617 block_idx: visit,
618 618 nybble,
619 619 element,
620 620 })
621 621 }
622 622 }
623 623
impl NodeTreeVisitItem {
    // Return `Some(opt)` if this item is final, with `opt` being the
    // `Revision` that it may represent.
    //
    // If the item is not terminal, return `None`
    //
    // In other words: `Block` means "keep descending", `Rev` is a
    // conclusive match and `None` a conclusive miss.
    fn final_revision(&self) -> Option<Option<Revision>> {
        match self.element {
            Element::Block(_) => None,
            Element::Rev(r) => Some(Some(r)),
            Element::None => Some(None),
        }
    }
}
637 637
impl From<Vec<Block>> for NodeTree {
    /// Build a [`NodeTree`] taking ownership of `vec` as its readonly part.
    fn from(vec: Vec<Block>) -> Self {
        Self::new(Box::new(vec))
    }
}
643 643
impl fmt::Debug for NodeTree {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Coerce to a slice: the boxed `dyn Deref` itself has no `Debug`.
        let readonly: &[Block] = &*self.readonly;
        write!(
            f,
            "readonly: {:?}, growable: {:?}, root: {:?}",
            readonly, self.growable, self.root
        )
    }
}
654 654
impl Default for NodeTree {
    /// Create a fully mutable empty NodeTree
    fn default() -> Self {
        // No readonly blocks: the in-memory root is the only block.
        NodeTree::new(Box::new(Vec::new()))
    }
}
661 661
662 662 impl NodeMap for NodeTree {
663 663 fn find_bin<'a>(
664 664 &self,
665 665 idx: &impl RevlogIndex,
666 666 prefix: NodePrefix,
667 667 ) -> Result<Option<Revision>, NodeMapError> {
668 668 validate_candidate(idx, prefix, self.lookup(prefix)?)
669 669 .map(|(opt, _shortest)| opt)
670 670 }
671 671
672 672 fn unique_prefix_len_bin<'a>(
673 673 &self,
674 674 idx: &impl RevlogIndex,
675 675 prefix: NodePrefix,
676 676 ) -> Result<Option<usize>, NodeMapError> {
677 677 validate_candidate(idx, prefix, self.lookup(prefix)?)
678 678 .map(|(opt, shortest)| opt.map(|_rev| shortest))
679 679 }
680 680 }
681 681
682 682 #[cfg(test)]
683 mod tests {
683 pub mod tests {
684 684 use super::NodeMapError::*;
685 685 use super::*;
686 686 use crate::revlog::node::{hex_pad_right, Node};
687 687 use std::collections::HashMap;
688 688
689 689 /// Creates a `Block` using a syntax close to the `Debug` output
690 690 macro_rules! block {
691 691 {$($nybble:tt : $variant:ident($val:tt)),*} => (
692 692 {
693 693 let mut block = Block::new();
694 694 $(block.set($nybble, Element::$variant($val)));*;
695 695 block
696 696 }
697 697 )
698 698 }
699 699
700 700 #[test]
701 701 fn test_block_debug() {
702 702 let mut block = Block::new();
703 703 block.set(1, Element::Rev(3));
704 704 block.set(10, Element::Block(0));
705 705 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
706 706 }
707 707
708 708 #[test]
709 709 fn test_block_macro() {
710 710 let block = block! {5: Block(2)};
711 711 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
712 712
713 713 let block = block! {13: Rev(15), 5: Block(2)};
714 714 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
715 715 }
716 716
717 717 #[test]
718 718 fn test_raw_block() {
719 719 let mut raw = [255u8; 64];
720 720
721 721 let mut counter = 0;
722 722 for val in [0_i32, 15, -2, -1, -3].iter() {
723 723 for byte in val.to_be_bytes().iter() {
724 724 raw[counter] = *byte;
725 725 counter += 1;
726 726 }
727 727 }
728 728 let (block, _) = Block::from_bytes(&raw).unwrap();
729 729 assert_eq!(block.get(0), Element::Block(0));
730 730 assert_eq!(block.get(1), Element::Block(15));
731 731 assert_eq!(block.get(3), Element::None);
732 732 assert_eq!(block.get(2), Element::Rev(0));
733 733 assert_eq!(block.get(4), Element::Rev(1));
734 734 }
735 735
736 736 type TestIndex = HashMap<Revision, Node>;
737 737
738 738 impl RevlogIndex for TestIndex {
739 739 fn node(&self, rev: Revision) -> Option<&Node> {
740 740 self.get(&rev)
741 741 }
742 742
743 743 fn len(&self) -> usize {
744 744 self.len()
745 745 }
746 746 }
747 747
748 748 /// Pad hexadecimal Node prefix with zeros on the right
749 749 ///
750 750 /// This avoids having to repeatedly write very long hexadecimal
751 751 /// strings for test data, and brings actual hash size independency.
752 752 #[cfg(test)]
753 753 fn pad_node(hex: &str) -> Node {
754 754 Node::from_hex(&hex_pad_right(hex)).unwrap()
755 755 }
756 756
757 757 /// Pad hexadecimal Node prefix with zeros on the right, then insert
758 758 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
759 759 idx.insert(rev, pad_node(hex));
760 760 }
761 761
762 762 fn sample_nodetree() -> NodeTree {
763 763 NodeTree::from(vec![
764 764 block![0: Rev(9)],
765 765 block![0: Rev(0), 1: Rev(9)],
766 766 block![0: Block(1), 1:Rev(1)],
767 767 ])
768 768 }
769 769
770 770 fn hex(s: &str) -> NodePrefix {
771 771 NodePrefix::from_hex(s).unwrap()
772 772 }
773 773
774 774 #[test]
775 775 fn test_nt_debug() {
776 776 let nt = sample_nodetree();
777 777 assert_eq!(
778 778 format!("{:?}", nt),
779 779 "readonly: \
780 780 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
781 781 growable: [], \
782 782 root: {0: Block(1), 1: Rev(1)}",
783 783 );
784 784 }
785 785
786 786 #[test]
787 787 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
788 788 let mut idx: TestIndex = HashMap::new();
789 789 pad_insert(&mut idx, 1, "1234deadcafe");
790 790
791 791 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
792 792 assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1));
793 793 assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1));
794 794 assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1));
795 795 assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
796 796 assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
797 797
798 798 // and with full binary Nodes
799 799 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
800 800 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
801 801 assert_eq!(nt.find_node(&idx, &unknown)?, None);
802 802 Ok(())
803 803 }
804 804
805 805 #[test]
806 806 fn test_immutable_find_one_jump() {
807 807 let mut idx = TestIndex::new();
808 808 pad_insert(&mut idx, 9, "012");
809 809 pad_insert(&mut idx, 0, "00a");
810 810
811 811 let nt = sample_nodetree();
812 812
813 813 assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
814 814 assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9)));
815 815 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
816 816 assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0)));
817 817 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
818 818 assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
819 819 }
820 820
821 821 #[test]
822 822 fn test_mutated_find() -> Result<(), NodeMapError> {
823 823 let mut idx = TestIndex::new();
824 824 pad_insert(&mut idx, 9, "012");
825 825 pad_insert(&mut idx, 0, "00a");
826 826 pad_insert(&mut idx, 2, "cafe");
827 827 pad_insert(&mut idx, 3, "15");
828 828 pad_insert(&mut idx, 1, "10");
829 829
830 830 let nt = NodeTree {
831 831 readonly: sample_nodetree().readonly,
832 832 growable: vec![block![0: Rev(1), 5: Rev(3)]],
833 833 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
834 834 masked_inner_blocks: 1,
835 835 };
836 836 assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1));
837 837 assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2));
838 838 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
839 839 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
840 840 assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
841 841 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
842 842 assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9));
843 843 assert_eq!(nt.masked_readonly_blocks(), 2);
844 844 Ok(())
845 845 }
846 846
847 struct TestNtIndex {
848 index: TestIndex,
849 nt: NodeTree,
847 pub struct TestNtIndex {
848 pub index: TestIndex,
849 pub nt: NodeTree,
850 850 }
851 851
852 852 impl TestNtIndex {
853 fn new() -> Self {
853 pub fn new() -> Self {
854 854 TestNtIndex {
855 855 index: HashMap::new(),
856 856 nt: NodeTree::default(),
857 857 }
858 858 }
859 859
860 fn insert(
860 pub fn insert_node(
861 &mut self,
862 rev: Revision,
863 node: Node,
864 ) -> Result<(), NodeMapError> {
865 self.index.insert(rev, node);
866 self.nt.insert(&self.index, &node, rev)?;
867 Ok(())
868 }
869
870 pub fn insert(
861 871 &mut self,
862 872 rev: Revision,
863 873 hex: &str,
864 874 ) -> Result<(), NodeMapError> {
865 let node = pad_node(hex);
866 self.index.insert(rev, node);
867 self.nt.insert(&self.index, &node, rev)?;
868 Ok(())
875 return self.insert_node(rev, pad_node(hex));
869 876 }
870 877
871 878 fn find_hex(
872 879 &self,
873 880 prefix: &str,
874 881 ) -> Result<Option<Revision>, NodeMapError> {
875 882 self.nt.find_bin(&self.index, hex(prefix))
876 883 }
877 884
878 885 fn unique_prefix_len_hex(
879 886 &self,
880 887 prefix: &str,
881 888 ) -> Result<Option<usize>, NodeMapError> {
882 889 self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
883 890 }
884 891
885 892 /// Drain `added` and restart a new one
886 893 fn commit(self) -> Self {
887 894 let mut as_vec: Vec<Block> =
888 895 self.nt.readonly.iter().copied().collect();
889 896 as_vec.extend(self.nt.growable);
890 897 as_vec.push(self.nt.root);
891 898
892 899 Self {
893 900 index: self.index,
894 901 nt: NodeTree::from(as_vec),
895 902 }
896 903 }
897 904 }
898 905
899 906 #[test]
900 907 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
901 908 let mut idx = TestNtIndex::new();
902 909 idx.insert(0, "1234")?;
903 910 assert_eq!(idx.find_hex("1")?, Some(0));
904 911 assert_eq!(idx.find_hex("12")?, Some(0));
905 912
906 913 // let's trigger a simple split
907 914 idx.insert(1, "1a34")?;
908 915 assert_eq!(idx.nt.growable.len(), 1);
909 916 assert_eq!(idx.find_hex("12")?, Some(0));
910 917 assert_eq!(idx.find_hex("1a")?, Some(1));
911 918
912 919 // reinserting is a no_op
913 920 idx.insert(1, "1a34")?;
914 921 assert_eq!(idx.nt.growable.len(), 1);
915 922 assert_eq!(idx.find_hex("12")?, Some(0));
916 923 assert_eq!(idx.find_hex("1a")?, Some(1));
917 924
918 925 idx.insert(2, "1a01")?;
919 926 assert_eq!(idx.nt.growable.len(), 2);
920 927 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
921 928 assert_eq!(idx.find_hex("12")?, Some(0));
922 929 assert_eq!(idx.find_hex("1a3")?, Some(1));
923 930 assert_eq!(idx.find_hex("1a0")?, Some(2));
924 931 assert_eq!(idx.find_hex("1a12")?, None);
925 932
926 933 // now let's make it split and create more than one additional block
927 934 idx.insert(3, "1a345")?;
928 935 assert_eq!(idx.nt.growable.len(), 4);
929 936 assert_eq!(idx.find_hex("1a340")?, Some(1));
930 937 assert_eq!(idx.find_hex("1a345")?, Some(3));
931 938 assert_eq!(idx.find_hex("1a341")?, None);
932 939
933 940 // there's no readonly block to mask
934 941 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
935 942 Ok(())
936 943 }
937 944
938 945 #[test]
939 946 fn test_unique_prefix_len_zero_prefix() {
940 947 let mut idx = TestNtIndex::new();
941 948 idx.insert(0, "00000abcd").unwrap();
942 949
943 950 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
944 951 // in the nodetree proper, this will be found at the first nybble
945 952 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
946 953 // but the first difference with `NULL_NODE`
947 954 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
948 955 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
949 956
950 957 // same with odd result
951 958 idx.insert(1, "00123").unwrap();
952 959 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
953 960 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
954 961
955 962 // these are unchanged of course
956 963 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
957 964 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
958 965 }
959 966
960 967 #[test]
961 968 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
962 969 // check that the splitting loop is long enough
963 970 let mut nt_idx = TestNtIndex::new();
964 971 let nt = &mut nt_idx.nt;
965 972 let idx = &mut nt_idx.index;
966 973
967 974 let node0_hex = hex_pad_right("444444");
968 975 let mut node1_hex = hex_pad_right("444444");
969 976 node1_hex.pop();
970 977 node1_hex.push('5');
971 978 let node0 = Node::from_hex(&node0_hex).unwrap();
972 979 let node1 = Node::from_hex(&node1_hex).unwrap();
973 980
974 981 idx.insert(0, node0);
975 982 nt.insert(idx, &node0, 0)?;
976 983 idx.insert(1, node1);
977 984 nt.insert(idx, &node1, 1)?;
978 985
979 986 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
980 987 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
981 988 Ok(())
982 989 }
983 990
984 991 #[test]
985 992 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
986 993 let mut idx = TestNtIndex::new();
987 994 idx.insert(0, "1234")?;
988 995 idx.insert(1, "1235")?;
989 996 idx.insert(2, "131")?;
990 997 idx.insert(3, "cafe")?;
991 998 let mut idx = idx.commit();
992 999 assert_eq!(idx.find_hex("1234")?, Some(0));
993 1000 assert_eq!(idx.find_hex("1235")?, Some(1));
994 1001 assert_eq!(idx.find_hex("131")?, Some(2));
995 1002 assert_eq!(idx.find_hex("cafe")?, Some(3));
996 1003 // we did not add anything since init from readonly
997 1004 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
998 1005
999 1006 idx.insert(4, "123A")?;
1000 1007 assert_eq!(idx.find_hex("1234")?, Some(0));
1001 1008 assert_eq!(idx.find_hex("1235")?, Some(1));
1002 1009 assert_eq!(idx.find_hex("131")?, Some(2));
1003 1010 assert_eq!(idx.find_hex("cafe")?, Some(3));
1004 1011 assert_eq!(idx.find_hex("123A")?, Some(4));
1005 1012 // we masked blocks for all prefixes of "123", including the root
1006 1013 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1007 1014
1008 1015 eprintln!("{:?}", idx.nt);
1009 1016 idx.insert(5, "c0")?;
1010 1017 assert_eq!(idx.find_hex("cafe")?, Some(3));
1011 1018 assert_eq!(idx.find_hex("c0")?, Some(5));
1012 1019 assert_eq!(idx.find_hex("c1")?, None);
1013 1020 assert_eq!(idx.find_hex("1234")?, Some(0));
1014 1021 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1015 1022 // it doesn't mask anything
1016 1023 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1017 1024
1018 1025 Ok(())
1019 1026 }
1020 1027
1021 1028 #[test]
1022 1029 fn test_invalidate_all() -> Result<(), NodeMapError> {
1023 1030 let mut idx = TestNtIndex::new();
1024 1031 idx.insert(0, "1234")?;
1025 1032 idx.insert(1, "1235")?;
1026 1033 idx.insert(2, "131")?;
1027 1034 idx.insert(3, "cafe")?;
1028 1035 let mut idx = idx.commit();
1029 1036
1030 1037 idx.nt.invalidate_all();
1031 1038
1032 1039 assert_eq!(idx.find_hex("1234")?, None);
1033 1040 assert_eq!(idx.find_hex("1235")?, None);
1034 1041 assert_eq!(idx.find_hex("131")?, None);
1035 1042 assert_eq!(idx.find_hex("cafe")?, None);
1036 1043 // all the readonly blocks have been masked, this is the
1037 1044 // conventional expected response
1038 1045 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1039 1046 Ok(())
1040 1047 }
1041 1048
1042 1049 #[test]
1043 1050 fn test_into_added_empty() {
1044 1051 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1045 1052 assert!(sample_nodetree()
1046 1053 .into_readonly_and_added_bytes()
1047 1054 .1
1048 1055 .is_empty());
1049 1056 }
1050 1057
1051 1058 #[test]
1052 1059 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1053 1060 let mut idx = TestNtIndex::new();
1054 1061 idx.insert(0, "1234")?;
1055 1062 let mut idx = idx.commit();
1056 1063 idx.insert(4, "cafe")?;
1057 1064 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1058 1065
1059 1066 // only the root block has been changed
1060 1067 assert_eq!(bytes.len(), size_of::<Block>());
1061 1068 // big endian for -2
1062 1069 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1063 1070 // big endian for -6
1064 1071 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1065 1072 Ok(())
1066 1073 }
1067 1074 }
General Comments 0
You need to be logged in to leave comments. Login now