rust: use HgError in RevlogError and Vfs...
Simon Sapin -
r47172:43d63979 default
@@ -1,111 +1,114 b''
1 1 use std::fmt;
2 2
3 3 /// Common error cases that can happen in many different APIs
4 4 #[derive(Debug)]
5 5 pub enum HgError {
6 6 IoError {
7 7 error: std::io::Error,
8 8 context: IoErrorContext,
9 9 },
10 10
11 11 /// A file under `.hg/` normally only written by Mercurial
12 12 ///
13 13 /// The given string is a short explanation for users, not intended to be
14 14 /// machine-readable.
15 15 CorruptedRepository(String),
16 16
17 17 /// The repository or requested operation involves a feature not
18 18 /// supported by the Rust implementation. Falling back to the Python
19 19 /// implementation may or may not work.
20 20 ///
21 21 /// The given string is a short explanation for users, not intended to be
22 22 /// machine-readable.
23 23 UnsupportedFeature(String),
24 24 }
25 25
26 26 /// Details about where an I/O error happened
27 27 #[derive(Debug, derive_more::From)]
28 28 pub enum IoErrorContext {
29 29 /// A filesystem operation returned `std::io::Error`
30 30 #[from]
31 31 File(std::path::PathBuf),
32 32 /// `std::env::current_dir` returned `std::io::Error`
33 33 CurrentDir,
34 34 }
35 35
36 36 impl HgError {
37 37 pub fn corrupted(explanation: impl Into<String>) -> Self {
38 // TODO: capture a backtrace here and keep it in the error value
39 // to aid debugging?
40 // https://doc.rust-lang.org/std/backtrace/struct.Backtrace.html
38 41 HgError::CorruptedRepository(explanation.into())
39 42 }
40 43 }
41 44
42 45 // TODO: use `DisplayBytes` instead to show non-Unicode filenames losslessly?
43 46 impl fmt::Display for HgError {
44 47 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
45 48 match self {
46 49 HgError::IoError { error, context } => {
47 50 write!(f, "{}: {}", error, context)
48 51 }
49 52 HgError::CorruptedRepository(explanation) => {
50 53 write!(f, "corrupted repository: {}", explanation)
51 54 }
52 55 HgError::UnsupportedFeature(explanation) => {
53 56 write!(f, "unsupported feature: {}", explanation)
54 57 }
55 58 }
56 59 }
57 60 }
58 61
59 62 // TODO: use `DisplayBytes` instead to show non-Unicode filenames losslessly?
60 63 impl fmt::Display for IoErrorContext {
61 64 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
62 65 match self {
63 66 IoErrorContext::File(path) => path.display().fmt(f),
64 67 IoErrorContext::CurrentDir => f.write_str("current directory"),
65 68 }
66 69 }
67 70 }
68 71
69 72 pub trait IoResultExt<T> {
70 73 /// Annotate a possible I/O error as related to a file at the given path.
71 74 ///
72 75 /// This allows printing something like “File not found: example.txt”
73 76 /// instead of just “File not found”.
74 77 ///
75 78 /// Converts a `Result` with `std::io::Error` into one with `HgError`.
76 79 fn for_file(self, path: &std::path::Path) -> Result<T, HgError>;
77 80 }
78 81
79 82 impl<T> IoResultExt<T> for std::io::Result<T> {
80 83 fn for_file(self, path: &std::path::Path) -> Result<T, HgError> {
81 84 self.map_err(|error| HgError::IoError {
82 85 error,
83 86 context: IoErrorContext::File(path.to_owned()),
84 87 })
85 88 }
86 89 }
87 90
88 91 pub trait HgResultExt<T> {
89 92 /// Handle missing files separately from other I/O error cases.
90 93 ///
91 94 /// Wraps the `Ok` type in an `Option`:
92 95 ///
93 96 /// * `Ok(x)` becomes `Ok(Some(x))`
94 97 /// * An I/O "not found" error becomes `Ok(None)`
95 98 /// * Other errors are unchanged
96 99 fn io_not_found_as_none(self) -> Result<Option<T>, HgError>;
97 100 }
98 101
99 102 impl<T> HgResultExt<T> for Result<T, HgError> {
100 103 fn io_not_found_as_none(self) -> Result<Option<T>, HgError> {
101 104 match self {
102 105 Ok(x) => Ok(Some(x)),
103 106 Err(HgError::IoError { error, .. })
104 107 if error.kind() == std::io::ErrorKind::NotFound =>
105 108 {
106 109 Ok(None)
107 110 }
108 111 Err(other_error) => Err(other_error),
109 112 }
110 113 }
111 114 }
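
A minimal sketch of how these combinators compose, assuming the hg-core crate is referenced as `hg` (as rhg does); the function name is illustrative:

    use std::path::Path;

    use hg::errors::{HgError, HgResultExt, IoResultExt};

    /// Read a file that may legitimately be absent: `for_file` attaches the
    /// path to any I/O error, and `io_not_found_as_none` maps a "not found"
    /// error to `Ok(None)`.
    fn read_optional_file(path: &Path) -> Result<Option<Vec<u8>>, HgError> {
        std::fs::read(path).for_file(path).io_not_found_as_none()
    }
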
@@ -1,115 +1,116 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6
6 7 mod ancestors;
7 8 pub mod dagops;
8 9 pub mod errors;
9 10 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
10 11 mod dirstate;
11 12 pub mod discovery;
12 13 pub mod requirements;
13 14 pub mod testing; // unconditionally built, for use from integration tests
14 15 pub use dirstate::{
15 16 dirs_multiset::{DirsMultiset, DirsMultisetIter},
16 17 dirstate_map::DirstateMap,
17 18 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
18 19 status::{
19 20 status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
20 21 },
21 22 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
22 23 StateMap, StateMapIter,
23 24 };
24 25 pub mod copy_tracing;
25 26 mod filepatterns;
26 27 pub mod matchers;
27 28 pub mod repo;
28 29 pub mod revlog;
29 30 pub use revlog::*;
30 31 pub mod config;
31 32 pub mod operations;
32 33 pub mod revset;
33 34 pub mod utils;
34 35
35 36 use crate::utils::hg_path::{HgPathBuf, HgPathError};
36 37 pub use filepatterns::{
37 38 parse_pattern_syntax, read_pattern_file, IgnorePattern,
38 39 PatternFileWarning, PatternSyntax,
39 40 };
40 41 use std::collections::HashMap;
41 42 use twox_hash::RandomXxHashBuilder64;
42 43
43 44 /// This is a contract between the `micro-timer` crate and us, to expose
44 45 /// the `log` crate as `crate::log`.
45 46 use log;
46 47
47 48 pub type LineNumber = usize;
48 49
49 50 /// Rust's default hasher is too slow because it tries to prevent collision
50 51 /// attacks. We are not concerned about those: if an ill-minded person has
51 52 /// write access to your repository, you have other issues.
52 53 pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
53 54
54 55 #[derive(Debug, PartialEq)]
55 56 pub enum DirstateMapError {
56 57 PathNotFound(HgPathBuf),
57 58 EmptyPath,
58 59 InvalidPath(HgPathError),
59 60 }
60 61
61 62 impl ToString for DirstateMapError {
62 63 fn to_string(&self) -> String {
63 64 match self {
64 65 DirstateMapError::PathNotFound(_) => {
65 66 "expected a value, found none".to_string()
66 67 }
67 68 DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(),
68 69 DirstateMapError::InvalidPath(e) => e.to_string(),
69 70 }
70 71 }
71 72 }
72 73
73 74 #[derive(Debug, derive_more::From)]
74 75 pub enum DirstateError {
75 76 Map(DirstateMapError),
76 77 Common(errors::HgError),
77 78 }
78 79
79 80 #[derive(Debug, derive_more::From)]
80 81 pub enum PatternError {
81 82 #[from]
82 83 Path(HgPathError),
83 84 UnsupportedSyntax(String),
84 85 UnsupportedSyntaxInFile(String, String, usize),
85 86 TooLong(usize),
86 87 #[from]
87 88 IO(std::io::Error),
88 89 /// Needed a pattern that can be turned into a regex but got one that
89 90 /// can't. This should only happen through programmer error.
90 91 NonRegexPattern(IgnorePattern),
91 92 }
92 93
93 94 impl ToString for PatternError {
94 95 fn to_string(&self) -> String {
95 96 match self {
96 97 PatternError::UnsupportedSyntax(syntax) => {
97 98 format!("Unsupported syntax {}", syntax)
98 99 }
99 100 PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
100 101 format!(
101 102 "{}:{}: unsupported syntax {}",
102 103 file_path, line, syntax
103 104 )
104 105 }
105 106 PatternError::TooLong(size) => {
106 107 format!("matcher pattern is too long ({} bytes)", size)
107 108 }
108 109 PatternError::IO(e) => e.to_string(),
109 110 PatternError::Path(e) => e.to_string(),
110 111 PatternError::NonRegexPattern(pattern) => {
111 112 format!("'{:?}' cannot be turned into a regex", pattern)
112 113 }
113 114 }
114 115 }
115 116 }
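
As a brief illustration of the `FastHashMap` alias above (again assuming the crate is used as `hg`): it is a drop-in replacement for `std::collections::HashMap`, only the hasher differs.

    use hg::FastHashMap;

    fn frequency(words: &[&str]) -> FastHashMap<String, usize> {
        // `default()` builds the map with the xxHash-based hasher.
        let mut counts = FastHashMap::default();
        for word in words {
            *counts.entry(word.to_string()).or_insert(0) += 1;
        }
        counts
    }
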
@@ -1,76 +1,75 b''
1 1 // cat.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use std::path::PathBuf;
9 9
10 10 use crate::repo::Repo;
11 11 use crate::revlog::changelog::Changelog;
12 12 use crate::revlog::manifest::Manifest;
13 13 use crate::revlog::path_encode::path_encode;
14 14 use crate::revlog::revlog::Revlog;
15 15 use crate::revlog::revlog::RevlogError;
16 16 use crate::revlog::Node;
17 17 use crate::utils::files::get_path_from_bytes;
18 18 use crate::utils::hg_path::{HgPath, HgPathBuf};
19 19
20 20 const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n'];
21 21
22 22 /// Output the contents of the given files as they were at a given revision.
23 23 ///
24 24 /// * `repo`: The repository to read from.
25 25 /// * `revset`: The revision to cat the files from.
26 26 /// * `files`: The files to output.
27 27 pub fn cat(
28 28 repo: &Repo,
29 29 revset: &str,
30 30 files: &[HgPathBuf],
31 31 ) -> Result<Vec<u8>, RevlogError> {
32 32 let rev = crate::revset::resolve_single(revset, repo)?;
33 33 let changelog = Changelog::open(repo)?;
34 34 let manifest = Manifest::open(repo)?;
35 35 let changelog_entry = changelog.get_rev(rev)?;
36 let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
37 .map_err(|_| RevlogError::Corrupted)?;
36 let manifest_node =
37 Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
38 38 let manifest_entry = manifest.get_node(manifest_node.into())?;
39 39 let mut bytes = vec![];
40 40
41 41 for (manifest_file, node_bytes) in manifest_entry.files_with_nodes() {
42 42 for cat_file in files.iter() {
43 43 if cat_file.as_bytes() == manifest_file.as_bytes() {
44 44 let index_path = store_path(manifest_file, b".i");
45 45 let data_path = store_path(manifest_file, b".d");
46 46
47 47 let file_log =
48 48 Revlog::open(repo, &index_path, Some(&data_path))?;
49 let file_node = Node::from_hex(node_bytes)
50 .map_err(|_| RevlogError::Corrupted)?;
49 let file_node = Node::from_hex_for_repo(node_bytes)?;
51 50 let file_rev = file_log.get_node_rev(file_node.into())?;
52 51 let data = file_log.get_rev_data(file_rev)?;
53 52 if data.starts_with(&METADATA_DELIMITER) {
54 53 let end_delimiter_position = data
55 54 [METADATA_DELIMITER.len()..]
56 55 .windows(METADATA_DELIMITER.len())
57 56 .position(|bytes| bytes == METADATA_DELIMITER);
58 57 if let Some(position) = end_delimiter_position {
59 58 let offset = METADATA_DELIMITER.len() * 2;
60 59 bytes.extend(data[position + offset..].iter());
61 60 }
62 61 } else {
63 62 bytes.extend(data);
64 63 }
65 64 }
66 65 }
67 66 }
68 67
69 68 Ok(bytes)
70 69 }
71 70
72 71 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
73 72 let encoded_bytes =
74 73 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
75 74 get_path_from_bytes(&encoded_bytes).into()
76 75 }
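
A hedged caller-side sketch of `cat`, assuming it is re-exported from `hg::operations` as rhg's `cat` command uses it; error handling is collapsed to `Option` for brevity:

    use hg::operations::cat;
    use hg::repo::Repo;
    use hg::utils::hg_path::HgPathBuf;

    /// Concatenate the contents of `files` as of the revision named by
    /// `revset`, or return `None` on any repository or revlog error.
    fn dump_files(revset: &str, files: &[HgPathBuf]) -> Option<Vec<u8>> {
        let repo = Repo::find().ok()?;
        cat(&repo, revset, files).ok()
    }
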
@@ -1,72 +1,67 b''
1 1 // list_tracked_files.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::dirstate::parsers::parse_dirstate;
9 use crate::errors::{HgError, IoResultExt};
9 use crate::errors::HgError;
10 10 use crate::repo::Repo;
11 11 use crate::revlog::changelog::Changelog;
12 12 use crate::revlog::manifest::{Manifest, ManifestEntry};
13 13 use crate::revlog::node::Node;
14 14 use crate::revlog::revlog::RevlogError;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::EntryState;
17 17 use rayon::prelude::*;
18 18
19 19 /// List files under Mercurial control in the working directory
20 20 /// by reading the dirstate
21 21 pub struct Dirstate {
22 22 /// The `dirstate` content.
23 23 content: Vec<u8>,
24 24 }
25 25
26 26 impl Dirstate {
27 27 pub fn new(repo: &Repo) -> Result<Self, HgError> {
28 let content = repo
29 .hg_vfs()
30 .read("dirstate")
31 // TODO: this will be more accurate when we use `HgError` in
32 // `Vfs::read`.
33 .for_file("dirstate".as_ref())?;
28 let content = repo.hg_vfs().read("dirstate")?;
34 29 Ok(Self { content })
35 30 }
36 31
37 32 pub fn tracked_files(&self) -> Result<Vec<&HgPath>, HgError> {
38 33 let (_, entries, _) = parse_dirstate(&self.content)?;
39 34 let mut files: Vec<&HgPath> = entries
40 35 .into_iter()
41 36 .filter_map(|(path, entry)| match entry.state {
42 37 EntryState::Removed => None,
43 38 _ => Some(path),
44 39 })
45 40 .collect();
46 41 files.par_sort_unstable();
47 42 Ok(files)
48 43 }
49 44 }
50 45
51 46 /// List files under Mercurial control at a given revision.
52 47 pub fn list_rev_tracked_files(
53 48 repo: &Repo,
54 49 revset: &str,
55 50 ) -> Result<FilesForRev, RevlogError> {
56 51 let rev = crate::revset::resolve_single(revset, repo)?;
57 52 let changelog = Changelog::open(repo)?;
58 53 let manifest = Manifest::open(repo)?;
59 54 let changelog_entry = changelog.get_rev(rev)?;
60 let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
61 .map_err(|_| RevlogError::Corrupted)?;
55 let manifest_node =
56 Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
62 57 let manifest_entry = manifest.get_node(manifest_node.into())?;
63 58 Ok(FilesForRev(manifest_entry))
64 59 }
65 60
66 61 pub struct FilesForRev(ManifestEntry);
67 62
68 63 impl FilesForRev {
69 64 pub fn iter(&self) -> impl Iterator<Item = &HgPath> {
70 65 self.0.files()
71 66 }
72 67 }
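
A sketch of the two entry points above, under the same re-export assumption; it compares how many files are tracked in the working directory versus at a given revision:

    use hg::operations::{list_rev_tracked_files, Dirstate};
    use hg::repo::Repo;

    fn tracked_file_counts(repo: &Repo, revset: &str) -> Option<(usize, usize)> {
        // Working directory: read and parse `.hg/dirstate`.
        let dirstate = Dirstate::new(repo).ok()?;
        let in_working_dir = dirstate.tracked_files().ok()?.len();
        // At a revision: resolve the revset and walk the manifest.
        let at_rev = list_rev_tracked_files(repo, revset).ok()?.iter().count();
        Some((in_working_dir, at_rev))
    }
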
@@ -1,91 +1,86 b''
1 use crate::errors::HgError;
1 use crate::errors::{HgError, IoResultExt};
2 2 use crate::operations::{find_root, FindRootError};
3 3 use crate::requirements;
4 4 use memmap::{Mmap, MmapOptions};
5 5 use std::path::{Path, PathBuf};
6 6
7 7 /// A repository on disk
8 8 pub struct Repo {
9 9 working_directory: PathBuf,
10 10 dot_hg: PathBuf,
11 11 store: PathBuf,
12 12 }
13 13
14 14 /// Filesystem access abstraction for the contents of a given "base" directory
15 15 #[derive(Clone, Copy)]
16 16 pub(crate) struct Vfs<'a> {
17 17 base: &'a Path,
18 18 }
19 19
20 20 impl Repo {
21 21 /// Create a `Repo` for the repository rooted at the given path, which is
22 22 /// expected to contain a `.hg` sub-directory (this is not verified here).
23 23 pub fn for_path(root: impl Into<PathBuf>) -> Self {
24 24 let working_directory = root.into();
25 25 let dot_hg = working_directory.join(".hg");
26 26 Self {
27 27 store: dot_hg.join("store"),
28 28 dot_hg,
29 29 working_directory,
30 30 }
31 31 }
32 32
33 33 pub fn find() -> Result<Self, FindRootError> {
34 34 find_root().map(Self::for_path)
35 35 }
36 36
37 37 pub fn check_requirements(&self) -> Result<(), HgError> {
38 38 requirements::check(self)
39 39 }
40 40
41 41 pub fn working_directory_path(&self) -> &Path {
42 42 &self.working_directory
43 43 }
44 44
45 45 /// For accessing repository files (in `.hg`), except for the store
46 46 /// (`.hg/store`).
47 47 pub(crate) fn hg_vfs(&self) -> Vfs<'_> {
48 48 Vfs { base: &self.dot_hg }
49 49 }
50 50
51 51 /// For accessing repository store files (in `.hg/store`)
52 52 pub(crate) fn store_vfs(&self) -> Vfs<'_> {
53 53 Vfs { base: &self.store }
54 54 }
55 55
56 56 /// For accessing the working copy
57 57
58 58 // The underscore prefix silences the "never used" warning. Remove before
59 59 // using.
60 60 pub(crate) fn _working_directory_vfs(&self) -> Vfs<'_> {
61 61 Vfs {
62 62 base: &self.working_directory,
63 63 }
64 64 }
65 65 }
66 66
67 67 impl Vfs<'_> {
68 68 pub(crate) fn read(
69 69 &self,
70 70 relative_path: impl AsRef<Path>,
71 ) -> std::io::Result<Vec<u8>> {
72 std::fs::read(self.base.join(relative_path))
73 }
74
75 pub(crate) fn open(
76 &self,
77 relative_path: impl AsRef<Path>,
78 ) -> std::io::Result<std::fs::File> {
79 std::fs::File::open(self.base.join(relative_path))
71 ) -> Result<Vec<u8>, HgError> {
72 let path = self.base.join(relative_path);
73 std::fs::read(&path).for_file(&path)
80 74 }
81 75
82 76 pub(crate) fn mmap_open(
83 77 &self,
84 78 relative_path: impl AsRef<Path>,
85 ) -> std::io::Result<Mmap> {
86 let file = self.open(relative_path)?;
79 ) -> Result<Mmap, HgError> {
80 let path = self.base.join(relative_path);
81 let file = std::fs::File::open(&path).for_file(&path)?;
87 82 // TODO: what are the safety requirements here?
88 let mmap = unsafe { MmapOptions::new().map(&file) }?;
83 let mmap = unsafe { MmapOptions::new().map(&file) }.for_file(&path)?;
89 84 Ok(mmap)
90 85 }
91 86 }
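
Since `Vfs` is `pub(crate)`, the following sketch is written as crate-internal code with illustrative function names; it shows call sites relying on `Vfs::read` and `Vfs::mmap_open` now returning `HgError` with the failing path already attached:

    use crate::errors::{HgError, HgResultExt};
    use crate::repo::Repo;

    fn read_branch_file(repo: &Repo) -> Result<Option<Vec<u8>>, HgError> {
        // An I/O failure is reported with the full path of `.hg/branch`;
        // a missing file becomes `Ok(None)`.
        repo.hg_vfs().read("branch").io_not_found_as_none()
    }

    fn mmap_store_file(repo: &Repo, name: &str) -> Result<memmap::Mmap, HgError> {
        // Same for memory-mapped reads from `.hg/store`.
        repo.store_vfs().mmap_open(name)
    }
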
@@ -1,70 +1,67 b''
1 use crate::errors::{HgError, HgResultExt, IoResultExt};
1 use crate::errors::{HgError, HgResultExt};
2 2 use crate::repo::Repo;
3 3
4 4 fn parse(bytes: &[u8]) -> Result<Vec<String>, HgError> {
5 5 // The Python code reading this file uses `str.splitlines`
6 6 // which looks for a number of line separators (even including a couple of
7 7 // non-ASCII ones), but Python code writing it always uses `\n`.
8 8 let lines = bytes.split(|&byte| byte == b'\n');
9 9
10 10 lines
11 11 .filter(|line| !line.is_empty())
12 12 .map(|line| {
13 13 // Python uses Unicode `str.isalnum` but feature names are all
14 14 // ASCII
15 15 if line[0].is_ascii_alphanumeric() && line.is_ascii() {
16 16 Ok(String::from_utf8(line.into()).unwrap())
17 17 } else {
18 18 Err(HgError::corrupted("parse error in 'requires' file"))
19 19 }
20 20 })
21 21 .collect()
22 22 }
23 23
24 24 pub fn load(repo: &Repo) -> Result<Vec<String>, HgError> {
25 if let Some(bytes) = repo
26 .hg_vfs()
27 .read("requires")
28 .for_file("requires".as_ref())
29 .io_not_found_as_none()?
25 if let Some(bytes) =
26 repo.hg_vfs().read("requires").io_not_found_as_none()?
30 27 {
31 28 parse(&bytes)
32 29 } else {
33 30 // Treat a missing file the same as an empty file.
34 31 // From `mercurial/localrepo.py`:
35 32 // > requires file contains a newline-delimited list of
36 33 // > features/capabilities the opener (us) must have in order to use
37 34 // > the repository. This file was introduced in Mercurial 0.9.2,
38 35 // > which means very old repositories may not have one. We assume
39 36 // > a missing file translates to no requirements.
40 37 Ok(Vec::new())
41 38 }
42 39 }
43 40
44 41 pub fn check(repo: &Repo) -> Result<(), HgError> {
45 42 for feature in load(repo)? {
46 43 if !SUPPORTED.contains(&&*feature) {
47 44 // TODO: collect all unknown features and include them in the
48 45 // error message?
49 46 return Err(HgError::UnsupportedFeature(format!(
50 47 "repository requires feature unknown to this Mercurial: {}",
51 48 feature
52 49 )));
53 50 }
54 51 }
55 52 Ok(())
56 53 }
57 54
58 55 // TODO: set this to actually-supported features
59 56 const SUPPORTED: &[&str] = &[
60 57 "dotencode",
61 58 "fncache",
62 59 "generaldelta",
63 60 "revlogv1",
64 61 "sparserevlog",
65 62 "store",
66 63 // As of this writing everything rhg does is read-only.
67 64 // When it starts writing to the repository, it’ll need to either keep the
68 65 // persistent nodemap up to date or remove this entry:
69 66 "persistent-nodemap",
70 67 ];
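
A hypothetical in-module test sketch (`parse` is private to this module) restating the rules above: non-empty lines must be ASCII and start with an alphanumeric character, anything else is reported as corruption:

    #[test]
    fn parse_requires_sketch() {
        let ok = parse(b"dotencode\nfncache\nstore\n").unwrap();
        assert_eq!(ok, vec!["dotencode", "fncache", "store"]);
        // A leading non-ASCII (hence non-alphanumeric) byte is corruption.
        assert!(parse(b"\xffbogus\n").is_err());
    }
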
@@ -1,58 +1,61 b''
1 use crate::errors::HgError;
1 2 use crate::repo::Repo;
2 3 use crate::revlog::revlog::{Revlog, RevlogError};
3 4 use crate::revlog::NodePrefix;
4 5 use crate::revlog::Revision;
5 6
6 7 /// A specialized `Revlog` to work with `changelog` data format.
7 8 pub struct Changelog {
8 9 /// The generic `revlog` format.
9 10 pub(crate) revlog: Revlog,
10 11 }
11 12
12 13 impl Changelog {
13 14 /// Open the `changelog` of a repository given by its root.
14 15 pub fn open(repo: &Repo) -> Result<Self, RevlogError> {
15 16 let revlog = Revlog::open(repo, "00changelog.i", None)?;
16 17 Ok(Self { revlog })
17 18 }
18 19
19 20 /// Return the `ChangelogEntry` for a given node id.
20 21 pub fn get_node(
21 22 &self,
22 23 node: NodePrefix,
23 24 ) -> Result<ChangelogEntry, RevlogError> {
24 25 let rev = self.revlog.get_node_rev(node)?;
25 26 self.get_rev(rev)
26 27 }
27 28
28 29 /// Return the `ChangelogEntry` of a given revision number.
29 30 pub fn get_rev(
30 31 &self,
31 32 rev: Revision,
32 33 ) -> Result<ChangelogEntry, RevlogError> {
33 34 let bytes = self.revlog.get_rev_data(rev)?;
34 35 Ok(ChangelogEntry { bytes })
35 36 }
36 37 }
37 38
38 39 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
39 40 #[derive(Debug)]
40 41 pub struct ChangelogEntry {
41 42 /// The data bytes of the `changelog` entry.
42 43 bytes: Vec<u8>,
43 44 }
44 45
45 46 impl ChangelogEntry {
46 47 /// Return an iterator over the lines of the entry.
47 48 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
48 49 self.bytes
49 50 .split(|b| b == &b'\n')
50 51 .filter(|line| !line.is_empty())
51 52 }
52 53
53 54 /// Return the node id of the `manifest` referenced by this `changelog`
54 55 /// entry.
55 56 pub fn manifest_node(&self) -> Result<&[u8], RevlogError> {
56 self.lines().next().ok_or(RevlogError::Corrupted)
57 self.lines()
58 .next()
59 .ok_or_else(|| HgError::corrupted("empty changelog entry").into())
57 60 }
58 61 }
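
A crate-internal sketch, mirroring what `operations::cat` and `list_rev_tracked_files` do, of going from a changeset revision to its manifest node id; the final `?` relies on `RevlogError`'s `From<HgError>` conversion:

    use crate::repo::Repo;
    use crate::revlog::changelog::Changelog;
    use crate::revlog::node::Node;
    use crate::revlog::revlog::RevlogError;
    use crate::revlog::Revision;

    fn manifest_node_of(repo: &Repo, rev: Revision) -> Result<Node, RevlogError> {
        let changelog = Changelog::open(repo)?;
        let entry = changelog.get_rev(rev)?;
        // The first line of a changelog entry is the manifest node in hex;
        // a malformed value is reported as repository corruption.
        Ok(Node::from_hex_for_repo(entry.manifest_node()?)?)
    }
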
@@ -1,402 +1,404 b''
1 1 use std::convert::TryInto;
2 2 use std::ops::Deref;
3 3
4 4 use byteorder::{BigEndian, ByteOrder};
5 5
6 use crate::errors::HgError;
6 7 use crate::revlog::node::Node;
7 8 use crate::revlog::revlog::RevlogError;
8 9 use crate::revlog::{Revision, NULL_REVISION};
9 10
10 11 pub const INDEX_ENTRY_SIZE: usize = 64;
11 12
12 13 /// A Revlog index
13 14 pub struct Index {
14 15 bytes: Box<dyn Deref<Target = [u8]> + Send>,
15 16 /// Offsets of starts of index blocks.
16 17 /// Only needed when the index is interleaved with data.
17 18 offsets: Option<Vec<usize>>,
18 19 }
19 20
20 21 impl Index {
21 22 /// Create an index from bytes.
22 23 /// Calculate the start of each entry when is_inline is true.
23 24 pub fn new(
24 25 bytes: Box<dyn Deref<Target = [u8]> + Send>,
25 26 ) -> Result<Self, RevlogError> {
26 27 if is_inline(&bytes) {
27 28 let mut offset: usize = 0;
28 29 let mut offsets = Vec::new();
29 30
30 31 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
31 32 offsets.push(offset);
32 33 let end = offset + INDEX_ENTRY_SIZE;
33 34 let entry = IndexEntry {
34 35 bytes: &bytes[offset..end],
35 36 offset_override: None,
36 37 };
37 38
38 39 offset += INDEX_ENTRY_SIZE + entry.compressed_len();
39 40 }
40 41
41 42 if offset == bytes.len() {
42 43 Ok(Self {
43 44 bytes,
44 45 offsets: Some(offsets),
45 46 })
46 47 } else {
47 Err(RevlogError::Corrupted)
48 Err(HgError::corrupted("unexpected inline revlog length")
49 .into())
48 50 }
49 51 } else {
50 52 Ok(Self {
51 53 bytes,
52 54 offsets: None,
53 55 })
54 56 }
55 57 }
56 58
57 59 /// Value of the inline flag.
58 60 pub fn is_inline(&self) -> bool {
59 61 is_inline(&self.bytes)
60 62 }
61 63
62 64 /// Return a slice of bytes if `revlog` is inline. Panic if not.
63 65 pub fn data(&self, start: usize, end: usize) -> &[u8] {
64 66 if !self.is_inline() {
65 67 panic!("tried to access data in the index of a revlog that is not inline");
66 68 }
67 69 &self.bytes[start..end]
68 70 }
69 71
70 72 /// Return number of entries of the revlog index.
71 73 pub fn len(&self) -> usize {
72 74 if let Some(offsets) = &self.offsets {
73 75 offsets.len()
74 76 } else {
75 77 self.bytes.len() / INDEX_ENTRY_SIZE
76 78 }
77 79 }
78 80
79 81 /// Returns `true` if the `Index` has zero `entries`.
80 82 pub fn is_empty(&self) -> bool {
81 83 self.len() == 0
82 84 }
83 85
84 86 /// Return the index entry corresponding to the given revision if it
85 87 /// exists.
86 88 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
87 89 if rev == NULL_REVISION {
88 90 return None;
89 91 }
90 92 if let Some(offsets) = &self.offsets {
91 93 self.get_entry_inline(rev, offsets)
92 94 } else {
93 95 self.get_entry_separated(rev)
94 96 }
95 97 }
96 98
97 99 fn get_entry_inline(
98 100 &self,
99 101 rev: Revision,
100 102 offsets: &[usize],
101 103 ) -> Option<IndexEntry> {
102 104 let start = *offsets.get(rev as usize)?;
103 105 let end = start.checked_add(INDEX_ENTRY_SIZE)?;
104 106 let bytes = &self.bytes[start..end];
105 107
106 108 // See IndexEntry for an explanation of this override.
107 109 let offset_override = Some(end);
108 110
109 111 Some(IndexEntry {
110 112 bytes,
111 113 offset_override,
112 114 })
113 115 }
114 116
115 117 fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
116 118 let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE;
117 119 if rev as usize >= max_rev {
118 120 return None;
119 121 }
120 122 let start = rev as usize * INDEX_ENTRY_SIZE;
121 123 let end = start + INDEX_ENTRY_SIZE;
122 124 let bytes = &self.bytes[start..end];
123 125
124 126 // Override the offset of the first revision as its bytes are used
125 127 // for the index's metadata (saving space because it is always 0)
126 128 let offset_override = if rev == 0 { Some(0) } else { None };
127 129
128 130 Some(IndexEntry {
129 131 bytes,
130 132 offset_override,
131 133 })
132 134 }
133 135 }
134 136
135 137 impl super::RevlogIndex for Index {
136 138 fn len(&self) -> usize {
137 139 self.len()
138 140 }
139 141
140 142 fn node(&self, rev: Revision) -> Option<&Node> {
141 143 self.get_entry(rev).map(|entry| entry.hash())
142 144 }
143 145 }
144 146
145 147 #[derive(Debug)]
146 148 pub struct IndexEntry<'a> {
147 149 bytes: &'a [u8],
148 150 /// Allows to override the offset value of the entry.
149 151 ///
150 152 /// For interleaved index and data, the offset stored in the index
151 153 /// corresponds to the separated data offset.
152 154 /// It has to be overridden with the actual offset in the interleaved
153 155 /// index which is just after the index block.
154 156 ///
155 157 /// For separated index and data, the offset stored in the first index
156 158 /// entry is mixed with the index headers.
157 159 /// It has to be overridden with 0.
158 160 offset_override: Option<usize>,
159 161 }
160 162
161 163 impl<'a> IndexEntry<'a> {
162 164 /// Return the offset of the data.
163 165 pub fn offset(&self) -> usize {
164 166 if let Some(offset_override) = self.offset_override {
165 167 offset_override
166 168 } else {
167 169 let mut bytes = [0; 8];
168 170 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
169 171 BigEndian::read_u64(&bytes[..]) as usize
170 172 }
171 173 }
172 174
173 175 /// Return the compressed length of the data.
174 176 pub fn compressed_len(&self) -> usize {
175 177 BigEndian::read_u32(&self.bytes[8..=11]) as usize
176 178 }
177 179
178 180 /// Return the uncompressed length of the data.
179 181 pub fn uncompressed_len(&self) -> usize {
180 182 BigEndian::read_u32(&self.bytes[12..=15]) as usize
181 183 }
182 184
183 185 /// Return the revision upon which the data has been derived.
184 186 pub fn base_revision(&self) -> Revision {
185 187 // TODO Maybe return an Option when base_revision == rev?
186 188 // Requires to add rev to IndexEntry
187 189
188 190 BigEndian::read_i32(&self.bytes[16..])
189 191 }
190 192
191 193 pub fn p1(&self) -> Revision {
192 194 BigEndian::read_i32(&self.bytes[24..])
193 195 }
194 196
195 197 pub fn p2(&self) -> Revision {
196 198 BigEndian::read_i32(&self.bytes[28..])
197 199 }
198 200
199 201 /// Return the hash of revision's full text.
200 202 ///
201 203 /// Currently, SHA-1 is used and only the first 20 bytes of this field
202 204 /// are used.
203 205 pub fn hash(&self) -> &'a Node {
204 206 (&self.bytes[32..52]).try_into().unwrap()
205 207 }
206 208 }
207 209
208 210 /// Value of the inline flag.
209 211 pub fn is_inline(index_bytes: &[u8]) -> bool {
210 212 match &index_bytes[0..=1] {
211 213 [0, 0] | [0, 2] => false,
212 214 _ => true,
213 215 }
214 216 }
215 217
216 218 #[cfg(test)]
217 219 mod tests {
218 220 use super::*;
219 221
220 222 #[cfg(test)]
221 223 #[derive(Debug, Copy, Clone)]
222 224 pub struct IndexEntryBuilder {
223 225 is_first: bool,
224 226 is_inline: bool,
225 227 is_general_delta: bool,
226 228 version: u16,
227 229 offset: usize,
228 230 compressed_len: usize,
229 231 uncompressed_len: usize,
230 232 base_revision: Revision,
231 233 }
232 234
233 235 #[cfg(test)]
234 236 impl IndexEntryBuilder {
235 237 pub fn new() -> Self {
236 238 Self {
237 239 is_first: false,
238 240 is_inline: false,
239 241 is_general_delta: true,
240 242 version: 2,
241 243 offset: 0,
242 244 compressed_len: 0,
243 245 uncompressed_len: 0,
244 246 base_revision: 0,
245 247 }
246 248 }
247 249
248 250 pub fn is_first(&mut self, value: bool) -> &mut Self {
249 251 self.is_first = value;
250 252 self
251 253 }
252 254
253 255 pub fn with_inline(&mut self, value: bool) -> &mut Self {
254 256 self.is_inline = value;
255 257 self
256 258 }
257 259
258 260 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
259 261 self.is_general_delta = value;
260 262 self
261 263 }
262 264
263 265 pub fn with_version(&mut self, value: u16) -> &mut Self {
264 266 self.version = value;
265 267 self
266 268 }
267 269
268 270 pub fn with_offset(&mut self, value: usize) -> &mut Self {
269 271 self.offset = value;
270 272 self
271 273 }
272 274
273 275 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
274 276 self.compressed_len = value;
275 277 self
276 278 }
277 279
278 280 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
279 281 self.uncompressed_len = value;
280 282 self
281 283 }
282 284
283 285 pub fn with_base_revision(&mut self, value: Revision) -> &mut Self {
284 286 self.base_revision = value;
285 287 self
286 288 }
287 289
288 290 pub fn build(&self) -> Vec<u8> {
289 291 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
290 292 if self.is_first {
291 293 bytes.extend(&match (self.is_general_delta, self.is_inline) {
292 294 (false, false) => [0u8, 0],
293 295 (false, true) => [0u8, 1],
294 296 (true, false) => [0u8, 2],
295 297 (true, true) => [0u8, 3],
296 298 });
297 299 bytes.extend(&self.version.to_be_bytes());
298 300 // Remaining offset bytes.
299 301 bytes.extend(&[0u8; 2]);
300 302 } else {
301 303 // Offset is only 6 bytes while usize is 8.
302 304 bytes.extend(&self.offset.to_be_bytes()[2..]);
303 305 }
304 306 bytes.extend(&[0u8; 2]); // Revision flags.
305 307 bytes.extend(&self.compressed_len.to_be_bytes()[4..]);
306 308 bytes.extend(&self.uncompressed_len.to_be_bytes()[4..]);
307 309 bytes.extend(&self.base_revision.to_be_bytes());
308 310 bytes
309 311 }
310 312 }
311 313
312 314 #[test]
313 315 fn is_not_inline_when_no_inline_flag_test() {
314 316 let bytes = IndexEntryBuilder::new()
315 317 .is_first(true)
316 318 .with_general_delta(false)
317 319 .with_inline(false)
318 320 .build();
319 321
320 322 assert_eq!(is_inline(&bytes), false)
321 323 }
322 324
323 325 #[test]
324 326 fn is_inline_when_inline_flag_test() {
325 327 let bytes = IndexEntryBuilder::new()
326 328 .is_first(true)
327 329 .with_general_delta(false)
328 330 .with_inline(true)
329 331 .build();
330 332
331 333 assert_eq!(is_inline(&bytes), true)
332 334 }
333 335
334 336 #[test]
335 337 fn is_inline_when_inline_and_generaldelta_flags_test() {
336 338 let bytes = IndexEntryBuilder::new()
337 339 .is_first(true)
338 340 .with_general_delta(true)
339 341 .with_inline(true)
340 342 .build();
341 343
342 344 assert_eq!(is_inline(&bytes), true)
343 345 }
344 346
345 347 #[test]
346 348 fn test_offset() {
347 349 let bytes = IndexEntryBuilder::new().with_offset(1).build();
348 350 let entry = IndexEntry {
349 351 bytes: &bytes,
350 352 offset_override: None,
351 353 };
352 354
353 355 assert_eq!(entry.offset(), 1)
354 356 }
355 357
356 358 #[test]
357 359 fn test_with_overridden_offset() {
358 360 let bytes = IndexEntryBuilder::new().with_offset(1).build();
359 361 let entry = IndexEntry {
360 362 bytes: &bytes,
361 363 offset_override: Some(2),
362 364 };
363 365
364 366 assert_eq!(entry.offset(), 2)
365 367 }
366 368
367 369 #[test]
368 370 fn test_compressed_len() {
369 371 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
370 372 let entry = IndexEntry {
371 373 bytes: &bytes,
372 374 offset_override: None,
373 375 };
374 376
375 377 assert_eq!(entry.compressed_len(), 1)
376 378 }
377 379
378 380 #[test]
379 381 fn test_uncompressed_len() {
380 382 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
381 383 let entry = IndexEntry {
382 384 bytes: &bytes,
383 385 offset_override: None,
384 386 };
385 387
386 388 assert_eq!(entry.uncompressed_len(), 1)
387 389 }
388 390
389 391 #[test]
390 392 fn test_base_revision() {
391 393 let bytes = IndexEntryBuilder::new().with_base_revision(1).build();
392 394 let entry = IndexEntry {
393 395 bytes: &bytes,
394 396 offset_override: None,
395 397 };
396 398
397 399 assert_eq!(entry.base_revision(), 1)
398 400 }
399 401 }
400 402
401 403 #[cfg(test)]
402 404 pub use tests::IndexEntryBuilder;
@@ -1,384 +1,398 b''
1 1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5
6 6 //! Definitions and utilities for Revision nodes
7 7 //!
8 8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 9 //! of a revision.
10 10
11 use crate::errors::HgError;
11 12 use bytes_cast::BytesCast;
12 13 use std::convert::{TryFrom, TryInto};
13 14 use std::fmt;
14 15
15 16 /// The length in bytes of a `Node`
16 17 ///
17 18 /// This constant is meant to ease refactors of this module, and
17 18 /// is private so that calling code does not expect all nodes to have
19 20 /// the same size, should we support several formats concurrently in
20 21 /// the future.
21 22 pub const NODE_BYTES_LENGTH: usize = 20;
22 23
23 24 /// Id of the null node.
24 25 ///
25 26 /// Used to indicate the absence of node.
26 27 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
27 28
28 29 /// The length in nybbles of a `Node`
29 30 ///
30 31 /// See also `NODE_BYTES_LENGTH` about it being private.
31 32 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
32 33
33 34 /// Private alias for readability and to ease future change
34 35 type NodeData = [u8; NODE_BYTES_LENGTH];
35 36
36 37 /// Binary revision SHA
37 38 ///
38 39 /// ## Future changes of hash size
39 40 ///
40 41 /// To accommodate future changes of hash size, Rust callers
41 42 /// should use the conversion methods at the boundaries (FFI, actual
42 43 /// computation of hashes and I/O) only, and only if required.
43 44 ///
44 45 /// All other callers outside of unit tests should just handle `Node` values
45 46 /// and never make any assumption on the actual length, using [`nybbles_len`]
46 47 /// if they need a loop boundary.
47 48 ///
48 49 /// All methods that create a `Node` either take a type that enforces
49 50 /// the size or return an error at runtime.
50 51 ///
51 52 /// [`nybbles_len`]: #method.nybbles_len
52 53 #[derive(Copy, Clone, Debug, PartialEq, BytesCast, derive_more::From)]
53 54 #[repr(transparent)]
54 55 pub struct Node {
55 56 data: NodeData,
56 57 }
57 58
58 59 /// The node value for NULL_REVISION
59 60 pub const NULL_NODE: Node = Node {
60 61 data: [0; NODE_BYTES_LENGTH],
61 62 };
62 63
63 64 /// Return an error if the slice has an unexpected length
64 65 impl<'a> TryFrom<&'a [u8]> for &'a Node {
65 66 type Error = ();
66 67
67 68 #[inline]
68 69 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
69 70 match Node::from_bytes(bytes) {
70 71 Ok((node, rest)) if rest.is_empty() => Ok(node),
71 72 _ => Err(()),
72 73 }
73 74 }
74 75 }
75 76
76 77 /// Return an error if the slice has an unexpected length
77 78 impl TryFrom<&'_ [u8]> for Node {
78 79 type Error = std::array::TryFromSliceError;
79 80
80 81 #[inline]
81 82 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
82 83 let data = bytes.try_into()?;
83 84 Ok(Self { data })
84 85 }
85 86 }
86 87
87 88 impl fmt::LowerHex for Node {
88 89 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
89 90 for &byte in &self.data {
90 91 write!(f, "{:02x}", byte)?
91 92 }
92 93 Ok(())
93 94 }
94 95 }
95 96
96 97 #[derive(Debug)]
97 98 pub struct FromHexError;
98 99
99 100 /// Low level utility function, also for prefixes
100 101 fn get_nybble(s: &[u8], i: usize) -> u8 {
101 102 if i % 2 == 0 {
102 103 s[i / 2] >> 4
103 104 } else {
104 105 s[i / 2] & 0x0f
105 106 }
106 107 }
107 108
108 109 impl Node {
109 110 /// Retrieve the `i`th half-byte of the binary data.
110 111 ///
111 112 /// This is also the `i`th hexadecimal digit in numeric form,
112 113 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
113 114 pub fn get_nybble(&self, i: usize) -> u8 {
114 115 get_nybble(&self.data, i)
115 116 }
116 117
117 118 /// Length of the data, in nybbles
118 119 pub fn nybbles_len(&self) -> usize {
119 120 // public exposure as an instance method only, so that we can
120 121 // easily support several sizes of hashes if needed in the future.
121 122 NODE_NYBBLES_LENGTH
122 123 }
123 124
124 125 /// Convert from hexadecimal string representation
125 126 ///
126 127 /// Exact length is required.
127 128 ///
128 129 /// To be used in FFI and I/O only, in order to facilitate future
129 130 /// changes of hash format.
130 131 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
131 132 let prefix = NodePrefix::from_hex(hex)?;
132 133 if prefix.nybbles_len() == NODE_NYBBLES_LENGTH {
133 134 Ok(Self { data: prefix.data })
134 135 } else {
135 136 Err(FromHexError)
136 137 }
137 138 }
138 139
140 /// `from_hex`, but for input from an internal file of the repository such
141 /// as a changelog or manifest entry.
142 ///
143 /// An error is treated as repository corruption.
144 pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
145 Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
146 HgError::CorruptedRepository(format!(
147 "Expected a full hexadecimal node ID, found {}",
148 String::from_utf8_lossy(hex.as_ref())
149 ))
150 })
151 }
152
139 153 /// Provide access to binary data
140 154 ///
141 155 /// This is needed by FFI layers, for instance to return expected
142 156 /// binary values to Python.
143 157 pub fn as_bytes(&self) -> &[u8] {
144 158 &self.data
145 159 }
146 160 }
147 161
148 162 /// The beginning of a binary revision SHA.
149 163 ///
150 164 /// Since it can potentially come from a hexadecimal representation with
151 165 /// odd length, it needs to carry around whether the last 4 bits are relevant
152 166 /// or not.
153 167 #[derive(Debug, PartialEq, Copy, Clone)]
154 168 pub struct NodePrefix {
155 169 /// In `1..=NODE_NYBBLES_LENGTH`
156 170 nybbles_len: u8,
157 171 /// The first `4 * length_in_nybbles` bits are used (considering bits
158 172 /// within a bytes in big-endian: most significant first), the rest
159 173 /// are zero.
160 174 data: NodeData,
161 175 }
162 176
163 177 impl NodePrefix {
164 178 /// Convert from hexadecimal string representation
165 179 ///
166 180 /// Similarly to `hex::decode`, can be used with Unicode string types
167 181 /// (`String`, `&str`) as well as bytes.
168 182 ///
169 183 /// To be used in FFI and I/O only, in order to facilitate future
170 184 /// changes of hash format.
171 185 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
172 186 let hex = hex.as_ref();
173 187 let len = hex.len();
174 188 if len > NODE_NYBBLES_LENGTH || len == 0 {
175 189 return Err(FromHexError);
176 190 }
177 191
178 192 let mut data = [0; NODE_BYTES_LENGTH];
179 193 let mut nybbles_len = 0;
180 194 for &ascii_byte in hex {
181 195 let nybble = match char::from(ascii_byte).to_digit(16) {
182 196 Some(digit) => digit as u8,
183 197 None => return Err(FromHexError),
184 198 };
185 199 // Fill in the upper half of a byte first, then the lower half.
186 200 let shift = if nybbles_len % 2 == 0 { 4 } else { 0 };
187 201 data[nybbles_len as usize / 2] |= nybble << shift;
188 202 nybbles_len += 1;
189 203 }
190 204 Ok(Self { data, nybbles_len })
191 205 }
192 206
193 207 pub fn nybbles_len(&self) -> usize {
194 208 self.nybbles_len as _
195 209 }
196 210
197 211 pub fn is_prefix_of(&self, node: &Node) -> bool {
198 212 let full_bytes = self.nybbles_len() / 2;
199 213 if self.data[..full_bytes] != node.data[..full_bytes] {
200 214 return false;
201 215 }
202 216 if self.nybbles_len() % 2 == 0 {
203 217 return true;
204 218 }
205 219 let last = self.nybbles_len() - 1;
206 220 self.get_nybble(last) == node.get_nybble(last)
207 221 }
208 222
209 223 /// Retrieve the `i`th half-byte from the prefix.
210 224 ///
211 225 /// This is also the `i`th hexadecimal digit in numeric form,
212 226 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
213 227 pub fn get_nybble(&self, i: usize) -> u8 {
214 228 assert!(i < self.nybbles_len());
215 229 get_nybble(&self.data, i)
216 230 }
217 231
218 232 fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
219 233 (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i))
220 234 }
221 235
222 236 /// Return the index of the first nybble that differs from `node`
223 237 ///
224 238 /// If the return value is `None`, that means that `self` is
225 239 /// a prefix of `node`; note that this method is a bit slower
226 240 /// than `is_prefix_of`.
227 241 ///
228 242 /// Returned index is as in `get_nybble`, i.e., starting at 0.
229 243 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
230 244 self.iter_nybbles()
231 245 .zip(NodePrefix::from(*node).iter_nybbles())
232 246 .position(|(a, b)| a != b)
233 247 }
234 248 }
235 249
236 250 impl fmt::LowerHex for NodePrefix {
237 251 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
238 252 let full_bytes = self.nybbles_len() / 2;
239 253 for &byte in &self.data[..full_bytes] {
240 254 write!(f, "{:02x}", byte)?
241 255 }
242 256 if self.nybbles_len() % 2 == 1 {
243 257 let last = self.nybbles_len() - 1;
244 258 write!(f, "{:x}", self.get_nybble(last))?
245 259 }
246 260 Ok(())
247 261 }
248 262 }
249 263
250 264 /// A shortcut for full `Node` references
251 265 impl From<&'_ Node> for NodePrefix {
252 266 fn from(node: &'_ Node) -> Self {
253 267 NodePrefix {
254 268 nybbles_len: node.nybbles_len() as _,
255 269 data: node.data,
256 270 }
257 271 }
258 272 }
259 273
260 274 /// A shortcut for full `Node` references
261 275 impl From<Node> for NodePrefix {
262 276 fn from(node: Node) -> Self {
263 277 NodePrefix {
264 278 nybbles_len: node.nybbles_len() as _,
265 279 data: node.data,
266 280 }
267 281 }
268 282 }
269 283
270 284 impl PartialEq<Node> for NodePrefix {
271 285 fn eq(&self, other: &Node) -> bool {
272 286 Self::from(*other) == *self
273 287 }
274 288 }
275 289
276 290 #[cfg(test)]
277 291 mod tests {
278 292 use super::*;
279 293
280 294 const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
281 295 const SAMPLE_NODE: Node = Node {
282 296 data: [
283 297 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
284 298 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
285 299 ],
286 300 };
287 301
288 302 /// Pad a hexadecimal string to reach `NODE_NYBBLES_LENGTH`
289 303 /// The padding is made with zeros.
290 304 pub fn hex_pad_right(hex: &str) -> String {
291 305 let mut res = hex.to_string();
292 306 while res.len() < NODE_NYBBLES_LENGTH {
293 307 res.push('0');
294 308 }
295 309 res
296 310 }
297 311
298 312 #[test]
299 313 fn test_node_from_hex() {
300 314 let not_hex = "012... oops";
301 315 let too_short = "0123";
302 316 let too_long = format!("{}0", SAMPLE_NODE_HEX);
303 317 assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
304 318 assert!(Node::from_hex(not_hex).is_err());
305 319 assert!(Node::from_hex(too_short).is_err());
306 320 assert!(Node::from_hex(&too_long).is_err());
307 321 }
308 322
309 323 #[test]
310 324 fn test_node_encode_hex() {
311 325 assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
312 326 }
313 327
314 328 #[test]
315 329 fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
316 330 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
317 331 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
318 332 assert_eq!(
319 333 format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
320 334 SAMPLE_NODE_HEX
321 335 );
322 336 Ok(())
323 337 }
324 338
325 339 #[test]
326 340 fn test_prefix_from_hex_errors() {
327 341 assert!(NodePrefix::from_hex("testgr").is_err());
328 342 let mut long = format!("{:x}", NULL_NODE);
329 343 long.push('c');
330 344 assert!(NodePrefix::from_hex(&long).is_err())
331 345 }
332 346
333 347 #[test]
334 348 fn test_is_prefix_of() -> Result<(), FromHexError> {
335 349 let mut node_data = [0; NODE_BYTES_LENGTH];
336 350 node_data[0] = 0x12;
337 351 node_data[1] = 0xca;
338 352 let node = Node::from(node_data);
339 353 assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
340 354 assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
341 355 assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
342 356 assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
343 357 Ok(())
344 358 }
345 359
346 360 #[test]
347 361 fn test_get_nybble() -> Result<(), FromHexError> {
348 362 let prefix = NodePrefix::from_hex("dead6789cafe")?;
349 363 assert_eq!(prefix.get_nybble(0), 13);
350 364 assert_eq!(prefix.get_nybble(7), 9);
351 365 Ok(())
352 366 }
353 367
354 368 #[test]
355 369 fn test_first_different_nybble_even_prefix() {
356 370 let prefix = NodePrefix::from_hex("12ca").unwrap();
357 371 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
358 372 assert_eq!(prefix.first_different_nybble(&node), Some(0));
359 373 node.data[0] = 0x13;
360 374 assert_eq!(prefix.first_different_nybble(&node), Some(1));
361 375 node.data[0] = 0x12;
362 376 assert_eq!(prefix.first_different_nybble(&node), Some(2));
363 377 node.data[1] = 0xca;
364 378 // now it is a prefix
365 379 assert_eq!(prefix.first_different_nybble(&node), None);
366 380 }
367 381
368 382 #[test]
369 383 fn test_first_different_nybble_odd_prefix() {
370 384 let prefix = NodePrefix::from_hex("12c").unwrap();
371 385 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
372 386 assert_eq!(prefix.first_different_nybble(&node), Some(0));
373 387 node.data[0] = 0x13;
374 388 assert_eq!(prefix.first_different_nybble(&node), Some(1));
375 389 node.data[0] = 0x12;
376 390 assert_eq!(prefix.first_different_nybble(&node), Some(2));
377 391 node.data[1] = 0xca;
378 392 // now it is a prefix
379 393 assert_eq!(prefix.first_different_nybble(&node), None);
380 394 }
381 395 }
382 396
383 397 #[cfg(test)]
384 398 pub use tests::hex_pad_right;
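
A crate-internal sketch contrasting the two hex parsers: `NodePrefix::from_hex` for possibly-abbreviated user input, `Node::from_hex_for_repo` for full ids read from repository files (where failure means corruption):

    use crate::revlog::node::{Node, NodePrefix};

    fn prefix_matches(user_input: &str, stored_hex: &[u8]) -> bool {
        match (
            NodePrefix::from_hex(user_input),
            Node::from_hex_for_repo(stored_hex),
        ) {
            (Ok(prefix), Ok(node)) => prefix.is_prefix_of(&node),
            _ => false,
        }
    }
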
@@ -1,105 +1,110 b''
1 use crate::errors::{HgError, HgResultExt};
1 2 use bytes_cast::{unaligned, BytesCast};
2 3 use memmap::Mmap;
3 4 use std::path::{Path, PathBuf};
4 5
5 6 use super::revlog::RevlogError;
6 7 use crate::repo::Repo;
7 8 use crate::utils::strip_suffix;
8 9
9 10 const ONDISK_VERSION: u8 = 1;
10 11
11 12 pub(super) struct NodeMapDocket {
12 13 pub data_length: usize,
13 14 // TODO: keep here more of the data from `parse()` when we need it
14 15 }
15 16
16 17 #[derive(BytesCast)]
17 18 #[repr(C)]
18 19 struct DocketHeader {
19 20 uid_size: u8,
20 21 _tip_rev: unaligned::U64Be,
21 22 data_length: unaligned::U64Be,
22 23 _data_unused: unaligned::U64Be,
23 24 tip_node_size: unaligned::U64Be,
24 25 }
25 26
26 27 impl NodeMapDocket {
27 28 /// Return `Ok(None)` when the caller should proceed without a persistent
28 29 /// nodemap:
29 30 ///
30 31 /// * This revlog does not have a `.n` docket file (it is not generated for
31 32 /// small revlogs), or
32 33 /// * The docket has an unsupported version number (repositories created by
33 34 /// later hg, maybe that should be a requirement instead?), or
34 35 /// * The docket file points to a missing (likely deleted) data file (this
35 36 /// can happen in a rare race condition).
36 37 pub fn read_from_file(
37 38 repo: &Repo,
38 39 index_path: &Path,
39 40 ) -> Result<Option<(Self, Mmap)>, RevlogError> {
40 41 let docket_path = index_path.with_extension("n");
41 let docket_bytes = match repo.store_vfs().read(&docket_path) {
42 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
43 return Ok(None)
44 }
45 Err(e) => return Err(RevlogError::IoError(e)),
46 Ok(bytes) => bytes,
42 let docket_bytes = if let Some(bytes) =
43 repo.store_vfs().read(&docket_path).io_not_found_as_none()?
44 {
45 bytes
46 } else {
47 return Ok(None);
47 48 };
48 49
49 50 let input = if let Some((&ONDISK_VERSION, rest)) =
50 51 docket_bytes.split_first()
51 52 {
52 53 rest
53 54 } else {
54 55 return Ok(None);
55 56 };
56 57
57 let (header, rest) = DocketHeader::from_bytes(input)?;
58 /// Treat any error as a parse error
59 fn parse<T, E>(result: Result<T, E>) -> Result<T, RevlogError> {
60 result.map_err(|_| {
61 HgError::corrupted("nodemap docket parse error").into()
62 })
63 }
64
65 let (header, rest) = parse(DocketHeader::from_bytes(input))?;
58 66 let uid_size = header.uid_size as usize;
59 67 // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
60 68 // systems?
61 69 let tip_node_size = header.tip_node_size.get() as usize;
62 70 let data_length = header.data_length.get() as usize;
63 let (uid, rest) = u8::slice_from_bytes(rest, uid_size)?;
64 let (_tip_node, _rest) = u8::slice_from_bytes(rest, tip_node_size)?;
65 let uid =
66 std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?;
71 let (uid, rest) = parse(u8::slice_from_bytes(rest, uid_size))?;
72 let (_tip_node, _rest) =
73 parse(u8::slice_from_bytes(rest, tip_node_size))?;
74 let uid = parse(std::str::from_utf8(uid))?;
67 75 let docket = NodeMapDocket { data_length };
68 76
69 77 let data_path = rawdata_path(&docket_path, uid);
70 // TODO: use `std::fs::read` here when the `persistent-nodemap.mmap`
78 // TODO: use `vfs.read()` here when the `persistent-nodemap.mmap`
71 79 // config is false?
72 match repo.store_vfs().mmap_open(&data_path) {
73 Ok(mmap) => {
74 if mmap.len() >= data_length {
75 Ok(Some((docket, mmap)))
76 } else {
77 Err(RevlogError::Corrupted)
78 }
80 if let Some(mmap) = repo
81 .store_vfs()
82 .mmap_open(&data_path)
83 .io_not_found_as_none()?
84 {
85 if mmap.len() >= data_length {
86 Ok(Some((docket, mmap)))
87 } else {
88 Err(HgError::corrupted("persistent nodemap too short").into())
79 89 }
80 Err(error) => {
81 if error.kind() == std::io::ErrorKind::NotFound {
82 Ok(None)
83 } else {
84 Err(RevlogError::IoError(error))
85 }
86 }
90 } else {
91 Ok(None)
87 92 }
88 93 }
89 94 }
90 95
91 96 fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
92 97 let docket_name = docket_path
93 98 .file_name()
94 99 .expect("expected a base name")
95 100 .to_str()
96 101 .expect("expected an ASCII file name in the store");
97 102 let prefix = strip_suffix(docket_name, ".n.a")
98 103 .or_else(|| strip_suffix(docket_name, ".n"))
99 104 .expect("expected docket path in .n or .n.a");
100 105 let name = format!("{}-{}.nd", prefix, uid);
101 106 docket_path
102 107 .parent()
103 108 .expect("expected a non-root path")
104 109 .join(name)
105 110 }
@@ -1,387 +1,393 b''
1 1 use std::borrow::Cow;
2 2 use std::io::Read;
3 3 use std::ops::Deref;
4 4 use std::path::Path;
5 5
6 6 use byteorder::{BigEndian, ByteOrder};
7 7 use crypto::digest::Digest;
8 8 use crypto::sha1::Sha1;
9 9 use flate2::read::ZlibDecoder;
10 10 use micro_timer::timed;
11 11 use zstd;
12 12
13 13 use super::index::Index;
14 14 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
15 15 use super::nodemap;
16 use super::nodemap::NodeMap;
16 use super::nodemap::{NodeMap, NodeMapError};
17 17 use super::nodemap_docket::NodeMapDocket;
18 18 use super::patch;
19 use crate::errors::HgError;
19 20 use crate::repo::Repo;
20 21 use crate::revlog::Revision;
21 22
23 #[derive(derive_more::From)]
22 24 pub enum RevlogError {
23 IoError(std::io::Error),
24 UnsuportedVersion(u16),
25 25 InvalidRevision,
26 26 /// Found more than one entry whose ID match the requested prefix
27 27 AmbiguousPrefix,
28 Corrupted,
29 UnknowDataFormat(u8),
28 #[from]
29 Other(HgError),
30 30 }
31 31
32 impl From<bytes_cast::FromBytesError> for RevlogError {
33 fn from(_: bytes_cast::FromBytesError) -> Self {
34 RevlogError::Corrupted
32 impl From<NodeMapError> for RevlogError {
33 fn from(error: NodeMapError) -> Self {
34 match error {
35 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
36 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
37 }
38 }
39 }
40
41 impl RevlogError {
42 fn corrupted() -> Self {
43 RevlogError::Other(HgError::corrupted("corrupted revlog"))
35 44 }
36 45 }
37 46
38 47 /// Read only implementation of revlog.
39 48 pub struct Revlog {
40 49 /// When index and data are not interleaved: bytes of the revlog index.
41 50 /// When index and data are interleaved: bytes of the revlog index and
42 51 /// data.
43 52 index: Index,
44 53 /// When index and data are not interleaved: bytes of the revlog data
45 54 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
46 55 /// When present on disk: the persistent nodemap for this revlog
47 56 nodemap: Option<nodemap::NodeTree>,
48 57 }
49 58
50 59 impl Revlog {
51 60 /// Open a revlog index file.
52 61 ///
53 62 /// It will also open the associated data file if index and data are not
54 63 /// interleaved.
55 64 #[timed]
56 65 pub fn open(
57 66 repo: &Repo,
58 67 index_path: impl AsRef<Path>,
59 68 data_path: Option<&Path>,
60 69 ) -> Result<Self, RevlogError> {
61 70 let index_path = index_path.as_ref();
62 let index_mmap = repo
63 .store_vfs()
64 .mmap_open(&index_path)
65 .map_err(RevlogError::IoError)?;
71 let index_mmap = repo.store_vfs().mmap_open(&index_path)?;
66 72
67 73 let version = get_version(&index_mmap);
68 74 if version != 1 {
69 return Err(RevlogError::UnsuportedVersion(version));
75 // A proper new version should have had a repo/store requirement.
76 return Err(RevlogError::corrupted());
70 77 }
71 78
72 79 let index = Index::new(Box::new(index_mmap))?;
73 80
74 81 let default_data_path = index_path.with_extension("d");
75 82
76 83 // type annotation required
77 84 // won't recognize Mmap as Deref<Target = [u8]>
78 85 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
79 86 if index.is_inline() {
80 87 None
81 88 } else {
82 89 let data_path = data_path.unwrap_or(&default_data_path);
83 let data_mmap = repo
84 .store_vfs()
85 .mmap_open(data_path)
86 .map_err(RevlogError::IoError)?;
90 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
87 91 Some(Box::new(data_mmap))
88 92 };
89 93
90 94 let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map(
91 95 |(docket, data)| {
92 96 nodemap::NodeTree::load_bytes(
93 97 Box::new(data),
94 98 docket.data_length,
95 99 )
96 100 },
97 101 );
98 102
99 103 Ok(Revlog {
100 104 index,
101 105 data_bytes,
102 106 nodemap,
103 107 })
104 108 }
105 109
106 110 /// Return number of entries of the `Revlog`.
107 111 pub fn len(&self) -> usize {
108 112 self.index.len()
109 113 }
110 114
111 115 /// Returns `true` if the `Revlog` has zero `entries`.
112 116 pub fn is_empty(&self) -> bool {
113 117 self.index.is_empty()
114 118 }
115 119
116 120 /// Return the revision whose node id matches the given prefix.
117 121 #[timed]
118 122 pub fn get_node_rev(
119 123 &self,
120 124 node: NodePrefix,
121 125 ) -> Result<Revision, RevlogError> {
122 126 if let Some(nodemap) = &self.nodemap {
123 127 return nodemap
124 .find_bin(&self.index, node)
125 // TODO: propagate details of this error:
126 .map_err(|_| RevlogError::Corrupted)?
128 .find_bin(&self.index, node)?
127 129 .ok_or(RevlogError::InvalidRevision);
128 130 }
129 131
130 132 // Fallback to linear scan when a persistent nodemap is not present.
131 133 // This happens when the persistent-nodemap experimental feature is not
132 134 // enabled, or for small revlogs.
133 135 //
134 136 // TODO: consider building a non-persistent nodemap in memory to
135 137 // optimize these cases.
136 138 let mut found_by_prefix = None;
137 139 for rev in (0..self.len() as Revision).rev() {
138 140 let index_entry =
139 self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
141 self.index.get_entry(rev).ok_or(HgError::corrupted(
142 "revlog references a revision not in the index",
143 ))?;
140 144 if node == *index_entry.hash() {
141 145 return Ok(rev);
142 146 }
143 147 if node.is_prefix_of(index_entry.hash()) {
144 148 if found_by_prefix.is_some() {
145 149 return Err(RevlogError::AmbiguousPrefix);
146 150 }
147 151 found_by_prefix = Some(rev)
148 152 }
149 153 }
150 154 found_by_prefix.ok_or(RevlogError::InvalidRevision)
151 155 }
152 156
153 157 /// Returns whether the given revision exists in this revlog.
154 158 pub fn has_rev(&self, rev: Revision) -> bool {
155 159 self.index.get_entry(rev).is_some()
156 160 }
157 161
158 162 /// Return the full data associated to a revision.
159 163 ///
160 164 /// All entries required to build the final data out of deltas will be
161 165 /// retrieved as needed, and the deltas will be applied to the initial
162 166 /// snapshot to rebuild the final data.
163 167 #[timed]
164 168 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
165 169 // Todo return -> Cow
166 170 let mut entry = self.get_entry(rev)?;
167 171 let mut delta_chain = vec![];
168 172 while let Some(base_rev) = entry.base_rev {
169 173 delta_chain.push(entry);
170 entry =
171 self.get_entry(base_rev).or(Err(RevlogError::Corrupted))?;
174 entry = self
175 .get_entry(base_rev)
176 .map_err(|_| RevlogError::corrupted())?;
172 177 }
173 178
174 179 // TODO do not look twice in the index
175 180 let index_entry = self
176 181 .index
177 182 .get_entry(rev)
178 183 .ok_or(RevlogError::InvalidRevision)?;
179 184
180 185 let data: Vec<u8> = if delta_chain.is_empty() {
181 186 entry.data()?.into()
182 187 } else {
183 188 Revlog::build_data_from_deltas(entry, &delta_chain)?
184 189 };
185 190
186 191 if self.check_hash(
187 192 index_entry.p1(),
188 193 index_entry.p2(),
189 194 index_entry.hash().as_bytes(),
190 195 &data,
191 196 ) {
192 197 Ok(data)
193 198 } else {
194 Err(RevlogError::Corrupted)
199 Err(RevlogError::corrupted())
195 200 }
196 201 }
197 202
198 203 /// Check the hash of some given data against the recorded hash.
199 204 pub fn check_hash(
200 205 &self,
201 206 p1: Revision,
202 207 p2: Revision,
203 208 expected: &[u8],
204 209 data: &[u8],
205 210 ) -> bool {
206 211 let e1 = self.index.get_entry(p1);
207 212 let h1 = match e1 {
208 213 Some(ref entry) => entry.hash(),
209 214 None => &NULL_NODE,
210 215 };
211 216 let e2 = self.index.get_entry(p2);
212 217 let h2 = match e2 {
213 218 Some(ref entry) => entry.hash(),
214 219 None => &NULL_NODE,
215 220 };
216 221
217 222 hash(data, h1.as_bytes(), h2.as_bytes()).as_slice() == expected
218 223 }
219 224
220 225 /// Build the full data of a revision out of its snapshot
221 226 /// and its deltas.
222 227 #[timed]
223 228 fn build_data_from_deltas(
224 229 snapshot: RevlogEntry,
225 230 deltas: &[RevlogEntry],
226 231 ) -> Result<Vec<u8>, RevlogError> {
227 232 let snapshot = snapshot.data()?;
228 233 let deltas = deltas
229 234 .iter()
230 235 .rev()
231 236 .map(RevlogEntry::data)
232 237 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
233 238 let patches: Vec<_> =
234 239 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
235 240 let patch = patch::fold_patch_lists(&patches);
236 241 Ok(patch.apply(&snapshot))
237 242 }
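Conceptually, `build_data_from_deltas` starts from the oldest full snapshot and applies the deltas in chain order (oldest first, which is what the `.rev()` above restores) to reconstruct the requested revision. The sketch below applies hunks one delta at a time and models a hunk as a `(start, end, replacement)` triple; the real code folds all patch lists into a single patch before applying, and uses Mercurial's binary patch format rather than this toy representation:

fn apply_delta(base: &[u8], hunks: &[(usize, usize, Vec<u8>)]) -> Vec<u8> {
    let mut out = Vec::new();
    let mut last = 0;
    for (start, end, data) in hunks {
        out.extend_from_slice(&base[last..*start]); // unchanged prefix
        out.extend_from_slice(data);                // replacement bytes
        last = *end;
    }
    out.extend_from_slice(&base[last..]); // unchanged suffix
    out
}

fn main() {
    let snapshot = b"hello world".to_vec();
    // Two deltas in chain order: replace "world", then append "!".
    let chain = vec![
        vec![(6, 11, b"revlog".to_vec())],
        vec![(12, 12, b"!".to_vec())],
    ];
    let mut data = snapshot;
    for hunks in &chain {
        data = apply_delta(&data, hunks);
    }
    assert_eq!(data, b"hello revlog!".to_vec());
}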
238 243
239 244 /// Return the revlog data.
240 245 fn data(&self) -> &[u8] {
241 246 match self.data_bytes {
242 247 Some(ref data_bytes) => &data_bytes,
243 248 None => panic!(
244 249 "forgot to load the data or trying to access inline data"
245 250 ),
246 251 }
247 252 }
248 253
249 254 /// Get an entry of the revlog.
250 255 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
251 256 let index_entry = self
252 257 .index
253 258 .get_entry(rev)
254 259 .ok_or(RevlogError::InvalidRevision)?;
255 260 let start = index_entry.offset();
256 261 let end = start + index_entry.compressed_len();
257 262 let data = if self.index.is_inline() {
258 263 self.index.data(start, end)
259 264 } else {
260 265 &self.data()[start..end]
261 266 };
262 267 let entry = RevlogEntry {
263 268 rev,
264 269 bytes: data,
265 270 compressed_len: index_entry.compressed_len(),
266 271 uncompressed_len: index_entry.uncompressed_len(),
267 272 base_rev: if index_entry.base_revision() == rev {
268 273 None
269 274 } else {
270 275 Some(index_entry.base_revision())
271 276 },
272 277 };
273 278 Ok(entry)
274 279 }
275 280 }
276 281
277 282 /// The revlog entry's bytes and the necessary information to extract
278 283 /// the entry's data.
279 284 #[derive(Debug)]
280 285 pub struct RevlogEntry<'a> {
281 286 rev: Revision,
282 287 bytes: &'a [u8],
283 288 compressed_len: usize,
284 289 uncompressed_len: usize,
285 290 base_rev: Option<Revision>,
286 291 }
287 292
288 293 impl<'a> RevlogEntry<'a> {
289 294 /// Extract the data contained in the entry.
290 295 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
291 296 if self.bytes.is_empty() {
292 297 return Ok(Cow::Borrowed(&[]));
293 298 }
294 299 match self.bytes[0] {
295 300 // Revision data is the entirety of the entry, including this
296 301 // header.
297 302 b'\0' => Ok(Cow::Borrowed(self.bytes)),
298 303 // Raw revision data follows.
299 304 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
300 305 // zlib (RFC 1950) data.
301 306 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
302 307 // zstd data.
303 308 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
304 format_type => Err(RevlogError::UnknowDataFormat(format_type)),
309 // A proper new format should have had a repo/store requirement.
310 _format_type => Err(RevlogError::corrupted()),
305 311 }
306 312 }
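The match in `data` dispatches on the first byte of the stored chunk to decide how the revision is encoded on disk. A dependency-free classifier mirroring that dispatch (the returned strings are only labels for illustration):

fn entry_encoding(first_byte: u8) -> &'static str {
    match first_byte {
        b'\0' => "uncompressed, header byte included in the data",
        b'u' => "uncompressed, raw data follows the header byte",
        b'x' => "zlib (RFC 1950)",
        0x28 => "zstd",
        _ => "unknown: treated as a corrupted revlog",
    }
}

fn main() {
    assert_eq!(entry_encoding(b'x'), "zlib (RFC 1950)");
    assert_eq!(entry_encoding(0x28), "zstd");
}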
307 313
308 314 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
309 315 let mut decoder = ZlibDecoder::new(self.bytes);
310 316 if self.is_delta() {
311 317 let mut buf = Vec::with_capacity(self.compressed_len);
312 318 decoder
313 319 .read_to_end(&mut buf)
314 .or(Err(RevlogError::Corrupted))?;
320 .map_err(|_| RevlogError::corrupted())?;
315 321 Ok(buf)
316 322 } else {
317 323 let mut buf = vec![0; self.uncompressed_len];
318 324 decoder
319 325 .read_exact(&mut buf)
320 .or(Err(RevlogError::Corrupted))?;
326 .map_err(|_| RevlogError::corrupted())?;
321 327 Ok(buf)
322 328 }
323 329 }
324 330
325 331 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
326 332 if self.is_delta() {
327 333 let mut buf = Vec::with_capacity(self.compressed_len);
328 334 zstd::stream::copy_decode(self.bytes, &mut buf)
329 .or(Err(RevlogError::Corrupted))?;
335 .map_err(|_| RevlogError::corrupted())?;
330 336 Ok(buf)
331 337 } else {
332 338 let mut buf = vec![0; self.uncompressed_len];
333 339 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
334 .or(Err(RevlogError::Corrupted))?;
340 .map_err(|_| RevlogError::corrupted())?;
335 341 if len != self.uncompressed_len {
336 Err(RevlogError::Corrupted)
342 Err(RevlogError::corrupted())
337 343 } else {
338 344 Ok(buf)
339 345 }
340 346 }
341 347 }
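Both decompression helpers size their output differently depending on `is_delta`: a delta's decompressed length is not recorded in the index, so the compressed length only serves as a capacity hint and the stream is read to its end, whereas a full text can be read into an exactly-sized buffer. A sketch of the zlib side of that distinction, assuming the `flate2` crate (the usual provider of `ZlibDecoder`); `decompress` and `known_len` are made-up names:

use flate2::read::ZlibDecoder;
use flate2::write::ZlibEncoder;
use flate2::Compression;
use std::io::{Read, Write};

fn decompress(bytes: &[u8], known_len: Option<usize>) -> std::io::Result<Vec<u8>> {
    let mut decoder = ZlibDecoder::new(bytes);
    match known_len {
        // Full text: the uncompressed length is known up front.
        Some(len) => {
            let mut buf = vec![0; len];
            decoder.read_exact(&mut buf)?;
            Ok(buf)
        }
        // Delta: the final length is unknown, so read until the end.
        None => {
            let mut buf = Vec::new();
            decoder.read_to_end(&mut buf)?;
            Ok(buf)
        }
    }
}

fn main() -> std::io::Result<()> {
    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(b"some revision text")?;
    let compressed = encoder.finish()?;
    assert_eq!(decompress(&compressed, Some(18))?.as_slice(), b"some revision text");
    assert_eq!(decompress(&compressed, None)?.as_slice(), b"some revision text");
    Ok(())
}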
342 348
343 349 /// Tell whether the entry is a snapshot or a delta
344 350 /// (this influences decompression).
345 351 fn is_delta(&self) -> bool {
346 352 self.base_rev.is_some()
347 353 }
348 354 }
349 355
350 356 /// Format version of the revlog.
351 357 pub fn get_version(index_bytes: &[u8]) -> u16 {
352 358 BigEndian::read_u16(&index_bytes[2..=3])
353 359 }
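`get_version` reads a 16-bit big-endian value from bytes 2 and 3 of the index header; the first two bytes carry the format flags. A minimal standard-library-only sketch assuming that same layout:

fn version_from_header(index_bytes: &[u8]) -> u16 {
    // Big-endian u16 at offset 2, like `get_version` above.
    u16::from_be_bytes([index_bytes[2], index_bytes[3]])
}

fn main() {
    // Two flag bytes, then version 1, then the rest of the first entry.
    let header = [0x00, 0x01, 0x00, 0x01, 0xde, 0xad];
    assert_eq!(version_from_header(&header), 1);
}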
354 360
355 361 /// Calculate the hash of a revision given its data and its parents.
356 362 fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
357 363 let mut hasher = Sha1::new();
358 364 let (a, b) = (p1_hash, p2_hash);
359 365 if a > b {
360 366 hasher.input(b);
361 367 hasher.input(a);
362 368 } else {
363 369 hasher.input(a);
364 370 hasher.input(b);
365 371 }
366 372 hasher.input(data);
367 373 let mut hash = vec![0; NODE_BYTES_LENGTH];
368 374 hasher.result(&mut hash);
369 375 hash
370 376 }
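`hash` sorts the two parent hashes before feeding them to SHA-1, so a node id does not depend on which parent is stored as p1 and which as p2. The sketch below keeps that ordering rule but substitutes plain concatenation for SHA-1 so it runs without a hashing crate; `node_id` is a made-up name:

fn node_id(data: &[u8], p1: &[u8], p2: &[u8]) -> Vec<u8> {
    // Sort the parents so the input is order-independent.
    let (a, b) = if p1 > p2 { (p2, p1) } else { (p1, p2) };
    let mut input = Vec::new();
    input.extend_from_slice(a);
    input.extend_from_slice(b);
    input.extend_from_slice(data);
    input // a real implementation would return sha1(input) here
}

fn main() {
    // Swapping the parents produces the same hash input.
    assert_eq!(node_id(b"data", b"\x01", b"\x02"), node_id(b"data", b"\x02", b"\x01"));
}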
371 377
372 378 #[cfg(test)]
373 379 mod tests {
374 380 use super::*;
375 381
376 382 use super::super::index::IndexEntryBuilder;
377 383
378 384 #[test]
379 385 fn version_test() {
380 386 let bytes = IndexEntryBuilder::new()
381 387 .is_first(true)
382 388 .with_version(1)
383 389 .build();
384 390
385 391 assert_eq!(get_version(&bytes), 1)
386 392 }
387 393 }
@@ -1,146 +1,123 b''
1 1 use crate::exitcode;
2 2 use crate::ui::utf8_to_local;
3 3 use crate::ui::UiError;
4 4 use format_bytes::format_bytes;
5 5 use hg::errors::HgError;
6 6 use hg::operations::FindRootError;
7 7 use hg::revlog::revlog::RevlogError;
8 8 use hg::utils::files::get_bytes_from_path;
9 9 use std::convert::From;
10 10 use std::path::PathBuf;
11 11
12 12 /// The kind of command error
13 13 #[derive(Debug, derive_more::From)]
14 14 pub enum CommandError {
15 15 /// The root of the repository cannot be found
16 16 RootNotFound(PathBuf),
17 17 /// The current directory cannot be found
18 18 CurrentDirNotFound(std::io::Error),
19 19 /// The standard output stream cannot be written to
20 20 StdoutError,
21 21 /// The standard error stream cannot be written to
22 22 StderrError,
23 23 /// The command aborted
24 24 Abort(Option<Vec<u8>>),
25 25 /// A Mercurial capability has not been implemented.
26 26 Unimplemented,
27 27 /// Common cases
28 28 #[from]
29 29 Other(HgError),
30 30 }
31 31
32 32 impl CommandError {
33 33 pub fn get_exit_code(&self) -> exitcode::ExitCode {
34 34 match self {
35 35 CommandError::RootNotFound(_) => exitcode::ABORT,
36 36 CommandError::CurrentDirNotFound(_) => exitcode::ABORT,
37 37 CommandError::StdoutError => exitcode::ABORT,
38 38 CommandError::StderrError => exitcode::ABORT,
39 39 CommandError::Abort(_) => exitcode::ABORT,
40 40 CommandError::Unimplemented => exitcode::UNIMPLEMENTED_COMMAND,
41 41 CommandError::Other(HgError::UnsupportedFeature(_)) => {
42 42 exitcode::UNIMPLEMENTED_COMMAND
43 43 }
44 44 CommandError::Other(_) => exitcode::ABORT,
45 45 }
46 46 }
47 47
48 48 /// Return the message corresponding to the error, if any
49 49 pub fn get_error_message_bytes(&self) -> Option<Vec<u8>> {
50 50 match self {
51 51 CommandError::RootNotFound(path) => {
52 52 let bytes = get_bytes_from_path(path);
53 53 Some(format_bytes!(
54 54 b"abort: no repository found in '{}' (.hg not found)!\n",
55 55 bytes.as_slice()
56 56 ))
57 57 }
58 58 CommandError::CurrentDirNotFound(e) => Some(format_bytes!(
59 59 b"abort: error getting current working directory: {}\n",
60 60 e.to_string().as_bytes(),
61 61 )),
62 62 CommandError::Abort(message) => message.to_owned(),
63 63
64 64 CommandError::StdoutError
65 65 | CommandError::StderrError
66 66 | CommandError::Unimplemented
67 67 | CommandError::Other(HgError::UnsupportedFeature(_)) => None,
68 68
69 69 CommandError::Other(e) => {
70 70 Some(format_bytes!(b"{}\n", e.to_string().as_bytes()))
71 71 }
72 72 }
73 73 }
74 74
75 75 /// Exit the process with the corresponding exit code.
76 76 pub fn exit(&self) {
77 77 std::process::exit(self.get_exit_code())
78 78 }
79 79 }
80 80
81 81 impl From<UiError> for CommandError {
82 82 fn from(error: UiError) -> Self {
83 83 match error {
84 84 UiError::StdoutError(_) => CommandError::StdoutError,
85 85 UiError::StderrError(_) => CommandError::StderrError,
86 86 }
87 87 }
88 88 }
89 89
90 90 impl From<FindRootError> for CommandError {
91 91 fn from(err: FindRootError) -> Self {
92 92 match err {
93 93 FindRootError::RootNotFound(path) => {
94 94 CommandError::RootNotFound(path)
95 95 }
96 96 FindRootError::GetCurrentDirError(e) => {
97 97 CommandError::CurrentDirNotFound(e)
98 98 }
99 99 }
100 100 }
101 101 }
102 102
103 103 impl From<(RevlogError, &str)> for CommandError {
104 104 fn from((err, rev): (RevlogError, &str)) -> CommandError {
105 105 match err {
106 RevlogError::IoError(err) => CommandError::Abort(Some(
107 utf8_to_local(&format!("abort: {}\n", err)).into(),
108 )),
109 106 RevlogError::InvalidRevision => CommandError::Abort(Some(
110 107 utf8_to_local(&format!(
111 108 "abort: invalid revision identifier {}\n",
112 109 rev
113 110 ))
114 111 .into(),
115 112 )),
116 113 RevlogError::AmbiguousPrefix => CommandError::Abort(Some(
117 114 utf8_to_local(&format!(
118 115 "abort: ambiguous revision identifier {}\n",
119 116 rev
120 117 ))
121 118 .into(),
122 119 )),
123 RevlogError::UnsuportedVersion(version) => {
124 CommandError::Abort(Some(
125 utf8_to_local(&format!(
126 "abort: unsupported revlog version {}\n",
127 version
128 ))
129 .into(),
130 ))
131 }
132 RevlogError::Corrupted => {
133 CommandError::Abort(Some("abort: corrupted revlog\n".into()))
134 }
135 RevlogError::UnknowDataFormat(format) => {
136 CommandError::Abort(Some(
137 utf8_to_local(&format!(
138 "abort: unknow revlog dataformat {:?}\n",
139 format
140 ))
141 .into(),
142 ))
143 }
120 RevlogError::Other(err) => CommandError::Other(err),
144 121 }
145 122 }
146 123 }
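The final match arm forwards `RevlogError::Other(err)` into `CommandError::Other`, and the `#[from]` attribute on that variant lets any `HgError`-returning call be used with `?` inside command code. A self-contained sketch of that pattern with stand-in types (`read_config` and the trimmed-down enums are made up for illustration; the real crate derives the conversion with `derive_more::From`):

#[derive(Debug)]
enum HgError {
    UnsupportedFeature(String),
}

#[derive(Debug)]
enum CommandError {
    Other(HgError),
}

impl From<HgError> for CommandError {
    fn from(error: HgError) -> Self {
        CommandError::Other(error)
    }
}

fn read_config() -> Result<(), HgError> {
    Err(HgError::UnsupportedFeature("share-safe".to_string()))
}

fn run_command() -> Result<(), CommandError> {
    read_config()?; // the HgError converts into CommandError::Other here
    Ok(())
}

fn main() {
    println!("{:?}", run_command());
}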