upstream/mercurial-mirror Commit - r47172:43d63979

rust: use HgError in RevlogError and Vfs...

Simon Sapin -

r47172:43d63979 default

parent child

rust/hg-core/src/errors.rs

0 +3 0

             use std::fmt;
             /// Common error cases that can happen in many different APIs
             #[derive(Debug)]
             pub enum HgError {
                 /// An I/O operation failed.
                 ///
                 /// `error` is the underlying `std::io::Error`; `context` records what
                 /// the operation was acting on (see `IoErrorContext`).
                 IoError {
                     error: std::io::Error,
                     context: IoErrorContext,
                 },
                 /// A file under `.hg/` normally only written by Mercurial is corrupted
                 /// or otherwise not in the expected format.
                 ///
                 /// The given string is a short explanation for users, not intended to be
                 /// machine-readable.
                 CorruptedRepository(String),
                 /// The repository or requested operation involves a feature not
                 /// supported by the Rust implementation. Falling back to the Python
                 /// implementation may or may not work.
                 ///
                 /// The given string is a short explanation for users, not intended to be
                 /// machine-readable.
                 UnsupportedFeature(String),
             }
             /// Details about where an I/O error happened
             ///
             /// The `derive_more::From` derive together with the `#[from]` attribute
             /// below provides a conversion from `std::path::PathBuf` into the `File`
             /// variant.
             #[derive(Debug, derive_more::From)]
             pub enum IoErrorContext {
                 /// A filesystem operation returned `std::io::Error`
                 ///
                 /// The payload is the path the operation was applied to.
                 #[from]
                 File(std::path::PathBuf),
                 /// `std::env::current_dir` returned `std::io::Error`
                 CurrentDir,
             }
             impl HgError {
                 /// Builds an `HgError::CorruptedRepository` from a short explanation.
                 ///
                 /// Accepts anything convertible into a `String`, so callers can pass
                 /// either a string literal or an owned, formatted message.
                 pub fn corrupted(explanation: impl Into<String>) -> Self {
+                    // TODO: capture a backtrace here and keep it in the error value
+                    // to aid debugging?
+                    // https://doc.rust-lang.org/std/backtrace/struct.Backtrace.html
                     HgError::CorruptedRepository(explanation.into())
                 }
             }
             // TODO: use `DisplayBytes` instead to show non-Unicode filenames losslessly?
             //
             // Every variant is rendered as `<left>: <right>` on a single line:
             // * I/O errors print the underlying error followed by its context.
             // * The two explanation-carrying variants print a fixed label followed
             //   by the user-facing explanation.
             impl fmt::Display for HgError {
                 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                     let (label, explanation) = match self {
                         HgError::IoError { error, context } => {
                             return write!(f, "{}: {}", error, context);
                         }
                         HgError::CorruptedRepository(explanation) => {
                             ("corrupted repository", explanation)
                         }
                         HgError::UnsupportedFeature(explanation) => {
                             ("unsupported feature", explanation)
                         }
                     };
                     write!(f, "{}: {}", label, explanation)
                 }
             }
             // TODO: use `DisplayBytes` instead to show non-Unicode filenames losslessly?
             impl fmt::Display for IoErrorContext {
                 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                     match self {
                         IoErrorContext::File(path) => path.display().fmt(f),
                         IoErrorContext::CurrentDir => f.write_str("current directory"),
                     }
                 }
             }
             /// Extension trait for turning a `std::io::Error` into an `HgError` that
             /// remembers which file the failing operation was about.
             pub trait IoResultExt<T> {
                 /// Annotate a possible I/O error as related to a file at the given path.
                 ///
                 /// This allows printing something like “File not found: example.txt”
                 /// instead of just “File not found”.
                 ///
                 /// Converts a `Result` with `std::io::Error` into one with `HgError`.
                 fn for_file(self, path: &std::path::Path) -> Result<T, HgError>;
             }
             impl<T> IoResultExt<T> for std::io::Result<T> {
                 fn for_file(self, path: &std::path::Path) -> Result<T, HgError> {
                     self.map_err(|error| HgError::IoError {
                         error,
                         context: IoErrorContext::File(path.to_owned()),
                     })
                 }
             }
             /// Extension trait for `Result<T, HgError>` values whose "file not found"
             /// case is an expected outcome rather than a failure.
             pub trait HgResultExt<T> {
                 /// Handle missing files separately from other I/O error cases.
                 ///
                 /// Wraps the `Ok` type in an `Option`:
                 ///
                 /// * `Ok(x)` becomes `Ok(Some(x))`
                 /// * An I/O "not found" error becomes `Ok(None)`
                 /// * Other errors are unchanged
                 fn io_not_found_as_none(self) -> Result<Option<T>, HgError>;
             }
             impl<T> HgResultExt<T> for Result<T, HgError> {
                 fn io_not_found_as_none(self) -> Result<Option<T>, HgError> {
                     match self {
                         Ok(x) => Ok(Some(x)),
                         Err(HgError::IoError { error, .. })
                             if error.kind() == std::io::ErrorKind::NotFound =>
                         {
                             Ok(None)
                         }
                         Err(other_error) => Err(other_error),
                     }
                 }
             }

rust/hg-core/src/lib.rs

0 +1 0

             // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
             //           and Mercurial contributors
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             mod ancestors;
             pub mod dagops;
             pub mod errors;
             pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
             mod dirstate;
             pub mod discovery;
             pub mod requirements;
             pub mod testing; // unconditionally built, for use from integration tests
             pub use dirstate::{
                 dirs_multiset::{DirsMultiset, DirsMultisetIter},
                 dirstate_map::DirstateMap,
                 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
                 status::{
                     status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
                 },
                 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
                 StateMap, StateMapIter,
             };
             pub mod copy_tracing;
             mod filepatterns;
             pub mod matchers;
             pub mod repo;
             pub mod revlog;
             pub use revlog::*;
             pub mod config;
             pub mod operations;
             pub mod revset;
             pub mod utils;
             use crate::utils::hg_path::{HgPathBuf, HgPathError};
             pub use filepatterns::{
                 parse_pattern_syntax, read_pattern_file, IgnorePattern,
                 PatternFileWarning, PatternSyntax,
             };
             use std::collections::HashMap;
             use twox_hash::RandomXxHashBuilder64;
             /// This is a contract between the `micro-timer` crate and us, to expose
             /// the `log` crate as `crate::log`.
             use log;
             pub type LineNumber = usize;
             /// Rust's default hasher is too slow because it tries to prevent collision
             /// attacks. We are not concerned about those: if an ill-minded person has
             /// write access to your repository, you have other issues.
             pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
             /// Error cases carrying either a path (`HgPathBuf`), a path error
             /// (`HgPathError`) or no payload.
             // NOTE(review): presumably raised by dirstate map operations, judging by
             // the name only -- confirm against the call sites.
             #[derive(Debug, PartialEq)]
             pub enum DirstateMapError {
                 PathNotFound(HgPathBuf),
                 EmptyPath,
                 InvalidPath(HgPathError),
             }
             // User-facing messages for `DirstateMapError`.
             //
             // Implemented as `Display` rather than `ToString`: the standard library's
             // blanket `impl<T: Display> ToString for T` keeps `.to_string()` working
             // for existing callers, and the error can now also be used with `{}` in
             // formatting macros.
             impl std::fmt::Display for DirstateMapError {
                 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                     match self {
                         DirstateMapError::PathNotFound(_) => {
                             f.write_str("expected a value, found none")
                         }
                         // NOTE(review): this text does not obviously describe an empty
                         // path; it is kept byte-for-byte because callers or tests may
                         // depend on it.
                         DirstateMapError::EmptyPath => {
                             f.write_str("Overflow in dirstate.")
                         }
                         // `to_string()` works whether `HgPathError` implements
                         // `Display` or `ToString` directly.
                         DirstateMapError::InvalidPath(e) => {
                             f.write_str(&e.to_string())
                         }
                     }
                 }
             }
             /// Either a `DirstateMapError` or a common `errors::HgError`.
             ///
             /// `derive_more::From` generates `From` conversions from the payload types,
             /// which is what lets `?` convert either error into this one.
             #[derive(Debug, derive_more::From)]
             pub enum DirstateError {
                 Map(DirstateMapError),
                 Common(errors::HgError),
             }
             /// Errors related to patterns; see the `to_string` implementation for the
             /// message produced by each variant.
             ///
             /// Only the variants marked `#[from]` get a generated `From` conversion.
             #[derive(Debug, derive_more::From)]
             pub enum PatternError {
                 #[from]
                 Path(HgPathError),
                 /// Payload: the syntax that is not supported.
                 UnsupportedSyntax(String),
                 /// Payload, in order: the unsupported syntax, the file path and the
                 /// line number.
                 UnsupportedSyntaxInFile(String, String, usize),
                 /// Payload: the pattern size in bytes.
                 TooLong(usize),
                 #[from]
                 IO(std::io::Error),
                 /// Needed a pattern that can be turned into a regex but got one that
                 /// can't. This should only happen through programmer error.
                 NonRegexPattern(IgnorePattern),
             }
             // User-facing messages for `PatternError`.
             //
             // Implemented as `Display` rather than `ToString`: `.to_string()` keeps
             // working through the standard library's blanket impl, no intermediate
             // `String` is built per variant, and the error can be used with `{}`.
             impl std::fmt::Display for PatternError {
                 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                     match self {
                         PatternError::UnsupportedSyntax(syntax) => {
                             write!(f, "Unsupported syntax {}", syntax)
                         }
                         PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
                             write!(
                                 f,
                                 "{}:{}: unsupported syntax {}",
                                 file_path, line, syntax
                             )
                         }
                         PatternError::TooLong(size) => {
                             write!(f, "matcher pattern is too long ({} bytes)", size)
                         }
                         PatternError::IO(e) => write!(f, "{}", e),
                         // `to_string()` works whether `HgPathError` implements
                         // `Display` or `ToString` directly.
                         PatternError::Path(e) => f.write_str(&e.to_string()),
                         PatternError::NonRegexPattern(pattern) => {
                             write!(f, "'{:?}' cannot be turned into a regex", pattern)
                         }
                     }
                 }
             }

rust/hg-core/src/operations/cat.rs

0 +3 -4

             // cat.rs
             //
             // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             use std::path::PathBuf;
             use crate::repo::Repo;
             use crate::revlog::changelog::Changelog;
             use crate::revlog::manifest::Manifest;
             use crate::revlog::path_encode::path_encode;
             use crate::revlog::revlog::Revlog;
             use crate::revlog::revlog::RevlogError;
             use crate::revlog::Node;
             use crate::utils::files::get_path_from_bytes;
             use crate::utils::hg_path::{HgPath, HgPathBuf};
             const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n'];
             /// Output the contents of the given files at a given revision.
             ///
             /// * `repo`: The repository to read from.
             /// * `revset`: Resolved to a single revision with
             ///   `crate::revset::resolve_single`; the files are read at that revision.
             /// * `files`: The files to output.
             ///
             /// Walks the manifest of the resolved revision and, for every manifest
             /// file equal to an entry of `files`, appends that file's data to the
             /// returned buffer. Files in `files` that are not in the manifest are
             /// silently ignored.
             pub fn cat(
                 repo: &Repo,
                 revset: &str,
                 files: &[HgPathBuf],
             ) -> Result<Vec<u8>, RevlogError> {
                 let rev = crate::revset::resolve_single(revset, repo)?;
                 let changelog = Changelog::open(repo)?;
                 let manifest = Manifest::open(repo)?;
                 let changelog_entry = changelog.get_rev(rev)?;
-                let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
+                let manifest_node =
-                    .map_err(|_| RevlogError::Corrupted)?;
+                    Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
                 let manifest_entry = manifest.get_node(manifest_node.into())?;
                 let mut bytes = vec![];
                 for (manifest_file, node_bytes) in manifest_entry.files_with_nodes() {
                     for cat_file in files.iter() {
                         if cat_file.as_bytes() == manifest_file.as_bytes() {
                             // Each tracked file has its own revlog in the store, with
                             // an index (`.i`) and a data (`.d`) path.
                             let index_path = store_path(manifest_file, b".i");
                             let data_path = store_path(manifest_file, b".d");
                             let file_log =
                                 Revlog::open(repo, &index_path, Some(&data_path))?;
-                            let file_node = Node::from_hex(node_bytes)
+                            let file_node = Node::from_hex_for_repo(node_bytes)?;
-                                .map_err(|_| RevlogError::Corrupted)?;
                             let file_rev = file_log.get_node_rev(file_node.into())?;
                             let data = file_log.get_rev_data(file_rev)?;
                             // Data starting with the delimiter carries a metadata
                             // header that ends at the next delimiter; only the bytes
                             // after that second delimiter are output.
                             if data.starts_with(&METADATA_DELIMITER) {
                                 let end_delimiter_position = data
                                     [METADATA_DELIMITER.len()..]
                                     .windows(METADATA_DELIMITER.len())
                                     .position(|bytes| bytes == METADATA_DELIMITER);
                                 // If no closing delimiter is found, nothing is
                                 // appended for this file.
                                 if let Some(position) = end_delimiter_position {
                                     // `position` is relative to the end of the first
                                     // delimiter, hence two delimiter lengths to skip.
                                     let offset = METADATA_DELIMITER.len() * 2;
                                     bytes.extend(data[position + offset..].iter());
                                 }
                             } else {
                                 bytes.extend(data);
                             }
                         }
                     }
                 }
                 Ok(bytes)
             }
             /// Builds the path of a file's revlog from `data/`, the tracked path and
             /// `suffix`, passed through `path_encode` and converted to a `PathBuf`.
             fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
                 let tracked = hg_path.as_bytes();
                 let prefix: &[u8] = b"data/";
                 let mut raw =
                     Vec::with_capacity(prefix.len() + tracked.len() + suffix.len());
                 raw.extend_from_slice(prefix);
                 raw.extend_from_slice(tracked);
                 raw.extend_from_slice(suffix);
                 let encoded_bytes = path_encode(&raw);
                 get_path_from_bytes(&encoded_bytes).into()
             }

rust/hg-core/src/operations/list_tracked_files.rs

0 +4 -9

             // list_tracked_files.rs
             //
             // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             use crate::dirstate::parsers::parse_dirstate;
-            use crate::errors::{HgError, IoResultExt};
+            use crate::errors::HgError;
             use crate::repo::Repo;
             use crate::revlog::changelog::Changelog;
             use crate::revlog::manifest::{Manifest, ManifestEntry};
             use crate::revlog::node::Node;
             use crate::revlog::revlog::RevlogError;
             use crate::utils::hg_path::HgPath;
             use crate::EntryState;
             use rayon::prelude::*;
             /// List files under Mercurial control in the working directory
             /// by reading the dirstate
             pub struct Dirstate {
                 /// The `dirstate` content.
                 ///
                 /// These are the raw bytes of the file; they are only parsed when
                 /// `tracked_files` is called.
                 content: Vec<u8>,
             }
             impl Dirstate {
                 /// Reads the `dirstate` file through `repo.hg_vfs()` and keeps its bytes
                 /// unparsed.
                 pub fn new(repo: &Repo) -> Result<Self, HgError> {
-                    let content = repo
+                    let content = repo.hg_vfs().read("dirstate")?;
-                        .hg_vfs()
-                        .read("dirstate")
-                        // TODO: this will be more accurate when we use `HgError` in
-                        // `Vfs::read`.
-                        .for_file("dirstate".as_ref())?;
                     Ok(Self { content })
                 }
                 /// Parses the stored content and returns the paths of all entries whose
                 /// state is not `EntryState::Removed`, sorted.
                 ///
                 /// The returned paths borrow from `self`.
                 pub fn tracked_files(&self) -> Result<Vec<&HgPath>, HgError> {
                     let (_, entries, _) = parse_dirstate(&self.content)?;
                     let mut files: Vec<&HgPath> = entries
                         .into_iter()
                         .filter_map(|(path, entry)| match entry.state {
                             EntryState::Removed => None,
                             _ => Some(path),
                         })
                         .collect();
                     // Parallel (rayon) unstable sort.
                     files.par_sort_unstable();
                     Ok(files)
                 }
             }
             /// List files under Mercurial control at a given revision.
             ///
             /// `revset` is resolved to a single revision with
             /// `crate::revset::resolve_single`. The changelog entry of that revision
             /// names a manifest node, and the matching manifest entry is returned
             /// wrapped in `FilesForRev`.
             pub fn list_rev_tracked_files(
                 repo: &Repo,
                 revset: &str,
             ) -> Result<FilesForRev, RevlogError> {
                 let rev = crate::revset::resolve_single(revset, repo)?;
                 let changelog = Changelog::open(repo)?;
                 let manifest = Manifest::open(repo)?;
                 let changelog_entry = changelog.get_rev(rev)?;
-                let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
+                let manifest_node =
-                    .map_err(|_| RevlogError::Corrupted)?;
+                    Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
                 let manifest_entry = manifest.get_node(manifest_node.into())?;
                 Ok(FilesForRev(manifest_entry))
             }
             /// Wrapper around the `ManifestEntry` returned by
             /// `list_rev_tracked_files`.
             pub struct FilesForRev(ManifestEntry);
             impl FilesForRev {
                 /// Iterates over the file paths of the wrapped manifest entry, as
                 /// yielded by `ManifestEntry::files`.
                 pub fn iter(&self) -> impl Iterator<Item = &HgPath> {
                     self.0.files()
                 }
             }

rust/hg-core/src/repo.rs

0 +8 -13

-            use crate::errors::HgError;
+            use crate::errors::{HgError, IoResultExt};
             use crate::operations::{find_root, FindRootError};
             use crate::requirements;
             use memmap::{Mmap, MmapOptions};
             use std::path::{Path, PathBuf};
             /// A repository on disk
             pub struct Repo {
                 /// Root of the working copy.
                 working_directory: PathBuf,
                 /// `<working_directory>/.hg`
                 dot_hg: PathBuf,
                 /// `<working_directory>/.hg/store`
                 store: PathBuf,
             }
             /// Filesystem access abstraction for the contents of a given "base" directory
             #[derive(Clone, Copy)]
             pub(crate) struct Vfs<'a> {
                 /// Directory that relative paths given to the methods are joined to.
                 base: &'a Path,
             }
             impl Repo {
                 /// Builds a `Repo` whose working directory is `root`.
                 ///
                 /// The `.hg` and `.hg/store` locations are only derived as paths below
                 /// `root`; nothing is read from the filesystem here, so a value is
                 /// returned whether or not a repository exists at that path.
                 pub fn for_path(root: impl Into<PathBuf>) -> Self {
                     let working_directory: PathBuf = root.into();
                     let dot_hg = working_directory.join(".hg");
                     let store = dot_hg.join("store");
                     Self {
                         working_directory,
                         dot_hg,
                         store,
                     }
                 }
                 /// Builds a `Repo` from the root returned by `find_root`, forwarding
                 /// its error unchanged.
                 pub fn find() -> Result<Self, FindRootError> {
                     let root = find_root()?;
                     Ok(Self::for_path(root))
                 }
                 /// Runs `requirements::check` on this repository.
                 pub fn check_requirements(&self) -> Result<(), HgError> {
                     requirements::check(self)
                 }
                 /// Path of the working directory this `Repo` was created with.
                 pub fn working_directory_path(&self) -> &Path {
                     self.working_directory.as_path()
                 }
                 /// For accessing repository files (in `.hg`), except for the store
                 /// (`.hg/store`).
                 pub(crate) fn hg_vfs(&self) -> Vfs<'_> {
                     let base = self.dot_hg.as_path();
                     Vfs { base }
                 }
                 /// For accessing repository store files (in `.hg/store`)
                 pub(crate) fn store_vfs(&self) -> Vfs<'_> {
                     let base = self.store.as_path();
                     Vfs { base }
                 }
                 /// For accessing the working copy
                 // The underscore prefix silences the "never used" warning. Remove
                 // before using.
                 pub(crate) fn _working_directory_vfs(&self) -> Vfs<'_> {
                     let base = self.working_directory.as_path();
                     Vfs { base }
                 }
             }
             impl Vfs<'_> {
                 /// Reads the whole file at `relative_path`, resolved against the base
                 /// directory.
                 ///
                 /// In the new (`+`) version, I/O errors are annotated with the joined
                 /// path through `for_file`, so callers get an `HgError` directly.
                 pub(crate) fn read(
                     &self,
                     relative_path: impl AsRef<Path>,
-                ) -> std::io::Result<Vec<u8>> {
+                ) -> Result<Vec<u8>, HgError> {
-                    std::fs::read(self.base.join(relative_path))
+                    let path = self.base.join(relative_path);
+                    std::fs::read(&path).for_file(&path)
-                pub(crate) fn open(
-                    &self,
-                    relative_path: impl AsRef<Path>,
-                ) -> std::io::Result<std::fs::File> {
-                    std::fs::File::open(self.base.join(relative_path))
                 }
                 /// Opens the file at `relative_path`, resolved against the base
                 /// directory, and memory-maps it.
                 ///
                 /// In the new (`+`) version, failures of both the open and the map
                 /// step are annotated with the joined path through `for_file`.
                 pub(crate) fn mmap_open(
                     &self,
                     relative_path: impl AsRef<Path>,
-                ) -> std::io::Result<Mmap> {
+                ) -> Result<Mmap, HgError> {
-                    let file = self.open(relative_path)?;
+                    let path = self.base.join(relative_path);
+                    let file = std::fs::File::open(&path).for_file(&path)?;
                     // TODO: what are the safety requirements here?
                     // NOTE(review): no `SAFETY` justification is given for this
                     // `unsafe` block -- the invariant still needs to be written down.
-                    let mmap = unsafe { MmapOptions::new().map(&file) }?;
+                    let mmap = unsafe { MmapOptions::new().map(&file) }.for_file(&path)?;
                     Ok(mmap)
                 }
             }

rust/hg-core/src/requirements.rs

0 +3 -6

-            use crate::errors::{HgError, HgResultExt, IoResultExt};
+            use crate::errors::{HgError, HgResultExt};
             use crate::repo::Repo;
             /// Parses the content of a `requires` file into a list of feature names.
             ///
             /// Lines are separated by `\n` and empty lines are skipped. A line that
             /// does not start with an ASCII alphanumeric byte, or that contains any
             /// non-ASCII byte, makes the whole parse fail with a "corrupted" error.
             fn parse(bytes: &[u8]) -> Result<Vec<String>, HgError> {
                 let mut features = Vec::new();
                 // The Python code reading this file uses `str.splitlines`
                 // which looks for a number of line separators (even including a couple of
                 // non-ASCII ones), but Python code writing it always uses `\n`.
                 for line in bytes.split(|&byte| byte == b'\n') {
                     if line.is_empty() {
                         continue;
                     }
                     // Python uses Unicode `str.isalnum` but feature names are all
                     // ASCII
                     let well_formed =
                         line[0].is_ascii_alphanumeric() && line.is_ascii();
                     if !well_formed {
                         return Err(HgError::corrupted(
                             "parse error in 'requires' file",
                         ));
                     }
                     // Cannot fail: the line was just checked to be pure ASCII.
                     features.push(String::from_utf8(line.to_vec()).unwrap());
                 }
                 Ok(features)
             }
             /// Reads the `requires` file of `repo` through `hg_vfs()` and parses it
             /// into a list of feature names.
             ///
             /// A missing file yields an empty list; any other read error, or a parse
             /// error, is returned.
             pub fn load(repo: &Repo) -> Result<Vec<String>, HgError> {
-                if let Some(bytes) = repo
+                if let Some(bytes) =
-                    .hg_vfs()
+                    repo.hg_vfs().read("requires").io_not_found_as_none()?
-                    .read("requires")
-                    .for_file("requires".as_ref())
-                    .io_not_found_as_none()?
                 {
                     parse(&bytes)
                 } else {
                     // Treat a missing file the same as an empty file.
                     // From `mercurial/localrepo.py`:
                     // > requires file contains a newline-delimited list of
                     // > features/capabilities the opener (us) must have in order to use
                     // > the repository. This file was introduced in Mercurial 0.9.2,
                     // > which means very old repositories may not have one. We assume
                     // > a missing file translates to no requirements.
                     Ok(Vec::new())
                 }
             }
             /// Verifies that every feature required by `repo` is listed in
             /// `SUPPORTED`.
             ///
             /// Fails with `HgError::UnsupportedFeature` naming the first unknown
             /// feature, or with whatever error `load` returned.
             pub fn check(repo: &Repo) -> Result<(), HgError> {
                 let first_unknown = load(repo)?
                     .into_iter()
                     .find(|feature| !SUPPORTED.contains(&feature.as_str()));
                 match first_unknown {
                     None => Ok(()),
                     // TODO: collect all unknown features and include them in the
                     // error message?
                     Some(feature) => Err(HgError::UnsupportedFeature(format!(
                         "repository requires feature unknown to this Mercurial: {}",
                         feature
                     ))),
                 }
             }
             // TODO: set this to actually-supported features
             /// Feature names from the `requires` file that `check` accepts; any other
             /// name makes `check` fail with `HgError::UnsupportedFeature`.
             const SUPPORTED: &[&str] = &[
                 "dotencode",
                 "fncache",
                 "generaldelta",
                 "revlogv1",
                 "sparserevlog",
                 "store",
                 // As of this writing everything rhg does is read-only.
                 // When it starts writing to the repository, it’ll need to either keep the
                 // persistent nodemap up to date or remove this entry:
                 "persistent-nodemap",
             ];

rust/hg-core/src/revlog/changelog.rs

0 +4 -1

+            use crate::errors::HgError;
             use crate::repo::Repo;
             use crate::revlog::revlog::{Revlog, RevlogError};
             use crate::revlog::NodePrefix;
             use crate::revlog::Revision;
             /// A `Revlog` wrapper specialized for the `changelog` data format.
             pub struct Changelog {
                 /// The underlying generic revlog.
                 pub(crate) revlog: Revlog,
             }
             impl Changelog {
                 /// Opens the changelog of `repo` from its `00changelog.i` index, with
                 /// no separate data path given.
                 pub fn open(repo: &Repo) -> Result<Self, RevlogError> {
                     Ok(Self {
                         revlog: Revlog::open(repo, "00changelog.i", None)?,
                     })
                 }
                 /// Returns the `ChangelogEntry` for a given node id.
                 ///
                 /// The node is first resolved to a revision number, then looked up
                 /// with `get_rev`.
                 pub fn get_node(
                     &self,
                     node: NodePrefix,
                 ) -> Result<ChangelogEntry, RevlogError> {
                     self.get_rev(self.revlog.get_node_rev(node)?)
                 }
                 /// Returns the `ChangelogEntry` of a given revision number.
                 pub fn get_rev(
                     &self,
                     rev: Revision,
                 ) -> Result<ChangelogEntry, RevlogError> {
                     let entry = ChangelogEntry {
                         bytes: self.revlog.get_rev_data(rev)?,
                     };
                     Ok(entry)
                 }
             }
             /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
             #[derive(Debug)]
             pub struct ChangelogEntry {
                 /// The data bytes of the `changelog` entry.
                 bytes: Vec<u8>,
             }
             impl ChangelogEntry {
                 /// Return an iterator over the lines of the entry.
                 ///
                 /// Lines are split on `\n`; empty lines are skipped.
                 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
                     self.bytes
                         .split(|b| b == &b'\n')
                         .filter(|line| !line.is_empty())
                 }
                 /// Return the node id of the `manifest` referenced by this `changelog`
                 /// entry.
                 ///
                 /// This is the first non-empty line of the entry, returned as raw
                 /// bytes. An entry without any non-empty line is an error.
                 pub fn manifest_node(&self) -> Result<&[u8], RevlogError> {
-                    self.lines().next().ok_or(RevlogError::Corrupted)
+                    self.lines()
+                        .next()
+                        .ok_or_else(|| HgError::corrupted("empty changelog entry").into())
                 }
             }

rust/hg-core/src/revlog/index.rs

0 +3 -1

             use std::convert::TryInto;
             use std::ops::Deref;
             use byteorder::{BigEndian, ByteOrder};
+            use crate::errors::HgError;
             use crate::revlog::node::Node;
             use crate::revlog::revlog::RevlogError;
             use crate::revlog::{Revision, NULL_REVISION};
             pub const INDEX_ENTRY_SIZE: usize = 64;
             /// A Revlog index
             pub struct Index {
                 /// The raw index bytes; for an inline revlog the revision data is
                 /// interleaved with the index entries.
                 bytes: Box<dyn Deref<Target = [u8]> + Send>,
                 /// Offsets of starts of index blocks.
                 /// Only needed when the index is interleaved with data.
                 offsets: Option<Vec<usize>>,
             }
             impl Index {
                 /// Create an index from bytes.
                 /// Calculate the start of each entry when is_inline is true.
                 ///
                 /// For an inline revlog, walking the entries must end exactly at the
                 /// end of `bytes`; otherwise an error is returned.
                 pub fn new(
                     bytes: Box<dyn Deref<Target = [u8]> + Send>,
                 ) -> Result<Self, RevlogError> {
                     if is_inline(&bytes) {
                         let mut offset: usize = 0;
                         let mut offsets = Vec::new();
                         // Each index entry is followed by that revision's compressed
                         // data, so the next entry starts after both.
                         while offset + INDEX_ENTRY_SIZE <= bytes.len() {
                             offsets.push(offset);
                             let end = offset + INDEX_ENTRY_SIZE;
                             let entry = IndexEntry {
                                 bytes: &bytes[offset..end],
                                 offset_override: None,
                             };
                             offset += INDEX_ENTRY_SIZE + entry.compressed_len();
                         }
                         // Anything other than landing exactly on the end means the
                         // last entry or its data is truncated or has trailing bytes.
                         if offset == bytes.len() {
                             Ok(Self {
                                 bytes,
                                 offsets: Some(offsets),
                             })
                         } else {
-                            Err(RevlogError::Corrupted)
+                            Err(HgError::corrupted("unexpected inline revlog length")
+                                .into())
                         }
                     } else {
                         // Not inline: entries have a fixed size, so no offset table
                         // is needed.
                         Ok(Self {
                             bytes,
                             offsets: None,
                         })
                     }
                 }
                 /// Value of the inline flag.
                 pub fn is_inline(&self) -> bool {
                     is_inline(&self.bytes)
                 }
                 /// Returns the bytes in `start..end` of an inline revlog.
                 ///
                 /// Panics when the revlog is not inline, or when the range is out of
                 /// bounds for the underlying bytes.
                 pub fn data(&self, start: usize, end: usize) -> &[u8] {
                     if self.is_inline() {
                         &self.bytes[start..end]
                     } else {
                         panic!("tried to access data in the index of a revlog that is not inline");
                     }
                 }
                 /// Returns the number of entries in the revlog index.
                 ///
                 /// With an offset table (inline revlog) this is the number of recorded
                 /// offsets; otherwise it is the byte length divided by the fixed
                 /// entry size.
                 pub fn len(&self) -> usize {
                     match &self.offsets {
                         Some(offsets) => offsets.len(),
                         None => self.bytes.len() / INDEX_ENTRY_SIZE,
                     }
                 }
                 /// Returns `true` if the `Index` has zero `entries`.
                 pub fn is_empty(&self) -> bool {
                     self.len() == 0
                 }
                 /// Returns the index entry for `rev`, or `None` when `rev` is
                 /// `NULL_REVISION` or no entry exists for it.
                 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
                     if rev == NULL_REVISION {
                         return None;
                     }
                     // Inline revlogs need the precomputed offset table.
                     match &self.offsets {
                         Some(offsets) => self.get_entry_inline(rev, offsets),
                         None => self.get_entry_separated(rev),
                     }
                 }
                 /// Looks up `rev` in the offset table of an inline revlog.
                 ///
                 /// Returns `None` when `rev` has no recorded offset or when the end
                 /// of the entry would overflow `usize`.
                 fn get_entry_inline(
                     &self,
                     rev: Revision,
                     offsets: &[usize],
                 ) -> Option<IndexEntry> {
                     let start = *offsets.get(rev as usize)?;
                     start.checked_add(INDEX_ENTRY_SIZE).map(|end| IndexEntry {
                         bytes: &self.bytes[start..end],
                         // See IndexEntry for an explanation of this override.
                         offset_override: Some(end),
                     })
                 }
                 /// Return the entry for `rev` when the index is stored separately
                 /// from the data, as contiguous entries of `INDEX_ENTRY_SIZE` bytes.
                 ///
                 /// Returns `None` when `rev` is past the last complete entry.
                 fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
                     let position = rev as usize;
                     let entry_count = self.bytes.len() / INDEX_ENTRY_SIZE;
                     if position >= entry_count {
                         return None;
                     }
                     let start = position * INDEX_ENTRY_SIZE;
                     let bytes = &self.bytes[start..start + INDEX_ENTRY_SIZE];
                     // The offset bytes of the first entry are used for the index's
                     // metadata (its real offset is always 0), so force it to 0.
                     let offset_override = match rev {
                         0 => Some(0),
                         _ => None,
                     };
                     Some(IndexEntry {
                         bytes,
                         offset_override,
                     })
                 }
             }
             impl super::RevlogIndex for Index {
                 fn len(&self) -> usize {
                     self.len()
                 }
                 fn node(&self, rev: Revision) -> Option<&Node> {
                     self.get_entry(rev).map(|entry| entry.hash())
                 }
             }
             /// A view over the raw bytes of one index entry, together with an
             /// optional replacement for the offset stored in those bytes.
             #[derive(Debug)]
             pub struct IndexEntry<'a> {
                 /// Raw bytes of the entry, borrowed from the index.
                 bytes: &'a [u8],
                 /// Allows to override the offset value of the entry.
                 ///
                 /// For interleaved index and data, the offset stored in the index
                 /// corresponds to the separated data offset.
                 /// It has to be overridden with the actual offset in the interleaved
                 /// index which is just after the index block.
                 ///
                 /// For separated index and data, the offset stored in the first index
                 /// entry is mixed with the index headers.
                 /// It has to be overridden with 0.
                 offset_override: Option<usize>,
             }
             impl<'a> IndexEntry<'a> {
                 /// Return the offset of the data.
                 ///
                 /// `offset_override` takes precedence when set; otherwise the offset
                 /// is decoded as a big-endian integer from the first six bytes of
                 /// the entry.
                 pub fn offset(&self) -> usize {
                     match self.offset_override {
                         Some(offset) => offset,
                         None => {
                             // Left-pad the 6-byte value to 8 bytes so that it can
                             // be read as a `u64`.
                             let mut padded = [0; 8];
                             padded[2..8].copy_from_slice(&self.bytes[0..=5]);
                             BigEndian::read_u64(&padded[..]) as usize
                         }
                     }
                 }
                 /// Return the compressed length of the data.
                 pub fn compressed_len(&self) -> usize {
                     let field = &self.bytes[8..=11];
                     BigEndian::read_u32(field) as usize
                 }
                 /// Return the uncompressed length of the data.
                 pub fn uncompressed_len(&self) -> usize {
                     let field = &self.bytes[12..=15];
                     BigEndian::read_u32(field) as usize
                 }
                 /// Return the revision upon which the data has been derived.
                 pub fn base_revision(&self) -> Revision {
                     // TODO Maybe return an Option when base_revision == rev?
                     //      Requires to add rev to IndexEntry
                     let field = &self.bytes[16..];
                     BigEndian::read_i32(field)
                 }
                 /// Return the revision stored in the first parent field.
                 pub fn p1(&self) -> Revision {
                     let field = &self.bytes[24..];
                     BigEndian::read_i32(field)
                 }
                 /// Return the revision stored in the second parent field.
                 pub fn p2(&self) -> Revision {
                     let field = &self.bytes[28..];
                     BigEndian::read_i32(field)
                 }
                 /// Return the hash of revision's full text.
                 ///
                 /// Currently, SHA-1 is used and only the first 20 bytes of this field
                 /// are used.
                 pub fn hash(&self) -> &'a Node {
                     let node_bytes = &self.bytes[32..52];
                     node_bytes.try_into().unwrap()
                 }
             }
             /// Value of the inline flag.
             ///
             /// The flag is decided from the first two bytes of the index: `[0, 0]`
             /// and `[0, 2]` mean "not inline", anything else means "inline".
             ///
             /// An index with fewer than two bytes (for instance an empty one) has no
             /// flags to inspect; it is reported as inline instead of panicking.
             /// NOTE(review): this presumes an empty revlog should be handled as
             /// inline — confirm against callers.
             pub fn is_inline(index_bytes: &[u8]) -> bool {
                 match index_bytes.get(..2) {
                     Some([0, 0]) | Some([0, 2]) => false,
                     _ => true,
                 }
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 /// Test helper producing the leading 20 bytes of an index entry:
                 /// offset (or index header for the first entry), revision flags,
                 /// compressed length, uncompressed length and base revision.
                 #[derive(Debug, Copy, Clone)]
                 pub struct IndexEntryBuilder {
                     is_first: bool,
                     is_inline: bool,
                     is_general_delta: bool,
                     version: u16,
                     offset: usize,
                     compressed_len: usize,
                     uncompressed_len: usize,
                     base_revision: Revision,
                 }
                 impl IndexEntryBuilder {
                     /// Start from a non-first, non-inline, general-delta entry of
                     /// version 2 with every numeric field set to 0.
                     pub fn new() -> Self {
                         Self {
                             is_first: false,
                             is_inline: false,
                             is_general_delta: true,
                             version: 2,
                             offset: 0,
                             compressed_len: 0,
                             uncompressed_len: 0,
                             base_revision: 0,
                         }
                     }
                     pub fn is_first(&mut self, value: bool) -> &mut Self {
                         self.is_first = value;
                         self
                     }
                     pub fn with_inline(&mut self, value: bool) -> &mut Self {
                         self.is_inline = value;
                         self
                     }
                     pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
                         self.is_general_delta = value;
                         self
                     }
                     pub fn with_version(&mut self, value: u16) -> &mut Self {
                         self.version = value;
                         self
                     }
                     pub fn with_offset(&mut self, value: usize) -> &mut Self {
                         self.offset = value;
                         self
                     }
                     pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
                         self.compressed_len = value;
                         self
                     }
                     pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
                         self.uncompressed_len = value;
                         self
                     }
                     pub fn with_base_revision(&mut self, value: Revision) -> &mut Self {
                         self.base_revision = value;
                         self
                     }
                     /// Serialize the configured fields in big-endian order.
                     pub fn build(&self) -> Vec<u8> {
                         let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
                         if self.is_first {
                             bytes.extend(&match (self.is_general_delta, self.is_inline) {
                                 (false, false) => [0u8, 0],
                                 (false, true) => [0u8, 1],
                                 (true, false) => [0u8, 2],
                                 (true, true) => [0u8, 3],
                             });
                             bytes.extend(&self.version.to_be_bytes());
                             // Remaining offset bytes.
                             bytes.extend(&[0u8; 2]);
                         } else {
                             // Offset is only 6 bytes while usize is 8 on 64-bit
                             // targets. Go through `u64` so the slice is right
                             // whatever the width of `usize`.
                             bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
                         }
                         bytes.extend(&[0u8; 2]); // Revision flags.
                         // Lengths are stored on 4 bytes: convert to `u32` instead
                         // of slicing the `usize` representation.
                         bytes.extend(&(self.compressed_len as u32).to_be_bytes());
                         bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
                         bytes.extend(&self.base_revision.to_be_bytes());
                         bytes
                     }
                 }
                 #[test]
                 fn is_not_inline_when_no_inline_flag_test() {
                     let bytes = IndexEntryBuilder::new()
                         .is_first(true)
                         .with_general_delta(false)
                         .with_inline(false)
                         .build();
                     assert!(!is_inline(&bytes))
                 }
                 #[test]
                 fn is_inline_when_inline_flag_test() {
                     let bytes = IndexEntryBuilder::new()
                         .is_first(true)
                         .with_general_delta(false)
                         .with_inline(true)
                         .build();
                     assert!(is_inline(&bytes))
                 }
                 #[test]
                 fn is_inline_when_inline_and_generaldelta_flags_test() {
                     let bytes = IndexEntryBuilder::new()
                         .is_first(true)
                         .with_general_delta(true)
                         .with_inline(true)
                         .build();
                     assert!(is_inline(&bytes))
                 }
                 #[test]
                 fn test_offset() {
                     let bytes = IndexEntryBuilder::new().with_offset(1).build();
                     let entry = IndexEntry {
                         bytes: &bytes,
                         offset_override: None,
                     };
                     assert_eq!(entry.offset(), 1)
                 }
                 #[test]
                 fn test_with_overridden_offset() {
                     let bytes = IndexEntryBuilder::new().with_offset(1).build();
                     let entry = IndexEntry {
                         bytes: &bytes,
                         offset_override: Some(2),
                     };
                     assert_eq!(entry.offset(), 2)
                 }
                 #[test]
                 fn test_compressed_len() {
                     let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
                     let entry = IndexEntry {
                         bytes: &bytes,
                         offset_override: None,
                     };
                     assert_eq!(entry.compressed_len(), 1)
                 }
                 #[test]
                 fn test_uncompressed_len() {
                     let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
                     let entry = IndexEntry {
                         bytes: &bytes,
                         offset_override: None,
                     };
                     assert_eq!(entry.uncompressed_len(), 1)
                 }
                 #[test]
                 fn test_base_revision() {
                     let bytes = IndexEntryBuilder::new().with_base_revision(1).build();
                     let entry = IndexEntry {
                         bytes: &bytes,
                         offset_override: None,
                     };
                     assert_eq!(entry.base_revision(), 1)
                 }
             }
             #[cfg(test)]
             pub use tests::IndexEntryBuilder;

rust/hg-core/src/revlog/node.rs

0 +14 0

             // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Definitions and utilities for Revision nodes
             //!
             //! In Mercurial code base, it is customary to call "a node" the binary SHA
             //! of a revision.
+            use crate::errors::HgError;
             use bytes_cast::BytesCast;
             use std::convert::{TryFrom, TryInto};
             use std::fmt;
             /// The length in bytes of a `Node`
             ///
             /// This constant is meant to ease refactors of this module. Although
             /// it is `pub`, calling code should not expect all nodes to have the
             /// same size, should we support several formats concurrently in
             /// the future.
             pub const NODE_BYTES_LENGTH: usize = 20;
             /// Id of the null node.
             ///
             /// Used to indicate the absence of node.
             pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
             /// The length in nybbles (hexadecimal digits) of a `Node`
             ///
             /// Kept private so that calling code does not expect all nodes to have
             /// the same size; see also `NODE_BYTES_LENGTH`.
             const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
             /// Private alias for readability and to ease future change
             type NodeData = [u8; NODE_BYTES_LENGTH];
             /// Binary revision SHA
             ///
             /// ## Future changes of hash size
             ///
             /// To accommodate future changes of hash size, Rust callers
             /// should use the conversion methods at the boundaries (FFI, actual
             /// computation of hashes and I/O) only, and only if required.
             ///
             /// All other callers outside of unit tests should just handle `Node` values
             /// and never make any assumption on the actual length, using [`nybbles_len`]
             /// if they need a loop boundary.
             ///
             /// All methods that create a `Node` either take a type that enforces
             /// the size or return an error at runtime.
             ///
             /// [`nybbles_len`]: #method.nybbles_len
             #[derive(Copy, Clone, Debug, PartialEq, BytesCast, derive_more::From)]
             #[repr(transparent)]
             pub struct Node {
                 /// The raw bytes of the hash.
                 data: NodeData,
             }
             /// The node value for NULL_REVISION
             ///
             /// All of its bytes are zero.
             pub const NULL_NODE: Node = Node {
                 data: [0; NODE_BYTES_LENGTH],
             };
             /// Reinterpret a byte slice as a `Node` reference.
             ///
             /// Fails if the slice has an unexpected length: it must parse as a
             /// `Node` with no bytes left over.
             impl<'a> TryFrom<&'a [u8]> for &'a Node {
                 type Error = ();
                 #[inline]
                 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
                     let (node, remainder) = Node::from_bytes(bytes).map_err(|_| ())?;
                     if remainder.is_empty() {
                         Ok(node)
                     } else {
                         Err(())
                     }
                 }
             }
             /// Return an error if the slice has an unexpected length
             impl TryFrom<&'_ [u8]> for Node {
                 type Error = std::array::TryFromSliceError;
                 #[inline]
                 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
                     let data = bytes.try_into()?;
                     Ok(Self { data })
                 }
             }
             /// Format the node as lowercase hexadecimal, two digits per byte.
             impl fmt::LowerHex for Node {
                 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                     self.data
                         .iter()
                         .try_for_each(|byte| write!(f, "{:02x}", byte))
                 }
             }
             /// Error returned when a hexadecimal string cannot be converted into a
             /// `Node` or a `NodePrefix`.
             #[derive(Debug)]
             pub struct FromHexError;
             /// Low level utility function, also for prefixes
             ///
             /// Returns the `i`th half-byte of `s`: even indices select the upper
             /// four bits of byte `i / 2`, odd indices the lower four bits.
             fn get_nybble(s: &[u8], i: usize) -> u8 {
                 let byte = s[i / 2];
                 match i % 2 {
                     0 => byte >> 4,
                     _ => byte & 0x0f,
                 }
             }
             impl Node {
                 /// Retrieve the `i`th half-byte of the binary data.
                 ///
                 /// This is also the `i`th hexadecimal digit in numeric form,
                 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
                 ///
                 /// # Panics
                 ///
                 /// Panics if `i / 2` is not a valid index into the node's bytes.
                 pub fn get_nybble(&self, i: usize) -> u8 {
                     get_nybble(&self.data, i)
                 }
                 /// Length of the data, in nybbles
                 pub fn nybbles_len(&self) -> usize {
                     // public exposure as an instance method only, so that we can
                     // easily support several sizes of hashes if needed in the future.
                     NODE_NYBBLES_LENGTH
                 }
                 /// Convert from hexadecimal string representation
                 ///
                 /// Exact length is required.
                 ///
                 /// To be used in FFI and I/O only, in order to facilitate future
                 /// changes of hash format.
                 ///
                 /// # Errors
                 ///
                 /// Returns `FromHexError` if `NodePrefix::from_hex` rejects the
                 /// input, or if the input is not exactly `NODE_NYBBLES_LENGTH`
                 /// digits long.
                 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
                     // Parse as a prefix first, then insist on a full-length one.
                     let prefix = NodePrefix::from_hex(hex)?;
                     if prefix.nybbles_len() == NODE_NYBBLES_LENGTH {
                         Ok(Self { data: prefix.data })
                     } else {
                         Err(FromHexError)
                     }
                 }
+                /// `from_hex`, but for input from an internal file of the repository such
+                /// as a changelog or manifest entry.
+                ///
+                /// An error is treated as repository corruption.
+                pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
+                    Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
+                        HgError::CorruptedRepository(format!(
+                            "Expected a full hexadecimal node ID, found {}",
+                            String::from_utf8_lossy(hex.as_ref())
+                        ))
+                    })
+                }
                 /// Provide access to binary data
                 ///
                 /// This is needed by FFI layers, for instance to return expected
                 /// binary values to Python.
                 pub fn as_bytes(&self) -> &[u8] {
                     &self.data
                 }
             }
             /// The beginning of a binary revision SHA.
             ///
             /// Since it can potentially come from an hexadecimal representation with
             /// odd length, it needs to carry around whether the last 4 bits are relevant
             /// or not.
             #[derive(Debug, PartialEq, Copy, Clone)]
             pub struct NodePrefix {
                 /// In `1..=NODE_NYBBLES_LENGTH`
                 nybbles_len: u8,
                 /// The first `4 * nybbles_len` bits are used (considering bits
                 /// within a byte in big-endian: most significant first), the rest
                 /// are zero.
                 data: NodeData,
             }
             impl NodePrefix {
                 /// Parse a prefix from its hexadecimal representation.
                 ///
                 /// Similarly to `hex::decode`, can be used with Unicode string types
                 /// (`String`, `&str`) as well as bytes.
                 ///
                 /// To be used in FFI and I/O only, in order to facilitate future
                 /// changes of hash format.
                 ///
                 /// # Errors
                 ///
                 /// Returns `FromHexError` if the input is empty, longer than
                 /// `NODE_NYBBLES_LENGTH`, or contains a byte that is not a
                 /// hexadecimal digit.
                 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
                     let digits = hex.as_ref();
                     if digits.is_empty() || digits.len() > NODE_NYBBLES_LENGTH {
                         return Err(FromHexError);
                     }
                     let mut data = [0; NODE_BYTES_LENGTH];
                     for (position, &ascii_byte) in digits.iter().enumerate() {
                         let nybble =
                             char::from(ascii_byte).to_digit(16).ok_or(FromHexError)? as u8;
                         // Even positions fill the upper half of a byte, odd
                         // positions the lower half.
                         let shift = if position % 2 == 0 { 4 } else { 0 };
                         data[position / 2] |= nybble << shift;
                     }
                     Ok(Self {
                         data,
                         // Cannot truncate: the length was checked against
                         // `NODE_NYBBLES_LENGTH` above.
                         nybbles_len: digits.len() as u8,
                     })
                 }
                 /// Number of nybbles (hexadecimal digits) in this prefix.
                 pub fn nybbles_len(&self) -> usize {
                     usize::from(self.nybbles_len)
                 }
                 /// Tell whether `node` starts with this prefix.
                 pub fn is_prefix_of(&self, node: &Node) -> bool {
                     let len = self.nybbles_len();
                     let whole_bytes = len / 2;
                     // Compare the complete bytes first; only an odd-length prefix
                     // has a trailing nybble left to check.
                     self.data[..whole_bytes] == node.data[..whole_bytes]
                         && (len % 2 == 0
                             || self.get_nybble(len - 1) == node.get_nybble(len - 1))
                 }
                 /// Retrieve the `i`th half-byte from the prefix.
                 ///
                 /// This is also the `i`th hexadecimal digit in numeric form,
                 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
                 ///
                 /// # Panics
                 ///
                 /// Panics if `i` is not smaller than `nybbles_len()`.
                 pub fn get_nybble(&self, i: usize) -> u8 {
                     assert!(i < self.nybbles_len());
                     get_nybble(&self.data, i)
                 }
                 /// Iterate over the nybbles of the prefix, in order.
                 fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
                     let data = &self.data;
                     (0..self.nybbles_len()).map(move |i| get_nybble(data, i))
                 }
                 /// Return the index of the first nybble that's different from `node`
                 ///
                 /// If the return value is `None` that means that `self` is
                 /// a prefix of `node`, but the current method is a bit slower
                 /// than `is_prefix_of`.
                 ///
                 /// Returned index is as in `get_nybble`, i.e., starting at 0.
                 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
                     let full = NodePrefix::from(*node);
                     let pairs = self.iter_nybbles().zip(full.iter_nybbles());
                     for (index, (ours, theirs)) in pairs.enumerate() {
                         if ours != theirs {
                             return Some(index);
                         }
                     }
                     None
                 }
             }
             /// Format the prefix as lowercase hexadecimal, one digit per nybble.
             impl fmt::LowerHex for NodePrefix {
                 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                     // Going nybble by nybble means an odd-length prefix needs no
                     // special casing for its last digit.
                     self.iter_nybbles()
                         .try_for_each(|nybble| write!(f, "{:x}", nybble))
                 }
             }
             /// Treat a reference to a full `Node` as a prefix covering all of its
             /// nybbles.
             impl From<&'_ Node> for NodePrefix {
                 fn from(node: &'_ Node) -> Self {
                     let nybbles_len = node.nybbles_len() as u8;
                     let data = node.data;
                     NodePrefix { nybbles_len, data }
                 }
             }
             /// A shortcut for full `Node` references
             impl From<Node> for NodePrefix {
                 fn from(node: Node) -> Self {
                     NodePrefix {
                         nybbles_len: node.nybbles_len() as _,
                         data: node.data,
                     }
                 }
             }
             impl PartialEq<Node> for NodePrefix {
                 fn eq(&self, other: &Node) -> bool {
                     Self::from(*other) == *self
                 }
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 /// Hexadecimal form of `SAMPLE_NODE`.
                 const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
                 /// Binary form of `SAMPLE_NODE_HEX`.
                 const SAMPLE_NODE: Node = Node {
                     data: [
                         0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
                         0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
                     ],
                 };
                 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
                 /// The padding is made with zeros.
                 pub fn hex_pad_right(hex: &str) -> String {
                     let mut res = hex.to_string();
                     while res.len() < NODE_NYBBLES_LENGTH {
                         res.push('0');
                     }
                     res
                 }
                 #[test]
                 fn test_node_from_hex() {
                     let not_hex = "012... oops";
                     let too_short = "0123";
                     let too_long = format!("{}0", SAMPLE_NODE_HEX);
                     assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
                     assert!(Node::from_hex(not_hex).is_err());
                     assert!(Node::from_hex(too_short).is_err());
                     assert!(Node::from_hex(&too_long).is_err());
                 }
                 #[test]
                 fn test_node_encode_hex() {
                     assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
                 }
                 #[test]
                 fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
                     assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
                     assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
                     assert_eq!(
                         format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
                         SAMPLE_NODE_HEX
                     );
                     Ok(())
                 }
                 #[test]
                 fn test_prefix_from_hex_errors() {
                     assert!(NodePrefix::from_hex("testgr").is_err());
                     // One digit more than a full node.
                     let mut long = format!("{:x}", NULL_NODE);
                     long.push('c');
                     assert!(NodePrefix::from_hex(&long).is_err())
                 }
                 #[test]
                 fn test_is_prefix_of() -> Result<(), FromHexError> {
                     let mut node_data = [0; NODE_BYTES_LENGTH];
                     node_data[0] = 0x12;
                     node_data[1] = 0xca;
                     let node = Node::from(node_data);
                     assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
                     assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
                     assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
                     assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
                     Ok(())
                 }
                 #[test]
                 fn test_get_nybble() -> Result<(), FromHexError> {
                     let prefix = NodePrefix::from_hex("dead6789cafe")?;
                     assert_eq!(prefix.get_nybble(0), 13);
                     assert_eq!(prefix.get_nybble(7), 9);
                     Ok(())
                 }
                 #[test]
                 fn test_first_different_nybble_even_prefix() {
                     let prefix = NodePrefix::from_hex("12ca").unwrap();
                     let mut node = Node::from([0; NODE_BYTES_LENGTH]);
                     assert_eq!(prefix.first_different_nybble(&node), Some(0));
                     node.data[0] = 0x13;
                     assert_eq!(prefix.first_different_nybble(&node), Some(1));
                     node.data[0] = 0x12;
                     assert_eq!(prefix.first_different_nybble(&node), Some(2));
                     node.data[1] = 0xca;
                     // now it is a prefix
                     assert_eq!(prefix.first_different_nybble(&node), None);
                 }
                 #[test]
                 fn test_first_different_nybble_odd_prefix() {
                     let prefix = NodePrefix::from_hex("12c").unwrap();
                     let mut node = Node::from([0; NODE_BYTES_LENGTH]);
                     assert_eq!(prefix.first_different_nybble(&node), Some(0));
                     node.data[0] = 0x13;
                     assert_eq!(prefix.first_different_nybble(&node), Some(1));
                     node.data[0] = 0x12;
                     assert_eq!(prefix.first_different_nybble(&node), Some(2));
                     node.data[1] = 0xca;
                     // now it is a prefix
                     assert_eq!(prefix.first_different_nybble(&node), None);
                 }
             }
             #[cfg(test)]
             pub use tests::hex_pad_right;

rust/hg-core/src/revlog/nodemap_docket.rs

0 +31 -26

+            use crate::errors::{HgError, HgResultExt};
             use bytes_cast::{unaligned, BytesCast};
             use memmap::Mmap;
             use std::path::{Path, PathBuf};
             use super::revlog::RevlogError;
             use crate::repo::Repo;
             use crate::utils::strip_suffix;
             /// The docket version this code understands. `read_from_file` compares
             /// the first byte of the docket file against it and proceeds without a
             /// persistent nodemap when they differ.
             const ONDISK_VERSION: u8 = 1;
             /// Data kept from a parsed nodemap docket (`.n`) file.
             pub(super) struct NodeMapDocket {
                 /// Length read from the docket header; the nodemap data file must be
                 /// at least this long.
                 pub data_length: usize,
                 // TODO: keep here more of the data from `parse()` when we need it
             }
             /// Fixed-size header of the docket file, parsed from the bytes that
             /// follow the leading version byte.
             #[derive(BytesCast)]
             #[repr(C)]
             struct DocketHeader {
                 /// Number of bytes of the `uid` that follows the header.
                 uid_size: u8,
                 // Not used by the Rust code yet.
                 _tip_rev: unaligned::U64Be,
                 /// Copied into `NodeMapDocket::data_length`.
                 data_length: unaligned::U64Be,
                 // Not used by the Rust code yet.
                 _data_unused: unaligned::U64Be,
                 /// Number of bytes of the tip node that follows the `uid`.
                 tip_node_size: unaligned::U64Be,
             }
             impl NodeMapDocket {
                 /// Return `Ok(None)` when the caller should proceed without a persistent
                 /// nodemap:
                 ///
                 /// * This revlog does not have a `.n` docket file (it is not generated for
                 ///   small revlogs), or
                 /// * The docket has an unsupported version number (repositories created by
                 ///   later hg, maybe that should be a requirement instead?), or
                 /// * The docket file points to a missing (likely deleted) data file (this
                 ///   can happen in a rare race condition).
                 pub fn read_from_file(
                     repo: &Repo,
                     index_path: &Path,
                 ) -> Result<Option<(Self, Mmap)>, RevlogError> {
                     let docket_path = index_path.with_extension("n");
-                    let docket_bytes = match repo.store_vfs().read(&docket_path) {
+                    let docket_bytes = if let Some(bytes) =
-                        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
+                        repo.store_vfs().read(&docket_path).io_not_found_as_none()?
-                            return Ok(None)
+                        bytes
-                        Err(e) => return Err(RevlogError::IoError(e)),
+                    } else {
-                        Ok(bytes) => bytes,
+                        return Ok(None);
                     };
                     let input = if let Some((&ONDISK_VERSION, rest)) =
                         docket_bytes.split_first()
                     {
                         rest
                     } else {
                         return Ok(None);
                     };
-                    let (header, rest) = DocketHeader::from_bytes(input)?;
+                    /// Treat any error as a parse error
+                    fn parse<T, E>(result: Result<T, E>) -> Result<T, RevlogError> {
+                        result.map_err(|_| {
+                            HgError::corrupted("nodemap docket parse error").into()
+                        })
+                    }
+                    let (header, rest) = parse(DocketHeader::from_bytes(input))?;
                     let uid_size = header.uid_size as usize;
                     // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
                     // systems?
                     let tip_node_size = header.tip_node_size.get() as usize;
                     let data_length = header.data_length.get() as usize;
-                    let (uid, rest) = u8::slice_from_bytes(rest, uid_size)?;
+                    let (uid, rest) = parse(u8::slice_from_bytes(rest, uid_size))?;
-                    let (_tip_node, _rest) = u8::slice_from_bytes(rest, tip_node_size)?;
+                    let (_tip_node, _rest) =
-                    let uid =
+                        parse(u8::slice_from_bytes(rest, tip_node_size))?;
-                        std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?;
+                    let uid = parse(std::str::from_utf8(uid))?;
                     let docket = NodeMapDocket { data_length };
                     let data_path = rawdata_path(&docket_path, uid);
-                    // TODO: use `std::fs::read` here when the `persistent-nodemap.mmap`
+                    // TODO: use `vfs.read()` here when the `persistent-nodemap.mmap`
                     // config is false?
-                    match repo.store_vfs().mmap_open(&data_path) {
+                    if let Some(mmap) = repo
-                        Ok(mmap) => {
+                        .store_vfs()
-                            if mmap.len() >= data_length {
+                        .mmap_open(&data_path)
-                                Ok(Some((docket, mmap)))
+                        .io_not_found_as_none()?
-                            } else {
-                                Err(RevlogError::Corrupted)
+                        if mmap.len() >= data_length {
+                            Ok(Some((docket, mmap)))
+                        } else {
+                            Err(HgError::corrupted("persistent nodemap too short").into())
                         }
-                        Err(error) => {
+                    } else {
-                            if error.kind() == std::io::ErrorKind::NotFound {
+                        Ok(None)
-                                Ok(None)
-                            } else {
-                                Err(RevlogError::IoError(error))
                     }
                 }
             }
             /// Compute the path of the nodemap data file that belongs to a docket.
             ///
             /// The data file is named `<prefix>-<uid>.nd`, where `<prefix>` is the
             /// docket file name with its `.n.a` or `.n` suffix removed, and it is
             /// placed in the same directory as the docket.
             ///
             /// # Panics
             ///
             /// Panics if `docket_path` has no file name, if that name is not valid
             /// Unicode, if it ends with neither `.n.a` nor `.n`, or if the path has
             /// no parent.
             fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
                 let file_name = docket_path.file_name().expect("expected a base name");
                 let file_name = file_name
                     .to_str()
                     .expect("expected an ASCII file name in the store");
                 // Try the longer suffix first so that `foo.n.a` yields `foo`.
                 let prefix = match strip_suffix(file_name, ".n.a") {
                     Some(prefix) => prefix,
                     None => strip_suffix(file_name, ".n")
                         .expect("expected docket path in .n or .n.a"),
                 };
                 let data_file_name = format!("{}-{}.nd", prefix, uid);
                 let directory =
                     docket_path.parent().expect("expected a non-root path");
                 directory.join(data_file_name)
             }

rust/hg-core/src/revlog/revlog.rs

0 +36 -30

             use std::borrow::Cow;
             use std::io::Read;
             use std::ops::Deref;
             use std::path::Path;
             use byteorder::{BigEndian, ByteOrder};
             use crypto::digest::Digest;
             use crypto::sha1::Sha1;
             use flate2::read::ZlibDecoder;
             use micro_timer::timed;
             use zstd;
             use super::index::Index;
             use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
             use super::nodemap;
-            use super::nodemap::NodeMap;
+            use super::nodemap::{NodeMap, NodeMapError};
             use super::nodemap_docket::NodeMapDocket;
             use super::patch;
+            use crate::errors::HgError;
             use crate::repo::Repo;
             use crate::revlog::Revision;
+            #[derive(derive_more::From)]
             pub enum RevlogError {
-                IoError(std::io::Error),
-                UnsuportedVersion(u16),
                 InvalidRevision,
                 /// Found more than one entry whose ID match the requested prefix
                 AmbiguousPrefix,
-                Corrupted,
+                #[from]
-                UnknowDataFormat(u8),
+                Other(HgError),
             }
-            impl From<bytes_cast::FromBytesError> for RevlogError {
+            impl From<NodeMapError> for RevlogError {
-                fn from(_: bytes_cast::FromBytesError) -> Self {
+                fn from(error: NodeMapError) -> Self {
-                    RevlogError::Corrupted
+                    match error {
+                        NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
+                        NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
+                    }
+                }
+            }
+            impl RevlogError {
+                fn corrupted() -> Self {
+                    RevlogError::Other(HgError::corrupted("corrupted revlog"))
                 }
             }
             /// Read only implementation of revlog.
             pub struct Revlog {
                 /// When index and data are not interleaved: bytes of the revlog index.
                 /// When index and data are interleaved: bytes of the revlog index and
                 /// data.
                 index: Index,
                 /// When index and data are not interleaved: bytes of the revlog data
                 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
                 /// When present on disk: the persistent nodemap for this revlog
                 nodemap: Option<nodemap::NodeTree>,
             }
             impl Revlog {
                 /// Open a revlog index file.
                 ///
                 /// It will also open the associated data file if index and data are not
                 /// interleaved.
                 #[timed]
                 pub fn open(
                     repo: &Repo,
                     index_path: impl AsRef<Path>,
                     data_path: Option<&Path>,
                 ) -> Result<Self, RevlogError> {
                     let index_path = index_path.as_ref();
-                    let index_mmap = repo
+                    let index_mmap = repo.store_vfs().mmap_open(&index_path)?;
-                        .store_vfs()
-                        .mmap_open(&index_path)
-                        .map_err(RevlogError::IoError)?;
                     let version = get_version(&index_mmap);
                     if version != 1 {
-                        return Err(RevlogError::UnsuportedVersion(version));
+                        // A proper new version should have had a repo/store requirement.
+                        return Err(RevlogError::corrupted());
                     }
                     let index = Index::new(Box::new(index_mmap))?;
                     let default_data_path = index_path.with_extension("d");
                     // type annotation required
                     // won't recognize Mmap as Deref<Target = [u8]>
                     let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
                         if index.is_inline() {
                             None
                         } else {
                             let data_path = data_path.unwrap_or(&default_data_path);
-                            let data_mmap = repo
+                            let data_mmap = repo.store_vfs().mmap_open(data_path)?;
-                                .store_vfs()
-                                .mmap_open(data_path)
-                                .map_err(RevlogError::IoError)?;
                             Some(Box::new(data_mmap))
                         };
                     let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map(
                         |(docket, data)| {
                             nodemap::NodeTree::load_bytes(
                                 Box::new(data),
                                 docket.data_length,
                             )
                         },
                     );
                     Ok(Revlog {
                         index,
                         data_bytes,
                         nodemap,
                     })
                 }
                 /// Return the number of entries in this `Revlog`, as reported by its
                 /// index.
                 pub fn len(&self) -> usize {
                     self.index.len()
                 }
                 /// Return `true` if this `Revlog` has zero entries, as reported by its
                 /// index.
                 pub fn is_empty(&self) -> bool {
                     self.index.is_empty()
                 }
                 /// Return the revision number of the node matching the given prefix.
                 ///
                 /// The persistent nodemap is used when present; otherwise the index
                 /// is scanned linearly from the most recent revision.
                 ///
                 /// # Errors
                 ///
                 /// * `RevlogError::InvalidRevision` if no node matches `node`.
                 /// * `RevlogError::AmbiguousPrefix` if more than one node matches.
                 /// * A corruption error if a revision in `0..len()` has no index
                 ///   entry.
                 #[timed]
                 pub fn get_node_rev(
                     &self,
                     node: NodePrefix,
                 ) -> Result<Revision, RevlogError> {
                     if let Some(nodemap) = &self.nodemap {
                         return nodemap
-                            .find_bin(&self.index, node)
+                            .find_bin(&self.index, node)?
-                            // TODO: propagate details of this error:
-                            .map_err(|_| RevlogError::Corrupted)?
                             .ok_or(RevlogError::InvalidRevision);
                     }
                     // Fallback to linear scan when a persistent nodemap is not present.
                     // This happens when the persistent-nodemap experimental feature is not
                     // enabled, or for small revlogs.
                     //
                     // TODO: consider building a non-persistent nodemap in memory to
                     // optimize these cases.
                     let mut found_by_prefix = None;
                     for rev in (0..self.len() as Revision).rev() {
                         // Build the error lazily: `ok_or` would allocate and drop the
                         // error message on every iteration of this scan.
                         let index_entry =
-                            self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
+                            self.index.get_entry(rev).ok_or_else(|| {
+                                HgError::corrupted(
+                                    "revlog references a revision not in the index",
+                                )
+                            })?;
                         if node == *index_entry.hash() {
                             return Ok(rev);
                         }
                         if node.is_prefix_of(index_entry.hash()) {
                             if found_by_prefix.is_some() {
                                 return Err(RevlogError::AmbiguousPrefix);
                             }
                             found_by_prefix = Some(rev)
                         }
                     }
                     found_by_prefix.ok_or(RevlogError::InvalidRevision)
                 }
                 /// Returns whether the given revision exists in this revlog, i.e.
                 /// whether the index has an entry for `rev`.
                 pub fn has_rev(&self, rev: Revision) -> bool {
                     self.index.get_entry(rev).is_some()
                 }
                 /// Return the full data associated to a revision.
                 ///
                 /// All entries required to build the final data out of deltas will be
                 /// retrieved as needed, and the deltas will be applied to the initial
                 /// snapshot to rebuild the final data.
                 #[timed]
                 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
                     // Todo return -> Cow
                     let mut entry = self.get_entry(rev)?;
                     let mut delta_chain = vec![];
                     while let Some(base_rev) = entry.base_rev {
                         delta_chain.push(entry);
-                        entry =
+                        entry = self
-                            self.get_entry(base_rev).or(Err(RevlogError::Corrupted))?;
+                            .get_entry(base_rev)
+                            .map_err(|_| RevlogError::corrupted())?;
                     }
                     // TODO do not look twice in the index
                     let index_entry = self
                         .index
                         .get_entry(rev)
                         .ok_or(RevlogError::InvalidRevision)?;
                     let data: Vec<u8> = if delta_chain.is_empty() {
                         entry.data()?.into()
                     } else {
                         Revlog::build_data_from_deltas(entry, &delta_chain)?
                     };
                     if self.check_hash(
                         index_entry.p1(),
                         index_entry.p2(),
                         index_entry.hash().as_bytes(),
                         &data,
                     ) {
                         Ok(data)
                     } else {
-                        Err(RevlogError::Corrupted)
+                        Err(RevlogError::corrupted())
                     }
                 }
                 /// Check the hash of some given data against the recorded hash.
                 pub fn check_hash(
                     &self,
                     p1: Revision,
                     p2: Revision,
                     expected: &[u8],
                     data: &[u8],
                 ) -> bool {
                     let e1 = self.index.get_entry(p1);
                     let h1 = match e1 {
                         Some(ref entry) => entry.hash(),
                         None => &NULL_NODE,
                     };
                     let e2 = self.index.get_entry(p2);
                     let h2 = match e2 {
                         Some(ref entry) => entry.hash(),
                         None => &NULL_NODE,
                     };
                     hash(data, h1.as_bytes(), h2.as_bytes()).as_slice() == expected
                 }
                 /// Build the full data of a revision out of its snapshot
                 /// and its deltas.
                 ///
                 /// `deltas` is walked in reverse order; each delta is turned into a
                 /// patch list, the lists are folded into a single patch, and that
                 /// patch is applied to the snapshot data.
                 ///
                 /// # Errors
                 ///
                 /// Returns the first error met while extracting the data of the
                 /// snapshot or of a delta.
                 #[timed]
                 fn build_data_from_deltas(
                     snapshot: RevlogEntry,
                     deltas: &[RevlogEntry],
                 ) -> Result<Vec<u8>, RevlogError> {
                     let base = snapshot.data()?;
                     let mut payloads: Vec<Cow<'_, [u8]>> =
                         Vec::with_capacity(deltas.len());
                     for delta in deltas.iter().rev() {
                         payloads.push(delta.data()?);
                     }
                     let patch_lists: Vec<_> = payloads
                         .iter()
                         .map(|payload| patch::PatchList::new(payload))
                         .collect();
                     let folded = patch::fold_patch_lists(&patch_lists);
                     Ok(folded.apply(&base))
                 }
                 /// Return the revlog data.
                 fn data(&self) -> &[u8] {
                     match self.data_bytes {
                         Some(ref data_bytes) => &data_bytes,
                         None => panic!(
                             "forgot to load the data or trying to access inline data"
                         ),
                     }
                 }
                 /// Get the entry of the revlog for revision `rev`.
                 ///
                 /// The entry's bytes are read from the index when the revlog is
                 /// inline, and from the separate data otherwise. `base_rev` is `None`
                 /// when the index records the revision as its own base.
                 ///
                 /// # Errors
                 ///
                 /// Returns `RevlogError::InvalidRevision` if the index has no entry
                 /// for `rev`.
                 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
                     let index_entry = match self.index.get_entry(rev) {
                         Some(index_entry) => index_entry,
                         None => return Err(RevlogError::InvalidRevision),
                     };
                     let compressed_len = index_entry.compressed_len();
                     let start = index_entry.offset();
                     let end = start + compressed_len;
                     let bytes = if self.index.is_inline() {
                         self.index.data(start, end)
                     } else {
                         &self.data()[start..end]
                     };
                     let base_revision = index_entry.base_revision();
                     let base_rev = if base_revision == rev {
                         None
                     } else {
                         Some(base_revision)
                     };
                     Ok(RevlogEntry {
                         rev,
                         bytes,
                         compressed_len,
                         uncompressed_len: index_entry.uncompressed_len(),
                         base_rev,
                     })
                 }
             }
             /// The revlog entry's bytes and the necessary information to extract
             /// the entry's data.
             #[derive(Debug)]
             pub struct RevlogEntry<'a> {
                 /// Revision number this entry was looked up with.
                 rev: Revision,
                 /// Stored bytes of the entry, borrowed from the index (inline
                 /// revlog) or from the separate data.
                 bytes: &'a [u8],
                 /// Compressed length, as recorded in the index entry.
                 compressed_len: usize,
                 /// Uncompressed length, as recorded in the index entry.
                 uncompressed_len: usize,
                 /// Base revision of this entry, or `None` when the index records the
                 /// revision as its own base.
                 base_rev: Option<Revision>,
             }
             impl<'a> RevlogEntry<'a> {
                 /// Extract the data contained in the entry.
                 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
                     if self.bytes.is_empty() {
                         return Ok(Cow::Borrowed(&[]));
                     }
                     match self.bytes[0] {
                         // Revision data is the entirety of the entry, including this
                         // header.
                         b'\0' => Ok(Cow::Borrowed(self.bytes)),
                         // Raw revision data follows.
                         b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
                         // zlib (RFC 1950) data.
                         b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
                         // zstd data.
                         b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
-                        format_type => Err(RevlogError::UnknowDataFormat(format_type)),
+                        // A proper new format should have had a repo/store requirement.
+                        _format_type => Err(RevlogError::corrupted()),
                     }
                 }
                 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
                     let mut decoder = ZlibDecoder::new(self.bytes);
                     if self.is_delta() {
                         let mut buf = Vec::with_capacity(self.compressed_len);
                         decoder
                             .read_to_end(&mut buf)
-                            .or(Err(RevlogError::Corrupted))?;
+                            .map_err(|_| RevlogError::corrupted())?;
                         Ok(buf)
                     } else {
                         let mut buf = vec![0; self.uncompressed_len];
                         decoder
                             .read_exact(&mut buf)
-                            .or(Err(RevlogError::Corrupted))?;
+                            .map_err(|_| RevlogError::corrupted())?;
                         Ok(buf)
                     }
                 }
                 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
                     if self.is_delta() {
                         let mut buf = Vec::with_capacity(self.compressed_len);
                         zstd::stream::copy_decode(self.bytes, &mut buf)
-                            .or(Err(RevlogError::Corrupted))?;
+                            .map_err(|_| RevlogError::corrupted())?;
                         Ok(buf)
                     } else {
                         let mut buf = vec![0; self.uncompressed_len];
                         let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
-                            .or(Err(RevlogError::Corrupted))?;
+                            .map_err(|_| RevlogError::corrupted())?;
                         if len != self.uncompressed_len {
-                            Err(RevlogError::Corrupted)
+                            Err(RevlogError::corrupted())
                         } else {
                             Ok(buf)
                         }
                     }
                 }
                 /// Tell if the entry is a snapshot or a delta
                 /// (influences on decompression).
                 ///
                 /// An entry is a delta when it has a base revision.
                 fn is_delta(&self) -> bool {
                     self.base_rev.is_some()
                 }
             }
             /// Format version of the revlog.
             ///
             /// The version is read as a big-endian `u16` from bytes 2 and 3 of the
             /// index.
             ///
             /// # Panics
             ///
             /// Panics if `index_bytes` is shorter than 4 bytes.
             pub fn get_version(index_bytes: &[u8]) -> u16 {
                 let version_field = &index_bytes[2..=3];
                 BigEndian::read_u16(version_field)
             }
             /// Calculate the hash of a revision given its data and its parents.
             ///
             /// The two parent hashes are fed to SHA-1 in ascending byte order,
             /// followed by `data`. The digest is returned as a vector of
             /// `NODE_BYTES_LENGTH` bytes.
             fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
                 // Order the parents so the result does not depend on which one is
                 // passed as `p1` and which as `p2`.
                 let (lower, higher) = if p1_hash > p2_hash {
                     (p2_hash, p1_hash)
                 } else {
                     (p1_hash, p2_hash)
                 };
                 let mut hasher = Sha1::new();
                 hasher.input(lower);
                 hasher.input(higher);
                 hasher.input(data);
                 let mut digest = vec![0; NODE_BYTES_LENGTH];
                 hasher.result(&mut digest);
                 digest
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use super::super::index::IndexEntryBuilder;
                 /// `get_version` reads back the version written by
                 /// `IndexEntryBuilder` for a first index entry.
                 #[test]
                 fn version_test() {
                     let bytes = IndexEntryBuilder::new()
                         .is_first(true)
                         .with_version(1)
                         .build();
                     assert_eq!(get_version(&bytes), 1)
                 }
             }

rust/rhg/src/error.rs

0 +1 -24

             use crate::exitcode;
             use crate::ui::utf8_to_local;
             use crate::ui::UiError;
             use format_bytes::format_bytes;
             use hg::errors::HgError;
             use hg::operations::FindRootError;
             use hg::revlog::revlog::RevlogError;
             use hg::utils::files::get_bytes_from_path;
             use std::convert::From;
             use std::path::PathBuf;
             /// The kind of command error
             #[derive(Debug, derive_more::From)]
             pub enum CommandError {
                 /// The root of the repository cannot be found
                 RootNotFound(PathBuf),
                 /// The current directory cannot be found
                 CurrentDirNotFound(std::io::Error),
                 /// The standard output stream cannot be written to
                 StdoutError,
                 /// The standard error stream cannot be written to
                 StderrError,
                 /// The command aborted, with an optional message to report
                 Abort(Option<Vec<u8>>),
                 /// A mercurial capability has not been implemented.
                 Unimplemented,
                 /// Common cases
                 #[from]
                 Other(HgError),
             }
             impl CommandError {
                 pub fn get_exit_code(&self) -> exitcode::ExitCode {
                     match self {
                         CommandError::RootNotFound(_) => exitcode::ABORT,
                         CommandError::CurrentDirNotFound(_) => exitcode::ABORT,
                         CommandError::StdoutError => exitcode::ABORT,
                         CommandError::StderrError => exitcode::ABORT,
                         CommandError::Abort(_) => exitcode::ABORT,
                         CommandError::Unimplemented => exitcode::UNIMPLEMENTED_COMMAND,
                         CommandError::Other(HgError::UnsupportedFeature(_)) => {
                             exitcode::UNIMPLEMENTED_COMMAND
                         }
                         CommandError::Other(_) => exitcode::ABORT,
                     }
                 }
                 /// Return the message corresponding to the error if any
                 pub fn get_error_message_bytes(&self) -> Option<Vec<u8>> {
                     match self {
                         CommandError::RootNotFound(path) => {
                             let bytes = get_bytes_from_path(path);
                             Some(format_bytes!(
                                 b"abort: no repository found in '{}' (.hg not found)!\n",
                                 bytes.as_slice()
                             ))
                         }
                         CommandError::CurrentDirNotFound(e) => Some(format_bytes!(
                             b"abort: error getting current working directory: {}\n",
                             e.to_string().as_bytes(),
                         )),
                         CommandError::Abort(message) => message.to_owned(),
                         CommandError::StdoutError
                         | CommandError::StderrError
                         | CommandError::Unimplemented
                         | CommandError::Other(HgError::UnsupportedFeature(_)) => None,
                         CommandError::Other(e) => {
                             Some(format_bytes!(b"{}\n", e.to_string().as_bytes()))
                         }
                     }
                 }
                 /// Exist the process with the corresponding exit code.
                 pub fn exit(&self) {
                     std::process::exit(self.get_exit_code())
                 }
             }
             impl From<UiError> for CommandError {
                 /// Map a UI stream error to the matching `CommandError` variant,
                 /// dropping the payload carried by the `UiError`.
                 fn from(ui_error: UiError) -> Self {
                     match ui_error {
                         UiError::StderrError(_) => CommandError::StderrError,
                         UiError::StdoutError(_) => CommandError::StdoutError,
                     }
                 }
             }
             impl From<FindRootError> for CommandError {
                 /// Map a repository root lookup failure to the matching
                 /// `CommandError` variant, keeping the path or I/O error it carries.
                 fn from(find_root_error: FindRootError) -> Self {
                     match find_root_error {
                         FindRootError::GetCurrentDirError(io_error) => {
                             CommandError::CurrentDirNotFound(io_error)
                         }
                         FindRootError::RootNotFound(root) => {
                             CommandError::RootNotFound(root)
                         }
                     }
                 }
             }
             impl From<(RevlogError, &str)> for CommandError {
                 fn from((err, rev): (RevlogError, &str)) -> CommandError {
                     match err {
-                        RevlogError::IoError(err) => CommandError::Abort(Some(
-                            utf8_to_local(&format!("abort: {}\n", err)).into(),
-                        )),
                         RevlogError::InvalidRevision => CommandError::Abort(Some(
                             utf8_to_local(&format!(
                                 "abort: invalid revision identifier {}\n",
                                 rev
                             ))
                             .into(),
                         )),
                         RevlogError::AmbiguousPrefix => CommandError::Abort(Some(
                             utf8_to_local(&format!(
                                 "abort: ambiguous revision identifier {}\n",
                                 rev
                             ))
                             .into(),
                         )),
-                        RevlogError::UnsuportedVersion(version) => {
+                        RevlogError::Other(err) => CommandError::Other(err),
-                            CommandError::Abort(Some(
-                                utf8_to_local(&format!(
-                                    "abort: unsupported revlog version {}\n",
-                                    version
-                                ))
-                                .into(),
-                            ))
-                        RevlogError::Corrupted => {
-                            CommandError::Abort(Some("abort: corrupted revlog\n".into()))
-                        RevlogError::UnknowDataFormat(format) => {
-                            CommandError::Abort(Some(
-                                utf8_to_local(&format!(
-                                    "abort: unknow revlog dataformat {:?}\n",
-                                    format
-                                ))
-                                .into(),
-                            ))
                     }
                 }
             }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages