upstream/mercurial-mirror Commit - r50832:75040950

rust-clippy: merge "revlog" module definition and struct implementation...

Raphaël Gomès -

r50832:75040950 default

parent child

rust/hg-core/src/copy_tracing/tests.rs

0 +1 -1

             use super::*;
             /// Unit tests for:
             ///
             /// ```ignore
             /// fn compare_value(
             ///     current_merge: Revision,
             ///     merge_case_for_dest: impl Fn() -> MergeCase,
             ///     src_minor: &CopySource,
             ///     src_major: &CopySource,
             /// ) -> (MergePick, /* overwrite: */ bool)
             /// ```
             #[test]
             fn test_compare_value() {
                 // `compare_value!` forwards to the `compare_value` function, with its
                 // arguments written in a lighter pseudo-syntax:
                 //
                 // * `merge_case_for_dest` is given as a plain `MergeCase` value rather
                 //   than as a closure.
                 // * Each `CopySource` is written as a `(rev, path, overwritten)` tuple
                 //   of type `(Revision, Option<PathToken>, OrdSet<Revision>)`.
                 // * `PathToken` is an integer that `compare_value` never reads. Only
                 //   `Some(_)` (a file copy) vs. `None` (a file deletion) matters.
                 // * `OrdSet<Revision>` is written as a Python-like set literal.
                 use MergeCase::*;
                 use MergePick::*;
                 let outcome =
                     compare_value!(1, Normal, (1, None, { 1 }), (1, None, { 1 }));
                 assert_eq!(outcome, (Any, false));
             }
             /// Unit tests for:
             ///
             /// ```ignore
             /// fn merge_copies_dict(
             ///     path_map: &TwoWayPathMap, // Not visible in test cases
             ///     current_merge: Revision,
             ///     minor: InternalPathCopies,
             ///     major: InternalPathCopies,
             ///     get_merge_case: impl Fn(&HgPath) -> MergeCase + Copy,
             /// ) -> InternalPathCopies
             /// ```
             #[test]
             fn test_merge_copies_dict() {
                 // The `merge_copies_dict!` macro calls the `merge_copies_dict` function
                 // with arguments given in pseudo-syntax:
                 //
                 // * `TwoWayPathMap` and path tokenization are implicitly taken care of.
                 //   All paths are given as string literals.
                 // * The first macro argument is the `current_merge` revision.
                 // * Key-value maps are represented with `{key1 => value1, key2 => value2}`
                 //   pseudo-syntax.
                 // * `InternalPathCopies` is a map of copy destination path keys to
                 //   `CopySource` values.
                 //   - `CopySource` is represented as a `(rev, source_path, overwritten)`
                 //     tuple of type `(Revision, Option<Path>, OrdSet<Revision>)`.
                 //   - Unlike in `test_compare_value`, source paths are string literals.
                 //   - `OrdSet<Revision>` is again represented as a Python-like set
                 //     literal.
                 // * `get_merge_case` is represented as a map of copy destination path to
                 //   `MergeCase`. The default for paths not in the map is
                 //   `MergeCase::Normal`.
                 //
                 // `internal_path_copies!` creates an `InternalPathCopies` value with the
                 // same pseudo-syntax as in `merge_copies_dict!`.
                 use MergeCase::*;
                 assert_eq!(
                     merge_copies_dict!(
                         1,
                         {"foo" => (1, None, {})},
                         {},
                         {"foo" => Merged}
                     ),
                     internal_path_copies!("foo" => (1, None, {}))
                 );
             }
             /// Unit tests for:
             ///
             /// ```ignore
             /// impl CombineChangesetCopies {
             ///     fn new(children_count: HashMap<Revision, usize>) -> Self
             ///
             ///     // Called repeatedly:
             ///     fn add_revision_inner<'a>(
             ///         &mut self,
             ///         rev: Revision,
             ///         p1: Revision,
             ///         p2: Revision,
             ///         copy_actions: impl Iterator<Item = Action<'a>>,
             ///         get_merge_case: impl Fn(&HgPath) -> MergeCase + Copy,
             ///     )
             ///
             ///     fn finish(mut self, target_rev: Revision) -> PathCopies
             /// }
             /// ```
             #[test]
             fn test_combine_changeset_copies() {
                 // `combine_changeset_copies!` creates a `CombineChangesetCopies` with
                 // `new`, then calls `add_revision_inner` repeatedly, then calls `finish`
                 // for its return value.
                 //
                 // All paths given as string literals.
                 //
                 // * Key-value maps are represented with `{key1 => value1, key2 => value2}`
                 //   pseudo-syntax.
                 // * `children_count` is a map of revision numbers to count of children in
                 //   the DAG. It includes all revisions that should be considered by the
                 //   algorithm.
                 // * Calls to `add_revision_inner` are represented as an array of anonymous
                 //   structs with named fields, one pseudo-struct per call.
                 // * The last argument is the `target_rev` handed to `finish`.
                 //
                 // `path_copies!` creates a `PathCopies` value, a map of copy destination
                 // keys to copy source values. Note: the arrows for map literal syntax
                 // point **backwards** compared to the logical direction of copy!
-                use crate::NULL_REVISION as NULL;
+                use crate::revlog::NULL_REVISION as NULL;
                 use Action::*;
                 use MergeCase::*;
                 assert_eq!(
                     combine_changeset_copies!(
                         { 1 => 1, 2 => 1 },
                         [
                             { rev: 1, p1: NULL, p2: NULL, actions: [], merge_cases: {}, },
                             { rev: 2, p1: NULL, p2: NULL, actions: [], merge_cases: {}, },
                             {
                                 rev: 3, p1: 1, p2: 2,
                                 actions: [CopiedFromP1("destination.txt", "source.txt")],
                                 merge_cases: {"destination.txt" => Merged},
                             },
                         ],
                         3,
                     ),
                     path_copies!("destination.txt" => "source.txt")
                 );
             }

rust/hg-core/src/operations/cat.rs

0 +1 -1

             // cat.rs
             //
             // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             use crate::repo::Repo;
-            use crate::revlog::revlog::RevlogError;
             use crate::revlog::Node;
+            use crate::revlog::RevlogError;
             use crate::utils::hg_path::HgPath;
             use crate::errors::HgError;
             use crate::manifest::Manifest;
             use crate::manifest::ManifestEntry;
             use itertools::put_back;
             use itertools::PutBack;
             use std::cmp::Ordering;
             /// Result of the `cat` operation: file contents found at the resolved
             /// revision, plus the requested paths that were not found.
             pub struct CatOutput<'a> {
                 /// Whether any file in the manifest matched the paths given as CLI
                 /// arguments
                 pub found_any: bool,
                 /// The contents of matching files, in manifest order
                 pub results: Vec<(&'a HgPath, Vec<u8>)>,
                 /// Which of the CLI arguments did not match any manifest file
                 pub missing: Vec<&'a HgPath>,
                 /// The node ID that the given revset was resolved to
                 pub node: Node,
             }
             // Look up `needle` in an iterator over a sorted collection.
             //
             // Entries that sort before `needle` are consumed. The first entry that
             // sorts after it is pushed back onto the iterator, so that a later lookup
             // of a greater needle can resume from there. Returns the node ID of the
             // matching entry, or `None` when there is no match.
             fn find_item<'a>(
                 i: &mut PutBack<impl Iterator<Item = Result<ManifestEntry<'a>, HgError>>>,
                 needle: &HgPath,
             ) -> Result<Option<Node>, HgError> {
                 while let Some(result) = i.next() {
                     let entry = result?;
                     match needle.as_bytes().cmp(entry.path.as_bytes()) {
                         Ordering::Equal => return Ok(Some(entry.node_id()?)),
                         // Still before the needle: keep scanning.
                         Ordering::Greater => {}
                         Ordering::Less => {
                             // Went past where the needle would be: un-consume the
                             // entry and give up.
                             i.put_back(Ok(entry));
                             break;
                         }
                     }
                 }
                 Ok(None)
             }
             // Tuple of (found, missing) paths in the manifest: found paths are paired
             // with their node ID, missing paths are listed on their own.
             type ManifestQueryResponse<'a> = (Vec<(&'a HgPath, Node)>, Vec<&'a HgPath>);
             // Split `query` into the paths present in `manifest` (with their node ID)
             // and the paths absent from it.
             //
             // The manifest is walked once through a single shared iterator, so
             // `query` is expected to yield paths in sorted order.
             fn find_files_in_manifest<'query>(
                 manifest: &Manifest,
                 query: impl Iterator<Item = &'query HgPath>,
             ) -> Result<ManifestQueryResponse<'query>, HgError> {
                 let mut entries = put_back(manifest.iter());
                 let mut found = Vec::new();
                 let mut not_found = Vec::new();
                 for path in query {
                     if let Some(node) = find_item(&mut entries, path)? {
                         found.push((path, node));
                     } else {
                         not_found.push(path);
                     }
                 }
                 Ok((found, not_found))
             }
             /// Read the contents of the given files at a given revision.
             ///
             /// * `repo`: The repository to read from.
             /// * `revset`: Revision specifier, resolved to a single revision.
             /// * `files`: The files to output.
             ///
             /// # Errors
             ///
             /// Fails if the revset cannot be resolved, or if the manifest, the
             /// changelog or one of the filelogs cannot be read.
             ///
             /// # Panics
             ///
             /// Panics if the changelog has no node for the resolved revision.
             pub fn cat<'a>(
                 repo: &Repo,
                 revset: &str,
                 mut files: Vec<&'a HgPath>,
             ) -> Result<CatOutput<'a>, RevlogError> {
                 let rev = crate::revset::resolve_single(revset, repo)?;
                 let manifest = repo.manifest_for_rev(rev)?;
                 let node = *repo
                     .changelog()?
                     .node_from_rev(rev)
                     .expect("should succeed when repo.manifest did");
                 // The manifest lookup is a single in-order pass, so the query must be
                 // sorted. It must also be free of duplicates: a matching manifest
                 // entry is consumed by the first hit, so a repeated path would
                 // otherwise be reported as missing.
                 files.sort_unstable();
                 files.dedup();
                 let (found, missing) =
                     find_files_in_manifest(&manifest, files.into_iter())?;
                 let found_any = !found.is_empty();
                 let mut results: Vec<(&'a HgPath, Vec<u8>)> =
                     Vec::with_capacity(found.len());
                 for (file_path, file_node) in found {
                     let file_log = repo.filelog(file_path)?;
                     results.push((
                         file_path,
                         file_log.data_for_node(file_node)?.into_file_data()?,
                     ));
                 }
                 Ok(CatOutput {
                     found_any,
                     results,
                     missing,
                     node,
                 })
             }

rust/hg-core/src/operations/debugdata.rs

0 +1 -1

             // debugdata.rs
             //
             // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             use crate::repo::Repo;
             use crate::requirements;
-            use crate::revlog::revlog::{Revlog, RevlogError};
+            use crate::revlog::{Revlog, RevlogError};
             /// Kind of data to debug
             #[derive(Debug, Copy, Clone)]
             pub enum DebugDataKind {
                 /// Read from the changelog index (`00changelog.i`)
                 Changelog,
                 /// Read from the manifest index (`00manifest.i`)
                 Manifest,
             }
             /// Return the raw data stored for one revision of the changelog or the
             /// manifest.
             ///
             /// `revset` is resolved as a revision number or a hex node prefix against
             /// the revlog selected by `kind`.
             pub fn debug_data(
                 repo: &Repo,
                 revset: &str,
                 kind: DebugDataKind,
             ) -> Result<Vec<u8>, RevlogError> {
                 let use_nodemap = repo
                     .requirements()
                     .contains(requirements::NODEMAP_REQUIREMENT);
                 let index_file = match kind {
                     DebugDataKind::Manifest => "00manifest.i",
                     DebugDataKind::Changelog => "00changelog.i",
                 };
                 let revlog =
                     Revlog::open(&repo.store_vfs(), index_file, None, use_nodemap)?;
                 let rev =
                     crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
                 Ok(revlog.get_rev_data(rev)?.into_owned())
             }

rust/hg-core/src/operations/list_tracked_files.rs

0 +1 -1

             // list_tracked_files.rs
             //
             // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             use crate::dirstate::parsers::parse_dirstate_entries;
             use crate::dirstate_tree::on_disk::{for_each_tracked_path, read_docket};
             use crate::errors::HgError;
             use crate::repo::Repo;
             use crate::revlog::manifest::Manifest;
-            use crate::revlog::revlog::RevlogError;
+            use crate::revlog::RevlogError;
             use crate::utils::hg_path::HgPath;
             use crate::DirstateError;
             use rayon::prelude::*;
             /// List files under Mercurial control in the working directory
             /// by reading the dirstate
             pub struct Dirstate {
                 /// The `dirstate` content. For dirstate-v2 repositories this is the
                 /// content of the data file named by the docket.
                 content: Vec<u8>,
                 /// Tree metadata copied from the docket when the repository uses
                 /// dirstate-v2, `None` otherwise.
                 v2_metadata: Option<Vec<u8>>,
             }
             impl Dirstate {
                 /// Read the dirstate of `repo` from disk.
                 ///
                 /// With dirstate-v2, `.hg/dirstate` is a docket: the tree metadata is
                 /// taken from it and the content is read from the data file it names.
                 pub fn new(repo: &Repo) -> Result<Self, HgError> {
                     let dirstate_bytes = repo.hg_vfs().read("dirstate")?;
                     if !repo.has_dirstate_v2() {
                         return Ok(Self {
                             content: dirstate_bytes,
                             v2_metadata: None,
                         });
                     }
                     let docket = read_docket(&dirstate_bytes)?;
                     let meta = docket.tree_metadata().to_vec();
                     let content = repo.hg_vfs().read(docket.data_filename())?;
                     Ok(Self {
                         content,
                         v2_metadata: Some(meta),
                     })
                 }
                 /// Return the sorted paths of all tracked files.
                 ///
                 /// An empty dirstate yields an empty list.
                 pub fn tracked_files(&self) -> Result<Vec<&HgPath>, DirstateError> {
                     let mut tracked = Vec::new();
                     if self.content.is_empty() {
                         return Ok(tracked);
                     }
                     match &self.v2_metadata {
                         Some(meta) => {
                             for_each_tracked_path(&self.content, meta, |path| {
                                 tracked.push(path)
                             })?;
                         }
                         None => {
                             let _parents = parse_dirstate_entries(
                                 &self.content,
                                 |path, entry, _copy_source| {
                                     if entry.tracked() {
                                         tracked.push(path)
                                     }
                                     Ok(())
                                 },
                             )?;
                         }
                     }
                     tracked.par_sort_unstable();
                     Ok(tracked)
                 }
             }
             /// List files under Mercurial control at a given revision.
             ///
             /// `revset` is resolved to a single revision whose manifest backs the
             /// returned [`FilesForRev`].
             pub fn list_rev_tracked_files(
                 repo: &Repo,
                 revset: &str,
             ) -> Result<FilesForRev, RevlogError> {
                 let rev = crate::revset::resolve_single(revset, repo)?;
                 let manifest = repo.manifest_for_rev(rev)?;
                 Ok(FilesForRev(manifest))
             }
             /// The files tracked at one revision, backed by that revision's manifest.
             pub struct FilesForRev(Manifest);
             impl FilesForRev {
                 /// Iterate over the path of every manifest entry, forwarding any error
                 /// hit while reading an entry.
                 pub fn iter(&self) -> impl Iterator<Item = Result<&HgPath, HgError>> {
                     self.0
                         .iter()
                         .map(|entry| entry.map(|manifest_entry| manifest_entry.path))
                 }
             }

rust/hg-core/src/repo.rs

0 +1 -1

             use crate::changelog::Changelog;
             use crate::config::{Config, ConfigError, ConfigParseError};
             use crate::dirstate::DirstateParents;
             use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
             use crate::dirstate_tree::owning::OwningDirstateMap;
             use crate::errors::HgResultExt;
             use crate::errors::{HgError, IoResultExt};
             use crate::lock::{try_with_lock_no_wait, LockError};
             use crate::manifest::{Manifest, Manifestlog};
             use crate::revlog::filelog::Filelog;
-            use crate::revlog::revlog::RevlogError;
+            use crate::revlog::RevlogError;
             use crate::utils::files::get_path_from_bytes;
             use crate::utils::hg_path::HgPath;
             use crate::utils::SliceExt;
             use crate::vfs::{is_dir, is_file, Vfs};
             use crate::{requirements, NodePrefix};
             use crate::{DirstateError, Revision};
             use std::cell::{Ref, RefCell, RefMut};
             use std::collections::HashSet;
             use std::io::Seek;
             use std::io::SeekFrom;
             use std::io::Write as IoWrite;
             use std::path::{Path, PathBuf};
             /// A repository on disk
             pub struct Repo {
                 /// Root of the working copy (the directory containing `.hg`)
                 working_directory: PathBuf,
                 /// The `.hg` directory inside `working_directory`
                 dot_hg: PathBuf,
                 /// The store directory; for a shared repository it lives under the
                 /// share source rather than under `dot_hg`
                 store: PathBuf,
                 /// Requirements loaded when the repository was opened
                 requirements: HashSet<String>,
                 /// Configuration in effect for this repository
                 config: Config,
                 // The fields below are filled in on demand by the corresponding
                 // accessor methods.
                 dirstate_parents: LazyCell<DirstateParents>,
                 dirstate_data_file_uuid: LazyCell<Option<Vec<u8>>>,
                 dirstate_map: LazyCell<OwningDirstateMap>,
                 changelog: LazyCell<Changelog>,
                 manifestlog: LazyCell<Manifestlog>,
             }
             /// Errors that can occur while locating or opening a repository
             #[derive(Debug, derive_more::From)]
             pub enum RepoError {
                 /// No repository was found; `at` is the path where the search started
                 NotFound {
                     at: PathBuf,
                 },
                 /// A configuration file could not be parsed
                 #[from]
                 ConfigParseError(ConfigParseError),
                 /// Any other error
                 #[from]
                 Other(HgError),
             }
             /// Convert each `ConfigError` variant through the `From` impl for its
             /// payload type.
             impl From<ConfigError> for RepoError {
                 fn from(error: ConfigError) -> Self {
                     match error {
                         ConfigError::Parse(parse_error) => Self::from(parse_error),
                         ConfigError::Other(other_error) => Self::from(other_error),
                     }
                 }
             }
             impl Repo {
                 /// Tries to find the nearest repository root, starting at the current
                 /// working directory and walking up through its ancestors.
                 ///
                 /// Returns `RepoError::NotFound` (pointing at the current directory)
                 /// when no candidate contains a `.hg` directory.
                 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
                     let cwd = crate::utils::current_dir()?;
                     // `ancestors()` is inclusive: its first item is `cwd` itself.
                     for candidate in cwd.ancestors() {
                         if !is_dir(candidate.join(".hg"))? {
                             continue;
                         }
                         return Ok(candidate.to_path_buf());
                     }
                     Err(RepoError::NotFound { at: cwd })
                 }
                 /// Find a repository, either at the given path (which must contain a `.hg`
                 /// sub-directory) or by searching the current directory and its
                 /// ancestors.
                 ///
                 /// A method with two very different "modes" like this is usually a code
                 /// smell that calls for two methods instead, but in this case an
                 /// `Option` is what rhg sub-commands get from Clap for the `-R` /
                 /// `--repository` CLI argument. Having two methods would just move that
                 /// `if` to almost all callers.
                 pub fn find(
                     config: &Config,
                     explicit_path: Option<PathBuf>,
                 ) -> Result<Self, RepoError> {
                     let root = match explicit_path {
                         None => Self::find_repo_root()?,
                         Some(root) => {
                             if is_dir(root.join(".hg"))? {
                                 root
                             } else if is_file(&root)? {
                                 // A plain file given as the repository path is
                                 // reported as an (unsupported) bundle repository.
                                 return Err(
                                     HgError::unsupported("bundle repository").into()
                                 );
                             } else {
                                 return Err(RepoError::NotFound { at: root });
                             }
                         }
                     };
                     Self::new_at_path(root, config)
                 }
                 /// To be called after checking that `.hg` is a sub-directory
                 ///
                 /// Loads the requirements, works out where the store lives (following
                 /// `.hg/sharedpath` for shared repositories), combines `config` with
                 /// the repository-level config files unless `HGRCSKIPREPO` is set, and
                 /// finally runs `requirements::check` on the resulting `Repo`.
                 fn new_at_path(
                     working_directory: PathBuf,
                     config: &Config,
                 ) -> Result<Self, RepoError> {
                     let dot_hg = working_directory.join(".hg");
                     // Repository-level config files; a share-safe share source's
                     // `hgrc` is inserted in front of these further down.
                     let mut repo_config_files =
                         vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")];
                     let hg_vfs = Vfs { base: &dot_hg };
                     let mut reqs = requirements::load_if_exists(hg_vfs)?;
                     let relative =
                         reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
                     // The relative-shared requirement also marks the repo as shared.
                     let shared =
                         reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
                     // From `mercurial/localrepo.py`:
                     //
                     // if .hg/requires contains the sharesafe requirement, it means
                     // there exists a `.hg/store/requires` too and we should read it
                     // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
                     // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
                     // is not present, refer checkrequirementscompat() for that
                     //
                     // However, if SHARESAFE_REQUIREMENT is not present, it means that the
                     // repository was shared the old way. We check the share source
                     // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
                     // current repository needs to be reshared
                     let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
                     let store_path;
                     if !shared {
                         store_path = dot_hg.join("store");
                     } else {
                         // `.hg/sharedpath` holds the path of the share source, with
                         // any trailing newlines stripped here.
                         let bytes = hg_vfs.read("sharedpath")?;
                         let mut shared_path =
                             get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
                                 .to_owned();
                         if relative {
                             // Relative share paths are resolved against `.hg`.
                             shared_path = dot_hg.join(shared_path)
                         }
                         if !is_dir(&shared_path)? {
                             return Err(HgError::corrupted(format!(
                                 ".hg/sharedpath points to nonexistent directory {}",
                                 shared_path.display()
                             ))
                             .into());
                         }
                         store_path = shared_path.join("store");
                         // Both sides of a share must agree on share-safe; a mismatch
                         // is reported as unsupported.
                         let source_is_share_safe =
                             requirements::load(Vfs { base: &shared_path })?
                                 .contains(requirements::SHARESAFE_REQUIREMENT);
                         if share_safe != source_is_share_safe {
                             return Err(HgError::unsupported("share-safe mismatch").into());
                         }
                         if share_safe {
                             repo_config_files.insert(0, shared_path.join("hgrc"))
                         }
                     }
                     if share_safe {
                         // Share-safe repositories keep additional requirements in
                         // the store.
                         reqs.extend(requirements::load(Vfs { base: &store_path })?);
                     }
                     // Setting `HGRCSKIPREPO` disables the repository-level config.
                     let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
                         config.combine_with_repo(&repo_config_files)?
                     } else {
                         config.clone()
                     };
                     let repo = Self {
                         requirements: reqs,
                         working_directory,
                         store: store_path,
                         dot_hg,
                         config: repo_config,
                         dirstate_parents: LazyCell::new(),
                         dirstate_data_file_uuid: LazyCell::new(),
                         dirstate_map: LazyCell::new(),
                         changelog: LazyCell::new(),
                         manifestlog: LazyCell::new(),
                     };
                     requirements::check(&repo)?;
                     Ok(repo)
                 }
                 /// Path of the working directory (the directory containing `.hg`)
                 pub fn working_directory_path(&self) -> &Path {
                     &self.working_directory
                 }
                 /// The set of requirements loaded when the repository was opened
                 pub fn requirements(&self) -> &HashSet<String> {
                     &self.requirements
                 }
                 /// The configuration in effect for this repository
                 pub fn config(&self) -> &Config {
                     &self.config
                 }
                 /// For accessing repository files (in `.hg`), except for the store
                 /// (`.hg/store`).
                 ///
                 /// The returned `Vfs` is rooted at the `.hg` directory.
                 pub fn hg_vfs(&self) -> Vfs<'_> {
                     Vfs { base: &self.dot_hg }
                 }
                 /// For accessing repository store files (in `.hg/store`)
                 ///
                 /// The returned `Vfs` is rooted at the store path computed when the
                 /// repository was opened.
                 pub fn store_vfs(&self) -> Vfs<'_> {
                     Vfs { base: &self.store }
                 }
                 /// For accessing the working copy
                 ///
                 /// The returned `Vfs` is rooted at the working directory.
                 pub fn working_directory_vfs(&self) -> Vfs<'_> {
                     Vfs {
                         base: &self.working_directory,
                     }
                 }
                 /// Run `f` through `try_with_lock_no_wait` using the `wlock` lock file
                 /// in `.hg`, returning `f`'s result on success.
                 ///
                 /// NOTE(review): presumably this fails with a `LockError` instead of
                 /// blocking when the lock is already held; confirm in `crate::lock`.
                 pub fn try_with_wlock_no_wait<R>(
                     &self,
                     f: impl FnOnce() -> R,
                 ) -> Result<R, LockError> {
                     try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
                 }
                 /// Whether the requirements include `DIRSTATE_V2_REQUIREMENT`
                 pub fn has_dirstate_v2(&self) -> bool {
                     self.requirements
                         .contains(requirements::DIRSTATE_V2_REQUIREMENT)
                 }
                 /// Whether the requirements include `SPARSE_REQUIREMENT`
                 pub fn has_sparse(&self) -> bool {
                     self.requirements.contains(requirements::SPARSE_REQUIREMENT)
                 }
                 /// Whether the requirements include `NARROW_REQUIREMENT`
                 pub fn has_narrow(&self) -> bool {
                     self.requirements.contains(requirements::NARROW_REQUIREMENT)
                 }
                 /// Whether the requirements include `NODEMAP_REQUIREMENT`
                 pub fn has_nodemap(&self) -> bool {
                     self.requirements
                         .contains(requirements::NODEMAP_REQUIREMENT)
                 }
                 /// Read `.hg/dirstate`, treating a file that does not exist as empty.
                 fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
                     let contents =
                         self.hg_vfs().read("dirstate").io_not_found_as_none()?;
                     Ok(contents.unwrap_or_default())
                 }
                 /// Return the parents recorded in the dirstate, going through
                 /// `read_dirstate_parents` when the cell has to be initialized.
                 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
                     let parents = self
                         .dirstate_parents
                         .get_or_init(|| self.read_dirstate_parents())?;
                     Ok(*parents)
                 }
                 /// Read the dirstate parents from disk and store them in
                 /// `self.dirstate_parents`.
                 ///
                 /// With dirstate-v2 the data file UUID is known at the same point, so
                 /// it is stored in `self.dirstate_data_file_uuid` as well.
                 fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
                     let dirstate = self.dirstate_file_contents()?;
                     let parents =
                         match (dirstate.is_empty(), self.has_dirstate_v2()) {
                             // Empty dirstate: null parents and, for v2, no data file.
                             (true, is_v2) => {
                                 if is_v2 {
                                     self.dirstate_data_file_uuid.set(None);
                                 }
                                 DirstateParents::NULL
                             }
                             (false, true) => {
                                 let docket =
                                     crate::dirstate_tree::on_disk::read_docket(
                                         &dirstate,
                                     )?;
                                 self.dirstate_data_file_uuid
                                     .set(Some(docket.uuid.to_owned()));
                                 docket.parents()
                             }
                             (false, false) => {
                                 *crate::dirstate::parsers::parse_dirstate_parents(
                                     &dirstate,
                                 )?
                             }
                         };
                     self.dirstate_parents.set(parents);
                     Ok(parents)
                 }
                 /// Read the UUID of the dirstate-v2 data file from the docket, or
                 /// `None` when the dirstate file is empty.
                 ///
                 /// The dirstate parents are known at the same point, so they are
                 /// stored in `self.dirstate_parents` as a side effect.
                 ///
                 /// # Panics
                 ///
                 /// Panics if the repository does not use dirstate-v2.
                 fn read_dirstate_data_file_uuid(
                     &self,
                 ) -> Result<Option<Vec<u8>>, HgError> {
                     assert!(
                         self.has_dirstate_v2(),
                         "accessing dirstate data file ID without dirstate-v2"
                     );
                     let dirstate = self.dirstate_file_contents()?;
                     if dirstate.is_empty() {
                         self.dirstate_parents.set(DirstateParents::NULL);
                         return Ok(None);
                     }
                     let docket = crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
                     self.dirstate_parents.set(docket.parents());
                     Ok(Some(docket.uuid.to_owned()))
                 }
                 /// Build the dirstate map from the on-disk dirstate.
                 ///
                 /// As a side effect, `self.dirstate_parents` is set in every branch
                 /// and, for dirstate-v2 repositories, so is
                 /// `self.dirstate_data_file_uuid`.
                 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
                     let dirstate_file_contents = self.dirstate_file_contents()?;
                     if dirstate_file_contents.is_empty() {
                         // Missing or empty dirstate: empty map with null parents.
                         self.dirstate_parents.set(DirstateParents::NULL);
                         if self.has_dirstate_v2() {
                             self.dirstate_data_file_uuid.set(None);
                         }
                         Ok(OwningDirstateMap::new_empty(Vec::new()))
                     } else if self.has_dirstate_v2() {
                         // dirstate-v2: `.hg/dirstate` is a docket pointing at a
                         // separate data file.
                         let docket = crate::dirstate_tree::on_disk::read_docket(
                             &dirstate_file_contents,
                         )?;
                         self.dirstate_parents.set(docket.parents());
                         self.dirstate_data_file_uuid
                             .set(Some(docket.uuid.to_owned()));
                         let data_size = docket.data_size();
                         let metadata = docket.tree_metadata();
                         if let Some(data_mmap) = self
                             .hg_vfs()
                             .mmap_open(docket.data_filename())
                             .io_not_found_as_none()?
                         {
                             OwningDirstateMap::new_v2(data_mmap, data_size, metadata)
                         } else {
                             // The data file does not exist: use empty data.
                             OwningDirstateMap::new_v2(Vec::new(), data_size, metadata)
                         }
                     } else {
                         // dirstate-v1: parents and entries live in the same file.
                         let (map, parents) =
                             OwningDirstateMap::new_v1(dirstate_file_contents)?;
                         self.dirstate_parents.set(parents);
                         Ok(map)
                     }
                 }
                 /// Returns a shared borrow of the dirstate map, building it with
                 /// `new_dirstate_map` if it has not been loaded yet.
                 pub fn dirstate_map(
                     &self,
                 ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
                     let load = || self.new_dirstate_map();
                     self.dirstate_map.get_or_init(load)
                 }
                 /// Returns a mutable borrow of the dirstate map, building it with
                 /// `new_dirstate_map` if it has not been loaded yet.
                 pub fn dirstate_map_mut(
                     &self,
                 ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
                     let load = || self.new_dirstate_map();
                     self.dirstate_map.get_mut_or_init(load)
                 }
                 /// Opens the changelog from the store, using the nodemap if the
                 /// repository has one.
                 fn new_changelog(&self) -> Result<Changelog, HgError> {
                     let store = self.store_vfs();
                     let use_nodemap = self.has_nodemap();
                     Changelog::open(&store, use_nodemap)
                 }
                 /// Returns a shared borrow of the changelog, opening it on first use.
                 pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
                     let open = || self.new_changelog();
                     self.changelog.get_or_init(open)
                 }
                 /// Returns a mutable borrow of the changelog, opening it on first use.
                 pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
                     let open = || self.new_changelog();
                     self.changelog.get_mut_or_init(open)
                 }
                 /// Opens the manifestlog from the store, using the nodemap if the
                 /// repository has one.
                 fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
                     let store = self.store_vfs();
                     let use_nodemap = self.has_nodemap();
                     Manifestlog::open(&store, use_nodemap)
                 }
                 /// Returns a shared borrow of the manifestlog, opening it on first
                 /// use.
                 pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
                     let open = || self.new_manifestlog();
                     self.manifestlog.get_or_init(open)
                 }
                 /// Returns a mutable borrow of the manifestlog, opening it on first
                 /// use.
                 pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
                     let open = || self.new_manifestlog();
                     self.manifestlog.get_mut_or_init(open)
                 }
                 /// Returns the manifest of the *changeset* with the given node ID.
                 ///
                 /// The changeset is looked up in the changelog, and the manifest node
                 /// it references is then resolved in the manifestlog.
                 pub fn manifest_for_node(
                     &self,
                     node: impl Into<NodePrefix>,
                 ) -> Result<Manifest, RevlogError> {
                     // Borrow the manifestlog first, then the changelog, so that lazy
                     // initialization happens in the same order as before.
                     let manifestlog = self.manifestlog()?;
                     let changelog = self.changelog()?;
                     let changeset = changelog.data_for_node(node.into())?;
                     let manifest_node = changeset.manifest_node()?;
                     manifestlog.data_for_node(manifest_node.into())
                 }
                 /// Returns the manifest of the *changeset* with the given revision
                 /// number.
                 ///
                 /// The changeset is looked up in the changelog, and the manifest node
                 /// it references is then resolved in the manifestlog.
                 pub fn manifest_for_rev(
                     &self,
                     revision: Revision,
                 ) -> Result<Manifest, RevlogError> {
                     // Borrow the manifestlog first, then the changelog, so that lazy
                     // initialization happens in the same order as before.
                     let manifestlog = self.manifestlog()?;
                     let changelog = self.changelog()?;
                     let changeset = changelog.data_for_rev(revision)?;
                     let manifest_node = changeset.manifest_node()?;
                     manifestlog.data_for_node(manifest_node.into())
                 }
                 /// Returns whether the dirstate has a tracked `.hgsub` entry.
                 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
                     let dirstate = self.dirstate_map()?;
                     let hgsub = dirstate.get(HgPath::new(".hgsub"))?;
                     Ok(hgsub.map_or(false, |entry| entry.tracked()))
                 }
                 /// Opens the filelog of the file at `path` from this repository's
                 /// store.
                 pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
                     let store = self.store_vfs();
                     Filelog::open_vfs(&store, path)
                 }
                 /// Write to disk any updates that were made through `dirstate_map_mut`.
                 ///
                 /// The "wlock" must be held while calling this.
                 /// See for example `try_with_wlock_no_wait`.
                 ///
                 /// With dirstate-v2, the tree data is first written to a
                 /// `dirstate.{uuid}` data file (either in place, starting at the old
                 /// data size, or into a freshly named file), then the docket is
                 /// written atomically to `dirstate`, and finally the superseded data
                 /// file, if any, is removed. Otherwise the v1 packed map is written
                 /// atomically to `dirstate`.
                 ///
                 /// TODO: have a `WritableRepo` type only accessible while holding the
                 /// lock?
                 pub fn write_dirstate(&self) -> Result<(), DirstateError> {
                     let map = self.dirstate_map()?;
                     // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
                     // it’s unset
                     let parents = self.dirstate_parents()?;
                     let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() {
                         let uuid_opt = self
                             .dirstate_data_file_uuid
                             .get_or_init(|| self.read_dirstate_data_file_uuid())?;
                         let uuid_opt = uuid_opt.as_ref();
                         // Appending is only requested when a data file ID is already
                         // known; `pack_v2` reports back whether it actually appended.
                         let can_append = uuid_opt.is_some();
                         let (data, tree_metadata, append, old_data_size) =
                             map.pack_v2(can_append)?;
                         // Reuse the uuid, or generate a new one, keeping the old for
                         // deletion.
                         let (uuid, old_uuid) = match uuid_opt {
                             Some(uuid) => {
                                 let as_str = std::str::from_utf8(uuid)
                                     .map_err(|_| {
                                         HgError::corrupted(
                                             "non-UTF-8 dirstate data file ID",
                                         )
                                     })?
                                     .to_owned();
                                 if append {
                                     (as_str, None)
                                 } else {
                                     (DirstateDocket::new_uid(), Some(as_str))
                                 }
                             }
                             None => (DirstateDocket::new_uid(), None),
                         };
                         let data_filename = format!("dirstate.{}", uuid);
                         let data_filename = self.hg_vfs().join(data_filename);
                         let mut options = std::fs::OpenOptions::new();
                         options.write(true);
                         // Why are we not using the O_APPEND flag when appending?
                         //
                         // - O_APPEND makes it trickier to deal with garbage at the end of
                         //   the file, left by a previous uncommitted transaction. By
                         //   starting the write at [old_data_size] we make sure we erase
                         //   all such garbage.
                         //
                         // - O_APPEND requires to special-case 0-byte writes, whereas we
                         //   don't need that.
                         //
                         // - Some OSes have bugs in implementation O_APPEND:
                         //   revlog.py talks about a Solaris bug, but we also saw some ZFS
                         //   bug: https://github.com/openzfs/zfs/pull/3124,
                         //   https://github.com/openzfs/zfs/issues/13370
                         //
                         // When not appending, the data goes to a newly named file, which
                         // `create_new` requires not to exist yet.
                         if !append {
                             options.create_new(true);
                         }
                         // The closure groups the I/O steps so that any failure is
                         // reported once, with the data file name attached.
                         let data_size = (|| {
                             // TODO: loop and try another random ID if !append and this
                             // returns `ErrorKind::AlreadyExists`? Collision chance of two
                             // random IDs is one in 2**32
                             let mut file = options.open(&data_filename)?;
                             if append {
                                 file.seek(SeekFrom::Start(old_data_size as u64))?;
                             }
                             file.write_all(&data)?;
                             file.flush()?;
                             // The position after the write is the data size recorded in
                             // the docket.
                             file.seek(SeekFrom::Current(0))
                         })()
                         .when_writing_file(&data_filename)?;
                         let packed_dirstate = DirstateDocket::serialize(
                             parents,
                             tree_metadata,
                             data_size,
                             uuid.as_bytes(),
                         )
                         .map_err(|_: std::num::TryFromIntError| {
                             HgError::corrupted("overflow in dirstate docket serialization")
                         })?;
                         (packed_dirstate, old_uuid)
                     } else {
                         // dirstate-v1: no separate data file, hence nothing to remove.
                         (map.pack_v1(parents)?, None)
                     };
                     let vfs = self.hg_vfs();
                     vfs.atomic_write("dirstate", &packed_dirstate)?;
                     if let Some(uuid) = old_uuid_to_remove {
                         // Remove the old data file after the new docket pointing to the
                         // new data file was written.
                         vfs.remove_file(format!("dirstate.{}", uuid))?;
                     }
                     Ok(())
                 }
             }
             /// Lazily-initialized component of `Repo` with interior mutability
             ///
             /// This differs from `OnceCell` in that the value can still be "deinitialized"
             /// later by setting its inner `Option` to `None`. It also takes the
             /// initialization function as an argument when the value is requested, not
             /// when the instance is created.
             struct LazyCell<T> {
                 /// `None` until the value is set or initialized.
                 value: RefCell<Option<T>>,
             }
             impl<T> LazyCell<T> {
                 /// Creates an empty (not yet initialized) cell.
                 fn new() -> Self {
                     Self {
                         value: RefCell::new(None),
                     }
                 }
                 /// Stores `value`, replacing any previous one.
                 ///
                 /// Panics (through `RefCell`) if the cell is currently borrowed.
                 fn set(&self, value: T) {
                     *self.value.borrow_mut() = Some(value)
                 }
                 /// Returns a shared borrow of the value, calling `init` to create it
                 /// if the cell is empty.
                 ///
                 /// `init` is called at most once, so any `FnOnce` is accepted. If it
                 /// fails, its error is returned and the cell stays empty.
                 ///
                 /// Panics (through `RefCell`) if the value is mutably borrowed, or if
                 /// the cell is empty and must be filled while another borrow exists.
                 fn get_or_init<E>(
                     &self,
                     init: impl FnOnce() -> Result<T, E>,
                 ) -> Result<Ref<T>, E> {
                     let mut borrowed = self.value.borrow();
                     if borrowed.is_none() {
                         drop(borrowed);
                         // Only use `borrow_mut` if it is really needed to avoid panic in
                         // case there is another outstanding borrow but mutation is not
                         // needed.
                         *self.value.borrow_mut() = Some(init()?);
                         borrowed = self.value.borrow()
                     }
                     // The option is `Some` here: it either already was, or was just
                     // filled above.
                     Ok(Ref::map(borrowed, |option| option.as_ref().unwrap()))
                 }
                 /// Returns a mutable borrow of the value, calling `init` to create it
                 /// if the cell is empty.
                 ///
                 /// `init` is called at most once, so any `FnOnce` is accepted. If it
                 /// fails, its error is returned and the cell stays empty.
                 ///
                 /// Panics (through `RefCell`) if the cell is currently borrowed. Note
                 /// that the mutable borrow is held while `init` runs.
                 fn get_mut_or_init<E>(
                     &self,
                     init: impl FnOnce() -> Result<T, E>,
                 ) -> Result<RefMut<T>, E> {
                     let mut borrowed = self.value.borrow_mut();
                     if borrowed.is_none() {
                         *borrowed = Some(init()?);
                     }
                     Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap()))
                 }
             }

rust/hg-core/src/revlog/changelog.rs

0 +1 -1

             use crate::errors::HgError;
-            use crate::revlog::revlog::{Revlog, RevlogEntry, RevlogError};
             use crate::revlog::Revision;
             use crate::revlog::{Node, NodePrefix};
+            use crate::revlog::{Revlog, RevlogEntry, RevlogError};
             use crate::utils::hg_path::HgPath;
             use crate::vfs::Vfs;
             use itertools::Itertools;
             use std::ascii::escape_default;
             use std::borrow::Cow;
             use std::fmt::{Debug, Formatter};
             /// A specialized `Revlog` to work with the `changelog` data format.
             pub struct Changelog {
                 /// The underlying generic revlog.
                 pub(crate) revlog: Revlog,
             }
             impl Changelog {
                 /// Opens the changelog index (`00changelog.i`) through the given store
                 /// VFS.
                 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
                     Ok(Self {
                         revlog: Revlog::open(
                             store_vfs,
                             "00changelog.i",
                             None,
                             use_nodemap,
                         )?,
                     })
                 }
                 /// Returns the parsed `ChangelogRevisionData` for the given node ID.
                 pub fn data_for_node(
                     &self,
                     node: NodePrefix,
                 ) -> Result<ChangelogRevisionData, RevlogError> {
                     self.data_for_rev(self.revlog.rev_from_node(node)?)
                 }
                 /// Returns the raw `RevlogEntry` of the given revision number.
                 pub fn entry_for_rev(
                     &self,
                     rev: Revision,
                 ) -> Result<RevlogEntry, RevlogError> {
                     self.revlog.get_entry(rev)
                 }
                 /// Returns the parsed `ChangelogRevisionData` of the given revision
                 /// number.
                 ///
                 /// Empty revision data yields the null changeset data; data that
                 /// cannot be parsed is reported as a corrupted repository.
                 pub fn data_for_rev(
                     &self,
                     rev: Revision,
                 ) -> Result<ChangelogRevisionData, RevlogError> {
                     let raw = self.revlog.get_rev_data(rev)?;
                     if raw.is_empty() {
                         return Ok(ChangelogRevisionData::null());
                     }
                     match ChangelogRevisionData::new(raw) {
                         Ok(data) => Ok(data),
                         Err(err) => {
                             let message = format!(
                                 "Invalid changelog data for revision {}: {:?}",
                                 rev, err
                             );
                             Err(RevlogError::Other(HgError::CorruptedRepository(
                                 message,
                             )))
                         }
                     }
                 }
                 /// Returns the node ID of the given revision, if the underlying
                 /// revlog has one for it.
                 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
                     let revlog = &self.revlog;
                     revlog.node_from_rev(rev)
                 }
                 /// Resolves a node ID (or prefix) to its revision number.
                 pub fn rev_from_node(
                     &self,
                     node: NodePrefix,
                 ) -> Result<Revision, RevlogError> {
                     let revlog = &self.revlog;
                     revlog.rev_from_node(node)
                 }
             }
             /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
             ///
             /// The data is laid out as newline-separated sections: the hex manifest
             /// node, the user, the timestamp line, zero or more file names, an empty
             /// line, and then the description.
             #[derive(PartialEq)]
             pub struct ChangelogRevisionData<'changelog> {
                 /// The data bytes of the `changelog` entry.
                 bytes: Cow<'changelog, [u8]>,
                 /// The end offset for the hex manifest (not including the newline)
                 manifest_end: usize,
                 /// The end offset for the user+email (not including the newline)
                 user_end: usize,
                 /// The end offset for the timestamp+timezone+extras (not including the
                 /// newline)
                 timestamp_end: usize,
                 /// The end offset for the file list (not including the newline)
                 ///
                 /// When the file list is empty this is equal to `timestamp_end`.
                 files_end: usize,
             }
             impl<'changelog> ChangelogRevisionData<'changelog> {
                 /// Parses the section offsets out of raw changelog revision data.
                 ///
                 /// Returns a "corrupted" error if the data ends before the empty line
                 /// that terminates the file list has been fully read.
                 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
                     let mut line_iter = bytes.split(|b| b == &b'\n');
                     let manifest_end = line_iter
                         .next()
                         .expect("Empty iterator from split()?")
                         .len();
                     let user_slice = line_iter.next().ok_or_else(|| {
                         HgError::corrupted("Changeset data truncated after manifest line")
                     })?;
                     let user_end = manifest_end + 1 + user_slice.len();
                     let timestamp_slice = line_iter.next().ok_or_else(|| {
                         HgError::corrupted("Changeset data truncated after user line")
                     })?;
                     let timestamp_end = user_end + 1 + timestamp_slice.len();
                     // Offset of the start of the line currently being examined.
                     let mut files_end = timestamp_end + 1;
                     loop {
                         let line = line_iter.next().ok_or_else(|| {
                             HgError::corrupted("Changeset data truncated in files list")
                         })?;
                         if line.is_empty() {
                             if files_end == bytes.len() {
                                 // The list of files ended with a single newline (there
                                 // should be two)
                                 return Err(HgError::corrupted(
                                     "Changeset data truncated after files list",
                                 ));
                             }
                             // Step back onto the newline that ends the last file name
                             // (or the timestamp line if there are no files).
                             files_end -= 1;
                             break;
                         }
                         files_end += line.len() + 1;
                     }
                     Ok(Self {
                         bytes,
                         manifest_end,
                         user_end,
                         timestamp_end,
                         files_end,
                     })
                 }
                 /// Returns the data of the null changeset: an all-zero manifest node,
                 /// an empty user, a `0 0` timestamp, no files and no description.
                 fn null() -> Self {
                     Self::new(Cow::Borrowed(
                         b"0000000000000000000000000000000000000000\n\n0 0\n\n",
                     ))
                     .unwrap()
                 }
                 /// Return an iterator over the lines of the entry.
                 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
                     self.bytes.split(|b| b == &b'\n')
                 }
                 /// Return the node id of the `manifest` referenced by this `changelog`
                 /// entry.
                 pub fn manifest_node(&self) -> Result<Node, HgError> {
                     let manifest_node_hex = &self.bytes[..self.manifest_end];
                     Node::from_hex_for_repo(manifest_node_hex)
                 }
                 /// The full user string (usually a name followed by an email enclosed in
                 /// angle brackets)
                 pub fn user(&self) -> &[u8] {
                     &self.bytes[self.manifest_end + 1..self.user_end]
                 }
                 /// The full timestamp line (timestamp in seconds, offset in seconds, and
                 /// possibly extras)
                 // TODO: We should expose this in a more useful way
                 pub fn timestamp_line(&self) -> &[u8] {
                     &self.bytes[self.user_end + 1..self.timestamp_end]
                 }
                 /// The files changed in this revision.
                 ///
                 /// Yields nothing when the changeset lists no files (as is the case
                 /// for the null changeset).
                 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
                     // With an empty file list `files_end == timestamp_end`, so the
                     // range below starts after it ends. Indexing with it would panic;
                     // `get` returns `None` instead, which becomes an empty iterator.
                     self.bytes
                         .get(self.timestamp_end + 1..self.files_end)
                         .into_iter()
                         .flat_map(|file_list| file_list.split(|b| b == &b'\n'))
                         .map(HgPath::new)
                 }
                 /// The change description.
                 pub fn description(&self) -> &[u8] {
                     // Skip the newline ending the file list and the empty line.
                     &self.bytes[self.files_end + 2..]
                 }
             }
             /// Debug output showing the raw bytes and each parsed section, with
             /// non-printable bytes escaped.
             impl Debug for ChangelogRevisionData<'_> {
                 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
                     // With an empty file list `files_end == timestamp_end`, making the
                     // range start after its end. Use `get` so that formatting such an
                     // entry shows an empty list instead of panicking.
                     let files = self
                         .bytes
                         .get(self.timestamp_end + 1..self.files_end)
                         .unwrap_or(&[]);
                     f.debug_struct("ChangelogRevisionData")
                         .field("bytes", &debug_bytes(&self.bytes))
                         .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
                         .field(
                             "user",
                             &debug_bytes(
                                 &self.bytes[self.manifest_end + 1..self.user_end],
                             ),
                         )
                         .field(
                             "timestamp",
                             &debug_bytes(
                                 &self.bytes[self.user_end + 1..self.timestamp_end],
                             ),
                         )
                         .field("files", &debug_bytes(files))
                         .field(
                             "description",
                             &debug_bytes(&self.bytes[self.files_end + 2..]),
                         )
                         .finish()
                 }
             }
             /// Renders `bytes` as a printable string, escaping each byte with
             /// `std::ascii::escape_default`.
             fn debug_bytes(bytes: &[u8]) -> String {
                 // `escape_default` only yields ASCII bytes, so each one maps directly
                 // to a `char`; no intermediate buffer or lossy UTF-8 pass is needed.
                 bytes
                     .iter()
                     .flat_map(|b| escape_default(*b))
                     .map(char::from)
                     .collect()
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use pretty_assertions::assert_eq;
                 /// Truncated inputs must be rejected rather than parsed.
                 #[test]
                 fn test_create_changelogrevisiondata_invalid() {
                     // Completely empty
                     assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
                     // No newline after manifest
                     assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
                     // No newline after user
                     assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
                     // No newline after timestamp
                     assert!(
                         ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
                     );
                     // Missing newline after files
                     assert!(ChangelogRevisionData::new(Cow::Borrowed(
                         b"abcd\n\n0 0\nfile1\nfile2"
                     ))
                     .is_err(),);
                     // Only one newline after files
                     assert!(ChangelogRevisionData::new(Cow::Borrowed(
                         b"abcd\n\n0 0\nfile1\nfile2\n"
                     ))
                     .is_err(),);
                 }
                 /// A well-formed entry exposes each of its sections.
                 #[test]
                 fn test_create_changelogrevisiondata() {
                     // Newlines are written as explicit escapes (with `\` line
                     // continuations) so that source indentation cannot leak into the
                     // fixture.
                     let data = ChangelogRevisionData::new(Cow::Borrowed(
                         b"0123456789abcdef0123456789abcdef01234567\n\
                           Some One <someone@example.com>\n\
                           0 0\n\
                           file1\n\
                           file2\n\
                           \n\
                           some\n\
                           commit\n\
                           message",
                     ))
                     .unwrap();
                     assert_eq!(
                         data.manifest_node().unwrap(),
                         Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                             .unwrap()
                     );
                     assert_eq!(data.user(), b"Some One <someone@example.com>");
                     assert_eq!(data.timestamp_line(), b"0 0");
                     assert_eq!(
                         data.files().collect_vec(),
                         vec![HgPath::new("file1"), HgPath::new("file2")]
                     );
                     assert_eq!(data.description(), b"some\ncommit\nmessage");
                 }
             }

rust/hg-core/src/revlog/filelog.rs

0 +2 -2

             use crate::errors::HgError;
             use crate::repo::Repo;
             use crate::revlog::path_encode::path_encode;
-            use crate::revlog::revlog::RevlogEntry;
-            use crate::revlog::revlog::{Revlog, RevlogError};
             use crate::revlog::NodePrefix;
             use crate::revlog::Revision;
+            use crate::revlog::RevlogEntry;
+            use crate::revlog::{Revlog, RevlogError};
             use crate::utils::files::get_path_from_bytes;
             use crate::utils::hg_path::HgPath;
             use crate::utils::SliceExt;
             use std::path::PathBuf;
             /// A specialized `Revlog` to work with file data logs.
             pub struct Filelog {
                 /// The underlying generic revlog.
                 revlog: Revlog,
             }
             impl Filelog {
                 /// Opens the filelog of `file_path` through the given store VFS.
                 ///
                 /// The index and data paths are derived from `file_path` with
                 /// `store_path`, using the `.i` and `.d` suffixes respectively.
                 pub fn open_vfs(
                     store_vfs: &crate::vfs::Vfs<'_>,
                     file_path: &HgPath,
                 ) -> Result<Self, HgError> {
                     let index = store_path(file_path, b".i");
                     let data = store_path(file_path, b".d");
                     Ok(Self {
                         revlog: Revlog::open(store_vfs, index, Some(&data), false)?,
                     })
                 }
                 /// Opens the filelog of `file_path` from the store of `repo`.
                 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
                     let store = repo.store_vfs();
                     Self::open_vfs(&store, file_path)
                 }
                 /// Returns the revision data for a node ID.
                 ///
                 /// The given node ID is that of the file as found in a filelog, not of
                 /// a changeset.
                 pub fn data_for_node(
                     &self,
                     file_node: impl Into<NodePrefix>,
                 ) -> Result<FilelogRevisionData, RevlogError> {
                     self.data_for_rev(self.revlog.rev_from_node(file_node.into())?)
                 }
                 /// Returns the revision data for a revision number.
                 ///
                 /// The given revision is that of the file as found in a filelog, not
                 /// of a changeset.
                 pub fn data_for_rev(
                     &self,
                     file_rev: Revision,
                 ) -> Result<FilelogRevisionData, RevlogError> {
                     let raw = self.revlog.get_rev_data(file_rev)?;
                     Ok(FilelogRevisionData(raw.into_owned()))
                 }
                 /// Returns the filelog entry for a node ID.
                 ///
                 /// The given node ID is that of the file as found in a filelog, not of
                 /// a changeset.
                 pub fn entry_for_node(
                     &self,
                     file_node: impl Into<NodePrefix>,
                 ) -> Result<FilelogEntry, RevlogError> {
                     self.entry_for_rev(self.revlog.rev_from_node(file_node.into())?)
                 }
                 /// Returns the filelog entry for a revision number.
                 ///
                 /// The given revision is that of the file as found in a filelog, not
                 /// of a changeset.
                 pub fn entry_for_rev(
                     &self,
                     file_rev: Revision,
                 ) -> Result<FilelogEntry, RevlogError> {
                     let entry = self.revlog.get_entry(file_rev)?;
                     Ok(FilelogEntry(entry))
                 }
             }
             /// Computes the store-relative path of a filelog file: `data/`, the
             /// tracked path and `suffix` are joined, then passed through
             /// `path_encode`.
             fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
                 let tracked = hg_path.as_bytes();
                 let mut plain: Vec<u8> = Vec::new();
                 plain.extend_from_slice(b"data/");
                 plain.extend_from_slice(tracked);
                 plain.extend_from_slice(suffix);
                 let encoded = path_encode(&plain);
                 get_path_from_bytes(&encoded).into()
             }
             /// A filelog revision, wrapping the underlying `RevlogEntry`.
             pub struct FilelogEntry<'a>(RevlogEntry<'a>);
             impl FilelogEntry<'_> {
                 /// Cheap length comparison that avoids computing `self.data()`, which
                 /// can be expensive (decompression and delta resolution).
                 ///
                 /// Using only revlog index information, including
                 /// `RevlogEntry::uncompressed_len`:
                 ///
                 /// * Returns `true` if the length that `self.data().file_data().len()`
                 ///   would return is definitely **not equal** to `other_len`.
                 /// * Returns `false` if available information is inconclusive.
                 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
                     // The Python counterparts of this logic are: basefilectx.cmp,
                     // filelog.size, storageutil.filerevisioncopied, revlog.size and
                     // revlog.rawsize.
                     //
                     // Below, "file data length" means what
                     // `self.data().file_data().len()` would return.
                     let entry = &self.0;

                     // Censored revisions are treated as having zero-length file data.
                     if entry.is_censored() {
                         return other_len != 0;
                     }

                     // A flag processor may change the length: nothing can be
                     // concluded.
                     if entry.has_length_affecting_flag_processor() {
                         return false;
                     }

                     // The index (usually) records the size of the revision data after
                     // decompression and delta resolution, as `Revlog::get_rev_data`
                     // would return it. For filelogs, that is the file contents
                     // preceded by an optional metadata block.
                     let stored_len = match entry.uncompressed_len() {
                         Some(len) => len as u64,
                         // The field was set to -1: the length is unknown without
                         // decompressing.
                         None => return false,
                     };

                     // The stored length is the file data length plus the optional
                     // metadata length, so the file data length cannot exceed it. If
                     // the stored length is below `other_len`, so is the file data
                     // length.
                     if stored_len < other_len {
                         return true;
                     }

                     // The file data length may equal `other_len` if there is an empty
                     // metadata block (2 start marker bytes + 2 end marker bytes).
                     // Such a block is added when there would otherwise be no
                     // metadata but the file data itself begins with the start
                     // marker, which would be ambiguous.
                     if stored_len == other_len + 4 {
                         return false;
                     }

                     // Filelog ancestry only provides hints to delta generation; it is
                     // not meaningful the way changelog ancestry is. p1 and p2 are set
                     // to null when making a copy or rename, since the contents are
                     // likely unrelated to what previously existed at the destination.
                     //
                     // So with a null p1 there may or may not be copy metadata, and
                     // nothing more can be deduced without computing the file data.
                     // Conversely, with a non-null p1 there is no copy metadata; as
                     // copy metadata is currently the only kind kept in filelog revlog
                     // data, the stored length is then the file data length.
                     //
                     // Note that this reasoning may be invalidated in the presence of
                     // merges made by some previous versions of Mercurial that swapped
                     // p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
                     // and `tests/test-issue6528.t`.
                     entry.has_p1() && stored_len != other_len
                 }
                 /// Computes the full revision data of this entry.
                 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
                     let raw = self.0.data()?;
                     Ok(FilelogRevisionData(raw.into_owned()))
                 }
             }
             /// The data for one revision in a filelog, uncompressed and delta-resolved.
             pub struct FilelogRevisionData(Vec<u8>);
             impl FilelogRevisionData {
                 /// Splits the revision into its optional metadata block and the file
                 /// data.
                 ///
                 /// Metadata, when present, sits at the very start, enclosed between
                 /// two `\x01\n` delimiters. A start delimiter with no matching end
                 /// delimiter is reported as corruption.
                 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
                     const DELIMITER: &[u8; 2] = b"\x01\n";
                     let after_start = match self.0.drop_prefix(DELIMITER) {
                         Some(rest) => rest,
                         // No start delimiter: everything is file data.
                         None => return Ok((None, self.0.as_slice())),
                     };
                     match after_start.split_2_by_slice(DELIMITER) {
                         Some((metadata, data)) => Ok((Some(metadata), data)),
                         None => Err(HgError::corrupted(
                             "Missing metadata end delimiter in filelog entry",
                         )),
                     }
                 }
                 /// Returns the file contents at this revision, stripped of any metadata
                 pub fn file_data(&self) -> Result<&[u8], HgError> {
                     self.split().map(|(_metadata, data)| data)
                 }
                 /// Consume the entry, and convert it into data, discarding any metadata,
                 /// if present.
                 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
                     match self.split()? {
                         // Metadata present: copy out only the file data part.
                         (Some(_metadata), data) => Ok(data.to_owned()),
                         // No metadata: the whole buffer is the file data.
                         (None, _) => Ok(self.0),
                     }
                 }
             }

rust/hg-core/src/revlog/manifest.rs

0 +1 -1

             use crate::errors::HgError;
-            use crate::revlog::revlog::{Revlog, RevlogError};
             use crate::revlog::Revision;
             use crate::revlog::{Node, NodePrefix};
+            use crate::revlog::{Revlog, RevlogError};
             use crate::utils::hg_path::HgPath;
             use crate::utils::SliceExt;
             use crate::vfs::Vfs;
             /// A `Revlog` wrapper dedicated to the `manifest` data format.
             pub struct Manifestlog {
                 /// The underlying generic revlog.
                 revlog: Revlog,
             }
             impl Manifestlog {
                 /// Open the manifest revlog (`00manifest.i`) through the given store
                 /// VFS.
                 ///
                 /// `use_nodemap` is forwarded to `Revlog::open`.
                 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
                     Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)
                         .map(|revlog| Self { revlog })
                 }
                 /// Return the `Manifest` for the given node ID.
                 ///
                 /// Note: this is a node ID in the manifestlog, typically found through
                 /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any
                 /// changeset.
                 ///
                 /// See also `Repo::manifest_for_node`
                 pub fn data_for_node(
                     &self,
                     node: NodePrefix,
                 ) -> Result<Manifest, RevlogError> {
                     self.revlog
                         .rev_from_node(node)
                         .and_then(|rev| self.data_for_rev(rev))
                 }
                 /// Return the `Manifest` of a given revision number.
                 ///
                 /// Note: this is a revision number in the manifestlog, *not* of any
                 /// changeset.
                 ///
                 /// See also `Repo::manifest_for_rev`
                 pub fn data_for_rev(
                     &self,
                     rev: Revision,
                 ) -> Result<Manifest, RevlogError> {
                     let revision_data = self.revlog.get_rev_data(rev)?;
                     Ok(Manifest {
                         bytes: revision_data.into_owned(),
                     })
                 }
             }
             /// The data of one manifestlog revision, with helpers to interpret the
             /// `manifest` bytes.
             #[derive(Debug)]
             pub struct Manifest {
                 /// Format for a manifest: flat sequence of variable-size entries,
                 /// sorted by path, each as:
                 ///
                 /// ```text
                 /// <path> \0 <hex_node_id> <flags> \n
                 /// ```
                 ///
                 /// The last entry is also terminated by a newline character.
                 /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
                 bytes: Vec<u8>,
             }
             impl Manifest {
                 /// Iterate over the entries, one per non-empty newline-separated
                 /// line, each parsed with `ManifestEntry::from_raw`.
                 pub fn iter(
                     &self,
                 ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
                     self.bytes
                         .split(|&byte| byte == b'\n')
                         .filter(|line| !line.is_empty())
                         .map(|line| ManifestEntry::from_raw(line))
                 }
                 /// If the given path is in this manifest, return its filelog node ID
                 pub fn find_by_path(
                     &self,
                     path: &HgPath,
                 ) -> Result<Option<ManifestEntry>, HgError> {
                     use std::cmp::Ordering::*;
                     let wanted = path.as_bytes();
                     // Invariant: `remaining` always starts and ends on an entry
                     // boundary.
                     let mut remaining = &*self.bytes;
                     // Binary search algorithm derived from `[T]::binary_search_by`
                     // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
                     // except we don’t have a slice of entries. Instead we jump to the
                     // middle of the byte slice and look around for entry delimiters
                     // (newlines).
                     while let Some(entry_range) =
                         Self::find_entry_near_middle_of(remaining)?
                     {
                         let (entry_path, rest) =
                             ManifestEntry::split_path(&remaining[entry_range.clone()])?;
                         match entry_path.cmp(wanted) {
                             // Continue after this entry, skipping its newline.
                             Less => remaining = &remaining[entry_range.end + 1..],
                             // Continue with everything before this entry.
                             Greater => remaining = &remaining[..entry_range.start],
                             Equal => {
                                 return Ok(Some(ManifestEntry::from_path_and_rest(
                                     entry_path, rest,
                                 )));
                             }
                         }
                     }
                     Ok(None)
                 }
                 /// If there is at least one, return the byte range of an entry *excluding*
                 /// the final newline.
                 fn find_entry_near_middle_of(
                     bytes: &[u8],
                 ) -> Result<Option<std::ops::Range<usize>>, HgError> {
                     if bytes.is_empty() {
                         return Ok(None);
                     }
                     // Integer division rounds down, so `middle < bytes.len()`.
                     let middle = bytes.len() / 2;
                     let (before, after) = bytes.split_at(middle);
                     let is_newline = |&byte: &u8| byte == b'\n';
                     // Without a newline in `before`, we choose the first entry in
                     // `bytes`.
                     let entry_start =
                         before.iter().rposition(is_newline).map_or(0, |i| i + 1);
                     // In a well-formed manifest:
                     //
                     // * Since `bytes` is not empty, it contains at least one entry
                     // * Every entry ends with a newline
                     // * Since `middle < bytes.len()`, `after` contains at least the
                     //   newline at the end of the last entry of `bytes`.
                     //
                     // So a missing newline means the manifest is not well-formed.
                     // No `+ 1` on the position, to exclude the newline from the range.
                     let entry_end = after
                         .iter()
                         .position(is_newline)
                         .map(|i| middle + i)
                         .ok_or_else(|| {
                             HgError::corrupted("manifest entry without \\n delimiter")
                         })?;
                     Ok(Some(entry_start..entry_end))
                 }
             }
             /// A single entry of a `Manifest`: a path together with its node ID (in
             /// hexadecimal) and its optional flag, all borrowed from the manifest bytes.
             #[derive(Debug)]
             pub struct ManifestEntry<'manifest> {
                 /// Path of the entry (the bytes before the `\0` delimiter).
                 pub path: &'manifest HgPath,
                 /// Hexadecimal node ID, without the trailing flag byte if any.
                 pub hex_node_id: &'manifest [u8],
                 /// `Some` values are `b'x'`, `b'l'`, or `b't'`
                 pub flags: Option<u8>,
             }
             impl<'a> ManifestEntry<'a> {
                 /// Split one raw entry (without its newline) at the `\0` delimiter
                 /// into the path and the remaining bytes.
                 ///
                 /// Fails with a corruption error if there is no `\0`.
                 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
                     match bytes.split_2(b'\0') {
                         Some(parts) => Ok(parts),
                         None => Err(HgError::corrupted(
                             "manifest entry without \\0 delimiter",
                         )),
                     }
                 }
                 /// Build an entry from an already split path and remainder.
                 ///
                 /// A trailing `x`, `l` or `t` byte in `rest` is taken as the flag;
                 /// otherwise all of `rest` is the hexadecimal node ID.
                 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
                     let (hex_node_id, flags) = match rest.split_last() {
                         Some((&flag, hex)) if matches!(flag, b'x' | b'l' | b't') => {
                             (hex, Some(flag))
                         }
                         _ => (rest, None),
                     };
                     Self {
                         path: HgPath::new(path),
                         hex_node_id,
                         flags,
                     }
                 }
                 /// Parse one raw entry (without its newline).
                 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
                     Self::split_path(bytes)
                         .map(|(path, rest)| Self::from_path_and_rest(path, rest))
                 }
                 /// Parse `hex_node_id` with `Node::from_hex_for_repo`.
                 pub fn node_id(&self) -> Result<Node, HgError> {
                     Node::from_hex_for_repo(self.hex_node_id)
                 }
             }

rust/hg-core/src/revlog/mod.rs ~~rust/hg-core/src/revlog.rs~~

0 renamed +640 -2

This diff has been collapsed as it changes many lines, (642 lines changed) Show them Hide them
	@@ -1,72 +1,710 b''
	1	// Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>	1	// Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
	2	// and Mercurial contributors	2	// and Mercurial contributors
	3	//	3	//
	4	// This software may be used and distributed according to the terms of the	4	// This software may be used and distributed according to the terms of the
	5	// GNU General Public License version 2 or any later version.	5	// GNU General Public License version 2 or any later version.
	6	//! Mercurial concepts for handling revision history	6	//! Mercurial concepts for handling revision history
	7		7
	8	pub mod node;	8	pub mod node;
	9	pub mod nodemap;	9	pub mod nodemap;
	10	mod nodemap_docket;	10	mod nodemap_docket;
	11	pub mod path_encode;	11	pub mod path_encode;
	12	pub use node::{FromHexError, Node, NodePrefix};	12	pub use node::{FromHexError, Node, NodePrefix};
	13	pub mod changelog;	13	pub mod changelog;
	14	pub mod filelog;	14	pub mod filelog;
	15	pub mod index;	15	pub mod index;
	16	pub mod manifest;	16	pub mod manifest;
	17	pub mod patch;	17	pub mod patch;
	18	pub mod revlog;	18
			19	use std::borrow::Cow;
			20	use std::io::Read;
			21	use std::ops::Deref;
			22	use std::path::Path;
			23
			24	use flate2::read::ZlibDecoder;
			25	use sha1::{Digest, Sha1};
			26	use zstd;
			27
			28	use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
			29	use self::nodemap_docket::NodeMapDocket;
			30	use super::index::Index;
			31	use super::nodemap::{NodeMap, NodeMapError};
			32	use crate::errors::HgError;
			33	use crate::vfs::Vfs;
	19		34
	20	/// Mercurial revision numbers	35	/// Mercurial revision numbers
	21	///	36	///
	22	/// As noted in revlog.c, revision numbers are actually encoded in	37	/// As noted in revlog.c, revision numbers are actually encoded in
	23	/// 4 bytes, and are liberally converted to ints, whence the i32	38	/// 4 bytes, and are liberally converted to ints, whence the i32
	24	pub type Revision = i32;	39	pub type Revision = i32;
	25		40
	26	/// Marker expressing the absence of a parent	41	/// Marker expressing the absence of a parent
	27	///	42	///
	28	/// Independently of the actual representation, `NULL_REVISION` is guaranteed	43	/// Independently of the actual representation, `NULL_REVISION` is guaranteed
	29	/// to be smaller than all existing revisions.	44	/// to be smaller than all existing revisions.
	30	pub const NULL_REVISION: Revision = -1;	45	pub const NULL_REVISION: Revision = -1;
	31		46
	32	/// Same as `mercurial.node.wdirrev`	47	/// Same as `mercurial.node.wdirrev`
	33	///	48	///
	34	/// This is also equal to `i32::max_value()`, but it's better to spell	49	/// This is also equal to `i32::max_value()`, but it's better to spell
	35	/// it out explicitly, same as in `mercurial.node`	50	/// it out explicitly, same as in `mercurial.node`
	36	#[allow(clippy::unreadable_literal)]	51	#[allow(clippy::unreadable_literal)]
	37	pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;	52	pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
	38		53
	39	pub const WORKING_DIRECTORY_HEX: &str =	54	pub const WORKING_DIRECTORY_HEX: &str =
	40	"ffffffffffffffffffffffffffffffffffffffff";	55	"ffffffffffffffffffffffffffffffffffffffff";
	41		56
	42	/// The simplest expression of what we need of Mercurial DAGs.	57	/// The simplest expression of what we need of Mercurial DAGs.
	43	pub trait Graph {	58	pub trait Graph {
	44	/// Return the two parents of the given `Revision`.	59	/// Return the two parents of the given `Revision`.
	45	///	60	///
	46	/// Each of the parents can be independently `NULL_REVISION`	61	/// Each of the parents can be independently `NULL_REVISION`
	47	fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;	62	fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
	48	}	63	}
	49		64
	50	#[derive(Clone, Debug, PartialEq)]	65	#[derive(Clone, Debug, PartialEq)]
	51	pub enum GraphError {	66	pub enum GraphError {
	52	ParentOutOfRange(Revision),	67	ParentOutOfRange(Revision),
	53	WorkingDirectoryUnsupported,	68	WorkingDirectoryUnsupported,
	54	}	69	}
	55		70
	56	/// The Mercurial Revlog Index	71	/// The Mercurial Revlog Index
	57	///	72	///
	58	/// This is currently limited to the minimal interface that is needed for	73	/// This is currently limited to the minimal interface that is needed for
	59	/// the [`nodemap`](nodemap/index.html) module	74	/// the [`nodemap`](nodemap/index.html) module
	60	pub trait RevlogIndex {	75	pub trait RevlogIndex {
	61	/// Total number of Revisions referenced in this index	76	/// Total number of Revisions referenced in this index
	62	fn len(&self) -> usize;	77	fn len(&self) -> usize;
	63		78
	64	fn is_empty(&self) -> bool {	79	fn is_empty(&self) -> bool {
	65	self.len() == 0	80	self.len() == 0
	66	}	81	}
	67		82
	68	/// Return a reference to the Node or `None` if rev is out of bounds	83	/// Return a reference to the Node or `None` if rev is out of bounds
	69	///	84	///
	70	/// `NULL_REVISION` is not considered to be out of bounds.	85	/// `NULL_REVISION` is not considered to be out of bounds.
	71	fn node(&self, rev: Revision) -> Option<&Node>;	86	fn node(&self, rev: Revision) -> Option<&Node>;
	72	}	87	}
			88
			89	const REVISION_FLAG_CENSORED: u16 = 1 << 15;
			90	const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
			91	const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
			92	const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
			93
			94	// Keep this in sync with REVIDX_KNOWN_FLAGS in
			95	// mercurial/revlogutils/flagutil.py
			96	const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
			97	\| REVISION_FLAG_ELLIPSIS
			98	\| REVISION_FLAG_EXTSTORED
			99	\| REVISION_FLAG_HASCOPIESINFO;
			100
			101	const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
			102
			103	#[derive(Debug, derive_more::From)]
			104	pub enum RevlogError {
			105	InvalidRevision,
			106	/// Working directory is not supported
			107	WDirUnsupported,
			108	/// Found more than one entry whose ID match the requested prefix
			109	AmbiguousPrefix,
			110	#[from]
			111	Other(HgError),
			112	}
			113
			114	impl From<NodeMapError> for RevlogError {
			115	fn from(error: NodeMapError) -> Self {
			116	match error {
			117	NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
			118	NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
			119	format!("nodemap point to revision {} not in index", rev),
			120	),
			121	}
			122	}
			123	}
			124
			125	fn corrupted<S: AsRef<str>>(context: S) -> HgError {
			126	HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
			127	}
			128
			129	impl RevlogError {
			130	fn corrupted<S: AsRef<str>>(context: S) -> Self {
			131	RevlogError::Other(corrupted(context))
			132	}
			133	}
			134
			135	/// Read only implementation of revlog.
			136	pub struct Revlog {
			137	/// When index and data are not interleaved: bytes of the revlog index.
			138	/// When index and data are interleaved: bytes of the revlog index and
			139	/// data.
			140	index: Index,
			141	/// When index and data are not interleaved: bytes of the revlog data
			142	data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
			143	/// When present on disk: the persistent nodemap for this revlog
			144	nodemap: Option<nodemap::NodeTree>,
			145	}
			146
			147	impl Revlog {
			148	/// Open a revlog index file.
			149	///
			150	/// It will also open the associated data file if index and data are not
			151	/// interleaved.
			152	pub fn open(
			153	store_vfs: &Vfs,
			154	index_path: impl AsRef<Path>,
			155	data_path: Option<&Path>,
			156	use_nodemap: bool,
			157	) -> Result<Self, HgError> {
			158	let index_path = index_path.as_ref();
			159	let index = {
			160	match store_vfs.mmap_open_opt(&index_path)? {
			161	None => Index::new(Box::new(vec![])),
			162	Some(index_mmap) => {
			163	let index = Index::new(Box::new(index_mmap))?;
			164	Ok(index)
			165	}
			166	}
			167	}?;
			168
			169	let default_data_path = index_path.with_extension("d");
			170
			171	// type annotation required
			172	// won't recognize Mmap as Deref<Target = [u8]>
			173	let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
			174	if index.is_inline() {
			175	None
			176	} else {
			177	let data_path = data_path.unwrap_or(&default_data_path);
			178	let data_mmap = store_vfs.mmap_open(data_path)?;
			179	Some(Box::new(data_mmap))
			180	};
			181
			182	let nodemap = if index.is_inline() \|\| !use_nodemap {
			183	None
			184	} else {
			185	NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
			186	\|(docket, data)\| {
			187	nodemap::NodeTree::load_bytes(
			188	Box::new(data),
			189	docket.data_length,
			190	)
			191	},
			192	)
			193	};
			194
			195	Ok(Revlog {
			196	index,
			197	data_bytes,
			198	nodemap,
			199	})
			200	}
			201
			202	/// Return number of entries of the `Revlog`.
			203	pub fn len(&self) -> usize {
			204	self.index.len()
			205	}
			206
			207	/// Returns `true` if the `Revlog` has zero `entries`.
			208	pub fn is_empty(&self) -> bool {
			209	self.index.is_empty()
			210	}
			211
			212	/// Returns the node ID for the given revision number, if it exists in this
			213	/// revlog
			214	pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
			215	if rev == NULL_REVISION {
			216	return Some(&NULL_NODE);
			217	}
			218	Some(self.index.get_entry(rev)?.hash())
			219	}
			220
			221	/// Return the revision number for the given node ID, if it exists in this
			222	/// revlog
			223	pub fn rev_from_node(
			224	&self,
			225	node: NodePrefix,
			226	) -> Result<Revision, RevlogError> {
			227	if node.is_prefix_of(&NULL_NODE) {
			228	return Ok(NULL_REVISION);
			229	}
			230
			231	if let Some(nodemap) = &self.nodemap {
			232	return nodemap
			233	.find_bin(&self.index, node)?
			234	.ok_or(RevlogError::InvalidRevision);
			235	}
			236
			237	// Fallback to linear scan when a persistent nodemap is not present.
			238	// This happens when the persistent-nodemap experimental feature is not
			239	// enabled, or for small revlogs.
			240	//
			241	// TODO: consider building a non-persistent nodemap in memory to
			242	// optimize these cases.
			243	let mut found_by_prefix = None;
			244	for rev in (0..self.len() as Revision).rev() {
			245	let index_entry = self.index.get_entry(rev).ok_or_else(\|\| {
			246	HgError::corrupted(
			247	"revlog references a revision not in the index",
			248	)
			249	})?;
			250	if node == *index_entry.hash() {
			251	return Ok(rev);
			252	}
			253	if node.is_prefix_of(index_entry.hash()) {
			254	if found_by_prefix.is_some() {
			255	return Err(RevlogError::AmbiguousPrefix);
			256	}
			257	found_by_prefix = Some(rev)
			258	}
			259	}
			260	found_by_prefix.ok_or(RevlogError::InvalidRevision)
			261	}
			262
			263	/// Returns whether the given revision exists in this revlog.
			264	pub fn has_rev(&self, rev: Revision) -> bool {
			265	self.index.get_entry(rev).is_some()
			266	}
			267
			268	/// Return the full data associated to a revision.
			269	///
			270	/// All entries required to build the final data out of deltas will be
			271	/// retrieved as needed, and the deltas will be applied to the inital
			272	/// snapshot to rebuild the final data.
			273	pub fn get_rev_data(
			274	&self,
			275	rev: Revision,
			276	) -> Result<Cow<[u8]>, RevlogError> {
			277	if rev == NULL_REVISION {
			278	return Ok(Cow::Borrowed(&[]));
			279	};
			280	Ok(self.get_entry(rev)?.data()?)
			281	}
			282
			283	/// Check the hash of some given data against the recorded hash.
			284	pub fn check_hash(
			285	&self,
			286	p1: Revision,
			287	p2: Revision,
			288	expected: &[u8],
			289	data: &[u8],
			290	) -> bool {
			291	let e1 = self.index.get_entry(p1);
			292	let h1 = match e1 {
			293	Some(ref entry) => entry.hash(),
			294	None => &NULL_NODE,
			295	};
			296	let e2 = self.index.get_entry(p2);
			297	let h2 = match e2 {
			298	Some(ref entry) => entry.hash(),
			299	None => &NULL_NODE,
			300	};
			301
			302	hash(data, h1.as_bytes(), h2.as_bytes()) == expected
			303	}
			304
			305	/// Build the full data of a revision out its snapshot
			306	/// and its deltas.
			307	fn build_data_from_deltas(
			308	snapshot: RevlogEntry,
			309	deltas: &[RevlogEntry],
			310	) -> Result<Vec<u8>, HgError> {
			311	let snapshot = snapshot.data_chunk()?;
			312	let deltas = deltas
			313	.iter()
			314	.rev()
			315	.map(RevlogEntry::data_chunk)
			316	.collect::<Result<Vec<_>, _>>()?;
			317	let patches: Vec<_> =
			318	deltas.iter().map(\|d\| patch::PatchList::new(d)).collect();
			319	let patch = patch::fold_patch_lists(&patches);
			320	Ok(patch.apply(&snapshot))
			321	}
			322
			323	/// Return the revlog data.
			324	fn data(&self) -> &[u8] {
			325	match &self.data_bytes {
			326	Some(data_bytes) => data_bytes,
			327	None => panic!(
			328	"forgot to load the data or trying to access inline data"
			329	),
			330	}
			331	}
			332
			333	pub fn make_null_entry(&self) -> RevlogEntry {
			334	RevlogEntry {
			335	revlog: self,
			336	rev: NULL_REVISION,
			337	bytes: b"",
			338	compressed_len: 0,
			339	uncompressed_len: 0,
			340	base_rev_or_base_of_delta_chain: None,
			341	p1: NULL_REVISION,
			342	p2: NULL_REVISION,
			343	flags: NULL_REVLOG_ENTRY_FLAGS,
			344	hash: NULL_NODE,
			345	}
			346	}
			347
			348	/// Get an entry of the revlog.
			349	pub fn get_entry(
			350	&self,
			351	rev: Revision,
			352	) -> Result<RevlogEntry, RevlogError> {
			353	if rev == NULL_REVISION {
			354	return Ok(self.make_null_entry());
			355	}
			356	let index_entry = self
			357	.index
			358	.get_entry(rev)
			359	.ok_or(RevlogError::InvalidRevision)?;
			360	let start = index_entry.offset();
			361	let end = start + index_entry.compressed_len() as usize;
			362	let data = if self.index.is_inline() {
			363	self.index.data(start, end)
			364	} else {
			365	&self.data()[start..end]
			366	};
			367	let entry = RevlogEntry {
			368	revlog: self,
			369	rev,
			370	bytes: data,
			371	compressed_len: index_entry.compressed_len(),
			372	uncompressed_len: index_entry.uncompressed_len(),
			373	base_rev_or_base_of_delta_chain: if index_entry
			374	.base_revision_or_base_of_delta_chain()
			375	== rev
			376	{
			377	None
			378	} else {
			379	Some(index_entry.base_revision_or_base_of_delta_chain())
			380	},
			381	p1: index_entry.p1(),
			382	p2: index_entry.p2(),
			383	flags: index_entry.flags(),
			384	hash: *index_entry.hash(),
			385	};
			386	Ok(entry)
			387	}
			388
			389	/// when resolving internal references within revlog, any errors
			390	/// should be reported as corruption, instead of e.g. "invalid revision"
			391	fn get_entry_internal(
			392	&self,
			393	rev: Revision,
			394	) -> Result<RevlogEntry, HgError> {
			395	self.get_entry(rev)
			396	.map_err(\|_\| corrupted(format!("revision {} out of range", rev)))
			397	}
			398	}
			399
			400	/// The revlog entry's bytes and the necessary informations to extract
			401	/// the entry's data.
			402	#[derive(Clone)]
			403	pub struct RevlogEntry<'a> {
			404	revlog: &'a Revlog,
			405	rev: Revision,
			406	bytes: &'a [u8],
			407	compressed_len: u32,
			408	uncompressed_len: i32,
			409	base_rev_or_base_of_delta_chain: Option<Revision>,
			410	p1: Revision,
			411	p2: Revision,
			412	flags: u16,
			413	hash: Node,
			414	}
			415
			416	impl<'a> RevlogEntry<'a> {
			417	pub fn revision(&self) -> Revision {
			418	self.rev
			419	}
			420
			421	pub fn node(&self) -> &Node {
			422	&self.hash
			423	}
			424
			425	pub fn uncompressed_len(&self) -> Option<u32> {
			426	u32::try_from(self.uncompressed_len).ok()
			427	}
			428
			429	pub fn has_p1(&self) -> bool {
			430	self.p1 != NULL_REVISION
			431	}
			432
			433	pub fn p1_entry(&self) -> Result<Option<RevlogEntry>, RevlogError> {
			434	if self.p1 == NULL_REVISION {
			435	Ok(None)
			436	} else {
			437	Ok(Some(self.revlog.get_entry(self.p1)?))
			438	}
			439	}
			440
			441	pub fn p2_entry(&self) -> Result<Option<RevlogEntry>, RevlogError> {
			442	if self.p2 == NULL_REVISION {
			443	Ok(None)
			444	} else {
			445	Ok(Some(self.revlog.get_entry(self.p2)?))
			446	}
			447	}
			448
			449	pub fn p1(&self) -> Option<Revision> {
			450	if self.p1 == NULL_REVISION {
			451	None
			452	} else {
			453	Some(self.p1)
			454	}
			455	}
			456
			457	pub fn p2(&self) -> Option<Revision> {
			458	if self.p2 == NULL_REVISION {
			459	None
			460	} else {
			461	Some(self.p2)
			462	}
			463	}
			464
			465	pub fn is_censored(&self) -> bool {
			466	(self.flags & REVISION_FLAG_CENSORED) != 0
			467	}
			468
			469	pub fn has_length_affecting_flag_processor(&self) -> bool {
			470	// Relevant Python code: revlog.size()
			471	// note: ELLIPSIS is known to not change the content
			472	(self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
			473	}
			474
			475	/// The data for this entry, after resolving deltas if any.
			476	pub fn rawdata(&self) -> Result<Cow<'a, [u8]>, HgError> {
			477	let mut entry = self.clone();
			478	let mut delta_chain = vec![];
			479
			480	// The meaning of `base_rev_or_base_of_delta_chain` depends on
			481	// generaldelta. See the doc on `ENTRY_DELTA_BASE` in
			482	// `mercurial/revlogutils/constants.py` and the code in
			483	// [_chaininfo] and in [index_deltachain].
			484	let uses_generaldelta = self.revlog.index.uses_generaldelta();
			485	while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
			486	let base_rev = if uses_generaldelta {
			487	base_rev
			488	} else {
			489	entry.rev - 1
			490	};
			491	delta_chain.push(entry);
			492	entry = self.revlog.get_entry_internal(base_rev)?;
			493	}
			494
			495	let data = if delta_chain.is_empty() {
			496	entry.data_chunk()?
			497	} else {
			498	Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
			499	};
			500
			501	Ok(data)
			502	}
			503
			504	fn check_data(
			505	&self,
			506	data: Cow<'a, [u8]>,
			507	) -> Result<Cow<'a, [u8]>, HgError> {
			508	if self.revlog.check_hash(
			509	self.p1,
			510	self.p2,
			511	self.hash.as_bytes(),
			512	&data,
			513	) {
			514	Ok(data)
			515	} else {
			516	if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
			517	return Err(HgError::unsupported(
			518	"ellipsis revisions are not supported by rhg",
			519	));
			520	}
			521	Err(corrupted(format!(
			522	"hash check failed for revision {}",
			523	self.rev
			524	)))
			525	}
			526	}
			527
			528	pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
			529	let data = self.rawdata()?;
			530	if self.is_censored() {
			531	return Err(HgError::CensoredNodeError);
			532	}
			533	self.check_data(data)
			534	}
			535
			536	/// Extract the data contained in the entry.
			537	/// This may be a delta. (See `is_delta`.)
			538	fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> {
			539	if self.bytes.is_empty() {
			540	return Ok(Cow::Borrowed(&[]));
			541	}
			542	match self.bytes[0] {
			543	// Revision data is the entirety of the entry, including this
			544	// header.
			545	b'\0' => Ok(Cow::Borrowed(self.bytes)),
			546	// Raw revision data follows.
			547	b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
			548	// zlib (RFC 1950) data.
			549	b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
			550	// zstd data.
			551	b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
			552	// A proper new format should have had a repo/store requirement.
			553	format_type => Err(corrupted(format!(
			554	"unknown compression header '{}'",
			555	format_type
			556	))),
			557	}
			558	}
			559
			560	fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
			561	let mut decoder = ZlibDecoder::new(self.bytes);
			562	if self.is_delta() {
			563	let mut buf = Vec::with_capacity(self.compressed_len as usize);
			564	decoder
			565	.read_to_end(&mut buf)
			566	.map_err(\|e\| corrupted(e.to_string()))?;
			567	Ok(buf)
			568	} else {
			569	let cap = self.uncompressed_len.max(0) as usize;
			570	let mut buf = vec![0; cap];
			571	decoder
			572	.read_exact(&mut buf)
			573	.map_err(\|e\| corrupted(e.to_string()))?;
			574	Ok(buf)
			575	}
			576	}
			577
			578	fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
			579	if self.is_delta() {
			580	let mut buf = Vec::with_capacity(self.compressed_len as usize);
			581	zstd::stream::copy_decode(self.bytes, &mut buf)
			582	.map_err(\|e\| corrupted(e.to_string()))?;
			583	Ok(buf)
			584	} else {
			585	let cap = self.uncompressed_len.max(0) as usize;
			586	let mut buf = vec![0; cap];
			587	let len = zstd::bulk::decompress_to_buffer(self.bytes, &mut buf)
			588	.map_err(\|e\| corrupted(e.to_string()))?;
			589	if len != self.uncompressed_len as usize {
			590	Err(corrupted("uncompressed length does not match"))
			591	} else {
			592	Ok(buf)
			593	}
			594	}
			595	}
			596
			597	/// Tell if the entry is a snapshot or a delta
			598	/// (influences on decompression).
			599	fn is_delta(&self) -> bool {
			600	self.base_rev_or_base_of_delta_chain.is_some()
			601	}
			602	}
			603
			604	/// Calculate the hash of a revision given its data and its parents.
			605	fn hash(
			606	data: &[u8],
			607	p1_hash: &[u8],
			608	p2_hash: &[u8],
			609	) -> [u8; NODE_BYTES_LENGTH] {
			610	let mut hasher = Sha1::new();
			611	let (a, b) = (p1_hash, p2_hash);
			612	if a > b {
			613	hasher.update(b);
			614	hasher.update(a);
			615	} else {
			616	hasher.update(a);
			617	hasher.update(b);
			618	}
			619	hasher.update(data);
			620	*hasher.finalize().as_ref()
			621	}
			622
			#[cfg(test)]
			mod tests {
			    use super::*;
			    use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
			    use itertools::Itertools;

			    /// An empty index file opens as a revlog with no revisions.
			    #[test]
			    fn test_empty() {
			        let dir = tempfile::tempdir().unwrap();
			        let vfs = Vfs { base: dir.path() };
			        let index_path = dir.path().join("foo.i");
			        std::fs::write(index_path, b"").unwrap();

			        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
			        assert_eq!(revlog.len(), 0);
			        assert!(revlog.is_empty());
			        assert!(!revlog.has_rev(0));
			        assert!(revlog.get_entry(0).is_err());
			    }

			    /// Three inline index entries: two without parents, and a third
			    /// whose parents are the first two.
			    #[test]
			    fn test_inline() {
			        let dir = tempfile::tempdir().unwrap();
			        let vfs = Vfs { base: dir.path() };
			        let parse_node = |hex: &str| Node::from_hex(hex).unwrap();
			        let node0 = parse_node("2ed2a3912a0b24502043eae84ee4b279c18b90dd");
			        let node1 = parse_node("b004912a8510032a0350a74daa2803dadfb00e12");
			        let node2 = parse_node("dd6ad206e907be60927b5a3117b97dffb2590582");

			        let entry0_bytes = IndexEntryBuilder::new()
			            .is_first(true)
			            .with_version(1)
			            .with_inline(true)
			            .with_offset(INDEX_ENTRY_SIZE)
			            .with_node(node0)
			            .build();
			        let entry1_bytes = IndexEntryBuilder::new()
			            .with_offset(INDEX_ENTRY_SIZE)
			            .with_node(node1)
			            .build();
			        let entry2_bytes = IndexEntryBuilder::new()
			            .with_offset(INDEX_ENTRY_SIZE)
			            .with_p1(0)
			            .with_p2(1)
			            .with_node(node2)
			            .build();
			        // The index file is simply the three entries back to back.
			        let contents = entry0_bytes
			            .into_iter()
			            .chain(entry1_bytes)
			            .chain(entry2_bytes)
			            .collect_vec();
			        std::fs::write(dir.path().join("foo.i"), contents).unwrap();
			        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

			        // Revisions 0 and 1 have no parents at all.
			        for (rev, expected_node) in vec![(0, node0), (1, node1)] {
			            let entry = revlog.get_entry(rev).ok().unwrap();
			            assert_eq!(entry.revision(), rev);
			            assert_eq!(*entry.node(), expected_node);
			            assert!(!entry.has_p1());
			            assert_eq!(entry.p1(), None);
			            assert_eq!(entry.p2(), None);
			            assert!(entry.p1_entry().unwrap().is_none());
			            assert!(entry.p2_entry().unwrap().is_none());
			        }

			        // Revision 2 has revisions 0 and 1 as parents.
			        let merge = revlog.get_entry(2).ok().unwrap();
			        assert_eq!(merge.revision(), 2);
			        assert_eq!(*merge.node(), node2);
			        assert!(merge.has_p1());
			        assert_eq!(merge.p1(), Some(0));
			        assert_eq!(merge.p2(), Some(1));
			        let first_parent = merge.p1_entry().unwrap();
			        assert!(first_parent.is_some());
			        assert_eq!(first_parent.unwrap().revision(), 0);
			        let second_parent = merge.p2_entry().unwrap();
			        assert!(second_parent.is_some());
			        assert_eq!(second_parent.unwrap().revision(), 1);
			    }
			}

rust/hg-core/src/revset.rs

0 +1 -1

             //! The revset query language
             //!
             //! <https://www.mercurial-scm.org/repo/hg/help/revsets>
             use crate::errors::HgError;
             use crate::repo::Repo;
-            use crate::revlog::revlog::{Revlog, RevlogError};
             use crate::revlog::NodePrefix;
             use crate::revlog::{Revision, NULL_REVISION, WORKING_DIRECTORY_HEX};
+            use crate::revlog::{Revlog, RevlogError};
             use crate::Node;
             /// Resolve a query string into a single revision.
             ///
             /// Only a small part of the revset language is implemented so far: the
             /// literals `.` and `null`, plus whatever
             /// `resolve_rev_number_or_hex_prefix` accepts against the changelog. Any
             /// other input is reported as an unsupported feature.
             pub fn resolve_single(
                 input: &str,
                 repo: &Repo,
             ) -> Result<Revision, RevlogError> {
                 let changelog = repo.changelog()?;
                 if input == "." {
                     // `.` resolves to the first parent recorded in the dirstate.
                     let parents = repo.dirstate_parents()?;
                     return changelog.revlog.rev_from_node(parents.p1.into());
                 }
                 if input == "null" {
                     return Ok(NULL_REVISION);
                 }

                 let resolved =
                     resolve_rev_number_or_hex_prefix(input, &changelog.revlog);
                 if let Err(RevlogError::InvalidRevision) = resolved {
                     // TODO: support for the rest of the language here.
                     let msg = format!("cannot parse revset '{}'", input);
                     return Err(HgError::unsupported(msg).into());
                 }
                 resolved
             }
             /// Resolve the small subset of the language suitable for revlogs other than
             /// the changelog, such as in `hg debugdata --manifest` CLI argument.
             ///
             /// Accepted inputs:
             ///
             /// * a non-negative decimal integer naming a revision present in `revlog`,
             ///   or
             /// * a hexadecimal string, resolved as a node ID prefix.
             ///
             /// A prefix of the working-directory pseudo node yields
             /// `RevlogError::WDirUnsupported`; input that is neither form yields
             /// `RevlogError::InvalidRevision`.
             pub fn resolve_rev_number_or_hex_prefix(
                 input: &str,
                 revlog: &Revlog,
             ) -> Result<Revision, RevlogError> {
                 // The Python equivalent of this is part of `revsymbol` in
                 // `mercurial/scmutil.py`
                 match input.parse::<i32>() {
                     // Comparing against the canonical rendering rejects spellings
                     // such as `+1` or `007` that `parse` itself accepts.
                     Ok(number)
                         if number.to_string() == input
                             && number >= 0
                             && revlog.has_rev(number) =>
                     {
                         return Ok(number)
                     }
                     _ => {}
                 }

                 let prefix = match NodePrefix::from_hex(input) {
                     Ok(prefix) => prefix,
                     Err(_) => return Err(RevlogError::InvalidRevision),
                 };
                 let working_directory =
                     Node::from_hex(WORKING_DIRECTORY_HEX).unwrap();
                 if prefix.is_prefix_of(&working_directory) {
                     Err(RevlogError::WDirUnsupported)
                 } else {
                     revlog.rev_from_node(prefix)
                 }
             }

rust/rhg/src/error.rs

0 +1 -1

             use crate::ui::utf8_to_local;
             use crate::ui::UiError;
             use crate::NoRepoInCwdError;
             use format_bytes::format_bytes;
             use hg::config::{ConfigError, ConfigParseError, ConfigValueParseError};
             use hg::dirstate_tree::on_disk::DirstateV2ParseError;
             use hg::errors::HgError;
             use hg::exit_codes;
             use hg::repo::RepoError;
-            use hg::revlog::revlog::RevlogError;
+            use hg::revlog::RevlogError;
             use hg::sparse::SparseConfigError;
             use hg::utils::files::get_bytes_from_path;
             use hg::{DirstateError, DirstateMapError, StatusError};
             use std::convert::From;
             /// The kind of command error
             ///
             /// Message-like payloads are stored as raw bytes (`Vec<u8>`) rather than
             /// `String`.
             #[derive(Debug)]
             pub enum CommandError {
                 /// Exit with an error message and "standard" failure exit code.
                 Abort {
                     /// Bytes of the error message.
                     message: Vec<u8>,
                     /// Exit code carried along with this abort.
                     detailed_exit_code: exit_codes::ExitCode,
                     /// Optional hint bytes; `None` when there is no hint.
                     hint: Option<Vec<u8>>,
                 },
                 /// Exit with a failure exit code but no message.
                 Unsuccessful,
                 /// Encountered something (such as a CLI argument, repository layout, …)
                 /// not supported by this version of `rhg`. Depending on configuration
                 /// `rhg` may attempt to silently fall back to Python-based `hg`, which
                 /// may or may not support this feature.
                 UnsupportedFeature { message: Vec<u8> },
                 /// The fallback executable does not exist (or has some other problem if
                 /// we end up being more precise about broken fallbacks).
                 InvalidFallback { path: Vec<u8>, err: String },
             }
             impl CommandError {
                 /// Build an `Abort` from a string message, using `exit_codes::ABORT`.
                 pub fn abort(message: impl AsRef<str>) -> Self {
                     Self::abort_with_exit_code(message, exit_codes::ABORT)
                 }

                 /// Build an `Abort` from a string message and an explicit exit code,
                 /// without a hint.
                 pub fn abort_with_exit_code(
                     message: impl AsRef<str>,
                     detailed_exit_code: exit_codes::ExitCode,
                 ) -> Self {
                     // TODO: bytes-based (instead of Unicode-based) formatting
                     // of error messages to handle non-UTF-8 filenames etc:
                     // https://www.mercurial-scm.org/wiki/EncodingStrategy#Mixing_output
                     let message: Vec<u8> = utf8_to_local(message.as_ref()).into();
                     Self::Abort {
                         message,
                         detailed_exit_code,
                         hint: None,
                     }
                 }

                 /// Build an `Abort` from a string message, an explicit exit code and
                 /// an optional string hint. Both strings go through `utf8_to_local`.
                 pub fn abort_with_exit_code_and_hint(
                     message: impl AsRef<str>,
                     detailed_exit_code: exit_codes::ExitCode,
                     hint: Option<impl AsRef<str>>,
                 ) -> Self {
                     let message: Vec<u8> = utf8_to_local(message.as_ref()).into();
                     let hint: Option<Vec<u8>> =
                         hint.map(|text| utf8_to_local(text.as_ref()).into());
                     Self::Abort {
                         message,
                         detailed_exit_code,
                         hint,
                     }
                 }

                 /// Build an `Abort` from message bytes that are stored as given,
                 /// without a hint.
                 pub fn abort_with_exit_code_bytes(
                     message: impl AsRef<[u8]>,
                     detailed_exit_code: exit_codes::ExitCode,
                 ) -> Self {
                     // TODO: use this everywhere it makes sense instead of the string
                     // version.
                     let message: Vec<u8> = message.as_ref().into();
                     Self::Abort {
                         message,
                         detailed_exit_code,
                         hint: None,
                     }
                 }

                 /// Build an `UnsupportedFeature` from a string message.
                 pub fn unsupported(message: impl AsRef<str>) -> Self {
                     let message: Vec<u8> = utf8_to_local(message.as_ref()).into();
                     Self::UnsupportedFeature { message }
                 }
             }
             /// For now we don't differentiate between CLI arguments that are invalid
             /// and ones that are valid for `hg` but not supported yet by `rhg`: both
             /// are reported as unsupported.
             impl From<clap::Error> for CommandError {
                 fn from(error: clap::Error) -> Self {
                     let rendered = error.to_string();
                     Self::unsupported(rendered)
                 }
             }
             impl From<HgError> for CommandError {
                 /// Unsupported features and censored nodes become
                 /// `UnsupportedFeature`; an `HgError::Abort` keeps its message, exit
                 /// code and hint; every other error aborts with its `Display` text.
                 fn from(error: HgError) -> Self {
                     match error {
                         HgError::Abort {
                             message,
                             detailed_exit_code,
                             hint,
                         } => Self::abort_with_exit_code_and_hint(
                             message,
                             detailed_exit_code,
                             hint,
                         ),
                         HgError::CensoredNodeError => {
                             Self::unsupported("Encountered a censored node")
                         }
                         HgError::UnsupportedFeature(message) => {
                             Self::unsupported(message)
                         }
                         other => Self::abort(other.to_string()),
                     }
                 }
             }
             impl From<ConfigValueParseError> for CommandError {
                 /// Abort with the error's `Display` text and the config-error exit
                 /// code.
                 fn from(error: ConfigValueParseError) -> Self {
                     let rendered = error.to_string();
                     Self::abort_with_exit_code(
                         rendered,
                         exit_codes::CONFIG_ERROR_ABORT,
                     )
                 }
             }
             impl From<UiError> for CommandError {
                 fn from(_error: UiError) -> Self {
                     // If we already failed writing to stdout or stderr,
                     // writing an error message to stderr about it would be likely to fail
                     // too.
                     CommandError::abort("")
                 }
             }
             impl From<RepoError> for CommandError {
                 /// A missing repository aborts with a message naming the path; the
                 /// other variants defer to the conversion of the error they wrap.
                 fn from(error: RepoError) -> Self {
                     match error {
                         RepoError::ConfigParseError(inner) => Self::from(inner),
                         RepoError::Other(inner) => Self::from(inner),
                         RepoError::NotFound { at } => {
                             let message = format_bytes!(
                                 b"abort: repository {} not found",
                                 get_bytes_from_path(at)
                             );
                             Self::abort_with_exit_code_bytes(
                                 message,
                                 exit_codes::ABORT,
                             )
                         }
                     }
                 }
             }
             impl<'a> From<&'a NoRepoInCwdError> for CommandError {
                 /// Abort with a message naming the directory in which no repository
                 /// was found.
                 fn from(error: &'a NoRepoInCwdError) -> Self {
                     let cwd = &error.cwd;
                     let message = format_bytes!(
                         b"abort: no repository found in '{}' (.hg not found)!",
                         get_bytes_from_path(cwd)
                     );
                     Self::abort_with_exit_code_bytes(message, exit_codes::ABORT)
                 }
             }
             impl From<ConfigError> for CommandError {
                 /// Defer to the conversion of whichever error is wrapped.
                 fn from(error: ConfigError) -> Self {
                     match error {
                         ConfigError::Other(inner) => Self::from(inner),
                         ConfigError::Parse(inner) => Self::from(inner),
                     }
                 }
             }
             impl From<ConfigParseError> for CommandError {
                 /// Abort with `config error at <origin>[:<line>]: <message>` and the
                 /// config-error exit code. The `:<line>` part is only present when a
                 /// line number is known.
                 fn from(error: ConfigParseError) -> Self {
                     let ConfigParseError {
                         origin,
                         line,
                         message,
                     } = error;
                     let line_suffix = match line {
                         Some(line_number) => format_bytes!(
                             b":{}",
                             line_number.to_string().into_bytes()
                         ),
                         None => Vec::new(),
                     };
                     let rendered = format_bytes!(
                         b"config error at {}{}: {}",
                         origin,
                         line_suffix,
                         message
                     );
                     Self::abort_with_exit_code_bytes(
                         rendered,
                         exit_codes::CONFIG_ERROR_ABORT,
                     )
                 }
             }
             impl From<(RevlogError, &str)> for CommandError {
                 fn from((err, rev): (RevlogError, &str)) -> CommandError {
                     match err {
                         RevlogError::WDirUnsupported => CommandError::abort(
                             "abort: working directory revision cannot be specified",
                         ),
                         RevlogError::InvalidRevision => CommandError::abort(format!(
                             "abort: invalid revision identifier: {}",
                             rev
                         )),
                         RevlogError::AmbiguousPrefix => CommandError::abort(format!(
                             "abort: ambiguous revision identifier: {}",
                             rev
                         )),
                         RevlogError::Other(error) => error.into(),
                     }
                 }
             }
             impl From<StatusError> for CommandError {
                 fn from(error: StatusError) -> Self {
                     match error {
                         StatusError::Pattern(_) => {
                             CommandError::unsupported(format!("{}", error))
                         }
                         _ => CommandError::abort(format!("{}", error)),
                     }
                 }
             }
             impl From<DirstateMapError> for CommandError {
                 fn from(error: DirstateMapError) -> Self {
                     CommandError::abort(format!("{}", error))
                 }
             }
             impl From<DirstateError> for CommandError {
                 /// Defer to the conversion of whichever error is wrapped.
                 fn from(error: DirstateError) -> Self {
                     match error {
                         DirstateError::Map(inner) => Self::from(inner),
                         DirstateError::Common(inner) => Self::from(inner),
                     }
                 }
             }
             impl From<DirstateV2ParseError> for CommandError {
                 fn from(error: DirstateV2ParseError) -> Self {
                     HgError::from(error).into()
                 }
             }
             impl From<SparseConfigError> for CommandError {
                 /// Map each sparse/narrow configuration error to an abort with a
                 /// dedicated message, except for wrapped `HgError`s (converted as
                 /// such) and pattern errors (reported as unsupported).
                 fn from(e: SparseConfigError) -> Self {
                     match e {
                         SparseConfigError::HgError(inner) => Self::from(inner),
                         SparseConfigError::PatternError(inner) => {
                             Self::unsupported(inner.to_string())
                         }
                         SparseConfigError::IncludesInNarrow => Self::abort(
                             "including other spec files using '%include' \
                                 is not supported in narrowspec",
                         ),
                         SparseConfigError::IncludesAfterExcludes { context } => {
                             let message = format_bytes!(
                                 b"{} config cannot have includes after excludes",
                                 context
                             );
                             Self::abort_with_exit_code_bytes(
                                 message,
                                 exit_codes::CONFIG_PARSE_ERROR_ABORT,
                             )
                         }
                         SparseConfigError::EntryOutsideSection { context, line } => {
                             let message = format_bytes!(
                                 b"{} config entry outside of section: {}",
                                 context,
                                 &line,
                             );
                             Self::abort_with_exit_code_bytes(
                                 message,
                                 exit_codes::CONFIG_PARSE_ERROR_ABORT,
                             )
                         }
                         SparseConfigError::InvalidNarrowPrefix(prefix) => {
                             let message = format_bytes!(
                                 b"invalid prefix on narrow pattern: {}",
                                 &prefix
                             );
                             Self::abort_with_exit_code_bytes(
                                 message,
                                 exit_codes::ABORT,
                             )
                         }
                     }
                 }
             }

rust/hg-core/src/revlog/revlog.rs

0 removed 0 -642

	1	NO CONTENT: file was removed			NO CONTENT: file was removed
This diff has been collapsed as it changes many lines, (642 lines changed) Show them Hide them

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages