##// END OF EJS Templates
rust: use the new `UncheckedRevision` everywhere applicable...
Raphaël Gomès -
r51870:1928b770 default
parent child Browse files
Show More
@@ -1,115 +1,115 b''
1 // list_tracked_files.rs
1 // list_tracked_files.rs
2 //
2 //
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::repo::Repo;
8 use crate::repo::Repo;
9 use crate::revlog::Node;
9 use crate::revlog::Node;
10 use crate::revlog::RevlogError;
10 use crate::revlog::RevlogError;
11
11
12 use crate::utils::hg_path::HgPath;
12 use crate::utils::hg_path::HgPath;
13
13
14 use crate::errors::HgError;
14 use crate::errors::HgError;
15 use crate::manifest::Manifest;
15 use crate::manifest::Manifest;
16 use crate::manifest::ManifestEntry;
16 use crate::manifest::ManifestEntry;
17 use itertools::put_back;
17 use itertools::put_back;
18 use itertools::PutBack;
18 use itertools::PutBack;
19 use std::cmp::Ordering;
19 use std::cmp::Ordering;
20
20
21 pub struct CatOutput<'a> {
21 pub struct CatOutput<'a> {
22 /// Whether any file in the manifest matched the paths given as CLI
22 /// Whether any file in the manifest matched the paths given as CLI
23 /// arguments
23 /// arguments
24 pub found_any: bool,
24 pub found_any: bool,
25 /// The contents of matching files, in manifest order
25 /// The contents of matching files, in manifest order
26 pub results: Vec<(&'a HgPath, Vec<u8>)>,
26 pub results: Vec<(&'a HgPath, Vec<u8>)>,
27 /// Which of the CLI arguments did not match any manifest file
27 /// Which of the CLI arguments did not match any manifest file
28 pub missing: Vec<&'a HgPath>,
28 pub missing: Vec<&'a HgPath>,
29 /// The node ID that the given revset was resolved to
29 /// The node ID that the given revset was resolved to
30 pub node: Node,
30 pub node: Node,
31 }
31 }
32
32
33 // Find an item in an iterator over a sorted collection.
33 // Find an item in an iterator over a sorted collection.
34 fn find_item<'a>(
34 fn find_item<'a>(
35 i: &mut PutBack<impl Iterator<Item = Result<ManifestEntry<'a>, HgError>>>,
35 i: &mut PutBack<impl Iterator<Item = Result<ManifestEntry<'a>, HgError>>>,
36 needle: &HgPath,
36 needle: &HgPath,
37 ) -> Result<Option<Node>, HgError> {
37 ) -> Result<Option<Node>, HgError> {
38 loop {
38 loop {
39 match i.next() {
39 match i.next() {
40 None => return Ok(None),
40 None => return Ok(None),
41 Some(result) => {
41 Some(result) => {
42 let entry = result?;
42 let entry = result?;
43 match needle.as_bytes().cmp(entry.path.as_bytes()) {
43 match needle.as_bytes().cmp(entry.path.as_bytes()) {
44 Ordering::Less => {
44 Ordering::Less => {
45 i.put_back(Ok(entry));
45 i.put_back(Ok(entry));
46 return Ok(None);
46 return Ok(None);
47 }
47 }
48 Ordering::Greater => continue,
48 Ordering::Greater => continue,
49 Ordering::Equal => return Ok(Some(entry.node_id()?)),
49 Ordering::Equal => return Ok(Some(entry.node_id()?)),
50 }
50 }
51 }
51 }
52 }
52 }
53 }
53 }
54 }
54 }
55
55
56 // Tuple of (missing, found) paths in the manifest
56 // Tuple of (missing, found) paths in the manifest
57 type ManifestQueryResponse<'a> = (Vec<(&'a HgPath, Node)>, Vec<&'a HgPath>);
57 type ManifestQueryResponse<'a> = (Vec<(&'a HgPath, Node)>, Vec<&'a HgPath>);
58
58
59 fn find_files_in_manifest<'query>(
59 fn find_files_in_manifest<'query>(
60 manifest: &Manifest,
60 manifest: &Manifest,
61 query: impl Iterator<Item = &'query HgPath>,
61 query: impl Iterator<Item = &'query HgPath>,
62 ) -> Result<ManifestQueryResponse<'query>, HgError> {
62 ) -> Result<ManifestQueryResponse<'query>, HgError> {
63 let mut manifest = put_back(manifest.iter());
63 let mut manifest = put_back(manifest.iter());
64 let mut res = vec![];
64 let mut res = vec![];
65 let mut missing = vec![];
65 let mut missing = vec![];
66
66
67 for file in query {
67 for file in query {
68 match find_item(&mut manifest, file)? {
68 match find_item(&mut manifest, file)? {
69 None => missing.push(file),
69 None => missing.push(file),
70 Some(item) => res.push((file, item)),
70 Some(item) => res.push((file, item)),
71 }
71 }
72 }
72 }
73 Ok((res, missing))
73 Ok((res, missing))
74 }
74 }
75
75
76 /// Output the given revision of files
76 /// Output the given revision of files
77 ///
77 ///
78 /// * `root`: Repository root
78 /// * `root`: Repository root
79 /// * `rev`: The revision to cat the files from.
79 /// * `rev`: The revision to cat the files from.
80 /// * `files`: The files to output.
80 /// * `files`: The files to output.
81 pub fn cat<'a>(
81 pub fn cat<'a>(
82 repo: &Repo,
82 repo: &Repo,
83 revset: &str,
83 revset: &str,
84 mut files: Vec<&'a HgPath>,
84 mut files: Vec<&'a HgPath>,
85 ) -> Result<CatOutput<'a>, RevlogError> {
85 ) -> Result<CatOutput<'a>, RevlogError> {
86 let rev = crate::revset::resolve_single(revset, repo)?;
86 let rev = crate::revset::resolve_single(revset, repo)?;
87 let manifest = repo.manifest_for_rev(rev)?;
87 let manifest = repo.manifest_for_rev(rev.into())?;
88 let node = *repo
88 let node = *repo
89 .changelog()?
89 .changelog()?
90 .node_from_rev(rev)
90 .node_from_rev(rev.into())
91 .expect("should succeed when repo.manifest did");
91 .expect("should succeed when repo.manifest did");
92 let mut results: Vec<(&'a HgPath, Vec<u8>)> = vec![];
92 let mut results: Vec<(&'a HgPath, Vec<u8>)> = vec![];
93 let mut found_any = false;
93 let mut found_any = false;
94
94
95 files.sort_unstable();
95 files.sort_unstable();
96
96
97 let (found, missing) =
97 let (found, missing) =
98 find_files_in_manifest(&manifest, files.into_iter())?;
98 find_files_in_manifest(&manifest, files.into_iter())?;
99
99
100 for (file_path, file_node) in found {
100 for (file_path, file_node) in found {
101 found_any = true;
101 found_any = true;
102 let file_log = repo.filelog(file_path)?;
102 let file_log = repo.filelog(file_path)?;
103 results.push((
103 results.push((
104 file_path,
104 file_path,
105 file_log.data_for_node(file_node)?.into_file_data()?,
105 file_log.data_for_node(file_node)?.into_file_data()?,
106 ));
106 ));
107 }
107 }
108
108
109 Ok(CatOutput {
109 Ok(CatOutput {
110 found_any,
110 found_any,
111 results,
111 results,
112 missing,
112 missing,
113 node,
113 node,
114 })
114 })
115 }
115 }
@@ -1,38 +1,38 b''
1 // debugdata.rs
1 // debugdata.rs
2 //
2 //
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::repo::Repo;
8 use crate::repo::Repo;
9 use crate::requirements;
9 use crate::requirements;
10 use crate::revlog::{Revlog, RevlogError};
10 use crate::revlog::{Revlog, RevlogError};
11
11
12 /// Kind of data to debug
12 /// Kind of data to debug
13 #[derive(Debug, Copy, Clone)]
13 #[derive(Debug, Copy, Clone)]
14 pub enum DebugDataKind {
14 pub enum DebugDataKind {
15 Changelog,
15 Changelog,
16 Manifest,
16 Manifest,
17 }
17 }
18
18
19 /// Dump the contents data of a revision.
19 /// Dump the contents data of a revision.
20 pub fn debug_data(
20 pub fn debug_data(
21 repo: &Repo,
21 repo: &Repo,
22 revset: &str,
22 revset: &str,
23 kind: DebugDataKind,
23 kind: DebugDataKind,
24 ) -> Result<Vec<u8>, RevlogError> {
24 ) -> Result<Vec<u8>, RevlogError> {
25 let index_file = match kind {
25 let index_file = match kind {
26 DebugDataKind::Changelog => "00changelog.i",
26 DebugDataKind::Changelog => "00changelog.i",
27 DebugDataKind::Manifest => "00manifest.i",
27 DebugDataKind::Manifest => "00manifest.i",
28 };
28 };
29 let use_nodemap = repo
29 let use_nodemap = repo
30 .requirements()
30 .requirements()
31 .contains(requirements::NODEMAP_REQUIREMENT);
31 .contains(requirements::NODEMAP_REQUIREMENT);
32 let revlog =
32 let revlog =
33 Revlog::open(&repo.store_vfs(), index_file, None, use_nodemap)?;
33 Revlog::open(&repo.store_vfs(), index_file, None, use_nodemap)?;
34 let rev =
34 let rev =
35 crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
35 crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
36 let data = revlog.get_rev_data(rev)?;
36 let data = revlog.get_rev_data_for_checked_rev(rev)?;
37 Ok(data.into_owned())
37 Ok(data.into_owned())
38 }
38 }
@@ -1,45 +1,45 b''
1 // list_tracked_files.rs
1 // list_tracked_files.rs
2 //
2 //
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::errors::HgError;
8 use crate::errors::HgError;
9 use crate::matchers::Matcher;
9 use crate::matchers::Matcher;
10 use crate::repo::Repo;
10 use crate::repo::Repo;
11 use crate::revlog::manifest::Manifest;
11 use crate::revlog::manifest::Manifest;
12 use crate::revlog::RevlogError;
12 use crate::revlog::RevlogError;
13 use crate::utils::filter_map_results;
13 use crate::utils::filter_map_results;
14 use crate::utils::hg_path::HgPath;
14 use crate::utils::hg_path::HgPath;
15
15
16 /// List files under Mercurial control at a given revision.
16 /// List files under Mercurial control at a given revision.
17 pub fn list_rev_tracked_files(
17 pub fn list_rev_tracked_files(
18 repo: &Repo,
18 repo: &Repo,
19 revset: &str,
19 revset: &str,
20 narrow_matcher: Box<dyn Matcher>,
20 narrow_matcher: Box<dyn Matcher>,
21 ) -> Result<FilesForRev, RevlogError> {
21 ) -> Result<FilesForRev, RevlogError> {
22 let rev = crate::revset::resolve_single(revset, repo)?;
22 let rev = crate::revset::resolve_single(revset, repo)?;
23 Ok(FilesForRev {
23 Ok(FilesForRev {
24 manifest: repo.manifest_for_rev(rev)?,
24 manifest: repo.manifest_for_rev(rev.into())?,
25 narrow_matcher,
25 narrow_matcher,
26 })
26 })
27 }
27 }
28
28
29 pub struct FilesForRev {
29 pub struct FilesForRev {
30 manifest: Manifest,
30 manifest: Manifest,
31 narrow_matcher: Box<dyn Matcher>,
31 narrow_matcher: Box<dyn Matcher>,
32 }
32 }
33
33
34 impl FilesForRev {
34 impl FilesForRev {
35 pub fn iter(&self) -> impl Iterator<Item = Result<&HgPath, HgError>> {
35 pub fn iter(&self) -> impl Iterator<Item = Result<&HgPath, HgError>> {
36 filter_map_results(self.manifest.iter(), |entry| {
36 filter_map_results(self.manifest.iter(), |entry| {
37 let path = entry.path;
37 let path = entry.path;
38 Ok(if self.narrow_matcher.matches(path) {
38 Ok(if self.narrow_matcher.matches(path) {
39 Some(path)
39 Some(path)
40 } else {
40 } else {
41 None
41 None
42 })
42 })
43 })
43 })
44 }
44 }
45 }
45 }
@@ -1,782 +1,782 b''
1 use crate::changelog::Changelog;
1 use crate::changelog::Changelog;
2 use crate::config::{Config, ConfigError, ConfigParseError};
2 use crate::config::{Config, ConfigError, ConfigParseError};
3 use crate::dirstate::DirstateParents;
3 use crate::dirstate::DirstateParents;
4 use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode;
4 use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode;
5 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
5 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
6 use crate::dirstate_tree::owning::OwningDirstateMap;
6 use crate::dirstate_tree::owning::OwningDirstateMap;
7 use crate::errors::HgResultExt;
7 use crate::errors::HgResultExt;
8 use crate::errors::{HgError, IoResultExt};
8 use crate::errors::{HgError, IoResultExt};
9 use crate::lock::{try_with_lock_no_wait, LockError};
9 use crate::lock::{try_with_lock_no_wait, LockError};
10 use crate::manifest::{Manifest, Manifestlog};
10 use crate::manifest::{Manifest, Manifestlog};
11 use crate::revlog::filelog::Filelog;
11 use crate::revlog::filelog::Filelog;
12 use crate::revlog::RevlogError;
12 use crate::revlog::RevlogError;
13 use crate::utils::debug::debug_wait_for_file_or_print;
13 use crate::utils::debug::debug_wait_for_file_or_print;
14 use crate::utils::files::get_path_from_bytes;
14 use crate::utils::files::get_path_from_bytes;
15 use crate::utils::hg_path::HgPath;
15 use crate::utils::hg_path::HgPath;
16 use crate::utils::SliceExt;
16 use crate::utils::SliceExt;
17 use crate::vfs::{is_dir, is_file, Vfs};
17 use crate::vfs::{is_dir, is_file, Vfs};
18 use crate::{requirements, NodePrefix};
18 use crate::DirstateError;
19 use crate::{DirstateError, Revision};
19 use crate::{requirements, NodePrefix, UncheckedRevision};
20 use std::cell::{Ref, RefCell, RefMut};
20 use std::cell::{Ref, RefCell, RefMut};
21 use std::collections::HashSet;
21 use std::collections::HashSet;
22 use std::io::Seek;
22 use std::io::Seek;
23 use std::io::SeekFrom;
23 use std::io::SeekFrom;
24 use std::io::Write as IoWrite;
24 use std::io::Write as IoWrite;
25 use std::path::{Path, PathBuf};
25 use std::path::{Path, PathBuf};
26
26
27 const V2_MAX_READ_ATTEMPTS: usize = 5;
27 const V2_MAX_READ_ATTEMPTS: usize = 5;
28
28
29 type DirstateMapIdentity = (Option<u64>, Option<Vec<u8>>, usize);
29 type DirstateMapIdentity = (Option<u64>, Option<Vec<u8>>, usize);
30
30
31 /// A repository on disk
31 /// A repository on disk
32 pub struct Repo {
32 pub struct Repo {
33 working_directory: PathBuf,
33 working_directory: PathBuf,
34 dot_hg: PathBuf,
34 dot_hg: PathBuf,
35 store: PathBuf,
35 store: PathBuf,
36 requirements: HashSet<String>,
36 requirements: HashSet<String>,
37 config: Config,
37 config: Config,
38 dirstate_parents: LazyCell<DirstateParents>,
38 dirstate_parents: LazyCell<DirstateParents>,
39 dirstate_map: LazyCell<OwningDirstateMap>,
39 dirstate_map: LazyCell<OwningDirstateMap>,
40 changelog: LazyCell<Changelog>,
40 changelog: LazyCell<Changelog>,
41 manifestlog: LazyCell<Manifestlog>,
41 manifestlog: LazyCell<Manifestlog>,
42 }
42 }
43
43
44 #[derive(Debug, derive_more::From)]
44 #[derive(Debug, derive_more::From)]
45 pub enum RepoError {
45 pub enum RepoError {
46 NotFound {
46 NotFound {
47 at: PathBuf,
47 at: PathBuf,
48 },
48 },
49 #[from]
49 #[from]
50 ConfigParseError(ConfigParseError),
50 ConfigParseError(ConfigParseError),
51 #[from]
51 #[from]
52 Other(HgError),
52 Other(HgError),
53 }
53 }
54
54
55 impl From<ConfigError> for RepoError {
55 impl From<ConfigError> for RepoError {
56 fn from(error: ConfigError) -> Self {
56 fn from(error: ConfigError) -> Self {
57 match error {
57 match error {
58 ConfigError::Parse(error) => error.into(),
58 ConfigError::Parse(error) => error.into(),
59 ConfigError::Other(error) => error.into(),
59 ConfigError::Other(error) => error.into(),
60 }
60 }
61 }
61 }
62 }
62 }
63
63
64 impl Repo {
64 impl Repo {
65 /// tries to find nearest repository root in current working directory or
65 /// tries to find nearest repository root in current working directory or
66 /// its ancestors
66 /// its ancestors
67 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
67 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
68 let current_directory = crate::utils::current_dir()?;
68 let current_directory = crate::utils::current_dir()?;
69 // ancestors() is inclusive: it first yields `current_directory`
69 // ancestors() is inclusive: it first yields `current_directory`
70 // as-is.
70 // as-is.
71 for ancestor in current_directory.ancestors() {
71 for ancestor in current_directory.ancestors() {
72 if is_dir(ancestor.join(".hg"))? {
72 if is_dir(ancestor.join(".hg"))? {
73 return Ok(ancestor.to_path_buf());
73 return Ok(ancestor.to_path_buf());
74 }
74 }
75 }
75 }
76 Err(RepoError::NotFound {
76 Err(RepoError::NotFound {
77 at: current_directory,
77 at: current_directory,
78 })
78 })
79 }
79 }
80
80
81 /// Find a repository, either at the given path (which must contain a `.hg`
81 /// Find a repository, either at the given path (which must contain a `.hg`
82 /// sub-directory) or by searching the current directory and its
82 /// sub-directory) or by searching the current directory and its
83 /// ancestors.
83 /// ancestors.
84 ///
84 ///
85 /// A method with two very different "modes" like this usually a code smell
85 /// A method with two very different "modes" like this usually a code smell
86 /// to make two methods instead, but in this case an `Option` is what rhg
86 /// to make two methods instead, but in this case an `Option` is what rhg
87 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
87 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
88 /// Having two methods would just move that `if` to almost all callers.
88 /// Having two methods would just move that `if` to almost all callers.
89 pub fn find(
89 pub fn find(
90 config: &Config,
90 config: &Config,
91 explicit_path: Option<PathBuf>,
91 explicit_path: Option<PathBuf>,
92 ) -> Result<Self, RepoError> {
92 ) -> Result<Self, RepoError> {
93 if let Some(root) = explicit_path {
93 if let Some(root) = explicit_path {
94 if is_dir(root.join(".hg"))? {
94 if is_dir(root.join(".hg"))? {
95 Self::new_at_path(root, config)
95 Self::new_at_path(root, config)
96 } else if is_file(&root)? {
96 } else if is_file(&root)? {
97 Err(HgError::unsupported("bundle repository").into())
97 Err(HgError::unsupported("bundle repository").into())
98 } else {
98 } else {
99 Err(RepoError::NotFound { at: root })
99 Err(RepoError::NotFound { at: root })
100 }
100 }
101 } else {
101 } else {
102 let root = Self::find_repo_root()?;
102 let root = Self::find_repo_root()?;
103 Self::new_at_path(root, config)
103 Self::new_at_path(root, config)
104 }
104 }
105 }
105 }
106
106
107 /// To be called after checking that `.hg` is a sub-directory
107 /// To be called after checking that `.hg` is a sub-directory
108 fn new_at_path(
108 fn new_at_path(
109 working_directory: PathBuf,
109 working_directory: PathBuf,
110 config: &Config,
110 config: &Config,
111 ) -> Result<Self, RepoError> {
111 ) -> Result<Self, RepoError> {
112 let dot_hg = working_directory.join(".hg");
112 let dot_hg = working_directory.join(".hg");
113
113
114 let mut repo_config_files =
114 let mut repo_config_files =
115 vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")];
115 vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")];
116
116
117 let hg_vfs = Vfs { base: &dot_hg };
117 let hg_vfs = Vfs { base: &dot_hg };
118 let mut reqs = requirements::load_if_exists(hg_vfs)?;
118 let mut reqs = requirements::load_if_exists(hg_vfs)?;
119 let relative =
119 let relative =
120 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
120 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
121 let shared =
121 let shared =
122 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
122 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
123
123
124 // From `mercurial/localrepo.py`:
124 // From `mercurial/localrepo.py`:
125 //
125 //
126 // if .hg/requires contains the sharesafe requirement, it means
126 // if .hg/requires contains the sharesafe requirement, it means
127 // there exists a `.hg/store/requires` too and we should read it
127 // there exists a `.hg/store/requires` too and we should read it
128 // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
128 // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
129 // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
129 // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
130 // is not present, refer checkrequirementscompat() for that
130 // is not present, refer checkrequirementscompat() for that
131 //
131 //
132 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
132 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
133 // repository was shared the old way. We check the share source
133 // repository was shared the old way. We check the share source
134 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
134 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
135 // current repository needs to be reshared
135 // current repository needs to be reshared
136 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
136 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
137
137
138 let store_path;
138 let store_path;
139 if !shared {
139 if !shared {
140 store_path = dot_hg.join("store");
140 store_path = dot_hg.join("store");
141 } else {
141 } else {
142 let bytes = hg_vfs.read("sharedpath")?;
142 let bytes = hg_vfs.read("sharedpath")?;
143 let mut shared_path =
143 let mut shared_path =
144 get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
144 get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
145 .to_owned();
145 .to_owned();
146 if relative {
146 if relative {
147 shared_path = dot_hg.join(shared_path)
147 shared_path = dot_hg.join(shared_path)
148 }
148 }
149 if !is_dir(&shared_path)? {
149 if !is_dir(&shared_path)? {
150 return Err(HgError::corrupted(format!(
150 return Err(HgError::corrupted(format!(
151 ".hg/sharedpath points to nonexistent directory {}",
151 ".hg/sharedpath points to nonexistent directory {}",
152 shared_path.display()
152 shared_path.display()
153 ))
153 ))
154 .into());
154 .into());
155 }
155 }
156
156
157 store_path = shared_path.join("store");
157 store_path = shared_path.join("store");
158
158
159 let source_is_share_safe =
159 let source_is_share_safe =
160 requirements::load(Vfs { base: &shared_path })?
160 requirements::load(Vfs { base: &shared_path })?
161 .contains(requirements::SHARESAFE_REQUIREMENT);
161 .contains(requirements::SHARESAFE_REQUIREMENT);
162
162
163 if share_safe != source_is_share_safe {
163 if share_safe != source_is_share_safe {
164 return Err(HgError::unsupported("share-safe mismatch").into());
164 return Err(HgError::unsupported("share-safe mismatch").into());
165 }
165 }
166
166
167 if share_safe {
167 if share_safe {
168 repo_config_files.insert(0, shared_path.join("hgrc"))
168 repo_config_files.insert(0, shared_path.join("hgrc"))
169 }
169 }
170 }
170 }
171 if share_safe {
171 if share_safe {
172 reqs.extend(requirements::load(Vfs { base: &store_path })?);
172 reqs.extend(requirements::load(Vfs { base: &store_path })?);
173 }
173 }
174
174
175 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
175 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
176 config.combine_with_repo(&repo_config_files)?
176 config.combine_with_repo(&repo_config_files)?
177 } else {
177 } else {
178 config.clone()
178 config.clone()
179 };
179 };
180
180
181 let repo = Self {
181 let repo = Self {
182 requirements: reqs,
182 requirements: reqs,
183 working_directory,
183 working_directory,
184 store: store_path,
184 store: store_path,
185 dot_hg,
185 dot_hg,
186 config: repo_config,
186 config: repo_config,
187 dirstate_parents: LazyCell::new(),
187 dirstate_parents: LazyCell::new(),
188 dirstate_map: LazyCell::new(),
188 dirstate_map: LazyCell::new(),
189 changelog: LazyCell::new(),
189 changelog: LazyCell::new(),
190 manifestlog: LazyCell::new(),
190 manifestlog: LazyCell::new(),
191 };
191 };
192
192
193 requirements::check(&repo)?;
193 requirements::check(&repo)?;
194
194
195 Ok(repo)
195 Ok(repo)
196 }
196 }
197
197
198 pub fn working_directory_path(&self) -> &Path {
198 pub fn working_directory_path(&self) -> &Path {
199 &self.working_directory
199 &self.working_directory
200 }
200 }
201
201
202 pub fn requirements(&self) -> &HashSet<String> {
202 pub fn requirements(&self) -> &HashSet<String> {
203 &self.requirements
203 &self.requirements
204 }
204 }
205
205
206 pub fn config(&self) -> &Config {
206 pub fn config(&self) -> &Config {
207 &self.config
207 &self.config
208 }
208 }
209
209
210 /// For accessing repository files (in `.hg`), except for the store
210 /// For accessing repository files (in `.hg`), except for the store
211 /// (`.hg/store`).
211 /// (`.hg/store`).
212 pub fn hg_vfs(&self) -> Vfs<'_> {
212 pub fn hg_vfs(&self) -> Vfs<'_> {
213 Vfs { base: &self.dot_hg }
213 Vfs { base: &self.dot_hg }
214 }
214 }
215
215
216 /// For accessing repository store files (in `.hg/store`)
216 /// For accessing repository store files (in `.hg/store`)
217 pub fn store_vfs(&self) -> Vfs<'_> {
217 pub fn store_vfs(&self) -> Vfs<'_> {
218 Vfs { base: &self.store }
218 Vfs { base: &self.store }
219 }
219 }
220
220
221 /// For accessing the working copy
221 /// For accessing the working copy
222 pub fn working_directory_vfs(&self) -> Vfs<'_> {
222 pub fn working_directory_vfs(&self) -> Vfs<'_> {
223 Vfs {
223 Vfs {
224 base: &self.working_directory,
224 base: &self.working_directory,
225 }
225 }
226 }
226 }
227
227
228 pub fn try_with_wlock_no_wait<R>(
228 pub fn try_with_wlock_no_wait<R>(
229 &self,
229 &self,
230 f: impl FnOnce() -> R,
230 f: impl FnOnce() -> R,
231 ) -> Result<R, LockError> {
231 ) -> Result<R, LockError> {
232 try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
232 try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
233 }
233 }
234
234
235 /// Whether this repo should use dirstate-v2.
235 /// Whether this repo should use dirstate-v2.
236 /// The presence of `dirstate-v2` in the requirements does not mean that
236 /// The presence of `dirstate-v2` in the requirements does not mean that
237 /// the on-disk dirstate is necessarily in version 2. In most cases,
237 /// the on-disk dirstate is necessarily in version 2. In most cases,
238 /// a dirstate-v2 file will indeed be found, but in rare cases (like the
238 /// a dirstate-v2 file will indeed be found, but in rare cases (like the
239 /// upgrade mechanism being cut short), the on-disk version will be a
239 /// upgrade mechanism being cut short), the on-disk version will be a
240 /// v1 file.
240 /// v1 file.
241 /// Semantically, having a requirement only means that a client cannot
241 /// Semantically, having a requirement only means that a client cannot
242 /// properly understand or properly update the repo if it lacks the support
242 /// properly understand or properly update the repo if it lacks the support
243 /// for the required feature, but not that that feature is actually used
243 /// for the required feature, but not that that feature is actually used
244 /// in all occasions.
244 /// in all occasions.
245 pub fn use_dirstate_v2(&self) -> bool {
245 pub fn use_dirstate_v2(&self) -> bool {
246 self.requirements
246 self.requirements
247 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
247 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
248 }
248 }
249
249
250 pub fn has_sparse(&self) -> bool {
250 pub fn has_sparse(&self) -> bool {
251 self.requirements.contains(requirements::SPARSE_REQUIREMENT)
251 self.requirements.contains(requirements::SPARSE_REQUIREMENT)
252 }
252 }
253
253
254 pub fn has_narrow(&self) -> bool {
254 pub fn has_narrow(&self) -> bool {
255 self.requirements.contains(requirements::NARROW_REQUIREMENT)
255 self.requirements.contains(requirements::NARROW_REQUIREMENT)
256 }
256 }
257
257
258 pub fn has_nodemap(&self) -> bool {
258 pub fn has_nodemap(&self) -> bool {
259 self.requirements
259 self.requirements
260 .contains(requirements::NODEMAP_REQUIREMENT)
260 .contains(requirements::NODEMAP_REQUIREMENT)
261 }
261 }
262
262
263 fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
263 fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
264 Ok(self
264 Ok(self
265 .hg_vfs()
265 .hg_vfs()
266 .read("dirstate")
266 .read("dirstate")
267 .io_not_found_as_none()?
267 .io_not_found_as_none()?
268 .unwrap_or_default())
268 .unwrap_or_default())
269 }
269 }
270
270
271 fn dirstate_identity(&self) -> Result<Option<u64>, HgError> {
271 fn dirstate_identity(&self) -> Result<Option<u64>, HgError> {
272 use std::os::unix::fs::MetadataExt;
272 use std::os::unix::fs::MetadataExt;
273 Ok(self
273 Ok(self
274 .hg_vfs()
274 .hg_vfs()
275 .symlink_metadata("dirstate")
275 .symlink_metadata("dirstate")
276 .io_not_found_as_none()?
276 .io_not_found_as_none()?
277 .map(|meta| meta.ino()))
277 .map(|meta| meta.ino()))
278 }
278 }
279
279
280 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
280 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
281 Ok(*self
281 Ok(*self
282 .dirstate_parents
282 .dirstate_parents
283 .get_or_init(|| self.read_dirstate_parents())?)
283 .get_or_init(|| self.read_dirstate_parents())?)
284 }
284 }
285
285
286 fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
286 fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
287 let dirstate = self.dirstate_file_contents()?;
287 let dirstate = self.dirstate_file_contents()?;
288 let parents = if dirstate.is_empty() {
288 let parents = if dirstate.is_empty() {
289 DirstateParents::NULL
289 DirstateParents::NULL
290 } else if self.use_dirstate_v2() {
290 } else if self.use_dirstate_v2() {
291 let docket_res =
291 let docket_res =
292 crate::dirstate_tree::on_disk::read_docket(&dirstate);
292 crate::dirstate_tree::on_disk::read_docket(&dirstate);
293 match docket_res {
293 match docket_res {
294 Ok(docket) => docket.parents(),
294 Ok(docket) => docket.parents(),
295 Err(_) => {
295 Err(_) => {
296 log::info!(
296 log::info!(
297 "Parsing dirstate docket failed, \
297 "Parsing dirstate docket failed, \
298 falling back to dirstate-v1"
298 falling back to dirstate-v1"
299 );
299 );
300 *crate::dirstate::parsers::parse_dirstate_parents(
300 *crate::dirstate::parsers::parse_dirstate_parents(
301 &dirstate,
301 &dirstate,
302 )?
302 )?
303 }
303 }
304 }
304 }
305 } else {
305 } else {
306 *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
306 *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
307 };
307 };
308 self.dirstate_parents.set(parents);
308 self.dirstate_parents.set(parents);
309 Ok(parents)
309 Ok(parents)
310 }
310 }
311
311
    /// Returns the information read from the dirstate docket necessary to
    /// check if the data file has been updated/deleted by another process
    /// since we last read the dirstate.
    /// Namely, the inode, data file uuid and the data size.
    fn get_dirstate_data_file_integrity(
        &self,
    ) -> Result<DirstateMapIdentity, HgError> {
        // Only meaningful with dirstate-v2, where the map data lives in a
        // separate file referenced by the docket.
        assert!(
            self.use_dirstate_v2(),
            "accessing dirstate data file ID without dirstate-v2"
        );
        // Get the identity before the contents since we could have a race
        // between the two. Having an identity that is too old is fine, but
        // one that is younger than the content change is bad.
        let identity = self.dirstate_identity()?;
        let dirstate = self.dirstate_file_contents()?;
        if dirstate.is_empty() {
            // No dirstate on disk: null parents, no data file, zero size.
            self.dirstate_parents.set(DirstateParents::NULL);
            Ok((identity, None, 0))
        } else {
            let docket_res =
                crate::dirstate_tree::on_disk::read_docket(&dirstate);
            match docket_res {
                Ok(docket) => {
                    // Cache the parents as a side effect while we have the
                    // docket at hand.
                    self.dirstate_parents.set(docket.parents());
                    Ok((
                        identity,
                        Some(docket.uuid.to_owned()),
                        docket.data_size(),
                    ))
                }
                Err(_) => {
                    // An unparsable docket most likely means the dirstate is
                    // in the v1 format: read the parents from that instead.
                    log::info!(
                        "Parsing dirstate docket failed, \
                        falling back to dirstate-v1"
                    );
                    let parents =
                        *crate::dirstate::parsers::parse_dirstate_parents(
                            &dirstate,
                        )?;
                    self.dirstate_parents.set(parents);
                    Ok((identity, None, 0))
                }
            }
        }
    }
358
358
359 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
359 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
360 if self.use_dirstate_v2() {
360 if self.use_dirstate_v2() {
361 // The v2 dirstate is split into a docket and a data file.
361 // The v2 dirstate is split into a docket and a data file.
362 // Since we don't always take the `wlock` to read it
362 // Since we don't always take the `wlock` to read it
363 // (like in `hg status`), it is susceptible to races.
363 // (like in `hg status`), it is susceptible to races.
364 // A simple retry method should be enough since full rewrites
364 // A simple retry method should be enough since full rewrites
365 // only happen when too much garbage data is present and
365 // only happen when too much garbage data is present and
366 // this race is unlikely.
366 // this race is unlikely.
367 let mut tries = 0;
367 let mut tries = 0;
368
368
369 while tries < V2_MAX_READ_ATTEMPTS {
369 while tries < V2_MAX_READ_ATTEMPTS {
370 tries += 1;
370 tries += 1;
371 match self.read_docket_and_data_file() {
371 match self.read_docket_and_data_file() {
372 Ok(m) => {
372 Ok(m) => {
373 return Ok(m);
373 return Ok(m);
374 }
374 }
375 Err(e) => match e {
375 Err(e) => match e {
376 DirstateError::Common(HgError::RaceDetected(
376 DirstateError::Common(HgError::RaceDetected(
377 context,
377 context,
378 )) => {
378 )) => {
379 log::info!(
379 log::info!(
380 "dirstate read race detected {} (retry {}/{})",
380 "dirstate read race detected {} (retry {}/{})",
381 context,
381 context,
382 tries,
382 tries,
383 V2_MAX_READ_ATTEMPTS,
383 V2_MAX_READ_ATTEMPTS,
384 );
384 );
385 continue;
385 continue;
386 }
386 }
387 _ => {
387 _ => {
388 log::info!(
388 log::info!(
389 "Reading dirstate v2 failed, \
389 "Reading dirstate v2 failed, \
390 falling back to v1"
390 falling back to v1"
391 );
391 );
392 return self.new_dirstate_map_v1();
392 return self.new_dirstate_map_v1();
393 }
393 }
394 },
394 },
395 }
395 }
396 }
396 }
397 let error = HgError::abort(
397 let error = HgError::abort(
398 format!("dirstate read race happened {tries} times in a row"),
398 format!("dirstate read race happened {tries} times in a row"),
399 255,
399 255,
400 None,
400 None,
401 );
401 );
402 Err(DirstateError::Common(error))
402 Err(DirstateError::Common(error))
403 } else {
403 } else {
404 self.new_dirstate_map_v1()
404 self.new_dirstate_map_v1()
405 }
405 }
406 }
406 }
407
407
408 fn new_dirstate_map_v1(&self) -> Result<OwningDirstateMap, DirstateError> {
408 fn new_dirstate_map_v1(&self) -> Result<OwningDirstateMap, DirstateError> {
409 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
409 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
410 let identity = self.dirstate_identity()?;
410 let identity = self.dirstate_identity()?;
411 let dirstate_file_contents = self.dirstate_file_contents()?;
411 let dirstate_file_contents = self.dirstate_file_contents()?;
412 if dirstate_file_contents.is_empty() {
412 if dirstate_file_contents.is_empty() {
413 self.dirstate_parents.set(DirstateParents::NULL);
413 self.dirstate_parents.set(DirstateParents::NULL);
414 Ok(OwningDirstateMap::new_empty(Vec::new()))
414 Ok(OwningDirstateMap::new_empty(Vec::new()))
415 } else {
415 } else {
416 let (map, parents) =
416 let (map, parents) =
417 OwningDirstateMap::new_v1(dirstate_file_contents, identity)?;
417 OwningDirstateMap::new_v1(dirstate_file_contents, identity)?;
418 self.dirstate_parents.set(parents);
418 self.dirstate_parents.set(parents);
419 Ok(map)
419 Ok(map)
420 }
420 }
421 }
421 }
422
422
    /// Read the dirstate-v2 docket and the data file it references, and
    /// build the in-memory map from them.
    ///
    /// Returns `HgError::RaceDetected` (wrapped in `DirstateError`) when the
    /// data file disappears between the docket read and the data read, so
    /// the caller can retry.
    fn read_docket_and_data_file(
        &self,
    ) -> Result<OwningDirstateMap, DirstateError> {
        debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
        let dirstate_file_contents = self.dirstate_file_contents()?;
        let identity = self.dirstate_identity()?;
        if dirstate_file_contents.is_empty() {
            // No dirstate at all: empty map, null parents.
            self.dirstate_parents.set(DirstateParents::NULL);
            return Ok(OwningDirstateMap::new_empty(Vec::new()));
        }
        let docket = crate::dirstate_tree::on_disk::read_docket(
            &dirstate_file_contents,
        )?;
        debug_wait_for_file_or_print(
            self.config(),
            "dirstate.post-docket-read-file",
        );
        self.dirstate_parents.set(docket.parents());
        let uuid = docket.uuid.to_owned();
        let data_size = docket.data_size();

        let context = "between reading dirstate docket and data file";
        let race_error = HgError::RaceDetected(context.into());
        let metadata = docket.tree_metadata();

        let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
            // Don't mmap on NFS to prevent `SIGBUS` error on deletion
            let contents = self.hg_vfs().read(docket.data_filename());
            let contents = match contents {
                Ok(c) => c,
                Err(HgError::IoError { error, context }) => {
                    match error.raw_os_error().expect("real os error") {
                        // 2 = ENOENT, No such file or directory
                        // 116 = ESTALE, Stale NFS file handle
                        //
                        // TODO match on `error.kind()` when
                        // `ErrorKind::StaleNetworkFileHandle` is stable.
                        2 | 116 => {
                            // Race where the data file was deleted right after
                            // we read the docket, try again
                            return Err(race_error.into());
                        }
                        _ => {
                            return Err(
                                HgError::IoError { error, context }.into()
                            )
                        }
                    }
                }
                Err(e) => return Err(e.into()),
            };
            OwningDirstateMap::new_v2(
                contents, data_size, metadata, uuid, identity,
            )
        } else {
            // Not on NFS: mmap the data file, treating "not found" as the
            // same deletion race as above.
            match self
                .hg_vfs()
                .mmap_open(docket.data_filename())
                .io_not_found_as_none()
            {
                Ok(Some(data_mmap)) => OwningDirstateMap::new_v2(
                    data_mmap, data_size, metadata, uuid, identity,
                ),
                Ok(None) => {
                    // Race where the data file was deleted right after we
                    // read the docket, try again
                    return Err(race_error.into());
                }
                Err(e) => return Err(e.into()),
            }
        }?;

        // Devel-only knob choosing between appending to the existing data
        // file and forcing a full rewrite; unknown values fall back to Auto.
        let write_mode_config = self
            .config()
            .get_str(b"devel", b"dirstate.v2.data_update_mode")
            .unwrap_or(Some("auto"))
            .unwrap_or("auto"); // don't bother for devel options
        let write_mode = match write_mode_config {
            "auto" => DirstateMapWriteMode::Auto,
            "force-new" => DirstateMapWriteMode::ForceNewDataFile,
            "force-append" => DirstateMapWriteMode::ForceAppend,
            _ => DirstateMapWriteMode::Auto,
        };

        map.with_dmap_mut(|m| m.set_write_mode(write_mode));

        Ok(map)
    }
511
511
    /// Shared accessor to the dirstate map, loading it from disk on first
    /// use via `new_dirstate_map`.
    pub fn dirstate_map(
        &self,
    ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
        self.dirstate_map.get_or_init(|| self.new_dirstate_map())
    }
517
517
    /// Mutable accessor to the dirstate map, loading it from disk on first
    /// use via `new_dirstate_map`.
    pub fn dirstate_map_mut(
        &self,
    ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
        self.dirstate_map
            .get_mut_or_init(|| self.new_dirstate_map())
    }
524
524
    /// Open the changelog from the store vfs; used lazily by the
    /// `changelog`/`changelog_mut` accessors.
    fn new_changelog(&self) -> Result<Changelog, HgError> {
        Changelog::open(&self.store_vfs(), self.has_nodemap())
    }
528
528
    /// Shared accessor to the lazily-opened changelog.
    pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
        self.changelog.get_or_init(|| self.new_changelog())
    }
532
532
    /// Mutable accessor to the lazily-opened changelog.
    pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
        self.changelog.get_mut_or_init(|| self.new_changelog())
    }
536
536
    /// Open the manifest log from the store vfs; used lazily by the
    /// `manifestlog`/`manifestlog_mut` accessors.
    fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
        Manifestlog::open(&self.store_vfs(), self.has_nodemap())
    }
540
540
    /// Shared accessor to the lazily-opened manifest log.
    pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
        self.manifestlog.get_or_init(|| self.new_manifestlog())
    }
544
544
    /// Mutable accessor to the lazily-opened manifest log.
    pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
        self.manifestlog.get_mut_or_init(|| self.new_manifestlog())
    }
548
548
549 /// Returns the manifest of the *changeset* with the given node ID
549 /// Returns the manifest of the *changeset* with the given node ID
550 pub fn manifest_for_node(
550 pub fn manifest_for_node(
551 &self,
551 &self,
552 node: impl Into<NodePrefix>,
552 node: impl Into<NodePrefix>,
553 ) -> Result<Manifest, RevlogError> {
553 ) -> Result<Manifest, RevlogError> {
554 self.manifestlog()?.data_for_node(
554 self.manifestlog()?.data_for_node(
555 self.changelog()?
555 self.changelog()?
556 .data_for_node(node.into())?
556 .data_for_node(node.into())?
557 .manifest_node()?
557 .manifest_node()?
558 .into(),
558 .into(),
559 )
559 )
560 }
560 }
561
561
562 /// Returns the manifest of the *changeset* with the given revision number
562 /// Returns the manifest of the *changeset* with the given revision number
563 pub fn manifest_for_rev(
563 pub fn manifest_for_rev(
564 &self,
564 &self,
565 revision: Revision,
565 revision: UncheckedRevision,
566 ) -> Result<Manifest, RevlogError> {
566 ) -> Result<Manifest, RevlogError> {
567 self.manifestlog()?.data_for_node(
567 self.manifestlog()?.data_for_node(
568 self.changelog()?
568 self.changelog()?
569 .data_for_rev(revision)?
569 .data_for_rev(revision)?
570 .manifest_node()?
570 .manifest_node()?
571 .into(),
571 .into(),
572 )
572 )
573 }
573 }
574
574
575 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
575 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
576 if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
576 if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
577 Ok(entry.tracked())
577 Ok(entry.tracked())
578 } else {
578 } else {
579 Ok(false)
579 Ok(false)
580 }
580 }
581 }
581 }
582
582
    /// Open the `Filelog` (per-file revlog) for the given repo-relative
    /// path.
    pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
        Filelog::open(self, path)
    }
586
586
    /// Write to disk any updates that were made through `dirstate_map_mut`.
    ///
    /// The "wlock" must be held while calling this.
    /// See for example `try_with_wlock_no_wait`.
    ///
    /// TODO: have a `WritableRepo` type only accessible while holding the
    /// lock?
    pub fn write_dirstate(&self) -> Result<(), DirstateError> {
        let map = self.dirstate_map()?;
        // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
        // it’s unset
        let parents = self.dirstate_parents()?;
        let (packed_dirstate, old_uuid_to_remove) = if self.use_dirstate_v2() {
            // Compare on-disk identity/uuid/size against what we read
            // earlier to detect concurrent modification.
            let (identity, uuid, data_size) =
                self.get_dirstate_data_file_integrity()?;
            let identity_changed = identity != map.old_identity();
            let uuid_changed = uuid.as_deref() != map.old_uuid();
            let data_length_changed = data_size != map.old_data_size();

            if identity_changed || uuid_changed || data_length_changed {
                // If any of identity, uuid or length have changed since
                // last disk read, don't write.
                // This is fine because either we're in a command that doesn't
                // write anything too important (like `hg status`), or we're in
                // `hg add` and we're supposed to have taken the lock before
                // reading anyway.
                //
                // TODO complain loudly if we've changed anything important
                // without taking the lock.
                // (see `hg help config.format.use-dirstate-tracked-hint`)
                log::debug!(
                    "dirstate has changed since last read, not updating."
                );
                return Ok(());
            }

            // With no previous uuid there is no existing data file to
            // append to, so force creation of a new one.
            let uuid_opt = map.old_uuid();
            let write_mode = if uuid_opt.is_some() {
                DirstateMapWriteMode::Auto
            } else {
                DirstateMapWriteMode::ForceNewDataFile
            };
            let (data, tree_metadata, append, old_data_size) =
                map.pack_v2(write_mode)?;

            // Reuse the uuid, or generate a new one, keeping the old for
            // deletion.
            let (uuid, old_uuid) = match uuid_opt {
                Some(uuid) => {
                    let as_str = std::str::from_utf8(uuid)
                        .map_err(|_| {
                            HgError::corrupted(
                                "non-UTF-8 dirstate data file ID",
                            )
                        })?
                        .to_owned();
                    if append {
                        (as_str, None)
                    } else {
                        (DirstateDocket::new_uid(), Some(as_str))
                    }
                }
                None => (DirstateDocket::new_uid(), None),
            };

            let data_filename = format!("dirstate.{}", uuid);
            let data_filename = self.hg_vfs().join(data_filename);
            let mut options = std::fs::OpenOptions::new();
            options.write(true);

            // Why are we not using the O_APPEND flag when appending?
            //
            // - O_APPEND makes it trickier to deal with garbage at the end of
            //   the file, left by a previous uncommitted transaction. By
            //   starting the write at [old_data_size] we make sure we erase
            //   all such garbage.
            //
            // - O_APPEND requires to special-case 0-byte writes, whereas we
            //   don't need that.
            //
            // - Some OSes have bugs in implementation O_APPEND:
            //   revlog.py talks about a Solaris bug, but we also saw some ZFS
            //   bug: https://github.com/openzfs/zfs/pull/3124,
            //   https://github.com/openzfs/zfs/issues/13370
            //
            if !append {
                log::trace!("creating a new dirstate data file");
                options.create_new(true);
            } else {
                log::trace!("appending to the dirstate data file");
            }

            let data_size = (|| {
                // TODO: loop and try another random ID if !append and this
                // returns `ErrorKind::AlreadyExists`? Collision chance of two
                // random IDs is one in 2**32
                let mut file = options.open(&data_filename)?;
                if append {
                    file.seek(SeekFrom::Start(old_data_size as u64))?;
                }
                file.write_all(&data)?;
                file.flush()?;
                // The final stream position is the new data size recorded
                // in the docket.
                file.seek(SeekFrom::Current(0))
            })()
            .when_writing_file(&data_filename)?;

            let packed_dirstate = DirstateDocket::serialize(
                parents,
                tree_metadata,
                data_size,
                uuid.as_bytes(),
            )
            .map_err(|_: std::num::TryFromIntError| {
                HgError::corrupted("overflow in dirstate docket serialization")
            })?;

            (packed_dirstate, old_uuid)
        } else {
            let identity = self.dirstate_identity()?;
            if identity != map.old_identity() {
                // If identity changed since last disk read, don't write.
                // This is fine because either we're in a command that doesn't
                // write anything too important (like `hg status`), or we're in
                // `hg add` and we're supposed to have taken the lock before
                // reading anyway.
                //
                // TODO complain loudly if we've changed anything important
                // without taking the lock.
                // (see `hg help config.format.use-dirstate-tracked-hint`)
                log::debug!(
                    "dirstate has changed since last read, not updating."
                );
                return Ok(());
            }
            (map.pack_v1(parents)?, None)
        };

        let vfs = self.hg_vfs();
        vfs.atomic_write("dirstate", &packed_dirstate)?;
        if let Some(uuid) = old_uuid_to_remove {
            // Remove the old data file after the new docket pointing to the
            // new data file was written.
            vfs.remove_file(format!("dirstate.{}", uuid))?;
        }
        Ok(())
    }
733 }
733 }
734
734
/// Lazily-initialized component of `Repo` with interior mutability
///
/// This differs from `OnceCell` in that the value can still be "deinitialized"
/// later by setting its inner `Option` to `None`. It also takes the
/// initialization function as an argument when the value is requested, not
/// when the instance is created.
struct LazyCell<T> {
    value: RefCell<Option<T>>,
}

impl<T> LazyCell<T> {
    /// Create an empty, uninitialized cell.
    fn new() -> Self {
        LazyCell {
            value: RefCell::new(None),
        }
    }

    /// Store `value`, replacing whatever was there before.
    fn set(&self, value: T) {
        self.value.replace(Some(value));
    }

    /// Borrow the value, running `init` first if the cell is still empty.
    fn get_or_init<E>(
        &self,
        init: impl Fn() -> Result<T, E>,
    ) -> Result<Ref<T>, E> {
        // Take the exclusive borrow only when initialization is actually
        // needed, so an outstanding shared borrow elsewhere does not cause
        // a `RefCell` panic on the common, already-initialized path.
        if self.value.borrow().is_none() {
            *self.value.borrow_mut() = Some(init()?);
        }
        Ok(Ref::map(self.value.borrow(), |slot| {
            slot.as_ref().expect("LazyCell was just initialized")
        }))
    }

    /// Mutably borrow the value, running `init` first if still empty.
    fn get_mut_or_init<E>(
        &self,
        init: impl Fn() -> Result<T, E>,
    ) -> Result<RefMut<T>, E> {
        let mut guard = self.value.borrow_mut();
        if guard.is_none() {
            *guard = Some(init()?);
        }
        Ok(RefMut::map(guard, |slot| {
            slot.as_mut().expect("LazyCell was just initialized")
        }))
    }
}
@@ -1,343 +1,353 b''
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::revlog::Revision;
2 use crate::revlog::Revision;
3 use crate::revlog::{Node, NodePrefix};
3 use crate::revlog::{Node, NodePrefix};
4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
5 use crate::utils::hg_path::HgPath;
5 use crate::utils::hg_path::HgPath;
6 use crate::vfs::Vfs;
6 use crate::vfs::Vfs;
7 use crate::UncheckedRevision;
7 use itertools::Itertools;
8 use itertools::Itertools;
8 use std::ascii::escape_default;
9 use std::ascii::escape_default;
9 use std::borrow::Cow;
10 use std::borrow::Cow;
10 use std::fmt::{Debug, Formatter};
11 use std::fmt::{Debug, Formatter};
11
12
12 /// A specialized `Revlog` to work with changelog data format.
13 /// A specialized `Revlog` to work with changelog data format.
13 pub struct Changelog {
14 pub struct Changelog {
14 /// The generic `revlog` format.
15 /// The generic `revlog` format.
15 pub(crate) revlog: Revlog,
16 pub(crate) revlog: Revlog,
16 }
17 }
17
18
18 impl Changelog {
19 impl Changelog {
19 /// Open the `changelog` of a repository given by its root.
20 /// Open the `changelog` of a repository given by its root.
20 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
21 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
21 let revlog =
22 let revlog =
22 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
23 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
23 Ok(Self { revlog })
24 Ok(Self { revlog })
24 }
25 }
25
26
26 /// Return the `ChangelogRevisionData` for the given node ID.
27 /// Return the `ChangelogRevisionData` for the given node ID.
27 pub fn data_for_node(
28 pub fn data_for_node(
28 &self,
29 &self,
29 node: NodePrefix,
30 node: NodePrefix,
30 ) -> Result<ChangelogRevisionData, RevlogError> {
31 ) -> Result<ChangelogRevisionData, RevlogError> {
31 let rev = self.revlog.rev_from_node(node)?;
32 let rev = self.revlog.rev_from_node(node)?;
32 self.data_for_rev(rev)
33 self.entry_for_checked_rev(rev)?.data()
33 }
34 }
34
35
35 /// Return the [`ChangelogEntry`] for the given revision number.
36 /// Return the [`ChangelogEntry`] for the given revision number.
36 pub fn entry_for_rev(
37 pub fn entry_for_rev(
37 &self,
38 &self,
39 rev: UncheckedRevision,
40 ) -> Result<ChangelogEntry, RevlogError> {
41 let revlog_entry = self.revlog.get_entry(rev)?;
42 Ok(ChangelogEntry { revlog_entry })
43 }
44
45 /// Same as [`Self::entry_for_rev`] for checked revisions.
46 fn entry_for_checked_rev(
47 &self,
38 rev: Revision,
48 rev: Revision,
39 ) -> Result<ChangelogEntry, RevlogError> {
49 ) -> Result<ChangelogEntry, RevlogError> {
40 let revlog_entry = self.revlog.get_entry(rev)?;
50 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
41 Ok(ChangelogEntry { revlog_entry })
51 Ok(ChangelogEntry { revlog_entry })
42 }
52 }
43
53
44 /// Return the [`ChangelogRevisionData`] for the given revision number.
54 /// Return the [`ChangelogRevisionData`] for the given revision number.
45 ///
55 ///
46 /// This is a useful shortcut in case the caller does not need the
56 /// This is a useful shortcut in case the caller does not need the
47 /// generic revlog information (parents, hashes etc). Otherwise
57 /// generic revlog information (parents, hashes etc). Otherwise
48 /// consider taking a [`ChangelogEntry`] with
58 /// consider taking a [`ChangelogEntry`] with
49 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
59 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
50 pub fn data_for_rev(
60 pub fn data_for_rev(
51 &self,
61 &self,
52 rev: Revision,
62 rev: UncheckedRevision,
53 ) -> Result<ChangelogRevisionData, RevlogError> {
63 ) -> Result<ChangelogRevisionData, RevlogError> {
54 self.entry_for_rev(rev)?.data()
64 self.entry_for_rev(rev)?.data()
55 }
65 }
56
66
57 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
67 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
58 self.revlog.node_from_rev(rev)
68 self.revlog.node_from_rev(rev)
59 }
69 }
60
70
61 pub fn rev_from_node(
71 pub fn rev_from_node(
62 &self,
72 &self,
63 node: NodePrefix,
73 node: NodePrefix,
64 ) -> Result<Revision, RevlogError> {
74 ) -> Result<Revision, RevlogError> {
65 self.revlog.rev_from_node(node)
75 self.revlog.rev_from_node(node)
66 }
76 }
67 }
77 }
68
78
69 /// A specialized `RevlogEntry` for `changelog` data format
79 /// A specialized `RevlogEntry` for `changelog` data format
70 ///
80 ///
71 /// This is a `RevlogEntry` with the added semantics that the associated
81 /// This is a `RevlogEntry` with the added semantics that the associated
72 /// data should meet the requirements for `changelog`, materialized by
82 /// data should meet the requirements for `changelog`, materialized by
73 /// the fact that `data()` constructs a `ChangelogRevisionData`.
83 /// the fact that `data()` constructs a `ChangelogRevisionData`.
74 /// In case that promise would be broken, the `data` method returns an error.
84 /// In case that promise would be broken, the `data` method returns an error.
75 #[derive(Clone)]
85 #[derive(Clone)]
76 pub struct ChangelogEntry<'changelog> {
86 pub struct ChangelogEntry<'changelog> {
77 /// Same data, as a generic `RevlogEntry`.
87 /// Same data, as a generic `RevlogEntry`.
78 pub(crate) revlog_entry: RevlogEntry<'changelog>,
88 pub(crate) revlog_entry: RevlogEntry<'changelog>,
79 }
89 }
80
90
81 impl<'changelog> ChangelogEntry<'changelog> {
91 impl<'changelog> ChangelogEntry<'changelog> {
82 pub fn data<'a>(
92 pub fn data<'a>(
83 &'a self,
93 &'a self,
84 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
94 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
85 let bytes = self.revlog_entry.data()?;
95 let bytes = self.revlog_entry.data()?;
86 if bytes.is_empty() {
96 if bytes.is_empty() {
87 Ok(ChangelogRevisionData::null())
97 Ok(ChangelogRevisionData::null())
88 } else {
98 } else {
89 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
99 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
90 RevlogError::Other(HgError::CorruptedRepository(format!(
100 RevlogError::Other(HgError::CorruptedRepository(format!(
91 "Invalid changelog data for revision {}: {:?}",
101 "Invalid changelog data for revision {}: {:?}",
92 self.revlog_entry.revision(),
102 self.revlog_entry.revision(),
93 err
103 err
94 )))
104 )))
95 })?)
105 })?)
96 }
106 }
97 }
107 }
98
108
99 /// Obtain a reference to the underlying `RevlogEntry`.
109 /// Obtain a reference to the underlying `RevlogEntry`.
100 ///
110 ///
101 /// This allows the caller to access the information that is common
111 /// This allows the caller to access the information that is common
102 /// to all revlog entries: revision number, node id, parent revisions etc.
112 /// to all revlog entries: revision number, node id, parent revisions etc.
103 pub fn as_revlog_entry(&self) -> &RevlogEntry {
113 pub fn as_revlog_entry(&self) -> &RevlogEntry {
104 &self.revlog_entry
114 &self.revlog_entry
105 }
115 }
106
116
107 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
117 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
108 Ok(self
118 Ok(self
109 .revlog_entry
119 .revlog_entry
110 .p1_entry()?
120 .p1_entry()?
111 .map(|revlog_entry| Self { revlog_entry }))
121 .map(|revlog_entry| Self { revlog_entry }))
112 }
122 }
113
123
114 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
124 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
115 Ok(self
125 Ok(self
116 .revlog_entry
126 .revlog_entry
117 .p2_entry()?
127 .p2_entry()?
118 .map(|revlog_entry| Self { revlog_entry }))
128 .map(|revlog_entry| Self { revlog_entry }))
119 }
129 }
120 }
130 }
121
131
122 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
132 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
123 #[derive(PartialEq)]
133 #[derive(PartialEq)]
124 pub struct ChangelogRevisionData<'changelog> {
134 pub struct ChangelogRevisionData<'changelog> {
125 /// The data bytes of the `changelog` entry.
135 /// The data bytes of the `changelog` entry.
126 bytes: Cow<'changelog, [u8]>,
136 bytes: Cow<'changelog, [u8]>,
127 /// The end offset for the hex manifest (not including the newline)
137 /// The end offset for the hex manifest (not including the newline)
128 manifest_end: usize,
138 manifest_end: usize,
129 /// The end offset for the user+email (not including the newline)
139 /// The end offset for the user+email (not including the newline)
130 user_end: usize,
140 user_end: usize,
131 /// The end offset for the timestamp+timezone+extras (not including the
141 /// The end offset for the timestamp+timezone+extras (not including the
132 /// newline)
142 /// newline)
133 timestamp_end: usize,
143 timestamp_end: usize,
134 /// The end offset for the file list (not including the newline)
144 /// The end offset for the file list (not including the newline)
135 files_end: usize,
145 files_end: usize,
136 }
146 }
137
147
138 impl<'changelog> ChangelogRevisionData<'changelog> {
148 impl<'changelog> ChangelogRevisionData<'changelog> {
139 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
149 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
140 let mut line_iter = bytes.split(|b| b == &b'\n');
150 let mut line_iter = bytes.split(|b| b == &b'\n');
141 let manifest_end = line_iter
151 let manifest_end = line_iter
142 .next()
152 .next()
143 .expect("Empty iterator from split()?")
153 .expect("Empty iterator from split()?")
144 .len();
154 .len();
145 let user_slice = line_iter.next().ok_or_else(|| {
155 let user_slice = line_iter.next().ok_or_else(|| {
146 HgError::corrupted("Changeset data truncated after manifest line")
156 HgError::corrupted("Changeset data truncated after manifest line")
147 })?;
157 })?;
148 let user_end = manifest_end + 1 + user_slice.len();
158 let user_end = manifest_end + 1 + user_slice.len();
149 let timestamp_slice = line_iter.next().ok_or_else(|| {
159 let timestamp_slice = line_iter.next().ok_or_else(|| {
150 HgError::corrupted("Changeset data truncated after user line")
160 HgError::corrupted("Changeset data truncated after user line")
151 })?;
161 })?;
152 let timestamp_end = user_end + 1 + timestamp_slice.len();
162 let timestamp_end = user_end + 1 + timestamp_slice.len();
153 let mut files_end = timestamp_end + 1;
163 let mut files_end = timestamp_end + 1;
154 loop {
164 loop {
155 let line = line_iter.next().ok_or_else(|| {
165 let line = line_iter.next().ok_or_else(|| {
156 HgError::corrupted("Changeset data truncated in files list")
166 HgError::corrupted("Changeset data truncated in files list")
157 })?;
167 })?;
158 if line.is_empty() {
168 if line.is_empty() {
159 if files_end == bytes.len() {
169 if files_end == bytes.len() {
160 // The list of files ended with a single newline (there
170 // The list of files ended with a single newline (there
161 // should be two)
171 // should be two)
162 return Err(HgError::corrupted(
172 return Err(HgError::corrupted(
163 "Changeset data truncated after files list",
173 "Changeset data truncated after files list",
164 ));
174 ));
165 }
175 }
166 files_end -= 1;
176 files_end -= 1;
167 break;
177 break;
168 }
178 }
169 files_end += line.len() + 1;
179 files_end += line.len() + 1;
170 }
180 }
171
181
172 Ok(Self {
182 Ok(Self {
173 bytes,
183 bytes,
174 manifest_end,
184 manifest_end,
175 user_end,
185 user_end,
176 timestamp_end,
186 timestamp_end,
177 files_end,
187 files_end,
178 })
188 })
179 }
189 }
180
190
181 fn null() -> Self {
191 fn null() -> Self {
182 Self::new(Cow::Borrowed(
192 Self::new(Cow::Borrowed(
183 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
193 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
184 ))
194 ))
185 .unwrap()
195 .unwrap()
186 }
196 }
187
197
188 /// Return an iterator over the lines of the entry.
198 /// Return an iterator over the lines of the entry.
189 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
199 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
190 self.bytes.split(|b| b == &b'\n')
200 self.bytes.split(|b| b == &b'\n')
191 }
201 }
192
202
193 /// Return the node id of the `manifest` referenced by this `changelog`
203 /// Return the node id of the `manifest` referenced by this `changelog`
194 /// entry.
204 /// entry.
195 pub fn manifest_node(&self) -> Result<Node, HgError> {
205 pub fn manifest_node(&self) -> Result<Node, HgError> {
196 let manifest_node_hex = &self.bytes[..self.manifest_end];
206 let manifest_node_hex = &self.bytes[..self.manifest_end];
197 Node::from_hex_for_repo(manifest_node_hex)
207 Node::from_hex_for_repo(manifest_node_hex)
198 }
208 }
199
209
200 /// The full user string (usually a name followed by an email enclosed in
210 /// The full user string (usually a name followed by an email enclosed in
201 /// angle brackets)
211 /// angle brackets)
202 pub fn user(&self) -> &[u8] {
212 pub fn user(&self) -> &[u8] {
203 &self.bytes[self.manifest_end + 1..self.user_end]
213 &self.bytes[self.manifest_end + 1..self.user_end]
204 }
214 }
205
215
206 /// The full timestamp line (timestamp in seconds, offset in seconds, and
216 /// The full timestamp line (timestamp in seconds, offset in seconds, and
207 /// possibly extras)
217 /// possibly extras)
208 // TODO: We should expose this in a more useful way
218 // TODO: We should expose this in a more useful way
209 pub fn timestamp_line(&self) -> &[u8] {
219 pub fn timestamp_line(&self) -> &[u8] {
210 &self.bytes[self.user_end + 1..self.timestamp_end]
220 &self.bytes[self.user_end + 1..self.timestamp_end]
211 }
221 }
212
222
213 /// The files changed in this revision.
223 /// The files changed in this revision.
214 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
224 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
215 self.bytes[self.timestamp_end + 1..self.files_end]
225 self.bytes[self.timestamp_end + 1..self.files_end]
216 .split(|b| b == &b'\n')
226 .split(|b| b == &b'\n')
217 .map(HgPath::new)
227 .map(HgPath::new)
218 }
228 }
219
229
220 /// The change description.
230 /// The change description.
221 pub fn description(&self) -> &[u8] {
231 pub fn description(&self) -> &[u8] {
222 &self.bytes[self.files_end + 2..]
232 &self.bytes[self.files_end + 2..]
223 }
233 }
224 }
234 }
225
235
226 impl Debug for ChangelogRevisionData<'_> {
236 impl Debug for ChangelogRevisionData<'_> {
227 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
237 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
228 f.debug_struct("ChangelogRevisionData")
238 f.debug_struct("ChangelogRevisionData")
229 .field("bytes", &debug_bytes(&self.bytes))
239 .field("bytes", &debug_bytes(&self.bytes))
230 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
240 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
231 .field(
241 .field(
232 "user",
242 "user",
233 &debug_bytes(
243 &debug_bytes(
234 &self.bytes[self.manifest_end + 1..self.user_end],
244 &self.bytes[self.manifest_end + 1..self.user_end],
235 ),
245 ),
236 )
246 )
237 .field(
247 .field(
238 "timestamp",
248 "timestamp",
239 &debug_bytes(
249 &debug_bytes(
240 &self.bytes[self.user_end + 1..self.timestamp_end],
250 &self.bytes[self.user_end + 1..self.timestamp_end],
241 ),
251 ),
242 )
252 )
243 .field(
253 .field(
244 "files",
254 "files",
245 &debug_bytes(
255 &debug_bytes(
246 &self.bytes[self.timestamp_end + 1..self.files_end],
256 &self.bytes[self.timestamp_end + 1..self.files_end],
247 ),
257 ),
248 )
258 )
249 .field(
259 .field(
250 "description",
260 "description",
251 &debug_bytes(&self.bytes[self.files_end + 2..]),
261 &debug_bytes(&self.bytes[self.files_end + 2..]),
252 )
262 )
253 .finish()
263 .finish()
254 }
264 }
255 }
265 }
256
266
257 fn debug_bytes(bytes: &[u8]) -> String {
267 fn debug_bytes(bytes: &[u8]) -> String {
258 String::from_utf8_lossy(
268 String::from_utf8_lossy(
259 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
269 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
260 )
270 )
261 .to_string()
271 .to_string()
262 }
272 }
263
273
264 #[cfg(test)]
274 #[cfg(test)]
265 mod tests {
275 mod tests {
266 use super::*;
276 use super::*;
267 use crate::vfs::Vfs;
277 use crate::vfs::Vfs;
268 use crate::NULL_REVISION;
278 use crate::NULL_REVISION;
269 use pretty_assertions::assert_eq;
279 use pretty_assertions::assert_eq;
270
280
271 #[test]
281 #[test]
272 fn test_create_changelogrevisiondata_invalid() {
282 fn test_create_changelogrevisiondata_invalid() {
273 // Completely empty
283 // Completely empty
274 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
284 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
275 // No newline after manifest
285 // No newline after manifest
276 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
286 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
277 // No newline after user
287 // No newline after user
278 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
288 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
279 // No newline after timestamp
289 // No newline after timestamp
280 assert!(
290 assert!(
281 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
291 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
282 );
292 );
283 // Missing newline after files
293 // Missing newline after files
284 assert!(ChangelogRevisionData::new(Cow::Borrowed(
294 assert!(ChangelogRevisionData::new(Cow::Borrowed(
285 b"abcd\n\n0 0\nfile1\nfile2"
295 b"abcd\n\n0 0\nfile1\nfile2"
286 ))
296 ))
287 .is_err(),);
297 .is_err(),);
288 // Only one newline after files
298 // Only one newline after files
289 assert!(ChangelogRevisionData::new(Cow::Borrowed(
299 assert!(ChangelogRevisionData::new(Cow::Borrowed(
290 b"abcd\n\n0 0\nfile1\nfile2\n"
300 b"abcd\n\n0 0\nfile1\nfile2\n"
291 ))
301 ))
292 .is_err(),);
302 .is_err(),);
293 }
303 }
294
304
295 #[test]
305 #[test]
296 fn test_create_changelogrevisiondata() {
306 fn test_create_changelogrevisiondata() {
297 let data = ChangelogRevisionData::new(Cow::Borrowed(
307 let data = ChangelogRevisionData::new(Cow::Borrowed(
298 b"0123456789abcdef0123456789abcdef01234567
308 b"0123456789abcdef0123456789abcdef01234567
299 Some One <someone@example.com>
309 Some One <someone@example.com>
300 0 0
310 0 0
301 file1
311 file1
302 file2
312 file2
303
313
304 some
314 some
305 commit
315 commit
306 message",
316 message",
307 ))
317 ))
308 .unwrap();
318 .unwrap();
309 assert_eq!(
319 assert_eq!(
310 data.manifest_node().unwrap(),
320 data.manifest_node().unwrap(),
311 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
321 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
312 .unwrap()
322 .unwrap()
313 );
323 );
314 assert_eq!(data.user(), b"Some One <someone@example.com>");
324 assert_eq!(data.user(), b"Some One <someone@example.com>");
315 assert_eq!(data.timestamp_line(), b"0 0");
325 assert_eq!(data.timestamp_line(), b"0 0");
316 assert_eq!(
326 assert_eq!(
317 data.files().collect_vec(),
327 data.files().collect_vec(),
318 vec![HgPath::new("file1"), HgPath::new("file2")]
328 vec![HgPath::new("file1"), HgPath::new("file2")]
319 );
329 );
320 assert_eq!(data.description(), b"some\ncommit\nmessage");
330 assert_eq!(data.description(), b"some\ncommit\nmessage");
321 }
331 }
322
332
323 #[test]
333 #[test]
324 fn test_data_from_rev_null() -> Result<(), RevlogError> {
334 fn test_data_from_rev_null() -> Result<(), RevlogError> {
325 // an empty revlog will be enough for this case
335 // an empty revlog will be enough for this case
326 let temp = tempfile::tempdir().unwrap();
336 let temp = tempfile::tempdir().unwrap();
327 let vfs = Vfs { base: temp.path() };
337 let vfs = Vfs { base: temp.path() };
328 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
338 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
329 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
339 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
330
340
331 let changelog = Changelog { revlog };
341 let changelog = Changelog { revlog };
332 assert_eq!(
342 assert_eq!(
333 changelog.data_for_rev(NULL_REVISION)?,
343 changelog.data_for_rev(NULL_REVISION.into())?,
334 ChangelogRevisionData::null()
344 ChangelogRevisionData::null()
335 );
345 );
336 // same with the intermediate entry object
346 // same with the intermediate entry object
337 assert_eq!(
347 assert_eq!(
338 changelog.entry_for_rev(NULL_REVISION)?.data()?,
348 changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
339 ChangelogRevisionData::null()
349 ChangelogRevisionData::null()
340 );
350 );
341 Ok(())
351 Ok(())
342 }
352 }
343 }
353 }
@@ -1,208 +1,231 b''
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::exit_codes;
2 use crate::repo::Repo;
3 use crate::repo::Repo;
3 use crate::revlog::path_encode::path_encode;
4 use crate::revlog::path_encode::path_encode;
4 use crate::revlog::NodePrefix;
5 use crate::revlog::NodePrefix;
5 use crate::revlog::Revision;
6 use crate::revlog::Revision;
6 use crate::revlog::RevlogEntry;
7 use crate::revlog::RevlogEntry;
7 use crate::revlog::{Revlog, RevlogError};
8 use crate::revlog::{Revlog, RevlogError};
8 use crate::utils::files::get_path_from_bytes;
9 use crate::utils::files::get_path_from_bytes;
9 use crate::utils::hg_path::HgPath;
10 use crate::utils::hg_path::HgPath;
10 use crate::utils::SliceExt;
11 use crate::utils::SliceExt;
12 use crate::UncheckedRevision;
11 use std::path::PathBuf;
13 use std::path::PathBuf;
12
14
13 /// A specialized `Revlog` to work with file data logs.
15 /// A specialized `Revlog` to work with file data logs.
14 pub struct Filelog {
16 pub struct Filelog {
15 /// The generic `revlog` format.
17 /// The generic `revlog` format.
16 revlog: Revlog,
18 revlog: Revlog,
17 }
19 }
18
20
19 impl Filelog {
21 impl Filelog {
20 pub fn open_vfs(
22 pub fn open_vfs(
21 store_vfs: &crate::vfs::Vfs<'_>,
23 store_vfs: &crate::vfs::Vfs<'_>,
22 file_path: &HgPath,
24 file_path: &HgPath,
23 ) -> Result<Self, HgError> {
25 ) -> Result<Self, HgError> {
24 let index_path = store_path(file_path, b".i");
26 let index_path = store_path(file_path, b".i");
25 let data_path = store_path(file_path, b".d");
27 let data_path = store_path(file_path, b".d");
26 let revlog =
28 let revlog =
27 Revlog::open(store_vfs, index_path, Some(&data_path), false)?;
29 Revlog::open(store_vfs, index_path, Some(&data_path), false)?;
28 Ok(Self { revlog })
30 Ok(Self { revlog })
29 }
31 }
30
32
31 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
33 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
32 Self::open_vfs(&repo.store_vfs(), file_path)
34 Self::open_vfs(&repo.store_vfs(), file_path)
33 }
35 }
34
36
35 /// The given node ID is that of the file as found in a filelog, not of a
37 /// The given node ID is that of the file as found in a filelog, not of a
36 /// changeset.
38 /// changeset.
37 pub fn data_for_node(
39 pub fn data_for_node(
38 &self,
40 &self,
39 file_node: impl Into<NodePrefix>,
41 file_node: impl Into<NodePrefix>,
40 ) -> Result<FilelogRevisionData, RevlogError> {
42 ) -> Result<FilelogRevisionData, RevlogError> {
41 let file_rev = self.revlog.rev_from_node(file_node.into())?;
43 let file_rev = self.revlog.rev_from_node(file_node.into())?;
42 self.data_for_rev(file_rev)
44 self.data_for_rev(file_rev.into())
43 }
45 }
44
46
45 /// The given revision is that of the file as found in a filelog, not of a
47 /// The given revision is that of the file as found in a filelog, not of a
46 /// changeset.
48 /// changeset.
47 pub fn data_for_rev(
49 pub fn data_for_rev(
48 &self,
50 &self,
49 file_rev: Revision,
51 file_rev: UncheckedRevision,
50 ) -> Result<FilelogRevisionData, RevlogError> {
52 ) -> Result<FilelogRevisionData, RevlogError> {
51 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
53 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
52 Ok(FilelogRevisionData(data))
54 Ok(FilelogRevisionData(data))
53 }
55 }
54
56
55 /// The given node ID is that of the file as found in a filelog, not of a
57 /// The given node ID is that of the file as found in a filelog, not of a
56 /// changeset.
58 /// changeset.
57 pub fn entry_for_node(
59 pub fn entry_for_node(
58 &self,
60 &self,
59 file_node: impl Into<NodePrefix>,
61 file_node: impl Into<NodePrefix>,
60 ) -> Result<FilelogEntry, RevlogError> {
62 ) -> Result<FilelogEntry, RevlogError> {
61 let file_rev = self.revlog.rev_from_node(file_node.into())?;
63 let file_rev = self.revlog.rev_from_node(file_node.into())?;
62 self.entry_for_rev(file_rev)
64 self.entry_for_checked_rev(file_rev)
63 }
65 }
64
66
65 /// The given revision is that of the file as found in a filelog, not of a
67 /// The given revision is that of the file as found in a filelog, not of a
66 /// changeset.
68 /// changeset.
67 pub fn entry_for_rev(
69 pub fn entry_for_rev(
68 &self,
70 &self,
71 file_rev: UncheckedRevision,
72 ) -> Result<FilelogEntry, RevlogError> {
73 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
74 }
75
76 fn entry_for_checked_rev(
77 &self,
69 file_rev: Revision,
78 file_rev: Revision,
70 ) -> Result<FilelogEntry, RevlogError> {
79 ) -> Result<FilelogEntry, RevlogError> {
71 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
80 Ok(FilelogEntry(
81 self.revlog.get_entry_for_checked_rev(file_rev)?,
82 ))
72 }
83 }
73 }
84 }
74
85
75 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
86 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
76 let encoded_bytes =
87 let encoded_bytes =
77 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
88 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
78 get_path_from_bytes(&encoded_bytes).into()
89 get_path_from_bytes(&encoded_bytes).into()
79 }
90 }
80
91
81 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
92 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
82
93
83 impl FilelogEntry<'_> {
94 impl FilelogEntry<'_> {
84 /// `self.data()` can be expensive, with decompression and delta
95 /// `self.data()` can be expensive, with decompression and delta
85 /// resolution.
96 /// resolution.
86 ///
97 ///
87 /// *Without* paying this cost, based on revlog index information
98 /// *Without* paying this cost, based on revlog index information
88 /// including `RevlogEntry::uncompressed_len`:
99 /// including `RevlogEntry::uncompressed_len`:
89 ///
100 ///
90 /// * Returns `true` if the length that `self.data().file_data().len()`
101 /// * Returns `true` if the length that `self.data().file_data().len()`
91 /// would return is definitely **not equal** to `other_len`.
102 /// would return is definitely **not equal** to `other_len`.
92 /// * Returns `false` if available information is inconclusive.
103 /// * Returns `false` if available information is inconclusive.
93 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
104 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
94 // Relevant code that implement this behavior in Python code:
105 // Relevant code that implement this behavior in Python code:
95 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
106 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
96 // revlog.size, revlog.rawsize
107 // revlog.size, revlog.rawsize
97
108
98 // Let’s call `file_data_len` what would be returned by
109 // Let’s call `file_data_len` what would be returned by
99 // `self.data().file_data().len()`.
110 // `self.data().file_data().len()`.
100
111
101 if self.0.is_censored() {
112 if self.0.is_censored() {
102 let file_data_len = 0;
113 let file_data_len = 0;
103 return other_len != file_data_len;
114 return other_len != file_data_len;
104 }
115 }
105
116
106 if self.0.has_length_affecting_flag_processor() {
117 if self.0.has_length_affecting_flag_processor() {
107 // We can’t conclude anything about `file_data_len`.
118 // We can’t conclude anything about `file_data_len`.
108 return false;
119 return false;
109 }
120 }
110
121
111 // Revlog revisions (usually) have metadata for the size of
122 // Revlog revisions (usually) have metadata for the size of
112 // their data after decompression and delta resolution
123 // their data after decompression and delta resolution
113 // as would be returned by `Revlog::get_rev_data`.
124 // as would be returned by `Revlog::get_rev_data`.
114 //
125 //
115 // For filelogs this is the file’s contents preceded by an optional
126 // For filelogs this is the file’s contents preceded by an optional
116 // metadata block.
127 // metadata block.
117 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
128 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
118 l as u64
129 l as u64
119 } else {
130 } else {
120 // The field was set to -1, the actual uncompressed len is unknown.
131 // The field was set to -1, the actual uncompressed len is unknown.
121 // We need to decompress to say more.
132 // We need to decompress to say more.
122 return false;
133 return false;
123 };
134 };
124 // `uncompressed_len = file_data_len + optional_metadata_len`,
135 // `uncompressed_len = file_data_len + optional_metadata_len`,
125 // so `file_data_len <= uncompressed_len`.
136 // so `file_data_len <= uncompressed_len`.
126 if uncompressed_len < other_len {
137 if uncompressed_len < other_len {
127 // Transitively, `file_data_len < other_len`.
138 // Transitively, `file_data_len < other_len`.
128 // So `other_len != file_data_len` definitely.
139 // So `other_len != file_data_len` definitely.
129 return true;
140 return true;
130 }
141 }
131
142
132 if uncompressed_len == other_len + 4 {
143 if uncompressed_len == other_len + 4 {
133 // It’s possible that `file_data_len == other_len` with an empty
144 // It’s possible that `file_data_len == other_len` with an empty
134 // metadata block (2 start marker bytes + 2 end marker bytes).
145 // metadata block (2 start marker bytes + 2 end marker bytes).
135 // This happens when there wouldn’t otherwise be metadata, but
146 // This happens when there wouldn’t otherwise be metadata, but
136 // the first 2 bytes of file data happen to match a start marker
147 // the first 2 bytes of file data happen to match a start marker
137 // and would be ambiguous.
148 // and would be ambiguous.
138 return false;
149 return false;
139 }
150 }
140
151
141 if !self.0.has_p1() {
152 if !self.0.has_p1() {
142 // There may or may not be copy metadata, so we can’t deduce more
153 // There may or may not be copy metadata, so we can’t deduce more
143 // about `file_data_len` without computing file data.
154 // about `file_data_len` without computing file data.
144 return false;
155 return false;
145 }
156 }
146
157
147 // Filelog ancestry is not meaningful in the way changelog ancestry is.
158 // Filelog ancestry is not meaningful in the way changelog ancestry is.
148 // It only provides hints to delta generation.
159 // It only provides hints to delta generation.
149 // p1 and p2 are set to null when making a copy or rename since
160 // p1 and p2 are set to null when making a copy or rename since
150 // contents are likely unrelatedto what might have previously existed
161 // contents are likely unrelatedto what might have previously existed
151 // at the destination path.
162 // at the destination path.
152 //
163 //
153 // Conversely, since here p1 is non-null, there is no copy metadata.
164 // Conversely, since here p1 is non-null, there is no copy metadata.
154 // Note that this reasoning may be invalidated in the presence of
165 // Note that this reasoning may be invalidated in the presence of
155 // merges made by some previous versions of Mercurial that
166 // merges made by some previous versions of Mercurial that
156 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
167 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
157 // and `tests/test-issue6528.t`.
168 // and `tests/test-issue6528.t`.
158 //
169 //
159 // Since copy metadata is currently the only kind of metadata
170 // Since copy metadata is currently the only kind of metadata
160 // kept in revlog data of filelogs,
171 // kept in revlog data of filelogs,
161 // this `FilelogEntry` does not have such metadata:
172 // this `FilelogEntry` does not have such metadata:
162 let file_data_len = uncompressed_len;
173 let file_data_len = uncompressed_len;
163
174
164 file_data_len != other_len
175 file_data_len != other_len
165 }
176 }
166
177
167 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
178 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
168 Ok(FilelogRevisionData(self.0.data()?.into_owned()))
179 let data = self.0.data();
180 match data {
181 Ok(data) => Ok(FilelogRevisionData(data.into_owned())),
182 // Errors other than `HgError` should not happen at this point
183 Err(e) => match e {
184 RevlogError::Other(hg_error) => Err(hg_error),
185 revlog_error => Err(HgError::abort(
186 revlog_error.to_string(),
187 exit_codes::ABORT,
188 None,
189 )),
190 },
191 }
169 }
192 }
170 }
193 }
171
194
172 /// The data for one revision in a filelog, uncompressed and delta-resolved.
195 /// The data for one revision in a filelog, uncompressed and delta-resolved.
173 pub struct FilelogRevisionData(Vec<u8>);
196 pub struct FilelogRevisionData(Vec<u8>);
174
197
175 impl FilelogRevisionData {
198 impl FilelogRevisionData {
176 /// Split into metadata and data
199 /// Split into metadata and data
177 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
200 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
178 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
201 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
179
202
180 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
203 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
181 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
204 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
182 Ok((Some(metadata), data))
205 Ok((Some(metadata), data))
183 } else {
206 } else {
184 Err(HgError::corrupted(
207 Err(HgError::corrupted(
185 "Missing metadata end delimiter in filelog entry",
208 "Missing metadata end delimiter in filelog entry",
186 ))
209 ))
187 }
210 }
188 } else {
211 } else {
189 Ok((None, &self.0))
212 Ok((None, &self.0))
190 }
213 }
191 }
214 }
192
215
193 /// Returns the file contents at this revision, stripped of any metadata
216 /// Returns the file contents at this revision, stripped of any metadata
194 pub fn file_data(&self) -> Result<&[u8], HgError> {
217 pub fn file_data(&self) -> Result<&[u8], HgError> {
195 let (_metadata, data) = self.split()?;
218 let (_metadata, data) = self.split()?;
196 Ok(data)
219 Ok(data)
197 }
220 }
198
221
199 /// Consume the entry, and convert it into data, discarding any metadata,
222 /// Consume the entry, and convert it into data, discarding any metadata,
200 /// if present.
223 /// if present.
201 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
224 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
202 if let (Some(_metadata), data) = self.split()? {
225 if let (Some(_metadata), data) = self.split()? {
203 Ok(data.to_owned())
226 Ok(data.to_owned())
204 } else {
227 } else {
205 Ok(self.0)
228 Ok(self.0)
206 }
229 }
207 }
230 }
208 }
231 }
@@ -1,615 +1,622 b''
1 use std::fmt::Debug;
1 use std::ops::Deref;
2 use std::ops::Deref;
2
3
3 use byteorder::{BigEndian, ByteOrder};
4 use byteorder::{BigEndian, ByteOrder};
4
5
5 use crate::errors::HgError;
6 use crate::errors::HgError;
6 use crate::revlog::node::Node;
7 use crate::revlog::node::Node;
7 use crate::revlog::{Revision, NULL_REVISION};
8 use crate::revlog::{Revision, NULL_REVISION};
9 use crate::UncheckedRevision;
8
10
9 pub const INDEX_ENTRY_SIZE: usize = 64;
11 pub const INDEX_ENTRY_SIZE: usize = 64;
10
12
/// The 4-byte header of a revlog index file (flags + format version).
pub struct IndexHeader {
    header_bytes: [u8; 4],
}
14
16
#[derive(Copy, Clone)]
pub struct IndexHeaderFlags {
    flags: u16,
}

/// Corresponds to the high bits of `_format_flags` in python
impl IndexHeaderFlags {
    /// Corresponds to FLAG_INLINE_DATA in python
    pub fn is_inline(self) -> bool {
        // Inline data flag lives in the lowest bit.
        (self.flags & 0b01) != 0
    }

    /// Corresponds to FLAG_GENERALDELTA in python
    pub fn uses_generaldelta(self) -> bool {
        // General-delta flag lives in the second bit.
        (self.flags & 0b10) != 0
    }
}
31
33
/// Corresponds to the INDEX_HEADER structure,
/// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
impl IndexHeader {
    fn format_flags(&self) -> IndexHeaderFlags {
        // No "unknown flags" check here, unlike in python. Maybe there should
        // be.
        IndexHeaderFlags {
            flags: BigEndian::read_u16(&self.header_bytes[0..2]),
        }
    }

    /// The only revlog version currently supported by rhg.
    const REVLOGV1: u16 = 1;

    /// Corresponds to `_format_version` in Python.
    fn format_version(&self) -> u16 {
        BigEndian::read_u16(&self.header_bytes[2..4])
    }

    const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
        // We treat an empty file as a valid index with no entries.
        // Here we make an arbitrary choice of what we assume the format of the
        // index to be (V1, using generaldelta).
        // This doesn't matter too much, since we're only doing read-only
        // access. but the value corresponds to the `new_header` variable in
        // `revlog.py`, `_loadindex`
        header_bytes: [0, 3, 0, 1],
    };

    /// Parse the first four bytes of an index file into a header.
    /// An empty file is treated as a valid, empty index (see above);
    /// a non-empty file shorter than 4 bytes is corrupted.
    fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
        if index_bytes.is_empty() {
            return Ok(IndexHeader::EMPTY_INDEX_HEADER);
        }
        if index_bytes.len() < 4 {
            return Err(HgError::corrupted(
                "corrupted revlog: can't read the index format header",
            ));
        }
        Ok(IndexHeader {
            header_bytes: {
                // Length was checked above, so the conversion cannot fail.
                let bytes: [u8; 4] =
                    index_bytes[0..4].try_into().expect("impossible");
                bytes
            },
        })
    }
}
79
81
/// A Revlog index
pub struct Index {
    // The raw bytes of the index file.
    bytes: Box<dyn Deref<Target = [u8]> + Send>,
    /// Offsets of starts of index blocks.
    /// Only needed when the index is interleaved with data.
    // `Some` iff the revlog is inline; doubles as the `is_inline` flag.
    offsets: Option<Vec<usize>>,
    uses_generaldelta: bool,
}
88
90
91 impl Debug for Index {
92 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
93 f.debug_struct("Index")
94 .field("offsets", &self.offsets)
95 .field("uses_generaldelta", &self.uses_generaldelta)
96 .finish()
97 }
98 }
99
impl Index {
    /// Create an index from bytes.
    /// Calculate the start of each entry when is_inline is true.
    pub fn new(
        bytes: Box<dyn Deref<Target = [u8]> + Send>,
    ) -> Result<Self, HgError> {
        let header = IndexHeader::parse(bytes.as_ref())?;

        if header.format_version() != IndexHeader::REVLOGV1 {
            // A proper new version should have had a repo/store
            // requirement.
            return Err(HgError::corrupted("unsupported revlog version"));
        }

        // This is only correct because we know version is REVLOGV1.
        // In v2 we always use generaldelta, while in v0 we never use
        // generaldelta. Similar for [is_inline] (it's only used in v1).
        let uses_generaldelta = header.format_flags().uses_generaldelta();

        if header.format_flags().is_inline() {
            let mut offset: usize = 0;
            let mut offsets = Vec::new();

            while offset + INDEX_ENTRY_SIZE <= bytes.len() {
                offsets.push(offset);
                let end = offset + INDEX_ENTRY_SIZE;
                let entry = IndexEntry {
                    bytes: &bytes[offset..end],
                    offset_override: None,
                };

                // Skip past this entry and the revision data that follows
                // it to find the start of the next entry.
                offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
            }

            if offset == bytes.len() {
                Ok(Self {
                    bytes,
                    offsets: Some(offsets),
                    uses_generaldelta,
                })
            } else {
                Err(HgError::corrupted("unexpected inline revlog length"))
            }
        } else {
            Ok(Self {
                bytes,
                offsets: None,
                uses_generaldelta,
            })
        }
    }

    pub fn uses_generaldelta(&self) -> bool {
        self.uses_generaldelta
    }

    /// Value of the inline flag.
    pub fn is_inline(&self) -> bool {
        self.offsets.is_some()
    }

    /// Return a slice of bytes if `revlog` is inline. Panic if not.
    pub fn data(&self, start: usize, end: usize) -> &[u8] {
        if !self.is_inline() {
            panic!("tried to access data in the index of a revlog that is not inline");
        }
        &self.bytes[start..end]
    }

    /// Return number of entries of the revlog index.
    pub fn len(&self) -> usize {
        if let Some(offsets) = &self.offsets {
            offsets.len()
        } else {
            self.bytes.len() / INDEX_ENTRY_SIZE
        }
    }

    /// Returns `true` if the `Index` has zero `entries`.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Return the index entry corresponding to the given revision if it
    /// exists.
    // NOTE(review): apart from NULL_REVISION, `rev` is assumed to be in
    // bounds — the helpers below index directly and panic otherwise.
    // Presumably callers pass already-checked revisions; confirm at call
    // sites.
    pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
        if rev == NULL_REVISION {
            return None;
        }
        Some(if let Some(offsets) = &self.offsets {
            self.get_entry_inline(rev, offsets)
        } else {
            self.get_entry_separated(rev)
        })
    }

    /// Entry lookup for an inline (index interleaved with data) revlog,
    /// using the entry offsets computed in `new`.
    fn get_entry_inline(
        &self,
        rev: Revision,
        offsets: &[usize],
    ) -> IndexEntry {
        let start = offsets[rev as usize];
        let end = start + INDEX_ENTRY_SIZE;
        let bytes = &self.bytes[start..end];

        // See IndexEntry for an explanation of this override.
        let offset_override = Some(end);

        IndexEntry {
            bytes,
            offset_override,
        }
    }

    /// Entry lookup for a non-inline revlog: entries are fixed-size and
    /// contiguous, so the position is a simple multiplication.
    fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
        let start = rev as usize * INDEX_ENTRY_SIZE;
        let end = start + INDEX_ENTRY_SIZE;
        let bytes = &self.bytes[start..end];

        // Override the offset of the first revision as its bytes are used
        // for the index's metadata (saving space because it is always 0)
        let offset_override = if rev == 0 { Some(0) } else { None };

        IndexEntry {
            bytes,
            offset_override,
        }
    }
}
222
229
impl super::RevlogIndex for Index {
    fn len(&self) -> usize {
        self.len()
    }

    /// Return the node ID stored in the entry for `rev`, if the revision
    /// exists (`get_entry` returns `None` for NULL_REVISION).
    fn node(&self, rev: Revision) -> Option<&Node> {
        self.get_entry(rev).map(|entry| entry.hash())
    }
}
232
239
#[derive(Debug)]
pub struct IndexEntry<'a> {
    // The 64 bytes (INDEX_ENTRY_SIZE) of this entry in the index.
    bytes: &'a [u8],
    /// Allows to override the offset value of the entry.
    ///
    /// For interleaved index and data, the offset stored in the index
    /// corresponds to the separated data offset.
    /// It has to be overridden with the actual offset in the interleaved
    /// index which is just after the index block.
    ///
    /// For separated index and data, the offset stored in the first index
    /// entry is mixed with the index headers.
    /// It has to be overridden with 0.
    offset_override: Option<usize>,
}
248
255
impl<'a> IndexEntry<'a> {
    /// Return the offset of the data.
    pub fn offset(&self) -> usize {
        if let Some(offset_override) = self.offset_override {
            offset_override
        } else {
            // The offset is stored on 48 bits (bytes 0..=5), big-endian;
            // widen it to 64 bits before decoding.
            let mut bytes = [0; 8];
            bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
            BigEndian::read_u64(&bytes[..]) as usize
        }
    }

    /// Revision flags, stored big-endian in bytes 6..=7.
    pub fn flags(&self) -> u16 {
        BigEndian::read_u16(&self.bytes[6..=7])
    }

    /// Return the compressed length of the data.
    pub fn compressed_len(&self) -> u32 {
        BigEndian::read_u32(&self.bytes[8..=11])
    }

    /// Return the uncompressed length of the data.
    pub fn uncompressed_len(&self) -> i32 {
        BigEndian::read_i32(&self.bytes[12..=15])
    }

    /// Return the revision upon which the data has been derived.
    pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
        // TODO Maybe return an Option when base_revision == rev?
        // Requires to add rev to IndexEntry

        BigEndian::read_i32(&self.bytes[16..]).into()
    }

    /// Linked revision (in the changelog), unchecked since it comes
    /// straight from disk.
    pub fn link_revision(&self) -> UncheckedRevision {
        BigEndian::read_i32(&self.bytes[20..]).into()
    }

    /// First parent revision, unchecked since it comes straight from disk.
    pub fn p1(&self) -> UncheckedRevision {
        BigEndian::read_i32(&self.bytes[24..]).into()
    }

    /// Second parent revision, unchecked since it comes straight from disk.
    pub fn p2(&self) -> UncheckedRevision {
        BigEndian::read_i32(&self.bytes[28..]).into()
    }

    /// Return the hash of revision's full text.
    ///
    /// Currently, SHA-1 is used and only the first 20 bytes of this field
    /// are used.
    pub fn hash(&self) -> &'a Node {
        (&self.bytes[32..52]).try_into().unwrap()
    }
}
303
310
#[cfg(test)]
mod tests {
    use super::*;
    use crate::node::NULL_NODE;

    /// Test helper that serializes a single 64-byte index entry, field by
    /// field, mirroring the on-disk layout decoded by `IndexEntry`.
    #[cfg(test)]
    #[derive(Debug, Copy, Clone)]
    pub struct IndexEntryBuilder {
        is_first: bool,
        is_inline: bool,
        is_general_delta: bool,
        version: u16,
        offset: usize,
        compressed_len: usize,
        uncompressed_len: usize,
        base_revision_or_base_of_delta_chain: Revision,
        link_revision: Revision,
        p1: Revision,
        p2: Revision,
        node: Node,
    }

    #[cfg(test)]
    impl IndexEntryBuilder {
        #[allow(clippy::new_without_default)]
        pub fn new() -> Self {
            Self {
                is_first: false,
                is_inline: false,
                is_general_delta: true,
                version: 1,
                offset: 0,
                compressed_len: 0,
                uncompressed_len: 0,
                base_revision_or_base_of_delta_chain: 0,
                link_revision: 0,
                p1: NULL_REVISION,
                p2: NULL_REVISION,
                node: NULL_NODE,
            }
        }

        pub fn is_first(&mut self, value: bool) -> &mut Self {
            self.is_first = value;
            self
        }

        pub fn with_inline(&mut self, value: bool) -> &mut Self {
            self.is_inline = value;
            self
        }

        pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
            self.is_general_delta = value;
            self
        }

        pub fn with_version(&mut self, value: u16) -> &mut Self {
            self.version = value;
            self
        }

        pub fn with_offset(&mut self, value: usize) -> &mut Self {
            self.offset = value;
            self
        }

        pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
            self.compressed_len = value;
            self
        }

        pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
            self.uncompressed_len = value;
            self
        }

        pub fn with_base_revision_or_base_of_delta_chain(
            &mut self,
            value: Revision,
        ) -> &mut Self {
            self.base_revision_or_base_of_delta_chain = value;
            self
        }

        pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
            self.link_revision = value;
            self
        }

        pub fn with_p1(&mut self, value: Revision) -> &mut Self {
            self.p1 = value;
            self
        }

        pub fn with_p2(&mut self, value: Revision) -> &mut Self {
            self.p2 = value;
            self
        }

        pub fn with_node(&mut self, value: Node) -> &mut Self {
            self.node = value;
            self
        }

        pub fn build(&self) -> Vec<u8> {
            let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
            if self.is_first {
                // The first entry's offset field doubles as the index
                // header: flags in the high half, version in the low half.
                bytes.extend(&match (self.is_general_delta, self.is_inline) {
                    (false, false) => [0u8, 0],
                    (false, true) => [0u8, 1],
                    (true, false) => [0u8, 2],
                    (true, true) => [0u8, 3],
                });
                bytes.extend(&self.version.to_be_bytes());
                // Remaining offset bytes.
                bytes.extend(&[0u8; 2]);
            } else {
                // Offset stored on 48 bits (6 bytes)
                bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
            }
            bytes.extend(&[0u8; 2]); // Revision flags.
            bytes.extend(&(self.compressed_len as u32).to_be_bytes());
            bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
            bytes.extend(
                &self.base_revision_or_base_of_delta_chain.to_be_bytes(),
            );
            bytes.extend(&self.link_revision.to_be_bytes());
            bytes.extend(&self.p1.to_be_bytes());
            bytes.extend(&self.p2.to_be_bytes());
            bytes.extend(self.node.as_bytes());
            // Padding up to INDEX_ENTRY_SIZE (64 bytes).
            bytes.extend(vec![0u8; 12]);
            bytes
        }
    }

    pub fn is_inline(index_bytes: &[u8]) -> bool {
        IndexHeader::parse(index_bytes)
            .expect("too short")
            .format_flags()
            .is_inline()
    }

    pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
        IndexHeader::parse(index_bytes)
            .expect("too short")
            .format_flags()
            .uses_generaldelta()
    }

    pub fn get_version(index_bytes: &[u8]) -> u16 {
        IndexHeader::parse(index_bytes)
            .expect("too short")
            .format_version()
    }

    #[test]
    fn flags_when_no_inline_flag_test() {
        let bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_general_delta(false)
            .with_inline(false)
            .build();

        assert!(!is_inline(&bytes));
        assert!(!uses_generaldelta(&bytes));
    }

    #[test]
    fn flags_when_inline_flag_test() {
        let bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_general_delta(false)
            .with_inline(true)
            .build();

        assert!(is_inline(&bytes));
        assert!(!uses_generaldelta(&bytes));
    }

    #[test]
    fn flags_when_inline_and_generaldelta_flags_test() {
        let bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_general_delta(true)
            .with_inline(true)
            .build();

        assert!(is_inline(&bytes));
        assert!(uses_generaldelta(&bytes));
    }

    #[test]
    fn test_offset() {
        let bytes = IndexEntryBuilder::new().with_offset(1).build();
        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.offset(), 1)
    }

    #[test]
    fn test_with_overridden_offset() {
        let bytes = IndexEntryBuilder::new().with_offset(1).build();
        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: Some(2),
        };

        assert_eq!(entry.offset(), 2)
    }

    #[test]
    fn test_compressed_len() {
        let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.compressed_len(), 1)
    }

    #[test]
    fn test_uncompressed_len() {
        let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.uncompressed_len(), 1)
    }

    #[test]
    fn test_base_revision_or_base_of_delta_chain() {
        let bytes = IndexEntryBuilder::new()
            .with_base_revision_or_base_of_delta_chain(1)
            .build();
        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
    }

    #[test]
    fn link_revision_test() {
        let bytes = IndexEntryBuilder::new().with_link_revision(123).build();

        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.link_revision(), 123.into());
    }

    #[test]
    fn p1_test() {
        let bytes = IndexEntryBuilder::new().with_p1(123).build();

        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.p1(), 123.into());
    }

    #[test]
    fn p2_test() {
        let bytes = IndexEntryBuilder::new().with_p2(123).build();

        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.p2(), 123.into());
    }

    #[test]
    fn node_test() {
        let node = Node::from_hex("0123456789012345678901234567890123456789")
            .unwrap();
        let bytes = IndexEntryBuilder::new().with_node(node).build();

        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(*entry.hash(), node);
    }

    #[test]
    fn version_test() {
        let bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(2)
            .build();

        assert_eq!(get_version(&bytes), 2)
    }
}
613
620
// Re-export the test-only builder so other modules' tests can craft
// index entries.
#[cfg(test)]
pub use tests::IndexEntryBuilder;
@@ -1,194 +1,203 b''
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::revlog::Revision;
3 use crate::revlog::{Node, NodePrefix};
2 use crate::revlog::{Node, NodePrefix};
4 use crate::revlog::{Revlog, RevlogError};
3 use crate::revlog::{Revlog, RevlogError};
5 use crate::utils::hg_path::HgPath;
4 use crate::utils::hg_path::HgPath;
6 use crate::utils::SliceExt;
5 use crate::utils::SliceExt;
7 use crate::vfs::Vfs;
6 use crate::vfs::Vfs;
7 use crate::{Revision, UncheckedRevision};
8
8
9 /// A specialized `Revlog` to work with `manifest` data format.
9 /// A specialized `Revlog` to work with `manifest` data format.
10 pub struct Manifestlog {
10 pub struct Manifestlog {
11 /// The generic `revlog` format.
11 /// The generic `revlog` format.
12 revlog: Revlog,
12 revlog: Revlog,
13 }
13 }
14
14
15 impl Manifestlog {
15 impl Manifestlog {
16 /// Open the `manifest` of a repository given by its root.
16 /// Open the `manifest` of a repository given by its root.
17 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
17 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
18 let revlog =
18 let revlog =
19 Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?;
19 Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?;
20 Ok(Self { revlog })
20 Ok(Self { revlog })
21 }
21 }
22
22
23 /// Return the `Manifest` for the given node ID.
23 /// Return the `Manifest` for the given node ID.
24 ///
24 ///
25 /// Note: this is a node ID in the manifestlog, typically found through
25 /// Note: this is a node ID in the manifestlog, typically found through
26 /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any
26 /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any
27 /// changeset.
27 /// changeset.
28 ///
28 ///
29 /// See also `Repo::manifest_for_node`
29 /// See also `Repo::manifest_for_node`
30 pub fn data_for_node(
30 pub fn data_for_node(
31 &self,
31 &self,
32 node: NodePrefix,
32 node: NodePrefix,
33 ) -> Result<Manifest, RevlogError> {
33 ) -> Result<Manifest, RevlogError> {
34 let rev = self.revlog.rev_from_node(node)?;
34 let rev = self.revlog.rev_from_node(node)?;
35 self.data_for_rev(rev)
35 self.data_for_checked_rev(rev)
36 }
36 }
37
37
38 /// Return the `Manifest` of a given revision number.
38 /// Return the `Manifest` of a given revision number.
39 ///
39 ///
40 /// Note: this is a revision number in the manifestlog, *not* of any
40 /// Note: this is a revision number in the manifestlog, *not* of any
41 /// changeset.
41 /// changeset.
42 ///
42 ///
43 /// See also `Repo::manifest_for_rev`
43 /// See also `Repo::manifest_for_rev`
44 pub fn data_for_rev(
44 pub fn data_for_rev(
45 &self,
45 &self,
46 rev: UncheckedRevision,
47 ) -> Result<Manifest, RevlogError> {
48 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
49 Ok(Manifest { bytes })
50 }
51
52 pub fn data_for_checked_rev(
53 &self,
46 rev: Revision,
54 rev: Revision,
47 ) -> Result<Manifest, RevlogError> {
55 ) -> Result<Manifest, RevlogError> {
48 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
56 let bytes =
57 self.revlog.get_rev_data_for_checked_rev(rev)?.into_owned();
49 Ok(Manifest { bytes })
58 Ok(Manifest { bytes })
50 }
59 }
51 }
60 }
52
61
53 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
62 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
54 #[derive(Debug)]
63 #[derive(Debug)]
55 pub struct Manifest {
64 pub struct Manifest {
56 /// Format for a manifest: flat sequence of variable-size entries,
65 /// Format for a manifest: flat sequence of variable-size entries,
57 /// sorted by path, each as:
66 /// sorted by path, each as:
58 ///
67 ///
59 /// ```text
68 /// ```text
60 /// <path> \0 <hex_node_id> <flags> \n
69 /// <path> \0 <hex_node_id> <flags> \n
61 /// ```
70 /// ```
62 ///
71 ///
63 /// The last entry is also terminated by a newline character.
72 /// The last entry is also terminated by a newline character.
64 /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
73 /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
65 bytes: Vec<u8>,
74 bytes: Vec<u8>,
66 }
75 }
67
76
68 impl Manifest {
77 impl Manifest {
69 pub fn iter(
78 pub fn iter(
70 &self,
79 &self,
71 ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
80 ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
72 self.bytes
81 self.bytes
73 .split(|b| b == &b'\n')
82 .split(|b| b == &b'\n')
74 .filter(|line| !line.is_empty())
83 .filter(|line| !line.is_empty())
75 .map(ManifestEntry::from_raw)
84 .map(ManifestEntry::from_raw)
76 }
85 }
77
86
78 /// If the given path is in this manifest, return its filelog node ID
87 /// If the given path is in this manifest, return its filelog node ID
79 pub fn find_by_path(
88 pub fn find_by_path(
80 &self,
89 &self,
81 path: &HgPath,
90 path: &HgPath,
82 ) -> Result<Option<ManifestEntry>, HgError> {
91 ) -> Result<Option<ManifestEntry>, HgError> {
83 use std::cmp::Ordering::*;
92 use std::cmp::Ordering::*;
84 let path = path.as_bytes();
93 let path = path.as_bytes();
85 // Both boundaries of this `&[u8]` slice are always at the boundary of
94 // Both boundaries of this `&[u8]` slice are always at the boundary of
86 // an entry
95 // an entry
87 let mut bytes = &*self.bytes;
96 let mut bytes = &*self.bytes;
88
97
89 // Binary search algorithm derived from `[T]::binary_search_by`
98 // Binary search algorithm derived from `[T]::binary_search_by`
90 // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
99 // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
91 // except we don’t have a slice of entries. Instead we jump to the
100 // except we don’t have a slice of entries. Instead we jump to the
92 // middle of the byte slice and look around for entry delimiters
101 // middle of the byte slice and look around for entry delimiters
93 // (newlines).
102 // (newlines).
94 while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? {
103 while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? {
95 let (entry_path, rest) =
104 let (entry_path, rest) =
96 ManifestEntry::split_path(&bytes[entry_range.clone()])?;
105 ManifestEntry::split_path(&bytes[entry_range.clone()])?;
97 let cmp = entry_path.cmp(path);
106 let cmp = entry_path.cmp(path);
98 if cmp == Less {
107 if cmp == Less {
99 let after_newline = entry_range.end + 1;
108 let after_newline = entry_range.end + 1;
100 bytes = &bytes[after_newline..];
109 bytes = &bytes[after_newline..];
101 } else if cmp == Greater {
110 } else if cmp == Greater {
102 bytes = &bytes[..entry_range.start];
111 bytes = &bytes[..entry_range.start];
103 } else {
112 } else {
104 return Ok(Some(ManifestEntry::from_path_and_rest(
113 return Ok(Some(ManifestEntry::from_path_and_rest(
105 entry_path, rest,
114 entry_path, rest,
106 )));
115 )));
107 }
116 }
108 }
117 }
109 Ok(None)
118 Ok(None)
110 }
119 }
111
120
112 /// If there is at least one, return the byte range of an entry *excluding*
121 /// If there is at least one, return the byte range of an entry *excluding*
113 /// the final newline.
122 /// the final newline.
114 fn find_entry_near_middle_of(
123 fn find_entry_near_middle_of(
115 bytes: &[u8],
124 bytes: &[u8],
116 ) -> Result<Option<std::ops::Range<usize>>, HgError> {
125 ) -> Result<Option<std::ops::Range<usize>>, HgError> {
117 let len = bytes.len();
126 let len = bytes.len();
118 if len > 0 {
127 if len > 0 {
119 let middle = bytes.len() / 2;
128 let middle = bytes.len() / 2;
120 // Integer division rounds down, so `middle < len`.
129 // Integer division rounds down, so `middle < len`.
121 let (before, after) = bytes.split_at(middle);
130 let (before, after) = bytes.split_at(middle);
122 let is_newline = |&byte: &u8| byte == b'\n';
131 let is_newline = |&byte: &u8| byte == b'\n';
123 let entry_start = match before.iter().rposition(is_newline) {
132 let entry_start = match before.iter().rposition(is_newline) {
124 Some(i) => i + 1,
133 Some(i) => i + 1,
125 None => 0, // We choose the first entry in `bytes`
134 None => 0, // We choose the first entry in `bytes`
126 };
135 };
127 let entry_end = match after.iter().position(is_newline) {
136 let entry_end = match after.iter().position(is_newline) {
128 Some(i) => {
137 Some(i) => {
129 // No `+ 1` here to exclude this newline from the range
138 // No `+ 1` here to exclude this newline from the range
130 middle + i
139 middle + i
131 }
140 }
132 None => {
141 None => {
133 // In a well-formed manifest:
142 // In a well-formed manifest:
134 //
143 //
135 // * Since `len > 0`, `bytes` contains at least one entry
144 // * Since `len > 0`, `bytes` contains at least one entry
136 // * Every entry ends with a newline
145 // * Every entry ends with a newline
137 // * Since `middle < len`, `after` contains at least the
146 // * Since `middle < len`, `after` contains at least the
138 // newline at the end of the last entry of `bytes`.
147 // newline at the end of the last entry of `bytes`.
139 //
148 //
140 // We didn’t find a newline, so this manifest is not
149 // We didn’t find a newline, so this manifest is not
141 // well-formed.
150 // well-formed.
142 return Err(HgError::corrupted(
151 return Err(HgError::corrupted(
143 "manifest entry without \\n delimiter",
152 "manifest entry without \\n delimiter",
144 ));
153 ));
145 }
154 }
146 };
155 };
147 Ok(Some(entry_start..entry_end))
156 Ok(Some(entry_start..entry_end))
148 } else {
157 } else {
149 // len == 0
158 // len == 0
150 Ok(None)
159 Ok(None)
151 }
160 }
152 }
161 }
153 }
162 }
154
163
155 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
164 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
156 #[derive(Debug)]
165 #[derive(Debug)]
157 pub struct ManifestEntry<'manifest> {
166 pub struct ManifestEntry<'manifest> {
158 pub path: &'manifest HgPath,
167 pub path: &'manifest HgPath,
159 pub hex_node_id: &'manifest [u8],
168 pub hex_node_id: &'manifest [u8],
160
169
161 /// `Some` values are b'x', b'l', or 't'
170 /// `Some` values are b'x', b'l', or 't'
162 pub flags: Option<u8>,
171 pub flags: Option<u8>,
163 }
172 }
164
173
165 impl<'a> ManifestEntry<'a> {
174 impl<'a> ManifestEntry<'a> {
166 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
175 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
167 bytes.split_2(b'\0').ok_or_else(|| {
176 bytes.split_2(b'\0').ok_or_else(|| {
168 HgError::corrupted("manifest entry without \\0 delimiter")
177 HgError::corrupted("manifest entry without \\0 delimiter")
169 })
178 })
170 }
179 }
171
180
172 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
181 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
173 let (hex_node_id, flags) = match rest.split_last() {
182 let (hex_node_id, flags) = match rest.split_last() {
174 Some((&b'x', rest)) => (rest, Some(b'x')),
183 Some((&b'x', rest)) => (rest, Some(b'x')),
175 Some((&b'l', rest)) => (rest, Some(b'l')),
184 Some((&b'l', rest)) => (rest, Some(b'l')),
176 Some((&b't', rest)) => (rest, Some(b't')),
185 Some((&b't', rest)) => (rest, Some(b't')),
177 _ => (rest, None),
186 _ => (rest, None),
178 };
187 };
179 Self {
188 Self {
180 path: HgPath::new(path),
189 path: HgPath::new(path),
181 hex_node_id,
190 hex_node_id,
182 flags,
191 flags,
183 }
192 }
184 }
193 }
185
194
186 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
195 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
187 let (path, rest) = Self::split_path(bytes)?;
196 let (path, rest) = Self::split_path(bytes)?;
188 Ok(Self::from_path_and_rest(path, rest))
197 Ok(Self::from_path_and_rest(path, rest))
189 }
198 }
190
199
191 pub fn node_id(&self) -> Result<Node, HgError> {
200 pub fn node_id(&self) -> Result<Node, HgError> {
192 Node::from_hex_for_repo(self.hex_node_id)
201 Node::from_hex_for_repo(self.hex_node_id)
193 }
202 }
194 }
203 }
@@ -1,849 +1,904 b''
1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Mercurial concepts for handling revision history
6 //! Mercurial concepts for handling revision history
7
7
8 pub mod node;
8 pub mod node;
9 pub mod nodemap;
9 pub mod nodemap;
10 mod nodemap_docket;
10 mod nodemap_docket;
11 pub mod path_encode;
11 pub mod path_encode;
12 pub use node::{FromHexError, Node, NodePrefix};
12 pub use node::{FromHexError, Node, NodePrefix};
13 pub mod changelog;
13 pub mod changelog;
14 pub mod filelog;
14 pub mod filelog;
15 pub mod index;
15 pub mod index;
16 pub mod manifest;
16 pub mod manifest;
17 pub mod patch;
17 pub mod patch;
18
18
19 use std::borrow::Cow;
19 use std::borrow::Cow;
20 use std::io::Read;
20 use std::io::Read;
21 use std::ops::Deref;
21 use std::ops::Deref;
22 use std::path::Path;
22 use std::path::Path;
23
23
24 use flate2::read::ZlibDecoder;
24 use flate2::read::ZlibDecoder;
25 use sha1::{Digest, Sha1};
25 use sha1::{Digest, Sha1};
26 use std::cell::RefCell;
26 use std::cell::RefCell;
27 use zstd;
27 use zstd;
28
28
29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 use self::nodemap_docket::NodeMapDocket;
30 use self::nodemap_docket::NodeMapDocket;
31 use super::index::Index;
31 use super::index::Index;
32 use super::nodemap::{NodeMap, NodeMapError};
32 use super::nodemap::{NodeMap, NodeMapError};
33 use crate::errors::HgError;
33 use crate::errors::HgError;
34 use crate::vfs::Vfs;
34 use crate::vfs::Vfs;
35
35
36 /// Mercurial revision numbers
36 /// Mercurial revision numbers
37 ///
37 ///
38 /// As noted in revlog.c, revision numbers are actually encoded in
38 /// As noted in revlog.c, revision numbers are actually encoded in
39 /// 4 bytes, and are liberally converted to ints, whence the i32
39 /// 4 bytes, and are liberally converted to ints, whence the i32
40 pub type Revision = i32;
40 pub type Revision = i32;
41
41
42 /// Unchecked Mercurial revision numbers.
42 /// Unchecked Mercurial revision numbers.
43 ///
43 ///
44 /// Values of this type have no guarantee of being a valid revision number
44 /// Values of this type have no guarantee of being a valid revision number
45 /// in any context. Use method `check_revision` to get a valid revision within
45 /// in any context. Use method `check_revision` to get a valid revision within
46 /// the appropriate index object.
46 /// the appropriate index object.
47 ///
47 ///
48 /// As noted in revlog.c, revision numbers are actually encoded in
48 /// As noted in revlog.c, revision numbers are actually encoded in
49 /// 4 bytes, and are liberally converted to ints, whence the i32
49 /// 4 bytes, and are liberally converted to ints, whence the i32
50 pub type UncheckedRevision = i32;
50 #[derive(
51 Debug,
52 derive_more::Display,
53 Clone,
54 Copy,
55 Hash,
56 PartialEq,
57 Eq,
58 PartialOrd,
59 Ord,
60 )]
61 pub struct UncheckedRevision(i32);
62
63 impl From<Revision> for UncheckedRevision {
64 fn from(value: Revision) -> Self {
65 Self(value)
66 }
67 }
51
68
52 /// Marker expressing the absence of a parent
69 /// Marker expressing the absence of a parent
53 ///
70 ///
54 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
71 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
55 /// to be smaller than all existing revisions.
72 /// to be smaller than all existing revisions.
56 pub const NULL_REVISION: Revision = -1;
73 pub const NULL_REVISION: Revision = -1;
57
74
58 /// Same as `mercurial.node.wdirrev`
75 /// Same as `mercurial.node.wdirrev`
59 ///
76 ///
60 /// This is also equal to `i32::max_value()`, but it's better to spell
77 /// This is also equal to `i32::max_value()`, but it's better to spell
61 /// it out explicitely, same as in `mercurial.node`
78 /// it out explicitely, same as in `mercurial.node`
62 #[allow(clippy::unreadable_literal)]
79 #[allow(clippy::unreadable_literal)]
63 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
80 pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
81 UncheckedRevision(0x7fffffff);
64
82
65 pub const WORKING_DIRECTORY_HEX: &str =
83 pub const WORKING_DIRECTORY_HEX: &str =
66 "ffffffffffffffffffffffffffffffffffffffff";
84 "ffffffffffffffffffffffffffffffffffffffff";
67
85
68 /// The simplest expression of what we need of Mercurial DAGs.
86 /// The simplest expression of what we need of Mercurial DAGs.
69 pub trait Graph {
87 pub trait Graph {
70 /// Return the two parents of the given `Revision`.
88 /// Return the two parents of the given `Revision`.
71 ///
89 ///
72 /// Each of the parents can be independently `NULL_REVISION`
90 /// Each of the parents can be independently `NULL_REVISION`
73 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
91 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
74 }
92 }
75
93
76 #[derive(Clone, Debug, PartialEq)]
94 #[derive(Clone, Debug, PartialEq)]
77 pub enum GraphError {
95 pub enum GraphError {
78 ParentOutOfRange(Revision),
96 ParentOutOfRange(Revision),
79 }
97 }
80
98
81 /// The Mercurial Revlog Index
99 /// The Mercurial Revlog Index
82 ///
100 ///
83 /// This is currently limited to the minimal interface that is needed for
101 /// This is currently limited to the minimal interface that is needed for
84 /// the [`nodemap`](nodemap/index.html) module
102 /// the [`nodemap`](nodemap/index.html) module
85 pub trait RevlogIndex {
103 pub trait RevlogIndex {
86 /// Total number of Revisions referenced in this index
104 /// Total number of Revisions referenced in this index
87 fn len(&self) -> usize;
105 fn len(&self) -> usize;
88
106
89 fn is_empty(&self) -> bool {
107 fn is_empty(&self) -> bool {
90 self.len() == 0
108 self.len() == 0
91 }
109 }
92
110
93 /// Return a reference to the Node or `None` if rev is out of bounds
111 /// Return a reference to the Node or `None` for `NULL_REVISION`
94 ///
95 /// `NULL_REVISION` is not considered to be out of bounds.
96 fn node(&self, rev: Revision) -> Option<&Node>;
112 fn node(&self, rev: Revision) -> Option<&Node>;
97
113
98 /// Return a [`Revision`] if `rev` is a valid revision number for this
114 /// Return a [`Revision`] if `rev` is a valid revision number for this
99 /// index
115 /// index
100 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
116 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
117 let rev = rev.0;
118
101 if rev == NULL_REVISION || (rev >= 0 && (rev as usize) < self.len()) {
119 if rev == NULL_REVISION || (rev >= 0 && (rev as usize) < self.len()) {
102 Some(rev)
120 Some(rev)
103 } else {
121 } else {
104 None
122 None
105 }
123 }
106 }
124 }
107 }
125 }
108
126
109 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
127 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
110 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
128 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
111 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
129 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
112 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
130 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
113
131
114 // Keep this in sync with REVIDX_KNOWN_FLAGS in
132 // Keep this in sync with REVIDX_KNOWN_FLAGS in
115 // mercurial/revlogutils/flagutil.py
133 // mercurial/revlogutils/flagutil.py
116 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
134 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
117 | REVISION_FLAG_ELLIPSIS
135 | REVISION_FLAG_ELLIPSIS
118 | REVISION_FLAG_EXTSTORED
136 | REVISION_FLAG_EXTSTORED
119 | REVISION_FLAG_HASCOPIESINFO;
137 | REVISION_FLAG_HASCOPIESINFO;
120
138
121 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
139 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
122
140
123 #[derive(Debug, derive_more::From)]
141 #[derive(Debug, derive_more::From, derive_more::Display)]
124 pub enum RevlogError {
142 pub enum RevlogError {
125 InvalidRevision,
143 InvalidRevision,
126 /// Working directory is not supported
144 /// Working directory is not supported
127 WDirUnsupported,
145 WDirUnsupported,
128 /// Found more than one entry whose ID match the requested prefix
146 /// Found more than one entry whose ID match the requested prefix
129 AmbiguousPrefix,
147 AmbiguousPrefix,
130 #[from]
148 #[from]
131 Other(HgError),
149 Other(HgError),
132 }
150 }
133
151
134 impl From<NodeMapError> for RevlogError {
152 impl From<NodeMapError> for RevlogError {
135 fn from(error: NodeMapError) -> Self {
153 fn from(error: NodeMapError) -> Self {
136 match error {
154 match error {
137 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
155 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
138 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
156 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
139 format!("nodemap point to revision {} not in index", rev),
157 format!("nodemap point to revision {} not in index", rev),
140 ),
158 ),
141 }
159 }
142 }
160 }
143 }
161 }
144
162
145 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
163 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
146 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
164 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
147 }
165 }
148
166
149 impl RevlogError {
167 impl RevlogError {
150 fn corrupted<S: AsRef<str>>(context: S) -> Self {
168 fn corrupted<S: AsRef<str>>(context: S) -> Self {
151 RevlogError::Other(corrupted(context))
169 RevlogError::Other(corrupted(context))
152 }
170 }
153 }
171 }
154
172
155 /// Read only implementation of revlog.
173 /// Read only implementation of revlog.
156 pub struct Revlog {
174 pub struct Revlog {
157 /// When index and data are not interleaved: bytes of the revlog index.
175 /// When index and data are not interleaved: bytes of the revlog index.
158 /// When index and data are interleaved: bytes of the revlog index and
176 /// When index and data are interleaved: bytes of the revlog index and
159 /// data.
177 /// data.
160 index: Index,
178 index: Index,
161 /// When index and data are not interleaved: bytes of the revlog data
179 /// When index and data are not interleaved: bytes of the revlog data
162 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
180 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
163 /// When present on disk: the persistent nodemap for this revlog
181 /// When present on disk: the persistent nodemap for this revlog
164 nodemap: Option<nodemap::NodeTree>,
182 nodemap: Option<nodemap::NodeTree>,
165 }
183 }
166
184
167 impl Revlog {
185 impl Revlog {
168 /// Open a revlog index file.
186 /// Open a revlog index file.
169 ///
187 ///
170 /// It will also open the associated data file if index and data are not
188 /// It will also open the associated data file if index and data are not
171 /// interleaved.
189 /// interleaved.
172 pub fn open(
190 pub fn open(
173 store_vfs: &Vfs,
191 store_vfs: &Vfs,
174 index_path: impl AsRef<Path>,
192 index_path: impl AsRef<Path>,
175 data_path: Option<&Path>,
193 data_path: Option<&Path>,
176 use_nodemap: bool,
194 use_nodemap: bool,
177 ) -> Result<Self, HgError> {
195 ) -> Result<Self, HgError> {
178 let index_path = index_path.as_ref();
196 let index_path = index_path.as_ref();
179 let index = {
197 let index = {
180 match store_vfs.mmap_open_opt(&index_path)? {
198 match store_vfs.mmap_open_opt(&index_path)? {
181 None => Index::new(Box::new(vec![])),
199 None => Index::new(Box::new(vec![])),
182 Some(index_mmap) => {
200 Some(index_mmap) => {
183 let index = Index::new(Box::new(index_mmap))?;
201 let index = Index::new(Box::new(index_mmap))?;
184 Ok(index)
202 Ok(index)
185 }
203 }
186 }
204 }
187 }?;
205 }?;
188
206
189 let default_data_path = index_path.with_extension("d");
207 let default_data_path = index_path.with_extension("d");
190
208
191 // type annotation required
209 // type annotation required
192 // won't recognize Mmap as Deref<Target = [u8]>
210 // won't recognize Mmap as Deref<Target = [u8]>
193 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
211 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
194 if index.is_inline() {
212 if index.is_inline() {
195 None
213 None
196 } else {
214 } else {
197 let data_path = data_path.unwrap_or(&default_data_path);
215 let data_path = data_path.unwrap_or(&default_data_path);
198 let data_mmap = store_vfs.mmap_open(data_path)?;
216 let data_mmap = store_vfs.mmap_open(data_path)?;
199 Some(Box::new(data_mmap))
217 Some(Box::new(data_mmap))
200 };
218 };
201
219
202 let nodemap = if index.is_inline() || !use_nodemap {
220 let nodemap = if index.is_inline() || !use_nodemap {
203 None
221 None
204 } else {
222 } else {
205 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
223 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
206 |(docket, data)| {
224 |(docket, data)| {
207 nodemap::NodeTree::load_bytes(
225 nodemap::NodeTree::load_bytes(
208 Box::new(data),
226 Box::new(data),
209 docket.data_length,
227 docket.data_length,
210 )
228 )
211 },
229 },
212 )
230 )
213 };
231 };
214
232
215 Ok(Revlog {
233 Ok(Revlog {
216 index,
234 index,
217 data_bytes,
235 data_bytes,
218 nodemap,
236 nodemap,
219 })
237 })
220 }
238 }
221
239
222 /// Return number of entries of the `Revlog`.
240 /// Return number of entries of the `Revlog`.
223 pub fn len(&self) -> usize {
241 pub fn len(&self) -> usize {
224 self.index.len()
242 self.index.len()
225 }
243 }
226
244
227 /// Returns `true` if the `Revlog` has zero `entries`.
245 /// Returns `true` if the `Revlog` has zero `entries`.
228 pub fn is_empty(&self) -> bool {
246 pub fn is_empty(&self) -> bool {
229 self.index.is_empty()
247 self.index.is_empty()
230 }
248 }
231
249
232 /// Returns the node ID for the given revision number, if it exists in this
250 /// Returns the node ID for the given revision number, if it exists in this
233 /// revlog
251 /// revlog
234 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
252 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
235 if rev == NULL_REVISION {
253 if rev == NULL_REVISION.into() {
236 return Some(&NULL_NODE);
254 return Some(&NULL_NODE);
237 }
255 }
256 let rev = self.index.check_revision(rev)?;
238 Some(self.index.get_entry(rev)?.hash())
257 Some(self.index.get_entry(rev)?.hash())
239 }
258 }
240
259
241 /// Return the revision number for the given node ID, if it exists in this
260 /// Return the revision number for the given node ID, if it exists in this
242 /// revlog
261 /// revlog
243 pub fn rev_from_node(
262 pub fn rev_from_node(
244 &self,
263 &self,
245 node: NodePrefix,
264 node: NodePrefix,
246 ) -> Result<Revision, RevlogError> {
265 ) -> Result<Revision, RevlogError> {
247 let looked_up = if let Some(nodemap) = &self.nodemap {
266 let looked_up = if let Some(nodemap) = &self.nodemap {
248 nodemap
267 nodemap
249 .find_bin(&self.index, node)?
268 .find_bin(&self.index, node)?
250 .ok_or(RevlogError::InvalidRevision)
269 .ok_or(RevlogError::InvalidRevision)
251 } else {
270 } else {
252 self.rev_from_node_no_persistent_nodemap(node)
271 self.rev_from_node_no_persistent_nodemap(node)
253 };
272 };
254
273
255 if node.is_prefix_of(&NULL_NODE) {
274 if node.is_prefix_of(&NULL_NODE) {
256 return match looked_up {
275 return match looked_up {
257 Ok(_) => Err(RevlogError::AmbiguousPrefix),
276 Ok(_) => Err(RevlogError::AmbiguousPrefix),
258 Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),
277 Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),
259 res => res,
278 res => res,
260 };
279 };
261 };
280 };
262
281
263 looked_up
282 looked_up
264 }
283 }
265
284
266 /// Same as `rev_from_node`, without using a persistent nodemap
285 /// Same as `rev_from_node`, without using a persistent nodemap
267 ///
286 ///
268 /// This is used as fallback when a persistent nodemap is not present.
287 /// This is used as fallback when a persistent nodemap is not present.
269 /// This happens when the persistent-nodemap experimental feature is not
288 /// This happens when the persistent-nodemap experimental feature is not
270 /// enabled, or for small revlogs.
289 /// enabled, or for small revlogs.
271 fn rev_from_node_no_persistent_nodemap(
290 fn rev_from_node_no_persistent_nodemap(
272 &self,
291 &self,
273 node: NodePrefix,
292 node: NodePrefix,
274 ) -> Result<Revision, RevlogError> {
293 ) -> Result<Revision, RevlogError> {
275 // Linear scan of the revlog
294 // Linear scan of the revlog
276 // TODO: consider building a non-persistent nodemap in memory to
295 // TODO: consider building a non-persistent nodemap in memory to
277 // optimize these cases.
296 // optimize these cases.
278 let mut found_by_prefix = None;
297 let mut found_by_prefix = None;
279 for rev in (0..self.len() as Revision).rev() {
298 for rev in (0..self.len() as Revision).rev() {
280 let index_entry = self.index.get_entry(rev).ok_or_else(|| {
299 let index_entry = self.index.get_entry(rev).ok_or_else(|| {
281 HgError::corrupted(
300 HgError::corrupted(
282 "revlog references a revision not in the index",
301 "revlog references a revision not in the index",
283 )
302 )
284 })?;
303 })?;
285 if node == *index_entry.hash() {
304 if node == *index_entry.hash() {
286 return Ok(rev);
305 return Ok(rev);
287 }
306 }
288 if node.is_prefix_of(index_entry.hash()) {
307 if node.is_prefix_of(index_entry.hash()) {
289 if found_by_prefix.is_some() {
308 if found_by_prefix.is_some() {
290 return Err(RevlogError::AmbiguousPrefix);
309 return Err(RevlogError::AmbiguousPrefix);
291 }
310 }
292 found_by_prefix = Some(rev)
311 found_by_prefix = Some(rev)
293 }
312 }
294 }
313 }
295 found_by_prefix.ok_or(RevlogError::InvalidRevision)
314 found_by_prefix.ok_or(RevlogError::InvalidRevision)
296 }
315 }
297
316
298 /// Returns whether the given revision exists in this revlog.
317 /// Returns whether the given revision exists in this revlog.
299 pub fn has_rev(&self, rev: Revision) -> bool {
318 pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
300 self.index.get_entry(rev).is_some()
319 self.index.check_revision(rev).is_some()
301 }
320 }
302
321
303 /// Return the full data associated to a revision.
322 /// Return the full data associated to a revision.
304 ///
323 ///
305 /// All entries required to build the final data out of deltas will be
324 /// All entries required to build the final data out of deltas will be
306 /// retrieved as needed, and the deltas will be applied to the inital
325 /// retrieved as needed, and the deltas will be applied to the inital
307 /// snapshot to rebuild the final data.
326 /// snapshot to rebuild the final data.
308 pub fn get_rev_data(
327 pub fn get_rev_data(
309 &self,
328 &self,
329 rev: UncheckedRevision,
330 ) -> Result<Cow<[u8]>, RevlogError> {
331 if rev == NULL_REVISION.into() {
332 return Ok(Cow::Borrowed(&[]));
333 };
334 self.get_entry(rev)?.data()
335 }
336
337 /// [`Self::get_rev_data`] for checked revisions.
338 pub fn get_rev_data_for_checked_rev(
339 &self,
310 rev: Revision,
340 rev: Revision,
311 ) -> Result<Cow<[u8]>, RevlogError> {
341 ) -> Result<Cow<[u8]>, RevlogError> {
312 if rev == NULL_REVISION {
342 if rev == NULL_REVISION {
313 return Ok(Cow::Borrowed(&[]));
343 return Ok(Cow::Borrowed(&[]));
314 };
344 };
315 Ok(self.get_entry(rev)?.data()?)
345 self.get_entry_for_checked_rev(rev)?.data()
316 }
346 }
317
347
318 /// Check the hash of some given data against the recorded hash.
348 /// Check the hash of some given data against the recorded hash.
319 pub fn check_hash(
349 pub fn check_hash(
320 &self,
350 &self,
321 p1: Revision,
351 p1: Revision,
322 p2: Revision,
352 p2: Revision,
323 expected: &[u8],
353 expected: &[u8],
324 data: &[u8],
354 data: &[u8],
325 ) -> bool {
355 ) -> bool {
326 let e1 = self.index.get_entry(p1);
356 let e1 = self.index.get_entry(p1);
327 let h1 = match e1 {
357 let h1 = match e1 {
328 Some(ref entry) => entry.hash(),
358 Some(ref entry) => entry.hash(),
329 None => &NULL_NODE,
359 None => &NULL_NODE,
330 };
360 };
331 let e2 = self.index.get_entry(p2);
361 let e2 = self.index.get_entry(p2);
332 let h2 = match e2 {
362 let h2 = match e2 {
333 Some(ref entry) => entry.hash(),
363 Some(ref entry) => entry.hash(),
334 None => &NULL_NODE,
364 None => &NULL_NODE,
335 };
365 };
336
366
337 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
367 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
338 }
368 }
339
369
340 /// Build the full data of a revision out its snapshot
370 /// Build the full data of a revision out its snapshot
341 /// and its deltas.
371 /// and its deltas.
342 fn build_data_from_deltas(
372 fn build_data_from_deltas(
343 snapshot: RevlogEntry,
373 snapshot: RevlogEntry,
344 deltas: &[RevlogEntry],
374 deltas: &[RevlogEntry],
345 ) -> Result<Vec<u8>, HgError> {
375 ) -> Result<Vec<u8>, HgError> {
346 let snapshot = snapshot.data_chunk()?;
376 let snapshot = snapshot.data_chunk()?;
347 let deltas = deltas
377 let deltas = deltas
348 .iter()
378 .iter()
349 .rev()
379 .rev()
350 .map(RevlogEntry::data_chunk)
380 .map(RevlogEntry::data_chunk)
351 .collect::<Result<Vec<_>, _>>()?;
381 .collect::<Result<Vec<_>, _>>()?;
352 let patches: Vec<_> =
382 let patches: Vec<_> =
353 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
383 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
354 let patch = patch::fold_patch_lists(&patches);
384 let patch = patch::fold_patch_lists(&patches);
355 Ok(patch.apply(&snapshot))
385 Ok(patch.apply(&snapshot))
356 }
386 }
357
387
358 /// Return the revlog data.
388 /// Return the revlog data.
359 fn data(&self) -> &[u8] {
389 fn data(&self) -> &[u8] {
360 match &self.data_bytes {
390 match &self.data_bytes {
361 Some(data_bytes) => data_bytes,
391 Some(data_bytes) => data_bytes,
362 None => panic!(
392 None => panic!(
363 "forgot to load the data or trying to access inline data"
393 "forgot to load the data or trying to access inline data"
364 ),
394 ),
365 }
395 }
366 }
396 }
367
397
368 pub fn make_null_entry(&self) -> RevlogEntry {
398 pub fn make_null_entry(&self) -> RevlogEntry {
369 RevlogEntry {
399 RevlogEntry {
370 revlog: self,
400 revlog: self,
371 rev: NULL_REVISION,
401 rev: NULL_REVISION,
372 bytes: b"",
402 bytes: b"",
373 compressed_len: 0,
403 compressed_len: 0,
374 uncompressed_len: 0,
404 uncompressed_len: 0,
375 base_rev_or_base_of_delta_chain: None,
405 base_rev_or_base_of_delta_chain: None,
376 p1: NULL_REVISION,
406 p1: NULL_REVISION,
377 p2: NULL_REVISION,
407 p2: NULL_REVISION,
378 flags: NULL_REVLOG_ENTRY_FLAGS,
408 flags: NULL_REVLOG_ENTRY_FLAGS,
379 hash: NULL_NODE,
409 hash: NULL_NODE,
380 }
410 }
381 }
411 }
382
412
383 /// Get an entry of the revlog.
413 fn get_entry_for_checked_rev(
384 pub fn get_entry(
385 &self,
414 &self,
386 rev: Revision,
415 rev: Revision,
387 ) -> Result<RevlogEntry, RevlogError> {
416 ) -> Result<RevlogEntry, RevlogError> {
388 if rev == NULL_REVISION {
417 if rev == NULL_REVISION {
389 return Ok(self.make_null_entry());
418 return Ok(self.make_null_entry());
390 }
419 }
391 let index_entry = self
420 let index_entry = self
392 .index
421 .index
393 .get_entry(rev)
422 .get_entry(rev)
394 .ok_or(RevlogError::InvalidRevision)?;
423 .ok_or(RevlogError::InvalidRevision)?;
395 let start = index_entry.offset();
424 let start = index_entry.offset();
396 let end = start + index_entry.compressed_len() as usize;
425 let end = start + index_entry.compressed_len() as usize;
397 let data = if self.index.is_inline() {
426 let data = if self.index.is_inline() {
398 self.index.data(start, end)
427 self.index.data(start, end)
399 } else {
428 } else {
400 &self.data()[start..end]
429 &self.data()[start..end]
401 };
430 };
431 let base_rev = self
432 .index
433 .check_revision(index_entry.base_revision_or_base_of_delta_chain())
434 .ok_or_else(|| {
435 RevlogError::corrupted(format!(
436 "base revision for rev {} is invalid",
437 rev
438 ))
439 })?;
440 let p1 =
441 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
442 RevlogError::corrupted(format!(
443 "p1 for rev {} is invalid",
444 rev
445 ))
446 })?;
447 let p2 =
448 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
449 RevlogError::corrupted(format!(
450 "p2 for rev {} is invalid",
451 rev
452 ))
453 })?;
402 let entry = RevlogEntry {
454 let entry = RevlogEntry {
403 revlog: self,
455 revlog: self,
404 rev,
456 rev,
405 bytes: data,
457 bytes: data,
406 compressed_len: index_entry.compressed_len(),
458 compressed_len: index_entry.compressed_len(),
407 uncompressed_len: index_entry.uncompressed_len(),
459 uncompressed_len: index_entry.uncompressed_len(),
408 base_rev_or_base_of_delta_chain: if index_entry
460 base_rev_or_base_of_delta_chain: if base_rev == rev {
409 .base_revision_or_base_of_delta_chain()
410 == rev
411 {
412 None
461 None
413 } else {
462 } else {
414 Some(index_entry.base_revision_or_base_of_delta_chain())
463 Some(base_rev)
415 },
464 },
416 p1: index_entry.p1(),
465 p1,
417 p2: index_entry.p2(),
466 p2,
418 flags: index_entry.flags(),
467 flags: index_entry.flags(),
419 hash: *index_entry.hash(),
468 hash: *index_entry.hash(),
420 };
469 };
421 Ok(entry)
470 Ok(entry)
422 }
471 }
423
472
424 /// when resolving internal references within revlog, any errors
473 /// Get an entry of the revlog.
425 /// should be reported as corruption, instead of e.g. "invalid revision"
474 pub fn get_entry(
426 fn get_entry_internal(
427 &self,
475 &self,
428 rev: Revision,
476 rev: UncheckedRevision,
429 ) -> Result<RevlogEntry, HgError> {
477 ) -> Result<RevlogEntry, RevlogError> {
430 self.get_entry(rev)
478 if rev == NULL_REVISION.into() {
431 .map_err(|_| corrupted(format!("revision {} out of range", rev)))
479 return Ok(self.make_null_entry());
480 }
481 let rev = self.index.check_revision(rev).ok_or_else(|| {
482 RevlogError::corrupted(format!("rev {} is invalid", rev))
483 })?;
484 self.get_entry_for_checked_rev(rev)
432 }
485 }
433 }
486 }
434
487
435 /// The revlog entry's bytes and the necessary informations to extract
488 /// The revlog entry's bytes and the necessary informations to extract
436 /// the entry's data.
489 /// the entry's data.
437 #[derive(Clone)]
490 #[derive(Clone)]
438 pub struct RevlogEntry<'revlog> {
491 pub struct RevlogEntry<'revlog> {
439 revlog: &'revlog Revlog,
492 revlog: &'revlog Revlog,
440 rev: Revision,
493 rev: Revision,
441 bytes: &'revlog [u8],
494 bytes: &'revlog [u8],
442 compressed_len: u32,
495 compressed_len: u32,
443 uncompressed_len: i32,
496 uncompressed_len: i32,
444 base_rev_or_base_of_delta_chain: Option<Revision>,
497 base_rev_or_base_of_delta_chain: Option<Revision>,
445 p1: Revision,
498 p1: Revision,
446 p2: Revision,
499 p2: Revision,
447 flags: u16,
500 flags: u16,
448 hash: Node,
501 hash: Node,
449 }
502 }
450
503
451 thread_local! {
504 thread_local! {
452 // seems fine to [unwrap] here: this can only fail due to memory allocation
505 // seems fine to [unwrap] here: this can only fail due to memory allocation
453 // failing, and it's normal for that to cause panic.
506 // failing, and it's normal for that to cause panic.
454 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
507 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
455 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
508 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
456 }
509 }
457
510
458 fn zstd_decompress_to_buffer(
511 fn zstd_decompress_to_buffer(
459 bytes: &[u8],
512 bytes: &[u8],
460 buf: &mut Vec<u8>,
513 buf: &mut Vec<u8>,
461 ) -> Result<usize, std::io::Error> {
514 ) -> Result<usize, std::io::Error> {
462 ZSTD_DECODER
515 ZSTD_DECODER
463 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
516 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
464 }
517 }
465
518
466 impl<'revlog> RevlogEntry<'revlog> {
519 impl<'revlog> RevlogEntry<'revlog> {
467 pub fn revision(&self) -> Revision {
520 pub fn revision(&self) -> Revision {
468 self.rev
521 self.rev
469 }
522 }
470
523
471 pub fn node(&self) -> &Node {
524 pub fn node(&self) -> &Node {
472 &self.hash
525 &self.hash
473 }
526 }
474
527
475 pub fn uncompressed_len(&self) -> Option<u32> {
528 pub fn uncompressed_len(&self) -> Option<u32> {
476 u32::try_from(self.uncompressed_len).ok()
529 u32::try_from(self.uncompressed_len).ok()
477 }
530 }
478
531
479 pub fn has_p1(&self) -> bool {
532 pub fn has_p1(&self) -> bool {
480 self.p1 != NULL_REVISION
533 self.p1 != NULL_REVISION
481 }
534 }
482
535
483 pub fn p1_entry(
536 pub fn p1_entry(
484 &self,
537 &self,
485 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
538 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
486 if self.p1 == NULL_REVISION {
539 if self.p1 == NULL_REVISION {
487 Ok(None)
540 Ok(None)
488 } else {
541 } else {
489 Ok(Some(self.revlog.get_entry(self.p1)?))
542 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?))
490 }
543 }
491 }
544 }
492
545
493 pub fn p2_entry(
546 pub fn p2_entry(
494 &self,
547 &self,
495 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
548 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
496 if self.p2 == NULL_REVISION {
549 if self.p2 == NULL_REVISION {
497 Ok(None)
550 Ok(None)
498 } else {
551 } else {
499 Ok(Some(self.revlog.get_entry(self.p2)?))
552 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?))
500 }
553 }
501 }
554 }
502
555
503 pub fn p1(&self) -> Option<Revision> {
556 pub fn p1(&self) -> Option<Revision> {
504 if self.p1 == NULL_REVISION {
557 if self.p1 == NULL_REVISION {
505 None
558 None
506 } else {
559 } else {
507 Some(self.p1)
560 Some(self.p1)
508 }
561 }
509 }
562 }
510
563
511 pub fn p2(&self) -> Option<Revision> {
564 pub fn p2(&self) -> Option<Revision> {
512 if self.p2 == NULL_REVISION {
565 if self.p2 == NULL_REVISION {
513 None
566 None
514 } else {
567 } else {
515 Some(self.p2)
568 Some(self.p2)
516 }
569 }
517 }
570 }
518
571
519 pub fn is_censored(&self) -> bool {
572 pub fn is_censored(&self) -> bool {
520 (self.flags & REVISION_FLAG_CENSORED) != 0
573 (self.flags & REVISION_FLAG_CENSORED) != 0
521 }
574 }
522
575
523 pub fn has_length_affecting_flag_processor(&self) -> bool {
576 pub fn has_length_affecting_flag_processor(&self) -> bool {
524 // Relevant Python code: revlog.size()
577 // Relevant Python code: revlog.size()
525 // note: ELLIPSIS is known to not change the content
578 // note: ELLIPSIS is known to not change the content
526 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
579 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
527 }
580 }
528
581
529 /// The data for this entry, after resolving deltas if any.
582 /// The data for this entry, after resolving deltas if any.
530 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
583 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
531 let mut entry = self.clone();
584 let mut entry = self.clone();
532 let mut delta_chain = vec![];
585 let mut delta_chain = vec![];
533
586
534 // The meaning of `base_rev_or_base_of_delta_chain` depends on
587 // The meaning of `base_rev_or_base_of_delta_chain` depends on
535 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
588 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
536 // `mercurial/revlogutils/constants.py` and the code in
589 // `mercurial/revlogutils/constants.py` and the code in
537 // [_chaininfo] and in [index_deltachain].
590 // [_chaininfo] and in [index_deltachain].
538 let uses_generaldelta = self.revlog.index.uses_generaldelta();
591 let uses_generaldelta = self.revlog.index.uses_generaldelta();
539 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
592 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
540 entry = if uses_generaldelta {
593 entry = if uses_generaldelta {
541 delta_chain.push(entry);
594 delta_chain.push(entry);
542 self.revlog.get_entry_internal(base_rev)?
595 self.revlog.get_entry_for_checked_rev(base_rev)?
543 } else {
596 } else {
544 let base_rev = entry.rev - 1;
597 let base_rev = UncheckedRevision(entry.rev - 1);
545 delta_chain.push(entry);
598 delta_chain.push(entry);
546 self.revlog.get_entry_internal(base_rev)?
599 self.revlog.get_entry(base_rev)?
547 };
600 };
548 }
601 }
549
602
550 let data = if delta_chain.is_empty() {
603 let data = if delta_chain.is_empty() {
551 entry.data_chunk()?
604 entry.data_chunk()?
552 } else {
605 } else {
553 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
606 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
554 };
607 };
555
608
556 Ok(data)
609 Ok(data)
557 }
610 }
558
611
559 fn check_data(
612 fn check_data(
560 &self,
613 &self,
561 data: Cow<'revlog, [u8]>,
614 data: Cow<'revlog, [u8]>,
562 ) -> Result<Cow<'revlog, [u8]>, HgError> {
615 ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
563 if self.revlog.check_hash(
616 if self.revlog.check_hash(
564 self.p1,
617 self.p1,
565 self.p2,
618 self.p2,
566 self.hash.as_bytes(),
619 self.hash.as_bytes(),
567 &data,
620 &data,
568 ) {
621 ) {
569 Ok(data)
622 Ok(data)
570 } else {
623 } else {
571 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
624 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
572 return Err(HgError::unsupported(
625 return Err(HgError::unsupported(
573 "ellipsis revisions are not supported by rhg",
626 "ellipsis revisions are not supported by rhg",
574 ));
627 )
628 .into());
575 }
629 }
576 Err(corrupted(format!(
630 Err(corrupted(format!(
577 "hash check failed for revision {}",
631 "hash check failed for revision {}",
578 self.rev
632 self.rev
579 )))
633 ))
634 .into())
580 }
635 }
581 }
636 }
582
637
583 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
638 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
584 let data = self.rawdata()?;
639 let data = self.rawdata()?;
585 if self.rev == NULL_REVISION {
640 if self.rev == NULL_REVISION {
586 return Ok(data);
641 return Ok(data);
587 }
642 }
588 if self.is_censored() {
643 if self.is_censored() {
589 return Err(HgError::CensoredNodeError);
644 return Err(HgError::CensoredNodeError.into());
590 }
645 }
591 self.check_data(data)
646 self.check_data(data)
592 }
647 }
593
648
594 /// Extract the data contained in the entry.
649 /// Extract the data contained in the entry.
595 /// This may be a delta. (See `is_delta`.)
650 /// This may be a delta. (See `is_delta`.)
596 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
651 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
597 if self.bytes.is_empty() {
652 if self.bytes.is_empty() {
598 return Ok(Cow::Borrowed(&[]));
653 return Ok(Cow::Borrowed(&[]));
599 }
654 }
600 match self.bytes[0] {
655 match self.bytes[0] {
601 // Revision data is the entirety of the entry, including this
656 // Revision data is the entirety of the entry, including this
602 // header.
657 // header.
603 b'\0' => Ok(Cow::Borrowed(self.bytes)),
658 b'\0' => Ok(Cow::Borrowed(self.bytes)),
604 // Raw revision data follows.
659 // Raw revision data follows.
605 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
660 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
606 // zlib (RFC 1950) data.
661 // zlib (RFC 1950) data.
607 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
662 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
608 // zstd data.
663 // zstd data.
609 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
664 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
610 // A proper new format should have had a repo/store requirement.
665 // A proper new format should have had a repo/store requirement.
611 format_type => Err(corrupted(format!(
666 format_type => Err(corrupted(format!(
612 "unknown compression header '{}'",
667 "unknown compression header '{}'",
613 format_type
668 format_type
614 ))),
669 ))),
615 }
670 }
616 }
671 }
617
672
618 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
673 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
619 let mut decoder = ZlibDecoder::new(self.bytes);
674 let mut decoder = ZlibDecoder::new(self.bytes);
620 if self.is_delta() {
675 if self.is_delta() {
621 let mut buf = Vec::with_capacity(self.compressed_len as usize);
676 let mut buf = Vec::with_capacity(self.compressed_len as usize);
622 decoder
677 decoder
623 .read_to_end(&mut buf)
678 .read_to_end(&mut buf)
624 .map_err(|e| corrupted(e.to_string()))?;
679 .map_err(|e| corrupted(e.to_string()))?;
625 Ok(buf)
680 Ok(buf)
626 } else {
681 } else {
627 let cap = self.uncompressed_len.max(0) as usize;
682 let cap = self.uncompressed_len.max(0) as usize;
628 let mut buf = vec![0; cap];
683 let mut buf = vec![0; cap];
629 decoder
684 decoder
630 .read_exact(&mut buf)
685 .read_exact(&mut buf)
631 .map_err(|e| corrupted(e.to_string()))?;
686 .map_err(|e| corrupted(e.to_string()))?;
632 Ok(buf)
687 Ok(buf)
633 }
688 }
634 }
689 }
635
690
636 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
691 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
637 let cap = self.uncompressed_len.max(0) as usize;
692 let cap = self.uncompressed_len.max(0) as usize;
638 if self.is_delta() {
693 if self.is_delta() {
639 // [cap] is usually an over-estimate of the space needed because
694 // [cap] is usually an over-estimate of the space needed because
640 // it's the length of delta-decoded data, but we're interested
695 // it's the length of delta-decoded data, but we're interested
641 // in the size of the delta.
696 // in the size of the delta.
642 // This means we have to [shrink_to_fit] to avoid holding on
697 // This means we have to [shrink_to_fit] to avoid holding on
643 // to a large chunk of memory, but it also means we must have a
698 // to a large chunk of memory, but it also means we must have a
644 // fallback branch, for the case when the delta is longer than
699 // fallback branch, for the case when the delta is longer than
645 // the original data (surprisingly, this does happen in practice)
700 // the original data (surprisingly, this does happen in practice)
646 let mut buf = Vec::with_capacity(cap);
701 let mut buf = Vec::with_capacity(cap);
647 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
702 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
648 Ok(_) => buf.shrink_to_fit(),
703 Ok(_) => buf.shrink_to_fit(),
649 Err(_) => {
704 Err(_) => {
650 buf.clear();
705 buf.clear();
651 zstd::stream::copy_decode(self.bytes, &mut buf)
706 zstd::stream::copy_decode(self.bytes, &mut buf)
652 .map_err(|e| corrupted(e.to_string()))?;
707 .map_err(|e| corrupted(e.to_string()))?;
653 }
708 }
654 };
709 };
655 Ok(buf)
710 Ok(buf)
656 } else {
711 } else {
657 let mut buf = Vec::with_capacity(cap);
712 let mut buf = Vec::with_capacity(cap);
658 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
713 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
659 .map_err(|e| corrupted(e.to_string()))?;
714 .map_err(|e| corrupted(e.to_string()))?;
660 if len != self.uncompressed_len as usize {
715 if len != self.uncompressed_len as usize {
661 Err(corrupted("uncompressed length does not match"))
716 Err(corrupted("uncompressed length does not match"))
662 } else {
717 } else {
663 Ok(buf)
718 Ok(buf)
664 }
719 }
665 }
720 }
666 }
721 }
667
722
668 /// Tell if the entry is a snapshot or a delta
723 /// Tell if the entry is a snapshot or a delta
669 /// (influences on decompression).
724 /// (influences on decompression).
670 fn is_delta(&self) -> bool {
725 fn is_delta(&self) -> bool {
671 self.base_rev_or_base_of_delta_chain.is_some()
726 self.base_rev_or_base_of_delta_chain.is_some()
672 }
727 }
673 }
728 }
674
729
675 /// Calculate the hash of a revision given its data and its parents.
730 /// Calculate the hash of a revision given its data and its parents.
676 fn hash(
731 fn hash(
677 data: &[u8],
732 data: &[u8],
678 p1_hash: &[u8],
733 p1_hash: &[u8],
679 p2_hash: &[u8],
734 p2_hash: &[u8],
680 ) -> [u8; NODE_BYTES_LENGTH] {
735 ) -> [u8; NODE_BYTES_LENGTH] {
681 let mut hasher = Sha1::new();
736 let mut hasher = Sha1::new();
682 let (a, b) = (p1_hash, p2_hash);
737 let (a, b) = (p1_hash, p2_hash);
683 if a > b {
738 if a > b {
684 hasher.update(b);
739 hasher.update(b);
685 hasher.update(a);
740 hasher.update(a);
686 } else {
741 } else {
687 hasher.update(a);
742 hasher.update(a);
688 hasher.update(b);
743 hasher.update(b);
689 }
744 }
690 hasher.update(data);
745 hasher.update(data);
691 *hasher.finalize().as_ref()
746 *hasher.finalize().as_ref()
692 }
747 }
693
748
694 #[cfg(test)]
749 #[cfg(test)]
695 mod tests {
750 mod tests {
696 use super::*;
751 use super::*;
697 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
752 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
698 use itertools::Itertools;
753 use itertools::Itertools;
699
754
700 #[test]
755 #[test]
701 fn test_empty() {
756 fn test_empty() {
702 let temp = tempfile::tempdir().unwrap();
757 let temp = tempfile::tempdir().unwrap();
703 let vfs = Vfs { base: temp.path() };
758 let vfs = Vfs { base: temp.path() };
704 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
759 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
705 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
760 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
706 assert!(revlog.is_empty());
761 assert!(revlog.is_empty());
707 assert_eq!(revlog.len(), 0);
762 assert_eq!(revlog.len(), 0);
708 assert!(revlog.get_entry(0).is_err());
763 assert!(revlog.get_entry(0.into()).is_err());
709 assert!(!revlog.has_rev(0));
764 assert!(!revlog.has_rev(0.into()));
710 assert_eq!(
765 assert_eq!(
711 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
766 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
712 NULL_REVISION
767 NULL_REVISION
713 );
768 );
714 let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap();
769 let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
715 assert_eq!(null_entry.revision(), NULL_REVISION);
770 assert_eq!(null_entry.revision(), NULL_REVISION);
716 assert!(null_entry.data().unwrap().is_empty());
771 assert!(null_entry.data().unwrap().is_empty());
717 }
772 }
718
773
719 #[test]
774 #[test]
720 fn test_inline() {
775 fn test_inline() {
721 let temp = tempfile::tempdir().unwrap();
776 let temp = tempfile::tempdir().unwrap();
722 let vfs = Vfs { base: temp.path() };
777 let vfs = Vfs { base: temp.path() };
723 let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
778 let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
724 .unwrap();
779 .unwrap();
725 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
780 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
726 .unwrap();
781 .unwrap();
727 let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
782 let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
728 .unwrap();
783 .unwrap();
729 let entry0_bytes = IndexEntryBuilder::new()
784 let entry0_bytes = IndexEntryBuilder::new()
730 .is_first(true)
785 .is_first(true)
731 .with_version(1)
786 .with_version(1)
732 .with_inline(true)
787 .with_inline(true)
733 .with_offset(INDEX_ENTRY_SIZE)
788 .with_offset(INDEX_ENTRY_SIZE)
734 .with_node(node0)
789 .with_node(node0)
735 .build();
790 .build();
736 let entry1_bytes = IndexEntryBuilder::new()
791 let entry1_bytes = IndexEntryBuilder::new()
737 .with_offset(INDEX_ENTRY_SIZE)
792 .with_offset(INDEX_ENTRY_SIZE)
738 .with_node(node1)
793 .with_node(node1)
739 .build();
794 .build();
740 let entry2_bytes = IndexEntryBuilder::new()
795 let entry2_bytes = IndexEntryBuilder::new()
741 .with_offset(INDEX_ENTRY_SIZE)
796 .with_offset(INDEX_ENTRY_SIZE)
742 .with_p1(0)
797 .with_p1(0)
743 .with_p2(1)
798 .with_p2(1)
744 .with_node(node2)
799 .with_node(node2)
745 .build();
800 .build();
746 let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
801 let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
747 .into_iter()
802 .into_iter()
748 .flatten()
803 .flatten()
749 .collect_vec();
804 .collect_vec();
750 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
805 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
751 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
806 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
752
807
753 let entry0 = revlog.get_entry(0).ok().unwrap();
808 let entry0 = revlog.get_entry(0.into()).ok().unwrap();
754 assert_eq!(entry0.revision(), 0);
809 assert_eq!(entry0.revision(), 0);
755 assert_eq!(*entry0.node(), node0);
810 assert_eq!(*entry0.node(), node0);
756 assert!(!entry0.has_p1());
811 assert!(!entry0.has_p1());
757 assert_eq!(entry0.p1(), None);
812 assert_eq!(entry0.p1(), None);
758 assert_eq!(entry0.p2(), None);
813 assert_eq!(entry0.p2(), None);
759 let p1_entry = entry0.p1_entry().unwrap();
814 let p1_entry = entry0.p1_entry().unwrap();
760 assert!(p1_entry.is_none());
815 assert!(p1_entry.is_none());
761 let p2_entry = entry0.p2_entry().unwrap();
816 let p2_entry = entry0.p2_entry().unwrap();
762 assert!(p2_entry.is_none());
817 assert!(p2_entry.is_none());
763
818
764 let entry1 = revlog.get_entry(1).ok().unwrap();
819 let entry1 = revlog.get_entry(1.into()).ok().unwrap();
765 assert_eq!(entry1.revision(), 1);
820 assert_eq!(entry1.revision(), 1);
766 assert_eq!(*entry1.node(), node1);
821 assert_eq!(*entry1.node(), node1);
767 assert!(!entry1.has_p1());
822 assert!(!entry1.has_p1());
768 assert_eq!(entry1.p1(), None);
823 assert_eq!(entry1.p1(), None);
769 assert_eq!(entry1.p2(), None);
824 assert_eq!(entry1.p2(), None);
770 let p1_entry = entry1.p1_entry().unwrap();
825 let p1_entry = entry1.p1_entry().unwrap();
771 assert!(p1_entry.is_none());
826 assert!(p1_entry.is_none());
772 let p2_entry = entry1.p2_entry().unwrap();
827 let p2_entry = entry1.p2_entry().unwrap();
773 assert!(p2_entry.is_none());
828 assert!(p2_entry.is_none());
774
829
775 let entry2 = revlog.get_entry(2).ok().unwrap();
830 let entry2 = revlog.get_entry(2.into()).ok().unwrap();
776 assert_eq!(entry2.revision(), 2);
831 assert_eq!(entry2.revision(), 2);
777 assert_eq!(*entry2.node(), node2);
832 assert_eq!(*entry2.node(), node2);
778 assert!(entry2.has_p1());
833 assert!(entry2.has_p1());
779 assert_eq!(entry2.p1(), Some(0));
834 assert_eq!(entry2.p1(), Some(0));
780 assert_eq!(entry2.p2(), Some(1));
835 assert_eq!(entry2.p2(), Some(1));
781 let p1_entry = entry2.p1_entry().unwrap();
836 let p1_entry = entry2.p1_entry().unwrap();
782 assert!(p1_entry.is_some());
837 assert!(p1_entry.is_some());
783 assert_eq!(p1_entry.unwrap().revision(), 0);
838 assert_eq!(p1_entry.unwrap().revision(), 0);
784 let p2_entry = entry2.p2_entry().unwrap();
839 let p2_entry = entry2.p2_entry().unwrap();
785 assert!(p2_entry.is_some());
840 assert!(p2_entry.is_some());
786 assert_eq!(p2_entry.unwrap().revision(), 1);
841 assert_eq!(p2_entry.unwrap().revision(), 1);
787 }
842 }
788
843
789 #[test]
844 #[test]
790 fn test_nodemap() {
845 fn test_nodemap() {
791 let temp = tempfile::tempdir().unwrap();
846 let temp = tempfile::tempdir().unwrap();
792 let vfs = Vfs { base: temp.path() };
847 let vfs = Vfs { base: temp.path() };
793
848
794 // building a revlog with a forced Node starting with zeros
849 // building a revlog with a forced Node starting with zeros
795 // This is a corruption, but it does not preclude using the nodemap
850 // This is a corruption, but it does not preclude using the nodemap
796 // if we don't try and access the data
851 // if we don't try and access the data
797 let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
852 let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
798 .unwrap();
853 .unwrap();
799 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
854 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
800 .unwrap();
855 .unwrap();
801 let entry0_bytes = IndexEntryBuilder::new()
856 let entry0_bytes = IndexEntryBuilder::new()
802 .is_first(true)
857 .is_first(true)
803 .with_version(1)
858 .with_version(1)
804 .with_inline(true)
859 .with_inline(true)
805 .with_offset(INDEX_ENTRY_SIZE)
860 .with_offset(INDEX_ENTRY_SIZE)
806 .with_node(node0)
861 .with_node(node0)
807 .build();
862 .build();
808 let entry1_bytes = IndexEntryBuilder::new()
863 let entry1_bytes = IndexEntryBuilder::new()
809 .with_offset(INDEX_ENTRY_SIZE)
864 .with_offset(INDEX_ENTRY_SIZE)
810 .with_node(node1)
865 .with_node(node1)
811 .build();
866 .build();
812 let contents = vec![entry0_bytes, entry1_bytes]
867 let contents = vec![entry0_bytes, entry1_bytes]
813 .into_iter()
868 .into_iter()
814 .flatten()
869 .flatten()
815 .collect_vec();
870 .collect_vec();
816 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
871 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
817 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
872 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
818
873
819 // accessing the data shows the corruption
874 // accessing the data shows the corruption
820 revlog.get_entry(0).unwrap().data().unwrap_err();
875 revlog.get_entry(0.into()).unwrap().data().unwrap_err();
821
876
822 assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
877 assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
823 assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
878 assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
824 assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
879 assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
825 assert_eq!(
880 assert_eq!(
826 revlog
881 revlog
827 .rev_from_node(NodePrefix::from_hex("000").unwrap())
882 .rev_from_node(NodePrefix::from_hex("000").unwrap())
828 .unwrap(),
883 .unwrap(),
829 -1
884 -1
830 );
885 );
831 assert_eq!(
886 assert_eq!(
832 revlog
887 revlog
833 .rev_from_node(NodePrefix::from_hex("b00").unwrap())
888 .rev_from_node(NodePrefix::from_hex("b00").unwrap())
834 .unwrap(),
889 .unwrap(),
835 1
890 1
836 );
891 );
837 // RevlogError does not implement PartialEq
892 // RevlogError does not implement PartialEq
838 // (ultimately because io::Error does not)
893 // (ultimately because io::Error does not)
839 match revlog
894 match revlog
840 .rev_from_node(NodePrefix::from_hex("00").unwrap())
895 .rev_from_node(NodePrefix::from_hex("00").unwrap())
841 .expect_err("Expected to give AmbiguousPrefix error")
896 .expect_err("Expected to give AmbiguousPrefix error")
842 {
897 {
843 RevlogError::AmbiguousPrefix => (),
898 RevlogError::AmbiguousPrefix => (),
844 e => {
899 e => {
845 panic!("Got another error than AmbiguousPrefix: {:?}", e);
900 panic!("Got another error than AmbiguousPrefix: {:?}", e);
846 }
901 }
847 };
902 };
848 }
903 }
849 }
904 }
@@ -1,1067 +1,1082 b''
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 //!
7 //!
8 //! This provides a variation on the 16-ary radix tree that is
8 //! This provides a variation on the 16-ary radix tree that is
9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 //! on disk.
10 //! on disk.
11 //!
11 //!
12 //! Following existing implicit conventions, the "nodemap" terminology
12 //! Following existing implicit conventions, the "nodemap" terminology
13 //! is used in a more abstract context.
13 //! is used in a more abstract context.
14
14
15 use crate::UncheckedRevision;
16
15 use super::{
17 use super::{
16 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
18 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
17 };
19 };
18
20
19 use bytes_cast::{unaligned, BytesCast};
21 use bytes_cast::{unaligned, BytesCast};
20 use std::cmp::max;
22 use std::cmp::max;
21 use std::fmt;
23 use std::fmt;
22 use std::mem::{self, align_of, size_of};
24 use std::mem::{self, align_of, size_of};
23 use std::ops::Deref;
25 use std::ops::Deref;
24 use std::ops::Index;
26 use std::ops::Index;
25
27
26 #[derive(Debug, PartialEq)]
28 #[derive(Debug, PartialEq)]
27 pub enum NodeMapError {
29 pub enum NodeMapError {
28 /// A `NodePrefix` matches several [`Revision`]s.
30 /// A `NodePrefix` matches several [`Revision`]s.
29 ///
31 ///
30 /// This can be returned by methods meant for (at most) one match.
32 /// This can be returned by methods meant for (at most) one match.
31 MultipleResults,
33 MultipleResults,
32 /// A `Revision` stored in the nodemap could not be found in the index
34 /// A `Revision` stored in the nodemap could not be found in the index
33 RevisionNotInIndex(Revision),
35 RevisionNotInIndex(UncheckedRevision),
34 }
36 }
35
37
36 /// Mapping system from Mercurial nodes to revision numbers.
38 /// Mapping system from Mercurial nodes to revision numbers.
37 ///
39 ///
38 /// ## `RevlogIndex` and `NodeMap`
40 /// ## `RevlogIndex` and `NodeMap`
39 ///
41 ///
40 /// One way to think about their relationship is that
42 /// One way to think about their relationship is that
41 /// the `NodeMap` is a prefix-oriented reverse index of the [`Node`]
43 /// the `NodeMap` is a prefix-oriented reverse index of the [`Node`]
42 /// information carried by a [`RevlogIndex`].
44 /// information carried by a [`RevlogIndex`].
43 ///
45 ///
44 /// Many of the methods in this trait take a `RevlogIndex` argument
46 /// Many of the methods in this trait take a `RevlogIndex` argument
45 /// which is used for validation of their results. This index must naturally
47 /// which is used for validation of their results. This index must naturally
46 /// be the one the `NodeMap` is about, and it must be consistent.
48 /// be the one the `NodeMap` is about, and it must be consistent.
47 ///
49 ///
48 /// Notably, the `NodeMap` must not store
50 /// Notably, the `NodeMap` must not store
49 /// information about more `Revision` values than there are in the index.
51 /// information about more `Revision` values than there are in the index.
50 /// In these methods, an encountered `Revision` is not in the index, a
52 /// In these methods, an encountered `Revision` is not in the index, a
51 /// [RevisionNotInIndex](NodeMapError) error is returned.
53 /// [RevisionNotInIndex](NodeMapError) error is returned.
52 ///
54 ///
53 /// In insert operations, the rule is thus that the `NodeMap` must always
55 /// In insert operations, the rule is thus that the `NodeMap` must always
54 /// be updated after the `RevlogIndex` it is about.
56 /// be updated after the `RevlogIndex` it is about.
55 pub trait NodeMap {
57 pub trait NodeMap {
56 /// Find the unique `Revision` having the given `Node`
58 /// Find the unique `Revision` having the given `Node`
57 ///
59 ///
58 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
60 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
59 fn find_node(
61 fn find_node(
60 &self,
62 &self,
61 index: &impl RevlogIndex,
63 index: &impl RevlogIndex,
62 node: &Node,
64 node: &Node,
63 ) -> Result<Option<Revision>, NodeMapError> {
65 ) -> Result<Option<Revision>, NodeMapError> {
64 self.find_bin(index, node.into())
66 self.find_bin(index, node.into())
65 }
67 }
66
68
67 /// Find the unique Revision whose `Node` starts with a given binary prefix
69 /// Find the unique Revision whose `Node` starts with a given binary prefix
68 ///
70 ///
69 /// If no Revision matches the given prefix, `Ok(None)` is returned.
71 /// If no Revision matches the given prefix, `Ok(None)` is returned.
70 ///
72 ///
71 /// If several Revisions match the given prefix, a
73 /// If several Revisions match the given prefix, a
72 /// [MultipleResults](NodeMapError) error is returned.
74 /// [MultipleResults](NodeMapError) error is returned.
73 fn find_bin(
75 fn find_bin(
74 &self,
76 &self,
75 idx: &impl RevlogIndex,
77 idx: &impl RevlogIndex,
76 prefix: NodePrefix,
78 prefix: NodePrefix,
77 ) -> Result<Option<Revision>, NodeMapError>;
79 ) -> Result<Option<Revision>, NodeMapError>;
78
80
79 /// Give the size of the shortest node prefix that determines
81 /// Give the size of the shortest node prefix that determines
80 /// the revision uniquely.
82 /// the revision uniquely.
81 ///
83 ///
82 /// From a binary node prefix, if it is matched in the node map, this
84 /// From a binary node prefix, if it is matched in the node map, this
83 /// returns the number of hexadecimal digits that would had sufficed
85 /// returns the number of hexadecimal digits that would had sufficed
84 /// to find the revision uniquely.
86 /// to find the revision uniquely.
85 ///
87 ///
86 /// Returns `None` if no [`Revision`] could be found for the prefix.
88 /// Returns `None` if no [`Revision`] could be found for the prefix.
87 ///
89 ///
88 /// If several Revisions match the given prefix, a
90 /// If several Revisions match the given prefix, a
89 /// [MultipleResults](NodeMapError) error is returned.
91 /// [MultipleResults](NodeMapError) error is returned.
90 fn unique_prefix_len_bin(
92 fn unique_prefix_len_bin(
91 &self,
93 &self,
92 idx: &impl RevlogIndex,
94 idx: &impl RevlogIndex,
93 node_prefix: NodePrefix,
95 node_prefix: NodePrefix,
94 ) -> Result<Option<usize>, NodeMapError>;
96 ) -> Result<Option<usize>, NodeMapError>;
95
97
96 /// Same as [unique_prefix_len_bin](Self::unique_prefix_len_bin), with
98 /// Same as [unique_prefix_len_bin](Self::unique_prefix_len_bin), with
97 /// a full [`Node`] as input
99 /// a full [`Node`] as input
98 fn unique_prefix_len_node(
100 fn unique_prefix_len_node(
99 &self,
101 &self,
100 idx: &impl RevlogIndex,
102 idx: &impl RevlogIndex,
101 node: &Node,
103 node: &Node,
102 ) -> Result<Option<usize>, NodeMapError> {
104 ) -> Result<Option<usize>, NodeMapError> {
103 self.unique_prefix_len_bin(idx, node.into())
105 self.unique_prefix_len_bin(idx, node.into())
104 }
106 }
105 }
107 }
106
108
107 pub trait MutableNodeMap: NodeMap {
109 pub trait MutableNodeMap: NodeMap {
108 fn insert<I: RevlogIndex>(
110 fn insert<I: RevlogIndex>(
109 &mut self,
111 &mut self,
110 index: &I,
112 index: &I,
111 node: &Node,
113 node: &Node,
112 rev: Revision,
114 rev: Revision,
113 ) -> Result<(), NodeMapError>;
115 ) -> Result<(), NodeMapError>;
114 }
116 }
115
117
116 /// Low level NodeTree [`Block`] elements
118 /// Low level NodeTree [`Block`] elements
117 ///
119 ///
118 /// These are exactly as for instance on persistent storage.
120 /// These are exactly as for instance on persistent storage.
119 type RawElement = unaligned::I32Be;
121 type RawElement = unaligned::I32Be;
120
122
121 /// High level representation of values in NodeTree
123 /// High level representation of values in NodeTree
122 /// [`Blocks`](struct.Block.html)
124 /// [`Blocks`](struct.Block.html)
123 ///
125 ///
124 /// This is the high level representation that most algorithms should
126 /// This is the high level representation that most algorithms should
125 /// use.
127 /// use.
126 #[derive(Clone, Debug, Eq, PartialEq)]
128 #[derive(Clone, Debug, Eq, PartialEq)]
127 enum Element {
129 enum Element {
128 Rev(Revision),
130 // This is not a Mercurial revision. It's a `i32` because this is the
131 // right type for this structure.
132 Rev(i32),
129 Block(usize),
133 Block(usize),
130 None,
134 None,
131 }
135 }
132
136
133 impl From<RawElement> for Element {
137 impl From<RawElement> for Element {
134 /// Conversion from low level representation, after endianness conversion.
138 /// Conversion from low level representation, after endianness conversion.
135 ///
139 ///
136 /// See [`Block`](struct.Block.html) for explanation about the encoding.
140 /// See [`Block`](struct.Block.html) for explanation about the encoding.
137 fn from(raw: RawElement) -> Element {
141 fn from(raw: RawElement) -> Element {
138 let int = raw.get();
142 let int = raw.get();
139 if int >= 0 {
143 if int >= 0 {
140 Element::Block(int as usize)
144 Element::Block(int as usize)
141 } else if int == -1 {
145 } else if int == -1 {
142 Element::None
146 Element::None
143 } else {
147 } else {
144 Element::Rev(-int - 2)
148 Element::Rev(-int - 2)
145 }
149 }
146 }
150 }
147 }
151 }
148
152
149 impl From<Element> for RawElement {
153 impl From<Element> for RawElement {
150 fn from(element: Element) -> RawElement {
154 fn from(element: Element) -> RawElement {
151 RawElement::from(match element {
155 RawElement::from(match element {
152 Element::None => 0,
156 Element::None => 0,
153 Element::Block(i) => i as i32,
157 Element::Block(i) => i as i32,
154 Element::Rev(rev) => -rev - 2,
158 Element::Rev(rev) => -rev - 2,
155 })
159 })
156 }
160 }
157 }
161 }
158
162
159 const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
163 const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
160
164
161 /// A logical block of the [`NodeTree`], packed with a fixed size.
165 /// A logical block of the [`NodeTree`], packed with a fixed size.
162 ///
166 ///
163 /// These are always used in container types implementing `Index<Block>`,
167 /// These are always used in container types implementing `Index<Block>`,
164 /// such as `&Block`
168 /// such as `&Block`
165 ///
169 ///
166 /// As an array of integers, its ith element encodes that the
170 /// As an array of integers, its ith element encodes that the
167 /// ith potential edge from the block, representing the ith hexadecimal digit
171 /// ith potential edge from the block, representing the ith hexadecimal digit
168 /// (nybble) `i` is either:
172 /// (nybble) `i` is either:
169 ///
173 ///
170 /// - absent (value -1)
174 /// - absent (value -1)
171 /// - another `Block` in the same indexable container (value ≥ 0)
175 /// - another `Block` in the same indexable container (value ≥ 0)
172 /// - a [`Revision`] leaf (value ≤ -2)
176 /// - a [`Revision`] leaf (value ≤ -2)
173 ///
177 ///
174 /// Endianness has to be fixed for consistency on shared storage across
178 /// Endianness has to be fixed for consistency on shared storage across
175 /// different architectures.
179 /// different architectures.
176 ///
180 ///
177 /// A key difference with the C `nodetree` is that we need to be
181 /// A key difference with the C `nodetree` is that we need to be
178 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
182 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
179 /// rather than 0 and the [`Revision`] range upper limit of -2 instead of -1.
183 /// rather than 0 and the [`Revision`] range upper limit of -2 instead of -1.
180 ///
184 ///
181 /// Another related difference is that `NULL_REVISION` (-1) is not
185 /// Another related difference is that `NULL_REVISION` (-1) is not
182 /// represented at all, because we want an immutable empty nodetree
186 /// represented at all, because we want an immutable empty nodetree
183 /// to be valid.
187 /// to be valid.
184 #[derive(Copy, Clone, BytesCast, PartialEq)]
188 #[derive(Copy, Clone, BytesCast, PartialEq)]
185 #[repr(transparent)]
189 #[repr(transparent)]
186 pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
190 pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
187
191
188 impl Block {
192 impl Block {
189 fn new() -> Self {
193 fn new() -> Self {
190 let absent_node = RawElement::from(-1);
194 let absent_node = RawElement::from(-1);
191 Block([absent_node; ELEMENTS_PER_BLOCK])
195 Block([absent_node; ELEMENTS_PER_BLOCK])
192 }
196 }
193
197
194 fn get(&self, nybble: u8) -> Element {
198 fn get(&self, nybble: u8) -> Element {
195 self.0[nybble as usize].into()
199 self.0[nybble as usize].into()
196 }
200 }
197
201
198 fn set(&mut self, nybble: u8, element: Element) {
202 fn set(&mut self, nybble: u8, element: Element) {
199 self.0[nybble as usize] = element.into()
203 self.0[nybble as usize] = element.into()
200 }
204 }
201 }
205 }
202
206
203 impl fmt::Debug for Block {
207 impl fmt::Debug for Block {
204 /// sparse representation for testing and debugging purposes
208 /// sparse representation for testing and debugging purposes
205 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
209 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
206 f.debug_map()
210 f.debug_map()
207 .entries((0..16).filter_map(|i| match self.get(i) {
211 .entries((0..16).filter_map(|i| match self.get(i) {
208 Element::None => None,
212 Element::None => None,
209 element => Some((i, element)),
213 element => Some((i, element)),
210 }))
214 }))
211 .finish()
215 .finish()
212 }
216 }
213 }
217 }
214
218
215 /// A mutable 16-radix tree with the root block logically at the end
219 /// A mutable 16-radix tree with the root block logically at the end
216 ///
220 ///
217 /// Because of the append only nature of our node trees, we need to
221 /// Because of the append only nature of our node trees, we need to
218 /// keep the original untouched and store new blocks separately.
222 /// keep the original untouched and store new blocks separately.
219 ///
223 ///
220 /// The mutable root [`Block`] is kept apart so that we don't have to rebump
224 /// The mutable root [`Block`] is kept apart so that we don't have to rebump
221 /// it on each insertion.
225 /// it on each insertion.
222 pub struct NodeTree {
226 pub struct NodeTree {
223 readonly: Box<dyn Deref<Target = [Block]> + Send>,
227 readonly: Box<dyn Deref<Target = [Block]> + Send>,
224 growable: Vec<Block>,
228 growable: Vec<Block>,
225 root: Block,
229 root: Block,
226 masked_inner_blocks: usize,
230 masked_inner_blocks: usize,
227 }
231 }
228
232
229 impl Index<usize> for NodeTree {
233 impl Index<usize> for NodeTree {
230 type Output = Block;
234 type Output = Block;
231
235
232 fn index(&self, i: usize) -> &Block {
236 fn index(&self, i: usize) -> &Block {
233 let ro_len = self.readonly.len();
237 let ro_len = self.readonly.len();
234 if i < ro_len {
238 if i < ro_len {
235 &self.readonly[i]
239 &self.readonly[i]
236 } else if i == ro_len + self.growable.len() {
240 } else if i == ro_len + self.growable.len() {
237 &self.root
241 &self.root
238 } else {
242 } else {
239 &self.growable[i - ro_len]
243 &self.growable[i - ro_len]
240 }
244 }
241 }
245 }
242 }
246 }
243
247
244 /// Return `None` unless the [`Node`] for `rev` has given prefix in `idx`.
248 /// Return `None` unless the [`Node`] for `rev` has given prefix in `idx`.
245 fn has_prefix_or_none(
249 fn has_prefix_or_none(
246 idx: &impl RevlogIndex,
250 idx: &impl RevlogIndex,
247 prefix: NodePrefix,
251 prefix: NodePrefix,
248 rev: Revision,
252 rev: UncheckedRevision,
249 ) -> Result<Option<Revision>, NodeMapError> {
253 ) -> Result<Option<Revision>, NodeMapError> {
250 idx.node(rev)
254 match idx.check_revision(rev) {
251 .ok_or(NodeMapError::RevisionNotInIndex(rev))
255 Some(checked) => idx
252 .map(|node| {
256 .node(checked)
253 if prefix.is_prefix_of(node) {
257 .ok_or(NodeMapError::RevisionNotInIndex(rev))
254 Some(rev)
258 .map(|node| {
255 } else {
259 if prefix.is_prefix_of(node) {
256 None
260 Some(checked)
257 }
261 } else {
258 })
262 None
263 }
264 }),
265 None => Err(NodeMapError::RevisionNotInIndex(rev)),
266 }
259 }
267 }
260
268
261 /// validate that the candidate's node starts indeed with given prefix,
269 /// validate that the candidate's node starts indeed with given prefix,
262 /// and treat ambiguities related to [`NULL_REVISION`].
270 /// and treat ambiguities related to [`NULL_REVISION`].
263 ///
271 ///
264 /// From the data in the NodeTree, one can only conclude that some
272 /// From the data in the NodeTree, one can only conclude that some
265 /// revision is the only one for a *subprefix* of the one being looked up.
273 /// revision is the only one for a *subprefix* of the one being looked up.
266 fn validate_candidate(
274 fn validate_candidate(
267 idx: &impl RevlogIndex,
275 idx: &impl RevlogIndex,
268 prefix: NodePrefix,
276 prefix: NodePrefix,
269 candidate: (Option<Revision>, usize),
277 candidate: (Option<UncheckedRevision>, usize),
270 ) -> Result<(Option<Revision>, usize), NodeMapError> {
278 ) -> Result<(Option<Revision>, usize), NodeMapError> {
271 let (rev, steps) = candidate;
279 let (rev, steps) = candidate;
272 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
280 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
273 rev.map_or(Ok((None, steps)), |r| {
281 rev.map_or(Ok((None, steps)), |r| {
274 has_prefix_or_none(idx, prefix, r)
282 has_prefix_or_none(idx, prefix, r)
275 .map(|opt| (opt, max(steps, nz_nybble + 1)))
283 .map(|opt| (opt, max(steps, nz_nybble + 1)))
276 })
284 })
277 } else {
285 } else {
278 // the prefix is only made of zeros; NULL_REVISION always matches it
286 // the prefix is only made of zeros; NULL_REVISION always matches it
279 // and any other *valid* result is an ambiguity
287 // and any other *valid* result is an ambiguity
280 match rev {
288 match rev {
281 None => Ok((Some(NULL_REVISION), steps + 1)),
289 None => Ok((Some(NULL_REVISION), steps + 1)),
282 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
290 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
283 None => Ok((Some(NULL_REVISION), steps + 1)),
291 None => Ok((Some(NULL_REVISION), steps + 1)),
284 _ => Err(NodeMapError::MultipleResults),
292 _ => Err(NodeMapError::MultipleResults),
285 },
293 },
286 }
294 }
287 }
295 }
288 }
296 }
289
297
290 impl NodeTree {
298 impl NodeTree {
291 /// Initiate a NodeTree from an immutable slice-like of `Block`
299 /// Initiate a NodeTree from an immutable slice-like of `Block`
292 ///
300 ///
293 /// We keep `readonly` and clone its root block if it isn't empty.
301 /// We keep `readonly` and clone its root block if it isn't empty.
294 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
302 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
295 let root = readonly.last().cloned().unwrap_or_else(Block::new);
303 let root = readonly.last().cloned().unwrap_or_else(Block::new);
296 NodeTree {
304 NodeTree {
297 readonly,
305 readonly,
298 growable: Vec::new(),
306 growable: Vec::new(),
299 root,
307 root,
300 masked_inner_blocks: 0,
308 masked_inner_blocks: 0,
301 }
309 }
302 }
310 }
303
311
304 /// Create from an opaque bunch of bytes
312 /// Create from an opaque bunch of bytes
305 ///
313 ///
306 /// The created [`NodeTreeBytes`] from `bytes`,
314 /// The created [`NodeTreeBytes`] from `bytes`,
307 /// of which exactly `amount` bytes are used.
315 /// of which exactly `amount` bytes are used.
308 ///
316 ///
309 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
317 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
310 /// - `amount` is expressed in bytes, and is not automatically derived from
318 /// - `amount` is expressed in bytes, and is not automatically derived from
311 /// `bytes`, so that a caller that manages them atomically can perform
319 /// `bytes`, so that a caller that manages them atomically can perform
312 /// temporary disk serializations and still rollback easily if needed.
320 /// temporary disk serializations and still rollback easily if needed.
313 /// First use-case for this would be to support Mercurial shell hooks.
321 /// First use-case for this would be to support Mercurial shell hooks.
314 ///
322 ///
315 /// panics if `buffer` is smaller than `amount`
323 /// panics if `buffer` is smaller than `amount`
316 pub fn load_bytes(
324 pub fn load_bytes(
317 bytes: Box<dyn Deref<Target = [u8]> + Send>,
325 bytes: Box<dyn Deref<Target = [u8]> + Send>,
318 amount: usize,
326 amount: usize,
319 ) -> Self {
327 ) -> Self {
320 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
328 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
321 }
329 }
322
330
323 /// Retrieve added [`Block`]s and the original immutable data
331 /// Retrieve added [`Block`]s and the original immutable data
324 pub fn into_readonly_and_added(
332 pub fn into_readonly_and_added(
325 self,
333 self,
326 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
334 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
327 let mut vec = self.growable;
335 let mut vec = self.growable;
328 let readonly = self.readonly;
336 let readonly = self.readonly;
329 if readonly.last() != Some(&self.root) {
337 if readonly.last() != Some(&self.root) {
330 vec.push(self.root);
338 vec.push(self.root);
331 }
339 }
332 (readonly, vec)
340 (readonly, vec)
333 }
341 }
334
342
335 /// Retrieve added [`Block]s as bytes, ready to be written to persistent
343 /// Retrieve added [`Block]s as bytes, ready to be written to persistent
336 /// storage
344 /// storage
337 pub fn into_readonly_and_added_bytes(
345 pub fn into_readonly_and_added_bytes(
338 self,
346 self,
339 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
347 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
340 let (readonly, vec) = self.into_readonly_and_added();
348 let (readonly, vec) = self.into_readonly_and_added();
341 // Prevent running `v`'s destructor so we are in complete control
349 // Prevent running `v`'s destructor so we are in complete control
342 // of the allocation.
350 // of the allocation.
343 let vec = mem::ManuallyDrop::new(vec);
351 let vec = mem::ManuallyDrop::new(vec);
344
352
345 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
353 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
346 // bytes, so this is perfectly safe.
354 // bytes, so this is perfectly safe.
347 let bytes = unsafe {
355 let bytes = unsafe {
348 // Check for compatible allocation layout.
356 // Check for compatible allocation layout.
349 // (Optimized away by constant-folding + dead code elimination.)
357 // (Optimized away by constant-folding + dead code elimination.)
350 assert_eq!(size_of::<Block>(), 64);
358 assert_eq!(size_of::<Block>(), 64);
351 assert_eq!(align_of::<Block>(), 1);
359 assert_eq!(align_of::<Block>(), 1);
352
360
353 // /!\ Any use of `vec` after this is use-after-free.
361 // /!\ Any use of `vec` after this is use-after-free.
354 // TODO: use `into_raw_parts` once stabilized
362 // TODO: use `into_raw_parts` once stabilized
355 Vec::from_raw_parts(
363 Vec::from_raw_parts(
356 vec.as_ptr() as *mut u8,
364 vec.as_ptr() as *mut u8,
357 vec.len() * size_of::<Block>(),
365 vec.len() * size_of::<Block>(),
358 vec.capacity() * size_of::<Block>(),
366 vec.capacity() * size_of::<Block>(),
359 )
367 )
360 };
368 };
361 (readonly, bytes)
369 (readonly, bytes)
362 }
370 }
363
371
364 /// Total number of blocks
372 /// Total number of blocks
365 fn len(&self) -> usize {
373 fn len(&self) -> usize {
366 self.readonly.len() + self.growable.len() + 1
374 self.readonly.len() + self.growable.len() + 1
367 }
375 }
368
376
369 /// Implemented for completeness
377 /// Implemented for completeness
370 ///
378 ///
371 /// A `NodeTree` always has at least the mutable root block.
379 /// A `NodeTree` always has at least the mutable root block.
372 #[allow(dead_code)]
380 #[allow(dead_code)]
373 fn is_empty(&self) -> bool {
381 fn is_empty(&self) -> bool {
374 false
382 false
375 }
383 }
376
384
377 /// Main working method for `NodeTree` searches
385 /// Main working method for `NodeTree` searches
378 ///
386 ///
379 /// The first returned value is the result of analysing `NodeTree` data
387 /// The first returned value is the result of analysing `NodeTree` data
380 /// *alone*: whereas `None` guarantees that the given prefix is absent
388 /// *alone*: whereas `None` guarantees that the given prefix is absent
381 /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with
389 /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with
382 /// `Some(rev)`, it is to be understood that `rev` is the unique
390 /// `Some(rev)`, it is to be understood that `rev` is the unique
383 /// [`Revision`] that could match the prefix. Actually, all that can
391 /// [`Revision`] that could match the prefix. Actually, all that can
384 /// be inferred from
392 /// be inferred from
385 /// the `NodeTree` data is that `rev` is the revision with the longest
393 /// the `NodeTree` data is that `rev` is the revision with the longest
386 /// common node prefix with the given prefix.
394 /// common node prefix with the given prefix.
395 /// We return an [`UncheckedRevision`] because we have no guarantee that
396 /// the revision we found is valid for the index.
387 ///
397 ///
388 /// The second returned value is the size of the smallest subprefix
398 /// The second returned value is the size of the smallest subprefix
389 /// of `prefix` that would give the same result, i.e. not the
399 /// of `prefix` that would give the same result, i.e. not the
390 /// [MultipleResults](NodeMapError) error variant (again, using only the
400 /// [MultipleResults](NodeMapError) error variant (again, using only the
391 /// data of the [`NodeTree`]).
401 /// data of the [`NodeTree`]).
392 fn lookup(
402 fn lookup(
393 &self,
403 &self,
394 prefix: NodePrefix,
404 prefix: NodePrefix,
395 ) -> Result<(Option<Revision>, usize), NodeMapError> {
405 ) -> Result<(Option<UncheckedRevision>, usize), NodeMapError> {
396 for (i, visit_item) in self.visit(prefix).enumerate() {
406 for (i, visit_item) in self.visit(prefix).enumerate() {
397 if let Some(opt) = visit_item.final_revision() {
407 if let Some(opt) = visit_item.final_revision() {
398 return Ok((opt, i + 1));
408 return Ok((opt, i + 1));
399 }
409 }
400 }
410 }
401 Err(NodeMapError::MultipleResults)
411 Err(NodeMapError::MultipleResults)
402 }
412 }
403
413
404 fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
414 fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
405 NodeTreeVisitor {
415 NodeTreeVisitor {
406 nt: self,
416 nt: self,
407 prefix,
417 prefix,
408 visit: self.len() - 1,
418 visit: self.len() - 1,
409 nybble_idx: 0,
419 nybble_idx: 0,
410 done: false,
420 done: false,
411 }
421 }
412 }
422 }
413 /// Return a mutable reference for `Block` at index `idx`.
423 /// Return a mutable reference for `Block` at index `idx`.
414 ///
424 ///
415 /// If `idx` lies in the immutable area, then the reference is to
425 /// If `idx` lies in the immutable area, then the reference is to
416 /// a newly appended copy.
426 /// a newly appended copy.
417 ///
427 ///
418 /// Returns (new_idx, glen, mut_ref) where
428 /// Returns (new_idx, glen, mut_ref) where
419 ///
429 ///
420 /// - `new_idx` is the index of the mutable `Block`
430 /// - `new_idx` is the index of the mutable `Block`
421 /// - `mut_ref` is a mutable reference to the mutable Block.
431 /// - `mut_ref` is a mutable reference to the mutable Block.
422 /// - `glen` is the new length of `self.growable`
432 /// - `glen` is the new length of `self.growable`
423 ///
433 ///
424 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
434 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
425 /// itself because of the mutable borrow taken with the returned `Block`
435 /// itself because of the mutable borrow taken with the returned `Block`
426 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
436 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
427 let ro_blocks = &self.readonly;
437 let ro_blocks = &self.readonly;
428 let ro_len = ro_blocks.len();
438 let ro_len = ro_blocks.len();
429 let glen = self.growable.len();
439 let glen = self.growable.len();
430 if idx < ro_len {
440 if idx < ro_len {
431 self.masked_inner_blocks += 1;
441 self.masked_inner_blocks += 1;
432 self.growable.push(ro_blocks[idx]);
442 self.growable.push(ro_blocks[idx]);
433 (glen + ro_len, &mut self.growable[glen], glen + 1)
443 (glen + ro_len, &mut self.growable[glen], glen + 1)
434 } else if glen + ro_len == idx {
444 } else if glen + ro_len == idx {
435 (idx, &mut self.root, glen)
445 (idx, &mut self.root, glen)
436 } else {
446 } else {
437 (idx, &mut self.growable[idx - ro_len], glen)
447 (idx, &mut self.growable[idx - ro_len], glen)
438 }
448 }
439 }
449 }
440
450
441 /// Main insertion method
451 /// Main insertion method
442 ///
452 ///
443 /// This will dive in the node tree to find the deepest `Block` for
453 /// This will dive in the node tree to find the deepest `Block` for
444 /// `node`, split it as much as needed and record `node` in there.
454 /// `node`, split it as much as needed and record `node` in there.
445 /// The method then backtracks, updating references in all the visited
455 /// The method then backtracks, updating references in all the visited
446 /// blocks from the root.
456 /// blocks from the root.
447 ///
457 ///
448 /// All the mutated `Block` are copied first to the growable part if
458 /// All the mutated `Block` are copied first to the growable part if
449 /// needed. That happens for those in the immutable part except the root.
459 /// needed. That happens for those in the immutable part except the root.
450 pub fn insert<I: RevlogIndex>(
460 pub fn insert<I: RevlogIndex>(
451 &mut self,
461 &mut self,
452 index: &I,
462 index: &I,
453 node: &Node,
463 node: &Node,
454 rev: Revision,
464 rev: Revision,
455 ) -> Result<(), NodeMapError> {
465 ) -> Result<(), NodeMapError> {
456 let ro_len = &self.readonly.len();
466 let ro_len = &self.readonly.len();
457
467
458 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
468 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
459 let read_nybbles = visit_steps.len();
469 let read_nybbles = visit_steps.len();
460 // visit_steps cannot be empty, since we always visit the root block
470 // visit_steps cannot be empty, since we always visit the root block
461 let deepest = visit_steps.pop().unwrap();
471 let deepest = visit_steps.pop().unwrap();
462
472
463 let (mut block_idx, mut block, mut glen) =
473 let (mut block_idx, mut block, mut glen) =
464 self.mutable_block(deepest.block_idx);
474 self.mutable_block(deepest.block_idx);
465
475
466 if let Element::Rev(old_rev) = deepest.element {
476 if let Element::Rev(old_rev) = deepest.element {
467 let old_node = index
477 let old_node = index.node(old_rev).ok_or_else(|| {
468 .node(old_rev)
478 NodeMapError::RevisionNotInIndex(old_rev.into())
469 .ok_or(NodeMapError::RevisionNotInIndex(old_rev))?;
479 })?;
470 if old_node == node {
480 if old_node == node {
471 return Ok(()); // avoid creating lots of useless blocks
481 return Ok(()); // avoid creating lots of useless blocks
472 }
482 }
473
483
474 // Looping over the tail of nybbles in both nodes, creating
484 // Looping over the tail of nybbles in both nodes, creating
475 // new blocks until we find the difference
485 // new blocks until we find the difference
476 let mut new_block_idx = ro_len + glen;
486 let mut new_block_idx = ro_len + glen;
477 let mut nybble = deepest.nybble;
487 let mut nybble = deepest.nybble;
478 for nybble_pos in read_nybbles..node.nybbles_len() {
488 for nybble_pos in read_nybbles..node.nybbles_len() {
479 block.set(nybble, Element::Block(new_block_idx));
489 block.set(nybble, Element::Block(new_block_idx));
480
490
481 let new_nybble = node.get_nybble(nybble_pos);
491 let new_nybble = node.get_nybble(nybble_pos);
482 let old_nybble = old_node.get_nybble(nybble_pos);
492 let old_nybble = old_node.get_nybble(nybble_pos);
483
493
484 if old_nybble == new_nybble {
494 if old_nybble == new_nybble {
485 self.growable.push(Block::new());
495 self.growable.push(Block::new());
486 block = &mut self.growable[glen];
496 block = &mut self.growable[glen];
487 glen += 1;
497 glen += 1;
488 new_block_idx += 1;
498 new_block_idx += 1;
489 nybble = new_nybble;
499 nybble = new_nybble;
490 } else {
500 } else {
491 let mut new_block = Block::new();
501 let mut new_block = Block::new();
492 new_block.set(old_nybble, Element::Rev(old_rev));
502 new_block.set(old_nybble, Element::Rev(old_rev));
493 new_block.set(new_nybble, Element::Rev(rev));
503 new_block.set(new_nybble, Element::Rev(rev));
494 self.growable.push(new_block);
504 self.growable.push(new_block);
495 break;
505 break;
496 }
506 }
497 }
507 }
498 } else {
508 } else {
499 // Free slot in the deepest block: no splitting has to be done
509 // Free slot in the deepest block: no splitting has to be done
500 block.set(deepest.nybble, Element::Rev(rev));
510 block.set(deepest.nybble, Element::Rev(rev));
501 }
511 }
502
512
503 // Backtrack over visit steps to update references
513 // Backtrack over visit steps to update references
504 while let Some(visited) = visit_steps.pop() {
514 while let Some(visited) = visit_steps.pop() {
505 let to_write = Element::Block(block_idx);
515 let to_write = Element::Block(block_idx);
506 if visit_steps.is_empty() {
516 if visit_steps.is_empty() {
507 self.root.set(visited.nybble, to_write);
517 self.root.set(visited.nybble, to_write);
508 break;
518 break;
509 }
519 }
510 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
520 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
511 if block.get(visited.nybble) == to_write {
521 if block.get(visited.nybble) == to_write {
512 break;
522 break;
513 }
523 }
514 block.set(visited.nybble, to_write);
524 block.set(visited.nybble, to_write);
515 block_idx = new_idx;
525 block_idx = new_idx;
516 }
526 }
517 Ok(())
527 Ok(())
518 }
528 }
519
529
520 /// Make the whole `NodeTree` logically empty, without touching the
530 /// Make the whole `NodeTree` logically empty, without touching the
521 /// immutable part.
531 /// immutable part.
522 pub fn invalidate_all(&mut self) {
532 pub fn invalidate_all(&mut self) {
523 self.root = Block::new();
533 self.root = Block::new();
524 self.growable = Vec::new();
534 self.growable = Vec::new();
525 self.masked_inner_blocks = self.readonly.len();
535 self.masked_inner_blocks = self.readonly.len();
526 }
536 }
527
537
528 /// Return the number of blocks in the readonly part that are currently
538 /// Return the number of blocks in the readonly part that are currently
529 /// masked in the mutable part.
539 /// masked in the mutable part.
530 ///
540 ///
531 /// The `NodeTree` structure has no efficient way to know how many blocks
541 /// The `NodeTree` structure has no efficient way to know how many blocks
532 /// are already unreachable in the readonly part.
542 /// are already unreachable in the readonly part.
533 ///
543 ///
534 /// After a call to `invalidate_all()`, the returned number can be actually
544 /// After a call to `invalidate_all()`, the returned number can be actually
535 /// bigger than the whole readonly part, a conventional way to mean that
545 /// bigger than the whole readonly part, a conventional way to mean that
536 /// all the readonly blocks have been masked. This is what is really
546 /// all the readonly blocks have been masked. This is what is really
537 /// useful to the caller and does not require to know how many were
547 /// useful to the caller and does not require to know how many were
538 /// actually unreachable to begin with.
548 /// actually unreachable to begin with.
539 pub fn masked_readonly_blocks(&self) -> usize {
549 pub fn masked_readonly_blocks(&self) -> usize {
540 if let Some(readonly_root) = self.readonly.last() {
550 if let Some(readonly_root) = self.readonly.last() {
541 if readonly_root == &self.root {
551 if readonly_root == &self.root {
542 return 0;
552 return 0;
543 }
553 }
544 } else {
554 } else {
545 return 0;
555 return 0;
546 }
556 }
547 self.masked_inner_blocks + 1
557 self.masked_inner_blocks + 1
548 }
558 }
549 }
559 }
550
560
551 pub struct NodeTreeBytes {
561 pub struct NodeTreeBytes {
552 buffer: Box<dyn Deref<Target = [u8]> + Send>,
562 buffer: Box<dyn Deref<Target = [u8]> + Send>,
553 len_in_blocks: usize,
563 len_in_blocks: usize,
554 }
564 }
555
565
556 impl NodeTreeBytes {
566 impl NodeTreeBytes {
557 fn new(
567 fn new(
558 buffer: Box<dyn Deref<Target = [u8]> + Send>,
568 buffer: Box<dyn Deref<Target = [u8]> + Send>,
559 amount: usize,
569 amount: usize,
560 ) -> Self {
570 ) -> Self {
561 assert!(buffer.len() >= amount);
571 assert!(buffer.len() >= amount);
562 let len_in_blocks = amount / size_of::<Block>();
572 let len_in_blocks = amount / size_of::<Block>();
563 NodeTreeBytes {
573 NodeTreeBytes {
564 buffer,
574 buffer,
565 len_in_blocks,
575 len_in_blocks,
566 }
576 }
567 }
577 }
568 }
578 }
569
579
570 impl Deref for NodeTreeBytes {
580 impl Deref for NodeTreeBytes {
571 type Target = [Block];
581 type Target = [Block];
572
582
573 fn deref(&self) -> &[Block] {
583 fn deref(&self) -> &[Block] {
574 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
584 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
575 // `NodeTreeBytes::new` already asserted that `self.buffer` is
585 // `NodeTreeBytes::new` already asserted that `self.buffer` is
576 // large enough.
586 // large enough.
577 .unwrap()
587 .unwrap()
578 .0
588 .0
579 }
589 }
580 }
590 }
581
591
582 struct NodeTreeVisitor<'n> {
592 struct NodeTreeVisitor<'n> {
583 nt: &'n NodeTree,
593 nt: &'n NodeTree,
584 prefix: NodePrefix,
594 prefix: NodePrefix,
585 visit: usize,
595 visit: usize,
586 nybble_idx: usize,
596 nybble_idx: usize,
587 done: bool,
597 done: bool,
588 }
598 }
589
599
590 #[derive(Debug, PartialEq, Clone)]
600 #[derive(Debug, PartialEq, Clone)]
591 struct NodeTreeVisitItem {
601 struct NodeTreeVisitItem {
592 block_idx: usize,
602 block_idx: usize,
593 nybble: u8,
603 nybble: u8,
594 element: Element,
604 element: Element,
595 }
605 }
596
606
597 impl<'n> Iterator for NodeTreeVisitor<'n> {
607 impl<'n> Iterator for NodeTreeVisitor<'n> {
598 type Item = NodeTreeVisitItem;
608 type Item = NodeTreeVisitItem;
599
609
600 fn next(&mut self) -> Option<Self::Item> {
610 fn next(&mut self) -> Option<Self::Item> {
601 if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
611 if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
602 return None;
612 return None;
603 }
613 }
604
614
605 let nybble = self.prefix.get_nybble(self.nybble_idx);
615 let nybble = self.prefix.get_nybble(self.nybble_idx);
606 self.nybble_idx += 1;
616 self.nybble_idx += 1;
607
617
608 let visit = self.visit;
618 let visit = self.visit;
609 let element = self.nt[visit].get(nybble);
619 let element = self.nt[visit].get(nybble);
610 if let Element::Block(idx) = element {
620 if let Element::Block(idx) = element {
611 self.visit = idx;
621 self.visit = idx;
612 } else {
622 } else {
613 self.done = true;
623 self.done = true;
614 }
624 }
615
625
616 Some(NodeTreeVisitItem {
626 Some(NodeTreeVisitItem {
617 block_idx: visit,
627 block_idx: visit,
618 nybble,
628 nybble,
619 element,
629 element,
620 })
630 })
621 }
631 }
622 }
632 }
623
633
624 impl NodeTreeVisitItem {
634 impl NodeTreeVisitItem {
625 // Return `Some(opt)` if this item is final, with `opt` being the
635 // Return `Some(opt)` if this item is final, with `opt` being the
626 // `Revision` that it may represent.
636 // `UncheckedRevision` that it may represent.
627 //
637 //
628 // If the item is not terminal, return `None`
638 // If the item is not terminal, return `None`
629 fn final_revision(&self) -> Option<Option<Revision>> {
639 fn final_revision(&self) -> Option<Option<UncheckedRevision>> {
630 match self.element {
640 match self.element {
631 Element::Block(_) => None,
641 Element::Block(_) => None,
632 Element::Rev(r) => Some(Some(r)),
642 Element::Rev(r) => Some(Some(r.into())),
633 Element::None => Some(None),
643 Element::None => Some(None),
634 }
644 }
635 }
645 }
636 }
646 }
637
647
638 impl From<Vec<Block>> for NodeTree {
648 impl From<Vec<Block>> for NodeTree {
639 fn from(vec: Vec<Block>) -> Self {
649 fn from(vec: Vec<Block>) -> Self {
640 Self::new(Box::new(vec))
650 Self::new(Box::new(vec))
641 }
651 }
642 }
652 }
643
653
644 impl fmt::Debug for NodeTree {
654 impl fmt::Debug for NodeTree {
645 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
655 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
646 let readonly: &[Block] = &*self.readonly;
656 let readonly: &[Block] = &*self.readonly;
647 write!(
657 write!(
648 f,
658 f,
649 "readonly: {:?}, growable: {:?}, root: {:?}",
659 "readonly: {:?}, growable: {:?}, root: {:?}",
650 readonly, self.growable, self.root
660 readonly, self.growable, self.root
651 )
661 )
652 }
662 }
653 }
663 }
654
664
655 impl Default for NodeTree {
665 impl Default for NodeTree {
656 /// Create a fully mutable empty NodeTree
666 /// Create a fully mutable empty NodeTree
657 fn default() -> Self {
667 fn default() -> Self {
658 NodeTree::new(Box::new(Vec::new()))
668 NodeTree::new(Box::new(Vec::new()))
659 }
669 }
660 }
670 }
661
671
662 impl NodeMap for NodeTree {
672 impl NodeMap for NodeTree {
663 fn find_bin<'a>(
673 fn find_bin<'a>(
664 &self,
674 &self,
665 idx: &impl RevlogIndex,
675 idx: &impl RevlogIndex,
666 prefix: NodePrefix,
676 prefix: NodePrefix,
667 ) -> Result<Option<Revision>, NodeMapError> {
677 ) -> Result<Option<Revision>, NodeMapError> {
668 validate_candidate(idx, prefix, self.lookup(prefix)?)
678 validate_candidate(idx, prefix, self.lookup(prefix)?)
669 .map(|(opt, _shortest)| opt)
679 .map(|(opt, _shortest)| opt)
670 }
680 }
671
681
672 fn unique_prefix_len_bin<'a>(
682 fn unique_prefix_len_bin<'a>(
673 &self,
683 &self,
674 idx: &impl RevlogIndex,
684 idx: &impl RevlogIndex,
675 prefix: NodePrefix,
685 prefix: NodePrefix,
676 ) -> Result<Option<usize>, NodeMapError> {
686 ) -> Result<Option<usize>, NodeMapError> {
677 validate_candidate(idx, prefix, self.lookup(prefix)?)
687 validate_candidate(idx, prefix, self.lookup(prefix)?)
678 .map(|(opt, shortest)| opt.map(|_rev| shortest))
688 .map(|(opt, shortest)| opt.map(|_rev| shortest))
679 }
689 }
680 }
690 }
681
691
682 #[cfg(test)]
692 #[cfg(test)]
683 mod tests {
693 mod tests {
684 use super::NodeMapError::*;
694 use super::NodeMapError::*;
685 use super::*;
695 use super::*;
686 use crate::revlog::node::{hex_pad_right, Node};
696 use crate::revlog::node::{hex_pad_right, Node};
687 use std::collections::HashMap;
697 use std::collections::HashMap;
688
698
689 /// Creates a `Block` using a syntax close to the `Debug` output
699 /// Creates a `Block` using a syntax close to the `Debug` output
690 macro_rules! block {
700 macro_rules! block {
691 {$($nybble:tt : $variant:ident($val:tt)),*} => (
701 {$($nybble:tt : $variant:ident($val:tt)),*} => (
692 {
702 {
693 let mut block = Block::new();
703 let mut block = Block::new();
694 $(block.set($nybble, Element::$variant($val)));*;
704 $(block.set($nybble, Element::$variant($val)));*;
695 block
705 block
696 }
706 }
697 )
707 )
698 }
708 }
699
709
700 #[test]
710 #[test]
701 fn test_block_debug() {
711 fn test_block_debug() {
702 let mut block = Block::new();
712 let mut block = Block::new();
703 block.set(1, Element::Rev(3));
713 block.set(1, Element::Rev(3));
704 block.set(10, Element::Block(0));
714 block.set(10, Element::Block(0));
705 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
715 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
706 }
716 }
707
717
708 #[test]
718 #[test]
709 fn test_block_macro() {
719 fn test_block_macro() {
710 let block = block! {5: Block(2)};
720 let block = block! {5: Block(2)};
711 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
721 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
712
722
713 let block = block! {13: Rev(15), 5: Block(2)};
723 let block = block! {13: Rev(15), 5: Block(2)};
714 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
724 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
715 }
725 }
716
726
717 #[test]
727 #[test]
718 fn test_raw_block() {
728 fn test_raw_block() {
719 let mut raw = [255u8; 64];
729 let mut raw = [255u8; 64];
720
730
721 let mut counter = 0;
731 let mut counter = 0;
722 for val in [0_i32, 15, -2, -1, -3].iter() {
732 for val in [0_i32, 15, -2, -1, -3].iter() {
723 for byte in val.to_be_bytes().iter() {
733 for byte in val.to_be_bytes().iter() {
724 raw[counter] = *byte;
734 raw[counter] = *byte;
725 counter += 1;
735 counter += 1;
726 }
736 }
727 }
737 }
728 let (block, _) = Block::from_bytes(&raw).unwrap();
738 let (block, _) = Block::from_bytes(&raw).unwrap();
729 assert_eq!(block.get(0), Element::Block(0));
739 assert_eq!(block.get(0), Element::Block(0));
730 assert_eq!(block.get(1), Element::Block(15));
740 assert_eq!(block.get(1), Element::Block(15));
731 assert_eq!(block.get(3), Element::None);
741 assert_eq!(block.get(3), Element::None);
732 assert_eq!(block.get(2), Element::Rev(0));
742 assert_eq!(block.get(2), Element::Rev(0));
733 assert_eq!(block.get(4), Element::Rev(1));
743 assert_eq!(block.get(4), Element::Rev(1));
734 }
744 }
735
745
736 type TestIndex = HashMap<Revision, Node>;
746 type TestIndex = HashMap<UncheckedRevision, Node>;
737
747
738 impl RevlogIndex for TestIndex {
748 impl RevlogIndex for TestIndex {
739 fn node(&self, rev: Revision) -> Option<&Node> {
749 fn node(&self, rev: Revision) -> Option<&Node> {
740 self.get(&rev)
750 self.get(&rev.into())
741 }
751 }
742
752
743 fn len(&self) -> usize {
753 fn len(&self) -> usize {
744 self.len()
754 self.len()
745 }
755 }
756
757 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
758 self.get(&rev).map(|_| rev.0)
759 }
746 }
760 }
747
761
748 /// Pad hexadecimal Node prefix with zeros on the right
762 /// Pad hexadecimal Node prefix with zeros on the right
749 ///
763 ///
750 /// This avoids having to repeatedly write very long hexadecimal
764 /// This avoids having to repeatedly write very long hexadecimal
751 /// strings for test data, and brings actual hash size independency.
765 /// strings for test data, and brings actual hash size independency.
752 #[cfg(test)]
766 #[cfg(test)]
753 fn pad_node(hex: &str) -> Node {
767 fn pad_node(hex: &str) -> Node {
754 Node::from_hex(&hex_pad_right(hex)).unwrap()
768 Node::from_hex(&hex_pad_right(hex)).unwrap()
755 }
769 }
756
770
757 /// Pad hexadecimal Node prefix with zeros on the right, then insert
771 /// Pad hexadecimal Node prefix with zeros on the right, then insert
758 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
772 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
759 idx.insert(rev, pad_node(hex));
773 idx.insert(rev.into(), pad_node(hex));
760 }
774 }
761
775
762 fn sample_nodetree() -> NodeTree {
776 fn sample_nodetree() -> NodeTree {
763 NodeTree::from(vec![
777 NodeTree::from(vec![
764 block![0: Rev(9)],
778 block![0: Rev(9)],
765 block![0: Rev(0), 1: Rev(9)],
779 block![0: Rev(0), 1: Rev(9)],
766 block![0: Block(1), 1:Rev(1)],
780 block![0: Block(1), 1:Rev(1)],
767 ])
781 ])
768 }
782 }
769
783
770 fn hex(s: &str) -> NodePrefix {
784 fn hex(s: &str) -> NodePrefix {
771 NodePrefix::from_hex(s).unwrap()
785 NodePrefix::from_hex(s).unwrap()
772 }
786 }
773
787
774 #[test]
788 #[test]
775 fn test_nt_debug() {
789 fn test_nt_debug() {
776 let nt = sample_nodetree();
790 let nt = sample_nodetree();
777 assert_eq!(
791 assert_eq!(
778 format!("{:?}", nt),
792 format!("{:?}", nt),
779 "readonly: \
793 "readonly: \
780 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
794 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
781 growable: [], \
795 growable: [], \
782 root: {0: Block(1), 1: Rev(1)}",
796 root: {0: Block(1), 1: Rev(1)}",
783 );
797 );
784 }
798 }
785
799
786 #[test]
800 #[test]
787 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
801 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
788 let mut idx: TestIndex = HashMap::new();
802 let mut idx: TestIndex = HashMap::new();
789 pad_insert(&mut idx, 1, "1234deadcafe");
803 pad_insert(&mut idx, 1, "1234deadcafe");
790
804
791 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
805 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
792 assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1));
806 assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1));
793 assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1));
807 assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1));
794 assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1));
808 assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1));
795 assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
809 assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
796 assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
810 assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
797
811
798 // and with full binary Nodes
812 // and with full binary Nodes
799 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
813 assert_eq!(nt.find_node(&idx, idx.get(&1.into()).unwrap())?, Some(1));
800 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
814 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
801 assert_eq!(nt.find_node(&idx, &unknown)?, None);
815 assert_eq!(nt.find_node(&idx, &unknown)?, None);
802 Ok(())
816 Ok(())
803 }
817 }
804
818
805 #[test]
819 #[test]
806 fn test_immutable_find_one_jump() {
820 fn test_immutable_find_one_jump() {
807 let mut idx = TestIndex::new();
821 let mut idx = TestIndex::new();
808 pad_insert(&mut idx, 9, "012");
822 pad_insert(&mut idx, 9, "012");
809 pad_insert(&mut idx, 0, "00a");
823 pad_insert(&mut idx, 0, "00a");
810
824
811 let nt = sample_nodetree();
825 let nt = sample_nodetree();
812
826
813 assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
827 assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
814 assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9)));
828 assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9)));
815 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
829 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
816 assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0)));
830 assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0)));
817 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
831 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
818 assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
832 assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
819 }
833 }
820
834
821 #[test]
835 #[test]
822 fn test_mutated_find() -> Result<(), NodeMapError> {
836 fn test_mutated_find() -> Result<(), NodeMapError> {
823 let mut idx = TestIndex::new();
837 let mut idx = TestIndex::new();
824 pad_insert(&mut idx, 9, "012");
838 pad_insert(&mut idx, 9, "012");
825 pad_insert(&mut idx, 0, "00a");
839 pad_insert(&mut idx, 0, "00a");
826 pad_insert(&mut idx, 2, "cafe");
840 pad_insert(&mut idx, 2, "cafe");
827 pad_insert(&mut idx, 3, "15");
841 pad_insert(&mut idx, 3, "15");
828 pad_insert(&mut idx, 1, "10");
842 pad_insert(&mut idx, 1, "10");
829
843
830 let nt = NodeTree {
844 let nt = NodeTree {
831 readonly: sample_nodetree().readonly,
845 readonly: sample_nodetree().readonly,
832 growable: vec![block![0: Rev(1), 5: Rev(3)]],
846 growable: vec![block![0: Rev(1), 5: Rev(3)]],
833 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
847 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
834 masked_inner_blocks: 1,
848 masked_inner_blocks: 1,
835 };
849 };
836 assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1));
850 assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1));
837 assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2));
851 assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2));
838 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
852 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
839 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
853 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
840 assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
854 assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
841 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
855 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
842 assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9));
856 assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9));
843 assert_eq!(nt.masked_readonly_blocks(), 2);
857 assert_eq!(nt.masked_readonly_blocks(), 2);
844 Ok(())
858 Ok(())
845 }
859 }
846
860
847 struct TestNtIndex {
861 struct TestNtIndex {
848 index: TestIndex,
862 index: TestIndex,
849 nt: NodeTree,
863 nt: NodeTree,
850 }
864 }
851
865
852 impl TestNtIndex {
866 impl TestNtIndex {
853 fn new() -> Self {
867 fn new() -> Self {
854 TestNtIndex {
868 TestNtIndex {
855 index: HashMap::new(),
869 index: HashMap::new(),
856 nt: NodeTree::default(),
870 nt: NodeTree::default(),
857 }
871 }
858 }
872 }
859
873
860 fn insert(
874 fn insert(&mut self, rev: i32, hex: &str) -> Result<(), NodeMapError> {
861 &mut self,
862 rev: Revision,
863 hex: &str,
864 ) -> Result<(), NodeMapError> {
865 let node = pad_node(hex);
875 let node = pad_node(hex);
876 let rev: UncheckedRevision = rev.into();
866 self.index.insert(rev, node);
877 self.index.insert(rev, node);
867 self.nt.insert(&self.index, &node, rev)?;
878 self.nt.insert(
879 &self.index,
880 &node,
881 self.index.check_revision(rev).unwrap(),
882 )?;
868 Ok(())
883 Ok(())
869 }
884 }
870
885
871 fn find_hex(
886 fn find_hex(
872 &self,
887 &self,
873 prefix: &str,
888 prefix: &str,
874 ) -> Result<Option<Revision>, NodeMapError> {
889 ) -> Result<Option<Revision>, NodeMapError> {
875 self.nt.find_bin(&self.index, hex(prefix))
890 self.nt.find_bin(&self.index, hex(prefix))
876 }
891 }
877
892
878 fn unique_prefix_len_hex(
893 fn unique_prefix_len_hex(
879 &self,
894 &self,
880 prefix: &str,
895 prefix: &str,
881 ) -> Result<Option<usize>, NodeMapError> {
896 ) -> Result<Option<usize>, NodeMapError> {
882 self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
897 self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
883 }
898 }
884
899
885 /// Drain `added` and restart a new one
900 /// Drain `added` and restart a new one
886 fn commit(self) -> Self {
901 fn commit(self) -> Self {
887 let mut as_vec: Vec<Block> =
902 let mut as_vec: Vec<Block> =
888 self.nt.readonly.iter().copied().collect();
903 self.nt.readonly.iter().copied().collect();
889 as_vec.extend(self.nt.growable);
904 as_vec.extend(self.nt.growable);
890 as_vec.push(self.nt.root);
905 as_vec.push(self.nt.root);
891
906
892 Self {
907 Self {
893 index: self.index,
908 index: self.index,
894 nt: NodeTree::from(as_vec),
909 nt: NodeTree::from(as_vec),
895 }
910 }
896 }
911 }
897 }
912 }
898
913
899 #[test]
914 #[test]
900 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
915 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
901 let mut idx = TestNtIndex::new();
916 let mut idx = TestNtIndex::new();
902 idx.insert(0, "1234")?;
917 idx.insert(0, "1234")?;
903 assert_eq!(idx.find_hex("1")?, Some(0));
918 assert_eq!(idx.find_hex("1")?, Some(0));
904 assert_eq!(idx.find_hex("12")?, Some(0));
919 assert_eq!(idx.find_hex("12")?, Some(0));
905
920
906 // let's trigger a simple split
921 // let's trigger a simple split
907 idx.insert(1, "1a34")?;
922 idx.insert(1, "1a34")?;
908 assert_eq!(idx.nt.growable.len(), 1);
923 assert_eq!(idx.nt.growable.len(), 1);
909 assert_eq!(idx.find_hex("12")?, Some(0));
924 assert_eq!(idx.find_hex("12")?, Some(0));
910 assert_eq!(idx.find_hex("1a")?, Some(1));
925 assert_eq!(idx.find_hex("1a")?, Some(1));
911
926
912 // reinserting is a no_op
927 // reinserting is a no_op
913 idx.insert(1, "1a34")?;
928 idx.insert(1, "1a34")?;
914 assert_eq!(idx.nt.growable.len(), 1);
929 assert_eq!(idx.nt.growable.len(), 1);
915 assert_eq!(idx.find_hex("12")?, Some(0));
930 assert_eq!(idx.find_hex("12")?, Some(0));
916 assert_eq!(idx.find_hex("1a")?, Some(1));
931 assert_eq!(idx.find_hex("1a")?, Some(1));
917
932
918 idx.insert(2, "1a01")?;
933 idx.insert(2, "1a01")?;
919 assert_eq!(idx.nt.growable.len(), 2);
934 assert_eq!(idx.nt.growable.len(), 2);
920 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
935 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
921 assert_eq!(idx.find_hex("12")?, Some(0));
936 assert_eq!(idx.find_hex("12")?, Some(0));
922 assert_eq!(idx.find_hex("1a3")?, Some(1));
937 assert_eq!(idx.find_hex("1a3")?, Some(1));
923 assert_eq!(idx.find_hex("1a0")?, Some(2));
938 assert_eq!(idx.find_hex("1a0")?, Some(2));
924 assert_eq!(idx.find_hex("1a12")?, None);
939 assert_eq!(idx.find_hex("1a12")?, None);
925
940
926 // now let's make it split and create more than one additional block
941 // now let's make it split and create more than one additional block
927 idx.insert(3, "1a345")?;
942 idx.insert(3, "1a345")?;
928 assert_eq!(idx.nt.growable.len(), 4);
943 assert_eq!(idx.nt.growable.len(), 4);
929 assert_eq!(idx.find_hex("1a340")?, Some(1));
944 assert_eq!(idx.find_hex("1a340")?, Some(1));
930 assert_eq!(idx.find_hex("1a345")?, Some(3));
945 assert_eq!(idx.find_hex("1a345")?, Some(3));
931 assert_eq!(idx.find_hex("1a341")?, None);
946 assert_eq!(idx.find_hex("1a341")?, None);
932
947
933 // there's no readonly block to mask
948 // there's no readonly block to mask
934 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
949 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
935 Ok(())
950 Ok(())
936 }
951 }
937
952
938 #[test]
953 #[test]
939 fn test_unique_prefix_len_zero_prefix() {
954 fn test_unique_prefix_len_zero_prefix() {
940 let mut idx = TestNtIndex::new();
955 let mut idx = TestNtIndex::new();
941 idx.insert(0, "00000abcd").unwrap();
956 idx.insert(0, "00000abcd").unwrap();
942
957
943 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
958 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
944 // in the nodetree proper, this will be found at the first nybble
959 // in the nodetree proper, this will be found at the first nybble
945 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
960 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
946 // but the first difference with `NULL_NODE`
961 // but the first difference with `NULL_NODE`
947 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
962 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
948 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
963 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
949
964
950 // same with odd result
965 // same with odd result
951 idx.insert(1, "00123").unwrap();
966 idx.insert(1, "00123").unwrap();
952 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
967 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
953 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
968 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
954
969
955 // these are unchanged of course
970 // these are unchanged of course
956 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
971 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
957 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
972 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
958 }
973 }
959
974
960 #[test]
975 #[test]
961 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
976 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
962 // check that the splitting loop is long enough
977 // check that the splitting loop is long enough
963 let mut nt_idx = TestNtIndex::new();
978 let mut nt_idx = TestNtIndex::new();
964 let nt = &mut nt_idx.nt;
979 let nt = &mut nt_idx.nt;
965 let idx = &mut nt_idx.index;
980 let idx = &mut nt_idx.index;
966
981
967 let node0_hex = hex_pad_right("444444");
982 let node0_hex = hex_pad_right("444444");
968 let mut node1_hex = hex_pad_right("444444");
983 let mut node1_hex = hex_pad_right("444444");
969 node1_hex.pop();
984 node1_hex.pop();
970 node1_hex.push('5');
985 node1_hex.push('5');
971 let node0 = Node::from_hex(&node0_hex).unwrap();
986 let node0 = Node::from_hex(&node0_hex).unwrap();
972 let node1 = Node::from_hex(&node1_hex).unwrap();
987 let node1 = Node::from_hex(&node1_hex).unwrap();
973
988
974 idx.insert(0, node0);
989 idx.insert(0.into(), node0);
975 nt.insert(idx, &node0, 0)?;
990 nt.insert(idx, &node0, 0)?;
976 idx.insert(1, node1);
991 idx.insert(1.into(), node1);
977 nt.insert(idx, &node1, 1)?;
992 nt.insert(idx, &node1, 1)?;
978
993
979 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
994 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
980 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
995 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
981 Ok(())
996 Ok(())
982 }
997 }
983
998
984 #[test]
999 #[test]
985 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
1000 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
986 let mut idx = TestNtIndex::new();
1001 let mut idx = TestNtIndex::new();
987 idx.insert(0, "1234")?;
1002 idx.insert(0, "1234")?;
988 idx.insert(1, "1235")?;
1003 idx.insert(1, "1235")?;
989 idx.insert(2, "131")?;
1004 idx.insert(2, "131")?;
990 idx.insert(3, "cafe")?;
1005 idx.insert(3, "cafe")?;
991 let mut idx = idx.commit();
1006 let mut idx = idx.commit();
992 assert_eq!(idx.find_hex("1234")?, Some(0));
1007 assert_eq!(idx.find_hex("1234")?, Some(0));
993 assert_eq!(idx.find_hex("1235")?, Some(1));
1008 assert_eq!(idx.find_hex("1235")?, Some(1));
994 assert_eq!(idx.find_hex("131")?, Some(2));
1009 assert_eq!(idx.find_hex("131")?, Some(2));
995 assert_eq!(idx.find_hex("cafe")?, Some(3));
1010 assert_eq!(idx.find_hex("cafe")?, Some(3));
996 // we did not add anything since init from readonly
1011 // we did not add anything since init from readonly
997 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1012 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
998
1013
999 idx.insert(4, "123A")?;
1014 idx.insert(4, "123A")?;
1000 assert_eq!(idx.find_hex("1234")?, Some(0));
1015 assert_eq!(idx.find_hex("1234")?, Some(0));
1001 assert_eq!(idx.find_hex("1235")?, Some(1));
1016 assert_eq!(idx.find_hex("1235")?, Some(1));
1002 assert_eq!(idx.find_hex("131")?, Some(2));
1017 assert_eq!(idx.find_hex("131")?, Some(2));
1003 assert_eq!(idx.find_hex("cafe")?, Some(3));
1018 assert_eq!(idx.find_hex("cafe")?, Some(3));
1004 assert_eq!(idx.find_hex("123A")?, Some(4));
1019 assert_eq!(idx.find_hex("123A")?, Some(4));
1005 // we masked blocks for all prefixes of "123", including the root
1020 // we masked blocks for all prefixes of "123", including the root
1006 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1021 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1007
1022
1008 eprintln!("{:?}", idx.nt);
1023 eprintln!("{:?}", idx.nt);
1009 idx.insert(5, "c0")?;
1024 idx.insert(5, "c0")?;
1010 assert_eq!(idx.find_hex("cafe")?, Some(3));
1025 assert_eq!(idx.find_hex("cafe")?, Some(3));
1011 assert_eq!(idx.find_hex("c0")?, Some(5));
1026 assert_eq!(idx.find_hex("c0")?, Some(5));
1012 assert_eq!(idx.find_hex("c1")?, None);
1027 assert_eq!(idx.find_hex("c1")?, None);
1013 assert_eq!(idx.find_hex("1234")?, Some(0));
1028 assert_eq!(idx.find_hex("1234")?, Some(0));
1014 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1029 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1015 // it doesn't mask anything
1030 // it doesn't mask anything
1016 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1031 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1017
1032
1018 Ok(())
1033 Ok(())
1019 }
1034 }
1020
1035
1021 #[test]
1036 #[test]
1022 fn test_invalidate_all() -> Result<(), NodeMapError> {
1037 fn test_invalidate_all() -> Result<(), NodeMapError> {
1023 let mut idx = TestNtIndex::new();
1038 let mut idx = TestNtIndex::new();
1024 idx.insert(0, "1234")?;
1039 idx.insert(0, "1234")?;
1025 idx.insert(1, "1235")?;
1040 idx.insert(1, "1235")?;
1026 idx.insert(2, "131")?;
1041 idx.insert(2, "131")?;
1027 idx.insert(3, "cafe")?;
1042 idx.insert(3, "cafe")?;
1028 let mut idx = idx.commit();
1043 let mut idx = idx.commit();
1029
1044
1030 idx.nt.invalidate_all();
1045 idx.nt.invalidate_all();
1031
1046
1032 assert_eq!(idx.find_hex("1234")?, None);
1047 assert_eq!(idx.find_hex("1234")?, None);
1033 assert_eq!(idx.find_hex("1235")?, None);
1048 assert_eq!(idx.find_hex("1235")?, None);
1034 assert_eq!(idx.find_hex("131")?, None);
1049 assert_eq!(idx.find_hex("131")?, None);
1035 assert_eq!(idx.find_hex("cafe")?, None);
1050 assert_eq!(idx.find_hex("cafe")?, None);
1036 // all the readonly blocks have been masked, this is the
1051 // all the readonly blocks have been masked, this is the
1037 // conventional expected response
1052 // conventional expected response
1038 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1053 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1039 Ok(())
1054 Ok(())
1040 }
1055 }
1041
1056
1042 #[test]
1057 #[test]
1043 fn test_into_added_empty() {
1058 fn test_into_added_empty() {
1044 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1059 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1045 assert!(sample_nodetree()
1060 assert!(sample_nodetree()
1046 .into_readonly_and_added_bytes()
1061 .into_readonly_and_added_bytes()
1047 .1
1062 .1
1048 .is_empty());
1063 .is_empty());
1049 }
1064 }
1050
1065
1051 #[test]
1066 #[test]
1052 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1067 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1053 let mut idx = TestNtIndex::new();
1068 let mut idx = TestNtIndex::new();
1054 idx.insert(0, "1234")?;
1069 idx.insert(0, "1234")?;
1055 let mut idx = idx.commit();
1070 let mut idx = idx.commit();
1056 idx.insert(4, "cafe")?;
1071 idx.insert(4, "cafe")?;
1057 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1072 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1058
1073
1059 // only the root block has been changed
1074 // only the root block has been changed
1060 assert_eq!(bytes.len(), size_of::<Block>());
1075 assert_eq!(bytes.len(), size_of::<Block>());
1061 // big endian for -2
1076 // big endian for -2
1062 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1077 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1063 // big endian for -6
1078 // big endian for -6
1064 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1079 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1065 Ok(())
1080 Ok(())
1066 }
1081 }
1067 }
1082 }
@@ -1,69 +1,69 b''
1 //! The revset query language
1 //! The revset query language
2 //!
2 //!
3 //! <https://www.mercurial-scm.org/repo/hg/help/revsets>
3 //! <https://www.mercurial-scm.org/repo/hg/help/revsets>
4
4
5 use crate::errors::HgError;
5 use crate::errors::HgError;
6 use crate::repo::Repo;
6 use crate::repo::Repo;
7 use crate::revlog::NodePrefix;
7 use crate::revlog::NodePrefix;
8 use crate::revlog::{Revision, NULL_REVISION, WORKING_DIRECTORY_HEX};
8 use crate::revlog::{Revision, NULL_REVISION, WORKING_DIRECTORY_HEX};
9 use crate::revlog::{Revlog, RevlogError};
9 use crate::revlog::{Revlog, RevlogError};
10 use crate::Node;
10 use crate::Node;
11
11
12 /// Resolve a query string into a single revision.
12 /// Resolve a query string into a single revision.
13 ///
13 ///
14 /// Only some of the revset language is implemented yet.
14 /// Only some of the revset language is implemented yet.
15 pub fn resolve_single(
15 pub fn resolve_single(
16 input: &str,
16 input: &str,
17 repo: &Repo,
17 repo: &Repo,
18 ) -> Result<Revision, RevlogError> {
18 ) -> Result<Revision, RevlogError> {
19 let changelog = repo.changelog()?;
19 let changelog = repo.changelog()?;
20
20
21 match input {
21 match input {
22 "." => {
22 "." => {
23 let p1 = repo.dirstate_parents()?.p1;
23 let p1 = repo.dirstate_parents()?.p1;
24 return changelog.revlog.rev_from_node(p1.into());
24 return changelog.revlog.rev_from_node(p1.into());
25 }
25 }
26 "null" => return Ok(NULL_REVISION),
26 "null" => return Ok(NULL_REVISION),
27 _ => {}
27 _ => {}
28 }
28 }
29
29
30 match resolve_rev_number_or_hex_prefix(input, &changelog.revlog) {
30 match resolve_rev_number_or_hex_prefix(input, &changelog.revlog) {
31 Err(RevlogError::InvalidRevision) => {
31 Err(RevlogError::InvalidRevision) => {
32 // TODO: support for the rest of the language here.
32 // TODO: support for the rest of the language here.
33 let msg = format!("cannot parse revset '{}'", input);
33 let msg = format!("cannot parse revset '{}'", input);
34 Err(HgError::unsupported(msg).into())
34 Err(HgError::unsupported(msg).into())
35 }
35 }
36 result => result,
36 result => result,
37 }
37 }
38 }
38 }
39
39
40 /// Resolve the small subset of the language suitable for revlogs other than
40 /// Resolve the small subset of the language suitable for revlogs other than
41 /// the changelog, such as in `hg debugdata --manifest` CLI argument.
41 /// the changelog, such as in `hg debugdata --manifest` CLI argument.
42 ///
42 ///
43 /// * A non-negative decimal integer for a revision number, or
43 /// * A non-negative decimal integer for a revision number, or
44 /// * An hexadecimal string, for the unique node ID that starts with this
44 /// * An hexadecimal string, for the unique node ID that starts with this
45 /// prefix
45 /// prefix
46 pub fn resolve_rev_number_or_hex_prefix(
46 pub fn resolve_rev_number_or_hex_prefix(
47 input: &str,
47 input: &str,
48 revlog: &Revlog,
48 revlog: &Revlog,
49 ) -> Result<Revision, RevlogError> {
49 ) -> Result<Revision, RevlogError> {
50 // The Python equivalent of this is part of `revsymbol` in
50 // The Python equivalent of this is part of `revsymbol` in
51 // `mercurial/scmutil.py`
51 // `mercurial/scmutil.py`
52
52
53 if let Ok(integer) = input.parse::<i32>() {
53 if let Ok(integer) = input.parse::<i32>() {
54 if integer.to_string() == input
54 if integer.to_string() == input
55 && integer >= 0
55 && integer >= 0
56 && revlog.has_rev(integer)
56 && revlog.has_rev(integer.into())
57 {
57 {
58 return Ok(integer);
58 return Ok(integer);
59 }
59 }
60 }
60 }
61 if let Ok(prefix) = NodePrefix::from_hex(input) {
61 if let Ok(prefix) = NodePrefix::from_hex(input) {
62 if prefix.is_prefix_of(&Node::from_hex(WORKING_DIRECTORY_HEX).unwrap())
62 if prefix.is_prefix_of(&Node::from_hex(WORKING_DIRECTORY_HEX).unwrap())
63 {
63 {
64 return Err(RevlogError::WDirUnsupported);
64 return Err(RevlogError::WDirUnsupported);
65 }
65 }
66 return revlog.rev_from_node(prefix);
66 return revlog.rev_from_node(prefix);
67 }
67 }
68 Err(RevlogError::InvalidRevision)
68 Err(RevlogError::InvalidRevision)
69 }
69 }
@@ -1,515 +1,518 b''
1 // revlog.rs
1 // revlog.rs
2 //
2 //
3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::{
8 use crate::{
9 cindex,
9 cindex,
10 utils::{node_from_py_bytes, node_from_py_object},
10 utils::{node_from_py_bytes, node_from_py_object},
11 };
11 };
12 use cpython::{
12 use cpython::{
13 buffer::{Element, PyBuffer},
13 buffer::{Element, PyBuffer},
14 exc::{IndexError, ValueError},
14 exc::{IndexError, ValueError},
15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
16 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
16 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
17 };
17 };
18 use hg::{
18 use hg::{
19 nodemap::{Block, NodeMapError, NodeTree},
19 nodemap::{Block, NodeMapError, NodeTree},
20 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
20 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
21 Revision,
21 Revision, UncheckedRevision,
22 };
22 };
23 use std::cell::RefCell;
23 use std::cell::RefCell;
24
24
25 /// Return a Struct implementing the Graph trait
25 /// Return a Struct implementing the Graph trait
26 pub(crate) fn pyindex_to_graph(
26 pub(crate) fn pyindex_to_graph(
27 py: Python,
27 py: Python,
28 index: PyObject,
28 index: PyObject,
29 ) -> PyResult<cindex::Index> {
29 ) -> PyResult<cindex::Index> {
30 match index.extract::<MixedIndex>(py) {
30 match index.extract::<MixedIndex>(py) {
31 Ok(midx) => Ok(midx.clone_cindex(py)),
31 Ok(midx) => Ok(midx.clone_cindex(py)),
32 Err(_) => cindex::Index::new(py, index),
32 Err(_) => cindex::Index::new(py, index),
33 }
33 }
34 }
34 }
35
35
36 py_class!(pub class MixedIndex |py| {
36 py_class!(pub class MixedIndex |py| {
37 data cindex: RefCell<cindex::Index>;
37 data cindex: RefCell<cindex::Index>;
38 data nt: RefCell<Option<NodeTree>>;
38 data nt: RefCell<Option<NodeTree>>;
39 data docket: RefCell<Option<PyObject>>;
39 data docket: RefCell<Option<PyObject>>;
40 // Holds a reference to the mmap'ed persistent nodemap data
40 // Holds a reference to the mmap'ed persistent nodemap data
41 data mmap: RefCell<Option<PyBuffer>>;
41 data mmap: RefCell<Option<PyBuffer>>;
42
42
43 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
43 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
44 Self::new(py, cindex)
44 Self::new(py, cindex)
45 }
45 }
46
46
47 /// Compatibility layer used for Python consumers needing access to the C index
47 /// Compatibility layer used for Python consumers needing access to the C index
48 ///
48 ///
49 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
49 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
50 /// that may need to build a custom `nodetree`, based on a specified revset.
50 /// that may need to build a custom `nodetree`, based on a specified revset.
51 /// With a Rust implementation of the nodemap, we will be able to get rid of
51 /// With a Rust implementation of the nodemap, we will be able to get rid of
52 /// this, by exposing our own standalone nodemap class,
52 /// this, by exposing our own standalone nodemap class,
53 /// ready to accept `MixedIndex`.
53 /// ready to accept `MixedIndex`.
54 def get_cindex(&self) -> PyResult<PyObject> {
54 def get_cindex(&self) -> PyResult<PyObject> {
55 Ok(self.cindex(py).borrow().inner().clone_ref(py))
55 Ok(self.cindex(py).borrow().inner().clone_ref(py))
56 }
56 }
57
57
58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
59
59
60 /// Return Revision if found, raises a bare `error.RevlogError`
60 /// Return Revision if found, raises a bare `error.RevlogError`
61 /// in case of ambiguity, same as C version does
61 /// in case of ambiguity, same as C version does
62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
63 let opt = self.get_nodetree(py)?.borrow();
63 let opt = self.get_nodetree(py)?.borrow();
64 let nt = opt.as_ref().unwrap();
64 let nt = opt.as_ref().unwrap();
65 let idx = &*self.cindex(py).borrow();
65 let idx = &*self.cindex(py).borrow();
66 let node = node_from_py_bytes(py, &node)?;
66 let node = node_from_py_bytes(py, &node)?;
67 nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
67 nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
68 }
68 }
69
69
70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
71 /// is not found.
71 /// is not found.
72 ///
72 ///
73 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
73 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
74 /// will catch and rewrap with it
74 /// will catch and rewrap with it
75 def rev(&self, node: PyBytes) -> PyResult<Revision> {
75 def rev(&self, node: PyBytes) -> PyResult<Revision> {
76 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
76 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
77 }
77 }
78
78
79 /// return True if the node exist in the index
79 /// return True if the node exist in the index
80 def has_node(&self, node: PyBytes) -> PyResult<bool> {
80 def has_node(&self, node: PyBytes) -> PyResult<bool> {
81 self.get_rev(py, node).map(|opt| opt.is_some())
81 self.get_rev(py, node).map(|opt| opt.is_some())
82 }
82 }
83
83
84 /// find length of shortest hex nodeid of a binary ID
84 /// find length of shortest hex nodeid of a binary ID
85 def shortest(&self, node: PyBytes) -> PyResult<usize> {
85 def shortest(&self, node: PyBytes) -> PyResult<usize> {
86 let opt = self.get_nodetree(py)?.borrow();
86 let opt = self.get_nodetree(py)?.borrow();
87 let nt = opt.as_ref().unwrap();
87 let nt = opt.as_ref().unwrap();
88 let idx = &*self.cindex(py).borrow();
88 let idx = &*self.cindex(py).borrow();
89 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
89 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
90 {
90 {
91 Ok(Some(l)) => Ok(l),
91 Ok(Some(l)) => Ok(l),
92 Ok(None) => Err(revlog_error(py)),
92 Ok(None) => Err(revlog_error(py)),
93 Err(e) => Err(nodemap_error(py, e)),
93 Err(e) => Err(nodemap_error(py, e)),
94 }
94 }
95 }
95 }
96
96
97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
98 let opt = self.get_nodetree(py)?.borrow();
98 let opt = self.get_nodetree(py)?.borrow();
99 let nt = opt.as_ref().unwrap();
99 let nt = opt.as_ref().unwrap();
100 let idx = &*self.cindex(py).borrow();
100 let idx = &*self.cindex(py).borrow();
101
101
102 let node_as_string = if cfg!(feature = "python3-sys") {
102 let node_as_string = if cfg!(feature = "python3-sys") {
103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
104 }
104 }
105 else {
105 else {
106 let node = node.extract::<PyBytes>(py)?;
106 let node = node.extract::<PyBytes>(py)?;
107 String::from_utf8_lossy(node.data(py)).to_string()
107 String::from_utf8_lossy(node.data(py)).to_string()
108 };
108 };
109
109
110 let prefix = NodePrefix::from_hex(&node_as_string)
110 let prefix = NodePrefix::from_hex(&node_as_string)
111 .map_err(|_| PyErr::new::<ValueError, _>(
111 .map_err(|_| PyErr::new::<ValueError, _>(
112 py, format!("Invalid node or prefix '{}'", node_as_string))
112 py, format!("Invalid node or prefix '{}'", node_as_string))
113 )?;
113 )?;
114
114
115 nt.find_bin(idx, prefix)
115 nt.find_bin(idx, prefix)
116 // TODO make an inner API returning the node directly
116 // TODO make an inner API returning the node directly
117 .map(|opt| opt.map(
117 .map(|opt| opt.map(
118 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
118 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
119 .map_err(|e| nodemap_error(py, e))
119 .map_err(|e| nodemap_error(py, e))
120
120
121 }
121 }
122
122
123 /// append an index entry
123 /// append an index entry
124 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
124 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
125 if tup.len(py) < 8 {
125 if tup.len(py) < 8 {
126 // this is better than the panic promised by tup.get_item()
126 // this is better than the panic promised by tup.get_item()
127 return Err(
127 return Err(
128 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
128 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
129 }
129 }
130 let node_bytes = tup.get_item(py, 7).extract(py)?;
130 let node_bytes = tup.get_item(py, 7).extract(py)?;
131 let node = node_from_py_object(py, &node_bytes)?;
131 let node = node_from_py_object(py, &node_bytes)?;
132
132
133 let mut idx = self.cindex(py).borrow_mut();
133 let mut idx = self.cindex(py).borrow_mut();
134 let rev = idx.len() as Revision;
134 let rev = idx.len() as Revision;
135
135
136 idx.append(py, tup)?;
136 idx.append(py, tup)?;
137 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
137 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
138 .insert(&*idx, &node, rev)
138 .insert(&*idx, &node, rev)
139 .map_err(|e| nodemap_error(py, e))?;
139 .map_err(|e| nodemap_error(py, e))?;
140 Ok(py.None())
140 Ok(py.None())
141 }
141 }
142
142
143 def __delitem__(&self, key: PyObject) -> PyResult<()> {
143 def __delitem__(&self, key: PyObject) -> PyResult<()> {
144 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
144 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
145 self.cindex(py).borrow().inner().del_item(py, key)?;
145 self.cindex(py).borrow().inner().del_item(py, key)?;
146 let mut opt = self.get_nodetree(py)?.borrow_mut();
146 let mut opt = self.get_nodetree(py)?.borrow_mut();
147 let nt = opt.as_mut().unwrap();
147 let nt = opt.as_mut().unwrap();
148 nt.invalidate_all();
148 nt.invalidate_all();
149 self.fill_nodemap(py, nt)?;
149 self.fill_nodemap(py, nt)?;
150 Ok(())
150 Ok(())
151 }
151 }
152
152
153 //
153 //
154 // Reforwarded C index API
154 // Reforwarded C index API
155 //
155 //
156
156
157 // index_methods (tp_methods). Same ordering as in revlog.c
157 // index_methods (tp_methods). Same ordering as in revlog.c
158
158
159 /// return the gca set of the given revs
159 /// return the gca set of the given revs
160 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
160 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
161 self.call_cindex(py, "ancestors", args, kw)
161 self.call_cindex(py, "ancestors", args, kw)
162 }
162 }
163
163
164 /// return the heads of the common ancestors of the given revs
164 /// return the heads of the common ancestors of the given revs
165 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
165 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
166 self.call_cindex(py, "commonancestorsheads", args, kw)
166 self.call_cindex(py, "commonancestorsheads", args, kw)
167 }
167 }
168
168
169 /// Clear the index caches and inner py_class data.
169 /// Clear the index caches and inner py_class data.
170 /// It is Python's responsibility to call `update_nodemap_data` again.
170 /// It is Python's responsibility to call `update_nodemap_data` again.
171 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
171 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
172 self.nt(py).borrow_mut().take();
172 self.nt(py).borrow_mut().take();
173 self.docket(py).borrow_mut().take();
173 self.docket(py).borrow_mut().take();
174 self.mmap(py).borrow_mut().take();
174 self.mmap(py).borrow_mut().take();
175 self.call_cindex(py, "clearcaches", args, kw)
175 self.call_cindex(py, "clearcaches", args, kw)
176 }
176 }
177
177
178 /// return the raw binary string representing a revision
178 /// return the raw binary string representing a revision
179 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
179 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
180 self.call_cindex(py, "entry_binary", args, kw)
180 self.call_cindex(py, "entry_binary", args, kw)
181 }
181 }
182
182
183 /// return a binary packed version of the header
183 /// return a binary packed version of the header
184 def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
184 def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
185 self.call_cindex(py, "pack_header", args, kw)
185 self.call_cindex(py, "pack_header", args, kw)
186 }
186 }
187
187
188 /// get an index entry
188 /// get an index entry
189 def get(&self, *args, **kw) -> PyResult<PyObject> {
189 def get(&self, *args, **kw) -> PyResult<PyObject> {
190 self.call_cindex(py, "get", args, kw)
190 self.call_cindex(py, "get", args, kw)
191 }
191 }
192
192
193 /// compute phases
193 /// compute phases
194 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
194 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
195 self.call_cindex(py, "computephasesmapsets", args, kw)
195 self.call_cindex(py, "computephasesmapsets", args, kw)
196 }
196 }
197
197
198 /// reachableroots
198 /// reachableroots
199 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
199 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
200 self.call_cindex(py, "reachableroots2", args, kw)
200 self.call_cindex(py, "reachableroots2", args, kw)
201 }
201 }
202
202
203 /// get head revisions
203 /// get head revisions
204 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
204 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
205 self.call_cindex(py, "headrevs", args, kw)
205 self.call_cindex(py, "headrevs", args, kw)
206 }
206 }
207
207
208 /// get filtered head revisions
208 /// get filtered head revisions
209 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
209 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
210 self.call_cindex(py, "headrevsfiltered", args, kw)
210 self.call_cindex(py, "headrevsfiltered", args, kw)
211 }
211 }
212
212
213 /// True if the object is a snapshot
213 /// True if the object is a snapshot
214 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
214 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
215 self.call_cindex(py, "issnapshot", args, kw)
215 self.call_cindex(py, "issnapshot", args, kw)
216 }
216 }
217
217
218 /// Gather snapshot data in a cache dict
218 /// Gather snapshot data in a cache dict
219 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
219 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
220 self.call_cindex(py, "findsnapshots", args, kw)
220 self.call_cindex(py, "findsnapshots", args, kw)
221 }
221 }
222
222
223 /// determine revisions with deltas to reconstruct fulltext
223 /// determine revisions with deltas to reconstruct fulltext
224 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
224 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
225 self.call_cindex(py, "deltachain", args, kw)
225 self.call_cindex(py, "deltachain", args, kw)
226 }
226 }
227
227
228 /// slice planned chunk read to reach a density threshold
228 /// slice planned chunk read to reach a density threshold
229 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
229 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
230 self.call_cindex(py, "slicechunktodensity", args, kw)
230 self.call_cindex(py, "slicechunktodensity", args, kw)
231 }
231 }
232
232
233 /// stats for the index
233 /// stats for the index
234 def stats(&self, *args, **kw) -> PyResult<PyObject> {
234 def stats(&self, *args, **kw) -> PyResult<PyObject> {
235 self.call_cindex(py, "stats", args, kw)
235 self.call_cindex(py, "stats", args, kw)
236 }
236 }
237
237
238 // index_sequence_methods and index_mapping_methods.
238 // index_sequence_methods and index_mapping_methods.
239 //
239 //
240 // Since we call back through the high level Python API,
240 // Since we call back through the high level Python API,
241 // there's no point making a distinction between index_get
241 // there's no point making a distinction between index_get
242 // and index_getitem.
242 // and index_getitem.
243
243
244 def __len__(&self) -> PyResult<usize> {
244 def __len__(&self) -> PyResult<usize> {
245 self.cindex(py).borrow().inner().len(py)
245 self.cindex(py).borrow().inner().len(py)
246 }
246 }
247
247
248 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
248 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
249 // this conversion seems needless, but that's actually because
249 // this conversion seems needless, but that's actually because
250 // `index_getitem` does not handle conversion from PyLong,
250 // `index_getitem` does not handle conversion from PyLong,
251 // which expressions such as [e for e in index] internally use.
251 // which expressions such as [e for e in index] internally use.
252 // Note that we don't seem to have a direct way to call
252 // Note that we don't seem to have a direct way to call
253 // PySequence_GetItem (does the job), which would possibly be better
253 // PySequence_GetItem (does the job), which would possibly be better
254 // for performance
254 // for performance
255 let key = match key.extract::<Revision>(py) {
255 let key = match key.extract::<i32>(py) {
256 Ok(rev) => rev.to_py_object(py).into_object(),
256 Ok(rev) => rev.to_py_object(py).into_object(),
257 Err(_) => key,
257 Err(_) => key,
258 };
258 };
259 self.cindex(py).borrow().inner().get_item(py, key)
259 self.cindex(py).borrow().inner().get_item(py, key)
260 }
260 }
261
261
262 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
262 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
263 self.cindex(py).borrow().inner().set_item(py, key, value)
263 self.cindex(py).borrow().inner().set_item(py, key, value)
264 }
264 }
265
265
266 def __contains__(&self, item: PyObject) -> PyResult<bool> {
266 def __contains__(&self, item: PyObject) -> PyResult<bool> {
267 // ObjectProtocol does not seem to provide contains(), so
267 // ObjectProtocol does not seem to provide contains(), so
268 // this is an equivalent implementation of the index_contains()
268 // this is an equivalent implementation of the index_contains()
269 // defined in revlog.c
269 // defined in revlog.c
270 let cindex = self.cindex(py).borrow();
270 let cindex = self.cindex(py).borrow();
271 match item.extract::<Revision>(py) {
271 match item.extract::<i32>(py) {
272 Ok(rev) => {
272 Ok(rev) => {
273 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
273 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
274 }
274 }
275 Err(_) => {
275 Err(_) => {
276 cindex.inner().call_method(
276 cindex.inner().call_method(
277 py,
277 py,
278 "has_node",
278 "has_node",
279 PyTuple::new(py, &[item]),
279 PyTuple::new(py, &[item]),
280 None)?
280 None)?
281 .extract(py)
281 .extract(py)
282 }
282 }
283 }
283 }
284 }
284 }
285
285
286 def nodemap_data_all(&self) -> PyResult<PyBytes> {
286 def nodemap_data_all(&self) -> PyResult<PyBytes> {
287 self.inner_nodemap_data_all(py)
287 self.inner_nodemap_data_all(py)
288 }
288 }
289
289
290 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
290 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
291 self.inner_nodemap_data_incremental(py)
291 self.inner_nodemap_data_incremental(py)
292 }
292 }
293 def update_nodemap_data(
293 def update_nodemap_data(
294 &self,
294 &self,
295 docket: PyObject,
295 docket: PyObject,
296 nm_data: PyObject
296 nm_data: PyObject
297 ) -> PyResult<PyObject> {
297 ) -> PyResult<PyObject> {
298 self.inner_update_nodemap_data(py, docket, nm_data)
298 self.inner_update_nodemap_data(py, docket, nm_data)
299 }
299 }
300
300
301 @property
301 @property
302 def entry_size(&self) -> PyResult<PyInt> {
302 def entry_size(&self) -> PyResult<PyInt> {
303 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
303 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
304 }
304 }
305
305
306 @property
306 @property
307 def rust_ext_compat(&self) -> PyResult<PyInt> {
307 def rust_ext_compat(&self) -> PyResult<PyInt> {
308 self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
308 self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
309 }
309 }
310
310
311 });
311 });
312
312
313 impl MixedIndex {
313 impl MixedIndex {
314 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
314 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
315 Self::create_instance(
315 Self::create_instance(
316 py,
316 py,
317 RefCell::new(cindex::Index::new(py, cindex)?),
317 RefCell::new(cindex::Index::new(py, cindex)?),
318 RefCell::new(None),
318 RefCell::new(None),
319 RefCell::new(None),
319 RefCell::new(None),
320 RefCell::new(None),
320 RefCell::new(None),
321 )
321 )
322 }
322 }
323
323
324 /// This is scaffolding at this point, but it could also become
324 /// This is scaffolding at this point, but it could also become
325 /// a way to start a persistent nodemap or perform a
325 /// a way to start a persistent nodemap or perform a
326 /// vacuum / repack operation
326 /// vacuum / repack operation
327 fn fill_nodemap(
327 fn fill_nodemap(
328 &self,
328 &self,
329 py: Python,
329 py: Python,
330 nt: &mut NodeTree,
330 nt: &mut NodeTree,
331 ) -> PyResult<PyObject> {
331 ) -> PyResult<PyObject> {
332 let index = self.cindex(py).borrow();
332 let index = self.cindex(py).borrow();
333 for r in 0..index.len() {
333 for r in 0..index.len() {
334 let rev = r as Revision;
334 let rev = r as Revision;
335 // in this case node() won't ever return None
335 // in this case node() won't ever return None
336 nt.insert(&*index, index.node(rev).unwrap(), rev)
336 nt.insert(&*index, index.node(rev).unwrap(), rev)
337 .map_err(|e| nodemap_error(py, e))?
337 .map_err(|e| nodemap_error(py, e))?
338 }
338 }
339 Ok(py.None())
339 Ok(py.None())
340 }
340 }
341
341
342 fn get_nodetree<'a>(
342 fn get_nodetree<'a>(
343 &'a self,
343 &'a self,
344 py: Python<'a>,
344 py: Python<'a>,
345 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
345 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
346 if self.nt(py).borrow().is_none() {
346 if self.nt(py).borrow().is_none() {
347 let readonly = Box::new(Vec::new());
347 let readonly = Box::new(Vec::new());
348 let mut nt = NodeTree::load_bytes(readonly, 0);
348 let mut nt = NodeTree::load_bytes(readonly, 0);
349 self.fill_nodemap(py, &mut nt)?;
349 self.fill_nodemap(py, &mut nt)?;
350 self.nt(py).borrow_mut().replace(nt);
350 self.nt(py).borrow_mut().replace(nt);
351 }
351 }
352 Ok(self.nt(py))
352 Ok(self.nt(py))
353 }
353 }
354
354
355 /// forward a method call to the underlying C index
355 /// forward a method call to the underlying C index
356 fn call_cindex(
356 fn call_cindex(
357 &self,
357 &self,
358 py: Python,
358 py: Python,
359 name: &str,
359 name: &str,
360 args: &PyTuple,
360 args: &PyTuple,
361 kwargs: Option<&PyDict>,
361 kwargs: Option<&PyDict>,
362 ) -> PyResult<PyObject> {
362 ) -> PyResult<PyObject> {
363 self.cindex(py)
363 self.cindex(py)
364 .borrow()
364 .borrow()
365 .inner()
365 .inner()
366 .call_method(py, name, args, kwargs)
366 .call_method(py, name, args, kwargs)
367 }
367 }
368
368
369 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
369 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
370 self.cindex(py).borrow().clone_ref(py)
370 self.cindex(py).borrow().clone_ref(py)
371 }
371 }
372
372
373 /// Returns the full nodemap bytes to be written as-is to disk
373 /// Returns the full nodemap bytes to be written as-is to disk
374 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
374 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
375 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
375 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
376 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
376 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
377
377
378 // If there's anything readonly, we need to build the data again from
378 // If there's anything readonly, we need to build the data again from
379 // scratch
379 // scratch
380 let bytes = if readonly.len() > 0 {
380 let bytes = if readonly.len() > 0 {
381 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
381 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
382 self.fill_nodemap(py, &mut nt)?;
382 self.fill_nodemap(py, &mut nt)?;
383
383
384 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
384 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
385 assert_eq!(readonly.len(), 0);
385 assert_eq!(readonly.len(), 0);
386
386
387 bytes
387 bytes
388 } else {
388 } else {
389 bytes
389 bytes
390 };
390 };
391
391
392 let bytes = PyBytes::new(py, &bytes);
392 let bytes = PyBytes::new(py, &bytes);
393 Ok(bytes)
393 Ok(bytes)
394 }
394 }
395
395
396 /// Returns the last saved docket along with the size of any changed data
396 /// Returns the last saved docket along with the size of any changed data
397 /// (in number of blocks), and said data as bytes.
397 /// (in number of blocks), and said data as bytes.
398 fn inner_nodemap_data_incremental(
398 fn inner_nodemap_data_incremental(
399 &self,
399 &self,
400 py: Python,
400 py: Python,
401 ) -> PyResult<PyObject> {
401 ) -> PyResult<PyObject> {
402 let docket = self.docket(py).borrow();
402 let docket = self.docket(py).borrow();
403 let docket = match docket.as_ref() {
403 let docket = match docket.as_ref() {
404 Some(d) => d,
404 Some(d) => d,
405 None => return Ok(py.None()),
405 None => return Ok(py.None()),
406 };
406 };
407
407
408 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
408 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
409 let masked_blocks = node_tree.masked_readonly_blocks();
409 let masked_blocks = node_tree.masked_readonly_blocks();
410 let (_, data) = node_tree.into_readonly_and_added_bytes();
410 let (_, data) = node_tree.into_readonly_and_added_bytes();
411 let changed = masked_blocks * std::mem::size_of::<Block>();
411 let changed = masked_blocks * std::mem::size_of::<Block>();
412
412
413 Ok((docket, changed, PyBytes::new(py, &data))
413 Ok((docket, changed, PyBytes::new(py, &data))
414 .to_py_object(py)
414 .to_py_object(py)
415 .into_object())
415 .into_object())
416 }
416 }
417
417
    /// Update the nodemap from the new (mmaped) data.
    /// The docket is kept as a reference for later incremental calls.
    ///
    /// `docket` is the Python nodemap docket object (read for `tip_rev`);
    /// `nm_data` is a buffer-protocol object (typically an mmap) holding the
    /// serialized nodemap. Returns `py.None()` on success.
    fn inner_update_nodemap_data(
        &self,
        py: Python,
        docket: PyObject,
        nm_data: PyObject,
    ) -> PyResult<PyObject> {
        let buf = PyBuffer::get(py, &nm_data)?;
        let len = buf.item_count();

        // Build a slice from the mmap'ed buffer data.
        // Only safe when the buffer is a C-contiguous run of single bytes;
        // anything else is rejected below rather than reinterpreted.
        let cbuf = buf.buf_ptr();
        let bytes = if std::mem::size_of::<u8>() == buf.item_size()
            && buf.is_c_contiguous()
            && u8::is_compatible_format(buf.format())
        {
            unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
        } else {
            return Err(PyErr::new::<ValueError, _>(
                py,
                "Nodemap data buffer has an invalid memory representation"
                    .to_string(),
            ));
        };

        // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
        // pointer. (The `bytes` slice above borrows from it for the lifetime
        // of the NodeTree built below.)
        self.mmap(py).borrow_mut().replace(buf);

        let mut nt = NodeTree::load_bytes(Box::new(bytes), len);

        // tip_rev comes from Python and is untrusted at this point; the
        // `.into()` lifts it to an UncheckedRevision to be validated below.
        let data_tip =
            docket.getattr(py, "tip_rev")?.extract::<i32>(py)?.into();
        self.docket(py).borrow_mut().replace(docket.clone_ref(py));
        let idx = self.cindex(py).borrow();
        // Validate the untrusted tip against the index; a stale/garbage
        // docket value surfaces as a RevisionNotInIndex error instead of
        // being used blindly.
        let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
            nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
        })?;
        let current_tip = idx.len();

        // Incrementally insert any revisions added since the docket was
        // written, so the in-memory nodemap catches up with the index.
        for r in (data_tip + 1)..current_tip as Revision {
            let rev = r as Revision;
            // in this case node() won't ever return None
            nt.insert(&*idx, idx.node(rev).unwrap(), rev)
                .map_err(|e| nodemap_error(py, e))?
        }

        *self.nt(py).borrow_mut() = Some(nt);

        Ok(py.None())
    }
467 }
470 }
468
471
469 fn revlog_error(py: Python) -> PyErr {
472 fn revlog_error(py: Python) -> PyErr {
470 match py
473 match py
471 .import("mercurial.error")
474 .import("mercurial.error")
472 .and_then(|m| m.get(py, "RevlogError"))
475 .and_then(|m| m.get(py, "RevlogError"))
473 {
476 {
474 Err(e) => e,
477 Err(e) => e,
475 Ok(cls) => PyErr::from_instance(
478 Ok(cls) => PyErr::from_instance(
476 py,
479 py,
477 cls.call(py, (py.None(),), None).ok().into_py_object(py),
480 cls.call(py, (py.None(),), None).ok().into_py_object(py),
478 ),
481 ),
479 }
482 }
480 }
483 }
481
484
482 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
485 fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
483 PyErr::new::<ValueError, _>(
486 PyErr::new::<ValueError, _>(
484 py,
487 py,
485 format!(
488 format!(
486 "Inconsistency: Revision {} found in nodemap \
489 "Inconsistency: Revision {} found in nodemap \
487 is not in revlog index",
490 is not in revlog index",
488 rev
491 rev
489 ),
492 ),
490 )
493 )
491 }
494 }
492
495
493 /// Standard treatment of NodeMapError
496 /// Standard treatment of NodeMapError
494 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
497 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
495 match err {
498 match err {
496 NodeMapError::MultipleResults => revlog_error(py),
499 NodeMapError::MultipleResults => revlog_error(py),
497 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
500 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
498 }
501 }
499 }
502 }
500
503
501 /// Create the module, with __package__ given from parent
504 /// Create the module, with __package__ given from parent
502 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
505 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
503 let dotted_name = &format!("{}.revlog", package);
506 let dotted_name = &format!("{}.revlog", package);
504 let m = PyModule::new(py, dotted_name)?;
507 let m = PyModule::new(py, dotted_name)?;
505 m.add(py, "__package__", package)?;
508 m.add(py, "__package__", package)?;
506 m.add(py, "__doc__", "RevLog - Rust implementations")?;
509 m.add(py, "__doc__", "RevLog - Rust implementations")?;
507
510
508 m.add_class::<MixedIndex>(py)?;
511 m.add_class::<MixedIndex>(py)?;
509
512
510 let sys = PyModule::import(py, "sys")?;
513 let sys = PyModule::import(py, "sys")?;
511 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
514 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
512 sys_modules.set_item(py, dotted_name, &m)?;
515 sys_modules.set_item(py, dotted_name, &m)?;
513
516
514 Ok(m)
517 Ok(m)
515 }
518 }
General Comments 0
You need to be logged in to leave comments. Login now