upstream/mercurial-mirror Commit - r49981:a5ef50be

rust-revlog: make `Changelog` and `ManifestLog` unaware of `Repo`...

Martin von Zweigbergk -

r49981:a5ef50be default

parent child

rust/hg-core/src/repo.rs

0 +16 -2

              use crate::changelog::Changelog;
              use crate::config::{Config, ConfigError, ConfigParseError};
              use crate::dirstate::DirstateParents;
              use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
              use crate::dirstate_tree::owning::OwningDirstateMap;
              use crate::errors::HgResultExt;
              use crate::errors::{HgError, IoResultExt};
              use crate::lock::{try_with_lock_no_wait, LockError};
              use crate::manifest::{Manifest, Manifestlog};
              use crate::revlog::filelog::Filelog;
              use crate::revlog::revlog::RevlogError;
              use crate::utils::files::get_path_from_bytes;
              use crate::utils::hg_path::HgPath;
              use crate::utils::SliceExt;
              use crate::vfs::{is_dir, is_file, Vfs};
              use crate::{requirements, NodePrefix};
              use crate::{DirstateError, Revision};
              use std::cell::{Ref, RefCell, RefMut};
              use std::collections::HashSet;
              use std::io::Seek;
              use std::io::SeekFrom;
              use std::io::Write as IoWrite;
              use std::path::{Path, PathBuf};
              /// A repository on disk
              pub struct Repo {
                  /// Root of the working copy: the directory that contains `.hg`.
                  working_directory: PathBuf,
                  /// The `.hg` directory inside `working_directory`.
                  dot_hg: PathBuf,
                  /// The store directory: `.hg/store`, or the `store` directory of the
                  /// share source when the repository is shared.
                  store: PathBuf,
                  /// Requirements loaded for this repository, including the store's
                  /// requirements when share-safe is enabled.
                  requirements: HashSet<String>,
                  /// Configuration, combined with the repository's own config files
                  /// unless the `HGRCSKIPREPO` environment variable is set.
                  config: Config,
                  /// Lazily-read parents of the dirstate.
                  dirstate_parents: LazyCell<DirstateParents, HgError>,
                  /// Lazily-read ID of the dirstate-v2 data file, `None` when the
                  /// dirstate file is missing or empty.
                  dirstate_data_file_uuid: LazyCell<Option<Vec<u8>>, HgError>,
                  /// Lazily-loaded dirstate map.
                  dirstate_map: LazyCell<OwningDirstateMap, DirstateError>,
                  /// Lazily-opened changelog.
                  changelog: LazyCell<Changelog, HgError>,
                  /// Lazily-opened manifest log.
                  manifestlog: LazyCell<Manifestlog, HgError>,
              }
              /// Errors that can occur while locating or opening a repository.
              #[derive(Debug, derive_more::From)]
              pub enum RepoError {
                  /// No repository was found at `at` (or, when searching, in `at` or
                  /// any of its ancestors).
                  NotFound {
                      at: PathBuf,
                  },
                  /// A configuration file could not be parsed.
                  #[from]
                  ConfigParseError(ConfigParseError),
                  /// Any other error.
                  #[from]
                  Other(HgError),
              }
              /// Converts the payload of each `ConfigError` variant through the
              /// corresponding `From` implementation of `RepoError`.
              impl From<ConfigError> for RepoError {
                  fn from(config_error: ConfigError) -> Self {
                      match config_error {
                          ConfigError::Parse(parse_error) => Self::from(parse_error),
                          ConfigError::Other(other_error) => Self::from(other_error),
                      }
                  }
              }
              impl Repo {
                  /// Searches the current working directory, then each of its ancestors
                  /// in turn, for a `.hg` sub-directory and returns the first directory
                  /// that has one.
                  ///
                  /// Returns `RepoError::NotFound` (pointing at the current directory)
                  /// when no such directory exists.
                  pub fn find_repo_root() -> Result<PathBuf, RepoError> {
                      let cwd = crate::utils::current_dir()?;
                      // `ancestors()` starts with `cwd` itself before walking upwards.
                      for candidate in cwd.ancestors() {
                          let has_dot_hg = is_dir(candidate.join(".hg"))?;
                          if has_dot_hg {
                              return Ok(candidate.to_path_buf());
                          }
                      }
                      Err(RepoError::NotFound { at: cwd })
                  }
                  /// Find a repository, either at the given path (which must contain a `.hg`
                  /// sub-directory) or by searching the current directory and its
                  /// ancestors.
                  ///
                  /// A method with two very different "modes" like this is usually a code
                  /// smell that calls for two methods instead, but in this case an `Option`
                  /// is what rhg sub-commands get from Clap for the `-R` / `--repository`
                  /// CLI argument. Having two methods would just move that `if` to almost
                  /// all callers.
                  ///
                  /// # Errors
                  ///
                  /// With an explicit path: an "unsupported" error if the path is a regular
                  /// file (reported as a bundle repository), or `RepoError::NotFound` if it
                  /// has no `.hg` sub-directory. Without one: whatever `find_repo_root`
                  /// returns. In both modes, any error from opening the repository.
                  pub fn find(
                      config: &Config,
                      explicit_path: Option<PathBuf>,
                  ) -> Result<Self, RepoError> {
                      if let Some(root) = explicit_path {
                          // `root` is owned here, so it is moved rather than cloned.
                          if is_dir(root.join(".hg"))? {
                              Self::new_at_path(root, config)
                          } else if is_file(&root)? {
                              Err(HgError::unsupported("bundle repository").into())
                          } else {
                              Err(RepoError::NotFound { at: root })
                          }
                      } else {
                          let root = Self::find_repo_root()?;
                          Self::new_at_path(root, config)
                      }
                  }
                  /// To be called after checking that `.hg` is a sub-directory
                  fn new_at_path(
                      working_directory: PathBuf,
                      config: &Config,
                  ) -> Result<Self, RepoError> {
                      let dot_hg = working_directory.join(".hg");
                      let mut repo_config_files = Vec::new();
                      repo_config_files.push(dot_hg.join("hgrc"));
                      repo_config_files.push(dot_hg.join("hgrc-not-shared"));
                      let hg_vfs = Vfs { base: &dot_hg };
                      let mut reqs = requirements::load_if_exists(hg_vfs)?;
                      let relative =
                          reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
                      let shared =
                          reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
                      // From `mercurial/localrepo.py`:
                      //
                      // if .hg/requires contains the sharesafe requirement, it means
                      // there exists a `.hg/store/requires` too and we should read it
                      // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
                      // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
                      // is not present, refer checkrequirementscompat() for that
                      //
                      // However, if SHARESAFE_REQUIREMENT is not present, it means that the
                      // repository was shared the old way. We check the share source
                      // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
                      // current repository needs to be reshared
                      let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
                      let store_path;
                      if !shared {
                          store_path = dot_hg.join("store");
                      } else {
                          let bytes = hg_vfs.read("sharedpath")?;
                          let mut shared_path =
                              get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
                                  .to_owned();
                          if relative {
                              shared_path = dot_hg.join(shared_path)
                          }
                          if !is_dir(&shared_path)? {
                              return Err(HgError::corrupted(format!(
                                  ".hg/sharedpath points to nonexistent directory {}",
                                  shared_path.display()
                              ))
                              .into());
                          }
                          store_path = shared_path.join("store");
                          let source_is_share_safe =
                              requirements::load(Vfs { base: &shared_path })?
                                  .contains(requirements::SHARESAFE_REQUIREMENT);
                          if share_safe != source_is_share_safe {
                              return Err(HgError::unsupported("share-safe mismatch").into());
                          }
                          if share_safe {
                              repo_config_files.insert(0, shared_path.join("hgrc"))
                          }
                      }
                      if share_safe {
                          reqs.extend(requirements::load(Vfs { base: &store_path })?);
                      }
                      let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
                          config.combine_with_repo(&repo_config_files)?
                      } else {
                          config.clone()
                      };
                      let repo = Self {
                          requirements: reqs,
                          working_directory,
                          store: store_path,
                          dot_hg,
                          config: repo_config,
                          dirstate_parents: LazyCell::new(Self::read_dirstate_parents),
                          dirstate_data_file_uuid: LazyCell::new(
                              Self::read_dirstate_data_file_uuid,
                          ),
                          dirstate_map: LazyCell::new(Self::new_dirstate_map),
-                         changelog: LazyCell::new(Changelog::open),
-                         manifestlog: LazyCell::new(Manifestlog::open),
+                         changelog: LazyCell::new(Self::new_changelog),
+                         manifestlog: LazyCell::new(Self::new_manifestlog),
                      };
                      requirements::check(&repo)?;
                      Ok(repo)
                  }
                  /// Path of the working directory (the directory containing `.hg`).
                  pub fn working_directory_path(&self) -> &Path {
                      &self.working_directory
                  }
                  /// The set of requirements loaded when the repository was opened.
                  pub fn requirements(&self) -> &HashSet<String> {
                      &self.requirements
                  }
                  /// The configuration in effect for this repository.
                  pub fn config(&self) -> &Config {
                      &self.config
                  }
                  /// Returns a `Vfs` rooted at the `.hg` directory, for accessing
                  /// repository files other than those of the store.
                  pub fn hg_vfs(&self) -> Vfs<'_> {
                      Vfs { base: &self.dot_hg }
                  }
                  /// Returns a `Vfs` rooted at the store directory, for accessing
                  /// repository store files. This is `.hg/store`, or the `store`
                  /// directory of the share source for shared repositories.
                  pub fn store_vfs(&self) -> Vfs<'_> {
                      Vfs { base: &self.store }
                  }
                  /// Returns a `Vfs` rooted at the working directory, for accessing the
                  /// working copy.
                  pub fn working_directory_vfs(&self) -> Vfs<'_> {
                      Vfs {
                          base: &self.working_directory,
                      }
                  }
                  /// Runs `f` through `try_with_lock_no_wait`, using the lock named
                  /// `wlock` in the `.hg` directory, and returns the value `f` produced.
                  ///
                  /// NOTE(review): presumably this fails with a `LockError` instead of
                  /// blocking when the lock is already held — confirm against
                  /// `crate::lock`.
                  pub fn try_with_wlock_no_wait<R>(
                      &self,
                      f: impl FnOnce() -> R,
                  ) -> Result<R, LockError> {
                      try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
                  }
                  /// Whether the requirements include `DIRSTATE_V2_REQUIREMENT`.
                  pub fn has_dirstate_v2(&self) -> bool {
                      self.requirements
                          .contains(requirements::DIRSTATE_V2_REQUIREMENT)
                  }
                  /// Whether the requirements include `SPARSE_REQUIREMENT`.
                  pub fn has_sparse(&self) -> bool {
                      self.requirements.contains(requirements::SPARSE_REQUIREMENT)
                  }
                  /// Whether the requirements include `NARROW_REQUIREMENT`.
                  pub fn has_narrow(&self) -> bool {
                      self.requirements.contains(requirements::NARROW_REQUIREMENT)
                  }
                  /// Reads the raw bytes of `.hg/dirstate`.
                  ///
                  /// A missing file is not an error: it yields an empty buffer.
                  fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
                      let contents =
                          self.hg_vfs().read("dirstate").io_not_found_as_none()?;
                      Ok(contents.unwrap_or_default())
                  }
                  /// Returns the parents recorded in the dirstate, reading them via
                  /// `read_dirstate_parents` on first access.
                  pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
                      Ok(*self.dirstate_parents.get_or_init(self)?)
                  }
                  /// Reads the dirstate parents from the dirstate file.
                  ///
                  /// As a side effect this caches the parents in `dirstate_parents` and,
                  /// for dirstate-v2 repositories, the data file ID found in the docket
                  /// (or `None` for an empty dirstate) in `dirstate_data_file_uuid`.
                  fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
                      let contents = self.dirstate_file_contents()?;
                      let is_v2 = self.has_dirstate_v2();
                      let parents = match (contents.is_empty(), is_v2) {
                          // Empty or missing dirstate: null parents, and no data file.
                          (true, true) => {
                              self.dirstate_data_file_uuid.set(None);
                              DirstateParents::NULL
                          }
                          (true, false) => DirstateParents::NULL,
                          // dirstate-v2: the parents live in the docket.
                          (false, true) => {
                              let docket =
                                  crate::dirstate_tree::on_disk::read_docket(&contents)?;
                              self.dirstate_data_file_uuid
                                  .set(Some(docket.uuid.to_owned()));
                              docket.parents()
                          }
                          // dirstate-v1: parse the parents out of the file itself.
                          (false, false) => {
                              crate::dirstate::parsers::parse_dirstate_parents(&contents)?
                                  .clone()
                          }
                      };
                      self.dirstate_parents.set(parents);
                      Ok(parents)
                  }
                  /// Reads the ID of the dirstate-v2 data file from the docket.
                  ///
                  /// Returns `None` when the dirstate file is missing or empty. The
                  /// dirstate parents are cached in `dirstate_parents` as a side effect.
                  ///
                  /// # Panics
                  ///
                  /// Panics if the repository does not use dirstate-v2.
                  fn read_dirstate_data_file_uuid(
                      &self,
                  ) -> Result<Option<Vec<u8>>, HgError> {
                      assert!(
                          self.has_dirstate_v2(),
                          "accessing dirstate data file ID without dirstate-v2"
                      );
                      let contents = self.dirstate_file_contents()?;
                      if contents.is_empty() {
                          self.dirstate_parents.set(DirstateParents::NULL);
                          return Ok(None);
                      }
                      let docket = crate::dirstate_tree::on_disk::read_docket(&contents)?;
                      self.dirstate_parents.set(docket.parents());
                      Ok(Some(docket.uuid.to_owned()))
                  }
                  /// Loads the dirstate map from disk.
                  ///
                  /// Handles three cases: an empty (or missing) dirstate file, a
                  /// dirstate-v2 docket plus its data file, and a dirstate-v1 file. In
                  /// every case the parents (and, for dirstate-v2, the data file ID) are
                  /// cached on `self` as a side effect.
                  fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
                      let contents = self.dirstate_file_contents()?;
                      let is_v2 = self.has_dirstate_v2();

                      if contents.is_empty() {
                          self.dirstate_parents.set(DirstateParents::NULL);
                          if is_v2 {
                              self.dirstate_data_file_uuid.set(None);
                          }
                          return Ok(OwningDirstateMap::new_empty(Vec::new()));
                      }

                      if !is_v2 {
                          let (map, parents) = OwningDirstateMap::new_v1(contents)?;
                          self.dirstate_parents.set(parents);
                          return Ok(map);
                      }

                      let docket =
                          crate::dirstate_tree::on_disk::read_docket(&contents)?;
                      self.dirstate_parents.set(docket.parents());
                      self.dirstate_data_file_uuid
                          .set(Some(docket.uuid.to_owned()));
                      let data_size = docket.data_size();
                      let metadata = docket.tree_metadata();
                      // A missing data file is treated like an empty one.
                      match self
                          .hg_vfs()
                          .mmap_open(docket.data_filename())
                          .io_not_found_as_none()?
                      {
                          Some(data_mmap) => {
                              OwningDirstateMap::new_v2(data_mmap, data_size, metadata)
                          }
                          None => {
                              OwningDirstateMap::new_v2(Vec::new(), data_size, metadata)
                          }
                      }
                  }
                  /// Returns a shared borrow of the dirstate map, loading it via
                  /// `new_dirstate_map` on first access.
                  pub fn dirstate_map(
                      &self,
                  ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
                      self.dirstate_map.get_or_init(self)
                  }
                  /// Returns a mutable borrow of the dirstate map, loading it via
                  /// `new_dirstate_map` on first access.
                  pub fn dirstate_map_mut(
                      &self,
                  ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
                      self.dirstate_map.get_mut_or_init(self)
                  }
+                 fn new_changelog(&self) -> Result<Changelog, HgError> {
+                     let use_nodemap = self
+                         .requirements
+                         .contains(requirements::NODEMAP_REQUIREMENT);
+                     Changelog::open(&self.store_vfs(), use_nodemap)
+                 }
                  /// Returns a shared borrow of the changelog, opening it on first
                  /// access.
                  pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
                      self.changelog.get_or_init(self)
                  }
                  /// Returns a mutable borrow of the changelog, opening it on first
                  /// access.
                  pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
                      self.changelog.get_mut_or_init(self)
                  }
+                 fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
+                     let use_nodemap = self
+                         .requirements
+                         .contains(requirements::NODEMAP_REQUIREMENT);
+                     Manifestlog::open(&self.store_vfs(), use_nodemap)
+                 }
                  /// Returns a shared borrow of the manifest log, opening it on first
                  /// access.
                  pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
                      self.manifestlog.get_or_init(self)
                  }
                  /// Returns a mutable borrow of the manifest log, opening it on first
                  /// access.
                  pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
                      self.manifestlog.get_mut_or_init(self)
                  }
                  /// Looks up the *changeset* identified by `node` in the changelog and
                  /// returns the manifest that changeset refers to.
                  pub fn manifest_for_node(
                      &self,
                      node: impl Into<NodePrefix>,
                  ) -> Result<Manifest, RevlogError> {
                      // The manifest log is borrowed before the changelog is consulted.
                      let manifestlog = self.manifestlog()?;
                      let changelog = self.changelog()?;
                      let manifest_node =
                          changelog.data_for_node(node.into())?.manifest_node()?;
                      manifestlog.data_for_node(manifest_node.into())
                  }
                  /// Looks up the *changeset* with revision number `revision` in the
                  /// changelog and returns the manifest that changeset refers to.
                  pub fn manifest_for_rev(
                      &self,
                      revision: Revision,
                  ) -> Result<Manifest, RevlogError> {
                      // The manifest log is borrowed before the changelog is consulted.
                      let manifestlog = self.manifestlog()?;
                      let changelog = self.changelog()?;
                      let manifest_node =
                          changelog.data_for_rev(revision)?.manifest_node()?;
                      manifestlog.data_for_node(manifest_node.into())
                  }
                  /// Whether the dirstate has an entry for `.hgsub` whose state is
                  /// tracked.
                  pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
                      match self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
                          Some(entry) => Ok(entry.state().is_tracked()),
                          None => Ok(false),
                      }
                  }
                  /// Opens the filelog for `path` via `Filelog::open`.
                  pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
                      Filelog::open(self, path)
                  }
                  /// Write to disk any updates that were made through `dirstate_map_mut`.
                  ///
                  /// For dirstate-v2, the packed data is first written to the data file
                  /// (appended to the existing one, or written to a newly created one),
                  /// then a docket referring to it is serialized. For dirstate-v1 the
                  /// map is packed together with the parents. In both cases the result
                  /// is then written to `.hg/dirstate` with `atomic_write`.
                  ///
                  /// The "wlock" must be held while calling this.
                  /// See for example `try_with_wlock_no_wait`.
                  ///
                  /// TODO: have a `WritableRepo` type only accessible while holding the
                  /// lock?
                  pub fn write_dirstate(&self) -> Result<(), DirstateError> {
                      let map = self.dirstate_map()?;
                      // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
                      // it’s unset
                      let parents = self.dirstate_parents()?;
                      let packed_dirstate = if self.has_dirstate_v2() {
                          let uuid = self.dirstate_data_file_uuid.get_or_init(self)?;
                          let mut uuid = uuid.as_ref();
                          // Appending is only an option when a data file ID is known.
                          let can_append = uuid.is_some();
                          let (data, tree_metadata, append) = map.pack_v2(can_append)?;
                          if !append {
                              // Not appending: forget the current ID so that a new one
                              // is generated below.
                              uuid = None
                          }
                          let uuid = if let Some(uuid) = uuid {
                              std::str::from_utf8(uuid)
                                  .map_err(|_| {
                                      HgError::corrupted("non-UTF-8 dirstate data file ID")
                                  })?
                                  .to_owned()
                          } else {
                              DirstateDocket::new_uid()
                          };
                          let data_filename = format!("dirstate.{}", uuid);
                          let data_filename = self.hg_vfs().join(data_filename);
                          let mut options = std::fs::OpenOptions::new();
                          if append {
                              options.append(true);
                          } else {
                              // `create_new` makes opening fail if the file already
                              // exists.
                              options.write(true).create_new(true);
                          }
                          // The closure groups the fallible I/O calls so that a single
                          // `when_writing_file` can wrap their errors. Its result, the
                          // file position after the write, is the data size recorded in
                          // the docket.
                          let data_size = (|| {
                              // TODO: loop and try another random ID if !append and this
                              // returns `ErrorKind::AlreadyExists`? Collision chance of two
                              // random IDs is one in 2**32
                              let mut file = options.open(&data_filename)?;
                              file.write_all(&data)?;
                              file.flush()?;
                              // TODO: use https://doc.rust-lang.org/std/io/trait.Seek.html#method.stream_position when we require Rust 1.51+
                              file.seek(SeekFrom::Current(0))
                          })()
                          .when_writing_file(&data_filename)?;
                          DirstateDocket::serialize(
                              parents,
                              tree_metadata,
                              data_size,
                              uuid.as_bytes(),
                          )
                          .map_err(|_: std::num::TryFromIntError| {
                              HgError::corrupted("overflow in dirstate docket serialization")
                          })?
                      } else {
                          map.pack_v1(parents)?
                      };
                      self.hg_vfs().atomic_write("dirstate", &packed_dirstate)?;
                      Ok(())
                  }
              }
              /// Lazily-initialized component of `Repo` with interior mutability
              ///
              /// This differs from `OnceCell` in that the value can still be "deinitialized"
              /// later by setting its inner `Option` to `None`.
              struct LazyCell<T, E> {
                  /// The cached value; `None` until it is initialized or explicitly set.
                  value: RefCell<Option<T>>,
                  /// Computes the value from the repository when it is first needed.
                  // `Fn`s that don’t capture environment are zero-size, so this box does
                  // not allocate:
                  init: Box<dyn Fn(&Repo) -> Result<T, E>>,
              }
              impl<T, E> LazyCell<T, E> {
                  /// Creates an empty cell that will call `init` to produce its value.
                  fn new(init: impl Fn(&Repo) -> Result<T, E> + 'static) -> Self {
                      let init = Box::new(init);
                      let value = RefCell::new(None);
                      Self { value, init }
                  }

                  /// Stores `value`, replacing whatever the cell held before.
                  fn set(&self, value: T) {
                      self.value.borrow_mut().replace(value);
                  }

                  /// Returns a shared borrow of the value, running the initializer first
                  /// if the cell is still empty.
                  fn get_or_init(&self, repo: &Repo) -> Result<Ref<T>, E> {
                      // The shared borrow taken for this check ends with the condition,
                      // so `borrow_mut` is only used when a value must be stored. This
                      // avoids a panic when another shared borrow is outstanding and no
                      // mutation is needed.
                      if self.value.borrow().is_none() {
                          let fresh = (self.init)(repo)?;
                          *self.value.borrow_mut() = Some(fresh);
                      }
                      Ok(Ref::map(self.value.borrow(), |slot| {
                          slot.as_ref().unwrap()
                      }))
                  }

                  /// Returns a mutable borrow of the value, running the initializer first
                  /// if the cell is still empty.
                  ///
                  /// The mutable borrow is taken before the initializer runs and is held
                  /// while it runs.
                  fn get_mut_or_init(&self, repo: &Repo) -> Result<RefMut<T>, E> {
                      let mut slot = self.value.borrow_mut();
                      if slot.is_none() {
                          let fresh = (self.init)(repo)?;
                          *slot = Some(fresh);
                      }
                      Ok(RefMut::map(slot, |inner| inner.as_mut().unwrap()))
                  }
              }

rust/hg-core/src/revlog/changelog.rs

0 +4 -12

              use crate::errors::HgError;
-             use crate::repo::Repo;
-             use crate::requirements;
              use crate::revlog::revlog::{Revlog, RevlogEntry, RevlogError};
              use crate::revlog::Revision;
              use crate::revlog::{Node, NodePrefix};
              use crate::utils::hg_path::HgPath;
+             use crate::vfs::Vfs;
              use itertools::Itertools;
              use std::ascii::escape_default;
              use std::fmt::{Debug, Formatter};
              /// A specialized `Revlog` to work with `changelog` data format.
              ///
              /// Revision data read through it is returned as `ChangelogRevisionData`.
              pub struct Changelog {
                  /// The generic `revlog` format.
                  pub(crate) revlog: Revlog,
              }
              impl Changelog {
                  /// Open the `changelog` revlog (`00changelog.i`) of a repository's store.
-                 pub fn open(repo: &Repo) -> Result<Self, HgError> {
-                     let use_nodemap = repo
-                         .requirements()
-                         .contains(requirements::NODEMAP_REQUIREMENT);
-                     let revlog = Revlog::open(
-                         &repo.store_vfs(),
-                         "00changelog.i",
-                         None,
-                         use_nodemap,
-                     )?;
+                 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
+                     let revlog =
+                         Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
                      Ok(Self { revlog })
                  }
                  /// Return the `ChangelogRevisionData` for the given node ID.
                  ///
                  /// The node (prefix) is first resolved to a revision number, which is
                  /// then handed to `data_for_rev`.
                  pub fn data_for_node(
                      &self,
                      node: NodePrefix,
                  ) -> Result<ChangelogRevisionData, RevlogError> {
                      self.revlog
                          .rev_from_node(node)
                          .and_then(|rev| self.data_for_rev(rev))
                  }
                  /// Return the `RevlogEntry` of the given revision number, as provided
                  /// by the underlying revlog's `get_entry`.
                  pub fn entry_for_rev(
                      &self,
                      rev: Revision,
                  ) -> Result<RevlogEntry, RevlogError> {
                      self.revlog.get_entry(rev)
                  }
                  /// Return the `ChangelogRevisionData` of the given revision number.
                  ///
                  /// Empty revision data maps to `ChangelogRevisionData::null()`. Data
                  /// that cannot be parsed is reported as a corrupted-repository error.
                  pub fn data_for_rev(
                      &self,
                      rev: Revision,
                  ) -> Result<ChangelogRevisionData, RevlogError> {
                      let bytes = self.revlog.get_rev_data(rev)?.into_owned();
                      if bytes.is_empty() {
                          return Ok(ChangelogRevisionData::null());
                      }
                      ChangelogRevisionData::new(bytes).map_err(|err| {
                          RevlogError::Other(HgError::CorruptedRepository(format!(
                              "Invalid changelog data for revision {}: {:?}",
                              rev, err
                          )))
                      })
                  }
                  /// Return the node ID of the given revision number, as provided by the
                  /// underlying revlog's `node_from_rev`.
                  pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
                      self.revlog.node_from_rev(rev)
                  }
                  /// Return the revision number for the given node ID (or prefix), as
                  /// provided by the underlying revlog's `rev_from_node`.
                  pub fn rev_from_node(
                      &self,
                      node: NodePrefix,
                  ) -> Result<Revision, RevlogError> {
                      self.revlog.rev_from_node(node)
                  }
              }
              /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
              ///
              /// All `*_end` fields are byte offsets into `bytes`, computed once when the
              /// value is built.
              #[derive(PartialEq)]
              pub struct ChangelogRevisionData {
                  /// The data bytes of the `changelog` entry.
                  bytes: Vec<u8>,
                  /// The end offset for the hex manifest (not including the newline)
                  manifest_end: usize,
                  /// The end offset for the user+email (not including the newline)
                  user_end: usize,
                  /// The end offset for the timestamp+timezone+extras (not including the
                  /// newline)
                  timestamp_end: usize,
                  /// The end offset for the file list (not including the newline).
                  /// Equal to `timestamp_end` when the file list is empty.
                  files_end: usize,
              }
              impl ChangelogRevisionData {
                  fn new(bytes: Vec<u8>) -> Result<Self, HgError> {
                      let mut line_iter = bytes.split(|b| b == &b'\n');
                      let manifest_end = line_iter
                          .next()
                          .expect("Empty iterator from split()?")
                          .len();
                      let user_slice = line_iter.next().ok_or_else(|| {
                          HgError::corrupted("Changeset data truncated after manifest line")
                      })?;
                      let user_end = manifest_end + 1 + user_slice.len();
                      let timestamp_slice = line_iter.next().ok_or_else(|| {
                          HgError::corrupted("Changeset data truncated after user line")
                      })?;
                      let timestamp_end = user_end + 1 + timestamp_slice.len();
                      let mut files_end = timestamp_end + 1;
                      loop {
                          let line = line_iter.next().ok_or_else(|| {
                              HgError::corrupted("Changeset data truncated in files list")
                          })?;
                          if line.is_empty() {
                              if files_end == bytes.len() {
                                  // The list of files ended with a single newline (there
                                  // should be two)
                                  return Err(HgError::corrupted(
                                      "Changeset data truncated after files list",
                                  ));
                              }
                              files_end -= 1;
                              break;
                          }
                          files_end += line.len() + 1;
                      }
                      Ok(Self {
                          bytes,
                          manifest_end,
                          user_end,
                          timestamp_end,
                          files_end,
                      })
                  }
                  fn null() -> Self {
                      Self::new(
                          b"0000000000000000000000000000000000000000\n\n0 0\n\n".to_vec(),
                      )
                      .unwrap()
                  }
                  /// Return an iterator over the lines of the entry.
                  pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
                      self.bytes.split(|b| b == &b'\n')
                  }
                  /// Parse the hexadecimal node id stored at the very start of this
                  /// `changelog` entry, i.e. the id of the `manifest` it references.
                  pub fn manifest_node(&self) -> Result<Node, HgError> {
                      Node::from_hex_for_repo(&self.bytes[..self.manifest_end])
                  }
                  /// The complete user line (typically a name followed by an email address
                  /// in angle brackets), without the surrounding newlines.
                  pub fn user(&self) -> &[u8] {
                      // Skip the newline that terminates the manifest line.
                      let start = self.manifest_end + 1;
                      &self.bytes[start..self.user_end]
                  }
                  /// The complete timestamp line: timestamp in seconds, offset in seconds
                  /// and, possibly, extras.
                  // TODO: We should expose this in a more useful way
                  pub fn timestamp_line(&self) -> &[u8] {
                      // Skip the newline that terminates the user line.
                      let start = self.user_end + 1;
                      &self.bytes[start..self.timestamp_end]
                  }
                  /// The files changed in this revision.
                  ///
                  /// Yields nothing when the entry lists no files. In that case
                  /// `files_end == timestamp_end`, so the range
                  /// `timestamp_end + 1..files_end` is inverted and indexing with it would
                  /// panic; `get` returns `None` for such a range instead.
                  pub fn files(&self) -> impl Iterator<Item = &HgPath> {
                      self.bytes
                          .get(self.timestamp_end + 1..self.files_end)
                          .into_iter()
                          .flat_map(|file_list| file_list.split(|b| b == &b'\n'))
                          .map(|path| HgPath::new(path))
                  }
                  /// The change description: everything that follows the two newlines
                  /// closing the files list.
                  pub fn description(&self) -> &[u8] {
                      let start = self.files_end + 2;
                      &self.bytes[start..]
                  }
              }
              impl Debug for ChangelogRevisionData {
                  /// Format the raw bytes and each parsed section (manifest, user,
                  /// timestamp, files, description) with non-printable bytes escaped.
                  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
                      // When the entry lists no files, `files_end == timestamp_end` and the
                      // range below is inverted: indexing with it would panic, whereas
                      // `get` returns `None`, which is shown as an empty files section.
                      let files = self
                          .bytes
                          .get(self.timestamp_end + 1..self.files_end)
                          .unwrap_or_default();
                      f.debug_struct("ChangelogRevisionData")
                          .field("bytes", &debug_bytes(&self.bytes))
                          .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
                          .field(
                              "user",
                              &debug_bytes(
                                  &self.bytes[self.manifest_end + 1..self.user_end],
                              ),
                          )
                          .field(
                              "timestamp",
                              &debug_bytes(
                                  &self.bytes[self.user_end + 1..self.timestamp_end],
                              ),
                          )
                          .field("files", &debug_bytes(files))
                          .field(
                              "description",
                              &debug_bytes(&self.bytes[self.files_end + 2..]),
                          )
                          .finish()
                  }
              }
              /// Render `bytes` as a printable string by passing every byte through
              /// `escape_default`.
              ///
              /// The escaped output consists of ASCII bytes only, so each one converts
              /// directly to a `char`; no intermediate `Vec` or lossy UTF-8 decoding is
              /// needed.
              fn debug_bytes(bytes: &[u8]) -> String {
                  let mut escaped = String::with_capacity(bytes.len());
                  for &byte in bytes {
                      escaped.extend(escape_default(byte).map(char::from));
                  }
                  escaped
              }
              #[cfg(test)]
              mod tests {
                  use super::*;
                  use pretty_assertions::assert_eq;
                  /// Truncated or malformed inputs must be rejected by
                  /// `ChangelogRevisionData::new`.
                  #[test]
                  fn test_create_changelogrevisiondata_invalid() {
                      // Completely empty
                      assert!(ChangelogRevisionData::new(b"".to_vec()).is_err());
                      // No newline after manifest
                      assert!(ChangelogRevisionData::new(b"abcd".to_vec()).is_err());
                      // No newline after user
                      assert!(ChangelogRevisionData::new(b"abcd\n".to_vec()).is_err());
                      // No newline after timestamp
                      assert!(ChangelogRevisionData::new(b"abcd\n\n0 0".to_vec()).is_err());
                      // Missing newline after files
                      assert!(ChangelogRevisionData::new(
                          b"abcd\n\n0 0\nfile1\nfile2".to_vec()
                      )
                      .is_err());
                      // Only one newline after files
                      assert!(ChangelogRevisionData::new(
                          b"abcd\n\n0 0\nfile1\nfile2\n".to_vec()
                      )
                      .is_err());
                  }
                  /// A well-formed entry is split into manifest node, user, timestamp
                  /// line, files and description.
                  #[test]
                  fn test_create_changelogrevisiondata() {
                      // The newlines are written as escapes (with line continuations) so
                      // that the source indentation cannot leak into the data.
                      let data = ChangelogRevisionData::new(
                          b"0123456789abcdef0123456789abcdef01234567\n\
                            Some One <someone@example.com>\n\
                            0 0\n\
                            file1\n\
                            file2\n\
                            \n\
                            some\n\
                            commit\n\
                            message"
                              .to_vec(),
                      )
                      .unwrap();
                      assert_eq!(
                          data.manifest_node().unwrap(),
                          Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                              .unwrap()
                      );
                      assert_eq!(data.user(), b"Some One <someone@example.com>");
                      assert_eq!(data.timestamp_line(), b"0 0");
                      assert_eq!(
                          data.files().collect_vec(),
                          vec![HgPath::new("file1"), HgPath::new("file2")]
                      );
                      assert_eq!(data.description(), b"some\ncommit\nmessage");
                  }
              }

rust/hg-core/src/revlog/manifest.rs

0 +4 -12

              use crate::errors::HgError;
-             use crate::repo::Repo;
-             use crate::requirements;
              use crate::revlog::revlog::{Revlog, RevlogError};
              use crate::revlog::Revision;
              use crate::revlog::{Node, NodePrefix};
              use crate::utils::hg_path::HgPath;
              use crate::utils::SliceExt;
+             use crate::vfs::Vfs;
              /// A specialized `Revlog` to work with `manifest` data format.
              ///
              /// Lookups by node or by revision number return a `Manifest` wrapping the
              /// bytes of the matching revision.
              pub struct Manifestlog {
                  /// The generic `revlog` format; opened from `00manifest.i`.
                  revlog: Revlog,
              }
              impl Manifestlog {
                  /// Open the manifest revlog (`00manifest.i`) through the given store
                  /// `Vfs`.
                  ///
                  /// `use_nodemap` is forwarded unchanged to `Revlog::open`.
-                 pub fn open(repo: &Repo) -> Result<Self, HgError> {
-                     let use_nodemap = repo
-                         .requirements()
-                         .contains(requirements::NODEMAP_REQUIREMENT);
-                     let revlog = Revlog::open(
-                         &repo.store_vfs(),
-                         "00manifest.i",
-                         None,
-                         use_nodemap,
-                     )?;
+                 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
+                     let revlog =
+                         Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?;
                      Ok(Self { revlog })
                  }
                  /// Return the `Manifest` for the given node ID.
                  ///
                  /// Note: this is a node ID in the manifestlog, typically found through
                  /// `ChangelogRevisionData::manifest_node`. It is *not* the node ID of any
                  /// changeset.
                  ///
                  /// The node is first resolved to a revision number, then the lookup is
                  /// delegated to `data_for_rev`.
                  ///
                  /// See also `Repo::manifest_for_node`
                  pub fn data_for_node(
                      &self,
                      node: NodePrefix,
                  ) -> Result<Manifest, RevlogError> {
                      let rev = self.revlog.rev_from_node(node)?;
                      self.data_for_rev(rev)
                  }
                  /// Return the `Manifest` of a given revision number.
                  ///
                  /// Note: this is a revision number in the manifestlog, *not* of any
                  /// changeset.
                  ///
                  /// The returned `Manifest` owns a copy of the revision data.
                  ///
                  /// See also `Repo::manifest_for_rev`
                  pub fn data_for_rev(
                      &self,
                      rev: Revision,
                  ) -> Result<Manifest, RevlogError> {
                      let bytes = self.revlog.get_rev_data(rev)?.into_owned();
                      Ok(Manifest { bytes })
                  }
              }
              /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
              ///
              /// Use `iter` to walk all entries or `find_by_path` to look up a single
              /// path.
              #[derive(Debug)]
              pub struct Manifest {
                  /// Format for a manifest: flat sequence of variable-size entries,
                  /// sorted by path, each as:
                  ///
                  /// ```text
                  /// <path> \0 <hex_node_id> <flags> \n
                  /// ```
                  ///
                  /// The last entry is also terminated by a newline character.
                  /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
                  bytes: Vec<u8>,
              }
              impl Manifest {
                  /// Iterate over all entries of this manifest, parsing each non-empty
                  /// line lazily. A line that cannot be parsed is yielded as an `Err`.
                  pub fn iter(
                      &self,
                  ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
                      self.bytes
                          .split(|&byte| byte == b'\n')
                          .filter(|raw_entry| !raw_entry.is_empty())
                          .map(ManifestEntry::from_raw)
                  }
                  /// Look up `path` in this manifest and return its entry, or `None` if the
                  /// path is not present.
                  ///
                  /// Returns an error if a malformed entry is met during the search.
                  pub fn find_by_path(
                      &self,
                      path: &HgPath,
                  ) -> Result<Option<ManifestEntry>, HgError> {
                      use std::cmp::Ordering;
                      let wanted = path.as_bytes();
                      // The search window always starts and ends on an entry boundary.
                      let mut window: &[u8] = &self.bytes;
                      // This is a binary search modelled on `[T]::binary_search_by`
                      // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>.
                      // There is no slice of entries to index into, so each step jumps to
                      // the middle of the remaining bytes and looks for the newlines
                      // delimiting the entry found there.
                      loop {
                          let candidate = match Self::find_entry_near_middle_of(window)? {
                              Some(range) => range,
                              // Nothing left to inspect: the path is absent.
                              None => return Ok(None),
                          };
                          let (entry_path, rest) =
                              ManifestEntry::split_path(&window[candidate.clone()])?;
                          match entry_path.cmp(wanted) {
                              Ordering::Less => {
                                  // Continue right after the newline ending this entry.
                                  window = &window[candidate.end + 1..];
                              }
                              Ordering::Greater => {
                                  window = &window[..candidate.start];
                              }
                              Ordering::Equal => {
                                  return Ok(Some(ManifestEntry::from_path_and_rest(
                                      entry_path, rest,
                                  )));
                              }
                          }
                      }
                  }
                  /// Return the byte range of the entry located around the middle of
                  /// `bytes`, *excluding* its final newline, or `None` when `bytes` is
                  /// empty.
                  fn find_entry_near_middle_of(
                      bytes: &[u8],
                  ) -> Result<Option<std::ops::Range<usize>>, HgError> {
                      if bytes.is_empty() {
                          return Ok(None);
                      }
                      // Integer division rounds down, so `middle < bytes.len()`.
                      let middle = bytes.len() / 2;
                      let (head, tail) = bytes.split_at(middle);
                      // The entry starts right after the last newline of the first half,
                      // or at the very beginning when there is none (first entry).
                      let entry_start = head
                          .iter()
                          .rposition(|&byte| byte == b'\n')
                          .map_or(0, |newline| newline + 1);
                      // In a well-formed manifest every entry ends with a newline. Since
                      // `bytes` is not empty and `middle < bytes.len()`, the second half
                      // must hold at least the newline closing the last entry. Not
                      // finding one means the manifest is not well-formed.
                      let entry_end = tail
                          .iter()
                          .position(|&byte| byte == b'\n')
                          // No `+ 1`: the newline itself stays out of the range.
                          .map(|offset| middle + offset)
                          .ok_or_else(|| {
                              HgError::corrupted("manifest entry without \\n delimiter")
                          })?;
                      Ok(Some(entry_start..entry_end))
                  }
              }
              /// A single entry of a `Manifest`: a path together with the hexadecimal
              /// node id and the optional flag found on its line.
              #[derive(Debug)]
              pub struct ManifestEntry<'manifest> {
                  /// The path, i.e. the bytes before the `\0` delimiter of the entry.
                  pub path: &'manifest HgPath,
                  /// The node id in hexadecimal form; `node_id` returns it parsed.
                  pub hex_node_id: &'manifest [u8],
                  /// `Some` values are `b'x'`, `b'l'`, or `b't'`
                  pub flags: Option<u8>,
              }
              impl<'a> ManifestEntry<'a> {
                  /// Split a raw entry (without its final newline) at the `\0` delimiter
                  /// into the path and the remaining bytes.
                  fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
                      match bytes.split_2(b'\0') {
                          Some(path_and_rest) => Ok(path_and_rest),
                          None => Err(HgError::corrupted(
                              "manifest entry without \\0 delimiter",
                          )),
                      }
                  }
                  /// Build an entry from an already split line: a trailing `x`, `l` or
                  /// `t` byte in `rest` is taken as the flag, everything before it as the
                  /// hexadecimal node id.
                  fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
                      let (hex_node_id, flags) = match rest.split_last() {
                          Some((&flag, hex)) if matches!(flag, b'x' | b'l' | b't') => {
                              (hex, Some(flag))
                          }
                          _ => (rest, None),
                      };
                      Self {
                          path: HgPath::new(path),
                          hex_node_id,
                          flags,
                      }
                  }
                  /// Parse one raw entry (without its final newline).
                  fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
                      Self::split_path(bytes)
                          .map(|(path, rest)| Self::from_path_and_rest(path, rest))
                  }
                  /// Parse the hexadecimal node id of this entry into a `Node`.
                  pub fn node_id(&self) -> Result<Node, HgError> {
                      let hex = self.hex_node_id;
                      Node::from_hex_for_repo(hex)
                  }
              }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages