upstream/mercurial-mirror Commit - r49378:e91aa800

rhg: disambiguate status without decompressing filelog if possible...

Simon Sapin -

r49378:e91aa800 default

parent child

mercurial/revlogutils/flagutil.py

0 +1 0

              # flagutils.py - code to deal with revlog flags and their processors
              #
              # Copyright 2016 Remi Chaintron <remi@fb.com>
              # Copyright 2016-2019 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              from __future__ import absolute_import
              from ..i18n import _
              from .constants import (
                  REVIDX_DEFAULT_FLAGS,
                  REVIDX_ELLIPSIS,
                  REVIDX_EXTSTORED,
                  REVIDX_FLAGS_ORDER,
                  REVIDX_HASCOPIESINFO,
                  REVIDX_ISCENSORED,
                  REVIDX_RAWTEXT_CHANGING_FLAGS,
              )
              from .. import error, util
              # Bare references to every imported flag name so that pyflakes does not
              # report them as unused imports.
              # These names must stay available in this module for extensions.
              REVIDX_ISCENSORED
              REVIDX_ELLIPSIS
              REVIDX_EXTSTORED
              REVIDX_HASCOPIESINFO
              REVIDX_DEFAULT_FLAGS
              REVIDX_FLAGS_ORDER
              REVIDX_RAWTEXT_CHANGING_FLAGS
+             # Keep this in sync with REVIDX_KNOWN_FLAGS in rust/hg-core/src/revlog/revlog.rs
              REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
              # Store flag processors (cf. 'addflagprocessor()' to register)
              #
              # Maps a revision flag to its processor. A value of ``None`` marks a flag
              # that is accepted but has no transform to apply: `_processflagsfunc`
              # skips the transform step when the registered processor is ``None``.
              flagprocessors = {
                  REVIDX_ISCENSORED: None,
                  REVIDX_HASCOPIESINFO: None,
              }
              def addflagprocessor(flag, processor):
                  """Attach ``processor`` to ``flag`` in the module-level registry.

                  Rules that registrations have to follow:

                  - The flag has to be declared in REVIDX_KNOWN_FLAGS and
                    REVIDX_FLAGS_ORDER, and also in REVIDX_RAWTEXT_CHANGING_FLAGS when it
                    can alter rawtext.
                  - A given flag accepts a single processor.
                  - A processor is a 3-tuple of functions ``(read, write, raw)``:

                        - (read)  f(self, rawtext) -> text, bool
                        - (write) f(self, text) -> rawtext, bool
                        - (raw)   f(self, rawtext) -> bool

                    "text" is what the user sees. "rawtext" is what is stored in revlog
                    data and is not directly visible to the user.

                    The boolean tells whether the returned text can be used for hash
                    integrity checking. If "write" returns False, "text" is used to
                    generate the hash; if "write" returns True, the "rawtext" it
                    returned should be used instead. Usually "write" and "read" return
                    different booleans, and "raw" returns the same boolean as "write".

                    Note: the 'raw' transform is used for changegroup generation and in
                    some debug commands. There it only indicates whether the contents
                    can be used for hash integrity checks.
                  """
                  registry = flagprocessors
                  insertflagprocessor(flag, processor, registry)
              def insertflagprocessor(flag, processor, flagprocessors):
                  """Store ``processor`` under ``flag`` in the ``flagprocessors`` mapping.

                  Raises ``error.ProgrammingError`` when ``flag`` shares no bit with
                  REVIDX_KNOWN_FLAGS or is not an element of REVIDX_FLAGS_ORDER, and
                  ``error.Abort`` when ``flagprocessors`` already has an entry for it.
                  """
                  if (flag & REVIDX_KNOWN_FLAGS) == 0:
                      raise error.ProgrammingError(
                          _(b"cannot register processor on unknown flag '%#x'.") % flag
                      )
                  if flag not in REVIDX_FLAGS_ORDER:
                      raise error.ProgrammingError(
                          _(b"flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % flag
                      )
                  if flag in flagprocessors:
                      raise error.Abort(
                          _(b"cannot register multiple processors on flag '%#x'.") % flag
                      )
                  flagprocessors[flag] = processor
              def processflagswrite(revlog, text, flags):
                  """Run the write transformations of the flag processors on ``text``.

                  ``revlog`` - the revlog whose flag processors are applied
                  ``text`` - the revision data to process
                  ``flags`` - the revision flags

                  The flags present in ``flags`` are handled in the reverse of the order
                  defined by REVIDX_FLAGS_ORDER. That order needs to be stable because
                  the transformations are not required to commute.

                  Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
                  processed text and ``validatehash`` is a bool indicating whether the
                  returned text should be checked for hash integrity.
                  """
                  result = _processflagsfunc(revlog, text, flags, b'write')
                  return result[:2]
              def processflagsread(revlog, text, flags):
                  """Inspect revision data flags and apply the read transformations
                  defined by registered flag processors.

                  ``revlog`` - the revlog whose flag processors are applied
                  ``text`` - the revision data to process
                  ``flags`` - the revision flags

                  The flags present in ``flags`` are handled in the order defined by
                  REVIDX_FLAGS_ORDER. That order needs to be stable because the
                  transformations are not required to commute.

                  Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
                  processed text and ``validatehash`` is a bool indicating whether the
                  returned text should be checked for hash integrity.
                  """
                  return _processflagsfunc(revlog, text, flags, b'read')
              def processflagsraw(revlog, text, flags):
                  """Inspect revision data flags to check if the content hash should be
                  validated.

                  ``revlog`` - the revlog whose flag processors are consulted
                  ``text`` - the raw revision data
                  ``flags`` - the revision flags

                  The 'raw' function of each processor registered for a flag present in
                  ``flags`` is called, following the order defined by REVIDX_FLAGS_ORDER.
                  ``text`` itself is not transformed.

                  Returns ``validatehash`` only (not a tuple): a bool indicating whether
                  ``text`` should be checked for hash integrity.
                  """
                  return _processflagsfunc(revlog, text, flags, b'raw')[1]
              def _processflagsfunc(revlog, text, flags, operation):
                  """Apply the flag processors of ``revlog`` for one ``operation``.

                  ``operation`` is one of ``b'read'``, ``b'write'`` or ``b'raw'``.
                  Returns ``(text, validatehash)``.

                  This is a module-private helper; other code is expected to go through
                  the ``processflags*`` wrappers instead of calling it directly.
                  """
                  # Nothing to process: skip every check below.
                  if flags == 0:
                      return text, True
                  if operation not in (b'read', b'write', b'raw'):
                      raise error.ProgrammingError(_(b"invalid '%s' operation") % operation)
                  # Refuse any bit that is not a known flag.
                  unknownflags = flags & ~REVIDX_KNOWN_FLAGS
                  if unknownflags:
                      raise revlog._flagserrorclass(
                          _(b"incompatible revision flag '%#x'") % unknownflags
                      )
                  # Transforms may not commute, so writing walks the flags in the
                  # opposite order from reading.
                  if operation == b'write':
                      flagsequence = reversed(REVIDX_FLAGS_ORDER)
                  else:
                      flagsequence = REVIDX_FLAGS_ORDER
                  validatehash = True
                  for flag in flagsequence:
                      if not flag & flags:
                          continue
                      # Every flag that is set needs an entry, even if that entry is None.
                      if flag not in revlog._flagprocessors:
                          message = _(b"missing processor for flag '%#x'") % flag
                          raise revlog._flagserrorclass(message)
                      vhash = True
                      processor = revlog._flagprocessors[flag]
                      if processor is not None:
                          readtransform, writetransform, rawtransform = processor
                          if operation == b'raw':
                              vhash = rawtransform(revlog, text)
                          elif operation == b'read':
                              text, vhash = readtransform(revlog, text)
                          else:  # write operation
                              text, vhash = writetransform(revlog, text)
                      validatehash = validatehash and vhash
                  return text, validatehash

rust/hg-core/src/revlog/filelog.rs

0 +83 0

              use crate::errors::HgError;
              use crate::repo::Repo;
              use crate::revlog::path_encode::path_encode;
              use crate::revlog::revlog::RevlogEntry;
              use crate::revlog::revlog::{Revlog, RevlogError};
              use crate::revlog::NodePrefix;
              use crate::revlog::Revision;
              use crate::utils::files::get_path_from_bytes;
              use crate::utils::hg_path::HgPath;
              use crate::utils::SliceExt;
              use std::path::PathBuf;
              /// A `Revlog` wrapper dedicated to the history of a single tracked file.
              pub struct Filelog {
                  /// The underlying generic revlog.
                  revlog: Revlog,
              }
              impl Filelog {
                  /// Open the filelog of `file_path` in `repo`, using the `.i` store path
                  /// as index and the `.d` store path as data file.
                  pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
                      let revlog = Revlog::open(
                          repo,
                          store_path(file_path, b".i"),
                          Some(&store_path(file_path, b".d")),
                      )?;
                      Ok(Self { revlog })
                  }
                  /// Data of the revision with the given node ID.
                  ///
                  /// The node ID is that of the file as found in a filelog, not of a
                  /// changeset.
                  pub fn data_for_node(
                      &self,
                      file_node: impl Into<NodePrefix>,
                  ) -> Result<FilelogRevisionData, RevlogError> {
                      self.data_for_rev(self.revlog.rev_from_node(file_node.into())?)
                  }
                  /// Data of the given revision.
                  ///
                  /// The revision is that of the file as found in a filelog, not of a
                  /// changeset.
                  pub fn data_for_rev(
                      &self,
                      file_rev: Revision,
                  ) -> Result<FilelogRevisionData, RevlogError> {
                      let raw = self.revlog.get_rev_data(file_rev)?;
                      Ok(FilelogRevisionData(raw.into_owned()))
                  }
                  /// Entry of the revision with the given node ID.
                  ///
                  /// The node ID is that of the file as found in a filelog, not of a
                  /// changeset.
                  pub fn entry_for_node(
                      &self,
                      file_node: impl Into<NodePrefix>,
                  ) -> Result<FilelogEntry, RevlogError> {
                      self.entry_for_rev(self.revlog.rev_from_node(file_node.into())?)
                  }
                  /// Entry of the given revision.
                  ///
                  /// The revision is that of the file as found in a filelog, not of a
                  /// changeset.
                  pub fn entry_for_rev(
                      &self,
                      file_rev: Revision,
                  ) -> Result<FilelogEntry, RevlogError> {
                      let entry = self.revlog.get_entry(file_rev)?;
                      Ok(FilelogEntry(entry))
                  }
              }
              /// Build the encoded path of `data/<hg_path><suffix>`.
              fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
                  let path_bytes = hg_path.as_bytes();
                  let mut unencoded =
                      Vec::with_capacity(b"data/".len() + path_bytes.len() + suffix.len());
                  unencoded.extend_from_slice(b"data/");
                  unencoded.extend_from_slice(path_bytes);
                  unencoded.extend_from_slice(suffix);
                  let encoded = path_encode(&unencoded);
                  get_path_from_bytes(&encoded).into()
              }
              /// The `RevlogEntry` of one filelog revision, with filelog-specific helpers.
              pub struct FilelogEntry<'a>(RevlogEntry<'a>);
              impl FilelogEntry<'_> {
+                 /// `self.data()` can be expensive, with decompression and delta
+                 /// resolution.
+                 ///
+                 /// *Without* paying this cost, based on revlog index information
+                 /// including `RevlogEntry::uncompressed_len`:
+                 ///
+                 /// * Returns `true` if the length that `self.data().file_data().len()`
+                 ///   would return is definitely **not equal** to `other_len`.
+                 /// * Returns `false` if available information is inconclusive.
+                 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
+                     // Relevant code that implements this behavior in Python code:
+                     // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
+                     // revlog.size, revlog.rawsize
+                     // Let’s call `file_data_len` what would be returned by
+                     // `self.data().file_data().len()`.
+                     if self.0.is_cencored() {
+                         let file_data_len = 0;
+                         return other_len != file_data_len;
+                     }
+                     if self.0.has_length_affecting_flag_processor() {
+                         // We can’t conclude anything about `file_data_len`.
+                         return false;
+                     }
+                     // Revlog revisions (usually) have metadata for the size of
+                     // their data after decompression and delta resolution
+                     // as would be returned by `Revlog::get_rev_data`.
+                     //
+                     // For filelogs this is the file’s contents preceded by an optional
+                     // metadata block.
+                     let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
+                         l as u64
+                     } else {
+                         // The field was set to -1, the actual uncompressed len is unknown.
+                         // We need to decompress to say more.
+                         return false;
+                     };
+                     // `uncompressed_len = file_data_len + optional_metadata_len`,
+                     // so `file_data_len <= uncompressed_len`.
+                     if uncompressed_len < other_len {
+                         // Transitively, `file_data_len < other_len`.
+                         // So `other_len != file_data_len` definitely.
+                         return true;
+                     }
+                     if uncompressed_len == other_len + 4 {
+                         // It’s possible that `file_data_len == other_len` with an empty
+                         // metadata block (2 start marker bytes + 2 end marker bytes).
+                         // This happens when there wouldn’t otherwise be metadata, but
+                         // the first 2 bytes of file data happen to match a start marker
+                         // and would be ambiguous.
+                         return false;
+                     }
+                     if !self.0.has_p1() {
+                         // There may or may not be copy metadata, so we can’t deduce more
+                         // about `file_data_len` without computing file data.
+                         return false;
+                     }
+                     // Filelog ancestry is not meaningful in the way changelog ancestry is.
+                     // It only provides hints to delta generation.
+                     // p1 and p2 are set to null when making a copy or rename since
+                     // contents are likely unrelated to what might have previously existed
+                     // at the destination path.
+                     //
+                     // Conversely, since here p1 is non-null, there is no copy metadata.
+                     // Note that this reasoning may be invalidated in the presence of
+                     // merges made by some previous versions of Mercurial that
+                     // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
+                     // and `tests/test-issue6528.t`.
+                     //
+                     // Since copy metadata is currently the only kind of metadata
+                     // kept in revlog data of filelogs,
+                     // this `FilelogEntry` does not have such metadata:
+                     let file_data_len = uncompressed_len;
+                     return file_data_len != other_len;
+                 }
                  /// Return the data of this revision as an owned buffer.
                  ///
                  /// This is the potentially expensive operation that
                  /// `file_data_len_not_equal_to` tries to avoid.
                  pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
                      Ok(FilelogRevisionData(self.0.data()?.into_owned()))
                  }
              }
              /// Uncompressed, delta-resolved bytes of a single filelog revision.
              pub struct FilelogRevisionData(Vec<u8>);
              impl FilelogRevisionData {
                  /// Separate the optional metadata block from the file contents.
                  ///
                  /// Metadata is present when the bytes start with the `\x01\n` delimiter;
                  /// it then extends up to the next occurrence of that delimiter. A start
                  /// delimiter without a matching end delimiter is reported as corruption.
                  pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
                      const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
                      let after_start = match self.0.drop_prefix(DELIMITER) {
                          Some(rest) => rest,
                          // No start delimiter: everything is file data.
                          None => return Ok((None, self.0.as_slice())),
                      };
                      match after_start.split_2_by_slice(DELIMITER) {
                          Some((metadata, data)) => Ok((Some(metadata), data)),
                          None => Err(HgError::corrupted(
                              "Missing metadata end delimiter in filelog entry",
                          )),
                      }
                  }
                  /// Borrow the file contents at this revision, without any metadata.
                  pub fn file_data(&self) -> Result<&[u8], HgError> {
                      self.split().map(|(_metadata, data)| data)
                  }
                  /// Consume `self` and return the file contents, dropping metadata if
                  /// there is any.
                  pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
                      match self.split()? {
                          (Some(_metadata), data) => Ok(data.to_owned()),
                          // Without metadata the whole buffer is the file data.
                          (None, _) => Ok(self.0),
                      }
                  }
              }

rust/hg-core/src/revlog/index.rs

0 +4 0

              use std::convert::TryInto;
              use std::ops::Deref;
              use byteorder::{BigEndian, ByteOrder};
              use crate::errors::HgError;
              use crate::revlog::node::Node;
              use crate::revlog::{Revision, NULL_REVISION};
              pub const INDEX_ENTRY_SIZE: usize = 64;
              /// The four header bytes found at the start of a revlog index.
              ///
              /// Bytes `0..2` are read as the big-endian format flags and bytes `2..4`
              /// as the big-endian format version.
              pub struct IndexHeader {
                  header_bytes: [u8; 4],
              }
              #[derive(Copy, Clone)]
              pub struct IndexHeaderFlags {
                  flags: u16,
              }
              /// Corresponds to the high bits of `_format_flags` in python
              impl IndexHeaderFlags {
                  /// Bit tested by `is_inline`.
                  const INLINE_DATA: u16 = 1;
                  /// Bit tested by `uses_generaldelta`.
                  const GENERALDELTA: u16 = 2;
                  /// Corresponds to FLAG_INLINE_DATA in python
                  pub fn is_inline(self) -> bool {
                      (self.flags & Self::INLINE_DATA) != 0
                  }
                  /// Corresponds to FLAG_GENERALDELTA in python
                  pub fn uses_generaldelta(self) -> bool {
                      (self.flags & Self::GENERALDELTA) != 0
                  }
              }
              /// Corresponds to the INDEX_HEADER structure,
              /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
              impl IndexHeader {
                  /// The only revlog version currently supported by rhg.
                  const REVLOGV1: u16 = 1;
                  /// Header assumed for an empty index file.
                  ///
                  /// An empty file is treated as a valid index with no entries, so a
                  /// format has to be picked arbitrarily: version 1 with both flag bits
                  /// (inline and generaldelta) set. The choice matters little for
                  /// read-only access, but it corresponds to the `new_header` variable
                  /// in `revlog.py`, `_loadindex`.
                  const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
                      header_bytes: [0, 3, 0, 1],
                  };
                  /// Flags stored in the first two header bytes.
                  fn format_flags(&self) -> IndexHeaderFlags {
                      // No "unknown flags" check here, unlike in python. Maybe there should
                      // be.
                      let flags = BigEndian::read_u16(&self.header_bytes[0..2]);
                      IndexHeaderFlags { flags }
                  }
                  /// Corresponds to `_format_version` in Python.
                  fn format_version(&self) -> u16 {
                      BigEndian::read_u16(&self.header_bytes[2..4])
                  }
                  /// Read the header from the first four bytes of `index_bytes`.
                  ///
                  /// Empty input yields `EMPTY_INDEX_HEADER`; one to three bytes of input
                  /// is reported as corruption.
                  fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
                      if index_bytes.is_empty() {
                          return Ok(IndexHeader::EMPTY_INDEX_HEADER);
                      }
                      match index_bytes.get(0..4) {
                          Some(prefix) => Ok(IndexHeader {
                              // The slice is exactly 4 bytes long, so this cannot fail.
                              header_bytes: prefix.try_into().expect("impossible"),
                          }),
                          None => Err(HgError::corrupted(
                              "corrupted revlog: can't read the index format header",
                          )),
                      }
                  }
              }
              /// A Revlog index
              pub struct Index {
                  /// The whole content the index was created from.
                  bytes: Box<dyn Deref<Target = [u8]> + Send>,
                  /// Start offset of every index block.
                  /// Only present when the index is interleaved with data (inline).
                  offsets: Option<Vec<usize>>,
                  /// Value of the generaldelta bit of the header flags.
                  uses_generaldelta: bool,
              }
              impl Index {
                  /// Create an index from bytes.
                  ///
                  /// Only revlog version 1 is accepted. For an inline revlog, the start of
                  /// every entry is computed up front, and the entries together with their
                  /// data must cover `bytes` exactly.
                  pub fn new(
                      bytes: Box<dyn Deref<Target = [u8]> + Send>,
                  ) -> Result<Self, HgError> {
                      let header = IndexHeader::parse(bytes.as_ref())?;
                      if header.format_version() != IndexHeader::REVLOGV1 {
                          // A proper new version should have had a repo/store
                          // requirement.
                          return Err(HgError::corrupted("unsupported revlog version"));
                      }
                      // Reading the flags this way is only correct because the version
                      // is known to be REVLOGV1. In v2 generaldelta is always used and in
                      // v0 it never is. Similar for inline (it's only used in v1).
                      let format_flags = header.format_flags();
                      let uses_generaldelta = format_flags.uses_generaldelta();
                      if !format_flags.is_inline() {
                          return Ok(Self {
                              bytes,
                              offsets: None,
                              uses_generaldelta,
                          });
                      }
                      // Inline: each index block is followed by its compressed data, so
                      // walk the blocks one by one to find where each of them starts.
                      let total_len = bytes.len();
                      let mut offsets = Vec::new();
                      let mut cursor: usize = 0;
                      while cursor + INDEX_ENTRY_SIZE <= total_len {
                          offsets.push(cursor);
                          let entry = IndexEntry {
                              bytes: &bytes[cursor..cursor + INDEX_ENTRY_SIZE],
                              offset_override: None,
                          };
                          cursor += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
                      }
                      if cursor != total_len {
                          return Err(HgError::corrupted(
                              "unexpected inline revlog length",
                          ));
                      }
                      Ok(Self {
                          bytes,
                          offsets: Some(offsets),
                          uses_generaldelta,
                      })
                  }
                  /// Value of the generaldelta flag.
                  pub fn uses_generaldelta(&self) -> bool {
                      self.uses_generaldelta
                  }
                  /// Value of the inline flag.
                  pub fn is_inline(&self) -> bool {
                      self.offsets.is_some()
                  }
                  /// Return a slice of bytes if `revlog` is inline. Panic if not.
                  pub fn data(&self, start: usize, end: usize) -> &[u8] {
                      assert!(
                          self.is_inline(),
                          "tried to access data in the index of a revlog that is not inline"
                      );
                      &self.bytes[start..end]
                  }
                  /// Return number of entries of the revlog index.
                  pub fn len(&self) -> usize {
                      match &self.offsets {
                          Some(offsets) => offsets.len(),
                          None => self.bytes.len() / INDEX_ENTRY_SIZE,
                      }
                  }
                  /// Returns `true` if the `Index` has zero `entries`.
                  pub fn is_empty(&self) -> bool {
                      self.len() == 0
                  }
                  /// Return the index entry corresponding to the given revision if it
                  /// exists.
                  pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
                      if rev == NULL_REVISION {
                          return None;
                      }
                      match &self.offsets {
                          Some(offsets) => self.get_entry_inline(rev, offsets),
                          None => self.get_entry_separated(rev),
                      }
                  }
                  /// Entry lookup for an index interleaved with data.
                  fn get_entry_inline(
                      &self,
                      rev: Revision,
                      offsets: &[usize],
                  ) -> Option<IndexEntry> {
                      let block_start = *offsets.get(rev as usize)?;
                      let block_end = block_start.checked_add(INDEX_ENTRY_SIZE)?;
                      Some(IndexEntry {
                          bytes: &self.bytes[block_start..block_end],
                          // See IndexEntry for an explanation of this override.
                          offset_override: Some(block_end),
                      })
                  }
                  /// Entry lookup for an index stored separately from data.
                  fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
                      let position = rev as usize;
                      if position >= self.bytes.len() / INDEX_ENTRY_SIZE {
                          return None;
                      }
                      let block_start = position * INDEX_ENTRY_SIZE;
                      // Override the offset of the first revision as its bytes are used
                      // for the index's metadata (saving space because it is always 0)
                      let offset_override = match rev {
                          0 => Some(0),
                          _ => None,
                      };
                      Some(IndexEntry {
                          bytes: &self.bytes[block_start..block_start + INDEX_ENTRY_SIZE],
                          offset_override,
                      })
                  }
              }
              impl super::RevlogIndex for Index {
                  fn len(&self) -> usize {
                      self.len()
                  }
                  fn node(&self, rev: Revision) -> Option<&Node> {
                      self.get_entry(rev).map(|entry| entry.hash())
                  }
              }
              /// A view over the bytes of a single index entry.
              #[derive(Debug)]
              pub struct IndexEntry<'a> {
                  /// The bytes of the entry; every accessor below reads from this slice.
                  bytes: &'a [u8],
                  /// Allows to override the offset value of the entry.
                  ///
                  /// For interleaved index and data, the offset stored in the index
                  /// corresponds to the separated data offset.
                  /// It has to be overridden with the actual offset in the interleaved
                  /// index which is just after the index block.
                  ///
                  /// For separated index and data, the offset stored in the first index
                  /// entry is mixed with the index headers.
                  /// It has to be overridden with 0.
                  offset_override: Option<usize>,
              }
              impl<'a> IndexEntry<'a> {
                  /// Return the offset of the data.
                  ///
                  /// Unless overridden, this is the 48-bit big-endian integer stored in
                  /// bytes 0 to 5 of the entry.
                  pub fn offset(&self) -> usize {
                      if let Some(offset_override) = self.offset_override {
                          offset_override
                      } else {
                          // Left-pad the 6 stored bytes with zeroes to read them as a u64.
                          let mut bytes = [0; 8];
                          bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
                          BigEndian::read_u64(&bytes[..]) as usize
                      }
                  }
                  /// Return the 16-bit big-endian value stored in bytes 6 and 7.
                  ///
                  /// NOTE(review): presumably the per-revision `REVIDX_*` flags that the
                  /// Python side handles in `flagutil.py` — confirm.
+                 pub fn flags(&self) -> u16 {
+                     BigEndian::read_u16(&self.bytes[6..=7])
+                 }
                  /// Return the compressed length of the data.
                  pub fn compressed_len(&self) -> u32 {
                      BigEndian::read_u32(&self.bytes[8..=11])
                  }
                  /// Return the uncompressed length of the data.
                  ///
                  /// The value is signed, so a negative number can be returned.
                  pub fn uncompressed_len(&self) -> i32 {
                      BigEndian::read_i32(&self.bytes[12..=15])
                  }
                  /// Return the revision upon which the data has been derived.
                  pub fn base_revision_or_base_of_delta_chain(&self) -> Revision {
                      // TODO Maybe return an Option when base_revision == rev?
                      //      Requires to add rev to IndexEntry
                      BigEndian::read_i32(&self.bytes[16..])
                  }
                  /// Return the revision read as a big-endian `i32` at byte 24
                  /// (the first parent, going by the name).
                  pub fn p1(&self) -> Revision {
                      BigEndian::read_i32(&self.bytes[24..])
                  }
                  /// Return the revision read as a big-endian `i32` at byte 28
                  /// (the second parent, going by the name).
                  pub fn p2(&self) -> Revision {
                      BigEndian::read_i32(&self.bytes[28..])
                  }
                  /// Return the hash of revision's full text.
                  ///
                  /// Currently, SHA-1 is used and only the first 20 bytes of this field
                  /// are used.
                  pub fn hash(&self) -> &'a Node {
                      (&self.bytes[32..52]).try_into().unwrap()
                  }
              }
              #[cfg(test)]
              mod tests {
                  use super::*;
                  /// Test helper producing the raw bytes of a single index entry.
                  ///
                  /// Only the fields exercised by the tests are configurable; every
                  /// other byte of the entry is zero.
                  #[cfg(test)]
                  #[derive(Debug, Copy, Clone)]
                  pub struct IndexEntryBuilder {
                      // When true, the entry starts with the index header (format
                      // flags and version) instead of an offset.
                      is_first: bool,
                      // Header format flags, only written when `is_first` is set.
                      is_inline: bool,
                      is_general_delta: bool,
                      // Header format version, only written when `is_first` is set.
                      version: u16,
                      // Data offset, only written when `is_first` is not set.
                      offset: usize,
                      compressed_len: usize,
                      uncompressed_len: usize,
                      base_revision_or_base_of_delta_chain: Revision,
                  }
                  #[cfg(test)]
                  impl IndexEntryBuilder {
                      /// Create a builder for a non-first, non-inline, generaldelta,
                      /// version 2 entry with every numeric field set to zero.
                      pub fn new() -> Self {
                          Self {
                              is_first: false,
                              is_inline: false,
                              is_general_delta: true,
                              version: 2,
                              offset: 0,
                              compressed_len: 0,
                              uncompressed_len: 0,
                              base_revision_or_base_of_delta_chain: 0,
                          }
                      }
                      /// Choose whether the entry carries the index header.
                      pub fn is_first(&mut self, value: bool) -> &mut Self {
                          self.is_first = value;
                          self
                      }
                      /// Set the "inline" header flag (used only for a first entry).
                      pub fn with_inline(&mut self, value: bool) -> &mut Self {
                          self.is_inline = value;
                          self
                      }
                      /// Set the "generaldelta" header flag (used only for a first
                      /// entry).
                      pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
                          self.is_general_delta = value;
                          self
                      }
                      /// Set the header format version (used only for a first entry).
                      pub fn with_version(&mut self, value: u16) -> &mut Self {
                          self.version = value;
                          self
                      }
                      /// Set the data offset (ignored for a first entry).
                      pub fn with_offset(&mut self, value: usize) -> &mut Self {
                          self.offset = value;
                          self
                      }
                      /// Set the compressed length, written as a `u32`.
                      pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
                          self.compressed_len = value;
                          self
                      }
                      /// Set the uncompressed length, written as a `u32`.
                      pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
                          self.uncompressed_len = value;
                          self
                      }
                      /// Set the delta base revision field.
                      pub fn with_base_revision_or_base_of_delta_chain(
                          &mut self,
                          value: Revision,
                      ) -> &mut Self {
                          self.base_revision_or_base_of_delta_chain = value;
                          self
                      }
                      /// Serialize the entry into `INDEX_ENTRY_SIZE` bytes.
                      ///
                      /// Layout written here: 6 bytes of header or offset, 2 bytes of
                      /// revision flags (always zero), compressed length, uncompressed
                      /// length and delta base (4 bytes each, big-endian). Everything
                      /// after that is zero-filled.
                      pub fn build(&self) -> Vec<u8> {
                          let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
                          if self.is_first {
                              bytes.extend(&match (self.is_general_delta, self.is_inline) {
                                  (false, false) => [0u8, 0],
                                  (false, true) => [0u8, 1],
                                  (true, false) => [0u8, 2],
                                  (true, true) => [0u8, 3],
                              });
                              bytes.extend(&self.version.to_be_bytes());
                              // Remaining offset bytes.
                              bytes.extend(&[0u8; 2]);
                          } else {
                              // Offset stored on 48 bits (6 bytes)
                              bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
                          }
                          bytes.extend(&[0u8; 2]); // Revision flags.
                          bytes.extend(&(self.compressed_len as u32).to_be_bytes());
                          bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
                          bytes.extend(
                              &self.base_revision_or_base_of_delta_chain.to_be_bytes(),
                          );
                          // The fields after the delta base are not configurable
                          // through this builder. Zero-fill them so that the result is
                          // a complete entry: `IndexEntry::p1`, `p2` and `hash` read
                          // past byte 20 and would otherwise panic on a built entry.
                          bytes.resize(INDEX_ENTRY_SIZE, 0);
                          bytes
                      }
                  }
                  /// Parse the header of `index_bytes` and return its "inline" flag.
                  pub fn is_inline(index_bytes: &[u8]) -> bool {
                      IndexHeader::parse(index_bytes)
                          .expect("too short")
                          .format_flags()
                          .is_inline()
                  }
                  /// Parse the header of `index_bytes` and return its "generaldelta"
                  /// flag.
                  pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
                      IndexHeader::parse(index_bytes)
                          .expect("too short")
                          .format_flags()
                          .uses_generaldelta()
                  }
                  /// Parse the header of `index_bytes` and return its format version.
                  pub fn get_version(index_bytes: &[u8]) -> u16 {
                      IndexHeader::parse(index_bytes)
                          .expect("too short")
                          .format_version()
                  }
                  #[test]
                  fn flags_when_no_inline_flag_test() {
                      let bytes = IndexEntryBuilder::new()
                          .is_first(true)
                          .with_general_delta(false)
                          .with_inline(false)
                          .build();
                      assert_eq!(is_inline(&bytes), false);
                      assert_eq!(uses_generaldelta(&bytes), false);
                  }
                  #[test]
                  fn flags_when_inline_flag_test() {
                      let bytes = IndexEntryBuilder::new()
                          .is_first(true)
                          .with_general_delta(false)
                          .with_inline(true)
                          .build();
                      assert_eq!(is_inline(&bytes), true);
                      assert_eq!(uses_generaldelta(&bytes), false);
                  }
                  #[test]
                  fn flags_when_inline_and_generaldelta_flags_test() {
                      let bytes = IndexEntryBuilder::new()
                          .is_first(true)
                          .with_general_delta(true)
                          .with_inline(true)
                          .build();
                      assert_eq!(is_inline(&bytes), true);
                      assert_eq!(uses_generaldelta(&bytes), true);
                  }
                  #[test]
                  fn test_build_produces_full_size_entry() {
                      // Both flavours of entry must be complete, so that every
                      // `IndexEntry` accessor can be used on them.
                      let bytes = IndexEntryBuilder::new().build();
                      assert_eq!(bytes.len(), INDEX_ENTRY_SIZE);
                      let first = IndexEntryBuilder::new().is_first(true).build();
                      assert_eq!(first.len(), INDEX_ENTRY_SIZE);
                  }
                  #[test]
                  fn test_offset() {
                      let bytes = IndexEntryBuilder::new().with_offset(1).build();
                      let entry = IndexEntry {
                          bytes: &bytes,
                          offset_override: None,
                      };
                      assert_eq!(entry.offset(), 1)
                  }
                  #[test]
                  fn test_with_overridden_offset() {
                      // The override wins over the offset stored in the entry.
                      let bytes = IndexEntryBuilder::new().with_offset(1).build();
                      let entry = IndexEntry {
                          bytes: &bytes,
                          offset_override: Some(2),
                      };
                      assert_eq!(entry.offset(), 2)
                  }
                  #[test]
                  fn test_compressed_len() {
                      let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
                      let entry = IndexEntry {
                          bytes: &bytes,
                          offset_override: None,
                      };
                      assert_eq!(entry.compressed_len(), 1)
                  }
                  #[test]
                  fn test_uncompressed_len() {
                      let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
                      let entry = IndexEntry {
                          bytes: &bytes,
                          offset_override: None,
                      };
                      assert_eq!(entry.uncompressed_len(), 1)
                  }
                  #[test]
                  fn test_base_revision_or_base_of_delta_chain() {
                      let bytes = IndexEntryBuilder::new()
                          .with_base_revision_or_base_of_delta_chain(1)
                          .build();
                      let entry = IndexEntry {
                          bytes: &bytes,
                          offset_override: None,
                      };
                      assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1)
                  }
                  #[test]
                  fn version_test() {
                      let bytes = IndexEntryBuilder::new()
                          .is_first(true)
                          .with_version(1)
                          .build();
                      assert_eq!(get_version(&bytes), 1)
                  }
              }
              #[cfg(test)]
              pub use tests::IndexEntryBuilder;

rust/hg-core/src/revlog/revlog.rs

0 +28 0

              use std::borrow::Cow;
              use std::convert::TryFrom;
              use std::io::Read;
              use std::ops::Deref;
              use std::path::Path;
              use flate2::read::ZlibDecoder;
              use micro_timer::timed;
              use sha1::{Digest, Sha1};
              use zstd;
              use super::index::Index;
              use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
              use super::nodemap;
              use super::nodemap::{NodeMap, NodeMapError};
              use super::nodemap_docket::NodeMapDocket;
              use super::patch;
              use crate::errors::HgError;
              use crate::repo::Repo;
              use crate::revlog::Revision;
              use crate::{Node, NULL_REVISION};
              // Individual revision flag bits, stored in the 16-bit flags field of
              // an index entry. They occupy the four highest bits, from bit 15
              // (censored) down to bit 12 (has copies info).
+             const REVISION_FLAG_CENSORED: u16 = 1 << 15;
+             const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
+             const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
+             const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
              // Union of every flag bit defined above.
+             // Keep this in sync with REVIDX_KNOWN_FLAGS in
+             // mercurial/revlogutils/flagutil.py
+             const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
+                 | REVISION_FLAG_ELLIPSIS
+                 | REVISION_FLAG_EXTSTORED
+                 | REVISION_FLAG_HASCOPIESINFO;
              /// Errors returned when looking up or reading revisions of a revlog.
              #[derive(derive_more::From)]
              pub enum RevlogError {
                  /// The requested revision or node is not present in the revlog
                  InvalidRevision,
                  /// Working directory is not supported
                  WDirUnsupported,
                  /// Found more than one entry whose ID match the requested prefix
                  AmbiguousPrefix,
                  /// Any other error, including corruption of the revlog; this is the
                  /// variant an `HgError` converts into.
                  #[from]
                  Other(HgError),
              }
              impl From<NodeMapError> for RevlogError {
                  /// Translate a nodemap lookup failure into the matching revlog error.
                  fn from(nodemap_error: NodeMapError) -> Self {
                      match nodemap_error {
                          // A nodemap pointing at a revision missing from the index is
                          // reported as corruption.
                          NodeMapError::RevisionNotInIndex(_) => Self::corrupted(),
                          // Several nodes share the requested prefix.
                          NodeMapError::MultipleResults => Self::AmbiguousPrefix,
                      }
                  }
              }
              /// Build the generic "corrupted revlog" error used throughout this
              /// module.
              fn corrupted() -> HgError {
                  HgError::corrupted("corrupted revlog")
              }
              impl RevlogError {
                  /// Same as the free function `corrupted`, wrapped in
                  /// `RevlogError::Other`.
                  fn corrupted() -> Self {
                      RevlogError::Other(corrupted())
                  }
              }
              /// Read only implementation of revlog.
              ///
              /// Holds the parsed index, the data file bytes when they are stored
              /// separately from the index, and the persistent nodemap when one was
              /// loaded.
              pub struct Revlog {
                  /// When index and data are not interleaved: bytes of the revlog index.
                  /// When index and data are interleaved: bytes of the revlog index and
                  /// data.
                  index: Index,
                  /// When index and data are not interleaved: bytes of the revlog data
                  /// (`None` for an inline revlog).
                  data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
                  /// When present on disk: the persistent nodemap for this revlog
                  nodemap: Option<nodemap::NodeTree>,
              }
              impl Revlog {
                  /// Open a revlog index file.
                  ///
                  /// It will also open the associated data file if index and data are not
                  /// interleaved.
                  #[timed]
                  pub fn open(
                      repo: &Repo,
                      index_path: impl AsRef<Path>,
                      data_path: Option<&Path>,
                  ) -> Result<Self, HgError> {
                      let index_path = index_path.as_ref();
                      let index = {
                          match repo.store_vfs().mmap_open_opt(&index_path)? {
                              None => Index::new(Box::new(vec![])),
                              Some(index_mmap) => {
                                  let index = Index::new(Box::new(index_mmap))?;
                                  Ok(index)
                              }
                          }
                      }?;
                      let default_data_path = index_path.with_extension("d");
                      // type annotation required
                      // won't recognize Mmap as Deref<Target = [u8]>
                      let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
                          if index.is_inline() {
                              None
                          } else {
                              let data_path = data_path.unwrap_or(&default_data_path);
                              let data_mmap = repo.store_vfs().mmap_open(data_path)?;
                              Some(Box::new(data_mmap))
                          };
                      let nodemap = if index.is_inline() {
                          None
                      } else {
                          NodeMapDocket::read_from_file(repo, index_path)?.map(
                              |(docket, data)| {
                                  nodemap::NodeTree::load_bytes(
                                      Box::new(data),
                                      docket.data_length,
                                  )
                              },
                          )
                      };
                      Ok(Revlog {
                          index,
                          data_bytes,
                          nodemap,
                      })
                  }
                  /// Return number of entries of the `Revlog`.
                  pub fn len(&self) -> usize {
                      self.index.len()
                  }
                  /// Returns `true` if the `Revlog` has zero `entries`.
                  pub fn is_empty(&self) -> bool {
                      self.index.is_empty()
                  }
                  /// Returns the node ID for the given revision number, if it exists in this
                  /// revlog
                  pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
                      if rev == NULL_REVISION {
                          return Some(&NULL_NODE);
                      }
                      Some(self.index.get_entry(rev)?.hash())
                  }
                  /// Return the revision number for the given node ID, if it exists in this
                  /// revlog
                  #[timed]
                  pub fn rev_from_node(
                      &self,
                      node: NodePrefix,
                  ) -> Result<Revision, RevlogError> {
                      if node.is_prefix_of(&NULL_NODE) {
                          return Ok(NULL_REVISION);
                      }
                      if let Some(nodemap) = &self.nodemap {
                          return nodemap
                              .find_bin(&self.index, node)?
                              .ok_or(RevlogError::InvalidRevision);
                      }
                      // Fallback to linear scan when a persistent nodemap is not present.
                      // This happens when the persistent-nodemap experimental feature is not
                      // enabled, or for small revlogs.
                      //
                      // TODO: consider building a non-persistent nodemap in memory to
                      // optimize these cases.
                      let mut found_by_prefix = None;
                      for rev in (0..self.len() as Revision).rev() {
                          let index_entry =
                              self.index.get_entry(rev).ok_or(HgError::corrupted(
                                  "revlog references a revision not in the index",
                              ))?;
                          if node == *index_entry.hash() {
                              return Ok(rev);
                          }
                          if node.is_prefix_of(index_entry.hash()) {
                              if found_by_prefix.is_some() {
                                  return Err(RevlogError::AmbiguousPrefix);
                              }
                              found_by_prefix = Some(rev)
                          }
                      }
                      found_by_prefix.ok_or(RevlogError::InvalidRevision)
                  }
                  /// Returns whether the given revision exists in this revlog.
                  pub fn has_rev(&self, rev: Revision) -> bool {
                      self.index.get_entry(rev).is_some()
                  }
                  /// Return the full data associated to a revision.
                  ///
                  /// All entries required to build the final data out of deltas will be
                  /// retrieved as needed, and the deltas will be applied to the inital
                  /// snapshot to rebuild the final data.
                  #[timed]
                  pub fn get_rev_data(
                      &self,
                      rev: Revision,
                  ) -> Result<Cow<[u8]>, RevlogError> {
                      if rev == NULL_REVISION {
                          return Ok(Cow::Borrowed(&[]));
                      };
                      Ok(self.get_entry(rev)?.data()?)
                  }
                  /// Check the hash of some given data against the recorded hash.
                  pub fn check_hash(
                      &self,
                      p1: Revision,
                      p2: Revision,
                      expected: &[u8],
                      data: &[u8],
                  ) -> bool {
                      let e1 = self.index.get_entry(p1);
                      let h1 = match e1 {
                          Some(ref entry) => entry.hash(),
                          None => &NULL_NODE,
                      };
                      let e2 = self.index.get_entry(p2);
                      let h2 = match e2 {
                          Some(ref entry) => entry.hash(),
                          None => &NULL_NODE,
                      };
                      &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
                  }
                  /// Build the full data of a revision out its snapshot
                  /// and its deltas.
                  #[timed]
                  fn build_data_from_deltas(
                      snapshot: RevlogEntry,
                      deltas: &[RevlogEntry],
                  ) -> Result<Vec<u8>, HgError> {
                      let snapshot = snapshot.data_chunk()?;
                      let deltas = deltas
                          .iter()
                          .rev()
                          .map(RevlogEntry::data_chunk)
                          .collect::<Result<Vec<_>, _>>()?;
                      let patches: Vec<_> =
                          deltas.iter().map(|d| patch::PatchList::new(d)).collect();
                      let patch = patch::fold_patch_lists(&patches);
                      Ok(patch.apply(&snapshot))
                  }
                  /// Return the revlog data.
                  fn data(&self) -> &[u8] {
                      match self.data_bytes {
                          Some(ref data_bytes) => &data_bytes,
                          None => panic!(
                              "forgot to load the data or trying to access inline data"
                          ),
                      }
                  }
                  /// Get an entry of the revlog.
                  pub fn get_entry(
                      &self,
                      rev: Revision,
                  ) -> Result<RevlogEntry, RevlogError> {
                      let index_entry = self
                          .index
                          .get_entry(rev)
                          .ok_or(RevlogError::InvalidRevision)?;
                      let start = index_entry.offset();
                      let end = start + index_entry.compressed_len() as usize;
                      let data = if self.index.is_inline() {
                          self.index.data(start, end)
                      } else {
                          &self.data()[start..end]
                      };
                      let entry = RevlogEntry {
                          revlog: self,
                          rev,
                          bytes: data,
                          compressed_len: index_entry.compressed_len(),
                          uncompressed_len: index_entry.uncompressed_len(),
                          base_rev_or_base_of_delta_chain: if index_entry
                              .base_revision_or_base_of_delta_chain()
                              == rev
                          {
                              None
                          } else {
                              Some(index_entry.base_revision_or_base_of_delta_chain())
                          },
                          p1: index_entry.p1(),
                          p2: index_entry.p2(),
+                         flags: index_entry.flags(),
                          hash: *index_entry.hash(),
                      };
                      Ok(entry)
                  }
                  /// when resolving internal references within revlog, any errors
                  /// should be reported as corruption, instead of e.g. "invalid revision"
                  fn get_entry_internal(
                      &self,
                      rev: Revision,
                  ) -> Result<RevlogEntry, HgError> {
                      return self.get_entry(rev).map_err(|_| corrupted());
                  }
              }
              /// The revlog entry's bytes and the necessary informations to extract
              /// the entry's data.
              #[derive(Clone)]
              pub struct RevlogEntry<'a> {
                  // Revlog this entry was read from; used to walk the delta chain.
                  revlog: &'a Revlog,
                  // Revision number of this entry.
                  rev: Revision,
                  // Stored chunk for this revision, as found in the revlog (its first
                  // byte tells how it is encoded).
                  bytes: &'a [u8],
                  // Lengths copied from the index entry.
                  compressed_len: u32,
                  uncompressed_len: i32,
                  // `None` when the index records this revision as its own base.
                  base_rev_or_base_of_delta_chain: Option<Revision>,
                  // Parent revisions copied from the index entry.
                  p1: Revision,
                  p2: Revision,
                  // Revision flags copied from the index entry.
+                 flags: u16,
                  // Node ID recorded in the index for this revision.
                  hash: Node,
              }
              impl<'a> RevlogEntry<'a> {
                  pub fn revision(&self) -> Revision {
                      self.rev
                  }
                  pub fn uncompressed_len(&self) -> Option<u32> {
                      u32::try_from(self.uncompressed_len).ok()
                  }
+                 pub fn has_p1(&self) -> bool {
+                     self.p1 != NULL_REVISION
+                 }
+                 pub fn is_cencored(&self) -> bool {
+                     (self.flags & REVISION_FLAG_CENSORED) != 0
+                 }
+                 pub fn has_length_affecting_flag_processor(&self) -> bool {
+                     // Relevant Python code: revlog.size()
+                     // note: ELLIPSIS is known to not change the content
+                     (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
+                 }
                  /// The data for this entry, after resolving deltas if any.
                  pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
                      let mut entry = self.clone();
                      let mut delta_chain = vec![];
                      // The meaning of `base_rev_or_base_of_delta_chain` depends on
                      // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
                      // `mercurial/revlogutils/constants.py` and the code in
                      // [_chaininfo] and in [index_deltachain].
                      let uses_generaldelta = self.revlog.index.uses_generaldelta();
                      while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
                          let base_rev = if uses_generaldelta {
                              base_rev
                          } else {
                              entry.rev - 1
                          };
                          delta_chain.push(entry);
                          entry = self.revlog.get_entry_internal(base_rev)?;
                      }
                      let data = if delta_chain.is_empty() {
                          entry.data_chunk()?
                      } else {
                          Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
                      };
                      if self.revlog.check_hash(
                          self.p1,
                          self.p2,
                          self.hash.as_bytes(),
                          &data,
                      ) {
                          Ok(data)
                      } else {
                          Err(corrupted())
                      }
                  }
                  /// Extract the data contained in the entry.
                  /// This may be a delta. (See `is_delta`.)
                  fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> {
                      if self.bytes.is_empty() {
                          return Ok(Cow::Borrowed(&[]));
                      }
                      match self.bytes[0] {
                          // Revision data is the entirety of the entry, including this
                          // header.
                          b'\0' => Ok(Cow::Borrowed(self.bytes)),
                          // Raw revision data follows.
                          b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
                          // zlib (RFC 1950) data.
                          b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
                          // zstd data.
                          b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
                          // A proper new format should have had a repo/store requirement.
                          _format_type => Err(corrupted()),
                      }
                  }
                  fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
                      let mut decoder = ZlibDecoder::new(self.bytes);
                      if self.is_delta() {
                          let mut buf = Vec::with_capacity(self.compressed_len as usize);
                          decoder.read_to_end(&mut buf).map_err(|_| corrupted())?;
                          Ok(buf)
                      } else {
                          let cap = self.uncompressed_len.max(0) as usize;
                          let mut buf = vec![0; cap];
                          decoder.read_exact(&mut buf).map_err(|_| corrupted())?;
                          Ok(buf)
                      }
                  }
                  fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
                      if self.is_delta() {
                          let mut buf = Vec::with_capacity(self.compressed_len as usize);
                          zstd::stream::copy_decode(self.bytes, &mut buf)
                              .map_err(|_| corrupted())?;
                          Ok(buf)
                      } else {
                          let cap = self.uncompressed_len.max(0) as usize;
                          let mut buf = vec![0; cap];
                          let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
                              .map_err(|_| corrupted())?;
                          if len != self.uncompressed_len as usize {
                              Err(corrupted())
                          } else {
                              Ok(buf)
                          }
                      }
                  }
                  /// Tell if the entry is a snapshot or a delta
                  /// (influences on decompression).
                  fn is_delta(&self) -> bool {
                      self.base_rev_or_base_of_delta_chain.is_some()
                  }
              }
              /// Compute the node hash of a revision from its full data and the
              /// hashes of its two parents.
              ///
              /// The parent hashes are fed to SHA-1 in sorted order, smallest first,
              /// followed by the revision data.
              fn hash(
                  data: &[u8],
                  p1_hash: &[u8],
                  p2_hash: &[u8],
              ) -> [u8; NODE_BYTES_LENGTH] {
                  // Order the parents so that the result does not depend on which one
                  // is p1 and which one is p2.
                  let (lower, upper) = if p1_hash > p2_hash {
                      (p2_hash, p1_hash)
                  } else {
                      (p1_hash, p2_hash)
                  };
                  let mut digest = Sha1::new();
                  digest.update(lower);
                  digest.update(upper);
                  digest.update(data);
                  *digest.finalize().as_ref()
              }

rust/rhg/src/commands/status.rs

0 +11 -11

              // status.rs
              //
              // Copyright 2020, Georges Racinet <georges.racinets@octobus.net>
              //
              // This software may be used and distributed according to the terms of the
              // GNU General Public License version 2 or any later version.
              use crate::error::CommandError;
              use crate::ui::Ui;
              use crate::utils::path_utils::RelativizePaths;
              use clap::{Arg, SubCommand};
              use format_bytes::format_bytes;
              use hg;
              use hg::config::Config;
              use hg::dirstate::has_exec_bit;
              use hg::dirstate::status::StatusPath;
              use hg::dirstate::TruncatedTimestamp;
              use hg::dirstate::RANGE_MASK_31BIT;
              use hg::errors::{HgError, IoResultExt};
              use hg::lock::LockError;
              use hg::manifest::Manifest;
              use hg::matchers::AlwaysMatcher;
              use hg::repo::Repo;
              use hg::utils::files::get_bytes_from_os_string;
              use hg::utils::files::get_bytes_from_path;
              use hg::utils::files::get_path_from_bytes;
              use hg::utils::hg_path::{hg_path_to_path_buf, HgPath};
              use hg::StatusOptions;
              use log::info;
              use std::io;
              use std::path::PathBuf;
              /// Description of the `status` subcommand, passed to `about` when the
              /// command-line definition is built in `args`.
              pub const HELP_TEXT: &str = "
              Show changed files in the working directory
              This is a pure Rust version of `hg status`.
              Some options might be missing, check the list below.
              ";
              /// Build the clap definition of the `status` subcommand (alias `st`).
              ///
              /// Every option is declared with a name, a help string, a short flag and
              /// a long flag; options are registered in the order of the table below.
              pub fn args() -> clap::App<'static, 'static> {
                  // (name, help, short, long)
                  const FLAGS: [(&str, &str, &str, &str); 10] = [
                      ("all", "show status of all files", "-A", "--all"),
                      ("modified", "show only modified files", "-m", "--modified"),
                      ("added", "show only added files", "-a", "--added"),
                      ("removed", "show only removed files", "-r", "--removed"),
                      ("clean", "show only clean files", "-c", "--clean"),
                      ("deleted", "show only deleted files", "-d", "--deleted"),
                      (
                          "unknown",
                          "show only unknown (not tracked) files",
                          "-u",
                          "--unknown",
                      ),
                      ("ignored", "show only ignored files", "-i", "--ignored"),
                      (
                          "copies",
                          "show source of copied files (DEFAULT: ui.statuscopies)",
                          "-C",
                          "--copies",
                      ),
                      ("no-status", "hide status prefix", "-n", "--no-status"),
                  ];
                  let command =
                      SubCommand::with_name("status").alias("st").about(HELP_TEXT);
                  FLAGS.iter().fold(command, |app, &(name, help, short, long)| {
                      app.arg(Arg::with_name(name).help(help).short(short).long(long))
                  })
              }
              /// Pure data type allowing the caller to specify file states to display
              ///
              /// Each field is `true` when files in the corresponding state should be
              /// printed by `run`.
              #[derive(Copy, Clone, Debug)]
              pub struct DisplayStates {
                  /// Printed with the `M` prefix
                  pub modified: bool,
                  /// Printed with the `A` prefix
                  pub added: bool,
                  /// Printed with the `R` prefix
                  pub removed: bool,
                  /// Printed with the `C` prefix
                  pub clean: bool,
                  /// Printed with the `!` prefix
                  pub deleted: bool,
                  /// Printed with the `?` prefix
                  pub unknown: bool,
                  /// Printed with the `I` prefix
                  pub ignored: bool,
              }
              /// States displayed when `--all` is absent and no state-selecting flag was
              /// given on the command line: everything except clean and ignored files.
              pub const DEFAULT_DISPLAY_STATES: DisplayStates = DisplayStates {
                  modified: true,
                  added: true,
                  removed: true,
                  clean: false,
                  deleted: true,
                  unknown: true,
                  ignored: false,
              };
              /// States displayed for `--all`: every state is enabled, including clean
              /// and ignored files.
              pub const ALL_DISPLAY_STATES: DisplayStates = DisplayStates {
                  modified: true,
                  added: true,
                  removed: true,
                  clean: true,
                  deleted: true,
                  unknown: true,
                  ignored: true,
              };
              impl DisplayStates {
                  /// Return `true` when none of the states is selected for display.
                  pub fn is_empty(&self) -> bool {
                      let selected = [
                          self.modified,
                          self.added,
                          self.removed,
                          self.clean,
                          self.deleted,
                          self.unknown,
                          self.ignored,
                      ];
                      !selected.iter().any(|&wanted| wanted)
                  }
              }
              /// Entry point of `rhg status`.
              ///
              /// Computes the status of the working directory from the dirstate, prints
              /// the requested categories of files to stdout (warnings go to stderr),
              /// then tries to write dirstate updates back to disk without waiting for
              /// the working-directory lock.
              ///
              /// Returns `CommandError::unsupported` when:
              /// - `rhg.status` is not enabled in the configuration,
              /// - `ui.tweakdefaults` or `ui.statuscopies` is true,
              /// - `commands.status.terse` is set,
              /// - verbose output would be needed,
              /// - the repository is a sparse checkout or a narrow clone.
              ///
              /// Other errors (configuration, dirstate, I/O, ...) are propagated with `?`.
              pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
                  // `status` is opt-in: a missing `rhg.status` setting counts as disabled
                  let status_enabled_default = false;
                  let status_enabled = invocation.config.get_option(b"rhg", b"status")?;
                  if !status_enabled.unwrap_or(status_enabled_default) {
                      return Err(CommandError::unsupported(
                          "status is experimental in rhg (enable it with 'rhg.status = true' \
                          or enable fallback with 'rhg.on-unsupported = fallback')"
                      ));
                  }
                  // TODO: lift these limitations
                  if invocation.config.get_bool(b"ui", b"tweakdefaults")? {
                      return Err(CommandError::unsupported(
                          "ui.tweakdefaults is not yet supported with rhg status",
                      ));
                  }
                  if invocation.config.get_bool(b"ui", b"statuscopies")? {
                      return Err(CommandError::unsupported(
                          "ui.statuscopies is not yet supported with rhg status",
                      ));
                  }
                  if invocation
                      .config
                      .get(b"commands", b"status.terse")
                      .is_some()
                  {
                      return Err(CommandError::unsupported(
                          "status.terse is not yet supported with rhg status",
                      ));
                  }
                  let ui = invocation.ui;
                  let config = invocation.config;
                  let args = invocation.subcommand_args;
                  // NOTE(review): `args()` in this file declares no "print0" argument, so
                  // this presumably always reads as absent -- confirm.
                  let verbose = !ui.plain()
                      && !args.is_present("print0")
                      && (config.get_bool(b"ui", b"verbose")?
                          || config.get_bool(b"commands", b"status.verbose")?);
                  if verbose {
                      return Err(CommandError::unsupported(
                          "verbose status is not supported yet",
                      ));
                  }
                  // Decide which states to print: `--all` selects everything, explicit
                  // state flags select exactly those states, and no flag at all falls
                  // back to the defaults.
                  let all = args.is_present("all");
                  let display_states = if all {
                      // TODO when implementing `--quiet`: it excludes clean files
                      // from `--all`
                      ALL_DISPLAY_STATES
                  } else {
                      let requested = DisplayStates {
                          modified: args.is_present("modified"),
                          added: args.is_present("added"),
                          removed: args.is_present("removed"),
                          clean: args.is_present("clean"),
                          deleted: args.is_present("deleted"),
                          unknown: args.is_present("unknown"),
                          ignored: args.is_present("ignored"),
                      };
                      if requested.is_empty() {
                          DEFAULT_DISPLAY_STATES
                      } else {
                          requested
                      }
                  };
                  let no_status = args.is_present("no-status");
                  // NOTE(review): this function already returned "unsupported" above when
                  // `ui.statuscopies` is true, so the last term here looks like it can
                  // only be false -- confirm which of the two is intended.
                  let list_copies = all
                      || args.is_present("copies")
                      || config.get_bool(b"ui", b"statuscopies")?;
                  let repo = invocation.repo?;
                  if repo.has_sparse() || repo.has_narrow() {
                      return Err(CommandError::unsupported(
                          "rhg status is not supported for sparse checkouts or narrow clones yet"
                      ));
                  }
                  let mut dmap = repo.dirstate_map_mut()?;
                  let options = StatusOptions {
                      // we're currently supporting file systems with exec flags only
                      // anyway
                      check_exec: true,
                      list_clean: display_states.clean,
                      list_unknown: display_states.unknown,
                      list_ignored: display_states.ignored,
                      list_copies,
                      collect_traversed_dirs: false,
                  };
                  let (mut ds_status, pattern_warnings) = dmap.status(
                      &AlwaysMatcher,
                      repo.working_directory_path().to_owned(),
                      ignore_files(repo, config),
                      options,
                  )?;
                  // Report problems with ignore-pattern files on stderr
                  for warning in pattern_warnings {
                      match warning {
                          hg::PatternFileWarning::InvalidSyntax(path, syntax) => ui
                              .write_stderr(&format_bytes!(
                                  b"{}: ignoring invalid syntax '{}'\n",
                                  get_bytes_from_path(path),
                                  &*syntax
                              ))?,
                          hg::PatternFileWarning::NoSuchFile(path) => {
                              // Show the path relative to the working directory when it
                              // is inside it, and unchanged otherwise
                              let path = if let Ok(relative) =
                                  path.strip_prefix(repo.working_directory_path())
                              {
                                  relative
                              } else {
                                  &*path
                              };
                              ui.write_stderr(&format_bytes!(
                                  b"skipping unreadable pattern file '{}': \
                                    No such file or directory\n",
                                  get_bytes_from_path(path),
                              ))?
                          }
                      }
                  }
                  // Report paths that could not be examined on stderr
                  for (path, error) in ds_status.bad {
                      let error = match error {
                          hg::BadMatch::OsError(code) => {
                              std::io::Error::from_raw_os_error(code).to_string()
                          }
                          hg::BadMatch::BadType(ty) => {
                              format!("unsupported file type (type is {})", ty)
                          }
                      };
                      ui.write_stderr(&format_bytes!(
                          b"{}: {}\n",
                          path.as_bytes(),
                          error.as_bytes()
                      ))?
                  }
                  if !ds_status.unsure.is_empty() {
                      info!(
                          "Files to be rechecked by retrieval from filelog: {:?}",
                          ds_status.unsure.iter().map(|s| &s.path).collect::<Vec<_>>()
                      );
                  }
                  // Paths of unsure files that turned out to be unmodified; their
                  // dirstate entries are refreshed further down, under the lock.
                  let mut fixup = Vec::new();
                  // Unsure files can only end up as modified or clean, so they are only
                  // resolved when at least one of these two states is displayed.
                  if !ds_status.unsure.is_empty()
                      && (display_states.modified || display_states.clean)
                  {
                      let p1 = repo.dirstate_parents()?.p1;
                      let manifest = repo.manifest_for_node(p1).map_err(|e| {
                          CommandError::from((e, &*format!("{:x}", p1.short())))
                      })?;
                      for to_check in ds_status.unsure {
                          if unsure_is_modified(repo, &manifest, &to_check.path)? {
                              if display_states.modified {
                                  ds_status.modified.push(to_check);
                              }
                          } else {
                              if display_states.clean {
                                  ds_status.clean.push(to_check.clone());
                              }
                              fixup.push(to_check.path.into_owned())
                          }
                      }
                  }
                  // `commands.status.relative` takes precedence over `ui.relative-paths`
                  // when it is set; plain mode disables relative paths altogether.
                  let relative_paths = (!ui.plain())
                      && config
                          .get_option(b"commands", b"status.relative")?
                          .unwrap_or(config.get_bool(b"ui", b"relative-paths")?);
                  let output = DisplayStatusPaths {
                      ui,
                      no_status,
                      relativize: if relative_paths {
                          Some(RelativizePaths::new(repo)?)
                      } else {
                          None
                      },
                  };
                  // Print each requested category, in this fixed order
                  if display_states.modified {
                      output.display(b"M", ds_status.modified)?;
                  }
                  if display_states.added {
                      output.display(b"A", ds_status.added)?;
                  }
                  if display_states.removed {
                      output.display(b"R", ds_status.removed)?;
                  }
                  if display_states.deleted {
                      output.display(b"!", ds_status.deleted)?;
                  }
                  if display_states.unknown {
                      output.display(b"?", ds_status.unknown)?;
                  }
                  if display_states.ignored {
                      output.display(b"I", ds_status.ignored)?;
                  }
                  if display_states.clean {
                      output.display(b"C", ds_status.clean)?;
                  }
                  let mut dirstate_write_needed = ds_status.dirty;
                  let filesystem_time_at_status_start =
                      ds_status.filesystem_time_at_status_start;
                  // The fixups are only applied when a status start time is available
                  // (see the `if let` in the closure below), so without one they do not
                  // count as pending work.
                  if (fixup.is_empty() || filesystem_time_at_status_start.is_none())
                      && !dirstate_write_needed
                  {
                      // Nothing to update
                      return Ok(());
                  }
                  // Update the dirstate on disk if we can
                  let with_lock_result =
                      repo.try_with_wlock_no_wait(|| -> Result<(), CommandError> {
                          if let Some(mtime_boundary) = filesystem_time_at_status_start {
                              for hg_path in fixup {
                                  use std::os::unix::fs::MetadataExt;
                                  let fs_path = hg_path_to_path_buf(&hg_path)
                                      .expect("HgPath conversion");
                                  // Specifically do not reuse `fs_metadata` from
                                  // `unsure_is_modified` which was needed before reading
                                  // contents. Here we access metadata again after reading
                                  // content, in case it changed in the meantime.
                                  let fs_metadata = repo
                                      .working_directory_vfs()
                                      .symlink_metadata(&fs_path)?;
                                  // Only record the file as clean when a reliable mtime
                                  // could be obtained for it
                                  if let Some(mtime) =
                                      TruncatedTimestamp::for_reliable_mtime_of(
                                          &fs_metadata,
                                          &mtime_boundary,
                                      )
                                      .when_reading_file(&fs_path)?
                                  {
                                      let mode = fs_metadata.mode();
                                      let size = fs_metadata.len() as u32 & RANGE_MASK_31BIT;
                                      let mut entry = dmap
                                          .get(&hg_path)?
                                          .expect("ambiguous file not in dirstate");
                                      entry.set_clean(mode, size, mtime);
                                      dmap.add_file(&hg_path, entry)?;
                                      dirstate_write_needed = true
                                  }
                              }
                          }
                          drop(dmap); // Avoid "already mutably borrowed" RefCell panics
                          if dirstate_write_needed {
                              repo.write_dirstate()?
                          }
                          Ok(())
                      });
                  match with_lock_result {
                      // The lock was taken: propagate whatever the closure returned
                      Ok(closure_result) => closure_result?,
                      Err(LockError::AlreadyHeld) => {
                          // Not updating the dirstate is not ideal but not critical:
                          // don’t keep our caller waiting until some other Mercurial
                          // process releases the lock.
                      }
                      Err(LockError::Other(HgError::IoError { error, .. }))
                          if error.kind() == io::ErrorKind::PermissionDenied =>
                      {
                          // `hg status` on a read-only repository is fine
                      }
                      Err(LockError::Other(error)) => {
                          // Report other I/O errors
                          Err(error)?
                      }
                  }
                  Ok(())
              }
              /// Collect the ignore-pattern files to use for `repo`.
              ///
              /// The result starts with the working directory's `.hgignore` when that
              /// file exists, followed by the value of every `ui.ignore` and
              /// `ui.ignore.*` configuration item, joined onto the working directory
              /// path.
              fn ignore_files(repo: &Repo, config: &Config) -> Vec<PathBuf> {
                  let mut collected = Vec::new();
                  let hgignore = repo.working_directory_vfs().join(".hgignore");
                  if hgignore.exists() {
                      collected.push(hgignore);
                  }
                  for (name, raw_path) in config.iter_section(b"ui") {
                      let is_ignore_item =
                          name == b"ignore" || name.starts_with(b"ignore.");
                      if !is_ignore_item {
                          continue;
                      }
                      // TODO: expand "~/" and environment variable here, like Python
                      // does with `os.path.expanduser` and `os.path.expandvars`
                      let configured = get_path_from_bytes(raw_path);
                      collected.push(repo.working_directory_path().join(configured));
                  }
                  collected
              }
              /// Settings shared by every call to `display` when printing status lines.
              struct DisplayStatusPaths<'a> {
                  /// Output handle the lines are written to (stdout)
                  ui: &'a Ui,
                  /// When true, each path is printed without its status prefix
                  no_status: bool,
                  /// When set, applied to each path before it is printed
                  relativize: Option<RelativizePaths>,
              }
              impl DisplayStatusPaths<'_> {
                  /// Write the entries of `paths` to stdout, sorted.
                  ///
                  /// Each entry produces one line with its path (relativized when
                  /// `self.relativize` is set), preceded by `status_prefix` and a space
                  /// unless `self.no_status` is set. An entry that has a copy source
                  /// produces a second line with that source, indented by two spaces.
                  // Probably more elegant to use a Deref or Borrow trait rather than
                  // hardcode HgPathBuf, but probably not really useful at this point
                  fn display(
                      &self,
                      status_prefix: &[u8],
                      mut paths: Vec<StatusPath<'_>>,
                  ) -> Result<(), CommandError> {
                      paths.sort_unstable();
                      for entry in paths {
                          let StatusPath { path, copy_source } = entry;
                          // Keep the relativized bytes alive for as long as `shown`
                          // borrows from them
                          let relativized = match &self.relativize {
                              Some(relativizer) => Some(relativizer.relativize(&path)),
                              None => None,
                          };
                          let shown: &[u8] = match &relativized {
                              Some(bytes) => &**bytes,
                              None => path.as_bytes(),
                          };
                          // TODO optim, probably lots of unneeded copies here, especially
                          // if out stream is buffered
                          let line = if self.no_status {
                              format_bytes!(b"{}\n", shown)
                          } else {
                              format_bytes!(b"{} {}\n", status_prefix, shown)
                          };
                          self.ui.write_stdout(&line)?;
                          if let Some(source) = copy_source {
                              let source_line =
                                  format_bytes!(b"  {}\n", source.as_bytes());
                              self.ui.write_stdout(&source_line)?;
                          }
                      }
                      Ok(())
                  }
              }
              /// Check if a file is modified by comparing actual repo store and file system.
              ///
              /// This is meant to be used for those files that the dirstate cannot
              /// resolve, due to time resolution limits.
              ///
              /// Returns `Ok(true)` when the file differs from its entry in `manifest`
              /// (different flags, different length, or different contents) and
              /// `Ok(false)` otherwise. I/O and revlog errors are propagated.
              ///
              /// Panics if `hg_path` cannot be converted to a filesystem path or is not
              /// present in `manifest`.
              fn unsure_is_modified(
                  repo: &Repo,
                  manifest: &Manifest,
                  hg_path: &HgPath,
              ) -> Result<bool, HgError> {
                  let vfs = repo.working_directory_vfs();
                  let fs_path = hg_path_to_path_buf(hg_path).expect("HgPath conversion");
                  let fs_metadata = vfs.symlink_metadata(&fs_path)?;
                  let is_symlink = fs_metadata.file_type().is_symlink();
                  // TODO: Also account for `FALLBACK_SYMLINK` and `FALLBACK_EXEC` from the
                  // dirstate
                  // Flag of the file as found on disk: `l` for a symlink, `x` for an
                  // executable file, none otherwise
                  let fs_flags = if is_symlink {
                      Some(b'l')
                  } else if has_exec_bit(&fs_metadata) {
                      Some(b'x')
                  } else {
                      None
                  };
                  let entry = manifest
                      .find_by_path(hg_path)?
                      .expect("ambgious file not in p1");
                  // A flag change alone makes the file modified; no need to look at
                  // its contents
                  if entry.flags != fs_flags {
                      return Ok(true);
                  }
                  let filelog = repo.filelog(hg_path)?;
                  let fs_len = fs_metadata.len();
                  let filelog_entry =
                      filelog.entry_for_node(entry.node_id()?).map_err(|_| {
                          HgError::corrupted("filelog missing node from manifest")
                      })?;
-                 // TODO: check `fs_len` here like below, but based on
-                 // `RevlogEntry::uncompressed_len` without decompressing the full filelog
-                 // contents where possible. This is only valid if the revlog data does not
-                 // contain metadata. See how Python’s `revlog.rawsize` calls
-                 // `storageutil.filerevisioncopied`.
-                 // (Maybe also check for content-modifying flags? See `revlog.size`.)
-                 let filelog_data = filelog_entry.data()?;
-                 let contents_in_p1 = filelog_data.file_data()?;
-                 if contents_in_p1.len() as u64 != fs_len {
-                     // No need to read the file contents:
+                 if filelog_entry.file_data_len_not_equal_to(fs_len) {
+                     // No need to read file contents:
+                     // it cannot be equal if it has a different length.
+                     return Ok(true);
+                 }
+                 let p1_filelog_data = filelog_entry.data()?;
+                 let p1_contents = p1_filelog_data.file_data()?;
+                 if p1_contents.len() as u64 != fs_len {
+                     // No need to read file contents:
                      // it cannot be equal if it has a different length.
                      return Ok(true);
                  }
                  // For a symlink, the "contents" compared are the bytes of its target
                  let fs_contents = if is_symlink {
                      get_bytes_from_os_string(vfs.read_link(fs_path)?.into_os_string())
                  } else {
                      vfs.read(fs_path)?
                  };
-                 Ok(contents_in_p1 != &*fs_contents)
+                 Ok(p1_contents != &*fs_contents)
              }

tests/test-issue6528.t

0 +8 -8

              ===============================================================
              Test non-regression on the corruption associated with issue6528
              ===============================================================
              Setup
              =====
                $ hg init base-repo
                $ cd base-repo
                $ cat <<EOF > a.txt
                > 1
                > 2
                > 3
                > 4
                > 5
                > 6
                > EOF
                $ hg add a.txt
                $ hg commit -m 'c_base_c - create a.txt'
              Modify a.txt
                $ sed -e 's/1/foo/' a.txt > a.tmp; mv a.tmp a.txt
                $ hg commit -m 'c_modify_c - modify a.txt'
              Modify and rename a.txt to b.txt
                $ hg up -r "desc('c_base_c')"
 files updated, 0 files merged, 0 files removed, 0 files unresolved
                $ sed -e 's/6/bar/' a.txt > a.tmp; mv a.tmp a.txt
                $ hg mv a.txt b.txt
                $ hg commit -m 'c_rename_c - rename and modify a.txt to b.txt'
                created new head
              Merge each branch
                $ hg merge -r "desc('c_modify_c')"
                merging b.txt and a.txt to b.txt
 files updated, 1 files merged, 0 files removed, 0 files unresolved
                (branch merge, don't forget to commit)
                $ hg commit -m 'c_merge_c: commit merge'
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
 2 05b806ebe5ea 000000000000 000000000000
 3 a58b36ad6b65 000000000000 05b806ebe5ea
              Check commit Graph
                $ hg log -G
                @    changeset:   3:a1cc2bdca0aa
                |\   tag:         tip
                | |  parent:      2:615c6ccefd15
                | |  parent:      1:373d507f4667
                | |  user:        test
                | |  date:        Thu Jan 01 00:00:00 1970 +0000
                | |  summary:     c_merge_c: commit merge
                | |
                | o  changeset:   2:615c6ccefd15
                | |  parent:      0:f5a5a568022f
                | |  user:        test
                | |  date:        Thu Jan 01 00:00:00 1970 +0000
                | |  summary:     c_rename_c - rename and modify a.txt to b.txt
                | |
                o |  changeset:   1:373d507f4667
                |/   user:        test
                |    date:        Thu Jan 01 00:00:00 1970 +0000
                |    summary:     c_modify_c - modify a.txt
                |
                o  changeset:   0:f5a5a568022f
                   user:        test
                   date:        Thu Jan 01 00:00:00 1970 +0000
                   summary:     c_base_c - create a.txt
                $ hg cat -r . b.txt
                foo
 
 
 
 
                bar
                $ cat b.txt
                foo
 
 
 
 
                bar
                $ cd ..
              Check the lack of corruption
              ============================
                $ hg clone --pull base-repo cloned
                requesting all changes
                adding changesets
                adding manifests
                adding file changes
                added 4 changesets with 4 changes to 2 files
                new changesets f5a5a568022f:a1cc2bdca0aa
                updating to branch default
 files updated, 0 files merged, 0 files removed, 0 files unresolved
                $ cd cloned
                $ hg up -r "desc('c_merge_c')"
 files updated, 0 files merged, 0 files removed, 0 files unresolved
              Status is buggy, even with debugrebuilddirstate
                $ hg cat -r . b.txt
                foo
 
 
 
 
                bar
                $ cat b.txt
                foo
 
 
 
 
                bar
                $ hg status
                $ hg debugrebuilddirstate
                $ hg status
              the history was altered
              in theory p1/p2 order does not matter but in practice p1 == nullid is used as a
              marker that some metadata are present and should be fetched.
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
 2 05b806ebe5ea 000000000000 000000000000
 3 a58b36ad6b65 000000000000 05b806ebe5ea
              Check commit Graph
                $ hg log -G
                @    changeset:   3:a1cc2bdca0aa
                |\   tag:         tip
                | |  parent:      2:615c6ccefd15
                | |  parent:      1:373d507f4667
                | |  user:        test
                | |  date:        Thu Jan 01 00:00:00 1970 +0000
                | |  summary:     c_merge_c: commit merge
                | |
                | o  changeset:   2:615c6ccefd15
                | |  parent:      0:f5a5a568022f
                | |  user:        test
                | |  date:        Thu Jan 01 00:00:00 1970 +0000
                | |  summary:     c_rename_c - rename and modify a.txt to b.txt
                | |
                o |  changeset:   1:373d507f4667
                |/   user:        test
                |    date:        Thu Jan 01 00:00:00 1970 +0000
                |    summary:     c_modify_c - modify a.txt
                |
                o  changeset:   0:f5a5a568022f
                   user:        test
                   date:        Thu Jan 01 00:00:00 1970 +0000
                   summary:     c_base_c - create a.txt
              Test the command that fixes the issue
              =====================================
              Restore a broken repository with multiple broken revisions and a filename that
              would get encoded to test the `report` options.
              It's a tarball because unbundle might magically fix the issue later.
                $ cd ..
                $ mkdir repo-to-fix
                $ cd repo-to-fix
              #if windows
              tar interprets `:` in paths (like `C:`) as being remote, force local on Windows
              only since some versions of tar don't have this flag.
                $ tar --force-local -xf $TESTDIR/bundles/issue6528.tar
              #else
                $ tar xf $TESTDIR/bundles/issue6528.tar
              #endif
              Check that the issue is present
              (It is now also present with rhg, which resolves ambiguous files at the end
              of status without reading their content if the size differs, reading the
              expected size without resolving filelog deltas where possible.)
                $ hg st
-               M D.txt (no-rhg !)
-               M b.txt (no-rhg !)
+               M D.txt
+               M b.txt
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
 2 05b806ebe5ea 000000000000 000000000000
 3 a58b36ad6b65 05b806ebe5ea 000000000000
 6 216a5fe8b8ed 000000000000 000000000000
 7 ea4f2f2463cc 216a5fe8b8ed 000000000000
                $ hg debugrevlogindex D.txt
                   rev linkrev nodeid       p1           p2
 6 2a8d3833f2fb 000000000000 000000000000
 7 2a80419dfc31 2a8d3833f2fb 000000000000
              Dry-run the fix
                $ hg debug-repair-issue6528 --dry-run
                found affected revision 1 for filelog 'data/D.txt.i'
                found affected revision 1 for filelog 'data/b.txt.i'
                found affected revision 3 for filelog 'data/b.txt.i'
                $ hg st
-               M D.txt (no-rhg !)
-               M b.txt (no-rhg !)
+               M D.txt
+               M b.txt
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
 2 05b806ebe5ea 000000000000 000000000000
 3 a58b36ad6b65 05b806ebe5ea 000000000000
 6 216a5fe8b8ed 000000000000 000000000000
 7 ea4f2f2463cc 216a5fe8b8ed 000000000000
                $ hg debugrevlogindex D.txt
                   rev linkrev nodeid       p1           p2
 6 2a8d3833f2fb 000000000000 000000000000
 7 2a80419dfc31 2a8d3833f2fb 000000000000
              Test the --paranoid option
                $ hg debug-repair-issue6528 --dry-run --paranoid
                found affected revision 1 for filelog 'data/D.txt.i'
                found affected revision 1 for filelog 'data/b.txt.i'
                found affected revision 3 for filelog 'data/b.txt.i'
                $ hg st
-               M D.txt (no-rhg !)
-               M b.txt (no-rhg !)
+               M D.txt
+               M b.txt
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
                     0       2 05b806ebe5ea 000000000000 000000000000
                     1       3 a58b36ad6b65 05b806ebe5ea 000000000000
                     2       6 216a5fe8b8ed 000000000000 000000000000
                     3       7 ea4f2f2463cc 216a5fe8b8ed 000000000000
                $ hg debugrevlogindex D.txt
                   rev linkrev nodeid       p1           p2
                     0       6 2a8d3833f2fb 000000000000 000000000000
                     1       7 2a80419dfc31 2a8d3833f2fb 000000000000
              Run the fix
                $ hg debug-repair-issue6528
                found affected revision 1 for filelog 'data/D.txt.i'
                repaired revision 1 of 'filelog data/D.txt.i'
                found affected revision 1 for filelog 'data/b.txt.i'
                found affected revision 3 for filelog 'data/b.txt.i'
                repaired revision 1 of 'filelog data/b.txt.i'
                repaired revision 3 of 'filelog data/b.txt.i'
              Check that the fix worked and that running it twice does nothing
                $ hg st
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
                     0       2 05b806ebe5ea 000000000000 000000000000
                     1       3 a58b36ad6b65 000000000000 05b806ebe5ea
                     2       6 216a5fe8b8ed 000000000000 000000000000
                     3       7 ea4f2f2463cc 000000000000 216a5fe8b8ed
                $ hg debugrevlogindex D.txt
                   rev linkrev nodeid       p1           p2
                     0       6 2a8d3833f2fb 000000000000 000000000000
                     1       7 2a80419dfc31 000000000000 2a8d3833f2fb
                $ hg debug-repair-issue6528
                no affected revisions were found
                $ hg st
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
                     0       2 05b806ebe5ea 000000000000 000000000000
                     1       3 a58b36ad6b65 000000000000 05b806ebe5ea
                     2       6 216a5fe8b8ed 000000000000 000000000000
                     3       7 ea4f2f2463cc 000000000000 216a5fe8b8ed
                $ hg debugrevlogindex D.txt
                   rev linkrev nodeid       p1           p2
                     0       6 2a8d3833f2fb 000000000000 000000000000
                     1       7 2a80419dfc31 000000000000 2a8d3833f2fb
              Try using the report options
              ----------------------------
                $ cd ..
                $ mkdir repo-to-fix-report
                $ cd repo-to-fix
              #if windows
              tar interprets `:` in paths (like `C:`) as being remote, force local on Windows
              only since some versions of tar don't have this flag.
                $ tar --force-local -xf $TESTDIR/bundles/issue6528.tar
              #else
                $ tar xf $TESTDIR/bundles/issue6528.tar
              #endif
                $ hg debug-repair-issue6528 --to-report $TESTTMP/report.txt
                found affected revision 1 for filelog 'data/D.txt.i'
                found affected revision 1 for filelog 'data/b.txt.i'
                found affected revision 3 for filelog 'data/b.txt.i'
                $ cat $TESTTMP/report.txt
                2a80419dfc31d7dfb308ac40f3f138282de7d73b D.txt
                a58b36ad6b6545195952793099613c2116f3563b,ea4f2f2463cca5b29ddf3461012b8ce5c6dac175 b.txt
                $ hg debug-repair-issue6528 --from-report $TESTTMP/report.txt --dry-run
                loading report file '$TESTTMP/report.txt'
                found affected revision 1 for filelog 'D.txt'
                found affected revision 1 for filelog 'b.txt'
                found affected revision 3 for filelog 'b.txt'
                $ hg st
-               M D.txt (no-rhg !)
-               M b.txt (no-rhg !)
+               M D.txt
+               M b.txt
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
                     0       2 05b806ebe5ea 000000000000 000000000000
                     1       3 a58b36ad6b65 05b806ebe5ea 000000000000
                     2       6 216a5fe8b8ed 000000000000 000000000000
                     3       7 ea4f2f2463cc 216a5fe8b8ed 000000000000
                $ hg debugrevlogindex D.txt
                   rev linkrev nodeid       p1           p2
                     0       6 2a8d3833f2fb 000000000000 000000000000
                     1       7 2a80419dfc31 2a8d3833f2fb 000000000000
                $ hg debug-repair-issue6528 --from-report $TESTTMP/report.txt
                loading report file '$TESTTMP/report.txt'
                found affected revision 1 for filelog 'D.txt'
                repaired revision 1 of 'filelog data/D.txt.i'
                found affected revision 1 for filelog 'b.txt'
                found affected revision 3 for filelog 'b.txt'
                repaired revision 1 of 'filelog data/b.txt.i'
                repaired revision 3 of 'filelog data/b.txt.i'
                $ hg st
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
                     0       2 05b806ebe5ea 000000000000 000000000000
                     1       3 a58b36ad6b65 000000000000 05b806ebe5ea
                     2       6 216a5fe8b8ed 000000000000 000000000000
                     3       7 ea4f2f2463cc 000000000000 216a5fe8b8ed
                $ hg debugrevlogindex D.txt
                   rev linkrev nodeid       p1           p2
                     0       6 2a8d3833f2fb 000000000000 000000000000
                     1       7 2a80419dfc31 000000000000 2a8d3833f2fb
              Check that the revision is not "fixed" again
                $ hg debug-repair-issue6528 --from-report $TESTTMP/report.txt
                loading report file '$TESTTMP/report.txt'
                revision 2a80419dfc31d7dfb308ac40f3f138282de7d73b of file 'D.txt' is not affected
                no affected revisions were found for 'D.txt'
                revision a58b36ad6b6545195952793099613c2116f3563b of file 'b.txt' is not affected
                revision ea4f2f2463cca5b29ddf3461012b8ce5c6dac175 of file 'b.txt' is not affected
                no affected revisions were found for 'b.txt'
                $ hg st
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
                     0       2 05b806ebe5ea 000000000000 000000000000
                     1       3 a58b36ad6b65 000000000000 05b806ebe5ea
                     2       6 216a5fe8b8ed 000000000000 000000000000
                     3       7 ea4f2f2463cc 000000000000 216a5fe8b8ed
                $ hg debugrevlogindex D.txt
                   rev linkrev nodeid       p1           p2
                     0       6 2a8d3833f2fb 000000000000 000000000000
                     1       7 2a80419dfc31 000000000000 2a8d3833f2fb
              Try it with a non-inline revlog
              -------------------------------
                $ cd ..
                $ mkdir $TESTTMP/ext
                $ cat << EOF > $TESTTMP/ext/small_inline.py
                > from mercurial import revlog
                > revlog._maxinline = 8
                > EOF
                $ cat << EOF >> $HGRCPATH
                > [extensions]
                > small_inline=$TESTTMP/ext/small_inline.py
                > EOF
                $ mkdir repo-to-fix-not-inline
                $ cd repo-to-fix-not-inline
              #if windows
              tar interprets `:` in paths (like `C:`) as being remote, force local on Windows
              only since some versions of tar don't have this flag.
                $ tar --force-local -xf $TESTDIR/bundles/issue6528.tar
              #else
                $ tar xf $TESTDIR/bundles/issue6528.tar
              #endif
                $ echo b >> b.txt
                $ hg commit -qm "inline -> separate"
                $ find .hg -name *b.txt.d
                .hg/store/data/b.txt.d
              Status is correct, but the problem is still there, in the earlier revision
                $ hg st
                $ hg up 3
 files updated, 0 files merged, 1 files removed, 0 files unresolved
                $ hg st
                M b.txt
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
                     0       2 05b806ebe5ea 000000000000 000000000000
                     1       3 a58b36ad6b65 05b806ebe5ea 000000000000
                     2       6 216a5fe8b8ed 000000000000 000000000000
                     3       7 ea4f2f2463cc 216a5fe8b8ed 000000000000
                     4       8 db234885e2fe ea4f2f2463cc 000000000000
                $ hg debugrevlogindex D.txt
                   rev linkrev nodeid       p1           p2
                     0       6 2a8d3833f2fb 000000000000 000000000000
                     1       7 2a80419dfc31 2a8d3833f2fb 000000000000
                     2       8 65aecc89bb5d 2a80419dfc31 000000000000
              Run the fix on the non-inline revlog
                $ hg debug-repair-issue6528
                found affected revision 1 for filelog 'data/D.txt.i'
                repaired revision 1 of 'filelog data/D.txt.i'
                found affected revision 1 for filelog 'data/b.txt.i'
                found affected revision 3 for filelog 'data/b.txt.i'
                repaired revision 1 of 'filelog data/b.txt.i'
                repaired revision 3 of 'filelog data/b.txt.i'
              Check that it worked
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
                     0       2 05b806ebe5ea 000000000000 000000000000
                     1       3 a58b36ad6b65 000000000000 05b806ebe5ea
                     2       6 216a5fe8b8ed 000000000000 000000000000
                     3       7 ea4f2f2463cc 000000000000 216a5fe8b8ed
                     4       8 db234885e2fe ea4f2f2463cc 000000000000
                $ hg debugrevlogindex D.txt
                   rev linkrev nodeid       p1           p2
                     0       6 2a8d3833f2fb 000000000000 000000000000
                     1       7 2a80419dfc31 000000000000 2a8d3833f2fb
                     2       8 65aecc89bb5d 2a80419dfc31 000000000000
                $ hg debug-repair-issue6528
                no affected revisions were found
                $ hg st
                $ cd ..
              Applying a bad bundle should fix it on the fly
              ----------------------------------------------
              from a v1 bundle
              ~~~~~~~~~~~~~~~~
                $ hg debugbundle  --spec "$TESTDIR"/bundles/issue6528.hg-v1
                bzip2-v1
                $ hg init unbundle-v1
                $ cd unbundle-v1
                $ hg unbundle "$TESTDIR"/bundles/issue6528.hg-v1
                adding changesets
                adding manifests
                adding file changes
                added 8 changesets with 12 changes to 4 files
                new changesets f5a5a568022f:3beabb508514 (8 drafts)
                (run 'hg update' to get a working copy)
              Check that revisions were fixed on the fly
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
                     0       2 05b806ebe5ea 000000000000 000000000000
                     1       3 a58b36ad6b65 000000000000 05b806ebe5ea
                     2       6 216a5fe8b8ed 000000000000 000000000000
                     3       7 ea4f2f2463cc 000000000000 216a5fe8b8ed
                $ hg debugrevlogindex D.txt
                   rev linkrev nodeid       p1           p2
                     0       6 2a8d3833f2fb 000000000000 000000000000
                     1       7 2a80419dfc31 000000000000 2a8d3833f2fb
              That we don't see the symptoms of the bug
                $ hg up -- -1
 files updated, 0 files merged, 0 files removed, 0 files unresolved
                $ hg status
              And that the repair command does not find anything to fix
                $ hg debug-repair-issue6528
                no affected revisions were found
                $ cd ..
              from a v2 bundle
              ~~~~~~~~~~~~~~~~
                $ hg debugbundle --spec "$TESTDIR"/bundles/issue6528.hg-v2
                bzip2-v2
                $ hg init unbundle-v2
                $ cd unbundle-v2
                $ hg unbundle "$TESTDIR"/bundles/issue6528.hg-v2
                adding changesets
                adding manifests
                adding file changes
                added 8 changesets with 12 changes to 4 files
                new changesets f5a5a568022f:3beabb508514 (8 drafts)
                (run 'hg update' to get a working copy)
              Check that revisions were fixed on the fly
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
                     0       2 05b806ebe5ea 000000000000 000000000000
                     1       3 a58b36ad6b65 000000000000 05b806ebe5ea
                     2       6 216a5fe8b8ed 000000000000 000000000000
                     3       7 ea4f2f2463cc 000000000000 216a5fe8b8ed
                $ hg debugrevlogindex D.txt
                   rev linkrev nodeid       p1           p2
                     0       6 2a8d3833f2fb 000000000000 000000000000
                     1       7 2a80419dfc31 000000000000 2a8d3833f2fb
              That we don't see the symptoms of the bug
                $ hg up -- -1
 files updated, 0 files merged, 0 files removed, 0 files unresolved
                $ hg status
              And that the repair command does not find anything to fix
                $ hg debug-repair-issue6528
                no affected revisions were found
                $ cd ..
              A config option can disable the fixing of the bad bundle on the fly
              -------------------------------------------------------------------
              from a v1 bundle
              ~~~~~~~~~~~~~~~~
                $ hg debugbundle  --spec "$TESTDIR"/bundles/issue6528.hg-v1
                bzip2-v1
                $ hg init unbundle-v1-no-fix
                $ cd unbundle-v1-no-fix
                $ hg unbundle "$TESTDIR"/bundles/issue6528.hg-v1 --config storage.revlog.issue6528.fix-incoming=no
                adding changesets
                adding manifests
                adding file changes
                added 8 changesets with 12 changes to 4 files
                new changesets f5a5a568022f:3beabb508514 (8 drafts)
                (run 'hg update' to get a working copy)
              Check that revisions were not fixed on the fly
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
                     0       2 05b806ebe5ea 000000000000 000000000000
                     1       3 a58b36ad6b65 05b806ebe5ea 000000000000
                     2       6 216a5fe8b8ed 000000000000 000000000000
                     3       7 ea4f2f2463cc 216a5fe8b8ed 000000000000
                $ hg debugrevlogindex D.txt
                   rev linkrev nodeid       p1           p2
                     0       6 2a8d3833f2fb 000000000000 000000000000
                     1       7 2a80419dfc31 2a8d3833f2fb 000000000000
              That we do see the symptoms of the bug
                $ hg up -- -1
 files updated, 0 files merged, 0 files removed, 0 files unresolved
                $ hg status
                M D.txt (?)
                M b.txt (?)
              And that the repair command finds issues to fix.
                $ hg debug-repair-issue6528 --dry-run
                found affected revision 1 for filelog 'data/D.txt.i'
                found affected revision 1 for filelog 'data/b.txt.i'
                found affected revision 3 for filelog 'data/b.txt.i'
                $ cd ..
              from a v2 bundle
              ~~~~~~~~~~~~~~~~
                $ hg debugbundle --spec "$TESTDIR"/bundles/issue6528.hg-v2
                bzip2-v2
                $ hg init unbundle-v2-no-fix
                $ cd unbundle-v2-no-fix
                $ hg unbundle "$TESTDIR"/bundles/issue6528.hg-v2 --config storage.revlog.issue6528.fix-incoming=no
                adding changesets
                adding manifests
                adding file changes
                added 8 changesets with 12 changes to 4 files
                new changesets f5a5a568022f:3beabb508514 (8 drafts)
                (run 'hg update' to get a working copy)
              Check that revisions were not fixed on the fly
                $ hg debugrevlogindex b.txt
                   rev linkrev nodeid       p1           p2
                     0       2 05b806ebe5ea 000000000000 000000000000
                     1       3 a58b36ad6b65 05b806ebe5ea 000000000000
                     2       6 216a5fe8b8ed 000000000000 000000000000
                     3       7 ea4f2f2463cc 216a5fe8b8ed 000000000000
                $ hg debugrevlogindex D.txt
                   rev linkrev nodeid       p1           p2
                     0       6 2a8d3833f2fb 000000000000 000000000000
                     1       7 2a80419dfc31 2a8d3833f2fb 000000000000
              That we do see the symptoms of the bug
                $ hg up -- -1
 files updated, 0 files merged, 0 files removed, 0 files unresolved
                $ hg status
                M D.txt (?)
                M b.txt (?)
              And that the repair command finds issues to fix.
                $ hg debug-repair-issue6528 --dry-run
                found affected revision 1 for filelog 'data/D.txt.i'
                found affected revision 1 for filelog 'data/b.txt.i'
                found affected revision 3 for filelog 'data/b.txt.i'
                $ cd ..

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages