upstream/mercurial-mirror Commit - r49289:96ea4db4

rhg: fix a crash on non-generaldelta revlogs...

Arseniy Alekseyev -

r49289:96ea4db4 default

parent child

rust/hg-core/src/revlog/index.rs

0 +28 -9

             use std::convert::TryInto;
             use std::ops::Deref;
             use byteorder::{BigEndian, ByteOrder};
             use crate::errors::HgError;
             use crate::revlog::node::Node;
             use crate::revlog::{Revision, NULL_REVISION};
             pub const INDEX_ENTRY_SIZE: usize = 64;
             /// The first four bytes of a revlog index, kept unparsed.
             ///
             /// Bytes `0..2` hold the format flags and bytes `2..4` the format version;
             /// both are decoded as big-endian `u16` by the accessors of this type.
             pub struct IndexHeader {
                 header_bytes: [u8; 4],
             }
             /// Format flags decoded from the first two bytes of the index header.
             #[derive(Copy, Clone)]
             pub struct IndexHeaderFlags {
                 flags: u16,
             }
             /// Corresponds to the high bits of `_format_flags` in python
             impl IndexHeaderFlags {
                 /// Bit mask matching FLAG_INLINE_DATA in python
                 const INLINE_DATA: u16 = 1 << 0;
                 /// Bit mask matching FLAG_GENERALDELTA in python
                 const GENERALDELTA: u16 = 1 << 1;
                 /// Whether the inline-data bit is set.
                 pub fn is_inline(self) -> bool {
                     (self.flags & Self::INLINE_DATA) != 0
                 }
                 /// Whether the generaldelta bit is set.
                 pub fn uses_generaldelta(self) -> bool {
                     (self.flags & Self::GENERALDELTA) != 0
                 }
             }
             /// Corresponds to the INDEX_HEADER structure,
             /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
             impl IndexHeader {
                 /// The only revlog version currently supported by rhg.
                 const REVLOGV1: u16 = 1;
                 const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
                     // We treat an empty file as a valid index with no entries.
                     // Here we make an arbitrary choice of what we assume the format of the
                     // index to be (V1, using generaldelta).
                     // This doesn't matter too much, since we're only doing read-only
                     // access. but the value corresponds to the `new_header` variable in
                     // `revlog.py`, `_loadindex`
                     header_bytes: [0, 3, 0, 1],
                 };
                 /// Decode the flag half (bytes `0..2`, big-endian) of the header.
                 fn format_flags(&self) -> IndexHeaderFlags {
                     // No "unknown flags" check here, unlike in python. Maybe there should
                     // be.
                     let flags = BigEndian::read_u16(&self.header_bytes[0..2]);
                     IndexHeaderFlags { flags }
                 }
                 /// Corresponds to `_format_version` in Python.
                 fn format_version(&self) -> u16 {
                     BigEndian::read_u16(&self.header_bytes[2..4])
                 }
                 /// Extract the header from the start of the index bytes.
                 ///
                 /// An empty input yields `EMPTY_INDEX_HEADER`; a non-empty input
                 /// shorter than four bytes is reported as a corrupted revlog.
                 fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
                     if index_bytes.is_empty() {
                         return Ok(IndexHeader::EMPTY_INDEX_HEADER);
                     }
                     match index_bytes.get(0..4) {
                         None => Err(HgError::corrupted(
                             "corrupted revlog: can't read the index format header",
                         )),
                         Some(prefix) => Ok(IndexHeader {
                             // `prefix` is exactly four bytes long, so the conversion
                             // cannot fail.
                             header_bytes: prefix.try_into().expect("impossible"),
                         }),
                     }
                 }
             }
             /// A Revlog index
             pub struct Index {
                 /// Raw bytes the index was created from.
                 bytes: Box<dyn Deref<Target = [u8]> + Send>,
                 /// Offsets of starts of index blocks.
                 /// Only needed when the index is interleaved with data.
                 offsets: Option<Vec<usize>>,
                 /// Value of the generaldelta flag read from the index header.
+                uses_generaldelta: bool,
             }
             impl Index {
                 /// Create an index from bytes.
                 /// Calculate the start of each entry when is_inline is true.
                 ///
                 /// Fails when the header cannot be parsed, when the format version is
                 /// not `REVLOGV1`, or when walking an inline index does not end exactly
                 /// at the end of `bytes`.
                 pub fn new(
                     bytes: Box<dyn Deref<Target = [u8]> + Send>,
                 ) -> Result<Self, HgError> {
                     let header = IndexHeader::parse(bytes.as_ref())?;
                     if header.format_version() != IndexHeader::REVLOGV1 {
                         // A proper new version should have had a repo/store
                         // requirement.
                         return Err(HgError::corrupted("unsupported revlog version"));
                     }
+                    // This is only correct because we know version is REVLOGV1.
+                    // In v2 we always use generaldelta, while in v0 we never use
+                    // generaldelta. Similar for [is_inline] (it's only used in v1).
+                    let uses_generaldelta = header.format_flags().uses_generaldelta();
                     if header.format_flags().is_inline() {
                         // Walk the interleaved file: every index entry is followed
                         // by `compressed_len` bytes of data, so the next entry starts
                         // that many bytes after the end of the current one.
                         let mut offset: usize = 0;
                         let mut offsets = Vec::new();
                         while offset + INDEX_ENTRY_SIZE <= bytes.len() {
                             offsets.push(offset);
                             let end = offset + INDEX_ENTRY_SIZE;
                             let entry = IndexEntry {
                                 bytes: &bytes[offset..end],
                                 offset_override: None,
                             };
                             offset += INDEX_ENTRY_SIZE + entry.compressed_len();
                         }
                         // The walk must land exactly on the end of the file; anything
                         // else means a truncated entry or truncated data.
                         if offset == bytes.len() {
                             Ok(Self {
                                 bytes,
                                 offsets: Some(offsets),
+                                uses_generaldelta,
                             })
                         } else {
                             Err(HgError::corrupted("unexpected inline revlog length")
                                 .into())
                         }
                     } else {
                         Ok(Self {
                             bytes,
                             offsets: None,
+                            uses_generaldelta,
                         })
                     }
                 }
                 /// Value of the generaldelta flag, as read from the header in `new`.
+                pub fn uses_generaldelta(&self) -> bool {
+                    self.uses_generaldelta
+                }
                 /// Value of the inline flag.
                 ///
                 /// The entry offsets are only computed for inline indexes, so their
                 /// presence is what records the flag.
                 pub fn is_inline(&self) -> bool {
                     self.offsets.is_some()
                 }
                 /// Return a slice of bytes if `revlog` is inline. Panic if not.
                 ///
                 /// Also panics if `start..end` is not a valid range of the index bytes.
                 pub fn data(&self, start: usize, end: usize) -> &[u8] {
                     if !self.is_inline() {
                         panic!("tried to access data in the index of a revlog that is not inline");
                     }
                     &self.bytes[start..end]
                 }
                 /// Return number of entries of the revlog index.
                 pub fn len(&self) -> usize {
                     if let Some(offsets) = &self.offsets {
                         // Inline: one recorded offset per entry.
                         offsets.len()
                     } else {
                         // Separated: the file holds fixed-size entries only.
                         self.bytes.len() / INDEX_ENTRY_SIZE
                     }
                 }
                 /// Returns `true` if the `Index` has zero `entries`.
                 pub fn is_empty(&self) -> bool {
                     self.len() == 0
                 }
                 /// Return the index entry corresponding to the given revision if it
                 /// exists.
                 ///
                 /// `NULL_REVISION` never has an entry.
                 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
                     if rev == NULL_REVISION {
                         return None;
                     }
                     if let Some(offsets) = &self.offsets {
                         self.get_entry_inline(rev, offsets)
                     } else {
                         self.get_entry_separated(rev)
                     }
                 }
                 /// Look up an entry of an inline index through the offsets table.
                 ///
                 /// Returns `None` when `rev` has no recorded offset.
                 fn get_entry_inline(
                     &self,
                     rev: Revision,
                     offsets: &[usize],
                 ) -> Option<IndexEntry> {
                     let start = *offsets.get(rev as usize)?;
                     let end = start.checked_add(INDEX_ENTRY_SIZE)?;
                     let bytes = &self.bytes[start..end];
                     // See IndexEntry for an explanation of this override.
                     let offset_override = Some(end);
                     Some(IndexEntry {
                         bytes,
                         offset_override,
                     })
                 }
                 /// Look up an entry of a non-inline index by its fixed position.
                 ///
                 /// Returns `None` when `rev` is past the last complete entry.
                 fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
                     let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE;
                     if rev as usize >= max_rev {
                         return None;
                     }
                     let start = rev as usize * INDEX_ENTRY_SIZE;
                     let end = start + INDEX_ENTRY_SIZE;
                     let bytes = &self.bytes[start..end];
                     // Override the offset of the first revision as its bytes are used
                     // for the index's metadata (saving space because it is always 0)
                     let offset_override = if rev == 0 { Some(0) } else { None };
                     Some(IndexEntry {
                         bytes,
                         offset_override,
                     })
                 }
             }
             impl super::RevlogIndex for Index {
                 fn len(&self) -> usize {
                     self.len()
                 }
                 fn node(&self, rev: Revision) -> Option<&Node> {
                     self.get_entry(rev).map(|entry| entry.hash())
                 }
             }
             /// Borrowed view over the raw bytes of a single index entry.
             ///
             /// All accessors decode big-endian fields at fixed byte positions and
             /// panic if `bytes` is too short for the field being read.
             #[derive(Debug)]
             pub struct IndexEntry<'a> {
                 bytes: &'a [u8],
                 /// Allows to override the offset value of the entry.
                 ///
                 /// For interleaved index and data, the offset stored in the index
                 /// corresponds to the separated data offset.
                 /// It has to be overridden with the actual offset in the interleaved
                 /// index which is just after the index block.
                 ///
                 /// For separated index and data, the offset stored in the first index
                 /// entry is mixed with the index headers.
                 /// It has to be overridden with 0.
                 offset_override: Option<usize>,
             }
             impl<'a> IndexEntry<'a> {
                 /// Return the offset of the data.
                 ///
                 /// The override wins when present; otherwise the offset is the 48-bit
                 /// big-endian integer stored in the first six bytes of the entry.
                 pub fn offset(&self) -> usize {
                     if let Some(offset_override) = self.offset_override {
                         offset_override
                     } else {
                         // Widen the 6 stored bytes to 8 so they can be read as a u64.
                         let mut bytes = [0; 8];
                         bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
                         BigEndian::read_u64(&bytes[..]) as usize
                     }
                 }
                 /// Return the compressed length of the data.
                 ///
                 /// Stored as a `u32` in bytes `8..=11`.
                 pub fn compressed_len(&self) -> usize {
                     BigEndian::read_u32(&self.bytes[8..=11]) as usize
                 }
                 /// Return the uncompressed length of the data.
                 ///
                 /// Stored as a `u32` in bytes `12..=15`.
                 pub fn uncompressed_len(&self) -> usize {
                     BigEndian::read_u32(&self.bytes[12..=15]) as usize
                 }
                 /// Return the revision upon which the data has been derived.
                 ///
                 /// This is the raw `i32` stored from byte 16 of the entry.
                 /// NOTE(review): as the new name says, the value is either a base
                 /// revision or the base of a delta chain; which one presumably depends
                 /// on whether the revlog uses generaldelta -- confirm against callers.
-                pub fn base_revision(&self) -> Revision {
+                pub fn base_revision_or_base_of_delta_chain(&self) -> Revision {
                     // TODO Maybe return an Option when base_revision == rev?
                     //      Requires to add rev to IndexEntry
                     BigEndian::read_i32(&self.bytes[16..])
                 }
                 /// Return the first parent revision, stored as an `i32` from byte 24.
                 pub fn p1(&self) -> Revision {
                     BigEndian::read_i32(&self.bytes[24..])
                 }
                 /// Return the second parent revision, stored as an `i32` from byte 28.
                 pub fn p2(&self) -> Revision {
                     BigEndian::read_i32(&self.bytes[28..])
                 }
                 /// Return the hash of revision's full text.
                 ///
                 /// Currently, SHA-1 is used and only the first 20 bytes of this field
                 /// are used.
                 pub fn hash(&self) -> &'a Node {
                     (&self.bytes[32..52]).try_into().unwrap()
                 }
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 /// Test helper producing the leading bytes of an index entry.
                 ///
                 /// NOTE(review): the default `version` is 2 whereas `Index::new` only
                 /// accepts `REVLOGV1`; use `with_version(1)` when the bytes must pass
                 /// through `Index::new`.
                 #[cfg(test)]
                 #[derive(Debug, Copy, Clone)]
                 pub struct IndexEntryBuilder {
                     is_first: bool,
                     is_inline: bool,
                     is_general_delta: bool,
                     version: u16,
                     offset: usize,
                     compressed_len: usize,
                     uncompressed_len: usize,
-                    base_revision: Revision,
+                    base_revision_or_base_of_delta_chain: Revision,
                 }
                 #[cfg(test)]
                 impl IndexEntryBuilder {
                     /// Builder with all numeric fields at zero, generaldelta enabled,
                     /// not inline, not the first entry, and version 2.
                     pub fn new() -> Self {
                         Self {
                             is_first: false,
                             is_inline: false,
                             is_general_delta: true,
                             version: 2,
                             offset: 0,
                             compressed_len: 0,
                             uncompressed_len: 0,
-                            base_revision: 0,
+                            base_revision_or_base_of_delta_chain: 0,
                         }
                     }
                     /// When true, `build` writes the index header (flags and version)
                     /// in place of the offset.
                     pub fn is_first(&mut self, value: bool) -> &mut Self {
                         self.is_first = value;
                         self
                     }
                     /// Set the inline flag written in the header of a first entry.
                     pub fn with_inline(&mut self, value: bool) -> &mut Self {
                         self.is_inline = value;
                         self
                     }
                     /// Set the generaldelta flag written in the header of a first entry.
                     pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
                         self.is_general_delta = value;
                         self
                     }
                     /// Set the format version written in the header of a first entry.
                     pub fn with_version(&mut self, value: u16) -> &mut Self {
                         self.version = value;
                         self
                     }
                     /// Set the data offset; only written when the entry is not first.
                     pub fn with_offset(&mut self, value: usize) -> &mut Self {
                         self.offset = value;
                         self
                     }
                     /// Set the compressed length field.
                     pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
                         self.compressed_len = value;
                         self
                     }
                     /// Set the uncompressed length field.
                     pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
                         self.uncompressed_len = value;
                         self
                     }
                     /// Set the base-revision field written from byte 16.
-                    pub fn with_base_revision(&mut self, value: Revision) -> &mut Self {
+                    pub fn with_base_revision_or_base_of_delta_chain(
-                        self.base_revision = value;
+                        &mut self,
+                        value: Revision,
+                    ) -> &mut Self {
+                        self.base_revision_or_base_of_delta_chain = value;
                         self
                     }
                     /// Serialize the configured fields in entry order.
                     ///
                     /// NOTE(review): only the first 20 bytes (up to and including the
                     /// base-revision field) are emitted, not a full `INDEX_ENTRY_SIZE`
                     /// entry, so `p1`, `p2` and `hash` cannot be read from the result.
                     pub fn build(&self) -> Vec<u8> {
                         let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
                         if self.is_first {
                             // Header: two flag bytes, then the big-endian version.
                             bytes.extend(&match (self.is_general_delta, self.is_inline) {
                                 (false, false) => [0u8, 0],
                                 (false, true) => [0u8, 1],
                                 (true, false) => [0u8, 2],
                                 (true, true) => [0u8, 3],
                             });
                             bytes.extend(&self.version.to_be_bytes());
                             // Remaining offset bytes.
                             bytes.extend(&[0u8; 2]);
                         } else {
                             // Offset stored on 48 bits (6 bytes)
                             bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
                         }
                         bytes.extend(&[0u8; 2]); // Revision flags.
                         bytes.extend(&(self.compressed_len as u32).to_be_bytes());
                         bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
-                        bytes.extend(&self.base_revision.to_be_bytes());
+                        bytes.extend(
+                            &self.base_revision_or_base_of_delta_chain.to_be_bytes(),
+                        );
                         bytes
                     }
                 }
                 /// Parse the header of `index_bytes` and return its inline flag.
                 pub fn is_inline(index_bytes: &[u8]) -> bool {
                     IndexHeader::parse(index_bytes)
                         .expect("too short")
                         .format_flags()
                         .is_inline()
                 }
                 /// Parse the header of `index_bytes` and return its generaldelta flag.
                 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
                     IndexHeader::parse(index_bytes)
                         .expect("too short")
                         .format_flags()
                         .uses_generaldelta()
                 }
                 /// Parse the header of `index_bytes` and return its format version.
                 pub fn get_version(index_bytes: &[u8]) -> u16 {
                     IndexHeader::parse(index_bytes)
                         .expect("too short")
                         .format_version()
                 }
                 // Header flag decoding, one test per flag combination covered.
                 #[test]
                 fn flags_when_no_inline_flag_test() {
                     let bytes = IndexEntryBuilder::new()
                         .is_first(true)
                         .with_general_delta(false)
                         .with_inline(false)
                         .build();
                     assert_eq!(is_inline(&bytes), false);
                     assert_eq!(uses_generaldelta(&bytes), false);
                 }
                 #[test]
                 fn flags_when_inline_flag_test() {
                     let bytes = IndexEntryBuilder::new()
                         .is_first(true)
                         .with_general_delta(false)
                         .with_inline(true)
                         .build();
                     assert_eq!(is_inline(&bytes), true);
                     assert_eq!(uses_generaldelta(&bytes), false);
                 }
                 #[test]
                 fn flags_when_inline_and_generaldelta_flags_test() {
                     let bytes = IndexEntryBuilder::new()
                         .is_first(true)
                         .with_general_delta(true)
                         .with_inline(true)
                         .build();
                     assert_eq!(is_inline(&bytes), true);
                     assert_eq!(uses_generaldelta(&bytes), true);
                 }
                 // Field accessors of `IndexEntry`, each fed a non-first entry.
                 #[test]
                 fn test_offset() {
                     let bytes = IndexEntryBuilder::new().with_offset(1).build();
                     let entry = IndexEntry {
                         bytes: &bytes,
                         offset_override: None,
                     };
                     assert_eq!(entry.offset(), 1)
                 }
                 #[test]
                 fn test_with_overridden_offset() {
                     let bytes = IndexEntryBuilder::new().with_offset(1).build();
                     let entry = IndexEntry {
                         bytes: &bytes,
                         offset_override: Some(2),
                     };
                     assert_eq!(entry.offset(), 2)
                 }
                 #[test]
                 fn test_compressed_len() {
                     let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
                     let entry = IndexEntry {
                         bytes: &bytes,
                         offset_override: None,
                     };
                     assert_eq!(entry.compressed_len(), 1)
                 }
                 #[test]
                 fn test_uncompressed_len() {
                     let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
                     let entry = IndexEntry {
                         bytes: &bytes,
                         offset_override: None,
                     };
                     assert_eq!(entry.uncompressed_len(), 1)
                 }
                 #[test]
-                fn test_base_revision() {
+                fn test_base_revision_or_base_of_delta_chain() {
-                    let bytes = IndexEntryBuilder::new().with_base_revision(1).build();
+                    let bytes = IndexEntryBuilder::new()
+                        .with_base_revision_or_base_of_delta_chain(1)
+                        .build();
                     let entry = IndexEntry {
                         bytes: &bytes,
                         offset_override: None,
                     };
-                    assert_eq!(entry.base_revision(), 1)
+                    assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1)
                 }
                 // The version is read from bytes 2..4 of a first entry.
                 #[test]
                 fn version_test() {
                     let bytes = IndexEntryBuilder::new()
                         .is_first(true)
                         .with_version(1)
                         .build();
                     assert_eq!(get_version(&bytes), 1)
                 }
             }
             #[cfg(test)]
             pub use tests::IndexEntryBuilder;

rust/hg-core/src/revlog/revlog.rs

0 +29 -8

             use std::borrow::Cow;
             use std::io::Read;
             use std::ops::Deref;
             use std::path::Path;
             use flate2::read::ZlibDecoder;
             use micro_timer::timed;
             use sha1::{Digest, Sha1};
             use zstd;
             use super::index::Index;
             use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
             use super::nodemap;
             use super::nodemap::{NodeMap, NodeMapError};
             use super::nodemap_docket::NodeMapDocket;
             use super::patch;
             use crate::errors::HgError;
             use crate::repo::Repo;
             use crate::revlog::Revision;
             use crate::{Node, NULL_REVISION};
             /// Errors reported by revlog lookups and data reconstruction.
             #[derive(derive_more::From)]
             pub enum RevlogError {
                 /// The requested revision or node was not found in the revlog
                 InvalidRevision,
                 /// Working directory is not supported
                 WDirUnsupported,
                 /// Found more than one entry whose ID match the requested prefix
                 AmbiguousPrefix,
                 /// Any other error, wrapped; this is the variant `HgError` converts to
                 #[from]
                 Other(HgError),
             }
             /// Translate nodemap lookup failures into revlog errors.
             impl From<NodeMapError> for RevlogError {
                 fn from(error: NodeMapError) -> Self {
                     match error {
                         // A nodemap pointing outside the index is reported as
                         // corruption.
                         NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
                         NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
                     }
                 }
             }
             impl RevlogError {
                 /// Build the generic "corrupted revlog" error.
                 fn corrupted() -> Self {
                     let inner = HgError::corrupted("corrupted revlog");
                     RevlogError::Other(inner)
                 }
             }
             /// Read only implementation of revlog.
             ///
             /// Bundles the parsed index with the separate data bytes (when the revlog
             /// is not inline) and the persistent nodemap (when one was loaded).
             pub struct Revlog {
                 /// When index and data are not interleaved: bytes of the revlog index.
                 /// When index and data are interleaved: bytes of the revlog index and
                 /// data.
                 index: Index,
                 /// When index and data are not interleaved: bytes of the revlog data
                 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
                 /// When present on disk: the persistent nodemap for this revlog
                 nodemap: Option<nodemap::NodeTree>,
             }
             impl Revlog {
                 /// Open a revlog index file.
                 ///
                 /// It will also open the associated data file if index and data are not
                 /// interleaved.
                 ///
                 /// When `mmap_open_opt` yields no mapping for `index_path`, an empty
                 /// index is used instead. For a non-inline index the data file is
                 /// `data_path`, or `index_path` with its extension replaced by `d`
                 /// when no path is given, and the nodemap docket is read as well.
                 #[timed]
                 pub fn open(
                     repo: &Repo,
                     index_path: impl AsRef<Path>,
                     data_path: Option<&Path>,
                 ) -> Result<Self, HgError> {
                     let index_path = index_path.as_ref();
                     let index = match repo.store_vfs().mmap_open_opt(&index_path)? {
                         Some(index_mmap) => Index::new(Box::new(index_mmap)),
                         None => Index::new(Box::new(vec![])),
                     }?;
                     let inline = index.is_inline();
                     // type annotation required
                     // won't recognize Mmap as Deref<Target = [u8]>
                     let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
                         if inline {
                             None
                         } else {
                             let fallback_path = index_path.with_extension("d");
                             let path = data_path.unwrap_or(&fallback_path);
                             Some(Box::new(repo.store_vfs().mmap_open(path)?))
                         };
                     // The nodemap is only looked up for non-inline revlogs.
                     let nodemap = if inline {
                         None
                     } else {
                         let docket_and_data =
                             NodeMapDocket::read_from_file(repo, index_path)?;
                         docket_and_data.map(|(docket, data)| {
                             nodemap::NodeTree::load_bytes(
                                 Box::new(data),
                                 docket.data_length,
                             )
                         })
                     };
                     Ok(Revlog {
                         index,
                         data_bytes,
                         nodemap,
                     })
                 }
                 /// Return number of entries of the `Revlog`.
                 ///
                 /// This is the entry count of the underlying index.
                 pub fn len(&self) -> usize {
                     self.index.len()
                 }
                 /// Returns `true` if the `Revlog` has zero `entries`.
                 ///
                 /// Delegates to the underlying index.
                 pub fn is_empty(&self) -> bool {
                     self.index.is_empty()
                 }
                 /// Returns the node ID for the given revision number, if it exists in this
                 /// revlog
                 ///
                 /// `NULL_REVISION` always maps to `NULL_NODE`; any other revision is
                 /// looked up in the index.
                 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
                     if rev != NULL_REVISION {
                         self.index.get_entry(rev).map(|entry| entry.hash())
                     } else {
                         Some(&NULL_NODE)
                     }
                 }
                 /// Return the revision number for the given node ID, if it exists in this
                 /// revlog
                 ///
                 /// A prefix of the null node resolves to `NULL_REVISION`. Otherwise the
                 /// persistent nodemap is used when one was loaded, and the index is
                 /// scanned linearly when not.
                 ///
                 /// Fails with `InvalidRevision` when nothing matches and with
                 /// `AmbiguousPrefix` when the scan finds a second prefix match before
                 /// finding an exact one.
                 #[timed]
                 pub fn rev_from_node(
                     &self,
                     node: NodePrefix,
                 ) -> Result<Revision, RevlogError> {
                     if node.is_prefix_of(&NULL_NODE) {
                         return Ok(NULL_REVISION);
                     }
                     if let Some(nodemap) = &self.nodemap {
                         return nodemap
                             .find_bin(&self.index, node)?
                             .ok_or(RevlogError::InvalidRevision);
                     }
                     // Fallback to linear scan when a persistent nodemap is not present.
                     // This happens when the persistent-nodemap experimental feature is not
                     // enabled, or for small revlogs.
                     //
                     // TODO: consider building a non-persistent nodemap in memory to
                     // optimize these cases.
                     let mut found_by_prefix = None;
                     // Scan from the newest revision down to revision 0.
                     for rev in (0..self.len() as Revision).rev() {
                         // Build the error lazily: with `ok_or` it would be constructed
                         // on every iteration even though it is almost never needed.
                         let index_entry =
                             self.index.get_entry(rev).ok_or_else(|| {
                                 HgError::corrupted(
                                     "revlog references a revision not in the index",
                                 )
                             })?;
                         // An exact match wins immediately.
                         if node == *index_entry.hash() {
                             return Ok(rev);
                         }
                         if node.is_prefix_of(index_entry.hash()) {
                             if found_by_prefix.is_some() {
                                 return Err(RevlogError::AmbiguousPrefix);
                             }
                             found_by_prefix = Some(rev)
                         }
                     }
                     found_by_prefix.ok_or(RevlogError::InvalidRevision)
                 }
                 /// Returns whether the given revision exists in this revlog.
                 ///
                 /// A revision exists when the index has an entry for it.
                 pub fn has_rev(&self, rev: Revision) -> bool {
                     self.index.get_entry(rev).is_some()
                 }
                 /// Return the full data associated to a revision.
                 ///
                 /// All entries required to build the final data out of deltas will be
                 /// retrieved as needed, and the deltas will be applied to the initial
                 /// snapshot to rebuild the final data.
                 ///
                 /// `NULL_REVISION` yields empty data. The rebuilt data is checked against
                 /// the hash recorded in the index and a mismatch is reported as a
                 /// corrupted revlog.
                 #[timed]
                 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
                     if rev == NULL_REVISION {
                         return Ok(vec![]);
                     };
                     // Todo return -> Cow
                     let mut entry = self.get_entry(rev)?;
                     // Deltas collected from `rev` backwards; `entry` ends up being the
                     // entry the chain starts from.
                     let mut delta_chain = vec![];
-                    while let Some(base_rev) = entry.base_rev {
+                    // The meaning of `base_rev_or_base_of_delta_chain` depends on
+                    // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
+                    // `mercurial/revlogutils/constants.py` and the code in
+                    // [_chaininfo] and in [index_deltachain].
+                    let uses_generaldelta = self.index.uses_generaldelta();
+                    while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
+                        let base_rev = if uses_generaldelta {
+                            base_rev
+                        } else {
+                            entry.rev - 1
+                        };
                         delta_chain.push(entry);
-                        entry = self
+                        entry = self.get_entry_internal(base_rev)?;
-                            .get_entry(base_rev)
-                            .map_err(|_| RevlogError::corrupted())?;
                     }
                     // TODO do not look twice in the index
                     let index_entry = self
                         .index
                         .get_entry(rev)
                         .ok_or(RevlogError::InvalidRevision)?;
                     // No delta collected: the entry's own data is the result.
                     let data: Vec<u8> = if delta_chain.is_empty() {
                         entry.data()?.into()
                     } else {
                         Revlog::build_data_from_deltas(entry, &delta_chain)?
                     };
                     if self.check_hash(
                         index_entry.p1(),
                         index_entry.p2(),
                         index_entry.hash().as_bytes(),
                         &data,
                     ) {
                         Ok(data)
                     } else {
                         Err(RevlogError::corrupted())
                     }
                 }
                 /// Check the hash of some given data against the recorded hash.
                 pub fn check_hash(
                     &self,
                     p1: Revision,
                     p2: Revision,
                     expected: &[u8],
                     data: &[u8],
                 ) -> bool {
                     let e1 = self.index.get_entry(p1);
                     let h1 = match e1 {
                         Some(ref entry) => entry.hash(),
                         None => &NULL_NODE,
                     };
                     let e2 = self.index.get_entry(p2);
                     let h2 = match e2 {
                         Some(ref entry) => entry.hash(),
                         None => &NULL_NODE,
                     };
                     &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
                 }
                 /// Build the full data of a revision out its snapshot
                 /// and its deltas.
                 #[timed]
                 fn build_data_from_deltas(
                     snapshot: RevlogEntry,
                     deltas: &[RevlogEntry],
                 ) -> Result<Vec<u8>, RevlogError> {
                     let snapshot = snapshot.data()?;
                     let deltas = deltas
                         .iter()
                         .rev()
                         .map(RevlogEntry::data)
                         .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
                     let patches: Vec<_> =
                         deltas.iter().map(|d| patch::PatchList::new(d)).collect();
                     let patch = patch::fold_patch_lists(&patches);
                     Ok(patch.apply(&snapshot))
                 }
                 /// Return the revlog data held in `data_bytes`.
                 ///
                 /// # Panics
                 ///
                 /// Panics when `data_bytes` is `None`, i.e. the data was never loaded
                 /// or the caller is trying to access inline data through this method.
                 fn data(&self) -> &[u8] {
                     if let Some(loaded) = &self.data_bytes {
                         &loaded
                     } else {
                         panic!(
                             "forgot to load the data or trying to access inline data"
                         )
                     }
                 }
                 /// Get an entry of the revlog.
                 ///
                 /// Looks `rev` up in the index and pairs the entry's stored bytes with
                 /// the lengths and delta-base information recorded in the index.
                 ///
                 /// Returns `RevlogError::InvalidRevision` when the index has no entry
                 /// for `rev`.
                 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
                     let index_entry = self
                         .index
                         .get_entry(rev)
                         .ok_or(RevlogError::InvalidRevision)?;
                     let start = index_entry.offset();
                     let end = start + index_entry.compressed_len();
                     // For an inline revlog the bytes are read through the index;
                     // otherwise they are sliced out of the separately loaded data.
                     // NOTE(review): `self.data()` panics if the data was not loaded,
                     // and the slice below panics if `start..end` is out of bounds.
                     let data = if self.index.is_inline() {
                         self.index.data(start, end)
                     } else {
                         &self.data()[start..end]
                     };
                     let entry = RevlogEntry {
                         rev,
                         bytes: data,
                         compressed_len: index_entry.compressed_len(),
                         uncompressed_len: index_entry.uncompressed_len(),
                         // An entry whose recorded base is its own revision has no
                         // base: it is stored as `None`.
-                        base_rev: if index_entry.base_revision() == rev {
+                        base_rev_or_base_of_delta_chain: if index_entry
+                            .base_revision_or_base_of_delta_chain()
+                            == rev
+                        {
                             None
                         } else {
-                            Some(index_entry.base_revision())
+                            Some(index_entry.base_revision_or_base_of_delta_chain())
                         },
                     };
                     Ok(entry)
                 }
+                /// Like `get_entry`, but every error is mapped to
+                /// `RevlogError::corrupted()`.
+                ///
+                /// When resolving internal references within the revlog, any errors
+                /// should be reported as corruption instead of e.g. "invalid
+                /// revision".
+                fn get_entry_internal(
+                    &self,
+                    rev: Revision,
+                ) -> Result<RevlogEntry, RevlogError> {
+                    return self.get_entry(rev).map_err(|_| RevlogError::corrupted());
+                }
             }
             /// The revlog entry's bytes and the necessary information to extract
             /// the entry's data.
             #[derive(Debug)]
             pub struct RevlogEntry<'a> {
                 /// Revision this entry was looked up for.
                 rev: Revision,
                 /// Stored bytes of the entry; the first byte selects how `data()`
                 /// decodes them.
                 bytes: &'a [u8],
                 /// Compressed length as recorded in the index.
                 compressed_len: usize,
                 /// Uncompressed length as recorded in the index.
                 uncompressed_len: usize,
                 /// Base recorded in the index, or `None` when the index records the
                 /// entry's own revision. `is_delta()` is true exactly when this is
                 /// `Some`.
-                base_rev: Option<Revision>,
+                base_rev_or_base_of_delta_chain: Option<Revision>,
             }
             impl<'a> RevlogEntry<'a> {
                 /// Return the revision stored in this entry (its `rev` field).
                 pub fn revision(&self) -> Revision {
                     self.rev
                 }
                 /// Extract the data contained in the entry.
                 ///
                 /// Empty entries yield empty data. Otherwise the first stored byte
                 /// selects the encoding: `\0` (the whole entry is the data), `u`
                 /// (raw data follows), `x` (zlib) or `\x28` (zstd). Any other first
                 /// byte is reported as `RevlogError::corrupted()`.
                 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
                     let (marker, rest) = match self.bytes.split_first() {
                         Some(parts) => parts,
                         // Nothing is stored for this entry.
                         None => return Ok(Cow::Borrowed(&[])),
                     };
                     let data = match *marker {
                         // Revision data is the entirety of the entry, including this
                         // header.
                         b'\0' => Cow::Borrowed(self.bytes),
                         // Raw revision data follows.
                         b'u' => Cow::Borrowed(rest),
                         // zlib (RFC 1950) data.
                         b'x' => Cow::Owned(self.uncompressed_zlib_data()?),
                         // zstd data.
                         b'\x28' => Cow::Owned(self.uncompressed_zstd_data()?),
                         // A proper new format should have had a repo/store requirement.
                         _ => return Err(RevlogError::corrupted()),
                     };
                     Ok(data)
                 }
                 /// Decompress the entry's bytes as zlib data.
                 ///
                 /// For a delta the stream is read to its end into a buffer that
                 /// starts with a capacity of `compressed_len`. For a snapshot exactly
                 /// `uncompressed_len` bytes are read. Any read failure is reported as
                 /// `RevlogError::corrupted()`.
                 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
                     let mut inflater = ZlibDecoder::new(self.bytes);
                     let inflated = if self.is_delta() {
                         let mut out = Vec::with_capacity(self.compressed_len);
                         inflater.read_to_end(&mut out).map(|_| out)
                     } else {
                         let mut out = vec![0; self.uncompressed_len];
                         inflater.read_exact(&mut out).map(|_| out)
                     };
                     inflated.map_err(|_| RevlogError::corrupted())
                 }
                 /// Decompress the entry's bytes as zstd data.
                 ///
                 /// A snapshot is decompressed into a buffer of `uncompressed_len`
                 /// bytes and must fill it exactly. A delta is stream-decoded into a
                 /// growable buffer. Any decoding failure or length mismatch is
                 /// reported as `RevlogError::corrupted()`.
                 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
                     if !self.is_delta() {
                         let mut full_text = vec![0; self.uncompressed_len];
                         let written =
                             zstd::block::decompress_to_buffer(self.bytes, &mut full_text)
                                 .map_err(|_| RevlogError::corrupted())?;
                         return if written == self.uncompressed_len {
                             Ok(full_text)
                         } else {
                             Err(RevlogError::corrupted())
                         };
                     }
                     let mut delta = Vec::with_capacity(self.compressed_len);
                     zstd::stream::copy_decode(self.bytes, &mut delta)
                         .map_err(|_| RevlogError::corrupted())?;
                     Ok(delta)
                 }
                 /// Tell whether the entry is a delta (`true`) or a snapshot
                 /// (`false`); this influences how the entry is decompressed.
                 ///
                 /// An entry is a delta exactly when it records a base.
                 fn is_delta(&self) -> bool {
-                    self.base_rev.is_some()
+                    self.base_rev_or_base_of_delta_chain.is_some()
                 }
             }
             /// Calculate the hash of a revision given its data and its parents.
             ///
             /// The two parent hashes are fed to the hasher in ascending byte order
             /// (the smaller one first), followed by `data`.
             fn hash(
                 data: &[u8],
                 p1_hash: &[u8],
                 p2_hash: &[u8],
             ) -> [u8; NODE_BYTES_LENGTH] {
                 let (first, second) = if p1_hash > p2_hash {
                     (p2_hash, p1_hash)
                 } else {
                     (p1_hash, p2_hash)
                 };
                 let mut hasher = Sha1::new();
                 hasher.update(first);
                 hasher.update(second);
                 hasher.update(data);
                 *hasher.finalize().as_ref()
             }

tests/test-rhg-no-generaldelta.t

0 +23 -6

               $ NO_FALLBACK="env RHG_ON_UNSUPPORTED=abort"
               $ cat << EOF >> $HGRCPATH
               > [format]
               > sparse-revlog = no
               > EOF
               $ hg init repo --config format.generaldelta=no --config format.usegeneraldelta=no
               $ cd repo
               $ (echo header; seq.py 20) > f
               $ hg commit -q -Am initial
               $ (echo header; seq.py 20; echo footer) > f
               $ hg commit -q -Am x
               $ hg update ".^"
 files updated, 0 files merged, 0 files removed, 0 files unresolved
               $ (seq.py 20; echo footer) > f
               $ hg commit -q -Am y
               $ hg debugdeltachain f --template '{rev} {prevrev} {deltatype}\n'
 -1 base
 0 prev
 1 prev
-            rhg breaks on non-generaldelta revlogs:
+            rhg works on non-generaldelta revlogs:
-              $ $NO_FALLBACK hg cat f -r . | f --sha256 --size
+              $ $NO_FALLBACK hg cat f -r .
-              abort: corrupted revlog (rhg !)
-              size=0, sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 (rhg !)
-              size=58, sha256=0cf0386dd4813cc3b957ea790146627dfc0ec42ad3fcf47221b9842e4d5764c1 (no-rhg !)
+              footer

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages