upstream/mercurial-mirror Commit - r49012:61ce70fd

rhg: handle null changelog and manifest revisions...

Arseniy Alekseyev -

r49012:61ce70fd default

parent child

tests/test-empty-manifest-index.t

0 created 644 +23 0

			@@ -0,0 +1,23
		1	Create a repo such that the changelog entry refers to a null manifest node:
		2
		3	$ hg init a
		4	$ cd a
		5	$ hg log
		6	$ touch x
		7	$ hg add x
		8	$ hg commit -m "init"
		9	$ hg rm x
		10	$ hg commit -q --amend
		11
		12	$ wc -c < .hg/store/00manifest.i
		13	0
		14
		15	Make sure that the manifest can be read (and is empty):
		16
		17	$ hg --config rhg.on-unsupported=abort files -r .
		18	[1]
		19
		20	Test a null changelog rev, too:
		21
		22	$ hg --config rhg.on-unsupported=abort files -r 0000000000000000000000000000000000000000
		23	[1]

rust/hg-core/src/revlog/changelog.rs

0 +5 -5

              use crate::errors::HgError;
              use crate::repo::Repo;
+             use crate::revlog::node::NULL_NODE;
              use crate::revlog::revlog::{Revlog, RevlogError};
              use crate::revlog::Revision;
              use crate::revlog::{Node, NodePrefix};
              /// Wrapper around a generic `Revlog` that understands the `changelog`
              /// data format.
              pub struct Changelog {
                  /// The underlying generic `revlog`.
                  pub(crate) revlog: Revlog,
              }
              impl Changelog {
                  /// Open the `changelog` (`00changelog.i`) of the given repository.
                  ///
                  /// Any error reported while opening the underlying revlog is returned
                  /// unchanged.
                  pub fn open(repo: &Repo) -> Result<Self, HgError> {
                      Revlog::open(repo, "00changelog.i", None)
                          .map(|revlog| Self { revlog })
                  }
                  /// Look up the revision matching `node` (a full node ID or a prefix)
                  /// and return its `ChangelogEntry`.
                  pub fn data_for_node(
                      &self,
                      node: NodePrefix,
                  ) -> Result<ChangelogEntry, RevlogError> {
                      self.revlog
                          .rev_from_node(node)
                          .and_then(|rev| self.data_for_rev(rev))
                  }
                  /// Return the `ChangelogEntry` stored for the given revision number.
                  pub fn data_for_rev(
                      &self,
                      rev: Revision,
                  ) -> Result<ChangelogEntry, RevlogError> {
                      self.revlog
                          .get_rev_data(rev)
                          .map(|bytes| ChangelogEntry { bytes })
                  }
                  /// Return the node ID recorded for `rev`, or `None` when the
                  /// underlying revlog has no such revision.
                  pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
                      let Self { revlog } = self;
                      revlog.node_from_rev(rev)
                  }
              }
              /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
              #[derive(Debug)]
              pub struct ChangelogEntry {
                  /// The data bytes of the `changelog` entry.
                  bytes: Vec<u8>,
              }
              impl ChangelogEntry {
                  /// Iterate over the entry's data split on `\n`, skipping empty lines.
                  pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
                      let raw: &[u8] = &self.bytes;
                      raw.split(|&byte| byte == b'\n')
                          .filter(|segment| !segment.is_empty())
                  }
                  /// Return the node id of the `manifest` referenced by this `changelog`
                  /// entry.
                  ///
                  /// The node is parsed from the first non-empty line of the entry.
                  /// With the added (`+`) lines, an entry that has no line at all yields
                  /// `NULL_NODE` instead of an "empty changelog entry" corruption error.
                  pub fn manifest_node(&self) -> Result<Node, HgError> {
-                     Node::from_hex_for_repo(
-                         self.lines()
-                             .next()
-                             .ok_or_else(|| HgError::corrupted("empty changelog entry"))?,
+                     )
+                     match self.lines().next() {
+                         None => Ok(NULL_NODE),
+                         Some(x) => Node::from_hex_for_repo(x),
                  }
              }
+             }

rust/hg-core/src/revlog/index.rs

0 +3 0

              use std::convert::TryInto;
              use std::ops::Deref;
              use byteorder::{BigEndian, ByteOrder};
              use crate::errors::HgError;
              use crate::revlog::node::Node;
              use crate::revlog::{Revision, NULL_REVISION};
              pub const INDEX_ENTRY_SIZE: usize = 64;
              /// Index of a revlog: a sequence of fixed-size entries, possibly
              /// interleaved with the revision data ("inline" revlog).
              pub struct Index {
                  /// Raw bytes backing the index.
                  bytes: Box<dyn Deref<Target = [u8]> + Send>,
                  /// Start offset of every index entry inside `bytes`.
                  /// Only computed when the index is interleaved with data.
                  offsets: Option<Vec<usize>>,
              }
              impl Index {
                  /// Build an `Index` on top of the given bytes.
                  ///
                  /// When the bytes describe an inline revlog, the start of every entry
                  /// is computed up front by hopping over each entry and the compressed
                  /// data that follows it. An error is returned if that walk does not end
                  /// exactly at the end of `bytes`.
                  pub fn new(
                      bytes: Box<dyn Deref<Target = [u8]> + Send>,
                  ) -> Result<Self, HgError> {
                      if !is_inline(&bytes) {
                          return Ok(Self {
                              bytes,
                              offsets: None,
                          });
                      }
                      let total_len = bytes.len();
                      let mut entry_starts = Vec::new();
                      let mut cursor: usize = 0;
                      while cursor + INDEX_ENTRY_SIZE <= total_len {
                          entry_starts.push(cursor);
                          let entry = IndexEntry {
                              bytes: &bytes[cursor..cursor + INDEX_ENTRY_SIZE],
                              offset_override: None,
                          };
                          // The next entry starts right after this entry's data.
                          cursor += INDEX_ENTRY_SIZE + entry.compressed_len();
                      }
                      if cursor != total_len {
                          return Err(HgError::corrupted(
                              "unexpected inline revlog length",
                          ));
                      }
                      Ok(Self {
                          bytes,
                          offsets: Some(entry_starts),
                      })
                  }
                  /// Whether the inline flag is set in the index bytes.
                  pub fn is_inline(&self) -> bool {
                      is_inline(&self.bytes)
                  }
                  /// Return the bytes in `start..end` of an inline `revlog`.
                  ///
                  /// # Panics
                  ///
                  /// Panics if the revlog is not inline, or if the range is out of
                  /// bounds.
                  pub fn data(&self, start: usize, end: usize) -> &[u8] {
                      assert!(
                          self.is_inline(),
                          "tried to access data in the index of a revlog that is not inline"
                      );
                      &self.bytes[start..end]
                  }
                  /// Number of entries in the revlog index.
                  pub fn len(&self) -> usize {
                      match &self.offsets {
                          Some(entry_starts) => entry_starts.len(),
                          None => self.bytes.len() / INDEX_ENTRY_SIZE,
                      }
                  }
                  /// Returns `true` if the `Index` has zero `entries`.
                  pub fn is_empty(&self) -> bool {
                      0 == self.len()
                  }
                  /// Return the index entry of the given revision, or `None` when the
                  /// revision is the null revision or is not present in the index.
                  pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
                      if rev == NULL_REVISION {
                          return None;
                      }
                      match &self.offsets {
                          Some(entry_starts) => self.get_entry_inline(rev, entry_starts),
                          None => self.get_entry_separated(rev),
                      }
                  }
                  /// Entry lookup for an inline revlog, using the precomputed entry
                  /// starts.
                  fn get_entry_inline(
                      &self,
                      rev: Revision,
                      offsets: &[usize],
                  ) -> Option<IndexEntry> {
                      let start = *offsets.get(rev as usize)?;
                      let end = start.checked_add(INDEX_ENTRY_SIZE)?;
                      Some(IndexEntry {
                          bytes: &self.bytes[start..end],
                          // See IndexEntry for an explanation of this override.
                          offset_override: Some(end),
                      })
                  }
                  /// Entry lookup for a revlog whose data lives outside of the index.
                  fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
                      let position = rev as usize;
                      let entry_count = self.bytes.len() / INDEX_ENTRY_SIZE;
                      if position >= entry_count {
                          return None;
                      }
                      let start = position * INDEX_ENTRY_SIZE;
                      let bytes = &self.bytes[start..start + INDEX_ENTRY_SIZE];
                      // Override the offset of the first revision as its bytes are used
                      // for the index's metadata (saving space because it is always 0)
                      let offset_override = match rev {
                          0 => Some(0),
                          _ => None,
                      };
                      Some(IndexEntry {
                          bytes,
                          offset_override,
                      })
                  }
              }
              impl super::RevlogIndex for Index {
                  fn len(&self) -> usize {
                      self.len()
                  }
                  fn node(&self, rev: Revision) -> Option<&Node> {
                      self.get_entry(rev).map(|entry| entry.hash())
                  }
              }
              /// Borrowed view over the bytes of a single index entry.
              #[derive(Debug)]
              pub struct IndexEntry<'a> {
                  /// Bytes of the entry.
                  bytes: &'a [u8],
                  /// When set, replaces the offset value stored in the entry.
                  ///
                  /// Interleaved index and data: the stored offset is the one the data
                  /// would have in a separate data file, so it is replaced with the
                  /// actual position in the interleaved index, which is right after the
                  /// index block.
                  ///
                  /// Separated index and data: the offset field of the first entry is
                  /// shared with the index headers, so it is replaced with 0.
                  offset_override: Option<usize>,
              }
              impl<'a> IndexEntry<'a> {
                  /// Return the offset of the data: the override when there is one,
                  /// otherwise the big-endian value stored in the first 6 bytes.
                  pub fn offset(&self) -> usize {
                      match self.offset_override {
                          Some(forced) => forced,
                          None => {
                              // Left-pad the 6 stored bytes to a full 8-byte integer.
                              let mut padded = [0u8; 8];
                              padded[2..].copy_from_slice(&self.bytes[..6]);
                              BigEndian::read_u64(&padded) as usize
                          }
                      }
                  }
                  /// Return the compressed length of the data (bytes 8 to 11).
                  pub fn compressed_len(&self) -> usize {
                      let field = &self.bytes[8..12];
                      BigEndian::read_u32(field) as usize
                  }
                  /// Return the uncompressed length of the data (bytes 12 to 15).
                  pub fn uncompressed_len(&self) -> usize {
                      let field = &self.bytes[12..16];
                      BigEndian::read_u32(field) as usize
                  }
                  /// Return the revision upon which the data has been derived.
                  pub fn base_revision(&self) -> Revision {
                      // TODO Maybe return an Option when base_revision == rev?
                      //      Requires to add rev to IndexEntry
                      let field = &self.bytes[16..];
                      BigEndian::read_i32(field)
                  }
                  /// Return the first parent revision (read at byte 24).
                  pub fn p1(&self) -> Revision {
                      let field = &self.bytes[24..];
                      BigEndian::read_i32(field)
                  }
                  /// Return the second parent revision (read at byte 28).
                  pub fn p2(&self) -> Revision {
                      let field = &self.bytes[28..];
                      BigEndian::read_i32(field)
                  }
                  /// Return the hash of revision's full text.
                  ///
                  /// Currently, SHA-1 is used and only the first 20 bytes of this field
                  /// are used.
                  pub fn hash(&self) -> &'a Node {
                      let digest = &self.bytes[32..52];
                      digest.try_into().unwrap()
                  }
              }
              /// Value of the inline flag.
              ///
              /// The flag is derived from the first two bytes of the index: `[0, 0]`
              /// and `[0, 2]` mean "not inline", any other value means "inline".
              /// With the added (`+`) lines, an index shorter than 4 bytes (for
              /// instance an empty file) is reported as inline without looking at its
              /// content.
              pub fn is_inline(index_bytes: &[u8]) -> bool {
+                 if index_bytes.len() < 4 {
+                     return true;
+                 }
                  match &index_bytes[0..=1] {
                      [0, 0] | [0, 2] => false,
                      _ => true,
                  }
              }
              #[cfg(test)]
              mod tests {
                  use super::*;
                  /// Test helper producing the leading bytes of an index entry (header
                  /// or offset, flags, lengths and base revision).
                  #[cfg(test)]
                  #[derive(Debug, Copy, Clone)]
                  pub struct IndexEntryBuilder {
                      is_first: bool,
                      is_inline: bool,
                      is_general_delta: bool,
                      version: u16,
                      offset: usize,
                      compressed_len: usize,
                      uncompressed_len: usize,
                      base_revision: Revision,
                  }
                  #[cfg(test)]
                  impl IndexEntryBuilder {
                      /// Builder for a non-first, non-inline, general-delta entry with
                      /// version 2 and every numeric field set to zero.
                      pub fn new() -> Self {
                          Self {
                              is_first: false,
                              is_inline: false,
                              is_general_delta: true,
                              version: 2,
                              offset: 0,
                              compressed_len: 0,
                              uncompressed_len: 0,
                              base_revision: 0,
                          }
                      }
                      /// Mark the entry as the first of the index (it then carries the
                      /// header instead of an offset).
                      pub fn is_first(&mut self, value: bool) -> &mut Self {
                          self.is_first = value;
                          self
                      }
                      /// Set the inline flag written in the header.
                      pub fn with_inline(&mut self, value: bool) -> &mut Self {
                          self.is_inline = value;
                          self
                      }
                      /// Set the general-delta flag written in the header.
                      pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
                          self.is_general_delta = value;
                          self
                      }
                      /// Set the version written in the header.
                      pub fn with_version(&mut self, value: u16) -> &mut Self {
                          self.version = value;
                          self
                      }
                      /// Set the data offset (ignored for a first entry).
                      pub fn with_offset(&mut self, value: usize) -> &mut Self {
                          self.offset = value;
                          self
                      }
                      /// Set the compressed length field.
                      pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
                          self.compressed_len = value;
                          self
                      }
                      /// Set the uncompressed length field.
                      pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
                          self.uncompressed_len = value;
                          self
                      }
                      /// Set the base revision field.
                      pub fn with_base_revision(&mut self, value: Revision) -> &mut Self {
                          self.base_revision = value;
                          self
                      }
                      /// Serialize the configured fields, all in big-endian order.
                      pub fn build(&self) -> Vec<u8> {
                          let mut out = Vec::with_capacity(INDEX_ENTRY_SIZE);
                          if self.is_first {
                              // Header flags: bit 1 is general delta, bit 0 is inline.
                              let general_delta_bit = (self.is_general_delta as u8) << 1;
                              let inline_bit = self.is_inline as u8;
                              out.extend_from_slice(&[0u8, general_delta_bit | inline_bit]);
                              out.extend_from_slice(&self.version.to_be_bytes());
                              // Remaining offset bytes.
                              out.extend_from_slice(&[0u8; 2]);
                          } else {
                              // Offset stored on 48 bits (6 bytes)
                              let wide_offset = (self.offset as u64).to_be_bytes();
                              out.extend_from_slice(&wide_offset[2..]);
                          }
                          out.extend_from_slice(&[0u8; 2]); // Revision flags.
                          out.extend_from_slice(&(self.compressed_len as u32).to_be_bytes());
                          out.extend_from_slice(
                              &(self.uncompressed_len as u32).to_be_bytes(),
                          );
                          out.extend_from_slice(&self.base_revision.to_be_bytes());
                          out
                      }
                  }
                  /// Wrap `bytes` in an `IndexEntry` that has no offset override.
                  fn plain_entry(bytes: &[u8]) -> IndexEntry<'_> {
                      IndexEntry {
                          bytes,
                          offset_override: None,
                      }
                  }
                  /// Build the bytes of a first entry without general delta and with
                  /// the given inline flag.
                  fn header_bytes(general_delta: bool, inline: bool) -> Vec<u8> {
                      IndexEntryBuilder::new()
                          .is_first(true)
                          .with_general_delta(general_delta)
                          .with_inline(inline)
                          .build()
                  }
                  #[test]
                  fn is_not_inline_when_no_inline_flag_test() {
                      let bytes = header_bytes(false, false);
                      assert!(!is_inline(&bytes))
                  }
                  #[test]
                  fn is_inline_when_inline_flag_test() {
                      let bytes = header_bytes(false, true);
                      assert!(is_inline(&bytes))
                  }
                  #[test]
                  fn is_inline_when_inline_and_generaldelta_flags_test() {
                      let bytes = header_bytes(true, true);
                      assert!(is_inline(&bytes))
                  }
                  #[test]
                  fn test_offset() {
                      let bytes = IndexEntryBuilder::new().with_offset(1).build();
                      assert_eq!(plain_entry(&bytes).offset(), 1)
                  }
                  #[test]
                  fn test_with_overridden_offset() {
                      let bytes = IndexEntryBuilder::new().with_offset(1).build();
                      let overridden = IndexEntry {
                          bytes: &bytes,
                          offset_override: Some(2),
                      };
                      assert_eq!(overridden.offset(), 2)
                  }
                  #[test]
                  fn test_compressed_len() {
                      let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
                      assert_eq!(plain_entry(&bytes).compressed_len(), 1)
                  }
                  #[test]
                  fn test_uncompressed_len() {
                      let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
                      assert_eq!(plain_entry(&bytes).uncompressed_len(), 1)
                  }
                  #[test]
                  fn test_base_revision() {
                      let bytes = IndexEntryBuilder::new().with_base_revision(1).build();
                      assert_eq!(plain_entry(&bytes).base_revision(), 1)
                  }
              }
              #[cfg(test)]
              pub use tests::IndexEntryBuilder;

rust/hg-core/src/revlog/revlog.rs

0 +14 -3

              use std::borrow::Cow;
              use std::io::Read;
              use std::ops::Deref;
              use std::path::Path;
              use byteorder::{BigEndian, ByteOrder};
              use flate2::read::ZlibDecoder;
              use micro_timer::timed;
              use sha1::{Digest, Sha1};
              use zstd;
              use super::index::Index;
              use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
              use super::nodemap;
              use super::nodemap::{NodeMap, NodeMapError};
              use super::nodemap_docket::NodeMapDocket;
              use super::patch;
              use crate::errors::HgError;
              use crate::repo::Repo;
              use crate::revlog::Revision;
              use crate::{Node, NULL_REVISION};
              #[derive(derive_more::From)]
              pub enum RevlogError {
                  InvalidRevision,
                  /// Working directory is not supported
                  WDirUnsupported,
                  /// Found more than one entry whose ID match the requested prefix
                  AmbiguousPrefix,
                  #[from]
                  Other(HgError),
              }
              impl From<NodeMapError> for RevlogError {
                  fn from(error: NodeMapError) -> Self {
                      match error {
                          NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
                          NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
                      }
                  }
              }
              impl RevlogError {
                  fn corrupted() -> Self {
                      RevlogError::Other(HgError::corrupted("corrupted revlog"))
                  }
              }
              /// Read only implementation of revlog.
              pub struct Revlog {
                  /// When index and data are not interleaved: bytes of the revlog index.
                  /// When index and data are interleaved: bytes of the revlog index and
                  /// data.
                  index: Index,
                  /// When index and data are not interleaved: bytes of the revlog data
                  data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
                  /// When present on disk: the persistent nodemap for this revlog
                  nodemap: Option<nodemap::NodeTree>,
              }
              impl Revlog {
                  /// Open a revlog index file.
                  ///
                  /// It will also open the associated data file if index and data are not
                  /// interleaved.
                  ///
                  /// `index_path` and `data_path` are opened through the repository's
                  /// store VFS. When `data_path` is `None`, the data file defaults to
                  /// `index_path` with its extension replaced by `d`.
                  ///
                  /// # Errors
                  ///
                  /// Returns an error if a file cannot be memory-mapped, if the revlog
                  /// version is not 1, if the index cannot be parsed or if the nodemap
                  /// docket cannot be read.
                  #[timed]
                  pub fn open(
                      repo: &Repo,
                      index_path: impl AsRef<Path>,
                      data_path: Option<&Path>,
                  ) -> Result<Self, HgError> {
                      let index_path = index_path.as_ref();
                      let index_mmap = repo.store_vfs().mmap_open(&index_path)?;
-                     let version = get_version(&index_mmap);
+                     let version = get_version(&index_mmap)?;
                      if version != 1 {
                          // A proper new version should have had a repo/store requirement.
                          return Err(HgError::corrupted("corrupted revlog"));
                      }
                      let index = Index::new(Box::new(index_mmap))?;
                      let default_data_path = index_path.with_extension("d");
                      // type annotation required
                      // won't recognize Mmap as Deref<Target = [u8]>
                      let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
                          if index.is_inline() {
                              // Inline revlog: the data lives in the index file itself.
                              None
                          } else {
                              let data_path = data_path.unwrap_or(&default_data_path);
                              let data_mmap = repo.store_vfs().mmap_open(data_path)?;
                              Some(Box::new(data_mmap))
                          };
                      // The persistent nodemap is optional: it is only loaded when a
                      // docket is found for this index.
                      let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map(
                          |(docket, data)| {
                              nodemap::NodeTree::load_bytes(
                                  Box::new(data),
                                  docket.data_length,
                              )
                          },
                      );
                      Ok(Revlog {
                          index,
                          data_bytes,
                          nodemap,
                      })
                  }
                  /// Return number of entries of the `Revlog`.
                  pub fn len(&self) -> usize {
                      self.index.len()
                  }
                  /// Returns `true` if the `Revlog` has zero `entries`.
                  pub fn is_empty(&self) -> bool {
                      self.index.is_empty()
                  }
                  /// Look up the node ID of a revision number; `None` when the index has
                  /// no entry for it.
                  pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
                      self.index.get_entry(rev).map(|entry| entry.hash())
                  }
                  /// Return the revision number for the given node ID, if it exists in this
                  /// revlog
                  ///
                  /// `node` may be a full node ID or a prefix. A prefix of the null node
                  /// resolves to `NULL_REVISION` without consulting the index.
                  ///
                  /// # Errors
                  ///
                  /// - `RevlogError::InvalidRevision` when no revision matches.
                  /// - `RevlogError::AmbiguousPrefix` when several revisions match the
                  ///   prefix.
                  /// - A corruption error when the index is missing an entry it should
                  ///   contain.
                  #[timed]
                  pub fn rev_from_node(
                      &self,
                      node: NodePrefix,
                  ) -> Result<Revision, RevlogError> {
                      if node.is_prefix_of(&NULL_NODE) {
                          return Ok(NULL_REVISION);
                      }
                      if let Some(nodemap) = &self.nodemap {
                          return nodemap
                              .find_bin(&self.index, node)?
                              .ok_or(RevlogError::InvalidRevision);
                      }
                      // Fallback to linear scan when a persistent nodemap is not present.
                      // This happens when the persistent-nodemap experimental feature is not
                      // enabled, or for small revlogs.
                      //
                      // TODO: consider building a non-persistent nodemap in memory to
                      // optimize these cases.
                      let mut found_by_prefix = None;
                      for rev in (0..self.len() as Revision).rev() {
                          // Build the error lazily: this runs once per revision and the
                          // entry is expected to be present.
                          let index_entry =
                              self.index.get_entry(rev).ok_or_else(|| {
                                  HgError::corrupted(
                                      "revlog references a revision not in the index",
                                  )
                              })?;
                          // An exact match wins immediately.
                          if node == *index_entry.hash() {
                              return Ok(rev);
                          }
                          if node.is_prefix_of(index_entry.hash()) {
                              if found_by_prefix.is_some() {
                                  return Err(RevlogError::AmbiguousPrefix);
                              }
                              found_by_prefix = Some(rev)
                          }
                      }
                      found_by_prefix.ok_or(RevlogError::InvalidRevision)
                  }
                  /// Tell whether the index holds an entry for the given revision.
                  pub fn has_rev(&self, rev: Revision) -> bool {
                      match self.index.get_entry(rev) {
                          Some(_) => true,
                          None => false,
                      }
                  }
                  /// Return the full data associated to a revision.
                  ///
                  /// All entries required to build the final data out of deltas will be
                  /// retrieved as needed, and the deltas will be applied to the initial
                  /// snapshot to rebuild the final data.
                  ///
                  /// With the added (`+`) lines, the null revision yields empty data
                  /// without any index lookup.
                  ///
                  /// # Errors
                  ///
                  /// Returns `RevlogError::InvalidRevision` when `rev` is not in the
                  /// index, and a corruption error when an entry of the delta chain is
                  /// missing or when the rebuilt data does not match the recorded hash.
                  #[timed]
                  pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
+                     if rev == NULL_REVISION {
+                         return Ok(vec![]);
+                     };
                      // Todo return -> Cow
                      let mut entry = self.get_entry(rev)?;
                      // Walk back through the base revisions until reaching an entry
                      // without a base; the entries visited on the way are the deltas.
                      let mut delta_chain = vec![];
                      while let Some(base_rev) = entry.base_rev {
                          delta_chain.push(entry);
                          entry = self
                              .get_entry(base_rev)
                              .map_err(|_| RevlogError::corrupted())?;
                      }
                      // TODO do not look twice in the index
                      let index_entry = self
                          .index
                          .get_entry(rev)
                          .ok_or(RevlogError::InvalidRevision)?;
                      let data: Vec<u8> = if delta_chain.is_empty() {
                          entry.data()?.into()
                      } else {
                          Revlog::build_data_from_deltas(entry, &delta_chain)?
                      };
                      // Only hand out data whose hash matches the one in the index.
                      if self.check_hash(
                          index_entry.p1(),
                          index_entry.p2(),
                          index_entry.hash().as_bytes(),
                          &data,
                      ) {
                          Ok(data)
                      } else {
                          Err(RevlogError::corrupted())
                      }
                  }
                  /// Check the hash of some given data against the recorded hash.
                  pub fn check_hash(
                      &self,
                      p1: Revision,
                      p2: Revision,
                      expected: &[u8],
                      data: &[u8],
                  ) -> bool {
                      let e1 = self.index.get_entry(p1);
                      let h1 = match e1 {
                          Some(ref entry) => entry.hash(),
                          None => &NULL_NODE,
                      };
                      let e2 = self.index.get_entry(p2);
                      let h2 = match e2 {
                          Some(ref entry) => entry.hash(),
                          None => &NULL_NODE,
                      };
                      &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
                  }
                  /// Rebuild the full data of a revision from its snapshot and its
                  /// deltas.
                  ///
                  /// `deltas` is walked in reverse order; the resulting patch lists are
                  /// folded into a single patch which is applied to the snapshot.
                  #[timed]
                  fn build_data_from_deltas(
                      snapshot: RevlogEntry,
                      deltas: &[RevlogEntry],
                  ) -> Result<Vec<u8>, RevlogError> {
                      let base_text = snapshot.data()?;
                      let mut delta_bytes: Vec<Cow<'_, [u8]>> =
                          Vec::with_capacity(deltas.len());
                      for delta in deltas.iter().rev() {
                          delta_bytes.push(delta.data()?);
                      }
                      let patch_lists: Vec<_> = delta_bytes
                          .iter()
                          .map(|bytes| patch::PatchList::new(bytes))
                          .collect();
                      let folded = patch::fold_patch_lists(&patch_lists);
                      Ok(folded.apply(&base_text))
                  }
                  /// Return the revlog data.
                  fn data(&self) -> &[u8] {
                      match self.data_bytes {
                          Some(ref data_bytes) => &data_bytes,
                          None => panic!(
                              "forgot to load the data or trying to access inline data"
                          ),
                      }
                  }
                  /// Build the `RevlogEntry` of a revision from its index entry.
                  ///
                  /// The entry borrows its compressed bytes from the index (inline
                  /// revlog) or from the data file. `base_rev` is `None` when the index
                  /// records the revision as its own base.
                  ///
                  /// Returns `RevlogError::InvalidRevision` when the index has no entry
                  /// for `rev`.
                  fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
                      let index_entry = self
                          .index
                          .get_entry(rev)
                          .ok_or(RevlogError::InvalidRevision)?;
                      let start = index_entry.offset();
                      let compressed_len = index_entry.compressed_len();
                      let end = start + compressed_len;
                      let bytes = if self.index.is_inline() {
                          self.index.data(start, end)
                      } else {
                          &self.data()[start..end]
                      };
                      let recorded_base = index_entry.base_revision();
                      let base_rev = if recorded_base == rev {
                          None
                      } else {
                          Some(recorded_base)
                      };
                      Ok(RevlogEntry {
                          rev,
                          bytes,
                          compressed_len,
                          uncompressed_len: index_entry.uncompressed_len(),
                          base_rev,
                      })
                  }
              }
              /// The revlog entry's bytes and the necessary information to extract
              /// the entry's data.
              #[derive(Debug)]
              pub struct RevlogEntry<'a> {
                  /// Revision number of this entry.
                  rev: Revision,
                  /// Stored bytes of the entry; when non-empty, the first byte selects
                  /// how the rest is decoded (see `data`).
                  bytes: &'a [u8],
                  /// Compressed length as recorded in the index entry.
                  compressed_len: usize,
                  /// Uncompressed length as recorded in the index entry.
                  uncompressed_len: usize,
                  /// Base revision of the delta, or `None` when the entry is its own
                  /// base (i.e. a snapshot).
                  base_rev: Option<Revision>,
              }
              impl<'a> RevlogEntry<'a> {
                  /// Revision number of this entry.
                  pub fn revision(&self) -> Revision {
                      self.rev
                  }
                  /// Extract the data contained in the entry.
                  ///
                  /// An entry without any bytes yields empty data. Otherwise the first
                  /// byte decides how the rest is decoded; an unknown first byte is
                  /// reported as corruption.
                  pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
                      let header = match self.bytes.first() {
                          Some(byte) => *byte,
                          None => return Ok(Cow::Borrowed(&[])),
                      };
                      match header {
                          // The header byte is part of the revision data itself.
                          b'\0' => Ok(Cow::Borrowed(self.bytes)),
                          // Uncompressed data follows the marker byte.
                          b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
                          // zlib (RFC 1950) stream.
                          b'x' => self.uncompressed_zlib_data().map(Cow::Owned),
                          // zstd frame.
                          b'\x28' => self.uncompressed_zstd_data().map(Cow::Owned),
                          // A proper new format should have had a repo/store requirement.
                          _ => Err(RevlogError::corrupted()),
                      }
                  }
                  /// Inflate the zlib-compressed bytes of the entry.
                  ///
                  /// A delta is read until the end of the stream; a snapshot must
                  /// provide exactly `uncompressed_len` bytes.
                  fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
                      let mut decoder = ZlibDecoder::new(self.bytes);
                      let inflated = if self.is_delta() {
                          let mut out = Vec::with_capacity(self.compressed_len);
                          decoder
                              .read_to_end(&mut out)
                              .map_err(|_| RevlogError::corrupted())?;
                          out
                      } else {
                          let mut out = vec![0u8; self.uncompressed_len];
                          decoder
                              .read_exact(&mut out)
                              .map_err(|_| RevlogError::corrupted())?;
                          out
                      };
                      Ok(inflated)
                  }
                  /// Decompress the zstd-compressed bytes of the entry.
                  ///
                  /// A delta is stream-decoded into a growable buffer; a snapshot is
                  /// decoded into a buffer of `uncompressed_len` bytes and must fill
                  /// it exactly.
                  fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
                      if self.is_delta() {
                          let mut out = Vec::with_capacity(self.compressed_len);
                          zstd::stream::copy_decode(self.bytes, &mut out)
                              .map_err(|_| RevlogError::corrupted())?;
                          return Ok(out);
                      }
                      let mut out = vec![0; self.uncompressed_len];
                      let written = zstd::block::decompress_to_buffer(self.bytes, &mut out)
                          .map_err(|_| RevlogError::corrupted())?;
                      if written == self.uncompressed_len {
                          Ok(out)
                      } else {
                          Err(RevlogError::corrupted())
                      }
                  }
                  /// Whether the entry is a delta against a base revision rather than
                  /// a snapshot (this changes how it is decompressed).
                  fn is_delta(&self) -> bool {
                      self.base_rev.is_some()
                  }
              }
              /// Format version of the revlog.
-             pub fn get_version(index_bytes: &[u8]) -> u16 {
-                 BigEndian::read_u16(&index_bytes[2..=3])
+             pub fn get_version(index_bytes: &[u8]) -> Result<u16, HgError> {
+                 if index_bytes.len() == 0 {
+                     return Ok(1);
+                 };
+                 if index_bytes.len() < 4 {
+                     return Err(HgError::corrupted(
+                         "corrupted revlog: can't read the index format header",
+                     ));
+                 };
+                 Ok(BigEndian::read_u16(&index_bytes[2..=3]))
              }
              /// Compute the SHA-1 hash of a revision from its data and the hashes of
              /// its two parents.
              ///
              /// The parent hashes are fed to the hasher in ascending byte order, so
              /// swapping `p1_hash` and `p2_hash` gives the same result.
              fn hash(
                  data: &[u8],
                  p1_hash: &[u8],
                  p2_hash: &[u8],
              ) -> [u8; NODE_BYTES_LENGTH] {
                  let (low, high) = if p1_hash > p2_hash {
                      (p2_hash, p1_hash)
                  } else {
                      (p1_hash, p2_hash)
                  };
                  let mut hasher = Sha1::new();
                  hasher.update(low);
                  hasher.update(high);
                  hasher.update(data);
                  *hasher.finalize().as_ref()
              }
              #[cfg(test)]
              mod tests {
                  use super::*;
                  use super::super::index::IndexEntryBuilder;
                  /// The version written by the builder is read back from the header.
                  #[test]
                  fn version_test() {
                      let bytes = IndexEntryBuilder::new()
                          .is_first(true)
                          .with_version(1)
                          .build();
                      // `get_version` returns a `Result`; compare through `ok()` so
                      // the error type does not need to implement `PartialEq`.
                      assert_eq!(get_version(&bytes).ok(), Some(1))
                  }
                  /// An empty index is treated as a version 1 revlog.
                  #[test]
                  fn version_of_empty_index_test() {
                      assert_eq!(get_version(&[]).ok(), Some(1))
                  }
                  /// A non-empty index too short to hold the header is an error.
                  #[test]
                  fn version_of_truncated_index_test() {
                      assert!(get_version(&[0, 1, 0]).is_err())
                  }
              }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages