upstream/mercurial-mirror Commit - r52082:b4d152a2

rust-index: add append method...

Raphaël Gomès -

r52082:b4d152a2 default

parent child

rust/hg-core/src/revlog/index.rs

0 +94 -1

              use std::fmt::Debug;
              use std::ops::Deref;
              use byteorder::{BigEndian, ByteOrder};
+             use bytes_cast::{unaligned, BytesCast};
+             use super::REVIDX_KNOWN_FLAGS;
              use crate::errors::HgError;
+             use crate::node::{NODE_BYTES_LENGTH, STORED_NODE_ID_BYTES};
              use crate::revlog::node::Node;
              use crate::revlog::{Revision, NULL_REVISION};
-             use crate::{Graph, GraphError, RevlogIndex, UncheckedRevision};
+             use crate::{Graph, GraphError, RevlogError, RevlogIndex, UncheckedRevision};
              /// Size in bytes of a single entry of the index.
              pub const INDEX_ENTRY_SIZE: usize = 64;
+             pub const COMPRESSION_MODE_INLINE: u8 = 2; // the only mode `RevisionDataParams::validate` accepts
              /// The first four bytes of an index: the big-endian format flags in bytes
              /// 0..2, followed by the big-endian format version in bytes 2..4.
              pub struct IndexHeader {
                  header_bytes: [u8; 4],
              }
              /// Flag bits decoded from the first two bytes of the index header.
              #[derive(Copy, Clone)]
              pub struct IndexHeaderFlags {
                  flags: u16,
              }
              /// Corresponds to the high bits of `_format_flags` in python
              impl IndexHeaderFlags {
                  /// Bit mask matching FLAG_INLINE_DATA in python
                  const INLINE_DATA: u16 = 1 << 0;
                  /// Bit mask matching FLAG_GENERALDELTA in python
                  const GENERALDELTA: u16 = 1 << 1;

                  /// Whether the inline-data bit is set.
                  pub fn is_inline(self) -> bool {
                      (self.flags & Self::INLINE_DATA) == Self::INLINE_DATA
                  }
                  /// Whether the generaldelta bit is set.
                  pub fn uses_generaldelta(self) -> bool {
                      (self.flags & Self::GENERALDELTA) == Self::GENERALDELTA
                  }
              }
              /// Corresponds to the INDEX_HEADER structure,
              /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
              impl IndexHeader {
                  /// The only revlog version currently supported by rhg.
                  const REVLOGV1: u16 = 1;

                  /// Header used when the index file is empty.
                  ///
                  /// We treat an empty file as a valid index with no entries, which
                  /// means picking an arbitrary format for it: flags `3` and version
                  /// `1`. The value corresponds to the `new_header` variable in
                  /// `revlog.py`, `_loadindex`.
                  const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
                      header_bytes: [0, 3, 0, 1],
                  };

                  /// Decode the first two header bytes as big-endian flags.
                  fn format_flags(&self) -> IndexHeaderFlags {
                      // No "unknown flags" check here, unlike in python. Maybe there should
                      // be.
                      let [hi, lo, ..] = self.header_bytes;
                      IndexHeaderFlags {
                          flags: u16::from_be_bytes([hi, lo]),
                      }
                  }

                  /// Corresponds to `_format_version` in Python.
                  fn format_version(&self) -> u16 {
                      let [_, _, hi, lo] = self.header_bytes;
                      u16::from_be_bytes([hi, lo])
                  }

                  /// Read the header from the start of `index_bytes`.
                  ///
                  /// An empty slice yields `EMPTY_INDEX_HEADER`; a non-empty slice
                  /// shorter than four bytes is reported as corruption.
                  fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
                      if index_bytes.is_empty() {
                          return Ok(IndexHeader::EMPTY_INDEX_HEADER);
                      }
                      let prefix = index_bytes.get(..4).ok_or_else(|| {
                          HgError::corrupted(
                              "corrupted revlog: can't read the index format header",
                          )
                      })?;
                      // `prefix` is exactly four bytes long, so this cannot fail.
                      let header_bytes: [u8; 4] = prefix.try_into().expect("impossible");
                      Ok(IndexHeader { header_bytes })
                  }
              }
              /// Gives uniform access to the index bytes, which may be split between an
              /// immutable part (`bytes`) and a mutable part (`added`) once something
              /// has been appended. Callers index into it as if it were one buffer.
              struct IndexData {
                  /// Immutable bytes, most likely taken from disk
                  bytes: Box<dyn Deref<Target = [u8]> + Send>,
                  /// Bytes that were added after reading the index
                  added: Vec<u8>,
              }
              impl IndexData {
                  /// Wrap `bytes` as the immutable part, with nothing added yet.
                  pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send>) -> Self {
                      let added = Vec::new();
                      Self { bytes, added }
                  }
                  /// Combined length of the immutable and the added bytes.
                  pub fn len(&self) -> usize {
                      let immutable_len = self.bytes.len();
                      immutable_len + self.added.len()
                  }
              }
              impl std::ops::Index<std::ops::Range<usize>> for IndexData {
                  type Output = [u8];
                  /// Resolve `index` against whichever part holds its start.
                  ///
                  /// Panics if the range starts in the immutable part and ends in
                  /// the added part.
                  fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
                      let boundary = self.bytes.len();
                      let std::ops::Range { start, end } = index;
                      if start >= boundary {
                          // Entirely in the added part: shift to its local coordinates.
                          return &self.added[start - boundary..end - boundary];
                      }
                      if end > boundary {
                          panic!("index data cannot span existing and added ranges");
                      }
                      &self.bytes[start..end]
                  }
              }
+             /// Field values describing one revision, as accepted by `Index::append`.
+             ///
+             /// The fields are public so that code outside this module can build a
+             /// value to append: there is no constructor.
+             pub struct RevisionDataParams {
+                 /// Revision flags. `validate` rejects any bit that is not in
+                 /// `REVIDX_KNOWN_FLAGS`; `into_v1` stores them in the low 16 bits of
+                 /// the combined offset/flags field.
+                 pub flags: u16,
+                 /// Data offset. `into_v1` shifts it left by 16 bits to make room for
+                 /// the flags.
+                 pub data_offset: u64,
+                 pub data_compressed_length: i32,
+                 pub data_uncompressed_length: i32,
+                 pub data_delta_base: i32,
+                 pub link_rev: i32,
+                 pub parent_rev_1: i32,
+                 pub parent_rev_2: i32,
+                 /// Node id, copied by `into_v1` into the first `NODE_BYTES_LENGTH`
+                 /// bytes of the stored node id.
+                 pub node_id: [u8; NODE_BYTES_LENGTH],
+                 /// Not written by `into_v1`.
+                 pub _sidedata_offset: u64,
+                 /// Not written by `into_v1`.
+                 pub _sidedata_compressed_length: i32,
+                 /// `validate` requires this to be `COMPRESSION_MODE_INLINE`.
+                 pub data_compression_mode: u8,
+                 /// `validate` requires this to be `COMPRESSION_MODE_INLINE`; not
+                 /// written by `into_v1`.
+                 pub _sidedata_compression_mode: u8,
+                 /// Not written by `into_v1`.
+                 pub _rank: i32,
+             }
+             /// Byte layout of one index entry as produced by
+             /// `RevisionDataParams::into_v1`: unaligned big-endian integers followed
+             /// by the node id padded to `STORED_NODE_ID_BYTES`.
+             #[derive(BytesCast)]
+             #[repr(C)]
+             pub struct RevisionDataV1 {
+                 /// Data offset shifted left by 16 bits, with the revision flags in
+                 /// the low 16 bits.
+                 data_offset_or_flags: unaligned::U64Be,
+                 data_compressed_length: unaligned::I32Be,
+                 data_uncompressed_length: unaligned::I32Be,
+                 data_delta_base: unaligned::I32Be,
+                 link_rev: unaligned::I32Be,
+                 parent_rev_1: unaligned::I32Be,
+                 parent_rev_2: unaligned::I32Be,
+                 /// Node id in its first `NODE_BYTES_LENGTH` bytes, zero-padded.
+                 node_id: [u8; STORED_NODE_ID_BYTES],
+             }
+             /// Never called. It exists so that the build fails if `RevisionDataV1`
+             /// stops being exactly `INDEX_ENTRY_SIZE` bytes: `transmute` only
+             /// type-checks between types of equal size.
+             fn _static_assert_size_of_revision_data_v1() {
+                 // Tie the check to the constant the rest of the index code uses,
+                 // rather than repeating the literal size.
+                 let _ = std::mem::transmute::<RevisionDataV1, [u8; INDEX_ENTRY_SIZE]>;
+             }
+             impl RevisionDataParams {
+                 /// Check that these parameters can be appended to the index.
+                 ///
+                 /// Returns a corruption error when `flags` has a bit outside
+                 /// `REVIDX_KNOWN_FLAGS`, or when either compression mode is not
+                 /// `COMPRESSION_MODE_INLINE`.
+                 pub fn validate(&self) -> Result<(), RevlogError> {
+                     let unknown_flags = self.flags & !REVIDX_KNOWN_FLAGS;
+                     if unknown_flags != 0 {
+                         let message =
+                             format!("unknown revlog index flags: {}", self.flags);
+                         return Err(RevlogError::corrupted(message));
+                     }
+
+                     let data_mode = self.data_compression_mode;
+                     if data_mode != COMPRESSION_MODE_INLINE {
+                         let message =
+                             format!("invalid data compression mode: {}", data_mode);
+                         return Err(RevlogError::corrupted(message));
+                     }
+
+                     // FIXME isn't this only for v2 or changelog v2?
+                     let sidedata_mode = self._sidedata_compression_mode;
+                     if sidedata_mode != COMPRESSION_MODE_INLINE {
+                         let message = format!(
+                             "invalid sidedata compression mode: {}",
+                             sidedata_mode
+                         );
+                         return Err(RevlogError::corrupted(message));
+                     }
+
+                     Ok(())
+                 }
+                 /// Convert into the v1 entry layout.
+                 ///
+                 /// The sidedata fields, the data compression mode and the rank
+                 /// are not part of that layout and are dropped.
+                 pub fn into_v1(self) -> RevisionDataV1 {
+                     let Self {
+                         flags,
+                         data_offset,
+                         data_compressed_length,
+                         data_uncompressed_length,
+                         data_delta_base,
+                         link_rev,
+                         parent_rev_1,
+                         parent_rev_2,
+                         node_id,
+                         ..
+                     } = self;
+
+                     // The node id sits at the start of a wider, zero-filled field.
+                     let mut stored_node_id = [0; STORED_NODE_ID_BYTES];
+                     stored_node_id[..NODE_BYTES_LENGTH].copy_from_slice(&node_id);
+
+                     // Offset in the high bits, flags in the low 16 bits.
+                     let offset_and_flags = (data_offset << 16) | u64::from(flags);
+
+                     RevisionDataV1 {
+                         data_offset_or_flags: offset_and_flags.into(),
+                         data_compressed_length: data_compressed_length.into(),
+                         data_uncompressed_length: data_uncompressed_length.into(),
+                         data_delta_base: data_delta_base.into(),
+                         link_rev: link_rev.into(),
+                         parent_rev_1: parent_rev_1.into(),
+                         parent_rev_2: parent_rev_2.into(),
+                         node_id: stored_node_id,
+                     }
+                 }
+             }
              /// A Revlog index
              pub struct Index {
                  /// The index bytes: those passed to `new`, plus anything appended since.
                  bytes: IndexData,
                  /// Offsets of starts of index blocks.
                  /// Only needed when the index is interleaved with data.
                  /// `Some` exactly when the header's inline flag was set.
                  offsets: Option<Vec<usize>>,
                  /// Value of the generaldelta flag read from the index header.
                  uses_generaldelta: bool,
              }
              impl Debug for Index {
                  /// Shows `offsets` and `uses_generaldelta` only; the index bytes are
                  /// left out of the output.
                  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                      let mut repr = f.debug_struct("Index");
                      repr.field("offsets", &self.offsets);
                      repr.field("uses_generaldelta", &self.uses_generaldelta);
                      repr.finish()
                  }
              }
              impl Graph for Index {
                  /// Return both parents of `rev`.
                  ///
                  /// When `get_entry` has no entry for `rev`, both parents are
                  /// `NULL_REVISION`. A parent that fails `check_revision` yields
                  /// `GraphError::ParentOutOfRange(rev)`.
                  fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
                      let entry = match self.get_entry(rev) {
                          Some(entry) => entry,
                          None => return Ok([NULL_REVISION, NULL_REVISION]),
                      };
                      let out_of_range = || GraphError::ParentOutOfRange(rev);
                      // The C implementation checks that the parents are valid
                      // before returning
                      let p1 = self.check_revision(entry.p1()).ok_or_else(out_of_range)?;
                      let p2 = self.check_revision(entry.p2()).ok_or_else(out_of_range)?;
                      Ok([p1, p2])
                  }
              }
              impl Index {
                  /// Create an index from bytes.
                  /// Calculate the start of each entry when is_inline is true.
                  ///
                  /// Fails when the header cannot be parsed, when the version is not
                  /// `REVLOGV1`, or when walking an inline index does not land exactly
                  /// on the end of `bytes`.
                  pub fn new(
                      bytes: Box<dyn Deref<Target = [u8]> + Send>,
                  ) -> Result<Self, HgError> {
                      let header = IndexHeader::parse(bytes.as_ref())?;
                      if header.format_version() != IndexHeader::REVLOGV1 {
                          // A proper new version should have had a repo/store
                          // requirement.
                          return Err(HgError::corrupted("unsupported revlog version"));
                      }

                      // This is only correct because we know version is REVLOGV1.
                      // In v2 we always use generaldelta, while in v0 we never use
                      // generaldelta. Similar for [is_inline] (it's only used in v1).
                      let format_flags = header.format_flags();
                      let uses_generaldelta = format_flags.uses_generaldelta();

                      if !format_flags.is_inline() {
                          return Ok(Self {
                              bytes: IndexData::new(bytes),
                              offsets: None,
                              uses_generaldelta,
                          });
                      }

                      // Inline: each entry is followed by its compressed data, so
                      // hop from entry to entry and remember where each one starts.
                      let total_len = bytes.len();
                      let mut offsets = Vec::new();
                      let mut cursor: usize = 0;
                      while cursor + INDEX_ENTRY_SIZE <= total_len {
                          let entry_end = cursor + INDEX_ENTRY_SIZE;
                          let entry = IndexEntry {
                              bytes: &bytes[cursor..entry_end],
                              offset_override: None,
                          };
                          offsets.push(cursor);
                          cursor = entry_end + entry.compressed_len() as usize;
                      }

                      if cursor != total_len {
                          return Err(HgError::corrupted(
                              "unexpected inline revlog length",
                          ));
                      }
                      Ok(Self {
                          bytes: IndexData::new(bytes),
                          offsets: Some(offsets),
                          uses_generaldelta,
                      })
                  }
                  /// Value of the generaldelta flag.
                  pub fn uses_generaldelta(&self) -> bool {
                      self.uses_generaldelta
                  }
                  /// Value of the inline flag.
                  pub fn is_inline(&self) -> bool {
                      self.offsets.is_some()
                  }
                  /// Return a slice of bytes if `revlog` is inline. Panic if not.
                  pub fn data(&self, start: usize, end: usize) -> &[u8] {
                      if self.offsets.is_none() {
                          panic!("tried to access data in the index of a revlog that is not inline");
                      }
                      &self.bytes[start..end]
                  }
                  /// Return number of entries of the revlog index.
                  pub fn len(&self) -> usize {
                      match &self.offsets {
                          Some(offsets) => offsets.len(),
                          None => self.bytes.len() / INDEX_ENTRY_SIZE,
                      }
                  }
                  /// Returns `true` if the `Index` has zero `entries`.
                  pub fn is_empty(&self) -> bool {
                      let entry_count = self.len();
                      entry_count == 0
                  }
                  /// Return the index entry corresponding to the given revision if it
                  /// exists.
                  pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
                      if rev == NULL_REVISION {
                          return None;
                      }
                      let entry = match &self.offsets {
                          Some(offsets) => self.get_entry_inline(rev, offsets),
                          None => self.get_entry_separated(rev),
                      };
                      Some(entry)
                  }
                  /// Entry lookup for an inline index, using the precomputed
                  /// `offsets`.
                  fn get_entry_inline(
                      &self,
                      rev: Revision,
                      offsets: &[usize],
                  ) -> IndexEntry {
                      let entry_start = offsets[rev.0 as usize];
                      let entry_end = entry_start + INDEX_ENTRY_SIZE;
                      IndexEntry {
                          bytes: &self.bytes[entry_start..entry_end],
                          // See IndexEntry for an explanation of this override.
                          offset_override: Some(entry_end),
                      }
                  }
                  /// Entry lookup for a non-inline index, where entries are laid out
                  /// back to back.
                  fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
                      let entry_start = rev.0 as usize * INDEX_ENTRY_SIZE;
                      let entry_end = entry_start + INDEX_ENTRY_SIZE;
                      // Override the offset of the first revision as its bytes are used
                      // for the index's metadata (saving space because it is always 0)
                      let is_first = rev == Revision(0);
                      IndexEntry {
                          bytes: &self.bytes[entry_start..entry_end],
                          offset_override: if is_first { Some(0) } else { None },
                      }
                  }
+                 /// TODO move this to the trait probably, along with other things
+                 ///
+                 /// Validate `revision_data`, then add it as a new v1 entry after
+                 /// the current index bytes. For an inline index the start of the
+                 /// new entry is also recorded in `offsets`.
+                 pub fn append(
+                     &mut self,
+                     revision_data: RevisionDataParams,
+                 ) -> Result<(), RevlogError> {
+                     revision_data.validate()?;
+                     let entry_start = self.bytes.len();
+                     let entry = revision_data.into_v1();
+                     if let Some(offsets) = &mut self.offsets {
+                         offsets.push(entry_start);
+                     }
+                     self.bytes.added.extend_from_slice(entry.as_bytes());
+                     Ok(())
+                 }
              }
              impl super::RevlogIndex for Index {
                  fn len(&self) -> usize {
                      self.len()
                  }
                  fn node(&self, rev: Revision) -> Option<&Node> {
                      self.get_entry(rev).map(|entry| entry.hash())
                  }
              }
              /// A view over the bytes of one index entry, with big-endian accessors
              /// for its fields.
              #[derive(Debug)]
              pub struct IndexEntry<'a> {
                  bytes: &'a [u8],
                  /// Allows to override the offset value of the entry.
                  ///
                  /// For interleaved index and data, the offset stored in the index
                  /// corresponds to the separated data offset.
                  /// It has to be overridden with the actual offset in the interleaved
                  /// index which is just after the index block.
                  ///
                  /// For separated index and data, the offset stored in the first index
                  /// entry is mixed with the index headers.
                  /// It has to be overridden with 0.
                  offset_override: Option<usize>,
              }
              impl<'a> IndexEntry<'a> {
                  /// Return the offset of the data.
                  ///
                  /// This is the override when one is set, otherwise the 6-byte
                  /// big-endian value at the start of the entry.
                  pub fn offset(&self) -> usize {
                      self.offset_override.unwrap_or_else(|| {
                          // Left-pad the 48-bit value to a full u64.
                          let mut widened = [0u8; 8];
                          widened[2..].copy_from_slice(&self.bytes[..6]);
                          u64::from_be_bytes(widened) as usize
                      })
                  }
                  /// Return the revision flags (bytes 6 and 7).
                  pub fn flags(&self) -> u16 {
                      BigEndian::read_u16(&self.bytes[6..8])
                  }
                  /// Return the compressed length of the data.
                  pub fn compressed_len(&self) -> u32 {
                      BigEndian::read_u32(&self.bytes[8..12])
                  }
                  /// Return the uncompressed length of the data.
                  pub fn uncompressed_len(&self) -> i32 {
                      BigEndian::read_i32(&self.bytes[12..16])
                  }
                  /// Return the revision upon which the data has been derived.
                  pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
                      // TODO Maybe return an Option when base_revision == rev?
                      //      Requires to add rev to IndexEntry
                      let raw = BigEndian::read_i32(&self.bytes[16..20]);
                      raw.into()
                  }
                  /// Return the link revision (bytes 20 to 23).
                  pub fn link_revision(&self) -> UncheckedRevision {
                      let raw = BigEndian::read_i32(&self.bytes[20..24]);
                      raw.into()
                  }
                  /// Return the first parent (bytes 24 to 27).
                  pub fn p1(&self) -> UncheckedRevision {
                      let raw = BigEndian::read_i32(&self.bytes[24..28]);
                      raw.into()
                  }
                  /// Return the second parent (bytes 28 to 31).
                  pub fn p2(&self) -> UncheckedRevision {
                      let raw = BigEndian::read_i32(&self.bytes[28..32]);
                      raw.into()
                  }
                  /// Return the hash of revision's full text.
                  ///
                  /// Currently, SHA-1 is used and only the first 20 bytes of this field
                  /// are used.
                  pub fn hash(&self) -> &'a Node {
                      let node_bytes = &self.bytes[32..52];
                      node_bytes.try_into().unwrap()
                  }
              }
              #[cfg(test)]
              mod tests {
                  use super::*;
                  use crate::node::NULL_NODE;
                  #[cfg(test)]
                  #[derive(Debug, Copy, Clone)]
                  pub struct IndexEntryBuilder {
                      is_first: bool,
                      is_inline: bool,
                      is_general_delta: bool,
                      version: u16,
                      offset: usize,
                      compressed_len: usize,
                      uncompressed_len: usize,
                      base_revision_or_base_of_delta_chain: Revision,
                      link_revision: Revision,
                      p1: Revision,
                      p2: Revision,
                      node: Node,
                  }
                  #[cfg(test)]
                  impl IndexEntryBuilder {
                      #[allow(clippy::new_without_default)]
                      pub fn new() -> Self {
                          Self {
                              is_first: false,
                              is_inline: false,
                              is_general_delta: true,
                              version: 1,
                              offset: 0,
                              compressed_len: 0,
                              uncompressed_len: 0,
                              base_revision_or_base_of_delta_chain: Revision(0),
                              link_revision: Revision(0),
                              p1: NULL_REVISION,
                              p2: NULL_REVISION,
                              node: NULL_NODE,
                          }
                      }
                      pub fn is_first(&mut self, value: bool) -> &mut Self {
                          self.is_first = value;
                          self
                      }
                      pub fn with_inline(&mut self, value: bool) -> &mut Self {
                          self.is_inline = value;
                          self
                      }
                      pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
                          self.is_general_delta = value;
                          self
                      }
                      pub fn with_version(&mut self, value: u16) -> &mut Self {
                          self.version = value;
                          self
                      }
                      pub fn with_offset(&mut self, value: usize) -> &mut Self {
                          self.offset = value;
                          self
                      }
                      pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
                          self.compressed_len = value;
                          self
                      }
                      pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
                          self.uncompressed_len = value;
                          self
                      }
                      pub fn with_base_revision_or_base_of_delta_chain(
                          &mut self,
                          value: Revision,
                      ) -> &mut Self {
                          self.base_revision_or_base_of_delta_chain = value;
                          self
                      }
                      pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
                          self.link_revision = value;
                          self
                      }
                      pub fn with_p1(&mut self, value: Revision) -> &mut Self {
                          self.p1 = value;
                          self
                      }
                      pub fn with_p2(&mut self, value: Revision) -> &mut Self {
                          self.p2 = value;
                          self
                      }
                      pub fn with_node(&mut self, value: Node) -> &mut Self {
                          self.node = value;
                          self
                      }
                      pub fn build(&self) -> Vec<u8> {
                          let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
                          if self.is_first {
                              bytes.extend(&match (self.is_general_delta, self.is_inline) {
                                  (false, false) => [0u8, 0],
                                  (false, true) => [0u8, 1],
                                  (true, false) => [0u8, 2],
                                  (true, true) => [0u8, 3],
                              });
                              bytes.extend(&self.version.to_be_bytes());
                              // Remaining offset bytes.
                              bytes.extend(&[0u8; 2]);
                          } else {
                              // Offset stored on 48 bits (6 bytes)
                              bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
                          }
                          bytes.extend(&[0u8; 2]); // Revision flags.
                          bytes.extend(&(self.compressed_len as u32).to_be_bytes());
                          bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
                          bytes.extend(
                              &self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
                          );
                          bytes.extend(&self.link_revision.0.to_be_bytes());
                          bytes.extend(&self.p1.0.to_be_bytes());
                          bytes.extend(&self.p2.0.to_be_bytes());
                          bytes.extend(self.node.as_bytes());
                          bytes.extend(vec![0u8; 12]);
                          bytes
                      }
                  }
                  pub fn is_inline(index_bytes: &[u8]) -> bool {
                      IndexHeader::parse(index_bytes)
                          .expect("too short")
                          .format_flags()
                          .is_inline()
                  }
                  pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
                      IndexHeader::parse(index_bytes)
                          .expect("too short")
                          .format_flags()
                          .uses_generaldelta()
                  }
                  pub fn get_version(index_bytes: &[u8]) -> u16 {
                      IndexHeader::parse(index_bytes)
                          .expect("too short")
                          .format_version()
                  }
                  #[test]
                  fn flags_when_no_inline_flag_test() {
                      let bytes = IndexEntryBuilder::new()
                          .is_first(true)
                          .with_general_delta(false)
                          .with_inline(false)
                          .build();
                      assert!(!is_inline(&bytes));
                      assert!(!uses_generaldelta(&bytes));
                  }
                  #[test]
                  fn flags_when_inline_flag_test() {
                      let bytes = IndexEntryBuilder::new()
                          .is_first(true)
                          .with_general_delta(false)
                          .with_inline(true)
                          .build();
                      assert!(is_inline(&bytes));
                      assert!(!uses_generaldelta(&bytes));
                  }
                  #[test]
                  fn flags_when_inline_and_generaldelta_flags_test() {
                      let bytes = IndexEntryBuilder::new()
                          .is_first(true)
                          .with_general_delta(true)
                          .with_inline(true)
                          .build();
                      assert!(is_inline(&bytes));
                      assert!(uses_generaldelta(&bytes));
                  }
                  #[test]
                  fn test_offset() {
                      let bytes = IndexEntryBuilder::new().with_offset(1).build();
                      let entry = IndexEntry {
                          bytes: &bytes,
                          offset_override: None,
                      };
                      assert_eq!(entry.offset(), 1)
                  }
                  #[test]
                  fn test_with_overridden_offset() {
                      let bytes = IndexEntryBuilder::new().with_offset(1).build();
                      let entry = IndexEntry {
                          bytes: &bytes,
                          offset_override: Some(2),
                      };
                      assert_eq!(entry.offset(), 2)
                  }
                  #[test]
                  fn test_compressed_len() {
                      let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
                      let entry = IndexEntry {
                          bytes: &bytes,
                          offset_override: None,
                      };
                      assert_eq!(entry.compressed_len(), 1)
                  }
                  #[test]
                  fn test_uncompressed_len() {
                      let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
                      let entry = IndexEntry {
                          bytes: &bytes,
                          offset_override: None,
                      };
                      assert_eq!(entry.uncompressed_len(), 1)
                  }
                  #[test]
                  fn test_base_revision_or_base_of_delta_chain() {
                      let bytes = IndexEntryBuilder::new()
                          .with_base_revision_or_base_of_delta_chain(Revision(1))
                          .build();
                      let entry = IndexEntry {
                          bytes: &bytes,
                          offset_override: None,
                      };
                      assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
                  }
                  #[test]
                  fn link_revision_test() {
                      let bytes = IndexEntryBuilder::new()
                          .with_link_revision(Revision(123))
                          .build();
                      let entry = IndexEntry {
                          bytes: &bytes,
                          offset_override: None,
                      };
                      assert_eq!(entry.link_revision(), 123.into());
                  }
                  #[test]
                  fn p1_test() {
                      let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();
                      let entry = IndexEntry {
                          bytes: &bytes,
                          offset_override: None,
                      };
                      assert_eq!(entry.p1(), 123.into());
                  }
                  #[test]
                  fn p2_test() {
                      let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();
                      let entry = IndexEntry {
                          bytes: &bytes,
                          offset_override: None,
                      };
                      assert_eq!(entry.p2(), 123.into());
                  }
                  #[test]
                  fn node_test() {
                      // The node handed to the builder must be read back unchanged
                      // through `hash()`.
                      let expected =
                          Node::from_hex("0123456789012345678901234567890123456789")
                              .unwrap();
                      let raw_entry = IndexEntryBuilder::new().with_node(expected).build();
                      let parsed = IndexEntry {
                          offset_override: None,
                          bytes: &raw_entry,
                      };
                      let stored = *parsed.hash();
                      assert_eq!(stored, expected);
                  }
                  #[test]
                  fn version_test() {
                      // The entry is flagged as first and given version 2; that version
                      // must be what `get_version` reads back.
                      let builder = IndexEntryBuilder::new();
                      let raw_entry = builder.is_first(true).with_version(2).build();
                      let version = get_version(&raw_entry);
                      assert_eq!(version, 2);
                  }
              }
              #[cfg(test)]
              pub use tests::IndexEntryBuilder;

rust/hg-core/src/revlog/node.rs

0 +4 0

              // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
              //
              // This software may be used and distributed according to the terms of the
              // GNU General Public License version 2 or any later version.
              //! Definitions and utilities for Revision nodes
              //!
              //! In Mercurial code base, it is customary to call "a node" the binary SHA
              //! of a revision.
              use crate::errors::HgError;
              use bytes_cast::BytesCast;
              use std::fmt;
              /// The length in bytes of a `Node`
              ///
              /// This constant is meant to ease refactors of this module. Even though
              /// it is `pub`, calling code should not expect all nodes to have the
              /// same size, should we support several formats concurrently in the
              /// future.
              pub const NODE_BYTES_LENGTH: usize = 20;
+             /// The length in bytes set aside on disk for a `Node`. Revlog up to v1 only
+             /// use 20 out of those 32.
+             pub const STORED_NODE_ID_BYTES: usize = 32;
              /// Id of the null node.
              ///
              /// Used to indicate the absence of a node.
              pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
              /// The length in nybbles (hexadecimal digits) of a `Node`: two per byte.
              ///
              /// Kept private; see also `NODE_BYTES_LENGTH` about why calling code
              /// should not rely on a fixed node size.
              const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
              /// Default number of nybbles kept in a short prefix, for UI presentation
              const SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH: u8 = 12;
              /// Private alias for readability and to ease future change
              type NodeData = [u8; NODE_BYTES_LENGTH];
              /// Binary revision SHA
              ///
              /// ## Future changes of hash size
              ///
              /// To accommodate future changes of hash size, Rust callers
              /// should use the conversion methods at the boundaries (FFI, actual
              /// computation of hashes and I/O) only, and only if required.
              ///
              /// All other callers outside of unit tests should just handle `Node` values
              /// and never make any assumption on the actual length, using [`nybbles_len`]
              /// if they need a loop boundary.
              ///
              /// All methods that create a `Node` either take a type that enforces
              /// the size or return an error at runtime.
              ///
              /// [`nybbles_len`]: #method.nybbles_len
              #[derive(Copy, Clone, PartialEq, BytesCast, derive_more::From)]
              #[repr(transparent)]
              pub struct Node {
                  /// Raw binary hash, `NODE_BYTES_LENGTH` bytes long.
                  data: NodeData,
              }
              impl fmt::Debug for Node {
                  /// Render as a `Node(..)` tuple whose single field is the
                  /// hexadecimal debug rendering of the raw bytes, as a string.
                  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                      let hex_bytes = format!("{:x?}", self.data);
                      // `debug_tuple` keeps the output a little more compact without
                      // losing data.
                      let mut tuple = f.debug_tuple("Node");
                      tuple.field(&hex_bytes);
                      tuple.finish()
                  }
              }
              /// The all-zero node, which is the node value for NULL_REVISION
              pub const NULL_NODE: Node = Node {
                  data: [0u8; NODE_BYTES_LENGTH],
              };
              /// Borrow a byte slice as a `Node`, failing with `()` unless the slice
              /// has exactly the expected length.
              impl<'a> TryFrom<&'a [u8]> for &'a Node {
                  type Error = ();
                  #[inline]
                  fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
                      // `from_bytes` fails on input that is too short...
                      let (node, trailing) = Node::from_bytes(bytes).map_err(|_| ())?;
                      // ...and leftover bytes mean the input was too long.
                      if trailing.is_empty() {
                          Ok(node)
                      } else {
                          Err(())
                      }
                  }
              }
              /// Copy a byte slice into an owned `Node`, failing if the slice has an
              /// unexpected length.
              impl TryFrom<&'_ [u8]> for Node {
                  type Error = std::array::TryFromSliceError;
                  #[inline]
                  fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
                      // The slice-to-array conversion is what enforces the length.
                      match bytes.try_into() {
                          Ok(data) => Ok(Self { data }),
                          Err(wrong_length) => Err(wrong_length),
                      }
                  }
              }
              /// Build a `Node` by copying a borrowed array of exactly the right size.
              impl From<&'_ NodeData> for Node {
                  #[inline]
                  fn from(data: &'_ NodeData) -> Self {
                      let data = *data;
                      Self { data }
                  }
              }
              impl fmt::LowerHex for Node {
                  /// Write every byte as two lower-case hexadecimal digits, stopping at
                  /// the first formatting error.
                  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                      self.data
                          .iter()
                          .try_for_each(|byte| write!(f, "{:02x}", byte))
                  }
              }
              /// Error returned when a hexadecimal string cannot be parsed into a
              /// [`Node`] or a [`NodePrefix`].
              #[derive(Debug)]
              pub struct FromHexError;
              /// Low level utility function, also for prefixes
              ///
              /// Returns the `i`th half-byte of `s`: even indices select the upper
              /// four bits of a byte, odd indices the lower four.
              ///
              /// # Panics
              ///
              /// Panics if `i / 2` is out of bounds for `s`.
              fn get_nybble(s: &[u8], i: usize) -> u8 {
                  let byte = s[i / 2];
                  match i % 2 {
                      0 => byte >> 4,
                      _ => byte & 0x0f,
                  }
              }
              impl Node {
                  /// Retrieve the `i`th half-byte of the binary data.
                  ///
                  /// This is also the `i`th hexadecimal digit in numeric form,
                  /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
                  pub fn get_nybble(&self, i: usize) -> u8 {
                      let bytes = &self.data;
                      get_nybble(bytes, i)
                  }
                  /// Length of the data, in nybbles
                  ///
                  /// Exposed as an instance method only, so that several sizes of
                  /// hashes can easily be supported if needed in the future.
                  pub fn nybbles_len(&self) -> usize {
                      NODE_NYBBLES_LENGTH
                  }
                  /// Convert from hexadecimal string representation
                  ///
                  /// Exact length is required.
                  ///
                  /// To be used in FFI and I/O only, in order to facilitate future
                  /// changes of hash format.
                  ///
                  /// # Errors
                  ///
                  /// Returns [`FromHexError`] if the input is not valid hexadecimal or
                  /// is not exactly a full node long.
                  pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
                      let parsed = NodePrefix::from_hex(hex)?;
                      // A valid but shorter prefix is not a full node.
                      if parsed.nybbles_len() != NODE_NYBBLES_LENGTH {
                          return Err(FromHexError);
                      }
                      Ok(Self { data: parsed.data })
                  }
                  /// `from_hex`, but for input from an internal file of the repository such
                  /// as a changelog or manifest entry.
                  ///
                  /// An error is treated as repository corruption.
                  pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
                      let raw = hex.as_ref();
                      match Self::from_hex(raw) {
                          Ok(node) => Ok(node),
                          Err(FromHexError) => Err(HgError::CorruptedRepository(format!(
                              "Expected a full hexadecimal node ID, found {}",
                              String::from_utf8_lossy(raw)
                          ))),
                      }
                  }
                  /// Provide access to binary data
                  ///
                  /// This is needed by FFI layers, for instance to return expected
                  /// binary values to Python.
                  pub fn as_bytes(&self) -> &[u8] {
                      &self.data[..]
                  }
                  /// Prefix of this node limited to the default short length used
                  /// for UI presentation.
                  ///
                  /// Only the declared length is shortened: the full binary data is
                  /// copied as is into the prefix.
                  pub fn short(&self) -> NodePrefix {
                      NodePrefix {
                          data: self.data,
                          nybbles_len: SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH,
                      }
                  }
                  /// Copy of the binary data, padded on the right with zeros to
                  /// 32 bytes.
                  pub fn pad_to_256_bits(&self) -> [u8; 32] {
                      let mut padded = [0; 32];
                      let (head, _zero_tail) = padded.split_at_mut(NODE_BYTES_LENGTH);
                      head.copy_from_slice(&self.data);
                      padded
                  }
              }
              /// The beginning of a binary revision SHA.
              ///
              /// Since it can potentially come from a hexadecimal representation with
              /// odd length, it needs to carry around whether the last 4 bits are relevant
              /// or not.
              #[derive(Debug, PartialEq, Copy, Clone)]
              pub struct NodePrefix {
                  /// In `1..=NODE_NYBBLES_LENGTH`
                  nybbles_len: u8,
                  /// The first `4 * nybbles_len` bits are used (considering bits
                  /// within a byte in big-endian: most significant first), the rest
                  /// are zero.
                  data: NodeData,
              }
              impl NodePrefix {
                  /// Parse a prefix from its hexadecimal string representation
                  ///
                  /// Similarly to `hex::decode`, can be used with Unicode string types
                  /// (`String`, `&str`) as well as bytes.
                  ///
                  /// To be used in FFI and I/O only, in order to facilitate future
                  /// changes of hash format.
                  ///
                  /// # Errors
                  ///
                  /// Returns [`FromHexError`] if the input is empty, longer than a full
                  /// node, or contains a byte that is not a hexadecimal digit.
                  pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
                      let digits = hex.as_ref();
                      if digits.is_empty() || digits.len() > NODE_NYBBLES_LENGTH {
                          return Err(FromHexError);
                      }
                      let mut data = [0; NODE_BYTES_LENGTH];
                      for (position, &ascii_byte) in digits.iter().enumerate() {
                          let nybble =
                              char::from(ascii_byte).to_digit(16).ok_or(FromHexError)? as u8;
                          // Even positions land in the upper half of a byte, odd
                          // positions in the lower half.
                          let shift = if position % 2 == 0 { 4 } else { 0 };
                          data[position / 2] |= nybble << shift;
                      }
                      // The cast cannot truncate: the length was checked against
                      // `NODE_NYBBLES_LENGTH` above.
                      Ok(Self {
                          nybbles_len: digits.len() as u8,
                          data,
                      })
                  }
                  /// Number of meaningful nybbles in this prefix
                  pub fn nybbles_len(&self) -> usize {
                      usize::from(self.nybbles_len)
                  }
                  /// Whether `node` starts with this prefix
                  pub fn is_prefix_of(&self, node: &Node) -> bool {
                      let len = self.nybbles_len();
                      let whole_bytes = len / 2;
                      // Compare the complete bytes first; a trailing odd nybble, if
                      // any, is only looked at when they all match.
                      self.data[..whole_bytes] == node.data[..whole_bytes]
                          && (len % 2 == 0
                              || self.get_nybble(len - 1) == node.get_nybble(len - 1))
                  }
                  /// Retrieve the `i`th half-byte from the prefix.
                  ///
                  /// This is also the `i`th hexadecimal digit in numeric form,
                  /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
                  ///
                  /// # Panics
                  ///
                  /// Panics if `i` is not smaller than `nybbles_len()`.
                  pub fn get_nybble(&self, i: usize) -> u8 {
                      let len = self.nybbles_len();
                      assert!(i < len);
                      get_nybble(&self.data, i)
                  }
                  /// Iterate over the meaningful nybbles, first digit first
                  fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
                      let len = self.nybbles_len();
                      (0..len).map(move |index| get_nybble(&self.data, index))
                  }
                  /// Return the index first nybble that's different from `node`
                  ///
                  /// If the return value is `None` that means that `self` is
                  /// a prefix of `node`, but the current method is a bit slower
                  /// than `is_prefix_of`.
                  ///
                  /// Returned index is as in `get_nybble`, i.e., starting at 0.
                  pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
                      let full = NodePrefix::from(*node);
                      // Bound to a local so that the iterators borrowing `full` are
                      // gone before `full` itself is dropped.
                      let first_mismatch = self
                          .iter_nybbles()
                          .zip(full.iter_nybbles())
                          .position(|(ours, theirs)| ours != theirs);
                      first_mismatch
                  }
              }
              impl fmt::LowerHex for NodePrefix {
                  /// Write one lower-case hexadecimal digit per meaningful nybble, so
                  /// that a prefix of odd length prints an odd number of digits.
                  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                      self.iter_nybbles()
                          .try_for_each(|nybble| write!(f, "{:x}", nybble))
                  }
              }
              /// A shortcut for full `Node` references
              impl From<&'_ Node> for NodePrefix {
                  fn from(node: &'_ Node) -> Self {
                      NodePrefix {
                          nybbles_len: node.nybbles_len() as _,
                          data: node.data,
                      }
                  }
              }
              /// A shortcut for full `Node` references
              impl From<Node> for NodePrefix {
                  fn from(node: Node) -> Self {
                      NodePrefix {
                          nybbles_len: node.nybbles_len() as _,
                          data: node.data,
                      }
                  }
              }
              /// A prefix equals a `Node` only when it has the same number of nybbles
              /// and identical binary data.
              impl PartialEq<Node> for NodePrefix {
                  fn eq(&self, other: &Node) -> bool {
                      let same_length = self.nybbles_len() == other.nybbles_len();
                      same_length && self.data == other.data
                  }
              }
              #[cfg(test)]
              mod tests {
                  use super::*;
                  /// Hexadecimal form of `SAMPLE_NODE`
                  const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
                  /// Binary form of `SAMPLE_NODE_HEX`
                  const SAMPLE_NODE: Node = Node {
                      data: [
                          0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
                          0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
                      ],
                  };
                  /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
                  /// The padding is made with zeros.
                  pub fn hex_pad_right(hex: &str) -> String {
                      let mut res = hex.to_string();
                      while res.len() < NODE_NYBBLES_LENGTH {
                          res.push('0');
                      }
                      res
                  }
                  /// Only a full-length, valid hexadecimal string parses as a `Node`.
                  #[test]
                  fn test_node_from_hex() {
                      let not_hex = "012... oops";
                      let too_short = "0123";
                      let too_long = format!("{}0", SAMPLE_NODE_HEX);
                      assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
                      assert!(Node::from_hex(not_hex).is_err());
                      assert!(Node::from_hex(too_short).is_err());
                      assert!(Node::from_hex(too_long).is_err());
                  }
                  #[test]
                  fn test_node_encode_hex() {
                      assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
                  }
                  /// Odd, even and full-length prefixes all survive a round trip.
                  #[test]
                  fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
                      assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
                      assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
                      assert_eq!(
                          format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
                          SAMPLE_NODE_HEX
                      );
                      Ok(())
                  }
                  #[test]
                  fn test_prefix_from_hex_errors() {
                      // Not hexadecimal
                      assert!(NodePrefix::from_hex("testgr").is_err());
                      // Empty input
                      assert!(NodePrefix::from_hex("").is_err());
                      // One digit longer than a full node
                      let mut long = format!("{:x}", NULL_NODE);
                      long.push('c');
                      assert!(NodePrefix::from_hex(&long).is_err())
                  }
                  #[test]
                  fn test_is_prefix_of() -> Result<(), FromHexError> {
                      let mut node_data = [0; NODE_BYTES_LENGTH];
                      node_data[0] = 0x12;
                      node_data[1] = 0xca;
                      let node = Node::from(node_data);
                      assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
                      assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
                      assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
                      assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
                      Ok(())
                  }
                  #[test]
                  fn test_get_nybble() -> Result<(), FromHexError> {
                      let prefix = NodePrefix::from_hex("dead6789cafe")?;
                      assert_eq!(prefix.get_nybble(0), 13);
                      assert_eq!(prefix.get_nybble(7), 9);
                      Ok(())
                  }
                  #[test]
                  fn test_first_different_nybble_even_prefix() {
                      let prefix = NodePrefix::from_hex("12ca").unwrap();
                      let mut node = Node::from([0; NODE_BYTES_LENGTH]);
                      assert_eq!(prefix.first_different_nybble(&node), Some(0));
                      node.data[0] = 0x13;
                      assert_eq!(prefix.first_different_nybble(&node), Some(1));
                      node.data[0] = 0x12;
                      assert_eq!(prefix.first_different_nybble(&node), Some(2));
                      node.data[1] = 0xca;
                      // now it is a prefix
                      assert_eq!(prefix.first_different_nybble(&node), None);
                  }
                  #[test]
                  fn test_first_different_nybble_odd_prefix() {
                      let prefix = NodePrefix::from_hex("12c").unwrap();
                      let mut node = Node::from([0; NODE_BYTES_LENGTH]);
                      assert_eq!(prefix.first_different_nybble(&node), Some(0));
                      node.data[0] = 0x13;
                      assert_eq!(prefix.first_different_nybble(&node), Some(1));
                      node.data[0] = 0x12;
                      assert_eq!(prefix.first_different_nybble(&node), Some(2));
                      node.data[1] = 0xca;
                      // now it is a prefix
                      assert_eq!(prefix.first_different_nybble(&node), None);
                  }
              }
              #[cfg(test)]
              pub use tests::hex_pad_right;

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages