// Copyright 2019-2020 Georges Racinet // // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. //! Definitions and utilities for Revision nodes //! //! In Mercurial code base, it is customary to call "a node" the binary SHA //! of a revision. use bytes_cast::BytesCast; use hex::{self, FromHex}; use std::convert::TryFrom; use std::fmt; /// The length in bytes of a `Node` /// /// This constant is meant to ease refactors of this module, and /// are private so that calling code does not expect all nodes have /// the same size, should we support several formats concurrently in /// the future. pub const NODE_BYTES_LENGTH: usize = 20; /// Id of the null node. /// /// Used to indicate the absence of node. pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH]; /// The length in bytes of a `Node` /// /// see also `NODES_BYTES_LENGTH` about it being private. const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH; /// Private alias for readability and to ease future change type NodeData = [u8; NODE_BYTES_LENGTH]; /// Binary revision SHA /// /// ## Future changes of hash size /// /// To accomodate future changes of hash size, Rust callers /// should use the conversion methods at the boundaries (FFI, actual /// computation of hashes and I/O) only, and only if required. /// /// All other callers outside of unit tests should just handle `Node` values /// and never make any assumption on the actual length, using [`nybbles_len`] /// if they need a loop boundary. /// /// All methods that create a `Node` either take a type that enforces /// the size or return an error at runtime. /// /// [`nybbles_len`]: #method.nybbles_len #[derive(Clone, Debug, PartialEq, BytesCast)] #[repr(transparent)] pub struct Node { data: NodeData, } /// The node value for NULL_REVISION pub const NULL_NODE: Node = Node { data: [0; NODE_BYTES_LENGTH], }; impl From for Node { fn from(data: NodeData) -> Node { Node { data } } } /// Return an error if the slice has an unexpected length impl<'a> TryFrom<&'a [u8]> for &'a Node { type Error = (); #[inline] fn try_from(bytes: &'a [u8]) -> Result<&'a Node, Self::Error> { match Node::from_bytes(bytes) { Ok((node, rest)) if rest.is_empty() => Ok(node), _ => Err(()), } } } impl fmt::LowerHex for Node { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { for &byte in &self.data { write!(f, "{:02x}", byte)? } Ok(()) } } #[derive(Debug)] pub struct FromHexError; /// Low level utility function, also for prefixes fn get_nybble(s: &[u8], i: usize) -> u8 { if i % 2 == 0 { s[i / 2] >> 4 } else { s[i / 2] & 0x0f } } impl Node { /// Retrieve the `i`th half-byte of the binary data. /// /// This is also the `i`th hexadecimal digit in numeric form, /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble). pub fn get_nybble(&self, i: usize) -> u8 { get_nybble(&self.data, i) } /// Length of the data, in nybbles pub fn nybbles_len(&self) -> usize { // public exposure as an instance method only, so that we can // easily support several sizes of hashes if needed in the future. NODE_NYBBLES_LENGTH } /// Convert from hexadecimal string representation /// /// Exact length is required. /// /// To be used in FFI and I/O only, in order to facilitate future /// changes of hash format. pub fn from_hex(hex: impl AsRef<[u8]>) -> Result { Ok(NodeData::from_hex(hex.as_ref()) .map_err(|_| FromHexError)? .into()) } /// Provide access to binary data /// /// This is needed by FFI layers, for instance to return expected /// binary values to Python. pub fn as_bytes(&self) -> &[u8] { &self.data } } /// The beginning of a binary revision SHA. /// /// Since it can potentially come from an hexadecimal representation with /// odd length, it needs to carry around whether the last 4 bits are relevant /// or not. #[derive(Debug, PartialEq)] pub struct NodePrefix { buf: Vec, is_odd: bool, } impl NodePrefix { /// Convert from hexadecimal string representation /// /// Similarly to `hex::decode`, can be used with Unicode string types /// (`String`, `&str`) as well as bytes. /// /// To be used in FFI and I/O only, in order to facilitate future /// changes of hash format. pub fn from_hex(hex: impl AsRef<[u8]>) -> Result { let hex = hex.as_ref(); let len = hex.len(); if len > NODE_NYBBLES_LENGTH || len == 0 { return Err(FromHexError); } let is_odd = len % 2 == 1; let even_part = if is_odd { &hex[..len - 1] } else { hex }; let mut buf: Vec = Vec::from_hex(&even_part).map_err(|_| FromHexError)?; if is_odd { let latest_char = char::from(hex[len - 1]); let latest_nybble = latest_char.to_digit(16).ok_or_else(|| FromHexError)? as u8; buf.push(latest_nybble << 4); } Ok(NodePrefix { buf, is_odd }) } pub fn borrow(&self) -> NodePrefixRef { NodePrefixRef { buf: &self.buf, is_odd: self.is_odd, } } } #[derive(Clone, Debug, PartialEq)] pub struct NodePrefixRef<'a> { buf: &'a [u8], is_odd: bool, } impl<'a> NodePrefixRef<'a> { pub fn len(&self) -> usize { if self.is_odd { self.buf.len() * 2 - 1 } else { self.buf.len() * 2 } } pub fn is_prefix_of(&self, node: &Node) -> bool { if self.is_odd { let buf = self.buf; let last_pos = buf.len() - 1; node.data.starts_with(buf.split_at(last_pos).0) && node.data[last_pos] >> 4 == buf[last_pos] >> 4 } else { node.data.starts_with(self.buf) } } /// Retrieve the `i`th half-byte from the prefix. /// /// This is also the `i`th hexadecimal digit in numeric form, /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble). pub fn get_nybble(&self, i: usize) -> u8 { assert!(i < self.len()); get_nybble(self.buf, i) } /// Return the index first nybble that's different from `node` /// /// If the return value is `None` that means that `self` is /// a prefix of `node`, but the current method is a bit slower /// than `is_prefix_of`. /// /// Returned index is as in `get_nybble`, i.e., starting at 0. pub fn first_different_nybble(&self, node: &Node) -> Option { let buf = self.buf; let until = if self.is_odd { buf.len() - 1 } else { buf.len() }; for (i, item) in buf.iter().enumerate().take(until) { if *item != node.data[i] { return if *item & 0xf0 == node.data[i] & 0xf0 { Some(2 * i + 1) } else { Some(2 * i) }; } } if self.is_odd && buf[until] & 0xf0 != node.data[until] & 0xf0 { Some(until * 2) } else { None } } } /// A shortcut for full `Node` references impl<'a> From<&'a Node> for NodePrefixRef<'a> { fn from(node: &'a Node) -> Self { NodePrefixRef { buf: &node.data, is_odd: false, } } } impl PartialEq for NodePrefixRef<'_> { fn eq(&self, other: &Node) -> bool { !self.is_odd && self.buf == other.data } } #[cfg(test)] mod tests { use super::*; fn sample_node() -> Node { let mut data = [0; NODE_BYTES_LENGTH]; data.copy_from_slice(&[ 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef, ]); data.into() } /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH` ///check_hash /// The padding is made with zeros pub fn hex_pad_right(hex: &str) -> String { let mut res = hex.to_string(); while res.len() < NODE_NYBBLES_LENGTH { res.push('0'); } res } fn sample_node_hex() -> String { hex_pad_right("0123456789abcdeffedcba9876543210deadbeef") } #[test] fn test_node_from_hex() { assert_eq!(Node::from_hex(&sample_node_hex()).unwrap(), sample_node()); let mut short = hex_pad_right("0123"); short.pop(); short.pop(); assert!(Node::from_hex(&short).is_err()); let not_hex = hex_pad_right("012... oops"); assert!(Node::from_hex(¬_hex).is_err(),); } #[test] fn test_node_encode_hex() { assert_eq!(format!("{:x}", sample_node()), sample_node_hex()); } #[test] fn test_prefix_from_hex() -> Result<(), FromHexError> { assert_eq!( NodePrefix::from_hex("0e1")?, NodePrefix { buf: vec![14, 16], is_odd: true } ); assert_eq!( NodePrefix::from_hex("0e1a")?, NodePrefix { buf: vec![14, 26], is_odd: false } ); // checking limit case let node_as_vec = sample_node().data.iter().cloned().collect(); assert_eq!( NodePrefix::from_hex(sample_node_hex())?, NodePrefix { buf: node_as_vec, is_odd: false } ); Ok(()) } #[test] fn test_prefix_from_hex_errors() { assert!(NodePrefix::from_hex("testgr").is_err()); let mut long = format!("{:x}", NULL_NODE); long.push('c'); assert!(NodePrefix::from_hex(&long).is_err()) } #[test] fn test_is_prefix_of() -> Result<(), FromHexError> { let mut node_data = [0; NODE_BYTES_LENGTH]; node_data[0] = 0x12; node_data[1] = 0xca; let node = Node::from(node_data); assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node)); assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node)); assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node)); assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node)); Ok(()) } #[test] fn test_get_nybble() -> Result<(), FromHexError> { let prefix = NodePrefix::from_hex("dead6789cafe")?; assert_eq!(prefix.borrow().get_nybble(0), 13); assert_eq!(prefix.borrow().get_nybble(7), 9); Ok(()) } #[test] fn test_first_different_nybble_even_prefix() { let prefix = NodePrefix::from_hex("12ca").unwrap(); let prefref = prefix.borrow(); let mut node = Node::from([0; NODE_BYTES_LENGTH]); assert_eq!(prefref.first_different_nybble(&node), Some(0)); node.data[0] = 0x13; assert_eq!(prefref.first_different_nybble(&node), Some(1)); node.data[0] = 0x12; assert_eq!(prefref.first_different_nybble(&node), Some(2)); node.data[1] = 0xca; // now it is a prefix assert_eq!(prefref.first_different_nybble(&node), None); } #[test] fn test_first_different_nybble_odd_prefix() { let prefix = NodePrefix::from_hex("12c").unwrap(); let prefref = prefix.borrow(); let mut node = Node::from([0; NODE_BYTES_LENGTH]); assert_eq!(prefref.first_different_nybble(&node), Some(0)); node.data[0] = 0x13; assert_eq!(prefref.first_different_nybble(&node), Some(1)); node.data[0] = 0x12; assert_eq!(prefref.first_different_nybble(&node), Some(2)); node.data[1] = 0xca; // now it is a prefix assert_eq!(prefref.first_different_nybble(&node), None); } } #[cfg(test)] pub use tests::hex_pad_right;