node.rs
412 lines
| 11.9 KiB
| application/rls-services+xml
|
RustLexer
Georges Racinet
|
r44601 | // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net> | ||
// | ||||
// This software may be used and distributed according to the terms of the | ||||
// GNU General Public License version 2 or any later version. | ||||
//! Definitions and utilities for Revision nodes | ||||
//! | ||||
//! In Mercurial code base, it is customary to call "a node" the binary SHA | ||||
//! of a revision. | ||||
Simon Sapin
|
r47120 | use bytes_cast::BytesCast; | ||
Simon Sapin
|
r47156 | use hex::{self, FromHex}; | ||
Simon Sapin
|
r47120 | use std::convert::TryFrom; | ||
Simon Sapin
|
r47155 | use std::fmt; | ||
Georges Racinet
|
r44601 | |||
/// The length in bytes of a `Node` | ||||
/// | ||||
/// This constant is meant to ease refactors of this module, and | ||||
/// are private so that calling code does not expect all nodes have | ||||
/// the same size, should we support several formats concurrently in | ||||
/// the future. | ||||
Antoine Cezar
|
r46102 | pub const NODE_BYTES_LENGTH: usize = 20; | ||
/// Id of the null node. | ||||
/// | ||||
/// Used to indicate the absence of node. | ||||
pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH]; | ||||
Georges Racinet
|
r44601 | |||
/// The length in bytes of a `Node` | ||||
/// | ||||
/// see also `NODES_BYTES_LENGTH` about it being private. | ||||
const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH; | ||||
/// Private alias for readability and to ease future change | ||||
type NodeData = [u8; NODE_BYTES_LENGTH]; | ||||
/// Binary revision SHA | ||||
/// | ||||
/// ## Future changes of hash size | ||||
/// | ||||
/// To accomodate future changes of hash size, Rust callers | ||||
/// should use the conversion methods at the boundaries (FFI, actual | ||||
/// computation of hashes and I/O) only, and only if required. | ||||
/// | ||||
/// All other callers outside of unit tests should just handle `Node` values | ||||
/// and never make any assumption on the actual length, using [`nybbles_len`] | ||||
/// if they need a loop boundary. | ||||
/// | ||||
/// All methods that create a `Node` either take a type that enforces | ||||
Simon Sapin
|
r47156 | /// the size or return an error at runtime. | ||
Georges Racinet
|
r44601 | /// | ||
/// [`nybbles_len`]: #method.nybbles_len | ||||
Simon Sapin
|
r47120 | #[derive(Clone, Debug, PartialEq, BytesCast)] | ||
Georges Racinet
|
r44989 | #[repr(transparent)] | ||
Georges Racinet
|
r44601 | pub struct Node { | ||
data: NodeData, | ||||
} | ||||
/// The node value for NULL_REVISION | ||||
pub const NULL_NODE: Node = Node { | ||||
data: [0; NODE_BYTES_LENGTH], | ||||
}; | ||||
impl From<NodeData> for Node { | ||||
fn from(data: NodeData) -> Node { | ||||
Node { data } | ||||
} | ||||
} | ||||
Simon Sapin
|
r46647 | /// Return an error if the slice has an unexpected length | ||
impl<'a> TryFrom<&'a [u8]> for &'a Node { | ||||
Simon Sapin
|
r47120 | type Error = (); | ||
Simon Sapin
|
r46647 | |||
#[inline] | ||||
fn try_from(bytes: &'a [u8]) -> Result<&'a Node, Self::Error> { | ||||
Simon Sapin
|
r47120 | match Node::from_bytes(bytes) { | ||
Ok((node, rest)) if rest.is_empty() => Ok(node), | ||||
_ => Err(()), | ||||
} | ||||
Simon Sapin
|
r46647 | } | ||
} | ||||
Simon Sapin
|
r47155 | impl fmt::LowerHex for Node { | ||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
for &byte in &self.data { | ||||
write!(f, "{:02x}", byte)? | ||||
} | ||||
Ok(()) | ||||
} | ||||
} | ||||
Simon Sapin
|
r47156 | #[derive(Debug)] | ||
pub struct FromHexError; | ||||
Georges Racinet
|
r44601 | |||
/// Low level utility function, also for prefixes | ||||
fn get_nybble(s: &[u8], i: usize) -> u8 { | ||||
if i % 2 == 0 { | ||||
s[i / 2] >> 4 | ||||
} else { | ||||
s[i / 2] & 0x0f | ||||
} | ||||
} | ||||
impl Node { | ||||
/// Retrieve the `i`th half-byte of the binary data. | ||||
/// | ||||
/// This is also the `i`th hexadecimal digit in numeric form, | ||||
/// also called a [nybble](https://en.wikipedia.org/wiki/Nibble). | ||||
pub fn get_nybble(&self, i: usize) -> u8 { | ||||
get_nybble(&self.data, i) | ||||
} | ||||
/// Length of the data, in nybbles | ||||
pub fn nybbles_len(&self) -> usize { | ||||
// public exposure as an instance method only, so that we can | ||||
// easily support several sizes of hashes if needed in the future. | ||||
NODE_NYBBLES_LENGTH | ||||
} | ||||
/// Convert from hexadecimal string representation | ||||
/// | ||||
/// Exact length is required. | ||||
/// | ||||
/// To be used in FFI and I/O only, in order to facilitate future | ||||
/// changes of hash format. | ||||
Simon Sapin
|
r47156 | pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> { | ||
Simon Sapin
|
r46647 | Ok(NodeData::from_hex(hex.as_ref()) | ||
Simon Sapin
|
r47156 | .map_err(|_| FromHexError)? | ||
Georges Racinet
|
r44601 | .into()) | ||
} | ||||
/// Provide access to binary data | ||||
/// | ||||
/// This is needed by FFI layers, for instance to return expected | ||||
/// binary values to Python. | ||||
pub fn as_bytes(&self) -> &[u8] { | ||||
&self.data | ||||
} | ||||
} | ||||
Georges Racinet
|
r44643 | /// The beginning of a binary revision SHA. | ||
/// | ||||
/// Since it can potentially come from an hexadecimal representation with | ||||
/// odd length, it needs to carry around whether the last 4 bits are relevant | ||||
/// or not. | ||||
#[derive(Debug, PartialEq)] | ||||
pub struct NodePrefix { | ||||
buf: Vec<u8>, | ||||
is_odd: bool, | ||||
} | ||||
impl NodePrefix { | ||||
/// Convert from hexadecimal string representation | ||||
/// | ||||
/// Similarly to `hex::decode`, can be used with Unicode string types | ||||
/// (`String`, `&str`) as well as bytes. | ||||
/// | ||||
/// To be used in FFI and I/O only, in order to facilitate future | ||||
/// changes of hash format. | ||||
Simon Sapin
|
r47156 | pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> { | ||
Georges Racinet
|
r44643 | let hex = hex.as_ref(); | ||
let len = hex.len(); | ||||
if len > NODE_NYBBLES_LENGTH { | ||||
Simon Sapin
|
r47156 | return Err(FromHexError); | ||
Georges Racinet
|
r44643 | } | ||
let is_odd = len % 2 == 1; | ||||
let even_part = if is_odd { &hex[..len - 1] } else { hex }; | ||||
Simon Sapin
|
r46647 | let mut buf: Vec<u8> = | ||
Simon Sapin
|
r47156 | Vec::from_hex(&even_part).map_err(|_| FromHexError)?; | ||
Georges Racinet
|
r44643 | |||
if is_odd { | ||||
let latest_char = char::from(hex[len - 1]); | ||||
Simon Sapin
|
r47156 | let latest_nybble = | ||
latest_char.to_digit(16).ok_or_else(|| FromHexError)? as u8; | ||||
Georges Racinet
|
r44643 | buf.push(latest_nybble << 4); | ||
} | ||||
Ok(NodePrefix { buf, is_odd }) | ||||
} | ||||
pub fn borrow(&self) -> NodePrefixRef { | ||||
NodePrefixRef { | ||||
buf: &self.buf, | ||||
is_odd: self.is_odd, | ||||
} | ||||
} | ||||
} | ||||
#[derive(Clone, Debug, PartialEq)] | ||||
pub struct NodePrefixRef<'a> { | ||||
buf: &'a [u8], | ||||
is_odd: bool, | ||||
} | ||||
impl<'a> NodePrefixRef<'a> { | ||||
pub fn len(&self) -> usize { | ||||
if self.is_odd { | ||||
self.buf.len() * 2 - 1 | ||||
} else { | ||||
self.buf.len() * 2 | ||||
} | ||||
} | ||||
Raphaël Gomès
|
r45500 | pub fn is_empty(&self) -> bool { | ||
self.len() == 0 | ||||
} | ||||
Georges Racinet
|
r44643 | pub fn is_prefix_of(&self, node: &Node) -> bool { | ||
if self.is_odd { | ||||
let buf = self.buf; | ||||
let last_pos = buf.len() - 1; | ||||
node.data.starts_with(buf.split_at(last_pos).0) | ||||
&& node.data[last_pos] >> 4 == buf[last_pos] >> 4 | ||||
} else { | ||||
node.data.starts_with(self.buf) | ||||
} | ||||
} | ||||
/// Retrieve the `i`th half-byte from the prefix. | ||||
/// | ||||
/// This is also the `i`th hexadecimal digit in numeric form, | ||||
/// also called a [nybble](https://en.wikipedia.org/wiki/Nibble). | ||||
pub fn get_nybble(&self, i: usize) -> u8 { | ||||
Georges Racinet
|
r44648 | assert!(i < self.len()); | ||
Georges Racinet
|
r44643 | get_nybble(self.buf, i) | ||
} | ||||
Georges Racinet
|
r44872 | |||
/// Return the index first nybble that's different from `node` | ||||
/// | ||||
/// If the return value is `None` that means that `self` is | ||||
/// a prefix of `node`, but the current method is a bit slower | ||||
/// than `is_prefix_of`. | ||||
/// | ||||
/// Returned index is as in `get_nybble`, i.e., starting at 0. | ||||
pub fn first_different_nybble(&self, node: &Node) -> Option<usize> { | ||||
let buf = self.buf; | ||||
let until = if self.is_odd { | ||||
buf.len() - 1 | ||||
} else { | ||||
buf.len() | ||||
}; | ||||
Raphaël Gomès
|
r45500 | for (i, item) in buf.iter().enumerate().take(until) { | ||
if *item != node.data[i] { | ||||
return if *item & 0xf0 == node.data[i] & 0xf0 { | ||||
Some(2 * i + 1) | ||||
Georges Racinet
|
r44872 | } else { | ||
Raphaël Gomès
|
r45500 | Some(2 * i) | ||
}; | ||||
Georges Racinet
|
r44872 | } | ||
} | ||||
if self.is_odd && buf[until] & 0xf0 != node.data[until] & 0xf0 { | ||||
Some(until * 2) | ||||
} else { | ||||
None | ||||
} | ||||
} | ||||
Georges Racinet
|
r44643 | } | ||
/// A shortcut for full `Node` references | ||||
impl<'a> From<&'a Node> for NodePrefixRef<'a> { | ||||
fn from(node: &'a Node) -> Self { | ||||
NodePrefixRef { | ||||
buf: &node.data, | ||||
is_odd: false, | ||||
Georges Racinet
|
r44601 | } | ||
} | ||||
} | ||||
Simon Sapin
|
r46647 | impl PartialEq<Node> for NodePrefixRef<'_> { | ||
fn eq(&self, other: &Node) -> bool { | ||||
!self.is_odd && self.buf == other.data | ||||
} | ||||
} | ||||
Georges Racinet
|
r44601 | #[cfg(test)] | ||
mod tests { | ||||
use super::*; | ||||
fn sample_node() -> Node { | ||||
let mut data = [0; NODE_BYTES_LENGTH]; | ||||
data.copy_from_slice(&[ | ||||
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, | ||||
0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef, | ||||
]); | ||||
data.into() | ||||
} | ||||
/// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH` | ||||
Simon Sapin
|
r46647 | ///check_hash | ||
Georges Racinet
|
r44601 | /// The padding is made with zeros | ||
Georges Racinet
|
r44644 | pub fn hex_pad_right(hex: &str) -> String { | ||
Georges Racinet
|
r44601 | let mut res = hex.to_string(); | ||
while res.len() < NODE_NYBBLES_LENGTH { | ||||
res.push('0'); | ||||
} | ||||
res | ||||
} | ||||
fn sample_node_hex() -> String { | ||||
hex_pad_right("0123456789abcdeffedcba9876543210deadbeef") | ||||
} | ||||
#[test] | ||||
fn test_node_from_hex() { | ||||
Simon Sapin
|
r47156 | assert_eq!(Node::from_hex(&sample_node_hex()).unwrap(), sample_node()); | ||
Georges Racinet
|
r44601 | |||
let mut short = hex_pad_right("0123"); | ||||
short.pop(); | ||||
short.pop(); | ||||
Simon Sapin
|
r47156 | assert!(Node::from_hex(&short).is_err()); | ||
Georges Racinet
|
r44601 | |||
let not_hex = hex_pad_right("012... oops"); | ||||
Simon Sapin
|
r47156 | assert!(Node::from_hex(¬_hex).is_err(),); | ||
Georges Racinet
|
r44601 | } | ||
#[test] | ||||
fn test_node_encode_hex() { | ||||
Simon Sapin
|
r47155 | assert_eq!(format!("{:x}", sample_node()), sample_node_hex()); | ||
Georges Racinet
|
r44601 | } | ||
Georges Racinet
|
r44643 | |||
#[test] | ||||
Simon Sapin
|
r47156 | fn test_prefix_from_hex() -> Result<(), FromHexError> { | ||
Georges Racinet
|
r44643 | assert_eq!( | ||
NodePrefix::from_hex("0e1")?, | ||||
NodePrefix { | ||||
buf: vec![14, 16], | ||||
is_odd: true | ||||
} | ||||
); | ||||
assert_eq!( | ||||
NodePrefix::from_hex("0e1a")?, | ||||
NodePrefix { | ||||
buf: vec![14, 26], | ||||
is_odd: false | ||||
} | ||||
); | ||||
// checking limit case | ||||
let node_as_vec = sample_node().data.iter().cloned().collect(); | ||||
assert_eq!( | ||||
NodePrefix::from_hex(sample_node_hex())?, | ||||
NodePrefix { | ||||
buf: node_as_vec, | ||||
is_odd: false | ||||
} | ||||
); | ||||
Ok(()) | ||||
} | ||||
#[test] | ||||
fn test_prefix_from_hex_errors() { | ||||
Simon Sapin
|
r47156 | assert!(NodePrefix::from_hex("testgr").is_err()); | ||
Simon Sapin
|
r47155 | let mut long = format!("{:x}", NULL_NODE); | ||
Georges Racinet
|
r44643 | long.push('c'); | ||
Simon Sapin
|
r47156 | assert!(NodePrefix::from_hex(&long).is_err()) | ||
Georges Racinet
|
r44643 | } | ||
#[test] | ||||
Simon Sapin
|
r47156 | fn test_is_prefix_of() -> Result<(), FromHexError> { | ||
Georges Racinet
|
r44643 | let mut node_data = [0; NODE_BYTES_LENGTH]; | ||
node_data[0] = 0x12; | ||||
node_data[1] = 0xca; | ||||
let node = Node::from(node_data); | ||||
assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node)); | ||||
assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node)); | ||||
assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node)); | ||||
assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node)); | ||||
Ok(()) | ||||
} | ||||
#[test] | ||||
Simon Sapin
|
r47156 | fn test_get_nybble() -> Result<(), FromHexError> { | ||
Georges Racinet
|
r44643 | let prefix = NodePrefix::from_hex("dead6789cafe")?; | ||
assert_eq!(prefix.borrow().get_nybble(0), 13); | ||||
assert_eq!(prefix.borrow().get_nybble(7), 9); | ||||
Ok(()) | ||||
} | ||||
Georges Racinet
|
r44872 | |||
#[test] | ||||
fn test_first_different_nybble_even_prefix() { | ||||
let prefix = NodePrefix::from_hex("12ca").unwrap(); | ||||
let prefref = prefix.borrow(); | ||||
let mut node = Node::from([0; NODE_BYTES_LENGTH]); | ||||
assert_eq!(prefref.first_different_nybble(&node), Some(0)); | ||||
node.data[0] = 0x13; | ||||
assert_eq!(prefref.first_different_nybble(&node), Some(1)); | ||||
node.data[0] = 0x12; | ||||
assert_eq!(prefref.first_different_nybble(&node), Some(2)); | ||||
node.data[1] = 0xca; | ||||
// now it is a prefix | ||||
assert_eq!(prefref.first_different_nybble(&node), None); | ||||
} | ||||
#[test] | ||||
fn test_first_different_nybble_odd_prefix() { | ||||
let prefix = NodePrefix::from_hex("12c").unwrap(); | ||||
let prefref = prefix.borrow(); | ||||
let mut node = Node::from([0; NODE_BYTES_LENGTH]); | ||||
assert_eq!(prefref.first_different_nybble(&node), Some(0)); | ||||
node.data[0] = 0x13; | ||||
assert_eq!(prefref.first_different_nybble(&node), Some(1)); | ||||
node.data[0] = 0x12; | ||||
assert_eq!(prefref.first_different_nybble(&node), Some(2)); | ||||
node.data[1] = 0xca; | ||||
// now it is a prefix | ||||
assert_eq!(prefref.first_different_nybble(&node), None); | ||||
} | ||||
Georges Racinet
|
r44601 | } | ||
Georges Racinet
|
r44644 | |||
#[cfg(test)] | ||||
pub use tests::hex_pad_right; | ||||