node.rs
433 lines
| 13.4 KiB
| application/rls-services+xml
|
RustLexer
Georges Racinet
|
r44601 | // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net> | ||
// | ||||
// This software may be used and distributed according to the terms of the | ||||
// GNU General Public License version 2 or any later version. | ||||
//! Definitions and utilities for Revision nodes | ||||
//! | ||||
//! In Mercurial code base, it is customary to call "a node" the binary SHA | ||||
//! of a revision. | ||||
Simon Sapin
|
r47172 | use crate::errors::HgError; | ||
Simon Sapin
|
r47120 | use bytes_cast::BytesCast; | ||
Simon Sapin
|
r47155 | use std::fmt; | ||
Georges Racinet
|
r44601 | |||
/// The length in bytes of a `Node` | ||||
/// | ||||
/// This constant is meant to ease refactors of this module, and | ||||
/// are private so that calling code does not expect all nodes have | ||||
/// the same size, should we support several formats concurrently in | ||||
/// the future. | ||||
Antoine Cezar
|
r46102 | pub const NODE_BYTES_LENGTH: usize = 20; | ||
Raphaël Gomès
|
r52082 | /// The length in bytes set aside on disk for a `Node`. Revlog up to v1 only | ||
/// use 20 out of those 32. | ||||
pub const STORED_NODE_ID_BYTES: usize = 32; | ||||
Antoine Cezar
|
r46102 | /// Id of the null node. | ||
/// | ||||
/// Used to indicate the absence of node. | ||||
pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH]; | ||||
Georges Racinet
|
r44601 | |||
/// The length in bytes of a `Node` | ||||
/// | ||||
/// see also `NODES_BYTES_LENGTH` about it being private. | ||||
const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH; | ||||
Simon Sapin
|
r47478 | /// Default for UI presentation | ||
const SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH: u8 = 12; | ||||
Georges Racinet
|
r44601 | /// Private alias for readability and to ease future change | ||
type NodeData = [u8; NODE_BYTES_LENGTH]; | ||||
/// Binary revision SHA | ||||
/// | ||||
/// ## Future changes of hash size | ||||
/// | ||||
/// To accomodate future changes of hash size, Rust callers | ||||
/// should use the conversion methods at the boundaries (FFI, actual | ||||
/// computation of hashes and I/O) only, and only if required. | ||||
/// | ||||
/// All other callers outside of unit tests should just handle `Node` values | ||||
/// and never make any assumption on the actual length, using [`nybbles_len`] | ||||
/// if they need a loop boundary. | ||||
/// | ||||
/// All methods that create a `Node` either take a type that enforces | ||||
Simon Sapin
|
r47156 | /// the size or return an error at runtime. | ||
Georges Racinet
|
r44601 | /// | ||
/// [`nybbles_len`]: #method.nybbles_len | ||||
Augie Fackler
|
r50070 | #[derive(Copy, Clone, PartialEq, BytesCast, derive_more::From)] | ||
Georges Racinet
|
r44989 | #[repr(transparent)] | ||
Georges Racinet
|
r44601 | pub struct Node { | ||
data: NodeData, | ||||
} | ||||
Augie Fackler
|
r50070 | impl fmt::Debug for Node { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
let n = format!("{:x?}", self.data); | ||||
// We're using debug_tuple because it makes the output a little | ||||
// more compact without losing data. | ||||
f.debug_tuple("Node").field(&n).finish() | ||||
} | ||||
} | ||||
Georges Racinet
|
r44601 | /// The node value for NULL_REVISION | ||
pub const NULL_NODE: Node = Node { | ||||
data: [0; NODE_BYTES_LENGTH], | ||||
}; | ||||
Simon Sapin
|
r46647 | /// Return an error if the slice has an unexpected length | ||
impl<'a> TryFrom<&'a [u8]> for &'a Node { | ||||
Simon Sapin
|
r47120 | type Error = (); | ||
Simon Sapin
|
r46647 | |||
#[inline] | ||||
Simon Sapin
|
r47160 | fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> { | ||
Simon Sapin
|
r47120 | match Node::from_bytes(bytes) { | ||
Ok((node, rest)) if rest.is_empty() => Ok(node), | ||||
_ => Err(()), | ||||
} | ||||
Simon Sapin
|
r46647 | } | ||
} | ||||
Simon Sapin
|
r47160 | /// Return an error if the slice has an unexpected length | ||
impl TryFrom<&'_ [u8]> for Node { | ||||
type Error = std::array::TryFromSliceError; | ||||
#[inline] | ||||
fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> { | ||||
let data = bytes.try_into()?; | ||||
Ok(Self { data }) | ||||
} | ||||
} | ||||
Simon Sapin
|
r47337 | impl From<&'_ NodeData> for Node { | ||
#[inline] | ||||
fn from(data: &'_ NodeData) -> Self { | ||||
Self { data: *data } | ||||
} | ||||
} | ||||
Simon Sapin
|
r47155 | impl fmt::LowerHex for Node { | ||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
for &byte in &self.data { | ||||
write!(f, "{:02x}", byte)? | ||||
} | ||||
Ok(()) | ||||
} | ||||
} | ||||
Simon Sapin
|
r47156 | #[derive(Debug)] | ||
pub struct FromHexError; | ||||
Georges Racinet
|
r44601 | |||
/// Low level utility function, also for prefixes | ||||
fn get_nybble(s: &[u8], i: usize) -> u8 { | ||||
if i % 2 == 0 { | ||||
s[i / 2] >> 4 | ||||
} else { | ||||
s[i / 2] & 0x0f | ||||
} | ||||
} | ||||
impl Node { | ||||
/// Retrieve the `i`th half-byte of the binary data. | ||||
/// | ||||
/// This is also the `i`th hexadecimal digit in numeric form, | ||||
/// also called a [nybble](https://en.wikipedia.org/wiki/Nibble). | ||||
pub fn get_nybble(&self, i: usize) -> u8 { | ||||
get_nybble(&self.data, i) | ||||
} | ||||
/// Length of the data, in nybbles | ||||
pub fn nybbles_len(&self) -> usize { | ||||
// public exposure as an instance method only, so that we can | ||||
// easily support several sizes of hashes if needed in the future. | ||||
NODE_NYBBLES_LENGTH | ||||
} | ||||
/// Convert from hexadecimal string representation | ||||
/// | ||||
/// Exact length is required. | ||||
/// | ||||
/// To be used in FFI and I/O only, in order to facilitate future | ||||
/// changes of hash format. | ||||
Simon Sapin
|
r47156 | pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> { | ||
Simon Sapin
|
r47160 | let prefix = NodePrefix::from_hex(hex)?; | ||
if prefix.nybbles_len() == NODE_NYBBLES_LENGTH { | ||||
Ok(Self { data: prefix.data }) | ||||
} else { | ||||
Err(FromHexError) | ||||
} | ||||
Georges Racinet
|
r44601 | } | ||
Simon Sapin
|
r47172 | /// `from_hex`, but for input from an internal file of the repository such | ||
/// as a changelog or manifest entry. | ||||
/// | ||||
/// An error is treated as repository corruption. | ||||
pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> { | ||||
Self::from_hex(hex.as_ref()).map_err(|FromHexError| { | ||||
HgError::CorruptedRepository(format!( | ||||
"Expected a full hexadecimal node ID, found {}", | ||||
String::from_utf8_lossy(hex.as_ref()) | ||||
)) | ||||
}) | ||||
} | ||||
Georges Racinet
|
r44601 | /// Provide access to binary data | ||
/// | ||||
/// This is needed by FFI layers, for instance to return expected | ||||
/// binary values to Python. | ||||
pub fn as_bytes(&self) -> &[u8] { | ||||
&self.data | ||||
} | ||||
Simon Sapin
|
r47478 | |||
pub fn short(&self) -> NodePrefix { | ||||
NodePrefix { | ||||
nybbles_len: SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH, | ||||
data: self.data, | ||||
} | ||||
} | ||||
Simon Sapin
|
r49249 | |||
pub fn pad_to_256_bits(&self) -> [u8; 32] { | ||||
let mut bits = [0; 32]; | ||||
bits[..NODE_BYTES_LENGTH].copy_from_slice(&self.data); | ||||
bits | ||||
} | ||||
Georges Racinet
|
r44601 | } | ||
Georges Racinet
|
r44643 | /// The beginning of a binary revision SHA. | ||
/// | ||||
/// Since it can potentially come from an hexadecimal representation with | ||||
/// odd length, it needs to carry around whether the last 4 bits are relevant | ||||
/// or not. | ||||
Simon Sapin
|
r47160 | #[derive(Debug, PartialEq, Copy, Clone)] | ||
Georges Racinet
|
r44643 | pub struct NodePrefix { | ||
Simon Sapin
|
r47160 | /// In `1..=NODE_NYBBLES_LENGTH` | ||
nybbles_len: u8, | ||||
/// The first `4 * length_in_nybbles` bits are used (considering bits | ||||
/// within a bytes in big-endian: most significant first), the rest | ||||
/// are zero. | ||||
data: NodeData, | ||||
Georges Racinet
|
r44643 | } | ||
impl NodePrefix { | ||||
/// Convert from hexadecimal string representation | ||||
/// | ||||
/// Similarly to `hex::decode`, can be used with Unicode string types | ||||
/// (`String`, `&str`) as well as bytes. | ||||
/// | ||||
/// To be used in FFI and I/O only, in order to facilitate future | ||||
/// changes of hash format. | ||||
Simon Sapin
|
r47156 | pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> { | ||
Georges Racinet
|
r44643 | let hex = hex.as_ref(); | ||
let len = hex.len(); | ||||
Simon Sapin
|
r47157 | if len > NODE_NYBBLES_LENGTH || len == 0 { | ||
Simon Sapin
|
r47156 | return Err(FromHexError); | ||
Georges Racinet
|
r44643 | } | ||
Simon Sapin
|
r47160 | let mut data = [0; NODE_BYTES_LENGTH]; | ||
let mut nybbles_len = 0; | ||||
for &ascii_byte in hex { | ||||
let nybble = match char::from(ascii_byte).to_digit(16) { | ||||
Some(digit) => digit as u8, | ||||
None => return Err(FromHexError), | ||||
}; | ||||
// Fill in the upper half of a byte first, then the lower half. | ||||
let shift = if nybbles_len % 2 == 0 { 4 } else { 0 }; | ||||
data[nybbles_len as usize / 2] |= nybble << shift; | ||||
nybbles_len += 1; | ||||
Georges Racinet
|
r44643 | } | ||
Simon Sapin
|
r47160 | Ok(Self { data, nybbles_len }) | ||
Georges Racinet
|
r44643 | } | ||
Simon Sapin
|
r47160 | pub fn nybbles_len(&self) -> usize { | ||
self.nybbles_len as _ | ||||
Georges Racinet
|
r44643 | } | ||
pub fn is_prefix_of(&self, node: &Node) -> bool { | ||||
Simon Sapin
|
r47160 | let full_bytes = self.nybbles_len() / 2; | ||
if self.data[..full_bytes] != node.data[..full_bytes] { | ||||
return false; | ||||
Georges Racinet
|
r44643 | } | ||
Simon Sapin
|
r47160 | if self.nybbles_len() % 2 == 0 { | ||
return true; | ||||
} | ||||
let last = self.nybbles_len() - 1; | ||||
self.get_nybble(last) == node.get_nybble(last) | ||||
Georges Racinet
|
r44643 | } | ||
/// Retrieve the `i`th half-byte from the prefix. | ||||
/// | ||||
/// This is also the `i`th hexadecimal digit in numeric form, | ||||
/// also called a [nybble](https://en.wikipedia.org/wiki/Nibble). | ||||
pub fn get_nybble(&self, i: usize) -> u8 { | ||||
Simon Sapin
|
r47160 | assert!(i < self.nybbles_len()); | ||
get_nybble(&self.data, i) | ||||
} | ||||
fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ { | ||||
(0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i)) | ||||
Georges Racinet
|
r44643 | } | ||
Georges Racinet
|
r44872 | |||
/// Return the index first nybble that's different from `node` | ||||
/// | ||||
/// If the return value is `None` that means that `self` is | ||||
/// a prefix of `node`, but the current method is a bit slower | ||||
/// than `is_prefix_of`. | ||||
/// | ||||
/// Returned index is as in `get_nybble`, i.e., starting at 0. | ||||
pub fn first_different_nybble(&self, node: &Node) -> Option<usize> { | ||||
Simon Sapin
|
r47160 | self.iter_nybbles() | ||
.zip(NodePrefix::from(*node).iter_nybbles()) | ||||
.position(|(a, b)| a != b) | ||||
} | ||||
} | ||||
impl fmt::LowerHex for NodePrefix { | ||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
let full_bytes = self.nybbles_len() / 2; | ||||
for &byte in &self.data[..full_bytes] { | ||||
write!(f, "{:02x}", byte)? | ||||
Georges Racinet
|
r44872 | } | ||
Simon Sapin
|
r47160 | if self.nybbles_len() % 2 == 1 { | ||
let last = self.nybbles_len() - 1; | ||||
write!(f, "{:x}", self.get_nybble(last))? | ||||
} | ||||
Ok(()) | ||||
} | ||||
} | ||||
/// A shortcut for full `Node` references | ||||
impl From<&'_ Node> for NodePrefix { | ||||
fn from(node: &'_ Node) -> Self { | ||||
NodePrefix { | ||||
nybbles_len: node.nybbles_len() as _, | ||||
data: node.data, | ||||
Georges Racinet
|
r44872 | } | ||
} | ||||
Georges Racinet
|
r44643 | } | ||
/// A shortcut for full `Node` references | ||||
Simon Sapin
|
r47160 | impl From<Node> for NodePrefix { | ||
fn from(node: Node) -> Self { | ||||
NodePrefix { | ||||
nybbles_len: node.nybbles_len() as _, | ||||
data: node.data, | ||||
Georges Racinet
|
r44601 | } | ||
} | ||||
} | ||||
Simon Sapin
|
r47160 | impl PartialEq<Node> for NodePrefix { | ||
Simon Sapin
|
r46647 | fn eq(&self, other: &Node) -> bool { | ||
Raphaël Gomès
|
r50822 | self.data == other.data && self.nybbles_len() == other.nybbles_len() | ||
Simon Sapin
|
r46647 | } | ||
} | ||||
Georges Racinet
|
r44601 | #[cfg(test)] | ||
mod tests { | ||||
use super::*; | ||||
Simon Sapin
|
r47160 | const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef"; | ||
const SAMPLE_NODE: Node = Node { | ||||
data: [ | ||||
Georges Racinet
|
r44601 | 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, | ||
0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef, | ||||
Simon Sapin
|
r47160 | ], | ||
}; | ||||
Georges Racinet
|
r44601 | |||
/// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH` | ||||
Simon Sapin
|
r47160 | /// The padding is made with zeros. | ||
Georges Racinet
|
r44644 | pub fn hex_pad_right(hex: &str) -> String { | ||
Georges Racinet
|
r44601 | let mut res = hex.to_string(); | ||
while res.len() < NODE_NYBBLES_LENGTH { | ||||
res.push('0'); | ||||
} | ||||
res | ||||
} | ||||
#[test] | ||||
fn test_node_from_hex() { | ||||
Simon Sapin
|
r47160 | let not_hex = "012... oops"; | ||
let too_short = "0123"; | ||||
let too_long = format!("{}0", SAMPLE_NODE_HEX); | ||||
assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE); | ||||
assert!(Node::from_hex(not_hex).is_err()); | ||||
assert!(Node::from_hex(too_short).is_err()); | ||||
Raphaël Gomès
|
r52013 | assert!(Node::from_hex(too_long).is_err()); | ||
Georges Racinet
|
r44601 | } | ||
#[test] | ||||
fn test_node_encode_hex() { | ||||
Simon Sapin
|
r47160 | assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX); | ||
Georges Racinet
|
r44601 | } | ||
Georges Racinet
|
r44643 | |||
#[test] | ||||
Simon Sapin
|
r47160 | fn test_prefix_from_to_hex() -> Result<(), FromHexError> { | ||
assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1"); | ||||
assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a"); | ||||
Georges Racinet
|
r44643 | assert_eq!( | ||
Simon Sapin
|
r47160 | format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?), | ||
SAMPLE_NODE_HEX | ||||
Georges Racinet
|
r44643 | ); | ||
Ok(()) | ||||
} | ||||
#[test] | ||||
fn test_prefix_from_hex_errors() { | ||||
Simon Sapin
|
r47156 | assert!(NodePrefix::from_hex("testgr").is_err()); | ||
Simon Sapin
|
r47155 | let mut long = format!("{:x}", NULL_NODE); | ||
Georges Racinet
|
r44643 | long.push('c'); | ||
Simon Sapin
|
r47156 | assert!(NodePrefix::from_hex(&long).is_err()) | ||
Georges Racinet
|
r44643 | } | ||
#[test] | ||||
Simon Sapin
|
r47156 | fn test_is_prefix_of() -> Result<(), FromHexError> { | ||
Georges Racinet
|
r44643 | let mut node_data = [0; NODE_BYTES_LENGTH]; | ||
node_data[0] = 0x12; | ||||
node_data[1] = 0xca; | ||||
let node = Node::from(node_data); | ||||
Simon Sapin
|
r47160 | assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node)); | ||
assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node)); | ||||
assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node)); | ||||
assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node)); | ||||
Georges Racinet
|
r44643 | Ok(()) | ||
} | ||||
#[test] | ||||
Simon Sapin
|
r47156 | fn test_get_nybble() -> Result<(), FromHexError> { | ||
Georges Racinet
|
r44643 | let prefix = NodePrefix::from_hex("dead6789cafe")?; | ||
Simon Sapin
|
r47160 | assert_eq!(prefix.get_nybble(0), 13); | ||
assert_eq!(prefix.get_nybble(7), 9); | ||||
Georges Racinet
|
r44643 | Ok(()) | ||
} | ||||
Georges Racinet
|
r44872 | |||
#[test] | ||||
fn test_first_different_nybble_even_prefix() { | ||||
let prefix = NodePrefix::from_hex("12ca").unwrap(); | ||||
let mut node = Node::from([0; NODE_BYTES_LENGTH]); | ||||
Simon Sapin
|
r47160 | assert_eq!(prefix.first_different_nybble(&node), Some(0)); | ||
Georges Racinet
|
r44872 | node.data[0] = 0x13; | ||
Simon Sapin
|
r47160 | assert_eq!(prefix.first_different_nybble(&node), Some(1)); | ||
Georges Racinet
|
r44872 | node.data[0] = 0x12; | ||
Simon Sapin
|
r47160 | assert_eq!(prefix.first_different_nybble(&node), Some(2)); | ||
Georges Racinet
|
r44872 | node.data[1] = 0xca; | ||
// now it is a prefix | ||||
Simon Sapin
|
r47160 | assert_eq!(prefix.first_different_nybble(&node), None); | ||
Georges Racinet
|
r44872 | } | ||
#[test] | ||||
fn test_first_different_nybble_odd_prefix() { | ||||
let prefix = NodePrefix::from_hex("12c").unwrap(); | ||||
let mut node = Node::from([0; NODE_BYTES_LENGTH]); | ||||
Simon Sapin
|
r47160 | assert_eq!(prefix.first_different_nybble(&node), Some(0)); | ||
Georges Racinet
|
r44872 | node.data[0] = 0x13; | ||
Simon Sapin
|
r47160 | assert_eq!(prefix.first_different_nybble(&node), Some(1)); | ||
Georges Racinet
|
r44872 | node.data[0] = 0x12; | ||
Simon Sapin
|
r47160 | assert_eq!(prefix.first_different_nybble(&node), Some(2)); | ||
Georges Racinet
|
r44872 | node.data[1] = 0xca; | ||
// now it is a prefix | ||||
Simon Sapin
|
r47160 | assert_eq!(prefix.first_different_nybble(&node), None); | ||
Georges Racinet
|
r44872 | } | ||
Georges Racinet
|
r44601 | } | ||
Georges Racinet
|
r44644 | |||
#[cfg(test)] | ||||
pub use tests::hex_pad_right; | ||||