##// END OF EJS Templates
rust: Exclude empty node prefixes...
Simon Sapin -
r47157:e61c2dc6 default
parent child Browse files
Show More
@@ -1,412 +1,408 b''
1 1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5
6 6 //! Definitions and utilities for Revision nodes
7 7 //!
8 8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 9 //! of a revision.
10 10
11 11 use bytes_cast::BytesCast;
12 12 use hex::{self, FromHex};
13 13 use std::convert::TryFrom;
14 14 use std::fmt;
15 15
16 16 /// The length in bytes of a `Node`
17 17 ///
18 18 /// This constant is meant to ease refactors of this module, and
19 19 /// are private so that calling code does not expect all nodes have
20 20 /// the same size, should we support several formats concurrently in
21 21 /// the future.
22 22 pub const NODE_BYTES_LENGTH: usize = 20;
23 23
24 24 /// Id of the null node.
25 25 ///
26 26 /// Used to indicate the absence of node.
27 27 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
28 28
29 29 /// The length in bytes of a `Node`
30 30 ///
31 31 /// see also `NODES_BYTES_LENGTH` about it being private.
32 32 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
33 33
34 34 /// Private alias for readability and to ease future change
35 35 type NodeData = [u8; NODE_BYTES_LENGTH];
36 36
37 37 /// Binary revision SHA
38 38 ///
39 39 /// ## Future changes of hash size
40 40 ///
41 41 /// To accomodate future changes of hash size, Rust callers
42 42 /// should use the conversion methods at the boundaries (FFI, actual
43 43 /// computation of hashes and I/O) only, and only if required.
44 44 ///
45 45 /// All other callers outside of unit tests should just handle `Node` values
46 46 /// and never make any assumption on the actual length, using [`nybbles_len`]
47 47 /// if they need a loop boundary.
48 48 ///
49 49 /// All methods that create a `Node` either take a type that enforces
50 50 /// the size or return an error at runtime.
51 51 ///
52 52 /// [`nybbles_len`]: #method.nybbles_len
53 53 #[derive(Clone, Debug, PartialEq, BytesCast)]
54 54 #[repr(transparent)]
55 55 pub struct Node {
56 56 data: NodeData,
57 57 }
58 58
59 59 /// The node value for NULL_REVISION
60 60 pub const NULL_NODE: Node = Node {
61 61 data: [0; NODE_BYTES_LENGTH],
62 62 };
63 63
64 64 impl From<NodeData> for Node {
65 65 fn from(data: NodeData) -> Node {
66 66 Node { data }
67 67 }
68 68 }
69 69
70 70 /// Return an error if the slice has an unexpected length
71 71 impl<'a> TryFrom<&'a [u8]> for &'a Node {
72 72 type Error = ();
73 73
74 74 #[inline]
75 75 fn try_from(bytes: &'a [u8]) -> Result<&'a Node, Self::Error> {
76 76 match Node::from_bytes(bytes) {
77 77 Ok((node, rest)) if rest.is_empty() => Ok(node),
78 78 _ => Err(()),
79 79 }
80 80 }
81 81 }
82 82
83 83 impl fmt::LowerHex for Node {
84 84 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
85 85 for &byte in &self.data {
86 86 write!(f, "{:02x}", byte)?
87 87 }
88 88 Ok(())
89 89 }
90 90 }
91 91
92 92 #[derive(Debug)]
93 93 pub struct FromHexError;
94 94
95 95 /// Low level utility function, also for prefixes
96 96 fn get_nybble(s: &[u8], i: usize) -> u8 {
97 97 if i % 2 == 0 {
98 98 s[i / 2] >> 4
99 99 } else {
100 100 s[i / 2] & 0x0f
101 101 }
102 102 }
103 103
104 104 impl Node {
105 105 /// Retrieve the `i`th half-byte of the binary data.
106 106 ///
107 107 /// This is also the `i`th hexadecimal digit in numeric form,
108 108 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
109 109 pub fn get_nybble(&self, i: usize) -> u8 {
110 110 get_nybble(&self.data, i)
111 111 }
112 112
113 113 /// Length of the data, in nybbles
114 114 pub fn nybbles_len(&self) -> usize {
115 115 // public exposure as an instance method only, so that we can
116 116 // easily support several sizes of hashes if needed in the future.
117 117 NODE_NYBBLES_LENGTH
118 118 }
119 119
120 120 /// Convert from hexadecimal string representation
121 121 ///
122 122 /// Exact length is required.
123 123 ///
124 124 /// To be used in FFI and I/O only, in order to facilitate future
125 125 /// changes of hash format.
126 126 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
127 127 Ok(NodeData::from_hex(hex.as_ref())
128 128 .map_err(|_| FromHexError)?
129 129 .into())
130 130 }
131 131
132 132 /// Provide access to binary data
133 133 ///
134 134 /// This is needed by FFI layers, for instance to return expected
135 135 /// binary values to Python.
136 136 pub fn as_bytes(&self) -> &[u8] {
137 137 &self.data
138 138 }
139 139 }
140 140
141 141 /// The beginning of a binary revision SHA.
142 142 ///
143 143 /// Since it can potentially come from an hexadecimal representation with
144 144 /// odd length, it needs to carry around whether the last 4 bits are relevant
145 145 /// or not.
146 146 #[derive(Debug, PartialEq)]
147 147 pub struct NodePrefix {
148 148 buf: Vec<u8>,
149 149 is_odd: bool,
150 150 }
151 151
152 152 impl NodePrefix {
153 153 /// Convert from hexadecimal string representation
154 154 ///
155 155 /// Similarly to `hex::decode`, can be used with Unicode string types
156 156 /// (`String`, `&str`) as well as bytes.
157 157 ///
158 158 /// To be used in FFI and I/O only, in order to facilitate future
159 159 /// changes of hash format.
160 160 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
161 161 let hex = hex.as_ref();
162 162 let len = hex.len();
163 if len > NODE_NYBBLES_LENGTH {
163 if len > NODE_NYBBLES_LENGTH || len == 0 {
164 164 return Err(FromHexError);
165 165 }
166 166
167 167 let is_odd = len % 2 == 1;
168 168 let even_part = if is_odd { &hex[..len - 1] } else { hex };
169 169 let mut buf: Vec<u8> =
170 170 Vec::from_hex(&even_part).map_err(|_| FromHexError)?;
171 171
172 172 if is_odd {
173 173 let latest_char = char::from(hex[len - 1]);
174 174 let latest_nybble =
175 175 latest_char.to_digit(16).ok_or_else(|| FromHexError)? as u8;
176 176 buf.push(latest_nybble << 4);
177 177 }
178 178 Ok(NodePrefix { buf, is_odd })
179 179 }
180 180
181 181 pub fn borrow(&self) -> NodePrefixRef {
182 182 NodePrefixRef {
183 183 buf: &self.buf,
184 184 is_odd: self.is_odd,
185 185 }
186 186 }
187 187 }
188 188
189 189 #[derive(Clone, Debug, PartialEq)]
190 190 pub struct NodePrefixRef<'a> {
191 191 buf: &'a [u8],
192 192 is_odd: bool,
193 193 }
194 194
195 195 impl<'a> NodePrefixRef<'a> {
196 196 pub fn len(&self) -> usize {
197 197 if self.is_odd {
198 198 self.buf.len() * 2 - 1
199 199 } else {
200 200 self.buf.len() * 2
201 201 }
202 202 }
203 203
204 pub fn is_empty(&self) -> bool {
205 self.len() == 0
206 }
207
208 204 pub fn is_prefix_of(&self, node: &Node) -> bool {
209 205 if self.is_odd {
210 206 let buf = self.buf;
211 207 let last_pos = buf.len() - 1;
212 208 node.data.starts_with(buf.split_at(last_pos).0)
213 209 && node.data[last_pos] >> 4 == buf[last_pos] >> 4
214 210 } else {
215 211 node.data.starts_with(self.buf)
216 212 }
217 213 }
218 214
219 215 /// Retrieve the `i`th half-byte from the prefix.
220 216 ///
221 217 /// This is also the `i`th hexadecimal digit in numeric form,
222 218 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
223 219 pub fn get_nybble(&self, i: usize) -> u8 {
224 220 assert!(i < self.len());
225 221 get_nybble(self.buf, i)
226 222 }
227 223
228 224 /// Return the index first nybble that's different from `node`
229 225 ///
230 226 /// If the return value is `None` that means that `self` is
231 227 /// a prefix of `node`, but the current method is a bit slower
232 228 /// than `is_prefix_of`.
233 229 ///
234 230 /// Returned index is as in `get_nybble`, i.e., starting at 0.
235 231 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
236 232 let buf = self.buf;
237 233 let until = if self.is_odd {
238 234 buf.len() - 1
239 235 } else {
240 236 buf.len()
241 237 };
242 238 for (i, item) in buf.iter().enumerate().take(until) {
243 239 if *item != node.data[i] {
244 240 return if *item & 0xf0 == node.data[i] & 0xf0 {
245 241 Some(2 * i + 1)
246 242 } else {
247 243 Some(2 * i)
248 244 };
249 245 }
250 246 }
251 247 if self.is_odd && buf[until] & 0xf0 != node.data[until] & 0xf0 {
252 248 Some(until * 2)
253 249 } else {
254 250 None
255 251 }
256 252 }
257 253 }
258 254
259 255 /// A shortcut for full `Node` references
260 256 impl<'a> From<&'a Node> for NodePrefixRef<'a> {
261 257 fn from(node: &'a Node) -> Self {
262 258 NodePrefixRef {
263 259 buf: &node.data,
264 260 is_odd: false,
265 261 }
266 262 }
267 263 }
268 264
269 265 impl PartialEq<Node> for NodePrefixRef<'_> {
270 266 fn eq(&self, other: &Node) -> bool {
271 267 !self.is_odd && self.buf == other.data
272 268 }
273 269 }
274 270
275 271 #[cfg(test)]
276 272 mod tests {
277 273 use super::*;
278 274
279 275 fn sample_node() -> Node {
280 276 let mut data = [0; NODE_BYTES_LENGTH];
281 277 data.copy_from_slice(&[
282 278 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
283 279 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
284 280 ]);
285 281 data.into()
286 282 }
287 283
288 284 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
289 285 ///check_hash
290 286 /// The padding is made with zeros
291 287 pub fn hex_pad_right(hex: &str) -> String {
292 288 let mut res = hex.to_string();
293 289 while res.len() < NODE_NYBBLES_LENGTH {
294 290 res.push('0');
295 291 }
296 292 res
297 293 }
298 294
299 295 fn sample_node_hex() -> String {
300 296 hex_pad_right("0123456789abcdeffedcba9876543210deadbeef")
301 297 }
302 298
303 299 #[test]
304 300 fn test_node_from_hex() {
305 301 assert_eq!(Node::from_hex(&sample_node_hex()).unwrap(), sample_node());
306 302
307 303 let mut short = hex_pad_right("0123");
308 304 short.pop();
309 305 short.pop();
310 306 assert!(Node::from_hex(&short).is_err());
311 307
312 308 let not_hex = hex_pad_right("012... oops");
313 309 assert!(Node::from_hex(&not_hex).is_err(),);
314 310 }
315 311
316 312 #[test]
317 313 fn test_node_encode_hex() {
318 314 assert_eq!(format!("{:x}", sample_node()), sample_node_hex());
319 315 }
320 316
321 317 #[test]
322 318 fn test_prefix_from_hex() -> Result<(), FromHexError> {
323 319 assert_eq!(
324 320 NodePrefix::from_hex("0e1")?,
325 321 NodePrefix {
326 322 buf: vec![14, 16],
327 323 is_odd: true
328 324 }
329 325 );
330 326 assert_eq!(
331 327 NodePrefix::from_hex("0e1a")?,
332 328 NodePrefix {
333 329 buf: vec![14, 26],
334 330 is_odd: false
335 331 }
336 332 );
337 333
338 334 // checking limit case
339 335 let node_as_vec = sample_node().data.iter().cloned().collect();
340 336 assert_eq!(
341 337 NodePrefix::from_hex(sample_node_hex())?,
342 338 NodePrefix {
343 339 buf: node_as_vec,
344 340 is_odd: false
345 341 }
346 342 );
347 343
348 344 Ok(())
349 345 }
350 346
351 347 #[test]
352 348 fn test_prefix_from_hex_errors() {
353 349 assert!(NodePrefix::from_hex("testgr").is_err());
354 350 let mut long = format!("{:x}", NULL_NODE);
355 351 long.push('c');
356 352 assert!(NodePrefix::from_hex(&long).is_err())
357 353 }
358 354
359 355 #[test]
360 356 fn test_is_prefix_of() -> Result<(), FromHexError> {
361 357 let mut node_data = [0; NODE_BYTES_LENGTH];
362 358 node_data[0] = 0x12;
363 359 node_data[1] = 0xca;
364 360 let node = Node::from(node_data);
365 361 assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node));
366 362 assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node));
367 363 assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node));
368 364 assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node));
369 365 Ok(())
370 366 }
371 367
372 368 #[test]
373 369 fn test_get_nybble() -> Result<(), FromHexError> {
374 370 let prefix = NodePrefix::from_hex("dead6789cafe")?;
375 371 assert_eq!(prefix.borrow().get_nybble(0), 13);
376 372 assert_eq!(prefix.borrow().get_nybble(7), 9);
377 373 Ok(())
378 374 }
379 375
380 376 #[test]
381 377 fn test_first_different_nybble_even_prefix() {
382 378 let prefix = NodePrefix::from_hex("12ca").unwrap();
383 379 let prefref = prefix.borrow();
384 380 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
385 381 assert_eq!(prefref.first_different_nybble(&node), Some(0));
386 382 node.data[0] = 0x13;
387 383 assert_eq!(prefref.first_different_nybble(&node), Some(1));
388 384 node.data[0] = 0x12;
389 385 assert_eq!(prefref.first_different_nybble(&node), Some(2));
390 386 node.data[1] = 0xca;
391 387 // now it is a prefix
392 388 assert_eq!(prefref.first_different_nybble(&node), None);
393 389 }
394 390
395 391 #[test]
396 392 fn test_first_different_nybble_odd_prefix() {
397 393 let prefix = NodePrefix::from_hex("12c").unwrap();
398 394 let prefref = prefix.borrow();
399 395 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
400 396 assert_eq!(prefref.first_different_nybble(&node), Some(0));
401 397 node.data[0] = 0x13;
402 398 assert_eq!(prefref.first_different_nybble(&node), Some(1));
403 399 node.data[0] = 0x12;
404 400 assert_eq!(prefref.first_different_nybble(&node), Some(2));
405 401 node.data[1] = 0xca;
406 402 // now it is a prefix
407 403 assert_eq!(prefref.first_different_nybble(&node), None);
408 404 }
409 405 }
410 406
411 407 #[cfg(test)]
412 408 pub use tests::hex_pad_right;
General Comments 0
You need to be logged in to leave comments. Login now