##// END OF EJS Templates
rust: replace an unsafe use of transmute with a safe use of bytes-cast...
Simon Sapin -
r47120:cfb6c10c default
parent child Browse files
Show More
@@ -1,456 +1,457 b''
1 1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5
6 6 //! Definitions and utilities for Revision nodes
7 7 //!
8 8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 9 //! of a revision.
10 10
11 use bytes_cast::BytesCast;
11 12 use hex::{self, FromHex, FromHexError};
12 use std::convert::{TryFrom, TryInto};
13 use std::convert::TryFrom;
13 14
14 15 /// The length in bytes of a `Node`
15 16 ///
16 17 /// This constant is meant to ease refactors of this module, and
17 18 /// are private so that calling code does not expect all nodes have
18 19 /// the same size, should we support several formats concurrently in
19 20 /// the future.
20 21 pub const NODE_BYTES_LENGTH: usize = 20;
21 22
22 23 /// Id of the null node.
23 24 ///
24 25 /// Used to indicate the absence of node.
25 26 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
26 27
27 28 /// The length in bytes of a `Node`
28 29 ///
29 30 /// see also `NODES_BYTES_LENGTH` about it being private.
30 31 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
31 32
32 33 /// Private alias for readability and to ease future change
33 34 type NodeData = [u8; NODE_BYTES_LENGTH];
34 35
35 36 /// Binary revision SHA
36 37 ///
37 38 /// ## Future changes of hash size
38 39 ///
39 40 /// To accomodate future changes of hash size, Rust callers
40 41 /// should use the conversion methods at the boundaries (FFI, actual
41 42 /// computation of hashes and I/O) only, and only if required.
42 43 ///
43 44 /// All other callers outside of unit tests should just handle `Node` values
44 45 /// and never make any assumption on the actual length, using [`nybbles_len`]
45 46 /// if they need a loop boundary.
46 47 ///
47 48 /// All methods that create a `Node` either take a type that enforces
48 49 /// the size or fail immediately at runtime with [`ExactLengthRequired`].
49 50 ///
50 51 /// [`nybbles_len`]: #method.nybbles_len
51 52 /// [`ExactLengthRequired`]: struct.NodeError#variant.ExactLengthRequired
52 #[derive(Clone, Debug, PartialEq)]
53 #[derive(Clone, Debug, PartialEq, BytesCast)]
53 54 #[repr(transparent)]
54 55 pub struct Node {
55 56 data: NodeData,
56 57 }
57 58
58 59 /// The node value for NULL_REVISION
59 60 pub const NULL_NODE: Node = Node {
60 61 data: [0; NODE_BYTES_LENGTH],
61 62 };
62 63
63 64 impl From<NodeData> for Node {
64 65 fn from(data: NodeData) -> Node {
65 66 Node { data }
66 67 }
67 68 }
68 69
69 70 /// Return an error if the slice has an unexpected length
70 71 impl<'a> TryFrom<&'a [u8]> for &'a Node {
71 type Error = std::array::TryFromSliceError;
72 type Error = ();
72 73
73 74 #[inline]
74 75 fn try_from(bytes: &'a [u8]) -> Result<&'a Node, Self::Error> {
75 let data = bytes.try_into()?;
76 // Safety: `#[repr(transparent)]` makes it ok to "wrap" the target
77 // of a reference to the type of the single field.
78 Ok(unsafe { std::mem::transmute::<&NodeData, &Node>(data) })
76 match Node::from_bytes(bytes) {
77 Ok((node, rest)) if rest.is_empty() => Ok(node),
78 _ => Err(()),
79 }
79 80 }
80 81 }
81 82
82 83 #[derive(Debug, PartialEq)]
83 84 pub enum NodeError {
84 85 ExactLengthRequired(usize, String),
85 86 PrefixTooLong(String),
86 87 HexError(FromHexError, String),
87 88 }
88 89
89 90 /// Low level utility function, also for prefixes
90 91 fn get_nybble(s: &[u8], i: usize) -> u8 {
91 92 if i % 2 == 0 {
92 93 s[i / 2] >> 4
93 94 } else {
94 95 s[i / 2] & 0x0f
95 96 }
96 97 }
97 98
98 99 impl Node {
99 100 /// Retrieve the `i`th half-byte of the binary data.
100 101 ///
101 102 /// This is also the `i`th hexadecimal digit in numeric form,
102 103 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
103 104 pub fn get_nybble(&self, i: usize) -> u8 {
104 105 get_nybble(&self.data, i)
105 106 }
106 107
107 108 /// Length of the data, in nybbles
108 109 pub fn nybbles_len(&self) -> usize {
109 110 // public exposure as an instance method only, so that we can
110 111 // easily support several sizes of hashes if needed in the future.
111 112 NODE_NYBBLES_LENGTH
112 113 }
113 114
114 115 /// Convert from hexadecimal string representation
115 116 ///
116 117 /// Exact length is required.
117 118 ///
118 119 /// To be used in FFI and I/O only, in order to facilitate future
119 120 /// changes of hash format.
120 121 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, NodeError> {
121 122 Ok(NodeData::from_hex(hex.as_ref())
122 123 .map_err(|e| NodeError::from((e, hex)))?
123 124 .into())
124 125 }
125 126
126 127 /// Convert to hexadecimal string representation
127 128 ///
128 129 /// To be used in FFI and I/O only, in order to facilitate future
129 130 /// changes of hash format.
130 131 pub fn encode_hex(&self) -> String {
131 132 hex::encode(self.data)
132 133 }
133 134
134 135 /// Provide access to binary data
135 136 ///
136 137 /// This is needed by FFI layers, for instance to return expected
137 138 /// binary values to Python.
138 139 pub fn as_bytes(&self) -> &[u8] {
139 140 &self.data
140 141 }
141 142 }
142 143
143 144 impl<T: AsRef<[u8]>> From<(FromHexError, T)> for NodeError {
144 145 fn from(err_offender: (FromHexError, T)) -> Self {
145 146 let (err, offender) = err_offender;
146 147 let offender = String::from_utf8_lossy(offender.as_ref()).into_owned();
147 148 match err {
148 149 FromHexError::InvalidStringLength => {
149 150 NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, offender)
150 151 }
151 152 _ => NodeError::HexError(err, offender),
152 153 }
153 154 }
154 155 }
155 156
156 157 /// The beginning of a binary revision SHA.
157 158 ///
158 159 /// Since it can potentially come from an hexadecimal representation with
159 160 /// odd length, it needs to carry around whether the last 4 bits are relevant
160 161 /// or not.
161 162 #[derive(Debug, PartialEq)]
162 163 pub struct NodePrefix {
163 164 buf: Vec<u8>,
164 165 is_odd: bool,
165 166 }
166 167
167 168 impl NodePrefix {
168 169 /// Convert from hexadecimal string representation
169 170 ///
170 171 /// Similarly to `hex::decode`, can be used with Unicode string types
171 172 /// (`String`, `&str`) as well as bytes.
172 173 ///
173 174 /// To be used in FFI and I/O only, in order to facilitate future
174 175 /// changes of hash format.
175 176 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, NodeError> {
176 177 let hex = hex.as_ref();
177 178 let len = hex.len();
178 179 if len > NODE_NYBBLES_LENGTH {
179 180 return Err(NodeError::PrefixTooLong(
180 181 String::from_utf8_lossy(hex).to_owned().to_string(),
181 182 ));
182 183 }
183 184
184 185 let is_odd = len % 2 == 1;
185 186 let even_part = if is_odd { &hex[..len - 1] } else { hex };
186 187 let mut buf: Vec<u8> =
187 188 Vec::from_hex(&even_part).map_err(|e| (e, hex))?;
188 189
189 190 if is_odd {
190 191 let latest_char = char::from(hex[len - 1]);
191 192 let latest_nybble = latest_char.to_digit(16).ok_or_else(|| {
192 193 (
193 194 FromHexError::InvalidHexCharacter {
194 195 c: latest_char,
195 196 index: len - 1,
196 197 },
197 198 hex,
198 199 )
199 200 })? as u8;
200 201 buf.push(latest_nybble << 4);
201 202 }
202 203 Ok(NodePrefix { buf, is_odd })
203 204 }
204 205
205 206 pub fn borrow(&self) -> NodePrefixRef {
206 207 NodePrefixRef {
207 208 buf: &self.buf,
208 209 is_odd: self.is_odd,
209 210 }
210 211 }
211 212 }
212 213
213 214 #[derive(Clone, Debug, PartialEq)]
214 215 pub struct NodePrefixRef<'a> {
215 216 buf: &'a [u8],
216 217 is_odd: bool,
217 218 }
218 219
219 220 impl<'a> NodePrefixRef<'a> {
220 221 pub fn len(&self) -> usize {
221 222 if self.is_odd {
222 223 self.buf.len() * 2 - 1
223 224 } else {
224 225 self.buf.len() * 2
225 226 }
226 227 }
227 228
228 229 pub fn is_empty(&self) -> bool {
229 230 self.len() == 0
230 231 }
231 232
232 233 pub fn is_prefix_of(&self, node: &Node) -> bool {
233 234 if self.is_odd {
234 235 let buf = self.buf;
235 236 let last_pos = buf.len() - 1;
236 237 node.data.starts_with(buf.split_at(last_pos).0)
237 238 && node.data[last_pos] >> 4 == buf[last_pos] >> 4
238 239 } else {
239 240 node.data.starts_with(self.buf)
240 241 }
241 242 }
242 243
243 244 /// Retrieve the `i`th half-byte from the prefix.
244 245 ///
245 246 /// This is also the `i`th hexadecimal digit in numeric form,
246 247 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
247 248 pub fn get_nybble(&self, i: usize) -> u8 {
248 249 assert!(i < self.len());
249 250 get_nybble(self.buf, i)
250 251 }
251 252
252 253 /// Return the index first nybble that's different from `node`
253 254 ///
254 255 /// If the return value is `None` that means that `self` is
255 256 /// a prefix of `node`, but the current method is a bit slower
256 257 /// than `is_prefix_of`.
257 258 ///
258 259 /// Returned index is as in `get_nybble`, i.e., starting at 0.
259 260 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
260 261 let buf = self.buf;
261 262 let until = if self.is_odd {
262 263 buf.len() - 1
263 264 } else {
264 265 buf.len()
265 266 };
266 267 for (i, item) in buf.iter().enumerate().take(until) {
267 268 if *item != node.data[i] {
268 269 return if *item & 0xf0 == node.data[i] & 0xf0 {
269 270 Some(2 * i + 1)
270 271 } else {
271 272 Some(2 * i)
272 273 };
273 274 }
274 275 }
275 276 if self.is_odd && buf[until] & 0xf0 != node.data[until] & 0xf0 {
276 277 Some(until * 2)
277 278 } else {
278 279 None
279 280 }
280 281 }
281 282 }
282 283
283 284 /// A shortcut for full `Node` references
284 285 impl<'a> From<&'a Node> for NodePrefixRef<'a> {
285 286 fn from(node: &'a Node) -> Self {
286 287 NodePrefixRef {
287 288 buf: &node.data,
288 289 is_odd: false,
289 290 }
290 291 }
291 292 }
292 293
293 294 impl PartialEq<Node> for NodePrefixRef<'_> {
294 295 fn eq(&self, other: &Node) -> bool {
295 296 !self.is_odd && self.buf == other.data
296 297 }
297 298 }
298 299
299 300 #[cfg(test)]
300 301 mod tests {
301 302 use super::*;
302 303
303 304 fn sample_node() -> Node {
304 305 let mut data = [0; NODE_BYTES_LENGTH];
305 306 data.copy_from_slice(&[
306 307 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
307 308 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
308 309 ]);
309 310 data.into()
310 311 }
311 312
312 313 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
313 314 ///check_hash
314 315 /// The padding is made with zeros
315 316 pub fn hex_pad_right(hex: &str) -> String {
316 317 let mut res = hex.to_string();
317 318 while res.len() < NODE_NYBBLES_LENGTH {
318 319 res.push('0');
319 320 }
320 321 res
321 322 }
322 323
323 324 fn sample_node_hex() -> String {
324 325 hex_pad_right("0123456789abcdeffedcba9876543210deadbeef")
325 326 }
326 327
327 328 #[test]
328 329 fn test_node_from_hex() {
329 330 assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node()));
330 331
331 332 let mut short = hex_pad_right("0123");
332 333 short.pop();
333 334 short.pop();
334 335 assert_eq!(
335 336 Node::from_hex(&short),
336 337 Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)),
337 338 );
338 339
339 340 let not_hex = hex_pad_right("012... oops");
340 341 assert_eq!(
341 342 Node::from_hex(&not_hex),
342 343 Err(NodeError::HexError(
343 344 FromHexError::InvalidHexCharacter { c: '.', index: 3 },
344 345 not_hex,
345 346 )),
346 347 );
347 348 }
348 349
349 350 #[test]
350 351 fn test_node_encode_hex() {
351 352 assert_eq!(sample_node().encode_hex(), sample_node_hex());
352 353 }
353 354
354 355 #[test]
355 356 fn test_prefix_from_hex() -> Result<(), NodeError> {
356 357 assert_eq!(
357 358 NodePrefix::from_hex("0e1")?,
358 359 NodePrefix {
359 360 buf: vec![14, 16],
360 361 is_odd: true
361 362 }
362 363 );
363 364 assert_eq!(
364 365 NodePrefix::from_hex("0e1a")?,
365 366 NodePrefix {
366 367 buf: vec![14, 26],
367 368 is_odd: false
368 369 }
369 370 );
370 371
371 372 // checking limit case
372 373 let node_as_vec = sample_node().data.iter().cloned().collect();
373 374 assert_eq!(
374 375 NodePrefix::from_hex(sample_node_hex())?,
375 376 NodePrefix {
376 377 buf: node_as_vec,
377 378 is_odd: false
378 379 }
379 380 );
380 381
381 382 Ok(())
382 383 }
383 384
384 385 #[test]
385 386 fn test_prefix_from_hex_errors() {
386 387 assert_eq!(
387 388 NodePrefix::from_hex("testgr"),
388 389 Err(NodeError::HexError(
389 390 FromHexError::InvalidHexCharacter { c: 't', index: 0 },
390 391 "testgr".to_string()
391 392 ))
392 393 );
393 394 let mut long = NULL_NODE.encode_hex();
394 395 long.push('c');
395 396 match NodePrefix::from_hex(&long)
396 397 .expect_err("should be refused as too long")
397 398 {
398 399 NodeError::PrefixTooLong(s) => assert_eq!(s, long),
399 400 err => panic!(format!("Should have been TooLong, got {:?}", err)),
400 401 }
401 402 }
402 403
403 404 #[test]
404 405 fn test_is_prefix_of() -> Result<(), NodeError> {
405 406 let mut node_data = [0; NODE_BYTES_LENGTH];
406 407 node_data[0] = 0x12;
407 408 node_data[1] = 0xca;
408 409 let node = Node::from(node_data);
409 410 assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node));
410 411 assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node));
411 412 assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node));
412 413 assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node));
413 414 Ok(())
414 415 }
415 416
416 417 #[test]
417 418 fn test_get_nybble() -> Result<(), NodeError> {
418 419 let prefix = NodePrefix::from_hex("dead6789cafe")?;
419 420 assert_eq!(prefix.borrow().get_nybble(0), 13);
420 421 assert_eq!(prefix.borrow().get_nybble(7), 9);
421 422 Ok(())
422 423 }
423 424
424 425 #[test]
425 426 fn test_first_different_nybble_even_prefix() {
426 427 let prefix = NodePrefix::from_hex("12ca").unwrap();
427 428 let prefref = prefix.borrow();
428 429 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
429 430 assert_eq!(prefref.first_different_nybble(&node), Some(0));
430 431 node.data[0] = 0x13;
431 432 assert_eq!(prefref.first_different_nybble(&node), Some(1));
432 433 node.data[0] = 0x12;
433 434 assert_eq!(prefref.first_different_nybble(&node), Some(2));
434 435 node.data[1] = 0xca;
435 436 // now it is a prefix
436 437 assert_eq!(prefref.first_different_nybble(&node), None);
437 438 }
438 439
439 440 #[test]
440 441 fn test_first_different_nybble_odd_prefix() {
441 442 let prefix = NodePrefix::from_hex("12c").unwrap();
442 443 let prefref = prefix.borrow();
443 444 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
444 445 assert_eq!(prefref.first_different_nybble(&node), Some(0));
445 446 node.data[0] = 0x13;
446 447 assert_eq!(prefref.first_different_nybble(&node), Some(1));
447 448 node.data[0] = 0x12;
448 449 assert_eq!(prefref.first_different_nybble(&node), Some(2));
449 450 node.data[1] = 0xca;
450 451 // now it is a prefix
451 452 assert_eq!(prefref.first_different_nybble(&node), None);
452 453 }
453 454 }
454 455
455 456 #[cfg(test)]
456 457 pub use tests::hex_pad_right;
General Comments 0
You need to be logged in to leave comments. Login now