##// END OF EJS Templates
rust-node: handling binary Node prefix...
Georges Racinet -
r44643:9896a8d0 default
parent child Browse files
Show More
@@ -1,56 +1,56
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 pub use node::{Node, NodeError};
10 pub use node::{Node, NodeError, NodePrefix, NodePrefixRef};
11 11
12 12 /// Mercurial revision numbers
13 13 ///
14 14 /// As noted in revlog.c, revision numbers are actually encoded in
15 15 /// 4 bytes, and are liberally converted to ints, whence the i32
16 16 pub type Revision = i32;
17 17
18 18 /// Marker expressing the absence of a parent
19 19 ///
20 20 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
21 21 /// to be smaller than all existing revisions.
22 22 pub const NULL_REVISION: Revision = -1;
23 23
24 24 /// Same as `mercurial.node.wdirrev`
25 25 ///
26 26 /// This is also equal to `i32::max_value()`, but it's better to spell
27 27 /// it out explicitely, same as in `mercurial.node`
28 28 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
29 29
30 30 /// The simplest expression of what we need of Mercurial DAGs.
31 31 pub trait Graph {
32 32 /// Return the two parents of the given `Revision`.
33 33 ///
34 34 /// Each of the parents can be independently `NULL_REVISION`
35 35 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
36 36 }
37 37
38 38 #[derive(Clone, Debug, PartialEq)]
39 39 pub enum GraphError {
40 40 ParentOutOfRange(Revision),
41 41 WorkingDirectoryUnsupported,
42 42 }
43 43
44 44 /// The Mercurial Revlog Index
45 45 ///
46 46 /// This is currently limited to the minimal interface that is needed for
47 47 /// the [`nodemap`](nodemap/index.html) module
48 48 pub trait RevlogIndex {
49 49 /// Total number of Revisions referenced in this index
50 50 fn len(&self) -> usize;
51 51
52 52 /// Return a reference to the Node or `None` if rev is out of bounds
53 53 ///
54 54 /// `NULL_REVISION` is not considered to be out of bounds.
55 55 fn node(&self, rev: Revision) -> Option<&Node>;
56 56 }
@@ -1,191 +1,364
1 1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5
6 6 //! Definitions and utilities for Revision nodes
7 7 //!
8 8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 9 //! of a revision.
10 10
11 11 use hex::{self, FromHex, FromHexError};
12 12
13 13 /// The length in bytes of a `Node`
14 14 ///
15 15 /// This constant is meant to ease refactors of this module, and
16 16 /// are private so that calling code does not expect all nodes have
17 17 /// the same size, should we support several formats concurrently in
18 18 /// the future.
19 19 const NODE_BYTES_LENGTH: usize = 20;
20 20
21 21 /// The length in bytes of a `Node`
22 22 ///
23 23 /// see also `NODES_BYTES_LENGTH` about it being private.
24 24 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
25 25
26 26 /// Private alias for readability and to ease future change
27 27 type NodeData = [u8; NODE_BYTES_LENGTH];
28 28
29 29 /// Binary revision SHA
30 30 ///
31 31 /// ## Future changes of hash size
32 32 ///
33 33 /// To accomodate future changes of hash size, Rust callers
34 34 /// should use the conversion methods at the boundaries (FFI, actual
35 35 /// computation of hashes and I/O) only, and only if required.
36 36 ///
37 37 /// All other callers outside of unit tests should just handle `Node` values
38 38 /// and never make any assumption on the actual length, using [`nybbles_len`]
39 39 /// if they need a loop boundary.
40 40 ///
41 41 /// All methods that create a `Node` either take a type that enforces
42 42 /// the size or fail immediately at runtime with [`ExactLengthRequired`].
43 43 ///
44 44 /// [`nybbles_len`]: #method.nybbles_len
45 45 /// [`ExactLengthRequired`]: struct.NodeError#variant.ExactLengthRequired
46 46 #[derive(Clone, Debug, PartialEq)]
47 47 pub struct Node {
48 48 data: NodeData,
49 49 }
50 50
51 51 /// The node value for NULL_REVISION
52 52 pub const NULL_NODE: Node = Node {
53 53 data: [0; NODE_BYTES_LENGTH],
54 54 };
55 55
56 56 impl From<NodeData> for Node {
57 57 fn from(data: NodeData) -> Node {
58 58 Node { data }
59 59 }
60 60 }
61 61
62 62 #[derive(Debug, PartialEq)]
63 63 pub enum NodeError {
64 64 ExactLengthRequired(usize, String),
65 PrefixTooLong(String),
65 66 HexError(FromHexError, String),
66 67 }
67 68
68 69 /// Low level utility function, also for prefixes
69 70 fn get_nybble(s: &[u8], i: usize) -> u8 {
70 71 if i % 2 == 0 {
71 72 s[i / 2] >> 4
72 73 } else {
73 74 s[i / 2] & 0x0f
74 75 }
75 76 }
76 77
77 78 impl Node {
78 79 /// Retrieve the `i`th half-byte of the binary data.
79 80 ///
80 81 /// This is also the `i`th hexadecimal digit in numeric form,
81 82 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
82 83 pub fn get_nybble(&self, i: usize) -> u8 {
83 84 get_nybble(&self.data, i)
84 85 }
85 86
86 87 /// Length of the data, in nybbles
87 88 pub fn nybbles_len(&self) -> usize {
88 89 // public exposure as an instance method only, so that we can
89 90 // easily support several sizes of hashes if needed in the future.
90 91 NODE_NYBBLES_LENGTH
91 92 }
92 93
93 94 /// Convert from hexadecimal string representation
94 95 ///
95 96 /// Exact length is required.
96 97 ///
97 98 /// To be used in FFI and I/O only, in order to facilitate future
98 99 /// changes of hash format.
99 100 pub fn from_hex(hex: &str) -> Result<Node, NodeError> {
100 101 Ok(NodeData::from_hex(hex)
101 102 .map_err(|e| NodeError::from((e, hex)))?
102 103 .into())
103 104 }
104 105
105 106 /// Convert to hexadecimal string representation
106 107 ///
107 108 /// To be used in FFI and I/O only, in order to facilitate future
108 109 /// changes of hash format.
109 110 pub fn encode_hex(&self) -> String {
110 111 hex::encode(self.data)
111 112 }
112 113
113 114 /// Provide access to binary data
114 115 ///
115 116 /// This is needed by FFI layers, for instance to return expected
116 117 /// binary values to Python.
117 118 pub fn as_bytes(&self) -> &[u8] {
118 119 &self.data
119 120 }
120 121 }
121 122
122 impl From<(FromHexError, &str)> for NodeError {
123 fn from(err_offender: (FromHexError, &str)) -> Self {
123 impl<T: AsRef<str>> From<(FromHexError, T)> for NodeError {
124 fn from(err_offender: (FromHexError, T)) -> Self {
124 125 let (err, offender) = err_offender;
125 126 match err {
126 127 FromHexError::InvalidStringLength => {
127 128 NodeError::ExactLengthRequired(
128 129 NODE_NYBBLES_LENGTH,
129 offender.to_string(),
130 offender.as_ref().to_owned(),
130 131 )
131 132 }
132 _ => NodeError::HexError(err, offender.to_string()),
133 _ => NodeError::HexError(err, offender.as_ref().to_owned()),
134 }
135 }
136 }
137
138 /// The beginning of a binary revision SHA.
139 ///
140 /// Since it can potentially come from an hexadecimal representation with
141 /// odd length, it needs to carry around whether the last 4 bits are relevant
142 /// or not.
143 #[derive(Debug, PartialEq)]
144 pub struct NodePrefix {
145 buf: Vec<u8>,
146 is_odd: bool,
147 }
148
149 impl NodePrefix {
150 /// Convert from hexadecimal string representation
151 ///
152 /// Similarly to `hex::decode`, can be used with Unicode string types
153 /// (`String`, `&str`) as well as bytes.
154 ///
155 /// To be used in FFI and I/O only, in order to facilitate future
156 /// changes of hash format.
157 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, NodeError> {
158 let hex = hex.as_ref();
159 let len = hex.len();
160 if len > NODE_NYBBLES_LENGTH {
161 return Err(NodeError::PrefixTooLong(
162 String::from_utf8_lossy(hex).to_owned().to_string(),
163 ));
164 }
165
166 let is_odd = len % 2 == 1;
167 let even_part = if is_odd { &hex[..len - 1] } else { hex };
168 let mut buf: Vec<u8> = Vec::from_hex(&even_part)
169 .map_err(|e| (e, String::from_utf8_lossy(hex)))?;
170
171 if is_odd {
172 let latest_char = char::from(hex[len - 1]);
173 let latest_nybble = latest_char.to_digit(16).ok_or_else(|| {
174 (
175 FromHexError::InvalidHexCharacter {
176 c: latest_char,
177 index: len - 1,
178 },
179 String::from_utf8_lossy(hex),
180 )
181 })? as u8;
182 buf.push(latest_nybble << 4);
183 }
184 Ok(NodePrefix { buf, is_odd })
185 }
186
187 pub fn borrow(&self) -> NodePrefixRef {
188 NodePrefixRef {
189 buf: &self.buf,
190 is_odd: self.is_odd,
191 }
192 }
193 }
194
195 #[derive(Clone, Debug, PartialEq)]
196 pub struct NodePrefixRef<'a> {
197 buf: &'a [u8],
198 is_odd: bool,
199 }
200
201 impl<'a> NodePrefixRef<'a> {
202 pub fn len(&self) -> usize {
203 if self.is_odd {
204 self.buf.len() * 2 - 1
205 } else {
206 self.buf.len() * 2
207 }
208 }
209
210 pub fn is_prefix_of(&self, node: &Node) -> bool {
211 if self.is_odd {
212 let buf = self.buf;
213 let last_pos = buf.len() - 1;
214 node.data.starts_with(buf.split_at(last_pos).0)
215 && node.data[last_pos] >> 4 == buf[last_pos] >> 4
216 } else {
217 node.data.starts_with(self.buf)
218 }
219 }
220
221 /// Retrieve the `i`th half-byte from the prefix.
222 ///
223 /// This is also the `i`th hexadecimal digit in numeric form,
224 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
225 pub fn get_nybble(&self, i: usize) -> u8 {
226 get_nybble(self.buf, i)
227 }
228 }
229
230 /// A shortcut for full `Node` references
231 impl<'a> From<&'a Node> for NodePrefixRef<'a> {
232 fn from(node: &'a Node) -> Self {
233 NodePrefixRef {
234 buf: &node.data,
235 is_odd: false,
133 236 }
134 237 }
135 238 }
136 239
137 240 #[cfg(test)]
138 241 mod tests {
139 242 use super::*;
140 243
141 244 fn sample_node() -> Node {
142 245 let mut data = [0; NODE_BYTES_LENGTH];
143 246 data.copy_from_slice(&[
144 247 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
145 248 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
146 249 ]);
147 250 data.into()
148 251 }
149 252
150 253 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
151 254 ///
152 255 /// The padding is made with zeros
153 256 fn hex_pad_right(hex: &str) -> String {
154 257 let mut res = hex.to_string();
155 258 while res.len() < NODE_NYBBLES_LENGTH {
156 259 res.push('0');
157 260 }
158 261 res
159 262 }
160 263
161 264 fn sample_node_hex() -> String {
162 265 hex_pad_right("0123456789abcdeffedcba9876543210deadbeef")
163 266 }
164 267
165 268 #[test]
166 269 fn test_node_from_hex() {
167 270 assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node()));
168 271
169 272 let mut short = hex_pad_right("0123");
170 273 short.pop();
171 274 short.pop();
172 275 assert_eq!(
173 276 Node::from_hex(&short),
174 277 Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)),
175 278 );
176 279
177 280 let not_hex = hex_pad_right("012... oops");
178 281 assert_eq!(
179 282 Node::from_hex(&not_hex),
180 283 Err(NodeError::HexError(
181 284 FromHexError::InvalidHexCharacter { c: '.', index: 3 },
182 285 not_hex,
183 286 )),
184 287 );
185 288 }
186 289
187 290 #[test]
188 291 fn test_node_encode_hex() {
189 292 assert_eq!(sample_node().encode_hex(), sample_node_hex());
190 293 }
294
295 #[test]
296 fn test_prefix_from_hex() -> Result<(), NodeError> {
297 assert_eq!(
298 NodePrefix::from_hex("0e1")?,
299 NodePrefix {
300 buf: vec![14, 16],
301 is_odd: true
191 302 }
303 );
304 assert_eq!(
305 NodePrefix::from_hex("0e1a")?,
306 NodePrefix {
307 buf: vec![14, 26],
308 is_odd: false
309 }
310 );
311
312 // checking limit case
313 let node_as_vec = sample_node().data.iter().cloned().collect();
314 assert_eq!(
315 NodePrefix::from_hex(sample_node_hex())?,
316 NodePrefix {
317 buf: node_as_vec,
318 is_odd: false
319 }
320 );
321
322 Ok(())
323 }
324
325 #[test]
326 fn test_prefix_from_hex_errors() {
327 assert_eq!(
328 NodePrefix::from_hex("testgr"),
329 Err(NodeError::HexError(
330 FromHexError::InvalidHexCharacter { c: 't', index: 0 },
331 "testgr".to_string()
332 ))
333 );
334 let mut long = NULL_NODE.encode_hex();
335 long.push('c');
336 match NodePrefix::from_hex(&long)
337 .expect_err("should be refused as too long")
338 {
339 NodeError::PrefixTooLong(s) => assert_eq!(s, long),
340 err => panic!(format!("Should have been TooLong, got {:?}", err)),
341 }
342 }
343
344 #[test]
345 fn test_is_prefix_of() -> Result<(), NodeError> {
346 let mut node_data = [0; NODE_BYTES_LENGTH];
347 node_data[0] = 0x12;
348 node_data[1] = 0xca;
349 let node = Node::from(node_data);
350 assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node));
351 assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node));
352 assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node));
353 assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node));
354 Ok(())
355 }
356
357 #[test]
358 fn test_get_nybble() -> Result<(), NodeError> {
359 let prefix = NodePrefix::from_hex("dead6789cafe")?;
360 assert_eq!(prefix.borrow().get_nybble(0), 13);
361 assert_eq!(prefix.borrow().get_nybble(7), 9);
362 Ok(())
363 }
364 }
General Comments 0
You need to be logged in to leave comments. Login now