##// END OF EJS Templates
rust-node: handling binary Node prefix...
Georges Racinet -
r44643:9896a8d0 default
parent child Browse files
Show More
@@ -1,56 +1,56
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Mercurial concepts for handling revision history
6 //! Mercurial concepts for handling revision history
7
7
8 pub mod node;
8 pub mod node;
9 pub mod nodemap;
9 pub mod nodemap;
10 pub use node::{Node, NodeError};
10 pub use node::{Node, NodeError, NodePrefix, NodePrefixRef};
11
11
12 /// Mercurial revision numbers
12 /// Mercurial revision numbers
13 ///
13 ///
14 /// As noted in revlog.c, revision numbers are actually encoded in
14 /// As noted in revlog.c, revision numbers are actually encoded in
15 /// 4 bytes, and are liberally converted to ints, whence the i32
15 /// 4 bytes, and are liberally converted to ints, whence the i32
16 pub type Revision = i32;
16 pub type Revision = i32;
17
17
18 /// Marker expressing the absence of a parent
18 /// Marker expressing the absence of a parent
19 ///
19 ///
20 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
20 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
21 /// to be smaller than all existing revisions.
21 /// to be smaller than all existing revisions.
22 pub const NULL_REVISION: Revision = -1;
22 pub const NULL_REVISION: Revision = -1;
23
23
24 /// Same as `mercurial.node.wdirrev`
24 /// Same as `mercurial.node.wdirrev`
25 ///
25 ///
26 /// This is also equal to `i32::max_value()`, but it's better to spell
26 /// This is also equal to `i32::max_value()`, but it's better to spell
27 /// it out explicitely, same as in `mercurial.node`
27 /// it out explicitely, same as in `mercurial.node`
28 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
28 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
29
29
30 /// The simplest expression of what we need of Mercurial DAGs.
30 /// The simplest expression of what we need of Mercurial DAGs.
31 pub trait Graph {
31 pub trait Graph {
32 /// Return the two parents of the given `Revision`.
32 /// Return the two parents of the given `Revision`.
33 ///
33 ///
34 /// Each of the parents can be independently `NULL_REVISION`
34 /// Each of the parents can be independently `NULL_REVISION`
35 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
35 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
36 }
36 }
37
37
38 #[derive(Clone, Debug, PartialEq)]
38 #[derive(Clone, Debug, PartialEq)]
39 pub enum GraphError {
39 pub enum GraphError {
40 ParentOutOfRange(Revision),
40 ParentOutOfRange(Revision),
41 WorkingDirectoryUnsupported,
41 WorkingDirectoryUnsupported,
42 }
42 }
43
43
44 /// The Mercurial Revlog Index
44 /// The Mercurial Revlog Index
45 ///
45 ///
46 /// This is currently limited to the minimal interface that is needed for
46 /// This is currently limited to the minimal interface that is needed for
47 /// the [`nodemap`](nodemap/index.html) module
47 /// the [`nodemap`](nodemap/index.html) module
48 pub trait RevlogIndex {
48 pub trait RevlogIndex {
49 /// Total number of Revisions referenced in this index
49 /// Total number of Revisions referenced in this index
50 fn len(&self) -> usize;
50 fn len(&self) -> usize;
51
51
52 /// Return a reference to the Node or `None` if rev is out of bounds
52 /// Return a reference to the Node or `None` if rev is out of bounds
53 ///
53 ///
54 /// `NULL_REVISION` is not considered to be out of bounds.
54 /// `NULL_REVISION` is not considered to be out of bounds.
55 fn node(&self, rev: Revision) -> Option<&Node>;
55 fn node(&self, rev: Revision) -> Option<&Node>;
56 }
56 }
@@ -1,191 +1,364
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 //
2 //
3 // This software may be used and distributed according to the terms of the
3 // This software may be used and distributed according to the terms of the
4 // GNU General Public License version 2 or any later version.
4 // GNU General Public License version 2 or any later version.
5
5
6 //! Definitions and utilities for Revision nodes
6 //! Definitions and utilities for Revision nodes
7 //!
7 //!
8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 //! of a revision.
9 //! of a revision.
10
10
11 use hex::{self, FromHex, FromHexError};
11 use hex::{self, FromHex, FromHexError};
12
12
13 /// The length in bytes of a `Node`
13 /// The length in bytes of a `Node`
14 ///
14 ///
15 /// This constant is meant to ease refactors of this module, and
15 /// This constant is meant to ease refactors of this module, and
16 /// are private so that calling code does not expect all nodes have
16 /// are private so that calling code does not expect all nodes have
17 /// the same size, should we support several formats concurrently in
17 /// the same size, should we support several formats concurrently in
18 /// the future.
18 /// the future.
19 const NODE_BYTES_LENGTH: usize = 20;
19 const NODE_BYTES_LENGTH: usize = 20;
20
20
21 /// The length in bytes of a `Node`
21 /// The length in bytes of a `Node`
22 ///
22 ///
23 /// see also `NODES_BYTES_LENGTH` about it being private.
23 /// see also `NODES_BYTES_LENGTH` about it being private.
24 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
24 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
25
25
26 /// Private alias for readability and to ease future change
26 /// Private alias for readability and to ease future change
27 type NodeData = [u8; NODE_BYTES_LENGTH];
27 type NodeData = [u8; NODE_BYTES_LENGTH];
28
28
29 /// Binary revision SHA
29 /// Binary revision SHA
30 ///
30 ///
31 /// ## Future changes of hash size
31 /// ## Future changes of hash size
32 ///
32 ///
33 /// To accomodate future changes of hash size, Rust callers
33 /// To accomodate future changes of hash size, Rust callers
34 /// should use the conversion methods at the boundaries (FFI, actual
34 /// should use the conversion methods at the boundaries (FFI, actual
35 /// computation of hashes and I/O) only, and only if required.
35 /// computation of hashes and I/O) only, and only if required.
36 ///
36 ///
37 /// All other callers outside of unit tests should just handle `Node` values
37 /// All other callers outside of unit tests should just handle `Node` values
38 /// and never make any assumption on the actual length, using [`nybbles_len`]
38 /// and never make any assumption on the actual length, using [`nybbles_len`]
39 /// if they need a loop boundary.
39 /// if they need a loop boundary.
40 ///
40 ///
41 /// All methods that create a `Node` either take a type that enforces
41 /// All methods that create a `Node` either take a type that enforces
42 /// the size or fail immediately at runtime with [`ExactLengthRequired`].
42 /// the size or fail immediately at runtime with [`ExactLengthRequired`].
43 ///
43 ///
44 /// [`nybbles_len`]: #method.nybbles_len
44 /// [`nybbles_len`]: #method.nybbles_len
45 /// [`ExactLengthRequired`]: struct.NodeError#variant.ExactLengthRequired
45 /// [`ExactLengthRequired`]: struct.NodeError#variant.ExactLengthRequired
46 #[derive(Clone, Debug, PartialEq)]
46 #[derive(Clone, Debug, PartialEq)]
47 pub struct Node {
47 pub struct Node {
48 data: NodeData,
48 data: NodeData,
49 }
49 }
50
50
51 /// The node value for NULL_REVISION
51 /// The node value for NULL_REVISION
52 pub const NULL_NODE: Node = Node {
52 pub const NULL_NODE: Node = Node {
53 data: [0; NODE_BYTES_LENGTH],
53 data: [0; NODE_BYTES_LENGTH],
54 };
54 };
55
55
56 impl From<NodeData> for Node {
56 impl From<NodeData> for Node {
57 fn from(data: NodeData) -> Node {
57 fn from(data: NodeData) -> Node {
58 Node { data }
58 Node { data }
59 }
59 }
60 }
60 }
61
61
62 #[derive(Debug, PartialEq)]
62 #[derive(Debug, PartialEq)]
63 pub enum NodeError {
63 pub enum NodeError {
64 ExactLengthRequired(usize, String),
64 ExactLengthRequired(usize, String),
65 PrefixTooLong(String),
65 HexError(FromHexError, String),
66 HexError(FromHexError, String),
66 }
67 }
67
68
68 /// Low level utility function, also for prefixes
69 /// Low level utility function, also for prefixes
69 fn get_nybble(s: &[u8], i: usize) -> u8 {
70 fn get_nybble(s: &[u8], i: usize) -> u8 {
70 if i % 2 == 0 {
71 if i % 2 == 0 {
71 s[i / 2] >> 4
72 s[i / 2] >> 4
72 } else {
73 } else {
73 s[i / 2] & 0x0f
74 s[i / 2] & 0x0f
74 }
75 }
75 }
76 }
76
77
77 impl Node {
78 impl Node {
78 /// Retrieve the `i`th half-byte of the binary data.
79 /// Retrieve the `i`th half-byte of the binary data.
79 ///
80 ///
80 /// This is also the `i`th hexadecimal digit in numeric form,
81 /// This is also the `i`th hexadecimal digit in numeric form,
81 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
82 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
82 pub fn get_nybble(&self, i: usize) -> u8 {
83 pub fn get_nybble(&self, i: usize) -> u8 {
83 get_nybble(&self.data, i)
84 get_nybble(&self.data, i)
84 }
85 }
85
86
86 /// Length of the data, in nybbles
87 /// Length of the data, in nybbles
87 pub fn nybbles_len(&self) -> usize {
88 pub fn nybbles_len(&self) -> usize {
88 // public exposure as an instance method only, so that we can
89 // public exposure as an instance method only, so that we can
89 // easily support several sizes of hashes if needed in the future.
90 // easily support several sizes of hashes if needed in the future.
90 NODE_NYBBLES_LENGTH
91 NODE_NYBBLES_LENGTH
91 }
92 }
92
93
93 /// Convert from hexadecimal string representation
94 /// Convert from hexadecimal string representation
94 ///
95 ///
95 /// Exact length is required.
96 /// Exact length is required.
96 ///
97 ///
97 /// To be used in FFI and I/O only, in order to facilitate future
98 /// To be used in FFI and I/O only, in order to facilitate future
98 /// changes of hash format.
99 /// changes of hash format.
99 pub fn from_hex(hex: &str) -> Result<Node, NodeError> {
100 pub fn from_hex(hex: &str) -> Result<Node, NodeError> {
100 Ok(NodeData::from_hex(hex)
101 Ok(NodeData::from_hex(hex)
101 .map_err(|e| NodeError::from((e, hex)))?
102 .map_err(|e| NodeError::from((e, hex)))?
102 .into())
103 .into())
103 }
104 }
104
105
105 /// Convert to hexadecimal string representation
106 /// Convert to hexadecimal string representation
106 ///
107 ///
107 /// To be used in FFI and I/O only, in order to facilitate future
108 /// To be used in FFI and I/O only, in order to facilitate future
108 /// changes of hash format.
109 /// changes of hash format.
109 pub fn encode_hex(&self) -> String {
110 pub fn encode_hex(&self) -> String {
110 hex::encode(self.data)
111 hex::encode(self.data)
111 }
112 }
112
113
113 /// Provide access to binary data
114 /// Provide access to binary data
114 ///
115 ///
115 /// This is needed by FFI layers, for instance to return expected
116 /// This is needed by FFI layers, for instance to return expected
116 /// binary values to Python.
117 /// binary values to Python.
117 pub fn as_bytes(&self) -> &[u8] {
118 pub fn as_bytes(&self) -> &[u8] {
118 &self.data
119 &self.data
119 }
120 }
120 }
121 }
121
122
122 impl From<(FromHexError, &str)> for NodeError {
123 impl<T: AsRef<str>> From<(FromHexError, T)> for NodeError {
123 fn from(err_offender: (FromHexError, &str)) -> Self {
124 fn from(err_offender: (FromHexError, T)) -> Self {
124 let (err, offender) = err_offender;
125 let (err, offender) = err_offender;
125 match err {
126 match err {
126 FromHexError::InvalidStringLength => {
127 FromHexError::InvalidStringLength => {
127 NodeError::ExactLengthRequired(
128 NodeError::ExactLengthRequired(
128 NODE_NYBBLES_LENGTH,
129 NODE_NYBBLES_LENGTH,
129 offender.to_string(),
130 offender.as_ref().to_owned(),
130 )
131 )
131 }
132 }
132 _ => NodeError::HexError(err, offender.to_string()),
133 _ => NodeError::HexError(err, offender.as_ref().to_owned()),
134 }
135 }
136 }
137
138 /// The beginning of a binary revision SHA.
139 ///
140 /// Since it can potentially come from an hexadecimal representation with
141 /// odd length, it needs to carry around whether the last 4 bits are relevant
142 /// or not.
143 #[derive(Debug, PartialEq)]
144 pub struct NodePrefix {
145 buf: Vec<u8>,
146 is_odd: bool,
147 }
148
149 impl NodePrefix {
150 /// Convert from hexadecimal string representation
151 ///
152 /// Similarly to `hex::decode`, can be used with Unicode string types
153 /// (`String`, `&str`) as well as bytes.
154 ///
155 /// To be used in FFI and I/O only, in order to facilitate future
156 /// changes of hash format.
157 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, NodeError> {
158 let hex = hex.as_ref();
159 let len = hex.len();
160 if len > NODE_NYBBLES_LENGTH {
161 return Err(NodeError::PrefixTooLong(
162 String::from_utf8_lossy(hex).to_owned().to_string(),
163 ));
164 }
165
166 let is_odd = len % 2 == 1;
167 let even_part = if is_odd { &hex[..len - 1] } else { hex };
168 let mut buf: Vec<u8> = Vec::from_hex(&even_part)
169 .map_err(|e| (e, String::from_utf8_lossy(hex)))?;
170
171 if is_odd {
172 let latest_char = char::from(hex[len - 1]);
173 let latest_nybble = latest_char.to_digit(16).ok_or_else(|| {
174 (
175 FromHexError::InvalidHexCharacter {
176 c: latest_char,
177 index: len - 1,
178 },
179 String::from_utf8_lossy(hex),
180 )
181 })? as u8;
182 buf.push(latest_nybble << 4);
183 }
184 Ok(NodePrefix { buf, is_odd })
185 }
186
187 pub fn borrow(&self) -> NodePrefixRef {
188 NodePrefixRef {
189 buf: &self.buf,
190 is_odd: self.is_odd,
191 }
192 }
193 }
194
195 #[derive(Clone, Debug, PartialEq)]
196 pub struct NodePrefixRef<'a> {
197 buf: &'a [u8],
198 is_odd: bool,
199 }
200
201 impl<'a> NodePrefixRef<'a> {
202 pub fn len(&self) -> usize {
203 if self.is_odd {
204 self.buf.len() * 2 - 1
205 } else {
206 self.buf.len() * 2
207 }
208 }
209
210 pub fn is_prefix_of(&self, node: &Node) -> bool {
211 if self.is_odd {
212 let buf = self.buf;
213 let last_pos = buf.len() - 1;
214 node.data.starts_with(buf.split_at(last_pos).0)
215 && node.data[last_pos] >> 4 == buf[last_pos] >> 4
216 } else {
217 node.data.starts_with(self.buf)
218 }
219 }
220
221 /// Retrieve the `i`th half-byte from the prefix.
222 ///
223 /// This is also the `i`th hexadecimal digit in numeric form,
224 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
225 pub fn get_nybble(&self, i: usize) -> u8 {
226 get_nybble(self.buf, i)
227 }
228 }
229
230 /// A shortcut for full `Node` references
231 impl<'a> From<&'a Node> for NodePrefixRef<'a> {
232 fn from(node: &'a Node) -> Self {
233 NodePrefixRef {
234 buf: &node.data,
235 is_odd: false,
133 }
236 }
134 }
237 }
135 }
238 }
136
239
137 #[cfg(test)]
240 #[cfg(test)]
138 mod tests {
241 mod tests {
139 use super::*;
242 use super::*;
140
243
141 fn sample_node() -> Node {
244 fn sample_node() -> Node {
142 let mut data = [0; NODE_BYTES_LENGTH];
245 let mut data = [0; NODE_BYTES_LENGTH];
143 data.copy_from_slice(&[
246 data.copy_from_slice(&[
144 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
247 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
145 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
248 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
146 ]);
249 ]);
147 data.into()
250 data.into()
148 }
251 }
149
252
150 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
253 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
151 ///
254 ///
152 /// The padding is made with zeros
255 /// The padding is made with zeros
153 fn hex_pad_right(hex: &str) -> String {
256 fn hex_pad_right(hex: &str) -> String {
154 let mut res = hex.to_string();
257 let mut res = hex.to_string();
155 while res.len() < NODE_NYBBLES_LENGTH {
258 while res.len() < NODE_NYBBLES_LENGTH {
156 res.push('0');
259 res.push('0');
157 }
260 }
158 res
261 res
159 }
262 }
160
263
161 fn sample_node_hex() -> String {
264 fn sample_node_hex() -> String {
162 hex_pad_right("0123456789abcdeffedcba9876543210deadbeef")
265 hex_pad_right("0123456789abcdeffedcba9876543210deadbeef")
163 }
266 }
164
267
165 #[test]
268 #[test]
166 fn test_node_from_hex() {
269 fn test_node_from_hex() {
167 assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node()));
270 assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node()));
168
271
169 let mut short = hex_pad_right("0123");
272 let mut short = hex_pad_right("0123");
170 short.pop();
273 short.pop();
171 short.pop();
274 short.pop();
172 assert_eq!(
275 assert_eq!(
173 Node::from_hex(&short),
276 Node::from_hex(&short),
174 Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)),
277 Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)),
175 );
278 );
176
279
177 let not_hex = hex_pad_right("012... oops");
280 let not_hex = hex_pad_right("012... oops");
178 assert_eq!(
281 assert_eq!(
179 Node::from_hex(&not_hex),
282 Node::from_hex(&not_hex),
180 Err(NodeError::HexError(
283 Err(NodeError::HexError(
181 FromHexError::InvalidHexCharacter { c: '.', index: 3 },
284 FromHexError::InvalidHexCharacter { c: '.', index: 3 },
182 not_hex,
285 not_hex,
183 )),
286 )),
184 );
287 );
185 }
288 }
186
289
187 #[test]
290 #[test]
188 fn test_node_encode_hex() {
291 fn test_node_encode_hex() {
189 assert_eq!(sample_node().encode_hex(), sample_node_hex());
292 assert_eq!(sample_node().encode_hex(), sample_node_hex());
190 }
293 }
294
295 #[test]
296 fn test_prefix_from_hex() -> Result<(), NodeError> {
297 assert_eq!(
298 NodePrefix::from_hex("0e1")?,
299 NodePrefix {
300 buf: vec![14, 16],
301 is_odd: true
302 }
303 );
304 assert_eq!(
305 NodePrefix::from_hex("0e1a")?,
306 NodePrefix {
307 buf: vec![14, 26],
308 is_odd: false
309 }
310 );
311
312 // checking limit case
313 let node_as_vec = sample_node().data.iter().cloned().collect();
314 assert_eq!(
315 NodePrefix::from_hex(sample_node_hex())?,
316 NodePrefix {
317 buf: node_as_vec,
318 is_odd: false
319 }
320 );
321
322 Ok(())
323 }
324
325 #[test]
326 fn test_prefix_from_hex_errors() {
327 assert_eq!(
328 NodePrefix::from_hex("testgr"),
329 Err(NodeError::HexError(
330 FromHexError::InvalidHexCharacter { c: 't', index: 0 },
331 "testgr".to_string()
332 ))
333 );
334 let mut long = NULL_NODE.encode_hex();
335 long.push('c');
336 match NodePrefix::from_hex(&long)
337 .expect_err("should be refused as too long")
338 {
339 NodeError::PrefixTooLong(s) => assert_eq!(s, long),
340 err => panic!(format!("Should have been TooLong, got {:?}", err)),
341 }
342 }
343
344 #[test]
345 fn test_is_prefix_of() -> Result<(), NodeError> {
346 let mut node_data = [0; NODE_BYTES_LENGTH];
347 node_data[0] = 0x12;
348 node_data[1] = 0xca;
349 let node = Node::from(node_data);
350 assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node));
351 assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node));
352 assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node));
353 assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node));
354 Ok(())
355 }
356
357 #[test]
358 fn test_get_nybble() -> Result<(), NodeError> {
359 let prefix = NodePrefix::from_hex("dead6789cafe")?;
360 assert_eq!(prefix.borrow().get_nybble(0), 13);
361 assert_eq!(prefix.borrow().get_nybble(7), 9);
362 Ok(())
363 }
191 }
364 }
General Comments 0
You need to be logged in to leave comments. Login now