##// END OF EJS Templates
rust-nodemap: NodeMap trait with simplest implementation...
Georges Racinet -
r44644:e52401a9 default
parent child Browse files
Show More
@@ -1,364 +1,367 b''
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 //
2 //
3 // This software may be used and distributed according to the terms of the
3 // This software may be used and distributed according to the terms of the
4 // GNU General Public License version 2 or any later version.
4 // GNU General Public License version 2 or any later version.
5
5
6 //! Definitions and utilities for Revision nodes
6 //! Definitions and utilities for Revision nodes
7 //!
7 //!
8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 //! of a revision.
9 //! of a revision.
10
10
11 use hex::{self, FromHex, FromHexError};
11 use hex::{self, FromHex, FromHexError};
12
12
13 /// The length in bytes of a `Node`
13 /// The length in bytes of a `Node`
14 ///
14 ///
15 /// This constant is meant to ease refactors of this module, and
15 /// This constant is meant to ease refactors of this module, and
16 /// are private so that calling code does not expect all nodes have
16 /// are private so that calling code does not expect all nodes have
17 /// the same size, should we support several formats concurrently in
17 /// the same size, should we support several formats concurrently in
18 /// the future.
18 /// the future.
19 const NODE_BYTES_LENGTH: usize = 20;
19 const NODE_BYTES_LENGTH: usize = 20;
20
20
21 /// The length in bytes of a `Node`
21 /// The length in bytes of a `Node`
22 ///
22 ///
23 /// see also `NODES_BYTES_LENGTH` about it being private.
23 /// see also `NODES_BYTES_LENGTH` about it being private.
24 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
24 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
25
25
26 /// Private alias for readability and to ease future change
26 /// Private alias for readability and to ease future change
27 type NodeData = [u8; NODE_BYTES_LENGTH];
27 type NodeData = [u8; NODE_BYTES_LENGTH];
28
28
29 /// Binary revision SHA
29 /// Binary revision SHA
30 ///
30 ///
31 /// ## Future changes of hash size
31 /// ## Future changes of hash size
32 ///
32 ///
33 /// To accomodate future changes of hash size, Rust callers
33 /// To accomodate future changes of hash size, Rust callers
34 /// should use the conversion methods at the boundaries (FFI, actual
34 /// should use the conversion methods at the boundaries (FFI, actual
35 /// computation of hashes and I/O) only, and only if required.
35 /// computation of hashes and I/O) only, and only if required.
36 ///
36 ///
37 /// All other callers outside of unit tests should just handle `Node` values
37 /// All other callers outside of unit tests should just handle `Node` values
38 /// and never make any assumption on the actual length, using [`nybbles_len`]
38 /// and never make any assumption on the actual length, using [`nybbles_len`]
39 /// if they need a loop boundary.
39 /// if they need a loop boundary.
40 ///
40 ///
41 /// All methods that create a `Node` either take a type that enforces
41 /// All methods that create a `Node` either take a type that enforces
42 /// the size or fail immediately at runtime with [`ExactLengthRequired`].
42 /// the size or fail immediately at runtime with [`ExactLengthRequired`].
43 ///
43 ///
44 /// [`nybbles_len`]: #method.nybbles_len
44 /// [`nybbles_len`]: #method.nybbles_len
45 /// [`ExactLengthRequired`]: struct.NodeError#variant.ExactLengthRequired
45 /// [`ExactLengthRequired`]: struct.NodeError#variant.ExactLengthRequired
46 #[derive(Clone, Debug, PartialEq)]
46 #[derive(Clone, Debug, PartialEq)]
47 pub struct Node {
47 pub struct Node {
48 data: NodeData,
48 data: NodeData,
49 }
49 }
50
50
51 /// The node value for NULL_REVISION
51 /// The node value for NULL_REVISION
52 pub const NULL_NODE: Node = Node {
52 pub const NULL_NODE: Node = Node {
53 data: [0; NODE_BYTES_LENGTH],
53 data: [0; NODE_BYTES_LENGTH],
54 };
54 };
55
55
56 impl From<NodeData> for Node {
56 impl From<NodeData> for Node {
57 fn from(data: NodeData) -> Node {
57 fn from(data: NodeData) -> Node {
58 Node { data }
58 Node { data }
59 }
59 }
60 }
60 }
61
61
62 #[derive(Debug, PartialEq)]
62 #[derive(Debug, PartialEq)]
63 pub enum NodeError {
63 pub enum NodeError {
64 ExactLengthRequired(usize, String),
64 ExactLengthRequired(usize, String),
65 PrefixTooLong(String),
65 PrefixTooLong(String),
66 HexError(FromHexError, String),
66 HexError(FromHexError, String),
67 }
67 }
68
68
69 /// Low level utility function, also for prefixes
69 /// Low level utility function, also for prefixes
70 fn get_nybble(s: &[u8], i: usize) -> u8 {
70 fn get_nybble(s: &[u8], i: usize) -> u8 {
71 if i % 2 == 0 {
71 if i % 2 == 0 {
72 s[i / 2] >> 4
72 s[i / 2] >> 4
73 } else {
73 } else {
74 s[i / 2] & 0x0f
74 s[i / 2] & 0x0f
75 }
75 }
76 }
76 }
77
77
78 impl Node {
78 impl Node {
79 /// Retrieve the `i`th half-byte of the binary data.
79 /// Retrieve the `i`th half-byte of the binary data.
80 ///
80 ///
81 /// This is also the `i`th hexadecimal digit in numeric form,
81 /// This is also the `i`th hexadecimal digit in numeric form,
82 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
82 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
83 pub fn get_nybble(&self, i: usize) -> u8 {
83 pub fn get_nybble(&self, i: usize) -> u8 {
84 get_nybble(&self.data, i)
84 get_nybble(&self.data, i)
85 }
85 }
86
86
87 /// Length of the data, in nybbles
87 /// Length of the data, in nybbles
88 pub fn nybbles_len(&self) -> usize {
88 pub fn nybbles_len(&self) -> usize {
89 // public exposure as an instance method only, so that we can
89 // public exposure as an instance method only, so that we can
90 // easily support several sizes of hashes if needed in the future.
90 // easily support several sizes of hashes if needed in the future.
91 NODE_NYBBLES_LENGTH
91 NODE_NYBBLES_LENGTH
92 }
92 }
93
93
94 /// Convert from hexadecimal string representation
94 /// Convert from hexadecimal string representation
95 ///
95 ///
96 /// Exact length is required.
96 /// Exact length is required.
97 ///
97 ///
98 /// To be used in FFI and I/O only, in order to facilitate future
98 /// To be used in FFI and I/O only, in order to facilitate future
99 /// changes of hash format.
99 /// changes of hash format.
100 pub fn from_hex(hex: &str) -> Result<Node, NodeError> {
100 pub fn from_hex(hex: &str) -> Result<Node, NodeError> {
101 Ok(NodeData::from_hex(hex)
101 Ok(NodeData::from_hex(hex)
102 .map_err(|e| NodeError::from((e, hex)))?
102 .map_err(|e| NodeError::from((e, hex)))?
103 .into())
103 .into())
104 }
104 }
105
105
106 /// Convert to hexadecimal string representation
106 /// Convert to hexadecimal string representation
107 ///
107 ///
108 /// To be used in FFI and I/O only, in order to facilitate future
108 /// To be used in FFI and I/O only, in order to facilitate future
109 /// changes of hash format.
109 /// changes of hash format.
110 pub fn encode_hex(&self) -> String {
110 pub fn encode_hex(&self) -> String {
111 hex::encode(self.data)
111 hex::encode(self.data)
112 }
112 }
113
113
114 /// Provide access to binary data
114 /// Provide access to binary data
115 ///
115 ///
116 /// This is needed by FFI layers, for instance to return expected
116 /// This is needed by FFI layers, for instance to return expected
117 /// binary values to Python.
117 /// binary values to Python.
118 pub fn as_bytes(&self) -> &[u8] {
118 pub fn as_bytes(&self) -> &[u8] {
119 &self.data
119 &self.data
120 }
120 }
121 }
121 }
122
122
123 impl<T: AsRef<str>> From<(FromHexError, T)> for NodeError {
123 impl<T: AsRef<str>> From<(FromHexError, T)> for NodeError {
124 fn from(err_offender: (FromHexError, T)) -> Self {
124 fn from(err_offender: (FromHexError, T)) -> Self {
125 let (err, offender) = err_offender;
125 let (err, offender) = err_offender;
126 match err {
126 match err {
127 FromHexError::InvalidStringLength => {
127 FromHexError::InvalidStringLength => {
128 NodeError::ExactLengthRequired(
128 NodeError::ExactLengthRequired(
129 NODE_NYBBLES_LENGTH,
129 NODE_NYBBLES_LENGTH,
130 offender.as_ref().to_owned(),
130 offender.as_ref().to_owned(),
131 )
131 )
132 }
132 }
133 _ => NodeError::HexError(err, offender.as_ref().to_owned()),
133 _ => NodeError::HexError(err, offender.as_ref().to_owned()),
134 }
134 }
135 }
135 }
136 }
136 }
137
137
138 /// The beginning of a binary revision SHA.
138 /// The beginning of a binary revision SHA.
139 ///
139 ///
140 /// Since it can potentially come from an hexadecimal representation with
140 /// Since it can potentially come from an hexadecimal representation with
141 /// odd length, it needs to carry around whether the last 4 bits are relevant
141 /// odd length, it needs to carry around whether the last 4 bits are relevant
142 /// or not.
142 /// or not.
143 #[derive(Debug, PartialEq)]
143 #[derive(Debug, PartialEq)]
144 pub struct NodePrefix {
144 pub struct NodePrefix {
145 buf: Vec<u8>,
145 buf: Vec<u8>,
146 is_odd: bool,
146 is_odd: bool,
147 }
147 }
148
148
149 impl NodePrefix {
149 impl NodePrefix {
150 /// Convert from hexadecimal string representation
150 /// Convert from hexadecimal string representation
151 ///
151 ///
152 /// Similarly to `hex::decode`, can be used with Unicode string types
152 /// Similarly to `hex::decode`, can be used with Unicode string types
153 /// (`String`, `&str`) as well as bytes.
153 /// (`String`, `&str`) as well as bytes.
154 ///
154 ///
155 /// To be used in FFI and I/O only, in order to facilitate future
155 /// To be used in FFI and I/O only, in order to facilitate future
156 /// changes of hash format.
156 /// changes of hash format.
157 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, NodeError> {
157 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, NodeError> {
158 let hex = hex.as_ref();
158 let hex = hex.as_ref();
159 let len = hex.len();
159 let len = hex.len();
160 if len > NODE_NYBBLES_LENGTH {
160 if len > NODE_NYBBLES_LENGTH {
161 return Err(NodeError::PrefixTooLong(
161 return Err(NodeError::PrefixTooLong(
162 String::from_utf8_lossy(hex).to_owned().to_string(),
162 String::from_utf8_lossy(hex).to_owned().to_string(),
163 ));
163 ));
164 }
164 }
165
165
166 let is_odd = len % 2 == 1;
166 let is_odd = len % 2 == 1;
167 let even_part = if is_odd { &hex[..len - 1] } else { hex };
167 let even_part = if is_odd { &hex[..len - 1] } else { hex };
168 let mut buf: Vec<u8> = Vec::from_hex(&even_part)
168 let mut buf: Vec<u8> = Vec::from_hex(&even_part)
169 .map_err(|e| (e, String::from_utf8_lossy(hex)))?;
169 .map_err(|e| (e, String::from_utf8_lossy(hex)))?;
170
170
171 if is_odd {
171 if is_odd {
172 let latest_char = char::from(hex[len - 1]);
172 let latest_char = char::from(hex[len - 1]);
173 let latest_nybble = latest_char.to_digit(16).ok_or_else(|| {
173 let latest_nybble = latest_char.to_digit(16).ok_or_else(|| {
174 (
174 (
175 FromHexError::InvalidHexCharacter {
175 FromHexError::InvalidHexCharacter {
176 c: latest_char,
176 c: latest_char,
177 index: len - 1,
177 index: len - 1,
178 },
178 },
179 String::from_utf8_lossy(hex),
179 String::from_utf8_lossy(hex),
180 )
180 )
181 })? as u8;
181 })? as u8;
182 buf.push(latest_nybble << 4);
182 buf.push(latest_nybble << 4);
183 }
183 }
184 Ok(NodePrefix { buf, is_odd })
184 Ok(NodePrefix { buf, is_odd })
185 }
185 }
186
186
187 pub fn borrow(&self) -> NodePrefixRef {
187 pub fn borrow(&self) -> NodePrefixRef {
188 NodePrefixRef {
188 NodePrefixRef {
189 buf: &self.buf,
189 buf: &self.buf,
190 is_odd: self.is_odd,
190 is_odd: self.is_odd,
191 }
191 }
192 }
192 }
193 }
193 }
194
194
195 #[derive(Clone, Debug, PartialEq)]
195 #[derive(Clone, Debug, PartialEq)]
196 pub struct NodePrefixRef<'a> {
196 pub struct NodePrefixRef<'a> {
197 buf: &'a [u8],
197 buf: &'a [u8],
198 is_odd: bool,
198 is_odd: bool,
199 }
199 }
200
200
201 impl<'a> NodePrefixRef<'a> {
201 impl<'a> NodePrefixRef<'a> {
202 pub fn len(&self) -> usize {
202 pub fn len(&self) -> usize {
203 if self.is_odd {
203 if self.is_odd {
204 self.buf.len() * 2 - 1
204 self.buf.len() * 2 - 1
205 } else {
205 } else {
206 self.buf.len() * 2
206 self.buf.len() * 2
207 }
207 }
208 }
208 }
209
209
210 pub fn is_prefix_of(&self, node: &Node) -> bool {
210 pub fn is_prefix_of(&self, node: &Node) -> bool {
211 if self.is_odd {
211 if self.is_odd {
212 let buf = self.buf;
212 let buf = self.buf;
213 let last_pos = buf.len() - 1;
213 let last_pos = buf.len() - 1;
214 node.data.starts_with(buf.split_at(last_pos).0)
214 node.data.starts_with(buf.split_at(last_pos).0)
215 && node.data[last_pos] >> 4 == buf[last_pos] >> 4
215 && node.data[last_pos] >> 4 == buf[last_pos] >> 4
216 } else {
216 } else {
217 node.data.starts_with(self.buf)
217 node.data.starts_with(self.buf)
218 }
218 }
219 }
219 }
220
220
221 /// Retrieve the `i`th half-byte from the prefix.
221 /// Retrieve the `i`th half-byte from the prefix.
222 ///
222 ///
223 /// This is also the `i`th hexadecimal digit in numeric form,
223 /// This is also the `i`th hexadecimal digit in numeric form,
224 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
224 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
225 pub fn get_nybble(&self, i: usize) -> u8 {
225 pub fn get_nybble(&self, i: usize) -> u8 {
226 get_nybble(self.buf, i)
226 get_nybble(self.buf, i)
227 }
227 }
228 }
228 }
229
229
230 /// A shortcut for full `Node` references
230 /// A shortcut for full `Node` references
231 impl<'a> From<&'a Node> for NodePrefixRef<'a> {
231 impl<'a> From<&'a Node> for NodePrefixRef<'a> {
232 fn from(node: &'a Node) -> Self {
232 fn from(node: &'a Node) -> Self {
233 NodePrefixRef {
233 NodePrefixRef {
234 buf: &node.data,
234 buf: &node.data,
235 is_odd: false,
235 is_odd: false,
236 }
236 }
237 }
237 }
238 }
238 }
239
239
240 #[cfg(test)]
240 #[cfg(test)]
241 mod tests {
241 mod tests {
242 use super::*;
242 use super::*;
243
243
244 fn sample_node() -> Node {
244 fn sample_node() -> Node {
245 let mut data = [0; NODE_BYTES_LENGTH];
245 let mut data = [0; NODE_BYTES_LENGTH];
246 data.copy_from_slice(&[
246 data.copy_from_slice(&[
247 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
247 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
248 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
248 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
249 ]);
249 ]);
250 data.into()
250 data.into()
251 }
251 }
252
252
253 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
253 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
254 ///
254 ///
255 /// The padding is made with zeros
255 /// The padding is made with zeros
256 fn hex_pad_right(hex: &str) -> String {
256 pub fn hex_pad_right(hex: &str) -> String {
257 let mut res = hex.to_string();
257 let mut res = hex.to_string();
258 while res.len() < NODE_NYBBLES_LENGTH {
258 while res.len() < NODE_NYBBLES_LENGTH {
259 res.push('0');
259 res.push('0');
260 }
260 }
261 res
261 res
262 }
262 }
263
263
264 fn sample_node_hex() -> String {
264 fn sample_node_hex() -> String {
265 hex_pad_right("0123456789abcdeffedcba9876543210deadbeef")
265 hex_pad_right("0123456789abcdeffedcba9876543210deadbeef")
266 }
266 }
267
267
268 #[test]
268 #[test]
269 fn test_node_from_hex() {
269 fn test_node_from_hex() {
270 assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node()));
270 assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node()));
271
271
272 let mut short = hex_pad_right("0123");
272 let mut short = hex_pad_right("0123");
273 short.pop();
273 short.pop();
274 short.pop();
274 short.pop();
275 assert_eq!(
275 assert_eq!(
276 Node::from_hex(&short),
276 Node::from_hex(&short),
277 Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)),
277 Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)),
278 );
278 );
279
279
280 let not_hex = hex_pad_right("012... oops");
280 let not_hex = hex_pad_right("012... oops");
281 assert_eq!(
281 assert_eq!(
282 Node::from_hex(&not_hex),
282 Node::from_hex(&not_hex),
283 Err(NodeError::HexError(
283 Err(NodeError::HexError(
284 FromHexError::InvalidHexCharacter { c: '.', index: 3 },
284 FromHexError::InvalidHexCharacter { c: '.', index: 3 },
285 not_hex,
285 not_hex,
286 )),
286 )),
287 );
287 );
288 }
288 }
289
289
290 #[test]
290 #[test]
291 fn test_node_encode_hex() {
291 fn test_node_encode_hex() {
292 assert_eq!(sample_node().encode_hex(), sample_node_hex());
292 assert_eq!(sample_node().encode_hex(), sample_node_hex());
293 }
293 }
294
294
295 #[test]
295 #[test]
296 fn test_prefix_from_hex() -> Result<(), NodeError> {
296 fn test_prefix_from_hex() -> Result<(), NodeError> {
297 assert_eq!(
297 assert_eq!(
298 NodePrefix::from_hex("0e1")?,
298 NodePrefix::from_hex("0e1")?,
299 NodePrefix {
299 NodePrefix {
300 buf: vec![14, 16],
300 buf: vec![14, 16],
301 is_odd: true
301 is_odd: true
302 }
302 }
303 );
303 );
304 assert_eq!(
304 assert_eq!(
305 NodePrefix::from_hex("0e1a")?,
305 NodePrefix::from_hex("0e1a")?,
306 NodePrefix {
306 NodePrefix {
307 buf: vec![14, 26],
307 buf: vec![14, 26],
308 is_odd: false
308 is_odd: false
309 }
309 }
310 );
310 );
311
311
312 // checking limit case
312 // checking limit case
313 let node_as_vec = sample_node().data.iter().cloned().collect();
313 let node_as_vec = sample_node().data.iter().cloned().collect();
314 assert_eq!(
314 assert_eq!(
315 NodePrefix::from_hex(sample_node_hex())?,
315 NodePrefix::from_hex(sample_node_hex())?,
316 NodePrefix {
316 NodePrefix {
317 buf: node_as_vec,
317 buf: node_as_vec,
318 is_odd: false
318 is_odd: false
319 }
319 }
320 );
320 );
321
321
322 Ok(())
322 Ok(())
323 }
323 }
324
324
325 #[test]
325 #[test]
326 fn test_prefix_from_hex_errors() {
326 fn test_prefix_from_hex_errors() {
327 assert_eq!(
327 assert_eq!(
328 NodePrefix::from_hex("testgr"),
328 NodePrefix::from_hex("testgr"),
329 Err(NodeError::HexError(
329 Err(NodeError::HexError(
330 FromHexError::InvalidHexCharacter { c: 't', index: 0 },
330 FromHexError::InvalidHexCharacter { c: 't', index: 0 },
331 "testgr".to_string()
331 "testgr".to_string()
332 ))
332 ))
333 );
333 );
334 let mut long = NULL_NODE.encode_hex();
334 let mut long = NULL_NODE.encode_hex();
335 long.push('c');
335 long.push('c');
336 match NodePrefix::from_hex(&long)
336 match NodePrefix::from_hex(&long)
337 .expect_err("should be refused as too long")
337 .expect_err("should be refused as too long")
338 {
338 {
339 NodeError::PrefixTooLong(s) => assert_eq!(s, long),
339 NodeError::PrefixTooLong(s) => assert_eq!(s, long),
340 err => panic!(format!("Should have been TooLong, got {:?}", err)),
340 err => panic!(format!("Should have been TooLong, got {:?}", err)),
341 }
341 }
342 }
342 }
343
343
344 #[test]
344 #[test]
345 fn test_is_prefix_of() -> Result<(), NodeError> {
345 fn test_is_prefix_of() -> Result<(), NodeError> {
346 let mut node_data = [0; NODE_BYTES_LENGTH];
346 let mut node_data = [0; NODE_BYTES_LENGTH];
347 node_data[0] = 0x12;
347 node_data[0] = 0x12;
348 node_data[1] = 0xca;
348 node_data[1] = 0xca;
349 let node = Node::from(node_data);
349 let node = Node::from(node_data);
350 assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node));
350 assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node));
351 assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node));
351 assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node));
352 assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node));
352 assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node));
353 assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node));
353 assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node));
354 Ok(())
354 Ok(())
355 }
355 }
356
356
357 #[test]
357 #[test]
358 fn test_get_nybble() -> Result<(), NodeError> {
358 fn test_get_nybble() -> Result<(), NodeError> {
359 let prefix = NodePrefix::from_hex("dead6789cafe")?;
359 let prefix = NodePrefix::from_hex("dead6789cafe")?;
360 assert_eq!(prefix.borrow().get_nybble(0), 13);
360 assert_eq!(prefix.borrow().get_nybble(0), 13);
361 assert_eq!(prefix.borrow().get_nybble(7), 9);
361 assert_eq!(prefix.borrow().get_nybble(7), 9);
362 Ok(())
362 Ok(())
363 }
363 }
364 }
364 }
365
366 #[cfg(test)]
367 pub use tests::hex_pad_right;
@@ -1,160 +1,388 b''
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 //!
7 //!
8 //! This provides a variation on the 16-ary radix tree that is
8 //! This provides a variation on the 16-ary radix tree that is
9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 //! on disk.
10 //! on disk.
11 //!
11 //!
12 //! Following existing implicit conventions, the "nodemap" terminology
12 //! Following existing implicit conventions, the "nodemap" terminology
13 //! is used in a more abstract context.
13 //! is used in a more abstract context.
14
14
15 use super::Revision;
15 use super::{
16 Node, NodeError, NodePrefix, NodePrefixRef, Revision, RevlogIndex,
17 };
16 use std::fmt;
18 use std::fmt;
19 use std::ops::Deref;
20
21 #[derive(Debug, PartialEq)]
22 pub enum NodeMapError {
23 MultipleResults,
24 InvalidNodePrefix(NodeError),
25 /// A `Revision` stored in the nodemap could not be found in the index
26 RevisionNotInIndex(Revision),
27 }
28
29 impl From<NodeError> for NodeMapError {
30 fn from(err: NodeError) -> Self {
31 NodeMapError::InvalidNodePrefix(err)
32 }
33 }
34
35 /// Mapping system from Mercurial nodes to revision numbers.
36 ///
37 /// ## `RevlogIndex` and `NodeMap`
38 ///
39 /// One way to think about their relationship is that
40 /// the `NodeMap` is a prefix-oriented reverse index of the `Node` information
41 /// carried by a [`RevlogIndex`].
42 ///
43 /// Many of the methods in this trait take a `RevlogIndex` argument
44 /// which is used for validation of their results. This index must naturally
45 /// be the one the `NodeMap` is about, and it must be consistent.
46 ///
47 /// Notably, the `NodeMap` must not store
48 /// information about more `Revision` values than there are in the index.
49 /// In these methods, an encountered `Revision` is not in the index, a
50 /// [`RevisionNotInIndex`] error is returned.
51 ///
52 /// In insert operations, the rule is thus that the `NodeMap` must always
53 /// be updated after the `RevlogIndex`
54 /// be updated first, and the `NodeMap` second.
55 ///
56 /// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex
57 /// [`RevlogIndex`]: ../trait.RevlogIndex.html
58 pub trait NodeMap {
59 /// Find the unique `Revision` having the given `Node`
60 ///
61 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
62 fn find_node(
63 &self,
64 index: &impl RevlogIndex,
65 node: &Node,
66 ) -> Result<Option<Revision>, NodeMapError> {
67 self.find_bin(index, node.into())
68 }
69
70 /// Find the unique Revision whose `Node` starts with a given binary prefix
71 ///
72 /// If no Revision matches the given prefix, `Ok(None)` is returned.
73 ///
74 /// If several Revisions match the given prefix, a [`MultipleResults`]
75 /// error is returned.
76 fn find_bin<'a>(
77 &self,
78 idx: &impl RevlogIndex,
79 prefix: NodePrefixRef<'a>,
80 ) -> Result<Option<Revision>, NodeMapError>;
81
82 /// Find the unique Revision whose `Node` hexadecimal string representation
83 /// starts with a given prefix
84 ///
85 /// If no Revision matches the given prefix, `Ok(None)` is returned.
86 ///
87 /// If several Revisions match the given prefix, a [`MultipleResults`]
88 /// error is returned.
89 fn find_hex(
90 &self,
91 idx: &impl RevlogIndex,
92 prefix: &str,
93 ) -> Result<Option<Revision>, NodeMapError> {
94 self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
95 }
96 }
17
97
18 /// Low level NodeTree [`Blocks`] elements
98 /// Low level NodeTree [`Blocks`] elements
19 ///
99 ///
20 /// These are exactly as for instance on persistent storage.
100 /// These are exactly as for instance on persistent storage.
21 type RawElement = i32;
101 type RawElement = i32;
22
102
23 /// High level representation of values in NodeTree
103 /// High level representation of values in NodeTree
24 /// [`Blocks`](struct.Block.html)
104 /// [`Blocks`](struct.Block.html)
25 ///
105 ///
26 /// This is the high level representation that most algorithms should
106 /// This is the high level representation that most algorithms should
27 /// use.
107 /// use.
28 #[derive(Clone, Debug, Eq, PartialEq)]
108 #[derive(Clone, Debug, Eq, PartialEq)]
29 enum Element {
109 enum Element {
30 Rev(Revision),
110 Rev(Revision),
31 Block(usize),
111 Block(usize),
32 None,
112 None,
33 }
113 }
34
114
35 impl From<RawElement> for Element {
115 impl From<RawElement> for Element {
36 /// Conversion from low level representation, after endianness conversion.
116 /// Conversion from low level representation, after endianness conversion.
37 ///
117 ///
38 /// See [`Block`](struct.Block.html) for explanation about the encoding.
118 /// See [`Block`](struct.Block.html) for explanation about the encoding.
39 fn from(raw: RawElement) -> Element {
119 fn from(raw: RawElement) -> Element {
40 if raw >= 0 {
120 if raw >= 0 {
41 Element::Block(raw as usize)
121 Element::Block(raw as usize)
42 } else if raw == -1 {
122 } else if raw == -1 {
43 Element::None
123 Element::None
44 } else {
124 } else {
45 Element::Rev(-raw - 2)
125 Element::Rev(-raw - 2)
46 }
126 }
47 }
127 }
48 }
128 }
49
129
50 impl From<Element> for RawElement {
130 impl From<Element> for RawElement {
51 fn from(element: Element) -> RawElement {
131 fn from(element: Element) -> RawElement {
52 match element {
132 match element {
53 Element::None => 0,
133 Element::None => 0,
54 Element::Block(i) => i as RawElement,
134 Element::Block(i) => i as RawElement,
55 Element::Rev(rev) => -rev - 2,
135 Element::Rev(rev) => -rev - 2,
56 }
136 }
57 }
137 }
58 }
138 }
59
139
60 /// A logical block of the `NodeTree`, packed with a fixed size.
140 /// A logical block of the `NodeTree`, packed with a fixed size.
61 ///
141 ///
62 /// These are always used in container types implementing `Index<Block>`,
142 /// These are always used in container types implementing `Index<Block>`,
63 /// such as `&Block`
143 /// such as `&Block`
64 ///
144 ///
65 /// As an array of integers, its ith element encodes that the
145 /// As an array of integers, its ith element encodes that the
66 /// ith potential edge from the block, representing the ith hexadecimal digit
146 /// ith potential edge from the block, representing the ith hexadecimal digit
67 /// (nybble) `i` is either:
147 /// (nybble) `i` is either:
68 ///
148 ///
69 /// - absent (value -1)
149 /// - absent (value -1)
70 /// - another `Block` in the same indexable container (value β‰₯ 0)
150 /// - another `Block` in the same indexable container (value β‰₯ 0)
71 /// - a `Revision` leaf (value ≀ -2)
151 /// - a `Revision` leaf (value ≀ -2)
72 ///
152 ///
73 /// Endianness has to be fixed for consistency on shared storage across
153 /// Endianness has to be fixed for consistency on shared storage across
74 /// different architectures.
154 /// different architectures.
75 ///
155 ///
76 /// A key difference with the C `nodetree` is that we need to be
156 /// A key difference with the C `nodetree` is that we need to be
77 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
157 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
78 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
158 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
79 ///
159 ///
80 /// Another related difference is that `NULL_REVISION` (-1) is not
160 /// Another related difference is that `NULL_REVISION` (-1) is not
81 /// represented at all, because we want an immutable empty nodetree
161 /// represented at all, because we want an immutable empty nodetree
82 /// to be valid.
162 /// to be valid.
83
163
84 #[derive(Clone, PartialEq)]
164 #[derive(Clone, PartialEq)]
85 pub struct Block([RawElement; 16]);
165 pub struct Block([RawElement; 16]);
86
166
87 impl Block {
167 impl Block {
88 fn new() -> Self {
168 fn new() -> Self {
89 Block([-1; 16])
169 Block([-1; 16])
90 }
170 }
91
171
92 fn get(&self, nybble: u8) -> Element {
172 fn get(&self, nybble: u8) -> Element {
93 Element::from(RawElement::from_be(self.0[nybble as usize]))
173 Element::from(RawElement::from_be(self.0[nybble as usize]))
94 }
174 }
95
175
96 fn set(&mut self, nybble: u8, element: Element) {
176 fn set(&mut self, nybble: u8, element: Element) {
97 self.0[nybble as usize] = RawElement::to_be(element.into())
177 self.0[nybble as usize] = RawElement::to_be(element.into())
98 }
178 }
99 }
179 }
100
180
101 impl fmt::Debug for Block {
181 impl fmt::Debug for Block {
102 /// sparse representation for testing and debugging purposes
182 /// sparse representation for testing and debugging purposes
103 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
183 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
104 f.debug_map()
184 f.debug_map()
105 .entries((0..16).filter_map(|i| match self.get(i) {
185 .entries((0..16).filter_map(|i| match self.get(i) {
106 Element::None => None,
186 Element::None => None,
107 element => Some((i, element)),
187 element => Some((i, element)),
108 }))
188 }))
109 .finish()
189 .finish()
110 }
190 }
111 }
191 }
112
192
193 /// A 16-radix tree with the root block at the end
194 pub struct NodeTree {
195 readonly: Box<dyn Deref<Target = [Block]> + Send>,
196 }
197
198 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
199 fn has_prefix_or_none<'p>(
200 idx: &impl RevlogIndex,
201 prefix: NodePrefixRef<'p>,
202 rev: Revision,
203 ) -> Result<Option<Revision>, NodeMapError> {
204 idx.node(rev)
205 .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev))
206 .map(|node| {
207 if prefix.is_prefix_of(node) {
208 Some(rev)
209 } else {
210 None
211 }
212 })
213 }
214
215 impl NodeTree {
216 /// Main working method for `NodeTree` searches
217 ///
218 /// This partial implementation lacks special cases for NULL_REVISION
219 fn lookup<'p>(
220 &self,
221 prefix: NodePrefixRef<'p>,
222 ) -> Result<Option<Revision>, NodeMapError> {
223 let blocks: &[Block] = &*self.readonly;
224 if blocks.is_empty() {
225 return Ok(None);
226 }
227 let mut visit = blocks.len() - 1;
228 for i in 0..prefix.len() {
229 let nybble = prefix.get_nybble(i);
230 match blocks[visit].get(nybble) {
231 Element::None => return Ok(None),
232 Element::Rev(r) => return Ok(Some(r)),
233 Element::Block(idx) => visit = idx,
234 }
235 }
236 Err(NodeMapError::MultipleResults)
237 }
238 }
239
240 impl From<Vec<Block>> for NodeTree {
241 fn from(vec: Vec<Block>) -> Self {
242 NodeTree {
243 readonly: Box::new(vec),
244 }
245 }
246 }
247
248 impl fmt::Debug for NodeTree {
249 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
250 let blocks: &[Block] = &*self.readonly;
251 write!(f, "readonly: {:?}", blocks)
252 }
253 }
254
255 impl NodeMap for NodeTree {
256 fn find_bin<'a>(
257 &self,
258 idx: &impl RevlogIndex,
259 prefix: NodePrefixRef<'a>,
260 ) -> Result<Option<Revision>, NodeMapError> {
261 self.lookup(prefix.clone()).and_then(|opt| {
262 opt.map_or(Ok(None), |rev| has_prefix_or_none(idx, prefix, rev))
263 })
264 }
265 }
266
113 #[cfg(test)]
267 #[cfg(test)]
114 mod tests {
268 mod tests {
269 use super::NodeMapError::*;
115 use super::*;
270 use super::*;
271 use crate::revlog::node::{hex_pad_right, Node};
272 use std::collections::HashMap;
116
273
117 /// Creates a `Block` using a syntax close to the `Debug` output
274 /// Creates a `Block` using a syntax close to the `Debug` output
118 macro_rules! block {
275 macro_rules! block {
119 {$($nybble:tt : $variant:ident($val:tt)),*} => (
276 {$($nybble:tt : $variant:ident($val:tt)),*} => (
120 {
277 {
121 let mut block = Block::new();
278 let mut block = Block::new();
122 $(block.set($nybble, Element::$variant($val)));*;
279 $(block.set($nybble, Element::$variant($val)));*;
123 block
280 block
124 }
281 }
125 )
282 )
126 }
283 }
127
284
128 #[test]
285 #[test]
129 fn test_block_debug() {
286 fn test_block_debug() {
130 let mut block = Block::new();
287 let mut block = Block::new();
131 block.set(1, Element::Rev(3));
288 block.set(1, Element::Rev(3));
132 block.set(10, Element::Block(0));
289 block.set(10, Element::Block(0));
133 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
290 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
134 }
291 }
135
292
136 #[test]
293 #[test]
137 fn test_block_macro() {
294 fn test_block_macro() {
138 let block = block! {5: Block(2)};
295 let block = block! {5: Block(2)};
139 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
296 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
140
297
141 let block = block! {13: Rev(15), 5: Block(2)};
298 let block = block! {13: Rev(15), 5: Block(2)};
142 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
299 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
143 }
300 }
144
301
145 #[test]
302 #[test]
146 fn test_raw_block() {
303 fn test_raw_block() {
147 let mut raw = [-1; 16];
304 let mut raw = [-1; 16];
148 raw[0] = 0;
305 raw[0] = 0;
149 raw[1] = RawElement::to_be(15);
306 raw[1] = RawElement::to_be(15);
150 raw[2] = RawElement::to_be(-2);
307 raw[2] = RawElement::to_be(-2);
151 raw[3] = RawElement::to_be(-1);
308 raw[3] = RawElement::to_be(-1);
152 raw[4] = RawElement::to_be(-3);
309 raw[4] = RawElement::to_be(-3);
153 let block = Block(raw);
310 let block = Block(raw);
154 assert_eq!(block.get(0), Element::Block(0));
311 assert_eq!(block.get(0), Element::Block(0));
155 assert_eq!(block.get(1), Element::Block(15));
312 assert_eq!(block.get(1), Element::Block(15));
156 assert_eq!(block.get(3), Element::None);
313 assert_eq!(block.get(3), Element::None);
157 assert_eq!(block.get(2), Element::Rev(0));
314 assert_eq!(block.get(2), Element::Rev(0));
158 assert_eq!(block.get(4), Element::Rev(1));
315 assert_eq!(block.get(4), Element::Rev(1));
159 }
316 }
317
318 type TestIndex = HashMap<Revision, Node>;
319
320 impl RevlogIndex for TestIndex {
321 fn node(&self, rev: Revision) -> Option<&Node> {
322 self.get(&rev)
323 }
324
325 fn len(&self) -> usize {
326 self.len()
327 }
328 }
329
330 /// Pad hexadecimal Node prefix with zeros on the right, then insert
331 ///
332 /// This avoids having to repeatedly write very long hexadecimal
333 /// strings for test data, and brings actual hash size independency.
334 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
335 idx.insert(rev, Node::from_hex(&hex_pad_right(hex)).unwrap());
336 }
337
338 fn sample_nodetree() -> NodeTree {
339 NodeTree::from(vec![
340 block![0: Rev(9)],
341 block![0: Rev(0), 1: Rev(9)],
342 block![0: Block(1), 1:Rev(1)],
343 ])
344 }
345
346 #[test]
347 fn test_nt_debug() {
348 let nt = sample_nodetree();
349 assert_eq!(
350 format!("{:?}", nt),
351 "readonly: \
352 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}]"
353 );
354 }
355
356 #[test]
357 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
358 let mut idx: TestIndex = HashMap::new();
359 pad_insert(&mut idx, 1, "1234deadcafe");
360
361 let nt = NodeTree::from(vec![block![1: Rev(1)]]);
362 assert_eq!(nt.find_hex(&idx, "1")?, Some(1));
363 assert_eq!(nt.find_hex(&idx, "12")?, Some(1));
364 assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1));
365 assert_eq!(nt.find_hex(&idx, "1a")?, None);
366 assert_eq!(nt.find_hex(&idx, "ab")?, None);
367
368 // and with full binary Nodes
369 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
370 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
371 assert_eq!(nt.find_node(&idx, &unknown)?, None);
372 Ok(())
373 }
374
375 #[test]
376 fn test_immutable_find_one_jump() {
377 let mut idx = TestIndex::new();
378 pad_insert(&mut idx, 9, "012");
379 pad_insert(&mut idx, 0, "00a");
380
381 let nt = sample_nodetree();
382
383 assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults));
384 assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
385 assert_eq!(nt.find_hex(&idx, "00"), Ok(Some(0)));
386 assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
387 }
160 }
388 }
General Comments 0
You need to be logged in to leave comments. Login now