##// END OF EJS Templates
rust-nodemap: core implementation for shortest...
Georges Racinet -
r44872:5ac1eecc default
parent child Browse files
Show More
@@ -1,368 +1,428
1 1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5
6 6 //! Definitions and utilities for Revision nodes
7 7 //!
8 8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 9 //! of a revision.
10 10
11 11 use hex::{self, FromHex, FromHexError};
12 12
13 13 /// The length in bytes of a `Node`
14 14 ///
15 15 /// This constant is meant to ease refactors of this module, and
16 16 /// are private so that calling code does not expect all nodes have
17 17 /// the same size, should we support several formats concurrently in
18 18 /// the future.
19 19 const NODE_BYTES_LENGTH: usize = 20;
20 20
21 21 /// The length in bytes of a `Node`
22 22 ///
23 23 /// see also `NODES_BYTES_LENGTH` about it being private.
24 24 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
25 25
26 26 /// Private alias for readability and to ease future change
27 27 type NodeData = [u8; NODE_BYTES_LENGTH];
28 28
29 29 /// Binary revision SHA
30 30 ///
31 31 /// ## Future changes of hash size
32 32 ///
33 33 /// To accomodate future changes of hash size, Rust callers
34 34 /// should use the conversion methods at the boundaries (FFI, actual
35 35 /// computation of hashes and I/O) only, and only if required.
36 36 ///
37 37 /// All other callers outside of unit tests should just handle `Node` values
38 38 /// and never make any assumption on the actual length, using [`nybbles_len`]
39 39 /// if they need a loop boundary.
40 40 ///
41 41 /// All methods that create a `Node` either take a type that enforces
42 42 /// the size or fail immediately at runtime with [`ExactLengthRequired`].
43 43 ///
44 44 /// [`nybbles_len`]: #method.nybbles_len
45 45 /// [`ExactLengthRequired`]: struct.NodeError#variant.ExactLengthRequired
46 46 #[derive(Clone, Debug, PartialEq)]
47 47 pub struct Node {
48 48 data: NodeData,
49 49 }
50 50
51 51 /// The node value for NULL_REVISION
52 52 pub const NULL_NODE: Node = Node {
53 53 data: [0; NODE_BYTES_LENGTH],
54 54 };
55 55
56 56 impl From<NodeData> for Node {
57 57 fn from(data: NodeData) -> Node {
58 58 Node { data }
59 59 }
60 60 }
61 61
62 62 #[derive(Debug, PartialEq)]
63 63 pub enum NodeError {
64 64 ExactLengthRequired(usize, String),
65 65 PrefixTooLong(String),
66 66 HexError(FromHexError, String),
67 67 }
68 68
69 69 /// Low level utility function, also for prefixes
70 70 fn get_nybble(s: &[u8], i: usize) -> u8 {
71 71 if i % 2 == 0 {
72 72 s[i / 2] >> 4
73 73 } else {
74 74 s[i / 2] & 0x0f
75 75 }
76 76 }
77 77
78 78 impl Node {
79 79 /// Retrieve the `i`th half-byte of the binary data.
80 80 ///
81 81 /// This is also the `i`th hexadecimal digit in numeric form,
82 82 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
83 83 pub fn get_nybble(&self, i: usize) -> u8 {
84 84 get_nybble(&self.data, i)
85 85 }
86 86
87 87 /// Length of the data, in nybbles
88 88 pub fn nybbles_len(&self) -> usize {
89 89 // public exposure as an instance method only, so that we can
90 90 // easily support several sizes of hashes if needed in the future.
91 91 NODE_NYBBLES_LENGTH
92 92 }
93 93
94 94 /// Convert from hexadecimal string representation
95 95 ///
96 96 /// Exact length is required.
97 97 ///
98 98 /// To be used in FFI and I/O only, in order to facilitate future
99 99 /// changes of hash format.
100 100 pub fn from_hex(hex: &str) -> Result<Node, NodeError> {
101 101 Ok(NodeData::from_hex(hex)
102 102 .map_err(|e| NodeError::from((e, hex)))?
103 103 .into())
104 104 }
105 105
106 106 /// Convert to hexadecimal string representation
107 107 ///
108 108 /// To be used in FFI and I/O only, in order to facilitate future
109 109 /// changes of hash format.
110 110 pub fn encode_hex(&self) -> String {
111 111 hex::encode(self.data)
112 112 }
113 113
114 114 /// Provide access to binary data
115 115 ///
116 116 /// This is needed by FFI layers, for instance to return expected
117 117 /// binary values to Python.
118 118 pub fn as_bytes(&self) -> &[u8] {
119 119 &self.data
120 120 }
121 121 }
122 122
123 123 impl<T: AsRef<str>> From<(FromHexError, T)> for NodeError {
124 124 fn from(err_offender: (FromHexError, T)) -> Self {
125 125 let (err, offender) = err_offender;
126 126 match err {
127 127 FromHexError::InvalidStringLength => {
128 128 NodeError::ExactLengthRequired(
129 129 NODE_NYBBLES_LENGTH,
130 130 offender.as_ref().to_owned(),
131 131 )
132 132 }
133 133 _ => NodeError::HexError(err, offender.as_ref().to_owned()),
134 134 }
135 135 }
136 136 }
137 137
138 138 /// The beginning of a binary revision SHA.
139 139 ///
140 140 /// Since it can potentially come from an hexadecimal representation with
141 141 /// odd length, it needs to carry around whether the last 4 bits are relevant
142 142 /// or not.
143 143 #[derive(Debug, PartialEq)]
144 144 pub struct NodePrefix {
145 145 buf: Vec<u8>,
146 146 is_odd: bool,
147 147 }
148 148
149 149 impl NodePrefix {
150 150 /// Convert from hexadecimal string representation
151 151 ///
152 152 /// Similarly to `hex::decode`, can be used with Unicode string types
153 153 /// (`String`, `&str`) as well as bytes.
154 154 ///
155 155 /// To be used in FFI and I/O only, in order to facilitate future
156 156 /// changes of hash format.
157 157 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, NodeError> {
158 158 let hex = hex.as_ref();
159 159 let len = hex.len();
160 160 if len > NODE_NYBBLES_LENGTH {
161 161 return Err(NodeError::PrefixTooLong(
162 162 String::from_utf8_lossy(hex).to_owned().to_string(),
163 163 ));
164 164 }
165 165
166 166 let is_odd = len % 2 == 1;
167 167 let even_part = if is_odd { &hex[..len - 1] } else { hex };
168 168 let mut buf: Vec<u8> = Vec::from_hex(&even_part)
169 169 .map_err(|e| (e, String::from_utf8_lossy(hex)))?;
170 170
171 171 if is_odd {
172 172 let latest_char = char::from(hex[len - 1]);
173 173 let latest_nybble = latest_char.to_digit(16).ok_or_else(|| {
174 174 (
175 175 FromHexError::InvalidHexCharacter {
176 176 c: latest_char,
177 177 index: len - 1,
178 178 },
179 179 String::from_utf8_lossy(hex),
180 180 )
181 181 })? as u8;
182 182 buf.push(latest_nybble << 4);
183 183 }
184 184 Ok(NodePrefix { buf, is_odd })
185 185 }
186 186
187 187 pub fn borrow(&self) -> NodePrefixRef {
188 188 NodePrefixRef {
189 189 buf: &self.buf,
190 190 is_odd: self.is_odd,
191 191 }
192 192 }
193 193 }
194 194
195 195 #[derive(Clone, Debug, PartialEq)]
196 196 pub struct NodePrefixRef<'a> {
197 197 buf: &'a [u8],
198 198 is_odd: bool,
199 199 }
200 200
201 201 impl<'a> NodePrefixRef<'a> {
202 202 pub fn len(&self) -> usize {
203 203 if self.is_odd {
204 204 self.buf.len() * 2 - 1
205 205 } else {
206 206 self.buf.len() * 2
207 207 }
208 208 }
209 209
210 210 pub fn is_prefix_of(&self, node: &Node) -> bool {
211 211 if self.is_odd {
212 212 let buf = self.buf;
213 213 let last_pos = buf.len() - 1;
214 214 node.data.starts_with(buf.split_at(last_pos).0)
215 215 && node.data[last_pos] >> 4 == buf[last_pos] >> 4
216 216 } else {
217 217 node.data.starts_with(self.buf)
218 218 }
219 219 }
220 220
221 221 /// Retrieve the `i`th half-byte from the prefix.
222 222 ///
223 223 /// This is also the `i`th hexadecimal digit in numeric form,
224 224 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
225 225 pub fn get_nybble(&self, i: usize) -> u8 {
226 226 assert!(i < self.len());
227 227 get_nybble(self.buf, i)
228 228 }
229
230 /// Return the index first nybble that's different from `node`
231 ///
232 /// If the return value is `None` that means that `self` is
233 /// a prefix of `node`, but the current method is a bit slower
234 /// than `is_prefix_of`.
235 ///
236 /// Returned index is as in `get_nybble`, i.e., starting at 0.
237 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
238 let buf = self.buf;
239 let until = if self.is_odd {
240 buf.len() - 1
241 } else {
242 buf.len()
243 };
244 for i in 0..until {
245 if buf[i] != node.data[i] {
246 if buf[i] & 0xf0 == node.data[i] & 0xf0 {
247 return Some(2 * i + 1);
248 } else {
249 return Some(2 * i);
250 }
251 }
252 }
253 if self.is_odd && buf[until] & 0xf0 != node.data[until] & 0xf0 {
254 Some(until * 2)
255 } else {
256 None
257 }
258 }
229 259 }
230 260
231 261 /// A shortcut for full `Node` references
232 262 impl<'a> From<&'a Node> for NodePrefixRef<'a> {
233 263 fn from(node: &'a Node) -> Self {
234 264 NodePrefixRef {
235 265 buf: &node.data,
236 266 is_odd: false,
237 267 }
238 268 }
239 269 }
240 270
241 271 #[cfg(test)]
242 272 mod tests {
243 273 use super::*;
244 274
245 275 fn sample_node() -> Node {
246 276 let mut data = [0; NODE_BYTES_LENGTH];
247 277 data.copy_from_slice(&[
248 278 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
249 279 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
250 280 ]);
251 281 data.into()
252 282 }
253 283
254 284 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
255 285 ///
256 286 /// The padding is made with zeros
257 287 pub fn hex_pad_right(hex: &str) -> String {
258 288 let mut res = hex.to_string();
259 289 while res.len() < NODE_NYBBLES_LENGTH {
260 290 res.push('0');
261 291 }
262 292 res
263 293 }
264 294
265 295 fn sample_node_hex() -> String {
266 296 hex_pad_right("0123456789abcdeffedcba9876543210deadbeef")
267 297 }
268 298
269 299 #[test]
270 300 fn test_node_from_hex() {
271 301 assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node()));
272 302
273 303 let mut short = hex_pad_right("0123");
274 304 short.pop();
275 305 short.pop();
276 306 assert_eq!(
277 307 Node::from_hex(&short),
278 308 Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)),
279 309 );
280 310
281 311 let not_hex = hex_pad_right("012... oops");
282 312 assert_eq!(
283 313 Node::from_hex(&not_hex),
284 314 Err(NodeError::HexError(
285 315 FromHexError::InvalidHexCharacter { c: '.', index: 3 },
286 316 not_hex,
287 317 )),
288 318 );
289 319 }
290 320
291 321 #[test]
292 322 fn test_node_encode_hex() {
293 323 assert_eq!(sample_node().encode_hex(), sample_node_hex());
294 324 }
295 325
296 326 #[test]
297 327 fn test_prefix_from_hex() -> Result<(), NodeError> {
298 328 assert_eq!(
299 329 NodePrefix::from_hex("0e1")?,
300 330 NodePrefix {
301 331 buf: vec![14, 16],
302 332 is_odd: true
303 333 }
304 334 );
305 335 assert_eq!(
306 336 NodePrefix::from_hex("0e1a")?,
307 337 NodePrefix {
308 338 buf: vec![14, 26],
309 339 is_odd: false
310 340 }
311 341 );
312 342
313 343 // checking limit case
314 344 let node_as_vec = sample_node().data.iter().cloned().collect();
315 345 assert_eq!(
316 346 NodePrefix::from_hex(sample_node_hex())?,
317 347 NodePrefix {
318 348 buf: node_as_vec,
319 349 is_odd: false
320 350 }
321 351 );
322 352
323 353 Ok(())
324 354 }
325 355
326 356 #[test]
327 357 fn test_prefix_from_hex_errors() {
328 358 assert_eq!(
329 359 NodePrefix::from_hex("testgr"),
330 360 Err(NodeError::HexError(
331 361 FromHexError::InvalidHexCharacter { c: 't', index: 0 },
332 362 "testgr".to_string()
333 363 ))
334 364 );
335 365 let mut long = NULL_NODE.encode_hex();
336 366 long.push('c');
337 367 match NodePrefix::from_hex(&long)
338 368 .expect_err("should be refused as too long")
339 369 {
340 370 NodeError::PrefixTooLong(s) => assert_eq!(s, long),
341 371 err => panic!(format!("Should have been TooLong, got {:?}", err)),
342 372 }
343 373 }
344 374
345 375 #[test]
346 376 fn test_is_prefix_of() -> Result<(), NodeError> {
347 377 let mut node_data = [0; NODE_BYTES_LENGTH];
348 378 node_data[0] = 0x12;
349 379 node_data[1] = 0xca;
350 380 let node = Node::from(node_data);
351 381 assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node));
352 382 assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node));
353 383 assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node));
354 384 assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node));
355 385 Ok(())
356 386 }
357 387
358 388 #[test]
359 389 fn test_get_nybble() -> Result<(), NodeError> {
360 390 let prefix = NodePrefix::from_hex("dead6789cafe")?;
361 391 assert_eq!(prefix.borrow().get_nybble(0), 13);
362 392 assert_eq!(prefix.borrow().get_nybble(7), 9);
363 393 Ok(())
364 394 }
395
396 #[test]
397 fn test_first_different_nybble_even_prefix() {
398 let prefix = NodePrefix::from_hex("12ca").unwrap();
399 let prefref = prefix.borrow();
400 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
401 assert_eq!(prefref.first_different_nybble(&node), Some(0));
402 node.data[0] = 0x13;
403 assert_eq!(prefref.first_different_nybble(&node), Some(1));
404 node.data[0] = 0x12;
405 assert_eq!(prefref.first_different_nybble(&node), Some(2));
406 node.data[1] = 0xca;
407 // now it is a prefix
408 assert_eq!(prefref.first_different_nybble(&node), None);
409 }
410
411 #[test]
412 fn test_first_different_nybble_odd_prefix() {
413 let prefix = NodePrefix::from_hex("12c").unwrap();
414 let prefref = prefix.borrow();
415 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
416 assert_eq!(prefref.first_different_nybble(&node), Some(0));
417 node.data[0] = 0x13;
418 assert_eq!(prefref.first_different_nybble(&node), Some(1));
419 node.data[0] = 0x12;
420 assert_eq!(prefref.first_different_nybble(&node), Some(2));
421 node.data[1] = 0xca;
422 // now it is a prefix
423 assert_eq!(prefref.first_different_nybble(&node), None);
424 }
365 425 }
366 426
367 427 #[cfg(test)]
368 428 pub use tests::hex_pad_right;
@@ -1,960 +1,1056
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 7 //!
8 8 //! This provides a variation on the 16-ary radix tree that is
9 9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 10 //! on disk.
11 11 //!
12 12 //! Following existing implicit conventions, the "nodemap" terminology
13 13 //! is used in a more abstract context.
14 14
15 15 use super::{
16 16 node::NULL_NODE, Node, NodeError, NodePrefix, NodePrefixRef, Revision,
17 17 RevlogIndex, NULL_REVISION,
18 18 };
19 19
20 use std::cmp::max;
20 21 use std::fmt;
21 22 use std::mem;
22 23 use std::ops::Deref;
23 24 use std::ops::Index;
24 25 use std::slice;
25 26
26 27 #[derive(Debug, PartialEq)]
27 28 pub enum NodeMapError {
28 29 MultipleResults,
29 30 InvalidNodePrefix(NodeError),
30 31 /// A `Revision` stored in the nodemap could not be found in the index
31 32 RevisionNotInIndex(Revision),
32 33 }
33 34
34 35 impl From<NodeError> for NodeMapError {
35 36 fn from(err: NodeError) -> Self {
36 37 NodeMapError::InvalidNodePrefix(err)
37 38 }
38 39 }
39 40
40 41 /// Mapping system from Mercurial nodes to revision numbers.
41 42 ///
42 43 /// ## `RevlogIndex` and `NodeMap`
43 44 ///
44 45 /// One way to think about their relationship is that
45 46 /// the `NodeMap` is a prefix-oriented reverse index of the `Node` information
46 47 /// carried by a [`RevlogIndex`].
47 48 ///
48 49 /// Many of the methods in this trait take a `RevlogIndex` argument
49 50 /// which is used for validation of their results. This index must naturally
50 51 /// be the one the `NodeMap` is about, and it must be consistent.
51 52 ///
52 53 /// Notably, the `NodeMap` must not store
53 54 /// information about more `Revision` values than there are in the index.
54 55 /// In these methods, an encountered `Revision` is not in the index, a
55 56 /// [`RevisionNotInIndex`] error is returned.
56 57 ///
57 58 /// In insert operations, the rule is thus that the `NodeMap` must always
58 59 /// be updated after the `RevlogIndex`
59 60 /// be updated first, and the `NodeMap` second.
60 61 ///
61 62 /// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex
62 63 /// [`RevlogIndex`]: ../trait.RevlogIndex.html
63 64 pub trait NodeMap {
64 65 /// Find the unique `Revision` having the given `Node`
65 66 ///
66 67 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
67 68 fn find_node(
68 69 &self,
69 70 index: &impl RevlogIndex,
70 71 node: &Node,
71 72 ) -> Result<Option<Revision>, NodeMapError> {
72 73 self.find_bin(index, node.into())
73 74 }
74 75
75 76 /// Find the unique Revision whose `Node` starts with a given binary prefix
76 77 ///
77 78 /// If no Revision matches the given prefix, `Ok(None)` is returned.
78 79 ///
79 80 /// If several Revisions match the given prefix, a [`MultipleResults`]
80 81 /// error is returned.
81 82 fn find_bin<'a>(
82 83 &self,
83 84 idx: &impl RevlogIndex,
84 85 prefix: NodePrefixRef<'a>,
85 86 ) -> Result<Option<Revision>, NodeMapError>;
86 87
87 88 /// Find the unique Revision whose `Node` hexadecimal string representation
88 89 /// starts with a given prefix
89 90 ///
90 91 /// If no Revision matches the given prefix, `Ok(None)` is returned.
91 92 ///
92 93 /// If several Revisions match the given prefix, a [`MultipleResults`]
93 94 /// error is returned.
94 95 fn find_hex(
95 96 &self,
96 97 idx: &impl RevlogIndex,
97 98 prefix: &str,
98 99 ) -> Result<Option<Revision>, NodeMapError> {
99 100 self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
100 101 }
102
103 /// Give the size of the shortest node prefix that determines
104 /// the revision uniquely.
105 ///
106 /// From a binary node prefix, if it is matched in the node map, this
107 /// returns the number of hexadecimal digits that would had sufficed
108 /// to find the revision uniquely.
109 ///
110 /// Returns `None` if no `Revision` could be found for the prefix.
111 ///
112 /// If several Revisions match the given prefix, a [`MultipleResults`]
113 /// error is returned.
114 fn unique_prefix_len_bin<'a>(
115 &self,
116 idx: &impl RevlogIndex,
117 node_prefix: NodePrefixRef<'a>,
118 ) -> Result<Option<usize>, NodeMapError>;
119
120 /// Same as `unique_prefix_len_bin`, with the hexadecimal representation
121 /// of the prefix as input.
122 fn unique_prefix_len_hex(
123 &self,
124 idx: &impl RevlogIndex,
125 prefix: &str,
126 ) -> Result<Option<usize>, NodeMapError> {
127 self.unique_prefix_len_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
128 }
129
130 /// Same as `unique_prefix_len_bin`, with a full `Node` as input
131 fn unique_prefix_len_node(
132 &self,
133 idx: &impl RevlogIndex,
134 node: &Node,
135 ) -> Result<Option<usize>, NodeMapError> {
136 self.unique_prefix_len_bin(idx, node.into())
137 }
101 138 }
102 139
103 140 pub trait MutableNodeMap: NodeMap {
104 141 fn insert<I: RevlogIndex>(
105 142 &mut self,
106 143 index: &I,
107 144 node: &Node,
108 145 rev: Revision,
109 146 ) -> Result<(), NodeMapError>;
110 147 }
111 148
112 149 /// Low level NodeTree [`Blocks`] elements
113 150 ///
114 151 /// These are exactly as for instance on persistent storage.
115 152 type RawElement = i32;
116 153
117 154 /// High level representation of values in NodeTree
118 155 /// [`Blocks`](struct.Block.html)
119 156 ///
120 157 /// This is the high level representation that most algorithms should
121 158 /// use.
122 159 #[derive(Clone, Debug, Eq, PartialEq)]
123 160 enum Element {
124 161 Rev(Revision),
125 162 Block(usize),
126 163 None,
127 164 }
128 165
129 166 impl From<RawElement> for Element {
130 167 /// Conversion from low level representation, after endianness conversion.
131 168 ///
132 169 /// See [`Block`](struct.Block.html) for explanation about the encoding.
133 170 fn from(raw: RawElement) -> Element {
134 171 if raw >= 0 {
135 172 Element::Block(raw as usize)
136 173 } else if raw == -1 {
137 174 Element::None
138 175 } else {
139 176 Element::Rev(-raw - 2)
140 177 }
141 178 }
142 179 }
143 180
144 181 impl From<Element> for RawElement {
145 182 fn from(element: Element) -> RawElement {
146 183 match element {
147 184 Element::None => 0,
148 185 Element::Block(i) => i as RawElement,
149 186 Element::Rev(rev) => -rev - 2,
150 187 }
151 188 }
152 189 }
153 190
154 191 /// A logical block of the `NodeTree`, packed with a fixed size.
155 192 ///
156 193 /// These are always used in container types implementing `Index<Block>`,
157 194 /// such as `&Block`
158 195 ///
159 196 /// As an array of integers, its ith element encodes that the
160 197 /// ith potential edge from the block, representing the ith hexadecimal digit
161 198 /// (nybble) `i` is either:
162 199 ///
163 200 /// - absent (value -1)
164 201 /// - another `Block` in the same indexable container (value β‰₯ 0)
165 202 /// - a `Revision` leaf (value ≀ -2)
166 203 ///
167 204 /// Endianness has to be fixed for consistency on shared storage across
168 205 /// different architectures.
169 206 ///
170 207 /// A key difference with the C `nodetree` is that we need to be
171 208 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
172 209 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
173 210 ///
174 211 /// Another related difference is that `NULL_REVISION` (-1) is not
175 212 /// represented at all, because we want an immutable empty nodetree
176 213 /// to be valid.
177 214
178 215 #[derive(Copy, Clone)]
179 216 pub struct Block([u8; BLOCK_SIZE]);
180 217
181 218 /// Not derivable for arrays of length >32 until const generics are stable
182 219 impl PartialEq for Block {
183 220 fn eq(&self, other: &Self) -> bool {
184 221 &self.0[..] == &other.0[..]
185 222 }
186 223 }
187 224
188 225 pub const BLOCK_SIZE: usize = 64;
189 226
190 227 impl Block {
191 228 fn new() -> Self {
192 229 // -1 in 2's complement to create an absent node
193 230 let byte: u8 = 255;
194 231 Block([byte; BLOCK_SIZE])
195 232 }
196 233
197 234 fn get(&self, nybble: u8) -> Element {
198 235 let index = nybble as usize * mem::size_of::<RawElement>();
199 236 Element::from(RawElement::from_be_bytes([
200 237 self.0[index],
201 238 self.0[index + 1],
202 239 self.0[index + 2],
203 240 self.0[index + 3],
204 241 ]))
205 242 }
206 243
207 244 fn set(&mut self, nybble: u8, element: Element) {
208 245 let values = RawElement::to_be_bytes(element.into());
209 246 let index = nybble as usize * mem::size_of::<RawElement>();
210 247 self.0[index] = values[0];
211 248 self.0[index + 1] = values[1];
212 249 self.0[index + 2] = values[2];
213 250 self.0[index + 3] = values[3];
214 251 }
215 252 }
216 253
217 254 impl fmt::Debug for Block {
218 255 /// sparse representation for testing and debugging purposes
219 256 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
220 257 f.debug_map()
221 258 .entries((0..16).filter_map(|i| match self.get(i) {
222 259 Element::None => None,
223 260 element => Some((i, element)),
224 261 }))
225 262 .finish()
226 263 }
227 264 }
228 265
229 266 /// A mutable 16-radix tree with the root block logically at the end
230 267 ///
231 268 /// Because of the append only nature of our node trees, we need to
232 269 /// keep the original untouched and store new blocks separately.
233 270 ///
234 271 /// The mutable root `Block` is kept apart so that we don't have to rebump
235 272 /// it on each insertion.
236 273 pub struct NodeTree {
237 274 readonly: Box<dyn Deref<Target = [Block]> + Send>,
238 275 growable: Vec<Block>,
239 276 root: Block,
240 277 }
241 278
242 279 impl Index<usize> for NodeTree {
243 280 type Output = Block;
244 281
245 282 fn index(&self, i: usize) -> &Block {
246 283 let ro_len = self.readonly.len();
247 284 if i < ro_len {
248 285 &self.readonly[i]
249 286 } else if i == ro_len + self.growable.len() {
250 287 &self.root
251 288 } else {
252 289 &self.growable[i - ro_len]
253 290 }
254 291 }
255 292 }
256 293
257 294 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
258 295 fn has_prefix_or_none(
259 296 idx: &impl RevlogIndex,
260 297 prefix: NodePrefixRef,
261 298 rev: Revision,
262 299 ) -> Result<Option<Revision>, NodeMapError> {
263 300 idx.node(rev)
264 301 .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev))
265 302 .map(|node| {
266 303 if prefix.is_prefix_of(node) {
267 304 Some(rev)
268 305 } else {
269 306 None
270 307 }
271 308 })
272 309 }
273 310
274 311 /// validate that the candidate's node starts indeed with given prefix,
275 312 /// and treat ambiguities related to `NULL_REVISION`.
276 313 ///
277 314 /// From the data in the NodeTree, one can only conclude that some
278 315 /// revision is the only one for a *subprefix* of the one being looked up.
279 316 fn validate_candidate(
280 317 idx: &impl RevlogIndex,
281 318 prefix: NodePrefixRef,
282 rev: Option<Revision>,
283 ) -> Result<Option<Revision>, NodeMapError> {
284 if prefix.is_prefix_of(&NULL_NODE) {
285 // NULL_REVISION always matches a prefix made only of zeros
319 candidate: (Option<Revision>, usize),
320 ) -> Result<(Option<Revision>, usize), NodeMapError> {
321 let (rev, steps) = candidate;
322 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
323 rev.map_or(Ok((None, steps)), |r| {
324 has_prefix_or_none(idx, prefix, r)
325 .map(|opt| (opt, max(steps, nz_nybble + 1)))
326 })
327 } else {
328 // the prefix is only made of zeros; NULL_REVISION always matches it
286 329 // and any other *valid* result is an ambiguity
287 330 match rev {
288 None => Ok(Some(NULL_REVISION)),
331 None => Ok((Some(NULL_REVISION), steps + 1)),
289 332 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
290 None => Ok(Some(NULL_REVISION)),
333 None => Ok((Some(NULL_REVISION), steps + 1)),
291 334 _ => Err(NodeMapError::MultipleResults),
292 335 },
293 336 }
294 } else {
295 rev.map_or(Ok(None), |r| has_prefix_or_none(idx, prefix, r))
296 337 }
297 338 }
298 339
299 340 impl NodeTree {
300 341 /// Initiate a NodeTree from an immutable slice-like of `Block`
301 342 ///
302 343 /// We keep `readonly` and clone its root block if it isn't empty.
303 344 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
304 345 let root = readonly
305 346 .last()
306 347 .map(|b| b.clone())
307 348 .unwrap_or_else(|| Block::new());
308 349 NodeTree {
309 350 readonly: readonly,
310 351 growable: Vec::new(),
311 352 root: root,
312 353 }
313 354 }
314 355
315 356 /// Create from an opaque bunch of bytes
316 357 ///
317 358 /// The created `NodeTreeBytes` from `buffer`,
318 359 /// of which exactly `amount` bytes are used.
319 360 ///
320 361 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
321 362 /// - `offset` allows for the final file format to include fixed data
322 363 /// (generation number, behavioural flags)
323 364 /// - `amount` is expressed in bytes, and is not automatically derived from
324 365 /// `bytes`, so that a caller that manages them atomically can perform
325 366 /// temporary disk serializations and still rollback easily if needed.
326 367 /// First use-case for this would be to support Mercurial shell hooks.
327 368 ///
328 369 /// panics if `buffer` is smaller than `amount`
329 370 pub fn load_bytes(
330 371 bytes: Box<dyn Deref<Target = [u8]> + Send>,
331 372 amount: usize,
332 373 ) -> Self {
333 374 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
334 375 }
335 376
336 377 /// Retrieve added `Block` and the original immutable data
337 378 pub fn into_readonly_and_added(
338 379 self,
339 380 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
340 381 let mut vec = self.growable;
341 382 let readonly = self.readonly;
342 383 if readonly.last() != Some(&self.root) {
343 384 vec.push(self.root);
344 385 }
345 386 (readonly, vec)
346 387 }
347 388
348 389 /// Retrieve added `Blocks` as bytes, ready to be written to persistent
349 390 /// storage
350 391 pub fn into_readonly_and_added_bytes(
351 392 self,
352 393 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
353 394 let (readonly, vec) = self.into_readonly_and_added();
354 395 // Prevent running `v`'s destructor so we are in complete control
355 396 // of the allocation.
356 397 let vec = mem::ManuallyDrop::new(vec);
357 398
358 399 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
359 400 // bytes, so this is perfectly safe.
360 401 let bytes = unsafe {
361 402 // Assert that `Block` hasn't been changed and has no padding
362 403 let _: [u8; 4 * BLOCK_SIZE] =
363 404 std::mem::transmute([Block::new(); 4]);
364 405
365 406 // /!\ Any use of `vec` after this is use-after-free.
366 407 // TODO: use `into_raw_parts` once stabilized
367 408 Vec::from_raw_parts(
368 409 vec.as_ptr() as *mut u8,
369 410 vec.len() * BLOCK_SIZE,
370 411 vec.capacity() * BLOCK_SIZE,
371 412 )
372 413 };
373 414 (readonly, bytes)
374 415 }
375 416
376 417 /// Total number of blocks
377 418 fn len(&self) -> usize {
378 419 self.readonly.len() + self.growable.len() + 1
379 420 }
380 421
381 422 /// Implemented for completeness
382 423 ///
383 424 /// A `NodeTree` always has at least the mutable root block.
384 425 #[allow(dead_code)]
385 426 fn is_empty(&self) -> bool {
386 427 false
387 428 }
388 429
389 430 /// Main working method for `NodeTree` searches
390 fn lookup<'p>(
431 ///
432 /// The first returned value is the result of analysing `NodeTree` data
433 /// *alone*: whereas `None` guarantees that the given prefix is absent
434 /// from the `NodeTree` data (but still could match `NULL_NODE`), with
435 /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision`
436 /// that could match the prefix. Actually, all that can be inferred from
437 /// the `NodeTree` data is that `rev` is the revision with the longest
438 /// common node prefix with the given prefix.
439 ///
440 /// The second returned value is the size of the smallest subprefix
441 /// of `prefix` that would give the same result, i.e. not the
442 /// `MultipleResults` error variant (again, using only the data of the
443 /// `NodeTree`).
444 fn lookup(
391 445 &self,
392 prefix: NodePrefixRef<'p>,
393 ) -> Result<Option<Revision>, NodeMapError> {
394 for visit_item in self.visit(prefix) {
446 prefix: NodePrefixRef,
447 ) -> Result<(Option<Revision>, usize), NodeMapError> {
448 for (i, visit_item) in self.visit(prefix).enumerate() {
395 449 if let Some(opt) = visit_item.final_revision() {
396 return Ok(opt);
450 return Ok((opt, i + 1));
397 451 }
398 452 }
399 453 Err(NodeMapError::MultipleResults)
400 454 }
401 455
402 456 fn visit<'n, 'p>(
403 457 &'n self,
404 458 prefix: NodePrefixRef<'p>,
405 459 ) -> NodeTreeVisitor<'n, 'p> {
406 460 NodeTreeVisitor {
407 461 nt: self,
408 462 prefix: prefix,
409 463 visit: self.len() - 1,
410 464 nybble_idx: 0,
411 465 done: false,
412 466 }
413 467 }
414 468 /// Return a mutable reference for `Block` at index `idx`.
415 469 ///
416 470 /// If `idx` lies in the immutable area, then the reference is to
417 471 /// a newly appended copy.
418 472 ///
419 473 /// Returns (new_idx, glen, mut_ref) where
420 474 ///
421 475 /// - `new_idx` is the index of the mutable `Block`
422 476 /// - `mut_ref` is a mutable reference to the mutable Block.
423 477 /// - `glen` is the new length of `self.growable`
424 478 ///
425 479 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
426 480 /// itself because of the mutable borrow taken with the returned `Block`
427 481 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
428 482 let ro_blocks = &self.readonly;
429 483 let ro_len = ro_blocks.len();
430 484 let glen = self.growable.len();
431 485 if idx < ro_len {
432 486 // TODO OPTIM I think this makes two copies
433 487 self.growable.push(ro_blocks[idx].clone());
434 488 (glen + ro_len, &mut self.growable[glen], glen + 1)
435 489 } else if glen + ro_len == idx {
436 490 (idx, &mut self.root, glen)
437 491 } else {
438 492 (idx, &mut self.growable[idx - ro_len], glen)
439 493 }
440 494 }
441 495
442 496 /// Main insertion method
443 497 ///
444 498 /// This will dive in the node tree to find the deepest `Block` for
445 499 /// `node`, split it as much as needed and record `node` in there.
446 500 /// The method then backtracks, updating references in all the visited
447 501 /// blocks from the root.
448 502 ///
449 503 /// All the mutated `Block` are copied first to the growable part if
450 504 /// needed. That happens for those in the immutable part except the root.
451 505 pub fn insert<I: RevlogIndex>(
452 506 &mut self,
453 507 index: &I,
454 508 node: &Node,
455 509 rev: Revision,
456 510 ) -> Result<(), NodeMapError> {
457 511 let ro_len = &self.readonly.len();
458 512
459 513 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
460 514 let read_nybbles = visit_steps.len();
461 515 // visit_steps cannot be empty, since we always visit the root block
462 516 let deepest = visit_steps.pop().unwrap();
463 517
464 518 let (mut block_idx, mut block, mut glen) =
465 519 self.mutable_block(deepest.block_idx);
466 520
467 521 if let Element::Rev(old_rev) = deepest.element {
468 522 let old_node = index
469 523 .node(old_rev)
470 524 .ok_or_else(|| NodeMapError::RevisionNotInIndex(old_rev))?;
471 525 if old_node == node {
472 526 return Ok(()); // avoid creating lots of useless blocks
473 527 }
474 528
475 529 // Looping over the tail of nybbles in both nodes, creating
476 530 // new blocks until we find the difference
477 531 let mut new_block_idx = ro_len + glen;
478 532 let mut nybble = deepest.nybble;
479 533 for nybble_pos in read_nybbles..node.nybbles_len() {
480 534 block.set(nybble, Element::Block(new_block_idx));
481 535
482 536 let new_nybble = node.get_nybble(nybble_pos);
483 537 let old_nybble = old_node.get_nybble(nybble_pos);
484 538
485 539 if old_nybble == new_nybble {
486 540 self.growable.push(Block::new());
487 541 block = &mut self.growable[glen];
488 542 glen += 1;
489 543 new_block_idx += 1;
490 544 nybble = new_nybble;
491 545 } else {
492 546 let mut new_block = Block::new();
493 547 new_block.set(old_nybble, Element::Rev(old_rev));
494 548 new_block.set(new_nybble, Element::Rev(rev));
495 549 self.growable.push(new_block);
496 550 break;
497 551 }
498 552 }
499 553 } else {
500 554 // Free slot in the deepest block: no splitting has to be done
501 555 block.set(deepest.nybble, Element::Rev(rev));
502 556 }
503 557
504 558 // Backtrack over visit steps to update references
505 559 while let Some(visited) = visit_steps.pop() {
506 560 let to_write = Element::Block(block_idx);
507 561 if visit_steps.is_empty() {
508 562 self.root.set(visited.nybble, to_write);
509 563 break;
510 564 }
511 565 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
512 566 if block.get(visited.nybble) == to_write {
513 567 break;
514 568 }
515 569 block.set(visited.nybble, to_write);
516 570 block_idx = new_idx;
517 571 }
518 572 Ok(())
519 573 }
520 574 }
521 575
522 576 pub struct NodeTreeBytes {
523 577 buffer: Box<dyn Deref<Target = [u8]> + Send>,
524 578 len_in_blocks: usize,
525 579 }
526 580
527 581 impl NodeTreeBytes {
528 582 fn new(
529 583 buffer: Box<dyn Deref<Target = [u8]> + Send>,
530 584 amount: usize,
531 585 ) -> Self {
532 586 assert!(buffer.len() >= amount);
533 587 let len_in_blocks = amount / BLOCK_SIZE;
534 588 NodeTreeBytes {
535 589 buffer,
536 590 len_in_blocks,
537 591 }
538 592 }
539 593 }
540 594
541 595 impl Deref for NodeTreeBytes {
542 596 type Target = [Block];
543 597
544 598 fn deref(&self) -> &[Block] {
545 599 unsafe {
546 600 slice::from_raw_parts(
547 601 (&self.buffer).as_ptr() as *const Block,
548 602 self.len_in_blocks,
549 603 )
550 604 }
551 605 }
552 606 }
553 607
554 608 struct NodeTreeVisitor<'n, 'p> {
555 609 nt: &'n NodeTree,
556 610 prefix: NodePrefixRef<'p>,
557 611 visit: usize,
558 612 nybble_idx: usize,
559 613 done: bool,
560 614 }
561 615
562 616 #[derive(Debug, PartialEq, Clone)]
563 617 struct NodeTreeVisitItem {
564 618 block_idx: usize,
565 619 nybble: u8,
566 620 element: Element,
567 621 }
568 622
569 623 impl<'n, 'p> Iterator for NodeTreeVisitor<'n, 'p> {
570 624 type Item = NodeTreeVisitItem;
571 625
572 626 fn next(&mut self) -> Option<Self::Item> {
573 627 if self.done || self.nybble_idx >= self.prefix.len() {
574 628 return None;
575 629 }
576 630
577 631 let nybble = self.prefix.get_nybble(self.nybble_idx);
578 632 self.nybble_idx += 1;
579 633
580 634 let visit = self.visit;
581 635 let element = self.nt[visit].get(nybble);
582 636 if let Element::Block(idx) = element {
583 637 self.visit = idx;
584 638 } else {
585 639 self.done = true;
586 640 }
587 641
588 642 Some(NodeTreeVisitItem {
589 643 block_idx: visit,
590 644 nybble: nybble,
591 645 element: element,
592 646 })
593 647 }
594 648 }
595 649
596 650 impl NodeTreeVisitItem {
597 651 // Return `Some(opt)` if this item is final, with `opt` being the
598 652 // `Revision` that it may represent.
599 653 //
600 654 // If the item is not terminal, return `None`
601 655 fn final_revision(&self) -> Option<Option<Revision>> {
602 656 match self.element {
603 657 Element::Block(_) => None,
604 658 Element::Rev(r) => Some(Some(r)),
605 659 Element::None => Some(None),
606 660 }
607 661 }
608 662 }
609 663
610 664 impl From<Vec<Block>> for NodeTree {
611 665 fn from(vec: Vec<Block>) -> Self {
612 666 Self::new(Box::new(vec))
613 667 }
614 668 }
615 669
616 670 impl fmt::Debug for NodeTree {
617 671 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
618 672 let readonly: &[Block] = &*self.readonly;
619 673 write!(
620 674 f,
621 675 "readonly: {:?}, growable: {:?}, root: {:?}",
622 676 readonly, self.growable, self.root
623 677 )
624 678 }
625 679 }
626 680
627 681 impl Default for NodeTree {
628 682 /// Create a fully mutable empty NodeTree
629 683 fn default() -> Self {
630 684 NodeTree::new(Box::new(Vec::new()))
631 685 }
632 686 }
633 687
634 688 impl NodeMap for NodeTree {
635 689 fn find_bin<'a>(
636 690 &self,
637 691 idx: &impl RevlogIndex,
638 692 prefix: NodePrefixRef<'a>,
639 693 ) -> Result<Option<Revision>, NodeMapError> {
640 694 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
695 .map(|(opt, _shortest)| opt)
696 }
697
698 fn unique_prefix_len_bin<'a>(
699 &self,
700 idx: &impl RevlogIndex,
701 prefix: NodePrefixRef<'a>,
702 ) -> Result<Option<usize>, NodeMapError> {
703 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
704 .map(|(opt, shortest)| opt.map(|_rev| shortest))
641 705 }
642 706 }
643 707
644 708 #[cfg(test)]
645 709 mod tests {
646 710 use super::NodeMapError::*;
647 711 use super::*;
648 712 use crate::revlog::node::{hex_pad_right, Node};
649 713 use std::collections::HashMap;
650 714
651 715 /// Creates a `Block` using a syntax close to the `Debug` output
652 716 macro_rules! block {
653 717 {$($nybble:tt : $variant:ident($val:tt)),*} => (
654 718 {
655 719 let mut block = Block::new();
656 720 $(block.set($nybble, Element::$variant($val)));*;
657 721 block
658 722 }
659 723 )
660 724 }
661 725
662 726 #[test]
663 727 fn test_block_debug() {
664 728 let mut block = Block::new();
665 729 block.set(1, Element::Rev(3));
666 730 block.set(10, Element::Block(0));
667 731 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
668 732 }
669 733
670 734 #[test]
671 735 fn test_block_macro() {
672 736 let block = block! {5: Block(2)};
673 737 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
674 738
675 739 let block = block! {13: Rev(15), 5: Block(2)};
676 740 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
677 741 }
678 742
679 743 #[test]
680 744 fn test_raw_block() {
681 745 let mut raw = [255u8; 64];
682 746
683 747 let mut counter = 0;
684 748 for val in [0, 15, -2, -1, -3].iter() {
685 749 for byte in RawElement::to_be_bytes(*val).iter() {
686 750 raw[counter] = *byte;
687 751 counter += 1;
688 752 }
689 753 }
690 754 let block = Block(raw);
691 755 assert_eq!(block.get(0), Element::Block(0));
692 756 assert_eq!(block.get(1), Element::Block(15));
693 757 assert_eq!(block.get(3), Element::None);
694 758 assert_eq!(block.get(2), Element::Rev(0));
695 759 assert_eq!(block.get(4), Element::Rev(1));
696 760 }
697 761
698 762 type TestIndex = HashMap<Revision, Node>;
699 763
700 764 impl RevlogIndex for TestIndex {
701 765 fn node(&self, rev: Revision) -> Option<&Node> {
702 766 self.get(&rev)
703 767 }
704 768
705 769 fn len(&self) -> usize {
706 770 self.len()
707 771 }
708 772 }
709 773
710 774 /// Pad hexadecimal Node prefix with zeros on the right
711 775 ///
712 776 /// This avoids having to repeatedly write very long hexadecimal
713 777 /// strings for test data, and brings actual hash size independency.
714 778 #[cfg(test)]
715 779 fn pad_node(hex: &str) -> Node {
716 780 Node::from_hex(&hex_pad_right(hex)).unwrap()
717 781 }
718 782
719 783 /// Pad hexadecimal Node prefix with zeros on the right, then insert
720 784 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
721 785 idx.insert(rev, pad_node(hex));
722 786 }
723 787
724 788 fn sample_nodetree() -> NodeTree {
725 789 NodeTree::from(vec![
726 790 block![0: Rev(9)],
727 791 block![0: Rev(0), 1: Rev(9)],
728 792 block![0: Block(1), 1:Rev(1)],
729 793 ])
730 794 }
731 795
732 796 #[test]
733 797 fn test_nt_debug() {
734 798 let nt = sample_nodetree();
735 799 assert_eq!(
736 800 format!("{:?}", nt),
737 801 "readonly: \
738 802 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
739 803 growable: [], \
740 804 root: {0: Block(1), 1: Rev(1)}",
741 805 );
742 806 }
743 807
744 808 #[test]
745 809 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
746 810 let mut idx: TestIndex = HashMap::new();
747 811 pad_insert(&mut idx, 1, "1234deadcafe");
748 812
749 813 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
750 814 assert_eq!(nt.find_hex(&idx, "1")?, Some(1));
751 815 assert_eq!(nt.find_hex(&idx, "12")?, Some(1));
752 816 assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1));
753 817 assert_eq!(nt.find_hex(&idx, "1a")?, None);
754 818 assert_eq!(nt.find_hex(&idx, "ab")?, None);
755 819
756 820 // and with full binary Nodes
757 821 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
758 822 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
759 823 assert_eq!(nt.find_node(&idx, &unknown)?, None);
760 824 Ok(())
761 825 }
762 826
763 827 #[test]
764 828 fn test_immutable_find_one_jump() {
765 829 let mut idx = TestIndex::new();
766 830 pad_insert(&mut idx, 9, "012");
767 831 pad_insert(&mut idx, 0, "00a");
768 832
769 833 let nt = sample_nodetree();
770 834
771 835 assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults));
772 836 assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
773 837 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
774 838 assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
839 assert_eq!(nt.unique_prefix_len_hex(&idx, "00a"), Ok(Some(3)));
775 840 assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
776 841 }
777 842
778 843 #[test]
779 844 fn test_mutated_find() -> Result<(), NodeMapError> {
780 845 let mut idx = TestIndex::new();
781 846 pad_insert(&mut idx, 9, "012");
782 847 pad_insert(&mut idx, 0, "00a");
783 848 pad_insert(&mut idx, 2, "cafe");
784 849 pad_insert(&mut idx, 3, "15");
785 850 pad_insert(&mut idx, 1, "10");
786 851
787 852 let nt = NodeTree {
788 853 readonly: sample_nodetree().readonly,
789 854 growable: vec![block![0: Rev(1), 5: Rev(3)]],
790 855 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
791 856 };
792 857 assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
793 858 assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
859 assert_eq!(nt.unique_prefix_len_hex(&idx, "c")?, Some(1));
794 860 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
795 861 assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
862 assert_eq!(nt.unique_prefix_len_hex(&idx, "000")?, Some(3));
796 863 assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
797 864 Ok(())
798 865 }
799 866
800 867 struct TestNtIndex {
801 868 index: TestIndex,
802 869 nt: NodeTree,
803 870 }
804 871
805 872 impl TestNtIndex {
806 873 fn new() -> Self {
807 874 TestNtIndex {
808 875 index: HashMap::new(),
809 876 nt: NodeTree::default(),
810 877 }
811 878 }
812 879
813 880 fn insert(
814 881 &mut self,
815 882 rev: Revision,
816 883 hex: &str,
817 884 ) -> Result<(), NodeMapError> {
818 885 let node = pad_node(hex);
819 886 self.index.insert(rev, node.clone());
820 887 self.nt.insert(&self.index, &node, rev)?;
821 888 Ok(())
822 889 }
823 890
824 891 fn find_hex(
825 892 &self,
826 893 prefix: &str,
827 894 ) -> Result<Option<Revision>, NodeMapError> {
828 895 self.nt.find_hex(&self.index, prefix)
829 896 }
830 897
898 fn unique_prefix_len_hex(
899 &self,
900 prefix: &str,
901 ) -> Result<Option<usize>, NodeMapError> {
902 self.nt.unique_prefix_len_hex(&self.index, prefix)
903 }
904
831 905 /// Drain `added` and restart a new one
832 906 fn commit(self) -> Self {
833 907 let mut as_vec: Vec<Block> =
834 908 self.nt.readonly.iter().map(|block| block.clone()).collect();
835 909 as_vec.extend(self.nt.growable);
836 910 as_vec.push(self.nt.root);
837 911
838 912 Self {
839 913 index: self.index,
840 914 nt: NodeTree::from(as_vec).into(),
841 915 }
842 916 }
843 917 }
844 918
845 919 #[test]
846 920 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
847 921 let mut idx = TestNtIndex::new();
848 922 idx.insert(0, "1234")?;
849 923 assert_eq!(idx.find_hex("1")?, Some(0));
850 924 assert_eq!(idx.find_hex("12")?, Some(0));
851 925
852 926 // let's trigger a simple split
853 927 idx.insert(1, "1a34")?;
854 928 assert_eq!(idx.nt.growable.len(), 1);
855 929 assert_eq!(idx.find_hex("12")?, Some(0));
856 930 assert_eq!(idx.find_hex("1a")?, Some(1));
857 931
858 932 // reinserting is a no_op
859 933 idx.insert(1, "1a34")?;
860 934 assert_eq!(idx.nt.growable.len(), 1);
861 935 assert_eq!(idx.find_hex("12")?, Some(0));
862 936 assert_eq!(idx.find_hex("1a")?, Some(1));
863 937
864 938 idx.insert(2, "1a01")?;
865 939 assert_eq!(idx.nt.growable.len(), 2);
866 940 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
867 941 assert_eq!(idx.find_hex("12")?, Some(0));
868 942 assert_eq!(idx.find_hex("1a3")?, Some(1));
869 943 assert_eq!(idx.find_hex("1a0")?, Some(2));
870 944 assert_eq!(idx.find_hex("1a12")?, None);
871 945
872 946 // now let's make it split and create more than one additional block
873 947 idx.insert(3, "1a345")?;
874 948 assert_eq!(idx.nt.growable.len(), 4);
875 949 assert_eq!(idx.find_hex("1a340")?, Some(1));
876 950 assert_eq!(idx.find_hex("1a345")?, Some(3));
877 951 assert_eq!(idx.find_hex("1a341")?, None);
878 952
879 953 Ok(())
880 954 }
881 955
882 956 #[test]
957 fn test_unique_prefix_len_zero_prefix() {
958 let mut idx = TestNtIndex::new();
959 idx.insert(0, "00000abcd").unwrap();
960
961 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
962 // in the nodetree proper, this will be found at the first nybble
963 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
964 // but the first difference with `NULL_NODE`
965 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
966 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
967
968 // same with odd result
969 idx.insert(1, "00123").unwrap();
970 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
971 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
972
973 // these are unchanged of course
974 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
975 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
976 }
977
978 #[test]
883 979 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
884 980 // check that the splitting loop is long enough
885 981 let mut nt_idx = TestNtIndex::new();
886 982 let nt = &mut nt_idx.nt;
887 983 let idx = &mut nt_idx.index;
888 984
889 985 let node0_hex = hex_pad_right("444444");
890 986 let mut node1_hex = hex_pad_right("444444").clone();
891 987 node1_hex.pop();
892 988 node1_hex.push('5');
893 989 let node0 = Node::from_hex(&node0_hex).unwrap();
894 990 let node1 = Node::from_hex(&node1_hex).unwrap();
895 991
896 992 idx.insert(0, node0.clone());
897 993 nt.insert(idx, &node0, 0)?;
898 994 idx.insert(1, node1.clone());
899 995 nt.insert(idx, &node1, 1)?;
900 996
901 997 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
902 998 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
903 999 Ok(())
904 1000 }
905 1001
906 1002 #[test]
907 1003 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
908 1004 let mut idx = TestNtIndex::new();
909 1005 idx.insert(0, "1234")?;
910 1006 idx.insert(1, "1235")?;
911 1007 idx.insert(2, "131")?;
912 1008 idx.insert(3, "cafe")?;
913 1009 let mut idx = idx.commit();
914 1010 assert_eq!(idx.find_hex("1234")?, Some(0));
915 1011 assert_eq!(idx.find_hex("1235")?, Some(1));
916 1012 assert_eq!(idx.find_hex("131")?, Some(2));
917 1013 assert_eq!(idx.find_hex("cafe")?, Some(3));
918 1014
919 1015 idx.insert(4, "123A")?;
920 1016 assert_eq!(idx.find_hex("1234")?, Some(0));
921 1017 assert_eq!(idx.find_hex("1235")?, Some(1));
922 1018 assert_eq!(idx.find_hex("131")?, Some(2));
923 1019 assert_eq!(idx.find_hex("cafe")?, Some(3));
924 1020 assert_eq!(idx.find_hex("123A")?, Some(4));
925 1021
926 1022 idx.insert(5, "c0")?;
927 1023 assert_eq!(idx.find_hex("cafe")?, Some(3));
928 1024 assert_eq!(idx.find_hex("c0")?, Some(5));
929 1025 assert_eq!(idx.find_hex("c1")?, None);
930 1026 assert_eq!(idx.find_hex("1234")?, Some(0));
931 1027
932 1028 Ok(())
933 1029 }
934 1030
935 1031 #[test]
936 1032 fn test_into_added_empty() {
937 1033 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
938 1034 assert!(sample_nodetree()
939 1035 .into_readonly_and_added_bytes()
940 1036 .1
941 1037 .is_empty());
942 1038 }
943 1039
944 1040 #[test]
945 1041 fn test_into_added_bytes() -> Result<(), NodeMapError> {
946 1042 let mut idx = TestNtIndex::new();
947 1043 idx.insert(0, "1234")?;
948 1044 let mut idx = idx.commit();
949 1045 idx.insert(4, "cafe")?;
950 1046 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
951 1047
952 1048 // only the root block has been changed
953 1049 assert_eq!(bytes.len(), BLOCK_SIZE);
954 1050 // big endian for -2
955 1051 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
956 1052 // big endian for -6
957 1053 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
958 1054 Ok(())
959 1055 }
960 1056 }
General Comments 0
You need to be logged in to leave comments. Login now