##// END OF EJS Templates
rust-nodemap: special case for prefixes of NULL_NODE...
Georges Racinet -
r44871:00d251d3 default
parent child Browse files
Show More
@@ -1,936 +1,960 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 7 //!
8 8 //! This provides a variation on the 16-ary radix tree that is
9 9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 10 //! on disk.
11 11 //!
12 12 //! Following existing implicit conventions, the "nodemap" terminology
13 13 //! is used in a more abstract context.
14 14
15 15 use super::{
16 Node, NodeError, NodePrefix, NodePrefixRef, Revision, RevlogIndex,
16 node::NULL_NODE, Node, NodeError, NodePrefix, NodePrefixRef, Revision,
17 RevlogIndex, NULL_REVISION,
17 18 };
18 19
19 20 use std::fmt;
20 21 use std::mem;
21 22 use std::ops::Deref;
22 23 use std::ops::Index;
23 24 use std::slice;
24 25
25 26 #[derive(Debug, PartialEq)]
26 27 pub enum NodeMapError {
27 28 MultipleResults,
28 29 InvalidNodePrefix(NodeError),
29 30 /// A `Revision` stored in the nodemap could not be found in the index
30 31 RevisionNotInIndex(Revision),
31 32 }
32 33
33 34 impl From<NodeError> for NodeMapError {
34 35 fn from(err: NodeError) -> Self {
35 36 NodeMapError::InvalidNodePrefix(err)
36 37 }
37 38 }
38 39
39 40 /// Mapping system from Mercurial nodes to revision numbers.
40 41 ///
41 42 /// ## `RevlogIndex` and `NodeMap`
42 43 ///
43 44 /// One way to think about their relationship is that
44 45 /// the `NodeMap` is a prefix-oriented reverse index of the `Node` information
45 46 /// carried by a [`RevlogIndex`].
46 47 ///
47 48 /// Many of the methods in this trait take a `RevlogIndex` argument
48 49 /// which is used for validation of their results. This index must naturally
49 50 /// be the one the `NodeMap` is about, and it must be consistent.
50 51 ///
51 52 /// Notably, the `NodeMap` must not store
52 53 /// information about more `Revision` values than there are in the index.
53 54 /// In these methods, an encountered `Revision` is not in the index, a
54 55 /// [`RevisionNotInIndex`] error is returned.
55 56 ///
56 57 /// In insert operations, the rule is thus that the `NodeMap` must always
57 58 /// be updated after the `RevlogIndex`
58 59 /// be updated first, and the `NodeMap` second.
59 60 ///
60 61 /// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex
61 62 /// [`RevlogIndex`]: ../trait.RevlogIndex.html
62 63 pub trait NodeMap {
63 64 /// Find the unique `Revision` having the given `Node`
64 65 ///
65 66 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
66 67 fn find_node(
67 68 &self,
68 69 index: &impl RevlogIndex,
69 70 node: &Node,
70 71 ) -> Result<Option<Revision>, NodeMapError> {
71 72 self.find_bin(index, node.into())
72 73 }
73 74
74 75 /// Find the unique Revision whose `Node` starts with a given binary prefix
75 76 ///
76 77 /// If no Revision matches the given prefix, `Ok(None)` is returned.
77 78 ///
78 79 /// If several Revisions match the given prefix, a [`MultipleResults`]
79 80 /// error is returned.
80 81 fn find_bin<'a>(
81 82 &self,
82 83 idx: &impl RevlogIndex,
83 84 prefix: NodePrefixRef<'a>,
84 85 ) -> Result<Option<Revision>, NodeMapError>;
85 86
86 87 /// Find the unique Revision whose `Node` hexadecimal string representation
87 88 /// starts with a given prefix
88 89 ///
89 90 /// If no Revision matches the given prefix, `Ok(None)` is returned.
90 91 ///
91 92 /// If several Revisions match the given prefix, a [`MultipleResults`]
92 93 /// error is returned.
93 94 fn find_hex(
94 95 &self,
95 96 idx: &impl RevlogIndex,
96 97 prefix: &str,
97 98 ) -> Result<Option<Revision>, NodeMapError> {
98 99 self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
99 100 }
100 101 }
101 102
102 103 pub trait MutableNodeMap: NodeMap {
103 104 fn insert<I: RevlogIndex>(
104 105 &mut self,
105 106 index: &I,
106 107 node: &Node,
107 108 rev: Revision,
108 109 ) -> Result<(), NodeMapError>;
109 110 }
110 111
111 112 /// Low level NodeTree [`Blocks`] elements
112 113 ///
113 114 /// These are exactly as for instance on persistent storage.
114 115 type RawElement = i32;
115 116
116 117 /// High level representation of values in NodeTree
117 118 /// [`Blocks`](struct.Block.html)
118 119 ///
119 120 /// This is the high level representation that most algorithms should
120 121 /// use.
121 122 #[derive(Clone, Debug, Eq, PartialEq)]
122 123 enum Element {
123 124 Rev(Revision),
124 125 Block(usize),
125 126 None,
126 127 }
127 128
128 129 impl From<RawElement> for Element {
129 130 /// Conversion from low level representation, after endianness conversion.
130 131 ///
131 132 /// See [`Block`](struct.Block.html) for explanation about the encoding.
132 133 fn from(raw: RawElement) -> Element {
133 134 if raw >= 0 {
134 135 Element::Block(raw as usize)
135 136 } else if raw == -1 {
136 137 Element::None
137 138 } else {
138 139 Element::Rev(-raw - 2)
139 140 }
140 141 }
141 142 }
142 143
143 144 impl From<Element> for RawElement {
144 145 fn from(element: Element) -> RawElement {
145 146 match element {
146 147 Element::None => 0,
147 148 Element::Block(i) => i as RawElement,
148 149 Element::Rev(rev) => -rev - 2,
149 150 }
150 151 }
151 152 }
152 153
153 154 /// A logical block of the `NodeTree`, packed with a fixed size.
154 155 ///
155 156 /// These are always used in container types implementing `Index<Block>`,
156 157 /// such as `&Block`
157 158 ///
158 159 /// As an array of integers, its ith element encodes that the
159 160 /// ith potential edge from the block, representing the ith hexadecimal digit
160 161 /// (nybble) `i` is either:
161 162 ///
162 163 /// - absent (value -1)
163 164 /// - another `Block` in the same indexable container (value β‰₯ 0)
164 165 /// - a `Revision` leaf (value ≀ -2)
165 166 ///
166 167 /// Endianness has to be fixed for consistency on shared storage across
167 168 /// different architectures.
168 169 ///
169 170 /// A key difference with the C `nodetree` is that we need to be
170 171 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
171 172 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
172 173 ///
173 174 /// Another related difference is that `NULL_REVISION` (-1) is not
174 175 /// represented at all, because we want an immutable empty nodetree
175 176 /// to be valid.
176 177
177 178 #[derive(Copy, Clone)]
178 179 pub struct Block([u8; BLOCK_SIZE]);
179 180
180 181 /// Not derivable for arrays of length >32 until const generics are stable
181 182 impl PartialEq for Block {
182 183 fn eq(&self, other: &Self) -> bool {
183 184 &self.0[..] == &other.0[..]
184 185 }
185 186 }
186 187
187 188 pub const BLOCK_SIZE: usize = 64;
188 189
189 190 impl Block {
190 191 fn new() -> Self {
191 192 // -1 in 2's complement to create an absent node
192 193 let byte: u8 = 255;
193 194 Block([byte; BLOCK_SIZE])
194 195 }
195 196
196 197 fn get(&self, nybble: u8) -> Element {
197 198 let index = nybble as usize * mem::size_of::<RawElement>();
198 199 Element::from(RawElement::from_be_bytes([
199 200 self.0[index],
200 201 self.0[index + 1],
201 202 self.0[index + 2],
202 203 self.0[index + 3],
203 204 ]))
204 205 }
205 206
206 207 fn set(&mut self, nybble: u8, element: Element) {
207 208 let values = RawElement::to_be_bytes(element.into());
208 209 let index = nybble as usize * mem::size_of::<RawElement>();
209 210 self.0[index] = values[0];
210 211 self.0[index + 1] = values[1];
211 212 self.0[index + 2] = values[2];
212 213 self.0[index + 3] = values[3];
213 214 }
214 215 }
215 216
216 217 impl fmt::Debug for Block {
217 218 /// sparse representation for testing and debugging purposes
218 219 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
219 220 f.debug_map()
220 221 .entries((0..16).filter_map(|i| match self.get(i) {
221 222 Element::None => None,
222 223 element => Some((i, element)),
223 224 }))
224 225 .finish()
225 226 }
226 227 }
227 228
228 229 /// A mutable 16-radix tree with the root block logically at the end
229 230 ///
230 231 /// Because of the append only nature of our node trees, we need to
231 232 /// keep the original untouched and store new blocks separately.
232 233 ///
233 234 /// The mutable root `Block` is kept apart so that we don't have to rebump
234 235 /// it on each insertion.
235 236 pub struct NodeTree {
236 237 readonly: Box<dyn Deref<Target = [Block]> + Send>,
237 238 growable: Vec<Block>,
238 239 root: Block,
239 240 }
240 241
241 242 impl Index<usize> for NodeTree {
242 243 type Output = Block;
243 244
244 245 fn index(&self, i: usize) -> &Block {
245 246 let ro_len = self.readonly.len();
246 247 if i < ro_len {
247 248 &self.readonly[i]
248 249 } else if i == ro_len + self.growable.len() {
249 250 &self.root
250 251 } else {
251 252 &self.growable[i - ro_len]
252 253 }
253 254 }
254 255 }
255 256
256 257 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
257 258 fn has_prefix_or_none(
258 259 idx: &impl RevlogIndex,
259 260 prefix: NodePrefixRef,
260 261 rev: Revision,
261 262 ) -> Result<Option<Revision>, NodeMapError> {
262 263 idx.node(rev)
263 264 .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev))
264 265 .map(|node| {
265 266 if prefix.is_prefix_of(node) {
266 267 Some(rev)
267 268 } else {
268 269 None
269 270 }
270 271 })
271 272 }
272 273
274 /// validate that the candidate's node starts indeed with given prefix,
275 /// and treat ambiguities related to `NULL_REVISION`.
276 ///
277 /// From the data in the NodeTree, one can only conclude that some
278 /// revision is the only one for a *subprefix* of the one being looked up.
279 fn validate_candidate(
280 idx: &impl RevlogIndex,
281 prefix: NodePrefixRef,
282 rev: Option<Revision>,
283 ) -> Result<Option<Revision>, NodeMapError> {
284 if prefix.is_prefix_of(&NULL_NODE) {
285 // NULL_REVISION always matches a prefix made only of zeros
286 // and any other *valid* result is an ambiguity
287 match rev {
288 None => Ok(Some(NULL_REVISION)),
289 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
290 None => Ok(Some(NULL_REVISION)),
291 _ => Err(NodeMapError::MultipleResults),
292 },
293 }
294 } else {
295 rev.map_or(Ok(None), |r| has_prefix_or_none(idx, prefix, r))
296 }
297 }
298
273 299 impl NodeTree {
274 300 /// Initiate a NodeTree from an immutable slice-like of `Block`
275 301 ///
276 302 /// We keep `readonly` and clone its root block if it isn't empty.
277 303 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
278 304 let root = readonly
279 305 .last()
280 306 .map(|b| b.clone())
281 307 .unwrap_or_else(|| Block::new());
282 308 NodeTree {
283 309 readonly: readonly,
284 310 growable: Vec::new(),
285 311 root: root,
286 312 }
287 313 }
288 314
289 315 /// Create from an opaque bunch of bytes
290 316 ///
291 317 /// The created `NodeTreeBytes` from `buffer`,
292 318 /// of which exactly `amount` bytes are used.
293 319 ///
294 320 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
295 321 /// - `offset` allows for the final file format to include fixed data
296 322 /// (generation number, behavioural flags)
297 323 /// - `amount` is expressed in bytes, and is not automatically derived from
298 324 /// `bytes`, so that a caller that manages them atomically can perform
299 325 /// temporary disk serializations and still rollback easily if needed.
300 326 /// First use-case for this would be to support Mercurial shell hooks.
301 327 ///
302 328 /// panics if `buffer` is smaller than `amount`
303 329 pub fn load_bytes(
304 330 bytes: Box<dyn Deref<Target = [u8]> + Send>,
305 331 amount: usize,
306 332 ) -> Self {
307 333 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
308 334 }
309 335
310 336 /// Retrieve added `Block` and the original immutable data
311 337 pub fn into_readonly_and_added(
312 338 self,
313 339 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
314 340 let mut vec = self.growable;
315 341 let readonly = self.readonly;
316 342 if readonly.last() != Some(&self.root) {
317 343 vec.push(self.root);
318 344 }
319 345 (readonly, vec)
320 346 }
321 347
322 348 /// Retrieve added `Blocks` as bytes, ready to be written to persistent
323 349 /// storage
324 350 pub fn into_readonly_and_added_bytes(
325 351 self,
326 352 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
327 353 let (readonly, vec) = self.into_readonly_and_added();
328 354 // Prevent running `v`'s destructor so we are in complete control
329 355 // of the allocation.
330 356 let vec = mem::ManuallyDrop::new(vec);
331 357
332 358 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
333 359 // bytes, so this is perfectly safe.
334 360 let bytes = unsafe {
335 361 // Assert that `Block` hasn't been changed and has no padding
336 362 let _: [u8; 4 * BLOCK_SIZE] =
337 363 std::mem::transmute([Block::new(); 4]);
338 364
339 365 // /!\ Any use of `vec` after this is use-after-free.
340 366 // TODO: use `into_raw_parts` once stabilized
341 367 Vec::from_raw_parts(
342 368 vec.as_ptr() as *mut u8,
343 369 vec.len() * BLOCK_SIZE,
344 370 vec.capacity() * BLOCK_SIZE,
345 371 )
346 372 };
347 373 (readonly, bytes)
348 374 }
349 375
350 376 /// Total number of blocks
351 377 fn len(&self) -> usize {
352 378 self.readonly.len() + self.growable.len() + 1
353 379 }
354 380
355 381 /// Implemented for completeness
356 382 ///
357 383 /// A `NodeTree` always has at least the mutable root block.
358 384 #[allow(dead_code)]
359 385 fn is_empty(&self) -> bool {
360 386 false
361 387 }
362 388
363 389 /// Main working method for `NodeTree` searches
364 ///
365 /// This partial implementation lacks special cases for NULL_REVISION
366 390 fn lookup<'p>(
367 391 &self,
368 392 prefix: NodePrefixRef<'p>,
369 393 ) -> Result<Option<Revision>, NodeMapError> {
370 394 for visit_item in self.visit(prefix) {
371 395 if let Some(opt) = visit_item.final_revision() {
372 396 return Ok(opt);
373 397 }
374 398 }
375 399 Err(NodeMapError::MultipleResults)
376 400 }
377 401
378 402 fn visit<'n, 'p>(
379 403 &'n self,
380 404 prefix: NodePrefixRef<'p>,
381 405 ) -> NodeTreeVisitor<'n, 'p> {
382 406 NodeTreeVisitor {
383 407 nt: self,
384 408 prefix: prefix,
385 409 visit: self.len() - 1,
386 410 nybble_idx: 0,
387 411 done: false,
388 412 }
389 413 }
390 414 /// Return a mutable reference for `Block` at index `idx`.
391 415 ///
392 416 /// If `idx` lies in the immutable area, then the reference is to
393 417 /// a newly appended copy.
394 418 ///
395 419 /// Returns (new_idx, glen, mut_ref) where
396 420 ///
397 421 /// - `new_idx` is the index of the mutable `Block`
398 422 /// - `mut_ref` is a mutable reference to the mutable Block.
399 423 /// - `glen` is the new length of `self.growable`
400 424 ///
401 425 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
402 426 /// itself because of the mutable borrow taken with the returned `Block`
403 427 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
404 428 let ro_blocks = &self.readonly;
405 429 let ro_len = ro_blocks.len();
406 430 let glen = self.growable.len();
407 431 if idx < ro_len {
408 432 // TODO OPTIM I think this makes two copies
409 433 self.growable.push(ro_blocks[idx].clone());
410 434 (glen + ro_len, &mut self.growable[glen], glen + 1)
411 435 } else if glen + ro_len == idx {
412 436 (idx, &mut self.root, glen)
413 437 } else {
414 438 (idx, &mut self.growable[idx - ro_len], glen)
415 439 }
416 440 }
417 441
418 442 /// Main insertion method
419 443 ///
420 444 /// This will dive in the node tree to find the deepest `Block` for
421 445 /// `node`, split it as much as needed and record `node` in there.
422 446 /// The method then backtracks, updating references in all the visited
423 447 /// blocks from the root.
424 448 ///
425 449 /// All the mutated `Block` are copied first to the growable part if
426 450 /// needed. That happens for those in the immutable part except the root.
427 451 pub fn insert<I: RevlogIndex>(
428 452 &mut self,
429 453 index: &I,
430 454 node: &Node,
431 455 rev: Revision,
432 456 ) -> Result<(), NodeMapError> {
433 457 let ro_len = &self.readonly.len();
434 458
435 459 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
436 460 let read_nybbles = visit_steps.len();
437 461 // visit_steps cannot be empty, since we always visit the root block
438 462 let deepest = visit_steps.pop().unwrap();
439 463
440 464 let (mut block_idx, mut block, mut glen) =
441 465 self.mutable_block(deepest.block_idx);
442 466
443 467 if let Element::Rev(old_rev) = deepest.element {
444 468 let old_node = index
445 469 .node(old_rev)
446 470 .ok_or_else(|| NodeMapError::RevisionNotInIndex(old_rev))?;
447 471 if old_node == node {
448 472 return Ok(()); // avoid creating lots of useless blocks
449 473 }
450 474
451 475 // Looping over the tail of nybbles in both nodes, creating
452 476 // new blocks until we find the difference
453 477 let mut new_block_idx = ro_len + glen;
454 478 let mut nybble = deepest.nybble;
455 479 for nybble_pos in read_nybbles..node.nybbles_len() {
456 480 block.set(nybble, Element::Block(new_block_idx));
457 481
458 482 let new_nybble = node.get_nybble(nybble_pos);
459 483 let old_nybble = old_node.get_nybble(nybble_pos);
460 484
461 485 if old_nybble == new_nybble {
462 486 self.growable.push(Block::new());
463 487 block = &mut self.growable[glen];
464 488 glen += 1;
465 489 new_block_idx += 1;
466 490 nybble = new_nybble;
467 491 } else {
468 492 let mut new_block = Block::new();
469 493 new_block.set(old_nybble, Element::Rev(old_rev));
470 494 new_block.set(new_nybble, Element::Rev(rev));
471 495 self.growable.push(new_block);
472 496 break;
473 497 }
474 498 }
475 499 } else {
476 500 // Free slot in the deepest block: no splitting has to be done
477 501 block.set(deepest.nybble, Element::Rev(rev));
478 502 }
479 503
480 504 // Backtrack over visit steps to update references
481 505 while let Some(visited) = visit_steps.pop() {
482 506 let to_write = Element::Block(block_idx);
483 507 if visit_steps.is_empty() {
484 508 self.root.set(visited.nybble, to_write);
485 509 break;
486 510 }
487 511 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
488 512 if block.get(visited.nybble) == to_write {
489 513 break;
490 514 }
491 515 block.set(visited.nybble, to_write);
492 516 block_idx = new_idx;
493 517 }
494 518 Ok(())
495 519 }
496 520 }
497 521
498 522 pub struct NodeTreeBytes {
499 523 buffer: Box<dyn Deref<Target = [u8]> + Send>,
500 524 len_in_blocks: usize,
501 525 }
502 526
503 527 impl NodeTreeBytes {
504 528 fn new(
505 529 buffer: Box<dyn Deref<Target = [u8]> + Send>,
506 530 amount: usize,
507 531 ) -> Self {
508 532 assert!(buffer.len() >= amount);
509 533 let len_in_blocks = amount / BLOCK_SIZE;
510 534 NodeTreeBytes {
511 535 buffer,
512 536 len_in_blocks,
513 537 }
514 538 }
515 539 }
516 540
517 541 impl Deref for NodeTreeBytes {
518 542 type Target = [Block];
519 543
520 544 fn deref(&self) -> &[Block] {
521 545 unsafe {
522 546 slice::from_raw_parts(
523 547 (&self.buffer).as_ptr() as *const Block,
524 548 self.len_in_blocks,
525 549 )
526 550 }
527 551 }
528 552 }
529 553
530 554 struct NodeTreeVisitor<'n, 'p> {
531 555 nt: &'n NodeTree,
532 556 prefix: NodePrefixRef<'p>,
533 557 visit: usize,
534 558 nybble_idx: usize,
535 559 done: bool,
536 560 }
537 561
538 562 #[derive(Debug, PartialEq, Clone)]
539 563 struct NodeTreeVisitItem {
540 564 block_idx: usize,
541 565 nybble: u8,
542 566 element: Element,
543 567 }
544 568
545 569 impl<'n, 'p> Iterator for NodeTreeVisitor<'n, 'p> {
546 570 type Item = NodeTreeVisitItem;
547 571
548 572 fn next(&mut self) -> Option<Self::Item> {
549 573 if self.done || self.nybble_idx >= self.prefix.len() {
550 574 return None;
551 575 }
552 576
553 577 let nybble = self.prefix.get_nybble(self.nybble_idx);
554 578 self.nybble_idx += 1;
555 579
556 580 let visit = self.visit;
557 581 let element = self.nt[visit].get(nybble);
558 582 if let Element::Block(idx) = element {
559 583 self.visit = idx;
560 584 } else {
561 585 self.done = true;
562 586 }
563 587
564 588 Some(NodeTreeVisitItem {
565 589 block_idx: visit,
566 590 nybble: nybble,
567 591 element: element,
568 592 })
569 593 }
570 594 }
571 595
572 596 impl NodeTreeVisitItem {
573 597 // Return `Some(opt)` if this item is final, with `opt` being the
574 598 // `Revision` that it may represent.
575 599 //
576 600 // If the item is not terminal, return `None`
577 601 fn final_revision(&self) -> Option<Option<Revision>> {
578 602 match self.element {
579 603 Element::Block(_) => None,
580 604 Element::Rev(r) => Some(Some(r)),
581 605 Element::None => Some(None),
582 606 }
583 607 }
584 608 }
585 609
586 610 impl From<Vec<Block>> for NodeTree {
587 611 fn from(vec: Vec<Block>) -> Self {
588 612 Self::new(Box::new(vec))
589 613 }
590 614 }
591 615
592 616 impl fmt::Debug for NodeTree {
593 617 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
594 618 let readonly: &[Block] = &*self.readonly;
595 619 write!(
596 620 f,
597 621 "readonly: {:?}, growable: {:?}, root: {:?}",
598 622 readonly, self.growable, self.root
599 623 )
600 624 }
601 625 }
602 626
603 627 impl Default for NodeTree {
604 628 /// Create a fully mutable empty NodeTree
605 629 fn default() -> Self {
606 630 NodeTree::new(Box::new(Vec::new()))
607 631 }
608 632 }
609 633
610 634 impl NodeMap for NodeTree {
611 635 fn find_bin<'a>(
612 636 &self,
613 637 idx: &impl RevlogIndex,
614 638 prefix: NodePrefixRef<'a>,
615 639 ) -> Result<Option<Revision>, NodeMapError> {
616 self.lookup(prefix.clone()).and_then(|opt| {
617 opt.map_or(Ok(None), |rev| has_prefix_or_none(idx, prefix, rev))
618 })
640 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
619 641 }
620 642 }
621 643
622 644 #[cfg(test)]
623 645 mod tests {
624 646 use super::NodeMapError::*;
625 647 use super::*;
626 648 use crate::revlog::node::{hex_pad_right, Node};
627 649 use std::collections::HashMap;
628 650
629 651 /// Creates a `Block` using a syntax close to the `Debug` output
630 652 macro_rules! block {
631 653 {$($nybble:tt : $variant:ident($val:tt)),*} => (
632 654 {
633 655 let mut block = Block::new();
634 656 $(block.set($nybble, Element::$variant($val)));*;
635 657 block
636 658 }
637 659 )
638 660 }
639 661
640 662 #[test]
641 663 fn test_block_debug() {
642 664 let mut block = Block::new();
643 665 block.set(1, Element::Rev(3));
644 666 block.set(10, Element::Block(0));
645 667 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
646 668 }
647 669
648 670 #[test]
649 671 fn test_block_macro() {
650 672 let block = block! {5: Block(2)};
651 673 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
652 674
653 675 let block = block! {13: Rev(15), 5: Block(2)};
654 676 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
655 677 }
656 678
657 679 #[test]
658 680 fn test_raw_block() {
659 681 let mut raw = [255u8; 64];
660 682
661 683 let mut counter = 0;
662 684 for val in [0, 15, -2, -1, -3].iter() {
663 685 for byte in RawElement::to_be_bytes(*val).iter() {
664 686 raw[counter] = *byte;
665 687 counter += 1;
666 688 }
667 689 }
668 690 let block = Block(raw);
669 691 assert_eq!(block.get(0), Element::Block(0));
670 692 assert_eq!(block.get(1), Element::Block(15));
671 693 assert_eq!(block.get(3), Element::None);
672 694 assert_eq!(block.get(2), Element::Rev(0));
673 695 assert_eq!(block.get(4), Element::Rev(1));
674 696 }
675 697
676 698 type TestIndex = HashMap<Revision, Node>;
677 699
678 700 impl RevlogIndex for TestIndex {
679 701 fn node(&self, rev: Revision) -> Option<&Node> {
680 702 self.get(&rev)
681 703 }
682 704
683 705 fn len(&self) -> usize {
684 706 self.len()
685 707 }
686 708 }
687 709
688 710 /// Pad hexadecimal Node prefix with zeros on the right
689 711 ///
690 712 /// This avoids having to repeatedly write very long hexadecimal
691 713 /// strings for test data, and brings actual hash size independency.
692 714 #[cfg(test)]
693 715 fn pad_node(hex: &str) -> Node {
694 716 Node::from_hex(&hex_pad_right(hex)).unwrap()
695 717 }
696 718
697 719 /// Pad hexadecimal Node prefix with zeros on the right, then insert
698 720 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
699 721 idx.insert(rev, pad_node(hex));
700 722 }
701 723
702 724 fn sample_nodetree() -> NodeTree {
703 725 NodeTree::from(vec![
704 726 block![0: Rev(9)],
705 727 block![0: Rev(0), 1: Rev(9)],
706 728 block![0: Block(1), 1:Rev(1)],
707 729 ])
708 730 }
709 731
710 732 #[test]
711 733 fn test_nt_debug() {
712 734 let nt = sample_nodetree();
713 735 assert_eq!(
714 736 format!("{:?}", nt),
715 737 "readonly: \
716 738 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
717 739 growable: [], \
718 740 root: {0: Block(1), 1: Rev(1)}",
719 741 );
720 742 }
721 743
722 744 #[test]
723 745 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
724 746 let mut idx: TestIndex = HashMap::new();
725 747 pad_insert(&mut idx, 1, "1234deadcafe");
726 748
727 749 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
728 750 assert_eq!(nt.find_hex(&idx, "1")?, Some(1));
729 751 assert_eq!(nt.find_hex(&idx, "12")?, Some(1));
730 752 assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1));
731 753 assert_eq!(nt.find_hex(&idx, "1a")?, None);
732 754 assert_eq!(nt.find_hex(&idx, "ab")?, None);
733 755
734 756 // and with full binary Nodes
735 757 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
736 758 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
737 759 assert_eq!(nt.find_node(&idx, &unknown)?, None);
738 760 Ok(())
739 761 }
740 762
741 763 #[test]
742 764 fn test_immutable_find_one_jump() {
743 765 let mut idx = TestIndex::new();
744 766 pad_insert(&mut idx, 9, "012");
745 767 pad_insert(&mut idx, 0, "00a");
746 768
747 769 let nt = sample_nodetree();
748 770
749 771 assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults));
750 772 assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
751 assert_eq!(nt.find_hex(&idx, "00"), Ok(Some(0)));
773 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
752 774 assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
775 assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
753 776 }
754 777
755 778 #[test]
756 779 fn test_mutated_find() -> Result<(), NodeMapError> {
757 780 let mut idx = TestIndex::new();
758 781 pad_insert(&mut idx, 9, "012");
759 782 pad_insert(&mut idx, 0, "00a");
760 783 pad_insert(&mut idx, 2, "cafe");
761 784 pad_insert(&mut idx, 3, "15");
762 785 pad_insert(&mut idx, 1, "10");
763 786
764 787 let nt = NodeTree {
765 788 readonly: sample_nodetree().readonly,
766 789 growable: vec![block![0: Rev(1), 5: Rev(3)]],
767 790 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
768 791 };
769 792 assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
770 793 assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
771 assert_eq!(nt.find_hex(&idx, "00")?, Some(0));
794 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
795 assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
772 796 assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
773 797 Ok(())
774 798 }
775 799
776 800 struct TestNtIndex {
777 801 index: TestIndex,
778 802 nt: NodeTree,
779 803 }
780 804
781 805 impl TestNtIndex {
782 806 fn new() -> Self {
783 807 TestNtIndex {
784 808 index: HashMap::new(),
785 809 nt: NodeTree::default(),
786 810 }
787 811 }
788 812
789 813 fn insert(
790 814 &mut self,
791 815 rev: Revision,
792 816 hex: &str,
793 817 ) -> Result<(), NodeMapError> {
794 818 let node = pad_node(hex);
795 819 self.index.insert(rev, node.clone());
796 820 self.nt.insert(&self.index, &node, rev)?;
797 821 Ok(())
798 822 }
799 823
800 824 fn find_hex(
801 825 &self,
802 826 prefix: &str,
803 827 ) -> Result<Option<Revision>, NodeMapError> {
804 828 self.nt.find_hex(&self.index, prefix)
805 829 }
806 830
807 831 /// Drain `added` and restart a new one
808 832 fn commit(self) -> Self {
809 833 let mut as_vec: Vec<Block> =
810 834 self.nt.readonly.iter().map(|block| block.clone()).collect();
811 835 as_vec.extend(self.nt.growable);
812 836 as_vec.push(self.nt.root);
813 837
814 838 Self {
815 839 index: self.index,
816 840 nt: NodeTree::from(as_vec).into(),
817 841 }
818 842 }
819 843 }
820 844
821 845 #[test]
822 846 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
823 847 let mut idx = TestNtIndex::new();
824 848 idx.insert(0, "1234")?;
825 849 assert_eq!(idx.find_hex("1")?, Some(0));
826 850 assert_eq!(idx.find_hex("12")?, Some(0));
827 851
828 852 // let's trigger a simple split
829 853 idx.insert(1, "1a34")?;
830 854 assert_eq!(idx.nt.growable.len(), 1);
831 855 assert_eq!(idx.find_hex("12")?, Some(0));
832 856 assert_eq!(idx.find_hex("1a")?, Some(1));
833 857
834 858 // reinserting is a no_op
835 859 idx.insert(1, "1a34")?;
836 860 assert_eq!(idx.nt.growable.len(), 1);
837 861 assert_eq!(idx.find_hex("12")?, Some(0));
838 862 assert_eq!(idx.find_hex("1a")?, Some(1));
839 863
840 864 idx.insert(2, "1a01")?;
841 865 assert_eq!(idx.nt.growable.len(), 2);
842 866 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
843 867 assert_eq!(idx.find_hex("12")?, Some(0));
844 868 assert_eq!(idx.find_hex("1a3")?, Some(1));
845 869 assert_eq!(idx.find_hex("1a0")?, Some(2));
846 870 assert_eq!(idx.find_hex("1a12")?, None);
847 871
848 872 // now let's make it split and create more than one additional block
849 873 idx.insert(3, "1a345")?;
850 874 assert_eq!(idx.nt.growable.len(), 4);
851 875 assert_eq!(idx.find_hex("1a340")?, Some(1));
852 876 assert_eq!(idx.find_hex("1a345")?, Some(3));
853 877 assert_eq!(idx.find_hex("1a341")?, None);
854 878
855 879 Ok(())
856 880 }
857 881
858 882 #[test]
859 883 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
860 884 // check that the splitting loop is long enough
861 885 let mut nt_idx = TestNtIndex::new();
862 886 let nt = &mut nt_idx.nt;
863 887 let idx = &mut nt_idx.index;
864 888
865 889 let node0_hex = hex_pad_right("444444");
866 890 let mut node1_hex = hex_pad_right("444444").clone();
867 891 node1_hex.pop();
868 892 node1_hex.push('5');
869 893 let node0 = Node::from_hex(&node0_hex).unwrap();
870 894 let node1 = Node::from_hex(&node1_hex).unwrap();
871 895
872 896 idx.insert(0, node0.clone());
873 897 nt.insert(idx, &node0, 0)?;
874 898 idx.insert(1, node1.clone());
875 899 nt.insert(idx, &node1, 1)?;
876 900
877 901 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
878 902 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
879 903 Ok(())
880 904 }
881 905
882 906 #[test]
883 907 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
884 908 let mut idx = TestNtIndex::new();
885 909 idx.insert(0, "1234")?;
886 910 idx.insert(1, "1235")?;
887 911 idx.insert(2, "131")?;
888 912 idx.insert(3, "cafe")?;
889 913 let mut idx = idx.commit();
890 914 assert_eq!(idx.find_hex("1234")?, Some(0));
891 915 assert_eq!(idx.find_hex("1235")?, Some(1));
892 916 assert_eq!(idx.find_hex("131")?, Some(2));
893 917 assert_eq!(idx.find_hex("cafe")?, Some(3));
894 918
895 919 idx.insert(4, "123A")?;
896 920 assert_eq!(idx.find_hex("1234")?, Some(0));
897 921 assert_eq!(idx.find_hex("1235")?, Some(1));
898 922 assert_eq!(idx.find_hex("131")?, Some(2));
899 923 assert_eq!(idx.find_hex("cafe")?, Some(3));
900 924 assert_eq!(idx.find_hex("123A")?, Some(4));
901 925
902 926 idx.insert(5, "c0")?;
903 927 assert_eq!(idx.find_hex("cafe")?, Some(3));
904 928 assert_eq!(idx.find_hex("c0")?, Some(5));
905 929 assert_eq!(idx.find_hex("c1")?, None);
906 930 assert_eq!(idx.find_hex("1234")?, Some(0));
907 931
908 932 Ok(())
909 933 }
910 934
911 935 #[test]
912 936 fn test_into_added_empty() {
913 937 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
914 938 assert!(sample_nodetree()
915 939 .into_readonly_and_added_bytes()
916 940 .1
917 941 .is_empty());
918 942 }
919 943
920 944 #[test]
921 945 fn test_into_added_bytes() -> Result<(), NodeMapError> {
922 946 let mut idx = TestNtIndex::new();
923 947 idx.insert(0, "1234")?;
924 948 let mut idx = idx.commit();
925 949 idx.insert(4, "cafe")?;
926 950 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
927 951
928 952 // only the root block has been changed
929 953 assert_eq!(bytes.len(), BLOCK_SIZE);
930 954 // big endian for -2
931 955 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
932 956 // big endian for -6
933 957 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
934 958 Ok(())
935 959 }
936 960 }
General Comments 0
You need to be logged in to leave comments. Login now