##// END OF EJS Templates
rustdoc: fixed or introduced crossrefs in nodemap.rs
Georges Racinet -
r51275:f2deaca3 default
parent child Browse files
Show More
@@ -1,1069 +1,1071 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 7 //!
8 8 //! This provides a variation on the 16-ary radix tree that is
9 9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 10 //! on disk.
11 11 //!
12 12 //! Following existing implicit conventions, the "nodemap" terminology
13 13 //! is used in a more abstract context.
14 14
15 15 use super::{
16 16 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
17 17 };
18 18
19 19 use bytes_cast::{unaligned, BytesCast};
20 20 use std::cmp::max;
21 21 use std::fmt;
22 22 use std::mem::{self, align_of, size_of};
23 23 use std::ops::Deref;
24 24 use std::ops::Index;
25 25
26 26 #[derive(Debug, PartialEq)]
27 27 pub enum NodeMapError {
28 /// A `NodePrefix` matches several [`Revision`]s.
28 29 MultipleResults,
29 30 /// A `Revision` stored in the nodemap could not be found in the index
30 31 RevisionNotInIndex(Revision),
31 32 }
32 33
33 34 /// Mapping system from Mercurial nodes to revision numbers.
34 35 ///
35 36 /// ## `RevlogIndex` and `NodeMap`
36 37 ///
37 38 /// One way to think about their relationship is that
38 39 /// the `NodeMap` is a prefix-oriented reverse index of the `Node` information
39 40 /// carried by a [`RevlogIndex`].
40 41 ///
41 42 /// Many of the methods in this trait take a `RevlogIndex` argument
42 43 /// which is used for validation of their results. This index must naturally
43 44 /// be the one the `NodeMap` is about, and it must be consistent.
44 45 ///
45 46 /// Notably, the `NodeMap` must not store
46 47 /// information about more `Revision` values than there are in the index.
47 48 /// In these methods, an encountered `Revision` is not in the index, a
48 49 /// [`RevisionNotInIndex`] error is returned.
49 50 ///
50 51 /// In insert operations, the rule is thus that the `NodeMap` must always
51 52 /// be updated after the `RevlogIndex`
52 53 /// be updated first, and the `NodeMap` second.
53 54 ///
54 55 /// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex
55 56 /// [`RevlogIndex`]: ../trait.RevlogIndex.html
56 57 pub trait NodeMap {
57 58 /// Find the unique `Revision` having the given `Node`
58 59 ///
59 60 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
60 61 fn find_node(
61 62 &self,
62 63 index: &impl RevlogIndex,
63 64 node: &Node,
64 65 ) -> Result<Option<Revision>, NodeMapError> {
65 66 self.find_bin(index, node.into())
66 67 }
67 68
68 69 /// Find the unique Revision whose `Node` starts with a given binary prefix
69 70 ///
70 71 /// If no Revision matches the given prefix, `Ok(None)` is returned.
71 72 ///
72 73 /// If several Revisions match the given prefix, a
73 74 /// [MultipleResults](NodeMapError) error is returned.
74 75 fn find_bin(
75 76 &self,
76 77 idx: &impl RevlogIndex,
77 78 prefix: NodePrefix,
78 79 ) -> Result<Option<Revision>, NodeMapError>;
79 80
80 81 /// Give the size of the shortest node prefix that determines
81 82 /// the revision uniquely.
82 83 ///
83 84 /// From a binary node prefix, if it is matched in the node map, this
84 85 /// returns the number of hexadecimal digits that would had sufficed
85 86 /// to find the revision uniquely.
86 87 ///
87 /// Returns `None` if no `Revision` could be found for the prefix.
88 /// Returns `None` if no [`Revision`] could be found for the prefix.
88 89 ///
89 90 /// If several Revisions match the given prefix, a
90 91 /// [MultipleResults](NodeMapError) error is returned.
91 92 fn unique_prefix_len_bin(
92 93 &self,
93 94 idx: &impl RevlogIndex,
94 95 node_prefix: NodePrefix,
95 96 ) -> Result<Option<usize>, NodeMapError>;
96 97
97 98 /// Same as `unique_prefix_len_bin`, with a full `Node` as input
98 99 fn unique_prefix_len_node(
99 100 &self,
100 101 idx: &impl RevlogIndex,
101 102 node: &Node,
102 103 ) -> Result<Option<usize>, NodeMapError> {
103 104 self.unique_prefix_len_bin(idx, node.into())
104 105 }
105 106 }
106 107
107 108 pub trait MutableNodeMap: NodeMap {
108 109 fn insert<I: RevlogIndex>(
109 110 &mut self,
110 111 index: &I,
111 112 node: &Node,
112 113 rev: Revision,
113 114 ) -> Result<(), NodeMapError>;
114 115 }
115 116
116 117 /// Low level NodeTree [`Block`] elements
117 118 ///
118 119 /// These are exactly as for instance on persistent storage.
119 120 type RawElement = unaligned::I32Be;
120 121
121 122 /// High level representation of values in NodeTree
122 123 /// [`Blocks`](struct.Block.html)
123 124 ///
124 125 /// This is the high level representation that most algorithms should
125 126 /// use.
126 127 #[derive(Clone, Debug, Eq, PartialEq)]
127 128 enum Element {
128 129 Rev(Revision),
129 130 Block(usize),
130 131 None,
131 132 }
132 133
133 134 impl From<RawElement> for Element {
134 135 /// Conversion from low level representation, after endianness conversion.
135 136 ///
136 137 /// See [`Block`](struct.Block.html) for explanation about the encoding.
137 138 fn from(raw: RawElement) -> Element {
138 139 let int = raw.get();
139 140 if int >= 0 {
140 141 Element::Block(int as usize)
141 142 } else if int == -1 {
142 143 Element::None
143 144 } else {
144 145 Element::Rev(-int - 2)
145 146 }
146 147 }
147 148 }
148 149
149 150 impl From<Element> for RawElement {
150 151 fn from(element: Element) -> RawElement {
151 152 RawElement::from(match element {
152 153 Element::None => 0,
153 154 Element::Block(i) => i as i32,
154 155 Element::Rev(rev) => -rev - 2,
155 156 })
156 157 }
157 158 }
158 159
159 160 /// A logical block of the `NodeTree`, packed with a fixed size.
160 161 ///
161 162 /// These are always used in container types implementing `Index<Block>`,
162 163 /// such as `&Block`
163 164 ///
164 165 /// As an array of integers, its ith element encodes that the
165 166 /// ith potential edge from the block, representing the ith hexadecimal digit
166 167 /// (nybble) `i` is either:
167 168 ///
168 169 /// - absent (value -1)
169 170 /// - another `Block` in the same indexable container (value β‰₯ 0)
170 /// - a `Revision` leaf (value ≀ -2)
171 /// - a [`Revision`] leaf (value ≀ -2)
171 172 ///
172 173 /// Endianness has to be fixed for consistency on shared storage across
173 174 /// different architectures.
174 175 ///
175 176 /// A key difference with the C `nodetree` is that we need to be
176 177 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
177 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
178 /// rather than 0 and the [`Revision`] range upper limit of -2 instead of -1.
178 179 ///
179 180 /// Another related difference is that `NULL_REVISION` (-1) is not
180 181 /// represented at all, because we want an immutable empty nodetree
181 182 /// to be valid.
182 183
183 184 const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
184 185
185 186 #[derive(Copy, Clone, BytesCast, PartialEq)]
186 187 #[repr(transparent)]
187 188 pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
188 189
189 190 impl Block {
190 191 fn new() -> Self {
191 192 let absent_node = RawElement::from(-1);
192 193 Block([absent_node; ELEMENTS_PER_BLOCK])
193 194 }
194 195
195 196 fn get(&self, nybble: u8) -> Element {
196 197 self.0[nybble as usize].into()
197 198 }
198 199
199 200 fn set(&mut self, nybble: u8, element: Element) {
200 201 self.0[nybble as usize] = element.into()
201 202 }
202 203 }
203 204
204 205 impl fmt::Debug for Block {
205 206 /// sparse representation for testing and debugging purposes
206 207 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
207 208 f.debug_map()
208 209 .entries((0..16).filter_map(|i| match self.get(i) {
209 210 Element::None => None,
210 211 element => Some((i, element)),
211 212 }))
212 213 .finish()
213 214 }
214 215 }
215 216
216 217 /// A mutable 16-radix tree with the root block logically at the end
217 218 ///
218 219 /// Because of the append only nature of our node trees, we need to
219 220 /// keep the original untouched and store new blocks separately.
220 221 ///
221 222 /// The mutable root `Block` is kept apart so that we don't have to rebump
222 223 /// it on each insertion.
223 224 pub struct NodeTree {
224 225 readonly: Box<dyn Deref<Target = [Block]> + Send>,
225 226 growable: Vec<Block>,
226 227 root: Block,
227 228 masked_inner_blocks: usize,
228 229 }
229 230
230 231 impl Index<usize> for NodeTree {
231 232 type Output = Block;
232 233
233 234 fn index(&self, i: usize) -> &Block {
234 235 let ro_len = self.readonly.len();
235 236 if i < ro_len {
236 237 &self.readonly[i]
237 238 } else if i == ro_len + self.growable.len() {
238 239 &self.root
239 240 } else {
240 241 &self.growable[i - ro_len]
241 242 }
242 243 }
243 244 }
244 245
245 246 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
246 247 fn has_prefix_or_none(
247 248 idx: &impl RevlogIndex,
248 249 prefix: NodePrefix,
249 250 rev: Revision,
250 251 ) -> Result<Option<Revision>, NodeMapError> {
251 252 idx.node(rev)
252 253 .ok_or(NodeMapError::RevisionNotInIndex(rev))
253 254 .map(|node| {
254 255 if prefix.is_prefix_of(node) {
255 256 Some(rev)
256 257 } else {
257 258 None
258 259 }
259 260 })
260 261 }
261 262
262 263 /// validate that the candidate's node starts indeed with given prefix,
263 264 /// and treat ambiguities related to `NULL_REVISION`.
264 265 ///
265 266 /// From the data in the NodeTree, one can only conclude that some
266 267 /// revision is the only one for a *subprefix* of the one being looked up.
267 268 fn validate_candidate(
268 269 idx: &impl RevlogIndex,
269 270 prefix: NodePrefix,
270 271 candidate: (Option<Revision>, usize),
271 272 ) -> Result<(Option<Revision>, usize), NodeMapError> {
272 273 let (rev, steps) = candidate;
273 274 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
274 275 rev.map_or(Ok((None, steps)), |r| {
275 276 has_prefix_or_none(idx, prefix, r)
276 277 .map(|opt| (opt, max(steps, nz_nybble + 1)))
277 278 })
278 279 } else {
279 280 // the prefix is only made of zeros; NULL_REVISION always matches it
280 281 // and any other *valid* result is an ambiguity
281 282 match rev {
282 283 None => Ok((Some(NULL_REVISION), steps + 1)),
283 284 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
284 285 None => Ok((Some(NULL_REVISION), steps + 1)),
285 286 _ => Err(NodeMapError::MultipleResults),
286 287 },
287 288 }
288 289 }
289 290 }
290 291
291 292 impl NodeTree {
292 293 /// Initiate a NodeTree from an immutable slice-like of `Block`
293 294 ///
294 295 /// We keep `readonly` and clone its root block if it isn't empty.
295 296 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
296 297 let root = readonly.last().cloned().unwrap_or_else(Block::new);
297 298 NodeTree {
298 299 readonly,
299 300 growable: Vec::new(),
300 301 root,
301 302 masked_inner_blocks: 0,
302 303 }
303 304 }
304 305
305 306 /// Create from an opaque bunch of bytes
306 307 ///
307 308 /// The created `NodeTreeBytes` from `buffer`,
308 309 /// of which exactly `amount` bytes are used.
309 310 ///
310 311 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
311 312 /// - `offset` allows for the final file format to include fixed data
312 313 /// (generation number, behavioural flags)
313 314 /// - `amount` is expressed in bytes, and is not automatically derived from
314 315 /// `bytes`, so that a caller that manages them atomically can perform
315 316 /// temporary disk serializations and still rollback easily if needed.
316 317 /// First use-case for this would be to support Mercurial shell hooks.
317 318 ///
318 319 /// panics if `buffer` is smaller than `amount`
319 320 pub fn load_bytes(
320 321 bytes: Box<dyn Deref<Target = [u8]> + Send>,
321 322 amount: usize,
322 323 ) -> Self {
323 324 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
324 325 }
325 326
326 /// Retrieve added `Block` and the original immutable data
327 /// Retrieve added [`Block`]s and the original immutable data
327 328 pub fn into_readonly_and_added(
328 329 self,
329 330 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
330 331 let mut vec = self.growable;
331 332 let readonly = self.readonly;
332 333 if readonly.last() != Some(&self.root) {
333 334 vec.push(self.root);
334 335 }
335 336 (readonly, vec)
336 337 }
337 338
338 /// Retrieve added `Blocks` as bytes, ready to be written to persistent
339 /// Retrieve added [`Block]s as bytes, ready to be written to persistent
339 340 /// storage
340 341 pub fn into_readonly_and_added_bytes(
341 342 self,
342 343 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
343 344 let (readonly, vec) = self.into_readonly_and_added();
344 345 // Prevent running `v`'s destructor so we are in complete control
345 346 // of the allocation.
346 347 let vec = mem::ManuallyDrop::new(vec);
347 348
348 349 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
349 350 // bytes, so this is perfectly safe.
350 351 let bytes = unsafe {
351 352 // Check for compatible allocation layout.
352 353 // (Optimized away by constant-folding + dead code elimination.)
353 354 assert_eq!(size_of::<Block>(), 64);
354 355 assert_eq!(align_of::<Block>(), 1);
355 356
356 357 // /!\ Any use of `vec` after this is use-after-free.
357 358 // TODO: use `into_raw_parts` once stabilized
358 359 Vec::from_raw_parts(
359 360 vec.as_ptr() as *mut u8,
360 361 vec.len() * size_of::<Block>(),
361 362 vec.capacity() * size_of::<Block>(),
362 363 )
363 364 };
364 365 (readonly, bytes)
365 366 }
366 367
367 368 /// Total number of blocks
368 369 fn len(&self) -> usize {
369 370 self.readonly.len() + self.growable.len() + 1
370 371 }
371 372
372 373 /// Implemented for completeness
373 374 ///
374 375 /// A `NodeTree` always has at least the mutable root block.
375 376 #[allow(dead_code)]
376 377 fn is_empty(&self) -> bool {
377 378 false
378 379 }
379 380
380 381 /// Main working method for `NodeTree` searches
381 382 ///
382 383 /// The first returned value is the result of analysing `NodeTree` data
383 384 /// *alone*: whereas `None` guarantees that the given prefix is absent
384 /// from the `NodeTree` data (but still could match `NULL_NODE`), with
385 /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision`
386 /// that could match the prefix. Actually, all that can be inferred from
385 /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with
386 /// `Some(rev)`, it is to be understood that `rev` is the unique
387 /// [`Revision`] that could match the prefix. Actually, all that can
388 /// be inferred from
387 389 /// the `NodeTree` data is that `rev` is the revision with the longest
388 390 /// common node prefix with the given prefix.
389 391 ///
390 392 /// The second returned value is the size of the smallest subprefix
391 393 /// of `prefix` that would give the same result, i.e. not the
392 /// `MultipleResults` error variant (again, using only the data of the
393 /// `NodeTree`).
394 /// [MultipleResults](NodeMapError) error variant (again, using only the
395 /// data of the [`NodeTree`]).
394 396 fn lookup(
395 397 &self,
396 398 prefix: NodePrefix,
397 399 ) -> Result<(Option<Revision>, usize), NodeMapError> {
398 400 for (i, visit_item) in self.visit(prefix).enumerate() {
399 401 if let Some(opt) = visit_item.final_revision() {
400 402 return Ok((opt, i + 1));
401 403 }
402 404 }
403 405 Err(NodeMapError::MultipleResults)
404 406 }
405 407
406 408 fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
407 409 NodeTreeVisitor {
408 410 nt: self,
409 411 prefix,
410 412 visit: self.len() - 1,
411 413 nybble_idx: 0,
412 414 done: false,
413 415 }
414 416 }
415 417 /// Return a mutable reference for `Block` at index `idx`.
416 418 ///
417 419 /// If `idx` lies in the immutable area, then the reference is to
418 420 /// a newly appended copy.
419 421 ///
420 422 /// Returns (new_idx, glen, mut_ref) where
421 423 ///
422 424 /// - `new_idx` is the index of the mutable `Block`
423 425 /// - `mut_ref` is a mutable reference to the mutable Block.
424 426 /// - `glen` is the new length of `self.growable`
425 427 ///
426 428 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
427 429 /// itself because of the mutable borrow taken with the returned `Block`
428 430 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
429 431 let ro_blocks = &self.readonly;
430 432 let ro_len = ro_blocks.len();
431 433 let glen = self.growable.len();
432 434 if idx < ro_len {
433 435 self.masked_inner_blocks += 1;
434 436 self.growable.push(ro_blocks[idx]);
435 437 (glen + ro_len, &mut self.growable[glen], glen + 1)
436 438 } else if glen + ro_len == idx {
437 439 (idx, &mut self.root, glen)
438 440 } else {
439 441 (idx, &mut self.growable[idx - ro_len], glen)
440 442 }
441 443 }
442 444
443 445 /// Main insertion method
444 446 ///
445 447 /// This will dive in the node tree to find the deepest `Block` for
446 448 /// `node`, split it as much as needed and record `node` in there.
447 449 /// The method then backtracks, updating references in all the visited
448 450 /// blocks from the root.
449 451 ///
450 452 /// All the mutated `Block` are copied first to the growable part if
451 453 /// needed. That happens for those in the immutable part except the root.
452 454 pub fn insert<I: RevlogIndex>(
453 455 &mut self,
454 456 index: &I,
455 457 node: &Node,
456 458 rev: Revision,
457 459 ) -> Result<(), NodeMapError> {
458 460 let ro_len = &self.readonly.len();
459 461
460 462 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
461 463 let read_nybbles = visit_steps.len();
462 464 // visit_steps cannot be empty, since we always visit the root block
463 465 let deepest = visit_steps.pop().unwrap();
464 466
465 467 let (mut block_idx, mut block, mut glen) =
466 468 self.mutable_block(deepest.block_idx);
467 469
468 470 if let Element::Rev(old_rev) = deepest.element {
469 471 let old_node = index
470 472 .node(old_rev)
471 473 .ok_or(NodeMapError::RevisionNotInIndex(old_rev))?;
472 474 if old_node == node {
473 475 return Ok(()); // avoid creating lots of useless blocks
474 476 }
475 477
476 478 // Looping over the tail of nybbles in both nodes, creating
477 479 // new blocks until we find the difference
478 480 let mut new_block_idx = ro_len + glen;
479 481 let mut nybble = deepest.nybble;
480 482 for nybble_pos in read_nybbles..node.nybbles_len() {
481 483 block.set(nybble, Element::Block(new_block_idx));
482 484
483 485 let new_nybble = node.get_nybble(nybble_pos);
484 486 let old_nybble = old_node.get_nybble(nybble_pos);
485 487
486 488 if old_nybble == new_nybble {
487 489 self.growable.push(Block::new());
488 490 block = &mut self.growable[glen];
489 491 glen += 1;
490 492 new_block_idx += 1;
491 493 nybble = new_nybble;
492 494 } else {
493 495 let mut new_block = Block::new();
494 496 new_block.set(old_nybble, Element::Rev(old_rev));
495 497 new_block.set(new_nybble, Element::Rev(rev));
496 498 self.growable.push(new_block);
497 499 break;
498 500 }
499 501 }
500 502 } else {
501 503 // Free slot in the deepest block: no splitting has to be done
502 504 block.set(deepest.nybble, Element::Rev(rev));
503 505 }
504 506
505 507 // Backtrack over visit steps to update references
506 508 while let Some(visited) = visit_steps.pop() {
507 509 let to_write = Element::Block(block_idx);
508 510 if visit_steps.is_empty() {
509 511 self.root.set(visited.nybble, to_write);
510 512 break;
511 513 }
512 514 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
513 515 if block.get(visited.nybble) == to_write {
514 516 break;
515 517 }
516 518 block.set(visited.nybble, to_write);
517 519 block_idx = new_idx;
518 520 }
519 521 Ok(())
520 522 }
521 523
522 524 /// Make the whole `NodeTree` logically empty, without touching the
523 525 /// immutable part.
524 526 pub fn invalidate_all(&mut self) {
525 527 self.root = Block::new();
526 528 self.growable = Vec::new();
527 529 self.masked_inner_blocks = self.readonly.len();
528 530 }
529 531
530 532 /// Return the number of blocks in the readonly part that are currently
531 533 /// masked in the mutable part.
532 534 ///
533 535 /// The `NodeTree` structure has no efficient way to know how many blocks
534 536 /// are already unreachable in the readonly part.
535 537 ///
536 538 /// After a call to `invalidate_all()`, the returned number can be actually
537 539 /// bigger than the whole readonly part, a conventional way to mean that
538 540 /// all the readonly blocks have been masked. This is what is really
539 541 /// useful to the caller and does not require to know how many were
540 542 /// actually unreachable to begin with.
541 543 pub fn masked_readonly_blocks(&self) -> usize {
542 544 if let Some(readonly_root) = self.readonly.last() {
543 545 if readonly_root == &self.root {
544 546 return 0;
545 547 }
546 548 } else {
547 549 return 0;
548 550 }
549 551 self.masked_inner_blocks + 1
550 552 }
551 553 }
552 554
553 555 pub struct NodeTreeBytes {
554 556 buffer: Box<dyn Deref<Target = [u8]> + Send>,
555 557 len_in_blocks: usize,
556 558 }
557 559
558 560 impl NodeTreeBytes {
559 561 fn new(
560 562 buffer: Box<dyn Deref<Target = [u8]> + Send>,
561 563 amount: usize,
562 564 ) -> Self {
563 565 assert!(buffer.len() >= amount);
564 566 let len_in_blocks = amount / size_of::<Block>();
565 567 NodeTreeBytes {
566 568 buffer,
567 569 len_in_blocks,
568 570 }
569 571 }
570 572 }
571 573
572 574 impl Deref for NodeTreeBytes {
573 575 type Target = [Block];
574 576
575 577 fn deref(&self) -> &[Block] {
576 578 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
577 579 // `NodeTreeBytes::new` already asserted that `self.buffer` is
578 580 // large enough.
579 581 .unwrap()
580 582 .0
581 583 }
582 584 }
583 585
584 586 struct NodeTreeVisitor<'n> {
585 587 nt: &'n NodeTree,
586 588 prefix: NodePrefix,
587 589 visit: usize,
588 590 nybble_idx: usize,
589 591 done: bool,
590 592 }
591 593
592 594 #[derive(Debug, PartialEq, Clone)]
593 595 struct NodeTreeVisitItem {
594 596 block_idx: usize,
595 597 nybble: u8,
596 598 element: Element,
597 599 }
598 600
599 601 impl<'n> Iterator for NodeTreeVisitor<'n> {
600 602 type Item = NodeTreeVisitItem;
601 603
602 604 fn next(&mut self) -> Option<Self::Item> {
603 605 if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
604 606 return None;
605 607 }
606 608
607 609 let nybble = self.prefix.get_nybble(self.nybble_idx);
608 610 self.nybble_idx += 1;
609 611
610 612 let visit = self.visit;
611 613 let element = self.nt[visit].get(nybble);
612 614 if let Element::Block(idx) = element {
613 615 self.visit = idx;
614 616 } else {
615 617 self.done = true;
616 618 }
617 619
618 620 Some(NodeTreeVisitItem {
619 621 block_idx: visit,
620 622 nybble,
621 623 element,
622 624 })
623 625 }
624 626 }
625 627
626 628 impl NodeTreeVisitItem {
627 629 // Return `Some(opt)` if this item is final, with `opt` being the
628 630 // `Revision` that it may represent.
629 631 //
630 632 // If the item is not terminal, return `None`
631 633 fn final_revision(&self) -> Option<Option<Revision>> {
632 634 match self.element {
633 635 Element::Block(_) => None,
634 636 Element::Rev(r) => Some(Some(r)),
635 637 Element::None => Some(None),
636 638 }
637 639 }
638 640 }
639 641
640 642 impl From<Vec<Block>> for NodeTree {
641 643 fn from(vec: Vec<Block>) -> Self {
642 644 Self::new(Box::new(vec))
643 645 }
644 646 }
645 647
646 648 impl fmt::Debug for NodeTree {
647 649 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
648 650 let readonly: &[Block] = &*self.readonly;
649 651 write!(
650 652 f,
651 653 "readonly: {:?}, growable: {:?}, root: {:?}",
652 654 readonly, self.growable, self.root
653 655 )
654 656 }
655 657 }
656 658
657 659 impl Default for NodeTree {
658 660 /// Create a fully mutable empty NodeTree
659 661 fn default() -> Self {
660 662 NodeTree::new(Box::new(Vec::new()))
661 663 }
662 664 }
663 665
664 666 impl NodeMap for NodeTree {
665 667 fn find_bin<'a>(
666 668 &self,
667 669 idx: &impl RevlogIndex,
668 670 prefix: NodePrefix,
669 671 ) -> Result<Option<Revision>, NodeMapError> {
670 672 validate_candidate(idx, prefix, self.lookup(prefix)?)
671 673 .map(|(opt, _shortest)| opt)
672 674 }
673 675
674 676 fn unique_prefix_len_bin<'a>(
675 677 &self,
676 678 idx: &impl RevlogIndex,
677 679 prefix: NodePrefix,
678 680 ) -> Result<Option<usize>, NodeMapError> {
679 681 validate_candidate(idx, prefix, self.lookup(prefix)?)
680 682 .map(|(opt, shortest)| opt.map(|_rev| shortest))
681 683 }
682 684 }
683 685
684 686 #[cfg(test)]
685 687 mod tests {
686 688 use super::NodeMapError::*;
687 689 use super::*;
688 690 use crate::revlog::node::{hex_pad_right, Node};
689 691 use std::collections::HashMap;
690 692
691 693 /// Creates a `Block` using a syntax close to the `Debug` output
692 694 macro_rules! block {
693 695 {$($nybble:tt : $variant:ident($val:tt)),*} => (
694 696 {
695 697 let mut block = Block::new();
696 698 $(block.set($nybble, Element::$variant($val)));*;
697 699 block
698 700 }
699 701 )
700 702 }
701 703
702 704 #[test]
703 705 fn test_block_debug() {
704 706 let mut block = Block::new();
705 707 block.set(1, Element::Rev(3));
706 708 block.set(10, Element::Block(0));
707 709 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
708 710 }
709 711
710 712 #[test]
711 713 fn test_block_macro() {
712 714 let block = block! {5: Block(2)};
713 715 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
714 716
715 717 let block = block! {13: Rev(15), 5: Block(2)};
716 718 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
717 719 }
718 720
719 721 #[test]
720 722 fn test_raw_block() {
721 723 let mut raw = [255u8; 64];
722 724
723 725 let mut counter = 0;
724 726 for val in [0_i32, 15, -2, -1, -3].iter() {
725 727 for byte in val.to_be_bytes().iter() {
726 728 raw[counter] = *byte;
727 729 counter += 1;
728 730 }
729 731 }
730 732 let (block, _) = Block::from_bytes(&raw).unwrap();
731 733 assert_eq!(block.get(0), Element::Block(0));
732 734 assert_eq!(block.get(1), Element::Block(15));
733 735 assert_eq!(block.get(3), Element::None);
734 736 assert_eq!(block.get(2), Element::Rev(0));
735 737 assert_eq!(block.get(4), Element::Rev(1));
736 738 }
737 739
738 740 type TestIndex = HashMap<Revision, Node>;
739 741
740 742 impl RevlogIndex for TestIndex {
741 743 fn node(&self, rev: Revision) -> Option<&Node> {
742 744 self.get(&rev)
743 745 }
744 746
745 747 fn len(&self) -> usize {
746 748 self.len()
747 749 }
748 750 }
749 751
750 752 /// Pad hexadecimal Node prefix with zeros on the right
751 753 ///
752 754 /// This avoids having to repeatedly write very long hexadecimal
753 755 /// strings for test data, and brings actual hash size independency.
754 756 #[cfg(test)]
755 757 fn pad_node(hex: &str) -> Node {
756 758 Node::from_hex(&hex_pad_right(hex)).unwrap()
757 759 }
758 760
759 761 /// Pad hexadecimal Node prefix with zeros on the right, then insert
760 762 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
761 763 idx.insert(rev, pad_node(hex));
762 764 }
763 765
764 766 fn sample_nodetree() -> NodeTree {
765 767 NodeTree::from(vec![
766 768 block![0: Rev(9)],
767 769 block![0: Rev(0), 1: Rev(9)],
768 770 block![0: Block(1), 1:Rev(1)],
769 771 ])
770 772 }
771 773
772 774 fn hex(s: &str) -> NodePrefix {
773 775 NodePrefix::from_hex(s).unwrap()
774 776 }
775 777
776 778 #[test]
777 779 fn test_nt_debug() {
778 780 let nt = sample_nodetree();
779 781 assert_eq!(
780 782 format!("{:?}", nt),
781 783 "readonly: \
782 784 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
783 785 growable: [], \
784 786 root: {0: Block(1), 1: Rev(1)}",
785 787 );
786 788 }
787 789
788 790 #[test]
789 791 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
790 792 let mut idx: TestIndex = HashMap::new();
791 793 pad_insert(&mut idx, 1, "1234deadcafe");
792 794
793 795 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
794 796 assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1));
795 797 assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1));
796 798 assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1));
797 799 assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
798 800 assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
799 801
800 802 // and with full binary Nodes
801 803 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
802 804 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
803 805 assert_eq!(nt.find_node(&idx, &unknown)?, None);
804 806 Ok(())
805 807 }
806 808
807 809 #[test]
808 810 fn test_immutable_find_one_jump() {
809 811 let mut idx = TestIndex::new();
810 812 pad_insert(&mut idx, 9, "012");
811 813 pad_insert(&mut idx, 0, "00a");
812 814
813 815 let nt = sample_nodetree();
814 816
815 817 assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
816 818 assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9)));
817 819 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
818 820 assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0)));
819 821 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
820 822 assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
821 823 }
822 824
823 825 #[test]
824 826 fn test_mutated_find() -> Result<(), NodeMapError> {
825 827 let mut idx = TestIndex::new();
826 828 pad_insert(&mut idx, 9, "012");
827 829 pad_insert(&mut idx, 0, "00a");
828 830 pad_insert(&mut idx, 2, "cafe");
829 831 pad_insert(&mut idx, 3, "15");
830 832 pad_insert(&mut idx, 1, "10");
831 833
832 834 let nt = NodeTree {
833 835 readonly: sample_nodetree().readonly,
834 836 growable: vec![block![0: Rev(1), 5: Rev(3)]],
835 837 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
836 838 masked_inner_blocks: 1,
837 839 };
838 840 assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1));
839 841 assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2));
840 842 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
841 843 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
842 844 assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
843 845 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
844 846 assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9));
845 847 assert_eq!(nt.masked_readonly_blocks(), 2);
846 848 Ok(())
847 849 }
848 850
849 851 struct TestNtIndex {
850 852 index: TestIndex,
851 853 nt: NodeTree,
852 854 }
853 855
854 856 impl TestNtIndex {
855 857 fn new() -> Self {
856 858 TestNtIndex {
857 859 index: HashMap::new(),
858 860 nt: NodeTree::default(),
859 861 }
860 862 }
861 863
862 864 fn insert(
863 865 &mut self,
864 866 rev: Revision,
865 867 hex: &str,
866 868 ) -> Result<(), NodeMapError> {
867 869 let node = pad_node(hex);
868 870 self.index.insert(rev, node);
869 871 self.nt.insert(&self.index, &node, rev)?;
870 872 Ok(())
871 873 }
872 874
873 875 fn find_hex(
874 876 &self,
875 877 prefix: &str,
876 878 ) -> Result<Option<Revision>, NodeMapError> {
877 879 self.nt.find_bin(&self.index, hex(prefix))
878 880 }
879 881
880 882 fn unique_prefix_len_hex(
881 883 &self,
882 884 prefix: &str,
883 885 ) -> Result<Option<usize>, NodeMapError> {
884 886 self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
885 887 }
886 888
887 889 /// Drain `added` and restart a new one
888 890 fn commit(self) -> Self {
889 891 let mut as_vec: Vec<Block> =
890 892 self.nt.readonly.iter().copied().collect();
891 893 as_vec.extend(self.nt.growable);
892 894 as_vec.push(self.nt.root);
893 895
894 896 Self {
895 897 index: self.index,
896 898 nt: NodeTree::from(as_vec),
897 899 }
898 900 }
899 901 }
900 902
901 903 #[test]
902 904 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
903 905 let mut idx = TestNtIndex::new();
904 906 idx.insert(0, "1234")?;
905 907 assert_eq!(idx.find_hex("1")?, Some(0));
906 908 assert_eq!(idx.find_hex("12")?, Some(0));
907 909
908 910 // let's trigger a simple split
909 911 idx.insert(1, "1a34")?;
910 912 assert_eq!(idx.nt.growable.len(), 1);
911 913 assert_eq!(idx.find_hex("12")?, Some(0));
912 914 assert_eq!(idx.find_hex("1a")?, Some(1));
913 915
914 916 // reinserting is a no_op
915 917 idx.insert(1, "1a34")?;
916 918 assert_eq!(idx.nt.growable.len(), 1);
917 919 assert_eq!(idx.find_hex("12")?, Some(0));
918 920 assert_eq!(idx.find_hex("1a")?, Some(1));
919 921
920 922 idx.insert(2, "1a01")?;
921 923 assert_eq!(idx.nt.growable.len(), 2);
922 924 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
923 925 assert_eq!(idx.find_hex("12")?, Some(0));
924 926 assert_eq!(idx.find_hex("1a3")?, Some(1));
925 927 assert_eq!(idx.find_hex("1a0")?, Some(2));
926 928 assert_eq!(idx.find_hex("1a12")?, None);
927 929
928 930 // now let's make it split and create more than one additional block
929 931 idx.insert(3, "1a345")?;
930 932 assert_eq!(idx.nt.growable.len(), 4);
931 933 assert_eq!(idx.find_hex("1a340")?, Some(1));
932 934 assert_eq!(idx.find_hex("1a345")?, Some(3));
933 935 assert_eq!(idx.find_hex("1a341")?, None);
934 936
935 937 // there's no readonly block to mask
936 938 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
937 939 Ok(())
938 940 }
939 941
940 942 #[test]
941 943 fn test_unique_prefix_len_zero_prefix() {
942 944 let mut idx = TestNtIndex::new();
943 945 idx.insert(0, "00000abcd").unwrap();
944 946
945 947 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
946 948 // in the nodetree proper, this will be found at the first nybble
947 949 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
948 950 // but the first difference with `NULL_NODE`
949 951 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
950 952 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
951 953
952 954 // same with odd result
953 955 idx.insert(1, "00123").unwrap();
954 956 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
955 957 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
956 958
957 959 // these are unchanged of course
958 960 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
959 961 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
960 962 }
961 963
962 964 #[test]
963 965 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
964 966 // check that the splitting loop is long enough
965 967 let mut nt_idx = TestNtIndex::new();
966 968 let nt = &mut nt_idx.nt;
967 969 let idx = &mut nt_idx.index;
968 970
969 971 let node0_hex = hex_pad_right("444444");
970 972 let mut node1_hex = hex_pad_right("444444");
971 973 node1_hex.pop();
972 974 node1_hex.push('5');
973 975 let node0 = Node::from_hex(&node0_hex).unwrap();
974 976 let node1 = Node::from_hex(&node1_hex).unwrap();
975 977
976 978 idx.insert(0, node0);
977 979 nt.insert(idx, &node0, 0)?;
978 980 idx.insert(1, node1);
979 981 nt.insert(idx, &node1, 1)?;
980 982
981 983 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
982 984 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
983 985 Ok(())
984 986 }
985 987
986 988 #[test]
987 989 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
988 990 let mut idx = TestNtIndex::new();
989 991 idx.insert(0, "1234")?;
990 992 idx.insert(1, "1235")?;
991 993 idx.insert(2, "131")?;
992 994 idx.insert(3, "cafe")?;
993 995 let mut idx = idx.commit();
994 996 assert_eq!(idx.find_hex("1234")?, Some(0));
995 997 assert_eq!(idx.find_hex("1235")?, Some(1));
996 998 assert_eq!(idx.find_hex("131")?, Some(2));
997 999 assert_eq!(idx.find_hex("cafe")?, Some(3));
998 1000 // we did not add anything since init from readonly
999 1001 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1000 1002
1001 1003 idx.insert(4, "123A")?;
1002 1004 assert_eq!(idx.find_hex("1234")?, Some(0));
1003 1005 assert_eq!(idx.find_hex("1235")?, Some(1));
1004 1006 assert_eq!(idx.find_hex("131")?, Some(2));
1005 1007 assert_eq!(idx.find_hex("cafe")?, Some(3));
1006 1008 assert_eq!(idx.find_hex("123A")?, Some(4));
1007 1009 // we masked blocks for all prefixes of "123", including the root
1008 1010 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1009 1011
1010 1012 eprintln!("{:?}", idx.nt);
1011 1013 idx.insert(5, "c0")?;
1012 1014 assert_eq!(idx.find_hex("cafe")?, Some(3));
1013 1015 assert_eq!(idx.find_hex("c0")?, Some(5));
1014 1016 assert_eq!(idx.find_hex("c1")?, None);
1015 1017 assert_eq!(idx.find_hex("1234")?, Some(0));
1016 1018 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1017 1019 // it doesn't mask anything
1018 1020 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1019 1021
1020 1022 Ok(())
1021 1023 }
1022 1024
1023 1025 #[test]
1024 1026 fn test_invalidate_all() -> Result<(), NodeMapError> {
1025 1027 let mut idx = TestNtIndex::new();
1026 1028 idx.insert(0, "1234")?;
1027 1029 idx.insert(1, "1235")?;
1028 1030 idx.insert(2, "131")?;
1029 1031 idx.insert(3, "cafe")?;
1030 1032 let mut idx = idx.commit();
1031 1033
1032 1034 idx.nt.invalidate_all();
1033 1035
1034 1036 assert_eq!(idx.find_hex("1234")?, None);
1035 1037 assert_eq!(idx.find_hex("1235")?, None);
1036 1038 assert_eq!(idx.find_hex("131")?, None);
1037 1039 assert_eq!(idx.find_hex("cafe")?, None);
1038 1040 // all the readonly blocks have been masked, this is the
1039 1041 // conventional expected response
1040 1042 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1041 1043 Ok(())
1042 1044 }
1043 1045
1044 1046 #[test]
1045 1047 fn test_into_added_empty() {
1046 1048 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1047 1049 assert!(sample_nodetree()
1048 1050 .into_readonly_and_added_bytes()
1049 1051 .1
1050 1052 .is_empty());
1051 1053 }
1052 1054
1053 1055 #[test]
1054 1056 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1055 1057 let mut idx = TestNtIndex::new();
1056 1058 idx.insert(0, "1234")?;
1057 1059 let mut idx = idx.commit();
1058 1060 idx.insert(4, "cafe")?;
1059 1061 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1060 1062
1061 1063 // only the root block has been changed
1062 1064 assert_eq!(bytes.len(), size_of::<Block>());
1063 1065 // big endian for -2
1064 1066 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1065 1067 // big endian for -6
1066 1068 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1067 1069 Ok(())
1068 1070 }
1069 1071 }
General Comments 0
You need to be logged in to leave comments. Login now