##// END OF EJS Templates
rust-nodemap: accounting for dead blocks...
Georges Racinet -
r44873:6329ce04 default
parent child Browse files
Show More
@@ -1,1056 +1,1087 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 7 //!
8 8 //! This provides a variation on the 16-ary radix tree that is
9 9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 10 //! on disk.
11 11 //!
12 12 //! Following existing implicit conventions, the "nodemap" terminology
13 13 //! is used in a more abstract context.
14 14
15 15 use super::{
16 16 node::NULL_NODE, Node, NodeError, NodePrefix, NodePrefixRef, Revision,
17 17 RevlogIndex, NULL_REVISION,
18 18 };
19 19
20 20 use std::cmp::max;
21 21 use std::fmt;
22 22 use std::mem;
23 23 use std::ops::Deref;
24 24 use std::ops::Index;
25 25 use std::slice;
26 26
27 27 #[derive(Debug, PartialEq)]
28 28 pub enum NodeMapError {
29 29 MultipleResults,
30 30 InvalidNodePrefix(NodeError),
31 31 /// A `Revision` stored in the nodemap could not be found in the index
32 32 RevisionNotInIndex(Revision),
33 33 }
34 34
35 35 impl From<NodeError> for NodeMapError {
36 36 fn from(err: NodeError) -> Self {
37 37 NodeMapError::InvalidNodePrefix(err)
38 38 }
39 39 }
40 40
41 41 /// Mapping system from Mercurial nodes to revision numbers.
42 42 ///
43 43 /// ## `RevlogIndex` and `NodeMap`
44 44 ///
45 45 /// One way to think about their relationship is that
46 46 /// the `NodeMap` is a prefix-oriented reverse index of the `Node` information
47 47 /// carried by a [`RevlogIndex`].
48 48 ///
49 49 /// Many of the methods in this trait take a `RevlogIndex` argument
50 50 /// which is used for validation of their results. This index must naturally
51 51 /// be the one the `NodeMap` is about, and it must be consistent.
52 52 ///
53 53 /// Notably, the `NodeMap` must not store
54 54 /// information about more `Revision` values than there are in the index.
55 55 /// In these methods, an encountered `Revision` is not in the index, a
56 56 /// [`RevisionNotInIndex`] error is returned.
57 57 ///
58 58 /// In insert operations, the rule is thus that the `NodeMap` must always
59 59 /// be updated after the `RevlogIndex`
60 60 /// be updated first, and the `NodeMap` second.
61 61 ///
62 62 /// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex
63 63 /// [`RevlogIndex`]: ../trait.RevlogIndex.html
64 64 pub trait NodeMap {
65 65 /// Find the unique `Revision` having the given `Node`
66 66 ///
67 67 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
68 68 fn find_node(
69 69 &self,
70 70 index: &impl RevlogIndex,
71 71 node: &Node,
72 72 ) -> Result<Option<Revision>, NodeMapError> {
73 73 self.find_bin(index, node.into())
74 74 }
75 75
76 76 /// Find the unique Revision whose `Node` starts with a given binary prefix
77 77 ///
78 78 /// If no Revision matches the given prefix, `Ok(None)` is returned.
79 79 ///
80 80 /// If several Revisions match the given prefix, a [`MultipleResults`]
81 81 /// error is returned.
82 82 fn find_bin<'a>(
83 83 &self,
84 84 idx: &impl RevlogIndex,
85 85 prefix: NodePrefixRef<'a>,
86 86 ) -> Result<Option<Revision>, NodeMapError>;
87 87
88 88 /// Find the unique Revision whose `Node` hexadecimal string representation
89 89 /// starts with a given prefix
90 90 ///
91 91 /// If no Revision matches the given prefix, `Ok(None)` is returned.
92 92 ///
93 93 /// If several Revisions match the given prefix, a [`MultipleResults`]
94 94 /// error is returned.
95 95 fn find_hex(
96 96 &self,
97 97 idx: &impl RevlogIndex,
98 98 prefix: &str,
99 99 ) -> Result<Option<Revision>, NodeMapError> {
100 100 self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
101 101 }
102 102
103 103 /// Give the size of the shortest node prefix that determines
104 104 /// the revision uniquely.
105 105 ///
106 106 /// From a binary node prefix, if it is matched in the node map, this
107 107 /// returns the number of hexadecimal digits that would had sufficed
108 108 /// to find the revision uniquely.
109 109 ///
110 110 /// Returns `None` if no `Revision` could be found for the prefix.
111 111 ///
112 112 /// If several Revisions match the given prefix, a [`MultipleResults`]
113 113 /// error is returned.
114 114 fn unique_prefix_len_bin<'a>(
115 115 &self,
116 116 idx: &impl RevlogIndex,
117 117 node_prefix: NodePrefixRef<'a>,
118 118 ) -> Result<Option<usize>, NodeMapError>;
119 119
120 120 /// Same as `unique_prefix_len_bin`, with the hexadecimal representation
121 121 /// of the prefix as input.
122 122 fn unique_prefix_len_hex(
123 123 &self,
124 124 idx: &impl RevlogIndex,
125 125 prefix: &str,
126 126 ) -> Result<Option<usize>, NodeMapError> {
127 127 self.unique_prefix_len_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
128 128 }
129 129
130 130 /// Same as `unique_prefix_len_bin`, with a full `Node` as input
131 131 fn unique_prefix_len_node(
132 132 &self,
133 133 idx: &impl RevlogIndex,
134 134 node: &Node,
135 135 ) -> Result<Option<usize>, NodeMapError> {
136 136 self.unique_prefix_len_bin(idx, node.into())
137 137 }
138 138 }
139 139
140 140 pub trait MutableNodeMap: NodeMap {
141 141 fn insert<I: RevlogIndex>(
142 142 &mut self,
143 143 index: &I,
144 144 node: &Node,
145 145 rev: Revision,
146 146 ) -> Result<(), NodeMapError>;
147 147 }
148 148
149 149 /// Low level NodeTree [`Blocks`] elements
150 150 ///
151 151 /// These are exactly as for instance on persistent storage.
152 152 type RawElement = i32;
153 153
154 154 /// High level representation of values in NodeTree
155 155 /// [`Blocks`](struct.Block.html)
156 156 ///
157 157 /// This is the high level representation that most algorithms should
158 158 /// use.
159 159 #[derive(Clone, Debug, Eq, PartialEq)]
160 160 enum Element {
161 161 Rev(Revision),
162 162 Block(usize),
163 163 None,
164 164 }
165 165
166 166 impl From<RawElement> for Element {
167 167 /// Conversion from low level representation, after endianness conversion.
168 168 ///
169 169 /// See [`Block`](struct.Block.html) for explanation about the encoding.
170 170 fn from(raw: RawElement) -> Element {
171 171 if raw >= 0 {
172 172 Element::Block(raw as usize)
173 173 } else if raw == -1 {
174 174 Element::None
175 175 } else {
176 176 Element::Rev(-raw - 2)
177 177 }
178 178 }
179 179 }
180 180
181 181 impl From<Element> for RawElement {
182 182 fn from(element: Element) -> RawElement {
183 183 match element {
184 184 Element::None => 0,
185 185 Element::Block(i) => i as RawElement,
186 186 Element::Rev(rev) => -rev - 2,
187 187 }
188 188 }
189 189 }
190 190
191 191 /// A logical block of the `NodeTree`, packed with a fixed size.
192 192 ///
193 193 /// These are always used in container types implementing `Index<Block>`,
194 194 /// such as `&Block`
195 195 ///
196 196 /// As an array of integers, its ith element encodes that the
197 197 /// ith potential edge from the block, representing the ith hexadecimal digit
198 198 /// (nybble) `i` is either:
199 199 ///
200 200 /// - absent (value -1)
201 201 /// - another `Block` in the same indexable container (value β‰₯ 0)
202 202 /// - a `Revision` leaf (value ≀ -2)
203 203 ///
204 204 /// Endianness has to be fixed for consistency on shared storage across
205 205 /// different architectures.
206 206 ///
207 207 /// A key difference with the C `nodetree` is that we need to be
208 208 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
209 209 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
210 210 ///
211 211 /// Another related difference is that `NULL_REVISION` (-1) is not
212 212 /// represented at all, because we want an immutable empty nodetree
213 213 /// to be valid.
214 214
215 215 #[derive(Copy, Clone)]
216 216 pub struct Block([u8; BLOCK_SIZE]);
217 217
218 218 /// Not derivable for arrays of length >32 until const generics are stable
219 219 impl PartialEq for Block {
220 220 fn eq(&self, other: &Self) -> bool {
221 221 &self.0[..] == &other.0[..]
222 222 }
223 223 }
224 224
225 225 pub const BLOCK_SIZE: usize = 64;
226 226
227 227 impl Block {
228 228 fn new() -> Self {
229 229 // -1 in 2's complement to create an absent node
230 230 let byte: u8 = 255;
231 231 Block([byte; BLOCK_SIZE])
232 232 }
233 233
234 234 fn get(&self, nybble: u8) -> Element {
235 235 let index = nybble as usize * mem::size_of::<RawElement>();
236 236 Element::from(RawElement::from_be_bytes([
237 237 self.0[index],
238 238 self.0[index + 1],
239 239 self.0[index + 2],
240 240 self.0[index + 3],
241 241 ]))
242 242 }
243 243
244 244 fn set(&mut self, nybble: u8, element: Element) {
245 245 let values = RawElement::to_be_bytes(element.into());
246 246 let index = nybble as usize * mem::size_of::<RawElement>();
247 247 self.0[index] = values[0];
248 248 self.0[index + 1] = values[1];
249 249 self.0[index + 2] = values[2];
250 250 self.0[index + 3] = values[3];
251 251 }
252 252 }
253 253
254 254 impl fmt::Debug for Block {
255 255 /// sparse representation for testing and debugging purposes
256 256 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
257 257 f.debug_map()
258 258 .entries((0..16).filter_map(|i| match self.get(i) {
259 259 Element::None => None,
260 260 element => Some((i, element)),
261 261 }))
262 262 .finish()
263 263 }
264 264 }
265 265
266 266 /// A mutable 16-radix tree with the root block logically at the end
267 267 ///
268 268 /// Because of the append only nature of our node trees, we need to
269 269 /// keep the original untouched and store new blocks separately.
270 270 ///
271 271 /// The mutable root `Block` is kept apart so that we don't have to rebump
272 272 /// it on each insertion.
273 273 pub struct NodeTree {
274 274 readonly: Box<dyn Deref<Target = [Block]> + Send>,
275 275 growable: Vec<Block>,
276 276 root: Block,
277 masked_inner_blocks: usize,
277 278 }
278 279
279 280 impl Index<usize> for NodeTree {
280 281 type Output = Block;
281 282
282 283 fn index(&self, i: usize) -> &Block {
283 284 let ro_len = self.readonly.len();
284 285 if i < ro_len {
285 286 &self.readonly[i]
286 287 } else if i == ro_len + self.growable.len() {
287 288 &self.root
288 289 } else {
289 290 &self.growable[i - ro_len]
290 291 }
291 292 }
292 293 }
293 294
294 295 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
295 296 fn has_prefix_or_none(
296 297 idx: &impl RevlogIndex,
297 298 prefix: NodePrefixRef,
298 299 rev: Revision,
299 300 ) -> Result<Option<Revision>, NodeMapError> {
300 301 idx.node(rev)
301 302 .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev))
302 303 .map(|node| {
303 304 if prefix.is_prefix_of(node) {
304 305 Some(rev)
305 306 } else {
306 307 None
307 308 }
308 309 })
309 310 }
310 311
311 312 /// validate that the candidate's node starts indeed with given prefix,
312 313 /// and treat ambiguities related to `NULL_REVISION`.
313 314 ///
314 315 /// From the data in the NodeTree, one can only conclude that some
315 316 /// revision is the only one for a *subprefix* of the one being looked up.
316 317 fn validate_candidate(
317 318 idx: &impl RevlogIndex,
318 319 prefix: NodePrefixRef,
319 320 candidate: (Option<Revision>, usize),
320 321 ) -> Result<(Option<Revision>, usize), NodeMapError> {
321 322 let (rev, steps) = candidate;
322 323 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
323 324 rev.map_or(Ok((None, steps)), |r| {
324 325 has_prefix_or_none(idx, prefix, r)
325 326 .map(|opt| (opt, max(steps, nz_nybble + 1)))
326 327 })
327 328 } else {
328 329 // the prefix is only made of zeros; NULL_REVISION always matches it
329 330 // and any other *valid* result is an ambiguity
330 331 match rev {
331 332 None => Ok((Some(NULL_REVISION), steps + 1)),
332 333 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
333 334 None => Ok((Some(NULL_REVISION), steps + 1)),
334 335 _ => Err(NodeMapError::MultipleResults),
335 336 },
336 337 }
337 338 }
338 339 }
339 340
340 341 impl NodeTree {
341 342 /// Initiate a NodeTree from an immutable slice-like of `Block`
342 343 ///
343 344 /// We keep `readonly` and clone its root block if it isn't empty.
344 345 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
345 346 let root = readonly
346 347 .last()
347 348 .map(|b| b.clone())
348 349 .unwrap_or_else(|| Block::new());
349 350 NodeTree {
350 351 readonly: readonly,
351 352 growable: Vec::new(),
352 353 root: root,
354 masked_inner_blocks: 0,
353 355 }
354 356 }
355 357
356 358 /// Create from an opaque bunch of bytes
357 359 ///
358 360 /// The created `NodeTreeBytes` from `buffer`,
359 361 /// of which exactly `amount` bytes are used.
360 362 ///
361 363 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
362 364 /// - `offset` allows for the final file format to include fixed data
363 365 /// (generation number, behavioural flags)
364 366 /// - `amount` is expressed in bytes, and is not automatically derived from
365 367 /// `bytes`, so that a caller that manages them atomically can perform
366 368 /// temporary disk serializations and still rollback easily if needed.
367 369 /// First use-case for this would be to support Mercurial shell hooks.
368 370 ///
369 371 /// panics if `buffer` is smaller than `amount`
370 372 pub fn load_bytes(
371 373 bytes: Box<dyn Deref<Target = [u8]> + Send>,
372 374 amount: usize,
373 375 ) -> Self {
374 376 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
375 377 }
376 378
377 379 /// Retrieve added `Block` and the original immutable data
378 380 pub fn into_readonly_and_added(
379 381 self,
380 382 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
381 383 let mut vec = self.growable;
382 384 let readonly = self.readonly;
383 385 if readonly.last() != Some(&self.root) {
384 386 vec.push(self.root);
385 387 }
386 388 (readonly, vec)
387 389 }
388 390
389 391 /// Retrieve added `Blocks` as bytes, ready to be written to persistent
390 392 /// storage
391 393 pub fn into_readonly_and_added_bytes(
392 394 self,
393 395 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
394 396 let (readonly, vec) = self.into_readonly_and_added();
395 397 // Prevent running `v`'s destructor so we are in complete control
396 398 // of the allocation.
397 399 let vec = mem::ManuallyDrop::new(vec);
398 400
399 401 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
400 402 // bytes, so this is perfectly safe.
401 403 let bytes = unsafe {
402 404 // Assert that `Block` hasn't been changed and has no padding
403 405 let _: [u8; 4 * BLOCK_SIZE] =
404 406 std::mem::transmute([Block::new(); 4]);
405 407
406 408 // /!\ Any use of `vec` after this is use-after-free.
407 409 // TODO: use `into_raw_parts` once stabilized
408 410 Vec::from_raw_parts(
409 411 vec.as_ptr() as *mut u8,
410 412 vec.len() * BLOCK_SIZE,
411 413 vec.capacity() * BLOCK_SIZE,
412 414 )
413 415 };
414 416 (readonly, bytes)
415 417 }
416 418
417 419 /// Total number of blocks
418 420 fn len(&self) -> usize {
419 421 self.readonly.len() + self.growable.len() + 1
420 422 }
421 423
422 424 /// Implemented for completeness
423 425 ///
424 426 /// A `NodeTree` always has at least the mutable root block.
425 427 #[allow(dead_code)]
426 428 fn is_empty(&self) -> bool {
427 429 false
428 430 }
429 431
430 432 /// Main working method for `NodeTree` searches
431 433 ///
432 434 /// The first returned value is the result of analysing `NodeTree` data
433 435 /// *alone*: whereas `None` guarantees that the given prefix is absent
434 436 /// from the `NodeTree` data (but still could match `NULL_NODE`), with
435 437 /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision`
436 438 /// that could match the prefix. Actually, all that can be inferred from
437 439 /// the `NodeTree` data is that `rev` is the revision with the longest
438 440 /// common node prefix with the given prefix.
439 441 ///
440 442 /// The second returned value is the size of the smallest subprefix
441 443 /// of `prefix` that would give the same result, i.e. not the
442 444 /// `MultipleResults` error variant (again, using only the data of the
443 445 /// `NodeTree`).
444 446 fn lookup(
445 447 &self,
446 448 prefix: NodePrefixRef,
447 449 ) -> Result<(Option<Revision>, usize), NodeMapError> {
448 450 for (i, visit_item) in self.visit(prefix).enumerate() {
449 451 if let Some(opt) = visit_item.final_revision() {
450 452 return Ok((opt, i + 1));
451 453 }
452 454 }
453 455 Err(NodeMapError::MultipleResults)
454 456 }
455 457
456 458 fn visit<'n, 'p>(
457 459 &'n self,
458 460 prefix: NodePrefixRef<'p>,
459 461 ) -> NodeTreeVisitor<'n, 'p> {
460 462 NodeTreeVisitor {
461 463 nt: self,
462 464 prefix: prefix,
463 465 visit: self.len() - 1,
464 466 nybble_idx: 0,
465 467 done: false,
466 468 }
467 469 }
468 470 /// Return a mutable reference for `Block` at index `idx`.
469 471 ///
470 472 /// If `idx` lies in the immutable area, then the reference is to
471 473 /// a newly appended copy.
472 474 ///
473 475 /// Returns (new_idx, glen, mut_ref) where
474 476 ///
475 477 /// - `new_idx` is the index of the mutable `Block`
476 478 /// - `mut_ref` is a mutable reference to the mutable Block.
477 479 /// - `glen` is the new length of `self.growable`
478 480 ///
479 481 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
480 482 /// itself because of the mutable borrow taken with the returned `Block`
481 483 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
482 484 let ro_blocks = &self.readonly;
483 485 let ro_len = ro_blocks.len();
484 486 let glen = self.growable.len();
485 487 if idx < ro_len {
488 self.masked_inner_blocks += 1;
486 489 // TODO OPTIM I think this makes two copies
487 490 self.growable.push(ro_blocks[idx].clone());
488 491 (glen + ro_len, &mut self.growable[glen], glen + 1)
489 492 } else if glen + ro_len == idx {
490 493 (idx, &mut self.root, glen)
491 494 } else {
492 495 (idx, &mut self.growable[idx - ro_len], glen)
493 496 }
494 497 }
495 498
496 499 /// Main insertion method
497 500 ///
498 501 /// This will dive in the node tree to find the deepest `Block` for
499 502 /// `node`, split it as much as needed and record `node` in there.
500 503 /// The method then backtracks, updating references in all the visited
501 504 /// blocks from the root.
502 505 ///
503 506 /// All the mutated `Block` are copied first to the growable part if
504 507 /// needed. That happens for those in the immutable part except the root.
505 508 pub fn insert<I: RevlogIndex>(
506 509 &mut self,
507 510 index: &I,
508 511 node: &Node,
509 512 rev: Revision,
510 513 ) -> Result<(), NodeMapError> {
511 514 let ro_len = &self.readonly.len();
512 515
513 516 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
514 517 let read_nybbles = visit_steps.len();
515 518 // visit_steps cannot be empty, since we always visit the root block
516 519 let deepest = visit_steps.pop().unwrap();
517 520
518 521 let (mut block_idx, mut block, mut glen) =
519 522 self.mutable_block(deepest.block_idx);
520 523
521 524 if let Element::Rev(old_rev) = deepest.element {
522 525 let old_node = index
523 526 .node(old_rev)
524 527 .ok_or_else(|| NodeMapError::RevisionNotInIndex(old_rev))?;
525 528 if old_node == node {
526 529 return Ok(()); // avoid creating lots of useless blocks
527 530 }
528 531
529 532 // Looping over the tail of nybbles in both nodes, creating
530 533 // new blocks until we find the difference
531 534 let mut new_block_idx = ro_len + glen;
532 535 let mut nybble = deepest.nybble;
533 536 for nybble_pos in read_nybbles..node.nybbles_len() {
534 537 block.set(nybble, Element::Block(new_block_idx));
535 538
536 539 let new_nybble = node.get_nybble(nybble_pos);
537 540 let old_nybble = old_node.get_nybble(nybble_pos);
538 541
539 542 if old_nybble == new_nybble {
540 543 self.growable.push(Block::new());
541 544 block = &mut self.growable[glen];
542 545 glen += 1;
543 546 new_block_idx += 1;
544 547 nybble = new_nybble;
545 548 } else {
546 549 let mut new_block = Block::new();
547 550 new_block.set(old_nybble, Element::Rev(old_rev));
548 551 new_block.set(new_nybble, Element::Rev(rev));
549 552 self.growable.push(new_block);
550 553 break;
551 554 }
552 555 }
553 556 } else {
554 557 // Free slot in the deepest block: no splitting has to be done
555 558 block.set(deepest.nybble, Element::Rev(rev));
556 559 }
557 560
558 561 // Backtrack over visit steps to update references
559 562 while let Some(visited) = visit_steps.pop() {
560 563 let to_write = Element::Block(block_idx);
561 564 if visit_steps.is_empty() {
562 565 self.root.set(visited.nybble, to_write);
563 566 break;
564 567 }
565 568 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
566 569 if block.get(visited.nybble) == to_write {
567 570 break;
568 571 }
569 572 block.set(visited.nybble, to_write);
570 573 block_idx = new_idx;
571 574 }
572 575 Ok(())
573 576 }
577
578 /// Return the number of blocks in the readonly part that are currently
579 /// masked in the mutable part.
580 ///
581 /// The `NodeTree` structure has no efficient way to know how many blocks
582 /// are already unreachable in the readonly part.
583 pub fn masked_readonly_blocks(&self) -> usize {
584 if let Some(readonly_root) = self.readonly.last() {
585 if readonly_root == &self.root {
586 return 0;
587 }
588 } else {
589 return 0;
590 }
591 self.masked_inner_blocks + 1
592 }
574 593 }
575 594
576 595 pub struct NodeTreeBytes {
577 596 buffer: Box<dyn Deref<Target = [u8]> + Send>,
578 597 len_in_blocks: usize,
579 598 }
580 599
581 600 impl NodeTreeBytes {
582 601 fn new(
583 602 buffer: Box<dyn Deref<Target = [u8]> + Send>,
584 603 amount: usize,
585 604 ) -> Self {
586 605 assert!(buffer.len() >= amount);
587 606 let len_in_blocks = amount / BLOCK_SIZE;
588 607 NodeTreeBytes {
589 608 buffer,
590 609 len_in_blocks,
591 610 }
592 611 }
593 612 }
594 613
595 614 impl Deref for NodeTreeBytes {
596 615 type Target = [Block];
597 616
598 617 fn deref(&self) -> &[Block] {
599 618 unsafe {
600 619 slice::from_raw_parts(
601 620 (&self.buffer).as_ptr() as *const Block,
602 621 self.len_in_blocks,
603 622 )
604 623 }
605 624 }
606 625 }
607 626
608 627 struct NodeTreeVisitor<'n, 'p> {
609 628 nt: &'n NodeTree,
610 629 prefix: NodePrefixRef<'p>,
611 630 visit: usize,
612 631 nybble_idx: usize,
613 632 done: bool,
614 633 }
615 634
616 635 #[derive(Debug, PartialEq, Clone)]
617 636 struct NodeTreeVisitItem {
618 637 block_idx: usize,
619 638 nybble: u8,
620 639 element: Element,
621 640 }
622 641
623 642 impl<'n, 'p> Iterator for NodeTreeVisitor<'n, 'p> {
624 643 type Item = NodeTreeVisitItem;
625 644
626 645 fn next(&mut self) -> Option<Self::Item> {
627 646 if self.done || self.nybble_idx >= self.prefix.len() {
628 647 return None;
629 648 }
630 649
631 650 let nybble = self.prefix.get_nybble(self.nybble_idx);
632 651 self.nybble_idx += 1;
633 652
634 653 let visit = self.visit;
635 654 let element = self.nt[visit].get(nybble);
636 655 if let Element::Block(idx) = element {
637 656 self.visit = idx;
638 657 } else {
639 658 self.done = true;
640 659 }
641 660
642 661 Some(NodeTreeVisitItem {
643 662 block_idx: visit,
644 663 nybble: nybble,
645 664 element: element,
646 665 })
647 666 }
648 667 }
649 668
650 669 impl NodeTreeVisitItem {
651 670 // Return `Some(opt)` if this item is final, with `opt` being the
652 671 // `Revision` that it may represent.
653 672 //
654 673 // If the item is not terminal, return `None`
655 674 fn final_revision(&self) -> Option<Option<Revision>> {
656 675 match self.element {
657 676 Element::Block(_) => None,
658 677 Element::Rev(r) => Some(Some(r)),
659 678 Element::None => Some(None),
660 679 }
661 680 }
662 681 }
663 682
664 683 impl From<Vec<Block>> for NodeTree {
665 684 fn from(vec: Vec<Block>) -> Self {
666 685 Self::new(Box::new(vec))
667 686 }
668 687 }
669 688
670 689 impl fmt::Debug for NodeTree {
671 690 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
672 691 let readonly: &[Block] = &*self.readonly;
673 692 write!(
674 693 f,
675 694 "readonly: {:?}, growable: {:?}, root: {:?}",
676 695 readonly, self.growable, self.root
677 696 )
678 697 }
679 698 }
680 699
681 700 impl Default for NodeTree {
682 701 /// Create a fully mutable empty NodeTree
683 702 fn default() -> Self {
684 703 NodeTree::new(Box::new(Vec::new()))
685 704 }
686 705 }
687 706
688 707 impl NodeMap for NodeTree {
689 708 fn find_bin<'a>(
690 709 &self,
691 710 idx: &impl RevlogIndex,
692 711 prefix: NodePrefixRef<'a>,
693 712 ) -> Result<Option<Revision>, NodeMapError> {
694 713 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
695 714 .map(|(opt, _shortest)| opt)
696 715 }
697 716
698 717 fn unique_prefix_len_bin<'a>(
699 718 &self,
700 719 idx: &impl RevlogIndex,
701 720 prefix: NodePrefixRef<'a>,
702 721 ) -> Result<Option<usize>, NodeMapError> {
703 722 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
704 723 .map(|(opt, shortest)| opt.map(|_rev| shortest))
705 724 }
706 725 }
707 726
708 727 #[cfg(test)]
709 728 mod tests {
710 729 use super::NodeMapError::*;
711 730 use super::*;
712 731 use crate::revlog::node::{hex_pad_right, Node};
713 732 use std::collections::HashMap;
714 733
715 734 /// Creates a `Block` using a syntax close to the `Debug` output
716 735 macro_rules! block {
717 736 {$($nybble:tt : $variant:ident($val:tt)),*} => (
718 737 {
719 738 let mut block = Block::new();
720 739 $(block.set($nybble, Element::$variant($val)));*;
721 740 block
722 741 }
723 742 )
724 743 }
725 744
726 745 #[test]
727 746 fn test_block_debug() {
728 747 let mut block = Block::new();
729 748 block.set(1, Element::Rev(3));
730 749 block.set(10, Element::Block(0));
731 750 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
732 751 }
733 752
734 753 #[test]
735 754 fn test_block_macro() {
736 755 let block = block! {5: Block(2)};
737 756 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
738 757
739 758 let block = block! {13: Rev(15), 5: Block(2)};
740 759 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
741 760 }
742 761
743 762 #[test]
744 763 fn test_raw_block() {
745 764 let mut raw = [255u8; 64];
746 765
747 766 let mut counter = 0;
748 767 for val in [0, 15, -2, -1, -3].iter() {
749 768 for byte in RawElement::to_be_bytes(*val).iter() {
750 769 raw[counter] = *byte;
751 770 counter += 1;
752 771 }
753 772 }
754 773 let block = Block(raw);
755 774 assert_eq!(block.get(0), Element::Block(0));
756 775 assert_eq!(block.get(1), Element::Block(15));
757 776 assert_eq!(block.get(3), Element::None);
758 777 assert_eq!(block.get(2), Element::Rev(0));
759 778 assert_eq!(block.get(4), Element::Rev(1));
760 779 }
761 780
762 781 type TestIndex = HashMap<Revision, Node>;
763 782
764 783 impl RevlogIndex for TestIndex {
765 784 fn node(&self, rev: Revision) -> Option<&Node> {
766 785 self.get(&rev)
767 786 }
768 787
769 788 fn len(&self) -> usize {
770 789 self.len()
771 790 }
772 791 }
773 792
774 793 /// Pad hexadecimal Node prefix with zeros on the right
775 794 ///
776 795 /// This avoids having to repeatedly write very long hexadecimal
777 796 /// strings for test data, and brings actual hash size independency.
778 797 #[cfg(test)]
779 798 fn pad_node(hex: &str) -> Node {
780 799 Node::from_hex(&hex_pad_right(hex)).unwrap()
781 800 }
782 801
783 802 /// Pad hexadecimal Node prefix with zeros on the right, then insert
784 803 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
785 804 idx.insert(rev, pad_node(hex));
786 805 }
787 806
788 807 fn sample_nodetree() -> NodeTree {
789 808 NodeTree::from(vec![
790 809 block![0: Rev(9)],
791 810 block![0: Rev(0), 1: Rev(9)],
792 811 block![0: Block(1), 1:Rev(1)],
793 812 ])
794 813 }
795 814
796 815 #[test]
797 816 fn test_nt_debug() {
798 817 let nt = sample_nodetree();
799 818 assert_eq!(
800 819 format!("{:?}", nt),
801 820 "readonly: \
802 821 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
803 822 growable: [], \
804 823 root: {0: Block(1), 1: Rev(1)}",
805 824 );
806 825 }
807 826
808 827 #[test]
809 828 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
810 829 let mut idx: TestIndex = HashMap::new();
811 830 pad_insert(&mut idx, 1, "1234deadcafe");
812 831
813 832 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
814 833 assert_eq!(nt.find_hex(&idx, "1")?, Some(1));
815 834 assert_eq!(nt.find_hex(&idx, "12")?, Some(1));
816 835 assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1));
817 836 assert_eq!(nt.find_hex(&idx, "1a")?, None);
818 837 assert_eq!(nt.find_hex(&idx, "ab")?, None);
819 838
820 839 // and with full binary Nodes
821 840 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
822 841 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
823 842 assert_eq!(nt.find_node(&idx, &unknown)?, None);
824 843 Ok(())
825 844 }
826 845
827 846 #[test]
828 847 fn test_immutable_find_one_jump() {
829 848 let mut idx = TestIndex::new();
830 849 pad_insert(&mut idx, 9, "012");
831 850 pad_insert(&mut idx, 0, "00a");
832 851
833 852 let nt = sample_nodetree();
834 853
835 854 assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults));
836 855 assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
837 856 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
838 857 assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
839 858 assert_eq!(nt.unique_prefix_len_hex(&idx, "00a"), Ok(Some(3)));
840 859 assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
841 860 }
842 861
843 862 #[test]
844 863 fn test_mutated_find() -> Result<(), NodeMapError> {
845 864 let mut idx = TestIndex::new();
846 865 pad_insert(&mut idx, 9, "012");
847 866 pad_insert(&mut idx, 0, "00a");
848 867 pad_insert(&mut idx, 2, "cafe");
849 868 pad_insert(&mut idx, 3, "15");
850 869 pad_insert(&mut idx, 1, "10");
851 870
852 871 let nt = NodeTree {
853 872 readonly: sample_nodetree().readonly,
854 873 growable: vec![block![0: Rev(1), 5: Rev(3)]],
855 874 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
875 masked_inner_blocks: 1,
856 876 };
857 877 assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
858 878 assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
859 879 assert_eq!(nt.unique_prefix_len_hex(&idx, "c")?, Some(1));
860 880 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
861 881 assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
862 882 assert_eq!(nt.unique_prefix_len_hex(&idx, "000")?, Some(3));
863 883 assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
884 assert_eq!(nt.masked_readonly_blocks(), 2);
864 885 Ok(())
865 886 }
866 887
867 888 struct TestNtIndex {
868 889 index: TestIndex,
869 890 nt: NodeTree,
870 891 }
871 892
872 893 impl TestNtIndex {
873 894 fn new() -> Self {
874 895 TestNtIndex {
875 896 index: HashMap::new(),
876 897 nt: NodeTree::default(),
877 898 }
878 899 }
879 900
880 901 fn insert(
881 902 &mut self,
882 903 rev: Revision,
883 904 hex: &str,
884 905 ) -> Result<(), NodeMapError> {
885 906 let node = pad_node(hex);
886 907 self.index.insert(rev, node.clone());
887 908 self.nt.insert(&self.index, &node, rev)?;
888 909 Ok(())
889 910 }
890 911
891 912 fn find_hex(
892 913 &self,
893 914 prefix: &str,
894 915 ) -> Result<Option<Revision>, NodeMapError> {
895 916 self.nt.find_hex(&self.index, prefix)
896 917 }
897 918
898 919 fn unique_prefix_len_hex(
899 920 &self,
900 921 prefix: &str,
901 922 ) -> Result<Option<usize>, NodeMapError> {
902 923 self.nt.unique_prefix_len_hex(&self.index, prefix)
903 924 }
904 925
905 926 /// Drain `added` and restart a new one
906 927 fn commit(self) -> Self {
907 928 let mut as_vec: Vec<Block> =
908 929 self.nt.readonly.iter().map(|block| block.clone()).collect();
909 930 as_vec.extend(self.nt.growable);
910 931 as_vec.push(self.nt.root);
911 932
912 933 Self {
913 934 index: self.index,
914 935 nt: NodeTree::from(as_vec).into(),
915 936 }
916 937 }
917 938 }
918 939
919 940 #[test]
920 941 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
921 942 let mut idx = TestNtIndex::new();
922 943 idx.insert(0, "1234")?;
923 944 assert_eq!(idx.find_hex("1")?, Some(0));
924 945 assert_eq!(idx.find_hex("12")?, Some(0));
925 946
926 947 // let's trigger a simple split
927 948 idx.insert(1, "1a34")?;
928 949 assert_eq!(idx.nt.growable.len(), 1);
929 950 assert_eq!(idx.find_hex("12")?, Some(0));
930 951 assert_eq!(idx.find_hex("1a")?, Some(1));
931 952
932 953 // reinserting is a no_op
933 954 idx.insert(1, "1a34")?;
934 955 assert_eq!(idx.nt.growable.len(), 1);
935 956 assert_eq!(idx.find_hex("12")?, Some(0));
936 957 assert_eq!(idx.find_hex("1a")?, Some(1));
937 958
938 959 idx.insert(2, "1a01")?;
939 960 assert_eq!(idx.nt.growable.len(), 2);
940 961 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
941 962 assert_eq!(idx.find_hex("12")?, Some(0));
942 963 assert_eq!(idx.find_hex("1a3")?, Some(1));
943 964 assert_eq!(idx.find_hex("1a0")?, Some(2));
944 965 assert_eq!(idx.find_hex("1a12")?, None);
945 966
946 967 // now let's make it split and create more than one additional block
947 968 idx.insert(3, "1a345")?;
948 969 assert_eq!(idx.nt.growable.len(), 4);
949 970 assert_eq!(idx.find_hex("1a340")?, Some(1));
950 971 assert_eq!(idx.find_hex("1a345")?, Some(3));
951 972 assert_eq!(idx.find_hex("1a341")?, None);
952 973
974 // there's no readonly block to mask
975 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
953 976 Ok(())
954 977 }
955 978
956 979 #[test]
957 980 fn test_unique_prefix_len_zero_prefix() {
958 981 let mut idx = TestNtIndex::new();
959 982 idx.insert(0, "00000abcd").unwrap();
960 983
961 984 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
962 985 // in the nodetree proper, this will be found at the first nybble
963 986 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
964 987 // but the first difference with `NULL_NODE`
965 988 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
966 989 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
967 990
968 991 // same with odd result
969 992 idx.insert(1, "00123").unwrap();
970 993 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
971 994 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
972 995
973 996 // these are unchanged of course
974 997 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
975 998 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
976 999 }
977 1000
978 1001 #[test]
979 1002 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
980 1003 // check that the splitting loop is long enough
981 1004 let mut nt_idx = TestNtIndex::new();
982 1005 let nt = &mut nt_idx.nt;
983 1006 let idx = &mut nt_idx.index;
984 1007
985 1008 let node0_hex = hex_pad_right("444444");
986 1009 let mut node1_hex = hex_pad_right("444444").clone();
987 1010 node1_hex.pop();
988 1011 node1_hex.push('5');
989 1012 let node0 = Node::from_hex(&node0_hex).unwrap();
990 1013 let node1 = Node::from_hex(&node1_hex).unwrap();
991 1014
992 1015 idx.insert(0, node0.clone());
993 1016 nt.insert(idx, &node0, 0)?;
994 1017 idx.insert(1, node1.clone());
995 1018 nt.insert(idx, &node1, 1)?;
996 1019
997 1020 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
998 1021 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
999 1022 Ok(())
1000 1023 }
1001 1024
1002 1025 #[test]
1003 1026 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
1004 1027 let mut idx = TestNtIndex::new();
1005 1028 idx.insert(0, "1234")?;
1006 1029 idx.insert(1, "1235")?;
1007 1030 idx.insert(2, "131")?;
1008 1031 idx.insert(3, "cafe")?;
1009 1032 let mut idx = idx.commit();
1010 1033 assert_eq!(idx.find_hex("1234")?, Some(0));
1011 1034 assert_eq!(idx.find_hex("1235")?, Some(1));
1012 1035 assert_eq!(idx.find_hex("131")?, Some(2));
1013 1036 assert_eq!(idx.find_hex("cafe")?, Some(3));
1037 // we did not add anything since init from readonly
1038 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1014 1039
1015 1040 idx.insert(4, "123A")?;
1016 1041 assert_eq!(idx.find_hex("1234")?, Some(0));
1017 1042 assert_eq!(idx.find_hex("1235")?, Some(1));
1018 1043 assert_eq!(idx.find_hex("131")?, Some(2));
1019 1044 assert_eq!(idx.find_hex("cafe")?, Some(3));
1020 1045 assert_eq!(idx.find_hex("123A")?, Some(4));
1046 // we masked blocks for all prefixes of "123", including the root
1047 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1021 1048
1049 eprintln!("{:?}", idx.nt);
1022 1050 idx.insert(5, "c0")?;
1023 1051 assert_eq!(idx.find_hex("cafe")?, Some(3));
1024 1052 assert_eq!(idx.find_hex("c0")?, Some(5));
1025 1053 assert_eq!(idx.find_hex("c1")?, None);
1026 1054 assert_eq!(idx.find_hex("1234")?, Some(0));
1055 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1056 // it doesn't mask anything
1057 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1027 1058
1028 1059 Ok(())
1029 1060 }
1030 1061
1031 1062 #[test]
1032 1063 fn test_into_added_empty() {
1033 1064 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1034 1065 assert!(sample_nodetree()
1035 1066 .into_readonly_and_added_bytes()
1036 1067 .1
1037 1068 .is_empty());
1038 1069 }
1039 1070
1040 1071 #[test]
1041 1072 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1042 1073 let mut idx = TestNtIndex::new();
1043 1074 idx.insert(0, "1234")?;
1044 1075 let mut idx = idx.commit();
1045 1076 idx.insert(4, "cafe")?;
1046 1077 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1047 1078
1048 1079 // only the root block has been changed
1049 1080 assert_eq!(bytes.len(), BLOCK_SIZE);
1050 1081 // big endian for -2
1051 1082 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1052 1083 // big endian for -6
1053 1084 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1054 1085 Ok(())
1055 1086 }
1056 1087 }
General Comments 0
You need to be logged in to leave comments. Login now