##// END OF EJS Templates
rust-nodemap: a method for full invalidation...
Georges Racinet -
r44874:d5189943 default
parent child Browse files
Show More
@@ -1,1087 +1,1122 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 7 //!
8 8 //! This provides a variation on the 16-ary radix tree that is
9 9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 10 //! on disk.
11 11 //!
12 12 //! Following existing implicit conventions, the "nodemap" terminology
13 13 //! is used in a more abstract context.
14 14
15 15 use super::{
16 16 node::NULL_NODE, Node, NodeError, NodePrefix, NodePrefixRef, Revision,
17 17 RevlogIndex, NULL_REVISION,
18 18 };
19 19
20 20 use std::cmp::max;
21 21 use std::fmt;
22 22 use std::mem;
23 23 use std::ops::Deref;
24 24 use std::ops::Index;
25 25 use std::slice;
26 26
27 27 #[derive(Debug, PartialEq)]
28 28 pub enum NodeMapError {
29 29 MultipleResults,
30 30 InvalidNodePrefix(NodeError),
31 31 /// A `Revision` stored in the nodemap could not be found in the index
32 32 RevisionNotInIndex(Revision),
33 33 }
34 34
35 35 impl From<NodeError> for NodeMapError {
36 36 fn from(err: NodeError) -> Self {
37 37 NodeMapError::InvalidNodePrefix(err)
38 38 }
39 39 }
40 40
41 41 /// Mapping system from Mercurial nodes to revision numbers.
42 42 ///
43 43 /// ## `RevlogIndex` and `NodeMap`
44 44 ///
45 45 /// One way to think about their relationship is that
46 46 /// the `NodeMap` is a prefix-oriented reverse index of the `Node` information
47 47 /// carried by a [`RevlogIndex`].
48 48 ///
49 49 /// Many of the methods in this trait take a `RevlogIndex` argument
50 50 /// which is used for validation of their results. This index must naturally
51 51 /// be the one the `NodeMap` is about, and it must be consistent.
52 52 ///
53 53 /// Notably, the `NodeMap` must not store
54 54 /// information about more `Revision` values than there are in the index.
55 55 /// In these methods, an encountered `Revision` is not in the index, a
56 56 /// [`RevisionNotInIndex`] error is returned.
57 57 ///
58 58 /// In insert operations, the rule is thus that the `NodeMap` must always
59 59 /// be updated after the `RevlogIndex`
60 60 /// be updated first, and the `NodeMap` second.
61 61 ///
62 62 /// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex
63 63 /// [`RevlogIndex`]: ../trait.RevlogIndex.html
64 64 pub trait NodeMap {
65 65 /// Find the unique `Revision` having the given `Node`
66 66 ///
67 67 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
68 68 fn find_node(
69 69 &self,
70 70 index: &impl RevlogIndex,
71 71 node: &Node,
72 72 ) -> Result<Option<Revision>, NodeMapError> {
73 73 self.find_bin(index, node.into())
74 74 }
75 75
76 76 /// Find the unique Revision whose `Node` starts with a given binary prefix
77 77 ///
78 78 /// If no Revision matches the given prefix, `Ok(None)` is returned.
79 79 ///
80 80 /// If several Revisions match the given prefix, a [`MultipleResults`]
81 81 /// error is returned.
82 82 fn find_bin<'a>(
83 83 &self,
84 84 idx: &impl RevlogIndex,
85 85 prefix: NodePrefixRef<'a>,
86 86 ) -> Result<Option<Revision>, NodeMapError>;
87 87
88 88 /// Find the unique Revision whose `Node` hexadecimal string representation
89 89 /// starts with a given prefix
90 90 ///
91 91 /// If no Revision matches the given prefix, `Ok(None)` is returned.
92 92 ///
93 93 /// If several Revisions match the given prefix, a [`MultipleResults`]
94 94 /// error is returned.
95 95 fn find_hex(
96 96 &self,
97 97 idx: &impl RevlogIndex,
98 98 prefix: &str,
99 99 ) -> Result<Option<Revision>, NodeMapError> {
100 100 self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
101 101 }
102 102
103 103 /// Give the size of the shortest node prefix that determines
104 104 /// the revision uniquely.
105 105 ///
106 106 /// From a binary node prefix, if it is matched in the node map, this
107 107 /// returns the number of hexadecimal digits that would had sufficed
108 108 /// to find the revision uniquely.
109 109 ///
110 110 /// Returns `None` if no `Revision` could be found for the prefix.
111 111 ///
112 112 /// If several Revisions match the given prefix, a [`MultipleResults`]
113 113 /// error is returned.
114 114 fn unique_prefix_len_bin<'a>(
115 115 &self,
116 116 idx: &impl RevlogIndex,
117 117 node_prefix: NodePrefixRef<'a>,
118 118 ) -> Result<Option<usize>, NodeMapError>;
119 119
120 120 /// Same as `unique_prefix_len_bin`, with the hexadecimal representation
121 121 /// of the prefix as input.
122 122 fn unique_prefix_len_hex(
123 123 &self,
124 124 idx: &impl RevlogIndex,
125 125 prefix: &str,
126 126 ) -> Result<Option<usize>, NodeMapError> {
127 127 self.unique_prefix_len_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
128 128 }
129 129
130 130 /// Same as `unique_prefix_len_bin`, with a full `Node` as input
131 131 fn unique_prefix_len_node(
132 132 &self,
133 133 idx: &impl RevlogIndex,
134 134 node: &Node,
135 135 ) -> Result<Option<usize>, NodeMapError> {
136 136 self.unique_prefix_len_bin(idx, node.into())
137 137 }
138 138 }
139 139
140 140 pub trait MutableNodeMap: NodeMap {
141 141 fn insert<I: RevlogIndex>(
142 142 &mut self,
143 143 index: &I,
144 144 node: &Node,
145 145 rev: Revision,
146 146 ) -> Result<(), NodeMapError>;
147 147 }
148 148
149 149 /// Low level NodeTree [`Blocks`] elements
150 150 ///
151 151 /// These are exactly as for instance on persistent storage.
152 152 type RawElement = i32;
153 153
154 154 /// High level representation of values in NodeTree
155 155 /// [`Blocks`](struct.Block.html)
156 156 ///
157 157 /// This is the high level representation that most algorithms should
158 158 /// use.
159 159 #[derive(Clone, Debug, Eq, PartialEq)]
160 160 enum Element {
161 161 Rev(Revision),
162 162 Block(usize),
163 163 None,
164 164 }
165 165
166 166 impl From<RawElement> for Element {
167 167 /// Conversion from low level representation, after endianness conversion.
168 168 ///
169 169 /// See [`Block`](struct.Block.html) for explanation about the encoding.
170 170 fn from(raw: RawElement) -> Element {
171 171 if raw >= 0 {
172 172 Element::Block(raw as usize)
173 173 } else if raw == -1 {
174 174 Element::None
175 175 } else {
176 176 Element::Rev(-raw - 2)
177 177 }
178 178 }
179 179 }
180 180
181 181 impl From<Element> for RawElement {
182 182 fn from(element: Element) -> RawElement {
183 183 match element {
184 184 Element::None => 0,
185 185 Element::Block(i) => i as RawElement,
186 186 Element::Rev(rev) => -rev - 2,
187 187 }
188 188 }
189 189 }
190 190
191 191 /// A logical block of the `NodeTree`, packed with a fixed size.
192 192 ///
193 193 /// These are always used in container types implementing `Index<Block>`,
194 194 /// such as `&Block`
195 195 ///
196 196 /// As an array of integers, its ith element encodes that the
197 197 /// ith potential edge from the block, representing the ith hexadecimal digit
198 198 /// (nybble) `i` is either:
199 199 ///
200 200 /// - absent (value -1)
201 201 /// - another `Block` in the same indexable container (value β‰₯ 0)
202 202 /// - a `Revision` leaf (value ≀ -2)
203 203 ///
204 204 /// Endianness has to be fixed for consistency on shared storage across
205 205 /// different architectures.
206 206 ///
207 207 /// A key difference with the C `nodetree` is that we need to be
208 208 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
209 209 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
210 210 ///
211 211 /// Another related difference is that `NULL_REVISION` (-1) is not
212 212 /// represented at all, because we want an immutable empty nodetree
213 213 /// to be valid.
214 214
215 215 #[derive(Copy, Clone)]
216 216 pub struct Block([u8; BLOCK_SIZE]);
217 217
218 218 /// Not derivable for arrays of length >32 until const generics are stable
219 219 impl PartialEq for Block {
220 220 fn eq(&self, other: &Self) -> bool {
221 221 &self.0[..] == &other.0[..]
222 222 }
223 223 }
224 224
225 225 pub const BLOCK_SIZE: usize = 64;
226 226
227 227 impl Block {
228 228 fn new() -> Self {
229 229 // -1 in 2's complement to create an absent node
230 230 let byte: u8 = 255;
231 231 Block([byte; BLOCK_SIZE])
232 232 }
233 233
234 234 fn get(&self, nybble: u8) -> Element {
235 235 let index = nybble as usize * mem::size_of::<RawElement>();
236 236 Element::from(RawElement::from_be_bytes([
237 237 self.0[index],
238 238 self.0[index + 1],
239 239 self.0[index + 2],
240 240 self.0[index + 3],
241 241 ]))
242 242 }
243 243
244 244 fn set(&mut self, nybble: u8, element: Element) {
245 245 let values = RawElement::to_be_bytes(element.into());
246 246 let index = nybble as usize * mem::size_of::<RawElement>();
247 247 self.0[index] = values[0];
248 248 self.0[index + 1] = values[1];
249 249 self.0[index + 2] = values[2];
250 250 self.0[index + 3] = values[3];
251 251 }
252 252 }
253 253
254 254 impl fmt::Debug for Block {
255 255 /// sparse representation for testing and debugging purposes
256 256 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
257 257 f.debug_map()
258 258 .entries((0..16).filter_map(|i| match self.get(i) {
259 259 Element::None => None,
260 260 element => Some((i, element)),
261 261 }))
262 262 .finish()
263 263 }
264 264 }
265 265
266 266 /// A mutable 16-radix tree with the root block logically at the end
267 267 ///
268 268 /// Because of the append only nature of our node trees, we need to
269 269 /// keep the original untouched and store new blocks separately.
270 270 ///
271 271 /// The mutable root `Block` is kept apart so that we don't have to rebump
272 272 /// it on each insertion.
273 273 pub struct NodeTree {
274 274 readonly: Box<dyn Deref<Target = [Block]> + Send>,
275 275 growable: Vec<Block>,
276 276 root: Block,
277 277 masked_inner_blocks: usize,
278 278 }
279 279
280 280 impl Index<usize> for NodeTree {
281 281 type Output = Block;
282 282
283 283 fn index(&self, i: usize) -> &Block {
284 284 let ro_len = self.readonly.len();
285 285 if i < ro_len {
286 286 &self.readonly[i]
287 287 } else if i == ro_len + self.growable.len() {
288 288 &self.root
289 289 } else {
290 290 &self.growable[i - ro_len]
291 291 }
292 292 }
293 293 }
294 294
295 295 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
296 296 fn has_prefix_or_none(
297 297 idx: &impl RevlogIndex,
298 298 prefix: NodePrefixRef,
299 299 rev: Revision,
300 300 ) -> Result<Option<Revision>, NodeMapError> {
301 301 idx.node(rev)
302 302 .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev))
303 303 .map(|node| {
304 304 if prefix.is_prefix_of(node) {
305 305 Some(rev)
306 306 } else {
307 307 None
308 308 }
309 309 })
310 310 }
311 311
312 312 /// validate that the candidate's node starts indeed with given prefix,
313 313 /// and treat ambiguities related to `NULL_REVISION`.
314 314 ///
315 315 /// From the data in the NodeTree, one can only conclude that some
316 316 /// revision is the only one for a *subprefix* of the one being looked up.
317 317 fn validate_candidate(
318 318 idx: &impl RevlogIndex,
319 319 prefix: NodePrefixRef,
320 320 candidate: (Option<Revision>, usize),
321 321 ) -> Result<(Option<Revision>, usize), NodeMapError> {
322 322 let (rev, steps) = candidate;
323 323 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
324 324 rev.map_or(Ok((None, steps)), |r| {
325 325 has_prefix_or_none(idx, prefix, r)
326 326 .map(|opt| (opt, max(steps, nz_nybble + 1)))
327 327 })
328 328 } else {
329 329 // the prefix is only made of zeros; NULL_REVISION always matches it
330 330 // and any other *valid* result is an ambiguity
331 331 match rev {
332 332 None => Ok((Some(NULL_REVISION), steps + 1)),
333 333 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
334 334 None => Ok((Some(NULL_REVISION), steps + 1)),
335 335 _ => Err(NodeMapError::MultipleResults),
336 336 },
337 337 }
338 338 }
339 339 }
340 340
341 341 impl NodeTree {
342 342 /// Initiate a NodeTree from an immutable slice-like of `Block`
343 343 ///
344 344 /// We keep `readonly` and clone its root block if it isn't empty.
345 345 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
346 346 let root = readonly
347 347 .last()
348 348 .map(|b| b.clone())
349 349 .unwrap_or_else(|| Block::new());
350 350 NodeTree {
351 351 readonly: readonly,
352 352 growable: Vec::new(),
353 353 root: root,
354 354 masked_inner_blocks: 0,
355 355 }
356 356 }
357 357
358 358 /// Create from an opaque bunch of bytes
359 359 ///
360 360 /// The created `NodeTreeBytes` from `buffer`,
361 361 /// of which exactly `amount` bytes are used.
362 362 ///
363 363 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
364 364 /// - `offset` allows for the final file format to include fixed data
365 365 /// (generation number, behavioural flags)
366 366 /// - `amount` is expressed in bytes, and is not automatically derived from
367 367 /// `bytes`, so that a caller that manages them atomically can perform
368 368 /// temporary disk serializations and still rollback easily if needed.
369 369 /// First use-case for this would be to support Mercurial shell hooks.
370 370 ///
371 371 /// panics if `buffer` is smaller than `amount`
372 372 pub fn load_bytes(
373 373 bytes: Box<dyn Deref<Target = [u8]> + Send>,
374 374 amount: usize,
375 375 ) -> Self {
376 376 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
377 377 }
378 378
379 379 /// Retrieve added `Block` and the original immutable data
380 380 pub fn into_readonly_and_added(
381 381 self,
382 382 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
383 383 let mut vec = self.growable;
384 384 let readonly = self.readonly;
385 385 if readonly.last() != Some(&self.root) {
386 386 vec.push(self.root);
387 387 }
388 388 (readonly, vec)
389 389 }
390 390
391 391 /// Retrieve added `Blocks` as bytes, ready to be written to persistent
392 392 /// storage
393 393 pub fn into_readonly_and_added_bytes(
394 394 self,
395 395 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
396 396 let (readonly, vec) = self.into_readonly_and_added();
397 397 // Prevent running `v`'s destructor so we are in complete control
398 398 // of the allocation.
399 399 let vec = mem::ManuallyDrop::new(vec);
400 400
401 401 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
402 402 // bytes, so this is perfectly safe.
403 403 let bytes = unsafe {
404 404 // Assert that `Block` hasn't been changed and has no padding
405 405 let _: [u8; 4 * BLOCK_SIZE] =
406 406 std::mem::transmute([Block::new(); 4]);
407 407
408 408 // /!\ Any use of `vec` after this is use-after-free.
409 409 // TODO: use `into_raw_parts` once stabilized
410 410 Vec::from_raw_parts(
411 411 vec.as_ptr() as *mut u8,
412 412 vec.len() * BLOCK_SIZE,
413 413 vec.capacity() * BLOCK_SIZE,
414 414 )
415 415 };
416 416 (readonly, bytes)
417 417 }
418 418
419 419 /// Total number of blocks
420 420 fn len(&self) -> usize {
421 421 self.readonly.len() + self.growable.len() + 1
422 422 }
423 423
424 424 /// Implemented for completeness
425 425 ///
426 426 /// A `NodeTree` always has at least the mutable root block.
427 427 #[allow(dead_code)]
428 428 fn is_empty(&self) -> bool {
429 429 false
430 430 }
431 431
432 432 /// Main working method for `NodeTree` searches
433 433 ///
434 434 /// The first returned value is the result of analysing `NodeTree` data
435 435 /// *alone*: whereas `None` guarantees that the given prefix is absent
436 436 /// from the `NodeTree` data (but still could match `NULL_NODE`), with
437 437 /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision`
438 438 /// that could match the prefix. Actually, all that can be inferred from
439 439 /// the `NodeTree` data is that `rev` is the revision with the longest
440 440 /// common node prefix with the given prefix.
441 441 ///
442 442 /// The second returned value is the size of the smallest subprefix
443 443 /// of `prefix` that would give the same result, i.e. not the
444 444 /// `MultipleResults` error variant (again, using only the data of the
445 445 /// `NodeTree`).
446 446 fn lookup(
447 447 &self,
448 448 prefix: NodePrefixRef,
449 449 ) -> Result<(Option<Revision>, usize), NodeMapError> {
450 450 for (i, visit_item) in self.visit(prefix).enumerate() {
451 451 if let Some(opt) = visit_item.final_revision() {
452 452 return Ok((opt, i + 1));
453 453 }
454 454 }
455 455 Err(NodeMapError::MultipleResults)
456 456 }
457 457
458 458 fn visit<'n, 'p>(
459 459 &'n self,
460 460 prefix: NodePrefixRef<'p>,
461 461 ) -> NodeTreeVisitor<'n, 'p> {
462 462 NodeTreeVisitor {
463 463 nt: self,
464 464 prefix: prefix,
465 465 visit: self.len() - 1,
466 466 nybble_idx: 0,
467 467 done: false,
468 468 }
469 469 }
470 470 /// Return a mutable reference for `Block` at index `idx`.
471 471 ///
472 472 /// If `idx` lies in the immutable area, then the reference is to
473 473 /// a newly appended copy.
474 474 ///
475 475 /// Returns (new_idx, glen, mut_ref) where
476 476 ///
477 477 /// - `new_idx` is the index of the mutable `Block`
478 478 /// - `mut_ref` is a mutable reference to the mutable Block.
479 479 /// - `glen` is the new length of `self.growable`
480 480 ///
481 481 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
482 482 /// itself because of the mutable borrow taken with the returned `Block`
483 483 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
484 484 let ro_blocks = &self.readonly;
485 485 let ro_len = ro_blocks.len();
486 486 let glen = self.growable.len();
487 487 if idx < ro_len {
488 488 self.masked_inner_blocks += 1;
489 489 // TODO OPTIM I think this makes two copies
490 490 self.growable.push(ro_blocks[idx].clone());
491 491 (glen + ro_len, &mut self.growable[glen], glen + 1)
492 492 } else if glen + ro_len == idx {
493 493 (idx, &mut self.root, glen)
494 494 } else {
495 495 (idx, &mut self.growable[idx - ro_len], glen)
496 496 }
497 497 }
498 498
499 499 /// Main insertion method
500 500 ///
501 501 /// This will dive in the node tree to find the deepest `Block` for
502 502 /// `node`, split it as much as needed and record `node` in there.
503 503 /// The method then backtracks, updating references in all the visited
504 504 /// blocks from the root.
505 505 ///
506 506 /// All the mutated `Block` are copied first to the growable part if
507 507 /// needed. That happens for those in the immutable part except the root.
508 508 pub fn insert<I: RevlogIndex>(
509 509 &mut self,
510 510 index: &I,
511 511 node: &Node,
512 512 rev: Revision,
513 513 ) -> Result<(), NodeMapError> {
514 514 let ro_len = &self.readonly.len();
515 515
516 516 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
517 517 let read_nybbles = visit_steps.len();
518 518 // visit_steps cannot be empty, since we always visit the root block
519 519 let deepest = visit_steps.pop().unwrap();
520 520
521 521 let (mut block_idx, mut block, mut glen) =
522 522 self.mutable_block(deepest.block_idx);
523 523
524 524 if let Element::Rev(old_rev) = deepest.element {
525 525 let old_node = index
526 526 .node(old_rev)
527 527 .ok_or_else(|| NodeMapError::RevisionNotInIndex(old_rev))?;
528 528 if old_node == node {
529 529 return Ok(()); // avoid creating lots of useless blocks
530 530 }
531 531
532 532 // Looping over the tail of nybbles in both nodes, creating
533 533 // new blocks until we find the difference
534 534 let mut new_block_idx = ro_len + glen;
535 535 let mut nybble = deepest.nybble;
536 536 for nybble_pos in read_nybbles..node.nybbles_len() {
537 537 block.set(nybble, Element::Block(new_block_idx));
538 538
539 539 let new_nybble = node.get_nybble(nybble_pos);
540 540 let old_nybble = old_node.get_nybble(nybble_pos);
541 541
542 542 if old_nybble == new_nybble {
543 543 self.growable.push(Block::new());
544 544 block = &mut self.growable[glen];
545 545 glen += 1;
546 546 new_block_idx += 1;
547 547 nybble = new_nybble;
548 548 } else {
549 549 let mut new_block = Block::new();
550 550 new_block.set(old_nybble, Element::Rev(old_rev));
551 551 new_block.set(new_nybble, Element::Rev(rev));
552 552 self.growable.push(new_block);
553 553 break;
554 554 }
555 555 }
556 556 } else {
557 557 // Free slot in the deepest block: no splitting has to be done
558 558 block.set(deepest.nybble, Element::Rev(rev));
559 559 }
560 560
561 561 // Backtrack over visit steps to update references
562 562 while let Some(visited) = visit_steps.pop() {
563 563 let to_write = Element::Block(block_idx);
564 564 if visit_steps.is_empty() {
565 565 self.root.set(visited.nybble, to_write);
566 566 break;
567 567 }
568 568 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
569 569 if block.get(visited.nybble) == to_write {
570 570 break;
571 571 }
572 572 block.set(visited.nybble, to_write);
573 573 block_idx = new_idx;
574 574 }
575 575 Ok(())
576 576 }
577 577
578 /// Make the whole `NodeTree` logically empty, without touching the
579 /// immutable part.
580 pub fn invalidate_all(&mut self) {
581 self.root = Block::new();
582 self.growable = Vec::new();
583 self.masked_inner_blocks = self.readonly.len();
584 }
585
578 586 /// Return the number of blocks in the readonly part that are currently
579 587 /// masked in the mutable part.
580 588 ///
581 589 /// The `NodeTree` structure has no efficient way to know how many blocks
582 590 /// are already unreachable in the readonly part.
591 ///
592 /// After a call to `invalidate_all()`, the returned number can be actually
593 /// bigger than the whole readonly part, a conventional way to mean that
594 /// all the readonly blocks have been masked. This is what is really
595 /// useful to the caller and does not require to know how many were
596 /// actually unreachable to begin with.
583 597 pub fn masked_readonly_blocks(&self) -> usize {
584 598 if let Some(readonly_root) = self.readonly.last() {
585 599 if readonly_root == &self.root {
586 600 return 0;
587 601 }
588 602 } else {
589 603 return 0;
590 604 }
591 605 self.masked_inner_blocks + 1
592 606 }
593 607 }
594 608
595 609 pub struct NodeTreeBytes {
596 610 buffer: Box<dyn Deref<Target = [u8]> + Send>,
597 611 len_in_blocks: usize,
598 612 }
599 613
600 614 impl NodeTreeBytes {
601 615 fn new(
602 616 buffer: Box<dyn Deref<Target = [u8]> + Send>,
603 617 amount: usize,
604 618 ) -> Self {
605 619 assert!(buffer.len() >= amount);
606 620 let len_in_blocks = amount / BLOCK_SIZE;
607 621 NodeTreeBytes {
608 622 buffer,
609 623 len_in_blocks,
610 624 }
611 625 }
612 626 }
613 627
614 628 impl Deref for NodeTreeBytes {
615 629 type Target = [Block];
616 630
617 631 fn deref(&self) -> &[Block] {
618 632 unsafe {
619 633 slice::from_raw_parts(
620 634 (&self.buffer).as_ptr() as *const Block,
621 635 self.len_in_blocks,
622 636 )
623 637 }
624 638 }
625 639 }
626 640
627 641 struct NodeTreeVisitor<'n, 'p> {
628 642 nt: &'n NodeTree,
629 643 prefix: NodePrefixRef<'p>,
630 644 visit: usize,
631 645 nybble_idx: usize,
632 646 done: bool,
633 647 }
634 648
635 649 #[derive(Debug, PartialEq, Clone)]
636 650 struct NodeTreeVisitItem {
637 651 block_idx: usize,
638 652 nybble: u8,
639 653 element: Element,
640 654 }
641 655
642 656 impl<'n, 'p> Iterator for NodeTreeVisitor<'n, 'p> {
643 657 type Item = NodeTreeVisitItem;
644 658
645 659 fn next(&mut self) -> Option<Self::Item> {
646 660 if self.done || self.nybble_idx >= self.prefix.len() {
647 661 return None;
648 662 }
649 663
650 664 let nybble = self.prefix.get_nybble(self.nybble_idx);
651 665 self.nybble_idx += 1;
652 666
653 667 let visit = self.visit;
654 668 let element = self.nt[visit].get(nybble);
655 669 if let Element::Block(idx) = element {
656 670 self.visit = idx;
657 671 } else {
658 672 self.done = true;
659 673 }
660 674
661 675 Some(NodeTreeVisitItem {
662 676 block_idx: visit,
663 677 nybble: nybble,
664 678 element: element,
665 679 })
666 680 }
667 681 }
668 682
669 683 impl NodeTreeVisitItem {
670 684 // Return `Some(opt)` if this item is final, with `opt` being the
671 685 // `Revision` that it may represent.
672 686 //
673 687 // If the item is not terminal, return `None`
674 688 fn final_revision(&self) -> Option<Option<Revision>> {
675 689 match self.element {
676 690 Element::Block(_) => None,
677 691 Element::Rev(r) => Some(Some(r)),
678 692 Element::None => Some(None),
679 693 }
680 694 }
681 695 }
682 696
683 697 impl From<Vec<Block>> for NodeTree {
684 698 fn from(vec: Vec<Block>) -> Self {
685 699 Self::new(Box::new(vec))
686 700 }
687 701 }
688 702
689 703 impl fmt::Debug for NodeTree {
690 704 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
691 705 let readonly: &[Block] = &*self.readonly;
692 706 write!(
693 707 f,
694 708 "readonly: {:?}, growable: {:?}, root: {:?}",
695 709 readonly, self.growable, self.root
696 710 )
697 711 }
698 712 }
699 713
700 714 impl Default for NodeTree {
701 715 /// Create a fully mutable empty NodeTree
702 716 fn default() -> Self {
703 717 NodeTree::new(Box::new(Vec::new()))
704 718 }
705 719 }
706 720
707 721 impl NodeMap for NodeTree {
708 722 fn find_bin<'a>(
709 723 &self,
710 724 idx: &impl RevlogIndex,
711 725 prefix: NodePrefixRef<'a>,
712 726 ) -> Result<Option<Revision>, NodeMapError> {
713 727 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
714 728 .map(|(opt, _shortest)| opt)
715 729 }
716 730
717 731 fn unique_prefix_len_bin<'a>(
718 732 &self,
719 733 idx: &impl RevlogIndex,
720 734 prefix: NodePrefixRef<'a>,
721 735 ) -> Result<Option<usize>, NodeMapError> {
722 736 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
723 737 .map(|(opt, shortest)| opt.map(|_rev| shortest))
724 738 }
725 739 }
726 740
727 741 #[cfg(test)]
728 742 mod tests {
729 743 use super::NodeMapError::*;
730 744 use super::*;
731 745 use crate::revlog::node::{hex_pad_right, Node};
732 746 use std::collections::HashMap;
733 747
734 748 /// Creates a `Block` using a syntax close to the `Debug` output
735 749 macro_rules! block {
736 750 {$($nybble:tt : $variant:ident($val:tt)),*} => (
737 751 {
738 752 let mut block = Block::new();
739 753 $(block.set($nybble, Element::$variant($val)));*;
740 754 block
741 755 }
742 756 )
743 757 }
744 758
745 759 #[test]
746 760 fn test_block_debug() {
747 761 let mut block = Block::new();
748 762 block.set(1, Element::Rev(3));
749 763 block.set(10, Element::Block(0));
750 764 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
751 765 }
752 766
753 767 #[test]
754 768 fn test_block_macro() {
755 769 let block = block! {5: Block(2)};
756 770 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
757 771
758 772 let block = block! {13: Rev(15), 5: Block(2)};
759 773 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
760 774 }
761 775
762 776 #[test]
763 777 fn test_raw_block() {
764 778 let mut raw = [255u8; 64];
765 779
766 780 let mut counter = 0;
767 781 for val in [0, 15, -2, -1, -3].iter() {
768 782 for byte in RawElement::to_be_bytes(*val).iter() {
769 783 raw[counter] = *byte;
770 784 counter += 1;
771 785 }
772 786 }
773 787 let block = Block(raw);
774 788 assert_eq!(block.get(0), Element::Block(0));
775 789 assert_eq!(block.get(1), Element::Block(15));
776 790 assert_eq!(block.get(3), Element::None);
777 791 assert_eq!(block.get(2), Element::Rev(0));
778 792 assert_eq!(block.get(4), Element::Rev(1));
779 793 }
780 794
781 795 type TestIndex = HashMap<Revision, Node>;
782 796
783 797 impl RevlogIndex for TestIndex {
784 798 fn node(&self, rev: Revision) -> Option<&Node> {
785 799 self.get(&rev)
786 800 }
787 801
788 802 fn len(&self) -> usize {
789 803 self.len()
790 804 }
791 805 }
792 806
793 807 /// Pad hexadecimal Node prefix with zeros on the right
794 808 ///
795 809 /// This avoids having to repeatedly write very long hexadecimal
796 810 /// strings for test data, and brings actual hash size independency.
797 811 #[cfg(test)]
798 812 fn pad_node(hex: &str) -> Node {
799 813 Node::from_hex(&hex_pad_right(hex)).unwrap()
800 814 }
801 815
802 816 /// Pad hexadecimal Node prefix with zeros on the right, then insert
803 817 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
804 818 idx.insert(rev, pad_node(hex));
805 819 }
806 820
807 821 fn sample_nodetree() -> NodeTree {
808 822 NodeTree::from(vec![
809 823 block![0: Rev(9)],
810 824 block![0: Rev(0), 1: Rev(9)],
811 825 block![0: Block(1), 1:Rev(1)],
812 826 ])
813 827 }
814 828
815 829 #[test]
816 830 fn test_nt_debug() {
817 831 let nt = sample_nodetree();
818 832 assert_eq!(
819 833 format!("{:?}", nt),
820 834 "readonly: \
821 835 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
822 836 growable: [], \
823 837 root: {0: Block(1), 1: Rev(1)}",
824 838 );
825 839 }
826 840
827 841 #[test]
828 842 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
829 843 let mut idx: TestIndex = HashMap::new();
830 844 pad_insert(&mut idx, 1, "1234deadcafe");
831 845
832 846 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
833 847 assert_eq!(nt.find_hex(&idx, "1")?, Some(1));
834 848 assert_eq!(nt.find_hex(&idx, "12")?, Some(1));
835 849 assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1));
836 850 assert_eq!(nt.find_hex(&idx, "1a")?, None);
837 851 assert_eq!(nt.find_hex(&idx, "ab")?, None);
838 852
839 853 // and with full binary Nodes
840 854 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
841 855 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
842 856 assert_eq!(nt.find_node(&idx, &unknown)?, None);
843 857 Ok(())
844 858 }
845 859
846 860 #[test]
847 861 fn test_immutable_find_one_jump() {
848 862 let mut idx = TestIndex::new();
849 863 pad_insert(&mut idx, 9, "012");
850 864 pad_insert(&mut idx, 0, "00a");
851 865
852 866 let nt = sample_nodetree();
853 867
854 868 assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults));
855 869 assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
856 870 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
857 871 assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
858 872 assert_eq!(nt.unique_prefix_len_hex(&idx, "00a"), Ok(Some(3)));
859 873 assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
860 874 }
861 875
862 876 #[test]
863 877 fn test_mutated_find() -> Result<(), NodeMapError> {
864 878 let mut idx = TestIndex::new();
865 879 pad_insert(&mut idx, 9, "012");
866 880 pad_insert(&mut idx, 0, "00a");
867 881 pad_insert(&mut idx, 2, "cafe");
868 882 pad_insert(&mut idx, 3, "15");
869 883 pad_insert(&mut idx, 1, "10");
870 884
871 885 let nt = NodeTree {
872 886 readonly: sample_nodetree().readonly,
873 887 growable: vec![block![0: Rev(1), 5: Rev(3)]],
874 888 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
875 889 masked_inner_blocks: 1,
876 890 };
877 891 assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
878 892 assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
879 893 assert_eq!(nt.unique_prefix_len_hex(&idx, "c")?, Some(1));
880 894 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
881 895 assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
882 896 assert_eq!(nt.unique_prefix_len_hex(&idx, "000")?, Some(3));
883 897 assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
884 898 assert_eq!(nt.masked_readonly_blocks(), 2);
885 899 Ok(())
886 900 }
887 901
888 902 struct TestNtIndex {
889 903 index: TestIndex,
890 904 nt: NodeTree,
891 905 }
892 906
893 907 impl TestNtIndex {
894 908 fn new() -> Self {
895 909 TestNtIndex {
896 910 index: HashMap::new(),
897 911 nt: NodeTree::default(),
898 912 }
899 913 }
900 914
901 915 fn insert(
902 916 &mut self,
903 917 rev: Revision,
904 918 hex: &str,
905 919 ) -> Result<(), NodeMapError> {
906 920 let node = pad_node(hex);
907 921 self.index.insert(rev, node.clone());
908 922 self.nt.insert(&self.index, &node, rev)?;
909 923 Ok(())
910 924 }
911 925
912 926 fn find_hex(
913 927 &self,
914 928 prefix: &str,
915 929 ) -> Result<Option<Revision>, NodeMapError> {
916 930 self.nt.find_hex(&self.index, prefix)
917 931 }
918 932
919 933 fn unique_prefix_len_hex(
920 934 &self,
921 935 prefix: &str,
922 936 ) -> Result<Option<usize>, NodeMapError> {
923 937 self.nt.unique_prefix_len_hex(&self.index, prefix)
924 938 }
925 939
926 940 /// Drain `added` and restart a new one
927 941 fn commit(self) -> Self {
928 942 let mut as_vec: Vec<Block> =
929 943 self.nt.readonly.iter().map(|block| block.clone()).collect();
930 944 as_vec.extend(self.nt.growable);
931 945 as_vec.push(self.nt.root);
932 946
933 947 Self {
934 948 index: self.index,
935 949 nt: NodeTree::from(as_vec).into(),
936 950 }
937 951 }
938 952 }
939 953
940 954 #[test]
941 955 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
942 956 let mut idx = TestNtIndex::new();
943 957 idx.insert(0, "1234")?;
944 958 assert_eq!(idx.find_hex("1")?, Some(0));
945 959 assert_eq!(idx.find_hex("12")?, Some(0));
946 960
947 961 // let's trigger a simple split
948 962 idx.insert(1, "1a34")?;
949 963 assert_eq!(idx.nt.growable.len(), 1);
950 964 assert_eq!(idx.find_hex("12")?, Some(0));
951 965 assert_eq!(idx.find_hex("1a")?, Some(1));
952 966
953 967 // reinserting is a no_op
954 968 idx.insert(1, "1a34")?;
955 969 assert_eq!(idx.nt.growable.len(), 1);
956 970 assert_eq!(idx.find_hex("12")?, Some(0));
957 971 assert_eq!(idx.find_hex("1a")?, Some(1));
958 972
959 973 idx.insert(2, "1a01")?;
960 974 assert_eq!(idx.nt.growable.len(), 2);
961 975 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
962 976 assert_eq!(idx.find_hex("12")?, Some(0));
963 977 assert_eq!(idx.find_hex("1a3")?, Some(1));
964 978 assert_eq!(idx.find_hex("1a0")?, Some(2));
965 979 assert_eq!(idx.find_hex("1a12")?, None);
966 980
967 981 // now let's make it split and create more than one additional block
968 982 idx.insert(3, "1a345")?;
969 983 assert_eq!(idx.nt.growable.len(), 4);
970 984 assert_eq!(idx.find_hex("1a340")?, Some(1));
971 985 assert_eq!(idx.find_hex("1a345")?, Some(3));
972 986 assert_eq!(idx.find_hex("1a341")?, None);
973 987
974 988 // there's no readonly block to mask
975 989 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
976 990 Ok(())
977 991 }
978 992
979 993 #[test]
980 994 fn test_unique_prefix_len_zero_prefix() {
981 995 let mut idx = TestNtIndex::new();
982 996 idx.insert(0, "00000abcd").unwrap();
983 997
984 998 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
985 999 // in the nodetree proper, this will be found at the first nybble
986 1000 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
987 1001 // but the first difference with `NULL_NODE`
988 1002 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
989 1003 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
990 1004
991 1005 // same with odd result
992 1006 idx.insert(1, "00123").unwrap();
993 1007 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
994 1008 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
995 1009
996 1010 // these are unchanged of course
997 1011 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
998 1012 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
999 1013 }
1000 1014
1001 1015 #[test]
1002 1016 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
1003 1017 // check that the splitting loop is long enough
1004 1018 let mut nt_idx = TestNtIndex::new();
1005 1019 let nt = &mut nt_idx.nt;
1006 1020 let idx = &mut nt_idx.index;
1007 1021
1008 1022 let node0_hex = hex_pad_right("444444");
1009 1023 let mut node1_hex = hex_pad_right("444444").clone();
1010 1024 node1_hex.pop();
1011 1025 node1_hex.push('5');
1012 1026 let node0 = Node::from_hex(&node0_hex).unwrap();
1013 1027 let node1 = Node::from_hex(&node1_hex).unwrap();
1014 1028
1015 1029 idx.insert(0, node0.clone());
1016 1030 nt.insert(idx, &node0, 0)?;
1017 1031 idx.insert(1, node1.clone());
1018 1032 nt.insert(idx, &node1, 1)?;
1019 1033
1020 1034 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
1021 1035 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
1022 1036 Ok(())
1023 1037 }
1024 1038
1025 1039 #[test]
1026 1040 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
1027 1041 let mut idx = TestNtIndex::new();
1028 1042 idx.insert(0, "1234")?;
1029 1043 idx.insert(1, "1235")?;
1030 1044 idx.insert(2, "131")?;
1031 1045 idx.insert(3, "cafe")?;
1032 1046 let mut idx = idx.commit();
1033 1047 assert_eq!(idx.find_hex("1234")?, Some(0));
1034 1048 assert_eq!(idx.find_hex("1235")?, Some(1));
1035 1049 assert_eq!(idx.find_hex("131")?, Some(2));
1036 1050 assert_eq!(idx.find_hex("cafe")?, Some(3));
1037 1051 // we did not add anything since init from readonly
1038 1052 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1039 1053
1040 1054 idx.insert(4, "123A")?;
1041 1055 assert_eq!(idx.find_hex("1234")?, Some(0));
1042 1056 assert_eq!(idx.find_hex("1235")?, Some(1));
1043 1057 assert_eq!(idx.find_hex("131")?, Some(2));
1044 1058 assert_eq!(idx.find_hex("cafe")?, Some(3));
1045 1059 assert_eq!(idx.find_hex("123A")?, Some(4));
1046 1060 // we masked blocks for all prefixes of "123", including the root
1047 1061 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1048 1062
1049 1063 eprintln!("{:?}", idx.nt);
1050 1064 idx.insert(5, "c0")?;
1051 1065 assert_eq!(idx.find_hex("cafe")?, Some(3));
1052 1066 assert_eq!(idx.find_hex("c0")?, Some(5));
1053 1067 assert_eq!(idx.find_hex("c1")?, None);
1054 1068 assert_eq!(idx.find_hex("1234")?, Some(0));
1055 1069 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1056 1070 // it doesn't mask anything
1057 1071 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1058 1072
1059 1073 Ok(())
1060 1074 }
1061 1075
1062 1076 #[test]
1077 fn test_invalidate_all() -> Result<(), NodeMapError> {
1078 let mut idx = TestNtIndex::new();
1079 idx.insert(0, "1234")?;
1080 idx.insert(1, "1235")?;
1081 idx.insert(2, "131")?;
1082 idx.insert(3, "cafe")?;
1083 let mut idx = idx.commit();
1084
1085 idx.nt.invalidate_all();
1086
1087 assert_eq!(idx.find_hex("1234")?, None);
1088 assert_eq!(idx.find_hex("1235")?, None);
1089 assert_eq!(idx.find_hex("131")?, None);
1090 assert_eq!(idx.find_hex("cafe")?, None);
1091 // all the readonly blocks have been masked, this is the
1092 // conventional expected response
1093 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1094 Ok(())
1095 }
1096
1097 #[test]
1063 1098 fn test_into_added_empty() {
1064 1099 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1065 1100 assert!(sample_nodetree()
1066 1101 .into_readonly_and_added_bytes()
1067 1102 .1
1068 1103 .is_empty());
1069 1104 }
1070 1105
1071 1106 #[test]
1072 1107 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1073 1108 let mut idx = TestNtIndex::new();
1074 1109 idx.insert(0, "1234")?;
1075 1110 let mut idx = idx.commit();
1076 1111 idx.insert(4, "cafe")?;
1077 1112 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1078 1113
1079 1114 // only the root block has been changed
1080 1115 assert_eq!(bytes.len(), BLOCK_SIZE);
1081 1116 // big endian for -2
1082 1117 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1083 1118 // big endian for -6
1084 1119 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1085 1120 Ok(())
1086 1121 }
1087 1122 }
General Comments 0
You need to be logged in to leave comments. Login now