##// END OF EJS Templates
revlog: make the rust test for node hex prefix resolution exercise the nodemap
Arseniy Alekseyev -
r51879:eccf7dc7 stable
parent child Browse files
Show More
@@ -1,825 +1,843 b''
1 1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 10 mod nodemap_docket;
11 11 pub mod path_encode;
12 12 pub use node::{FromHexError, Node, NodePrefix};
13 13 pub mod changelog;
14 14 pub mod filelog;
15 15 pub mod index;
16 16 pub mod manifest;
17 17 pub mod patch;
18 18
19 19 use std::borrow::Cow;
20 20 use std::io::Read;
21 21 use std::ops::Deref;
22 22 use std::path::Path;
23 23
24 24 use flate2::read::ZlibDecoder;
25 25 use sha1::{Digest, Sha1};
26 26 use std::cell::RefCell;
27 27 use zstd;
28 28
29 29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 30 use self::nodemap_docket::NodeMapDocket;
31 31 use super::index::Index;
32 32 use super::nodemap::{NodeMap, NodeMapError};
33 33 use crate::errors::HgError;
34 34 use crate::vfs::Vfs;
35 35
/// Mercurial revision numbers
///
/// As noted in revlog.c, revision numbers are actually encoded in
/// 4 bytes, and are liberally converted to ints, whence the i32
pub type Revision = i32;

/// Marker expressing the absence of a parent
///
/// Independently of the actual representation, `NULL_REVISION` is guaranteed
/// to be smaller than all existing revisions.
pub const NULL_REVISION: Revision = -1;

/// Same as `mercurial.node.wdirrev`
///
/// This is also equal to `i32::max_value()`, but it's better to spell
/// it out explicitly, same as in `mercurial.node`
#[allow(clippy::unreadable_literal)]
pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;

/// Hexadecimal node ID standing for the working directory: forty `f`
/// characters (cf. `WORKING_DIRECTORY_REVISION` above).
pub const WORKING_DIRECTORY_HEX: &str =
    "ffffffffffffffffffffffffffffffffffffffff";
/// The simplest expression of what we need of Mercurial DAGs.
pub trait Graph {
    /// Return the two parents of the given `Revision`.
    ///
    /// Each of the parents can be independently `NULL_REVISION`
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
}
65 65
/// Errors that can be returned by [`Graph`] implementations.
#[derive(Clone, Debug, PartialEq)]
pub enum GraphError {
    /// A parent revision number was outside the valid range.
    ParentOutOfRange(Revision),
    /// Asking for the parents of the working directory pseudo-revision
    /// is not supported.
    WorkingDirectoryUnsupported,
}
71 71
/// The Mercurial Revlog Index
///
/// This is currently limited to the minimal interface that is needed for
/// the [`nodemap`](nodemap/index.html) module
pub trait RevlogIndex {
    /// Total number of Revisions referenced in this index
    fn len(&self) -> usize;

    /// Whether the index references no revision at all.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Return a reference to the Node or `None` if rev is out of bounds
    ///
    /// `NULL_REVISION` is not considered to be out of bounds.
    fn node(&self, rev: Revision) -> Option<&Node>;
}
89 89
// Per-revision flags. Revisions flagged as censored have their data
// access refused (see `RevlogEntry::data`); ELLIPSIS is known to not
// change the content (see `has_length_affecting_flag_processor`).
const REVISION_FLAG_CENSORED: u16 = 1 << 15;
const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;

// Keep this in sync with REVIDX_KNOWN_FLAGS in
// mercurial/revlogutils/flagutil.py
const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_HASCOPIESINFO;

/// Flags carried by the synthetic entry for `NULL_REVISION`: none.
const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
103 103
/// Errors that can occur while reading from a revlog.
#[derive(Debug, derive_more::From)]
pub enum RevlogError {
    /// The requested revision or node does not exist in this revlog.
    InvalidRevision,
    /// Working directory is not supported
    WDirUnsupported,
    /// Found more than one entry whose ID match the requested prefix
    AmbiguousPrefix,
    /// Any lower-level error (I/O, corruption, ...).
    #[from]
    Other(HgError),
}
114 114
115 115 impl From<NodeMapError> for RevlogError {
116 116 fn from(error: NodeMapError) -> Self {
117 117 match error {
118 118 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
119 119 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
120 120 format!("nodemap point to revision {} not in index", rev),
121 121 ),
122 122 }
123 123 }
124 124 }
125 125
126 126 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
127 127 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
128 128 }
129 129
impl RevlogError {
    /// Convenience constructor: wrap a corruption error (see the
    /// free function [`corrupted`]) into [`RevlogError::Other`].
    fn corrupted<S: AsRef<str>>(context: S) -> Self {
        RevlogError::Other(corrupted(context))
    }
}
135 135
/// Read only implementation of revlog.
pub struct Revlog {
    /// When index and data are not interleaved: bytes of the revlog index.
    /// When index and data are interleaved: bytes of the revlog index and
    /// data.
    index: Index,
    /// When index and data are not interleaved: bytes of the revlog data
    data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
    /// When present on disk: the persistent nodemap for this revlog
    /// (used by `rev_from_node` to avoid a linear scan of the index)
    nodemap: Option<nodemap::NodeTree>,
}
147 147
impl Revlog {
    /// Open a revlog index file.
    ///
    /// It will also open the associated data file if index and data are not
    /// interleaved.
    pub fn open(
        store_vfs: &Vfs,
        index_path: impl AsRef<Path>,
        data_path: Option<&Path>,
        use_nodemap: bool,
    ) -> Result<Self, HgError> {
        Self::open_gen(store_vfs, index_path, data_path, use_nodemap, None)
    }

    /// Shared implementation of [`Revlog::open`], with an extra hook for
    /// tests.
    ///
    /// `nodemap_for_test`, when `Some`, takes precedence over any
    /// persistent nodemap found on disk. This lets tests exercise the
    /// nodemap code path even for inline/small revlogs that would
    /// normally not have one.
    fn open_gen(
        store_vfs: &Vfs,
        index_path: impl AsRef<Path>,
        data_path: Option<&Path>,
        use_nodemap: bool,
        nodemap_for_test: Option<nodemap::NodeTree>,
    ) -> Result<Self, HgError> {
        let index_path = index_path.as_ref();
        let index = {
            match store_vfs.mmap_open_opt(&index_path)? {
                // A missing index file is treated as an empty revlog.
                None => Index::new(Box::new(vec![])),
                Some(index_mmap) => {
                    let index = Index::new(Box::new(index_mmap))?;
                    Ok(index)
                }
            }
        }?;

        let default_data_path = index_path.with_extension("d");

        // type annotation required
        // won't recognize Mmap as Deref<Target = [u8]>
        let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
            if index.is_inline() {
                // Inline revlog: data lives inside the index file.
                None
            } else {
                let data_path = data_path.unwrap_or(&default_data_path);
                let data_mmap = store_vfs.mmap_open(data_path)?;
                Some(Box::new(data_mmap))
            };

        // An inline revlog never has a persistent nodemap.
        let nodemap = if index.is_inline() || !use_nodemap {
            None
        } else {
            NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
                |(docket, data)| {
                    nodemap::NodeTree::load_bytes(
                        Box::new(data),
                        docket.data_length,
                    )
                },
            )
        };

        // Test-injected nodemap wins over whatever was read from disk.
        let nodemap = nodemap_for_test.or(nodemap);

        Ok(Revlog {
            index,
            data_bytes,
            nodemap,
        })
    }

    /// Return number of entries of the `Revlog`.
    pub fn len(&self) -> usize {
        self.index.len()
    }

    /// Returns `true` if the `Revlog` has zero `entries`.
    pub fn is_empty(&self) -> bool {
        self.index.is_empty()
    }

    /// Returns the node ID for the given revision number, if it exists in
    /// this revlog
    pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
        if rev == NULL_REVISION {
            return Some(&NULL_NODE);
        }
        Some(self.index.get_entry(rev)?.hash())
    }

    /// Return the revision number for the given node ID, if it exists in
    /// this revlog
    pub fn rev_from_node(
        &self,
        node: NodePrefix,
    ) -> Result<Revision, RevlogError> {
        if let Some(nodemap) = &self.nodemap {
            // Fast path: prefix lookup through the persistent nodemap.
            nodemap
                .find_bin(&self.index, node)?
                .ok_or(RevlogError::InvalidRevision)
        } else {
            self.rev_from_node_no_persistent_nodemap(node)
        }
    }

    /// Same as `rev_from_node`, without using a persistent nodemap
    ///
    /// This is used as fallback when a persistent nodemap is not present.
    /// This happens when the persistent-nodemap experimental feature is not
    /// enabled, or for small revlogs.
    fn rev_from_node_no_persistent_nodemap(
        &self,
        node: NodePrefix,
    ) -> Result<Revision, RevlogError> {
        // Linear scan of the revlog
        // TODO: consider building a non-persistent nodemap in memory to
        // optimize these cases.
        let mut found_by_prefix = None;
        // Scan from the tip down to -1 so that the null node is also
        // considered as a prefix-match candidate.
        for rev in (-1..self.len() as Revision).rev() {
            let candidate_node = if rev == -1 {
                NULL_NODE
            } else {
                let index_entry =
                    self.index.get_entry(rev).ok_or_else(|| {
                        HgError::corrupted(
                            "revlog references a revision not in the index",
                        )
                    })?;
                *index_entry.hash()
            };
            // An exact match always wins, even over earlier prefix hits.
            if node == candidate_node {
                return Ok(rev);
            }
            if node.is_prefix_of(&candidate_node) {
                if found_by_prefix.is_some() {
                    return Err(RevlogError::AmbiguousPrefix);
                }
                found_by_prefix = Some(rev)
            }
        }
        found_by_prefix.ok_or(RevlogError::InvalidRevision)
    }

    /// Returns whether the given revision exists in this revlog.
    pub fn has_rev(&self, rev: Revision) -> bool {
        self.index.get_entry(rev).is_some()
    }

    /// Return the full data associated to a revision.
    ///
    /// All entries required to build the final data out of deltas will be
    /// retrieved as needed, and the deltas will be applied to the inital
    /// snapshot to rebuild the final data.
    pub fn get_rev_data(
        &self,
        rev: Revision,
    ) -> Result<Cow<[u8]>, RevlogError> {
        if rev == NULL_REVISION {
            return Ok(Cow::Borrowed(&[]));
        };
        Ok(self.get_entry(rev)?.data()?)
    }

    /// Check the hash of some given data against the recorded hash.
    ///
    /// The recorded hash covers the revision data together with the
    /// (sorted) hashes of both parents, see the free function [`hash`].
    pub fn check_hash(
        &self,
        p1: Revision,
        p2: Revision,
        expected: &[u8],
        data: &[u8],
    ) -> bool {
        let e1 = self.index.get_entry(p1);
        let h1 = match e1 {
            Some(ref entry) => entry.hash(),
            None => &NULL_NODE,
        };
        let e2 = self.index.get_entry(p2);
        let h2 = match e2 {
            Some(ref entry) => entry.hash(),
            None => &NULL_NODE,
        };

        hash(data, h1.as_bytes(), h2.as_bytes()) == expected
    }

    /// Build the full data of a revision out its snapshot
    /// and its deltas.
    fn build_data_from_deltas(
        snapshot: RevlogEntry,
        deltas: &[RevlogEntry],
    ) -> Result<Vec<u8>, HgError> {
        let snapshot = snapshot.data_chunk()?;
        // `deltas` was collected from entry to base, so apply in reverse
        // (oldest delta first).
        let deltas = deltas
            .iter()
            .rev()
            .map(RevlogEntry::data_chunk)
            .collect::<Result<Vec<_>, _>>()?;
        let patches: Vec<_> =
            deltas.iter().map(|d| patch::PatchList::new(d)).collect();
        let patch = patch::fold_patch_lists(&patches);
        Ok(patch.apply(&snapshot))
    }

    /// Return the revlog data.
    ///
    /// Panics if the revlog is inline or if the data was never loaded,
    /// which would be a programming error.
    fn data(&self) -> &[u8] {
        match &self.data_bytes {
            Some(data_bytes) => data_bytes,
            None => panic!(
                "forgot to load the data or trying to access inline data"
            ),
        }
    }

    /// Synthesize the entry for `NULL_REVISION` (empty data, no parents).
    pub fn make_null_entry(&self) -> RevlogEntry {
        RevlogEntry {
            revlog: self,
            rev: NULL_REVISION,
            bytes: b"",
            compressed_len: 0,
            uncompressed_len: 0,
            base_rev_or_base_of_delta_chain: None,
            p1: NULL_REVISION,
            p2: NULL_REVISION,
            flags: NULL_REVLOG_ENTRY_FLAGS,
            hash: NULL_NODE,
        }
    }

    /// Get an entry of the revlog.
    pub fn get_entry(
        &self,
        rev: Revision,
    ) -> Result<RevlogEntry, RevlogError> {
        if rev == NULL_REVISION {
            return Ok(self.make_null_entry());
        }
        let index_entry = self
            .index
            .get_entry(rev)
            .ok_or(RevlogError::InvalidRevision)?;
        let start = index_entry.offset();
        let end = start + index_entry.compressed_len() as usize;
        let data = if self.index.is_inline() {
            self.index.data(start, end)
        } else {
            &self.data()[start..end]
        };
        let entry = RevlogEntry {
            revlog: self,
            rev,
            bytes: data,
            compressed_len: index_entry.compressed_len(),
            uncompressed_len: index_entry.uncompressed_len(),
            // An entry whose recorded base is itself has no delta base:
            // it is a full snapshot (see `RevlogEntry::is_delta`).
            base_rev_or_base_of_delta_chain: if index_entry
                .base_revision_or_base_of_delta_chain()
                == rev
            {
                None
            } else {
                Some(index_entry.base_revision_or_base_of_delta_chain())
            },
            p1: index_entry.p1(),
            p2: index_entry.p2(),
            flags: index_entry.flags(),
            hash: *index_entry.hash(),
        };
        Ok(entry)
    }

    /// when resolving internal references within revlog, any errors
    /// should be reported as corruption, instead of e.g. "invalid revision"
    fn get_entry_internal(
        &self,
        rev: Revision,
    ) -> Result<RevlogEntry, HgError> {
        self.get_entry(rev)
            .map_err(|_| corrupted(format!("revision {} out of range", rev)))
    }
}
411 423
/// The revlog entry's bytes and the necessary informations to extract
/// the entry's data.
#[derive(Clone)]
pub struct RevlogEntry<'revlog> {
    /// The revlog this entry was read from (borrowed for the entry's life)
    revlog: &'revlog Revlog,
    /// The revision number of this entry
    rev: Revision,
    /// Raw on-disk bytes of the entry (possibly compressed, possibly a
    /// delta)
    bytes: &'revlog [u8],
    compressed_len: u32,
    uncompressed_len: i32,
    /// `None` when the entry is a full snapshot; `Some(base)` when its
    /// stored bytes are a delta against `base`
    base_rev_or_base_of_delta_chain: Option<Revision>,
    p1: Revision,
    p2: Revision,
    /// Bit combination of the `REVISION_FLAG_*` constants
    flags: u16,
    /// Recorded node ID, used for hash checking
    hash: Node,
}
427 439
428 440 thread_local! {
429 441 // seems fine to [unwrap] here: this can only fail due to memory allocation
430 442 // failing, and it's normal for that to cause panic.
431 443 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
432 444 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
433 445 }
434 446
/// Decompress zstd `bytes` into `buf` using the thread-local decoder,
/// returning the number of bytes written.
fn zstd_decompress_to_buffer(
    bytes: &[u8],
    buf: &mut Vec<u8>,
) -> Result<usize, std::io::Error> {
    ZSTD_DECODER
        .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
}
442 454
impl<'revlog> RevlogEntry<'revlog> {
    /// The revision number of this entry.
    pub fn revision(&self) -> Revision {
        self.rev
    }

    /// The recorded node ID of this entry.
    pub fn node(&self) -> &Node {
        &self.hash
    }

    /// The uncompressed length, or `None` when the stored value does not
    /// fit in a `u32` (e.g. it is negative).
    pub fn uncompressed_len(&self) -> Option<u32> {
        u32::try_from(self.uncompressed_len).ok()
    }

    /// Whether this entry has a non-null first parent.
    pub fn has_p1(&self) -> bool {
        self.p1 != NULL_REVISION
    }

    /// The entry for the first parent, or `Ok(None)` if it is null.
    pub fn p1_entry(
        &self,
    ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
        if self.p1 == NULL_REVISION {
            Ok(None)
        } else {
            Ok(Some(self.revlog.get_entry(self.p1)?))
        }
    }

    /// The entry for the second parent, or `Ok(None)` if it is null.
    pub fn p2_entry(
        &self,
    ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
        if self.p2 == NULL_REVISION {
            Ok(None)
        } else {
            Ok(Some(self.revlog.get_entry(self.p2)?))
        }
    }

    /// The first parent revision, with `None` standing for the null
    /// revision.
    pub fn p1(&self) -> Option<Revision> {
        if self.p1 == NULL_REVISION {
            None
        } else {
            Some(self.p1)
        }
    }

    /// The second parent revision, with `None` standing for the null
    /// revision.
    pub fn p2(&self) -> Option<Revision> {
        if self.p2 == NULL_REVISION {
            None
        } else {
            Some(self.p2)
        }
    }

    /// Whether this revision is censored (its data must not be accessed).
    pub fn is_censored(&self) -> bool {
        (self.flags & REVISION_FLAG_CENSORED) != 0
    }

    /// Whether any flag set on this entry may change the length of the
    /// revision data.
    pub fn has_length_affecting_flag_processor(&self) -> bool {
        // Relevant Python code: revlog.size()
        // note: ELLIPSIS is known to not change the content
        (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
    }

    /// The data for this entry, after resolving deltas if any.
    pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
        let mut entry = self.clone();
        let mut delta_chain = vec![];

        // The meaning of `base_rev_or_base_of_delta_chain` depends on
        // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
        // `mercurial/revlogutils/constants.py` and the code in
        // [_chaininfo] and in [index_deltachain].
        let uses_generaldelta = self.revlog.index.uses_generaldelta();
        while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
            let base_rev = if uses_generaldelta {
                base_rev
            } else {
                // Without generaldelta, deltas are always against the
                // previous revision.
                entry.rev - 1
            };
            delta_chain.push(entry);
            entry = self.revlog.get_entry_internal(base_rev)?;
        }

        let data = if delta_chain.is_empty() {
            entry.data_chunk()?
        } else {
            Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
        };

        Ok(data)
    }

    /// Verify `data` against this entry's recorded hash, passing it
    /// through on success.
    fn check_data(
        &self,
        data: Cow<'revlog, [u8]>,
    ) -> Result<Cow<'revlog, [u8]>, HgError> {
        if self.revlog.check_hash(
            self.p1,
            self.p2,
            self.hash.as_bytes(),
            &data,
        ) {
            Ok(data)
        } else {
            // Ellipsis revisions legitimately fail the plain hash check;
            // supporting them would need a flag processor, which rhg
            // does not have.
            if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
                return Err(HgError::unsupported(
                    "ellipsis revisions are not supported by rhg",
                ));
            }
            Err(corrupted(format!(
                "hash check failed for revision {}",
                self.rev
            )))
        }
    }

    /// The fully-resolved, hash-checked data for this entry.
    ///
    /// Errors out on censored revisions and on hash mismatches.
    pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
        let data = self.rawdata()?;
        if self.rev == NULL_REVISION {
            return Ok(data);
        }
        if self.is_censored() {
            return Err(HgError::CensoredNodeError);
        }
        self.check_data(data)
    }

    /// Extract the data contained in the entry.
    /// This may be a delta. (See `is_delta`.)
    fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
        if self.bytes.is_empty() {
            return Ok(Cow::Borrowed(&[]));
        }
        // The first byte of an entry identifies its compression scheme.
        match self.bytes[0] {
            // Revision data is the entirety of the entry, including this
            // header.
            b'\0' => Ok(Cow::Borrowed(self.bytes)),
            // Raw revision data follows.
            b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
            // zlib (RFC 1950) data.
            b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
            // zstd data.
            b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
            // A proper new format should have had a repo/store requirement.
            format_type => Err(corrupted(format!(
                "unknown compression header '{}'",
                format_type
            ))),
        }
    }

    /// Decompress the entry's zlib-compressed bytes.
    fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
        let mut decoder = ZlibDecoder::new(self.bytes);
        if self.is_delta() {
            // For a delta we only know an upper-ish bound (the compressed
            // length), so read until EOF.
            let mut buf = Vec::with_capacity(self.compressed_len as usize);
            decoder
                .read_to_end(&mut buf)
                .map_err(|e| corrupted(e.to_string()))?;
            Ok(buf)
        } else {
            // For a snapshot the uncompressed length is known exactly.
            let cap = self.uncompressed_len.max(0) as usize;
            let mut buf = vec![0; cap];
            decoder
                .read_exact(&mut buf)
                .map_err(|e| corrupted(e.to_string()))?;
            Ok(buf)
        }
    }

    /// Decompress the entry's zstd-compressed bytes.
    fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
        let cap = self.uncompressed_len.max(0) as usize;
        if self.is_delta() {
            // [cap] is usually an over-estimate of the space needed because
            // it's the length of delta-decoded data, but we're interested
            // in the size of the delta.
            // This means we have to [shrink_to_fit] to avoid holding on
            // to a large chunk of memory, but it also means we must have a
            // fallback branch, for the case when the delta is longer than
            // the original data (surprisingly, this does happen in practice)
            let mut buf = Vec::with_capacity(cap);
            match zstd_decompress_to_buffer(self.bytes, &mut buf) {
                Ok(_) => buf.shrink_to_fit(),
                Err(_) => {
                    buf.clear();
                    zstd::stream::copy_decode(self.bytes, &mut buf)
                        .map_err(|e| corrupted(e.to_string()))?;
                }
            };
            Ok(buf)
        } else {
            let mut buf = Vec::with_capacity(cap);
            let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
                .map_err(|e| corrupted(e.to_string()))?;
            if len != self.uncompressed_len as usize {
                Err(corrupted("uncompressed length does not match"))
            } else {
                Ok(buf)
            }
        }
    }

    /// Tell if the entry is a snapshot or a delta
    /// (influences on decompression).
    fn is_delta(&self) -> bool {
        self.base_rev_or_base_of_delta_chain.is_some()
    }
}
650 662
651 663 /// Calculate the hash of a revision given its data and its parents.
652 664 fn hash(
653 665 data: &[u8],
654 666 p1_hash: &[u8],
655 667 p2_hash: &[u8],
656 668 ) -> [u8; NODE_BYTES_LENGTH] {
657 669 let mut hasher = Sha1::new();
658 670 let (a, b) = (p1_hash, p2_hash);
659 671 if a > b {
660 672 hasher.update(b);
661 673 hasher.update(a);
662 674 } else {
663 675 hasher.update(a);
664 676 hasher.update(b);
665 677 }
666 678 hasher.update(data);
667 679 *hasher.finalize().as_ref()
668 680 }
669 681
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
    use itertools::Itertools;

    /// An empty index file yields an empty revlog whose only usable
    /// revision is the null revision.
    #[test]
    fn test_empty() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
        assert!(revlog.is_empty());
        assert_eq!(revlog.len(), 0);
        assert!(revlog.get_entry(0).is_err());
        assert!(!revlog.has_rev(0));
        assert_eq!(
            revlog.rev_from_node(NULL_NODE.into()).unwrap(),
            NULL_REVISION
        );
        let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap();
        assert_eq!(null_entry.revision(), NULL_REVISION);
        assert!(null_entry.data().unwrap().is_empty());
    }

    /// Basic entry/parent accessors on a hand-built 3-revision inline
    /// revlog.
    #[test]
    fn test_inline() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
            .unwrap();
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        let entry2_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_p1(0)
            .with_p2(1)
            .with_node(node2)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        let entry0 = revlog.get_entry(0).ok().unwrap();
        assert_eq!(entry0.revision(), 0);
        assert_eq!(*entry0.node(), node0);
        assert!(!entry0.has_p1());
        assert_eq!(entry0.p1(), None);
        assert_eq!(entry0.p2(), None);
        let p1_entry = entry0.p1_entry().unwrap();
        assert!(p1_entry.is_none());
        let p2_entry = entry0.p2_entry().unwrap();
        assert!(p2_entry.is_none());

        let entry1 = revlog.get_entry(1).ok().unwrap();
        assert_eq!(entry1.revision(), 1);
        assert_eq!(*entry1.node(), node1);
        assert!(!entry1.has_p1());
        assert_eq!(entry1.p1(), None);
        assert_eq!(entry1.p2(), None);
        let p1_entry = entry1.p1_entry().unwrap();
        assert!(p1_entry.is_none());
        let p2_entry = entry1.p2_entry().unwrap();
        assert!(p2_entry.is_none());

        let entry2 = revlog.get_entry(2).ok().unwrap();
        assert_eq!(entry2.revision(), 2);
        assert_eq!(*entry2.node(), node2);
        assert!(entry2.has_p1());
        assert_eq!(entry2.p1(), Some(0));
        assert_eq!(entry2.p2(), Some(1));
        let p1_entry = entry2.p1_entry().unwrap();
        assert!(p1_entry.is_some());
        assert_eq!(p1_entry.unwrap().revision(), 0);
        let p2_entry = entry2.p2_entry().unwrap();
        assert!(p2_entry.is_some());
        assert_eq!(p2_entry.unwrap().revision(), 1);
    }

    /// Node and node-prefix resolution, going through the nodemap code
    /// path: a `TestNtIndex` nodemap is injected via `open_gen` since this
    /// small inline revlog would not have a persistent one on disk.
    #[test]
    fn test_nodemap() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };

        // building a revlog with a forced Node starting with zeros
        // This is a corruption, but it does not preclude using the nodemap
        // if we don't try and access the data
        let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();

        let mut idx = nodemap::tests::TestNtIndex::new();
        idx.insert_node(0, node0).unwrap();
        idx.insert_node(1, node1).unwrap();

        let revlog =
            Revlog::open_gen(&vfs, "foo.i", None, true, Some(idx.nt)).unwrap();

        // accessing the data shows the corruption
        revlog.get_entry(0).unwrap().data().unwrap_err();

        assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
        assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
        assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
        // "000" is a prefix of the null node only (node0 starts with "00d")
        assert_eq!(
            revlog
                .rev_from_node(NodePrefix::from_hex("000").unwrap())
                .unwrap(),
            -1
        );
        assert_eq!(
            revlog
                .rev_from_node(NodePrefix::from_hex("b00").unwrap())
                .unwrap(),
            1
        );
        // "00" matches both the null node and node0, hence the ambiguity.
        // RevlogError does not implement PartialEq
        // (ultimately because io::Error does not)
        match revlog
            .rev_from_node(NodePrefix::from_hex("00").unwrap())
            .expect_err("Expected to give AmbiguousPrefix error")
        {
            RevlogError::AmbiguousPrefix => (),
            e => {
                panic!("Got another error than AmbiguousPrefix: {:?}", e);
            }
        };
    }
}
@@ -1,1067 +1,1074 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 7 //!
8 8 //! This provides a variation on the 16-ary radix tree that is
9 9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 10 //! on disk.
11 11 //!
12 12 //! Following existing implicit conventions, the "nodemap" terminology
13 13 //! is used in a more abstract context.
14 14
15 15 use super::{
16 16 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
17 17 };
18 18
19 19 use bytes_cast::{unaligned, BytesCast};
20 20 use std::cmp::max;
21 21 use std::fmt;
22 22 use std::mem::{self, align_of, size_of};
23 23 use std::ops::Deref;
24 24 use std::ops::Index;
25 25
/// Failure modes of [`NodeMap`] operations.
#[derive(Debug, PartialEq)]
pub enum NodeMapError {
    /// A `NodePrefix` matches several [`Revision`]s.
    ///
    /// This can be returned by methods meant for (at most) one match.
    MultipleResults,
    /// A `Revision` stored in the nodemap could not be found in the index
    RevisionNotInIndex(Revision),
}
35 35
/// Mapping system from Mercurial nodes to revision numbers.
///
/// ## `RevlogIndex` and `NodeMap`
///
/// One way to think about their relationship is that
/// the `NodeMap` is a prefix-oriented reverse index of the [`Node`]
/// information carried by a [`RevlogIndex`].
///
/// Many of the methods in this trait take a `RevlogIndex` argument
/// which is used for validation of their results. This index must naturally
/// be the one the `NodeMap` is about, and it must be consistent.
///
/// Notably, the `NodeMap` must not store
/// information about more `Revision` values than there are in the index.
/// In these methods, if an encountered `Revision` is not in the index, a
/// [RevisionNotInIndex](NodeMapError) error is returned.
///
/// In insert operations, the rule is thus that the `NodeMap` must always
/// be updated after the `RevlogIndex` it is about.
pub trait NodeMap {
    /// Find the unique `Revision` having the given `Node`
    ///
    /// If no Revision matches the given `Node`, `Ok(None)` is returned.
    fn find_node(
        &self,
        index: &impl RevlogIndex,
        node: &Node,
    ) -> Result<Option<Revision>, NodeMapError> {
        self.find_bin(index, node.into())
    }

    /// Find the unique Revision whose `Node` starts with a given binary prefix
    ///
    /// If no Revision matches the given prefix, `Ok(None)` is returned.
    ///
    /// If several Revisions match the given prefix, a
    /// [MultipleResults](NodeMapError) error is returned.
    fn find_bin(
        &self,
        idx: &impl RevlogIndex,
        prefix: NodePrefix,
    ) -> Result<Option<Revision>, NodeMapError>;

    /// Give the size of the shortest node prefix that determines
    /// the revision uniquely.
    ///
    /// From a binary node prefix, if it is matched in the node map, this
    /// returns the number of hexadecimal digits that would have sufficed
    /// to find the revision uniquely.
    ///
    /// Returns `None` if no [`Revision`] could be found for the prefix.
    ///
    /// If several Revisions match the given prefix, a
    /// [MultipleResults](NodeMapError) error is returned.
    fn unique_prefix_len_bin(
        &self,
        idx: &impl RevlogIndex,
        node_prefix: NodePrefix,
    ) -> Result<Option<usize>, NodeMapError>;

    /// Same as [unique_prefix_len_bin](Self::unique_prefix_len_bin), with
    /// a full [`Node`] as input
    fn unique_prefix_len_node(
        &self,
        idx: &impl RevlogIndex,
        node: &Node,
    ) -> Result<Option<usize>, NodeMapError> {
        self.unique_prefix_len_bin(idx, node.into())
    }
}
106 106
/// A [`NodeMap`] that can also be updated in place.
pub trait MutableNodeMap: NodeMap {
    /// Record that `node` is the [`Node`] of revision `rev`.
    ///
    /// Per the trait-level rule, `index` must already know `rev`:
    /// the index is updated before the nodemap.
    fn insert<I: RevlogIndex>(
        &mut self,
        index: &I,
        node: &Node,
        rev: Revision,
    ) -> Result<(), NodeMapError>;
}
115 115
/// Low level NodeTree [`Block`] elements
///
/// These are exactly as for instance on persistent storage.
// Big-endian on purpose: the serialized form must be stable across
// architectures (see the endianness note on `Block`).
type RawElement = unaligned::I32Be;
120 120
/// High level representation of values in NodeTree
/// [`Blocks`](struct.Block.html)
///
/// This is the high level representation that most algorithms should
/// use.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Element {
    /// A [`Revision`] leaf: the lookup concludes here.
    Rev(Revision),
    /// Index of a child [`Block`] to descend into.
    Block(usize),
    /// Absent entry: nothing recorded under this nybble.
    None,
}
132 132
133 133 impl From<RawElement> for Element {
134 134 /// Conversion from low level representation, after endianness conversion.
135 135 ///
136 136 /// See [`Block`](struct.Block.html) for explanation about the encoding.
137 137 fn from(raw: RawElement) -> Element {
138 138 let int = raw.get();
139 139 if int >= 0 {
140 140 Element::Block(int as usize)
141 141 } else if int == -1 {
142 142 Element::None
143 143 } else {
144 144 Element::Rev(-int - 2)
145 145 }
146 146 }
147 147 }
148 148
149 149 impl From<Element> for RawElement {
150 150 fn from(element: Element) -> RawElement {
151 151 RawElement::from(match element {
152 152 Element::None => 0,
153 153 Element::Block(i) => i as i32,
154 154 Element::Rev(rev) => -rev - 2,
155 155 })
156 156 }
157 157 }
158 158
const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble

/// A logical block of the [`NodeTree`], packed with a fixed size.
///
/// These are always used in container types implementing `Index<Block>`,
/// such as `&Block`
///
/// As an array of integers, its ith element encodes that the
/// ith potential edge from the block, representing the ith hexadecimal digit
/// (nybble) `i` is either:
///
/// - absent (value -1)
/// - another `Block` in the same indexable container (value ≥ 0)
/// - a [`Revision`] leaf (value ≤ -2)
///
/// Endianness has to be fixed for consistency on shared storage across
/// different architectures.
///
/// A key difference with the C `nodetree` is that we need to be
/// able to represent the [`Block`] at index 0, hence -1 is the empty marker
/// rather than 0 and the [`Revision`] range upper limit of -2 instead of -1.
///
/// Another related difference is that `NULL_REVISION` (-1) is not
/// represented at all, because we want an immutable empty nodetree
/// to be valid.
#[derive(Copy, Clone, BytesCast, PartialEq)]
#[repr(transparent)]
pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
187 187
188 188 impl Block {
189 189 fn new() -> Self {
190 190 let absent_node = RawElement::from(-1);
191 191 Block([absent_node; ELEMENTS_PER_BLOCK])
192 192 }
193 193
194 194 fn get(&self, nybble: u8) -> Element {
195 195 self.0[nybble as usize].into()
196 196 }
197 197
198 198 fn set(&mut self, nybble: u8, element: Element) {
199 199 self.0[nybble as usize] = element.into()
200 200 }
201 201 }
202 202
203 203 impl fmt::Debug for Block {
204 204 /// sparse representation for testing and debugging purposes
205 205 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
206 206 f.debug_map()
207 207 .entries((0..16).filter_map(|i| match self.get(i) {
208 208 Element::None => None,
209 209 element => Some((i, element)),
210 210 }))
211 211 .finish()
212 212 }
213 213 }
214 214
/// A mutable 16-radix tree with the root block logically at the end
///
/// Because of the append only nature of our node trees, we need to
/// keep the original untouched and store new blocks separately.
///
/// The mutable root [`Block`] is kept apart so that we don't have to rebump
/// it on each insertion.
pub struct NodeTree {
    /// Persisted blocks, never mutated (append-only storage on disk).
    readonly: Box<dyn Deref<Target = [Block]> + Send>,
    /// Blocks added (or copied-on-write from `readonly`) since loading.
    growable: Vec<Block>,
    /// The current root block, logically the last block of the tree.
    root: Block,
    /// Count of `readonly` blocks shadowed by mutable copies in `growable`.
    masked_inner_blocks: usize,
}
228 228
229 229 impl Index<usize> for NodeTree {
230 230 type Output = Block;
231 231
232 232 fn index(&self, i: usize) -> &Block {
233 233 let ro_len = self.readonly.len();
234 234 if i < ro_len {
235 235 &self.readonly[i]
236 236 } else if i == ro_len + self.growable.len() {
237 237 &self.root
238 238 } else {
239 239 &self.growable[i - ro_len]
240 240 }
241 241 }
242 242 }
243 243
244 244 /// Return `None` unless the [`Node`] for `rev` has given prefix in `idx`.
245 245 fn has_prefix_or_none(
246 246 idx: &impl RevlogIndex,
247 247 prefix: NodePrefix,
248 248 rev: Revision,
249 249 ) -> Result<Option<Revision>, NodeMapError> {
250 250 idx.node(rev)
251 251 .ok_or(NodeMapError::RevisionNotInIndex(rev))
252 252 .map(|node| {
253 253 if prefix.is_prefix_of(node) {
254 254 Some(rev)
255 255 } else {
256 256 None
257 257 }
258 258 })
259 259 }
260 260
/// validate that the candidate's node starts indeed with given prefix,
/// and treat ambiguities related to [`NULL_REVISION`].
///
/// From the data in the NodeTree, one can only conclude that some
/// revision is the only one for a *subprefix* of the one being looked up.
fn validate_candidate(
    idx: &impl RevlogIndex,
    prefix: NodePrefix,
    candidate: (Option<Revision>, usize),
) -> Result<(Option<Revision>, usize), NodeMapError> {
    let (rev, steps) = candidate;
    if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
        // The prefix has a non-zero nybble, so it cannot be a prefix of
        // NULL_NODE: checking the candidate against the index is enough.
        // The reported prefix length is at least up to that first
        // non-zero nybble.
        rev.map_or(Ok((None, steps)), |r| {
            has_prefix_or_none(idx, prefix, r)
                .map(|opt| (opt, max(steps, nz_nybble + 1)))
        })
    } else {
        // the prefix is only made of zeros; NULL_REVISION always matches it
        // and any other *valid* result is an ambiguity
        match rev {
            None => Ok((Some(NULL_REVISION), steps + 1)),
            Some(r) => match has_prefix_or_none(idx, prefix, r)? {
                None => Ok((Some(NULL_REVISION), steps + 1)),
                _ => Err(NodeMapError::MultipleResults),
            },
        }
    }
}
289 289
290 290 impl NodeTree {
291 291 /// Initiate a NodeTree from an immutable slice-like of `Block`
292 292 ///
293 293 /// We keep `readonly` and clone its root block if it isn't empty.
294 294 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
295 295 let root = readonly.last().cloned().unwrap_or_else(Block::new);
296 296 NodeTree {
297 297 readonly,
298 298 growable: Vec::new(),
299 299 root,
300 300 masked_inner_blocks: 0,
301 301 }
302 302 }
303 303
304 304 /// Create from an opaque bunch of bytes
305 305 ///
306 306 /// The created [`NodeTreeBytes`] from `bytes`,
307 307 /// of which exactly `amount` bytes are used.
308 308 ///
309 309 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
310 310 /// - `amount` is expressed in bytes, and is not automatically derived from
311 311 /// `bytes`, so that a caller that manages them atomically can perform
312 312 /// temporary disk serializations and still rollback easily if needed.
313 313 /// First use-case for this would be to support Mercurial shell hooks.
314 314 ///
315 315 /// panics if `buffer` is smaller than `amount`
316 316 pub fn load_bytes(
317 317 bytes: Box<dyn Deref<Target = [u8]> + Send>,
318 318 amount: usize,
319 319 ) -> Self {
320 320 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
321 321 }
322 322
323 323 /// Retrieve added [`Block`]s and the original immutable data
324 324 pub fn into_readonly_and_added(
325 325 self,
326 326 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
327 327 let mut vec = self.growable;
328 328 let readonly = self.readonly;
329 329 if readonly.last() != Some(&self.root) {
330 330 vec.push(self.root);
331 331 }
332 332 (readonly, vec)
333 333 }
334 334
335 335 /// Retrieve added [`Block]s as bytes, ready to be written to persistent
336 336 /// storage
337 337 pub fn into_readonly_and_added_bytes(
338 338 self,
339 339 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
340 340 let (readonly, vec) = self.into_readonly_and_added();
341 341 // Prevent running `v`'s destructor so we are in complete control
342 342 // of the allocation.
343 343 let vec = mem::ManuallyDrop::new(vec);
344 344
345 345 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
346 346 // bytes, so this is perfectly safe.
347 347 let bytes = unsafe {
348 348 // Check for compatible allocation layout.
349 349 // (Optimized away by constant-folding + dead code elimination.)
350 350 assert_eq!(size_of::<Block>(), 64);
351 351 assert_eq!(align_of::<Block>(), 1);
352 352
353 353 // /!\ Any use of `vec` after this is use-after-free.
354 354 // TODO: use `into_raw_parts` once stabilized
355 355 Vec::from_raw_parts(
356 356 vec.as_ptr() as *mut u8,
357 357 vec.len() * size_of::<Block>(),
358 358 vec.capacity() * size_of::<Block>(),
359 359 )
360 360 };
361 361 (readonly, bytes)
362 362 }
363 363
364 364 /// Total number of blocks
365 365 fn len(&self) -> usize {
366 366 self.readonly.len() + self.growable.len() + 1
367 367 }
368 368
369 369 /// Implemented for completeness
370 370 ///
371 371 /// A `NodeTree` always has at least the mutable root block.
372 372 #[allow(dead_code)]
373 373 fn is_empty(&self) -> bool {
374 374 false
375 375 }
376 376
377 377 /// Main working method for `NodeTree` searches
378 378 ///
379 379 /// The first returned value is the result of analysing `NodeTree` data
380 380 /// *alone*: whereas `None` guarantees that the given prefix is absent
381 381 /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with
382 382 /// `Some(rev)`, it is to be understood that `rev` is the unique
383 383 /// [`Revision`] that could match the prefix. Actually, all that can
384 384 /// be inferred from
385 385 /// the `NodeTree` data is that `rev` is the revision with the longest
386 386 /// common node prefix with the given prefix.
387 387 ///
388 388 /// The second returned value is the size of the smallest subprefix
389 389 /// of `prefix` that would give the same result, i.e. not the
390 390 /// [MultipleResults](NodeMapError) error variant (again, using only the
391 391 /// data of the [`NodeTree`]).
392 392 fn lookup(
393 393 &self,
394 394 prefix: NodePrefix,
395 395 ) -> Result<(Option<Revision>, usize), NodeMapError> {
396 396 for (i, visit_item) in self.visit(prefix).enumerate() {
397 397 if let Some(opt) = visit_item.final_revision() {
398 398 return Ok((opt, i + 1));
399 399 }
400 400 }
401 401 Err(NodeMapError::MultipleResults)
402 402 }
403 403
404 404 fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
405 405 NodeTreeVisitor {
406 406 nt: self,
407 407 prefix,
408 408 visit: self.len() - 1,
409 409 nybble_idx: 0,
410 410 done: false,
411 411 }
412 412 }
413 413 /// Return a mutable reference for `Block` at index `idx`.
414 414 ///
415 415 /// If `idx` lies in the immutable area, then the reference is to
416 416 /// a newly appended copy.
417 417 ///
418 418 /// Returns (new_idx, glen, mut_ref) where
419 419 ///
420 420 /// - `new_idx` is the index of the mutable `Block`
421 421 /// - `mut_ref` is a mutable reference to the mutable Block.
422 422 /// - `glen` is the new length of `self.growable`
423 423 ///
424 424 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
425 425 /// itself because of the mutable borrow taken with the returned `Block`
426 426 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
427 427 let ro_blocks = &self.readonly;
428 428 let ro_len = ro_blocks.len();
429 429 let glen = self.growable.len();
430 430 if idx < ro_len {
431 431 self.masked_inner_blocks += 1;
432 432 self.growable.push(ro_blocks[idx]);
433 433 (glen + ro_len, &mut self.growable[glen], glen + 1)
434 434 } else if glen + ro_len == idx {
435 435 (idx, &mut self.root, glen)
436 436 } else {
437 437 (idx, &mut self.growable[idx - ro_len], glen)
438 438 }
439 439 }
440 440
441 441 /// Main insertion method
442 442 ///
443 443 /// This will dive in the node tree to find the deepest `Block` for
444 444 /// `node`, split it as much as needed and record `node` in there.
445 445 /// The method then backtracks, updating references in all the visited
446 446 /// blocks from the root.
447 447 ///
448 448 /// All the mutated `Block` are copied first to the growable part if
449 449 /// needed. That happens for those in the immutable part except the root.
450 450 pub fn insert<I: RevlogIndex>(
451 451 &mut self,
452 452 index: &I,
453 453 node: &Node,
454 454 rev: Revision,
455 455 ) -> Result<(), NodeMapError> {
456 456 let ro_len = &self.readonly.len();
457 457
458 458 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
459 459 let read_nybbles = visit_steps.len();
460 460 // visit_steps cannot be empty, since we always visit the root block
461 461 let deepest = visit_steps.pop().unwrap();
462 462
463 463 let (mut block_idx, mut block, mut glen) =
464 464 self.mutable_block(deepest.block_idx);
465 465
466 466 if let Element::Rev(old_rev) = deepest.element {
467 467 let old_node = index
468 468 .node(old_rev)
469 469 .ok_or(NodeMapError::RevisionNotInIndex(old_rev))?;
470 470 if old_node == node {
471 471 return Ok(()); // avoid creating lots of useless blocks
472 472 }
473 473
474 474 // Looping over the tail of nybbles in both nodes, creating
475 475 // new blocks until we find the difference
476 476 let mut new_block_idx = ro_len + glen;
477 477 let mut nybble = deepest.nybble;
478 478 for nybble_pos in read_nybbles..node.nybbles_len() {
479 479 block.set(nybble, Element::Block(new_block_idx));
480 480
481 481 let new_nybble = node.get_nybble(nybble_pos);
482 482 let old_nybble = old_node.get_nybble(nybble_pos);
483 483
484 484 if old_nybble == new_nybble {
485 485 self.growable.push(Block::new());
486 486 block = &mut self.growable[glen];
487 487 glen += 1;
488 488 new_block_idx += 1;
489 489 nybble = new_nybble;
490 490 } else {
491 491 let mut new_block = Block::new();
492 492 new_block.set(old_nybble, Element::Rev(old_rev));
493 493 new_block.set(new_nybble, Element::Rev(rev));
494 494 self.growable.push(new_block);
495 495 break;
496 496 }
497 497 }
498 498 } else {
499 499 // Free slot in the deepest block: no splitting has to be done
500 500 block.set(deepest.nybble, Element::Rev(rev));
501 501 }
502 502
503 503 // Backtrack over visit steps to update references
504 504 while let Some(visited) = visit_steps.pop() {
505 505 let to_write = Element::Block(block_idx);
506 506 if visit_steps.is_empty() {
507 507 self.root.set(visited.nybble, to_write);
508 508 break;
509 509 }
510 510 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
511 511 if block.get(visited.nybble) == to_write {
512 512 break;
513 513 }
514 514 block.set(visited.nybble, to_write);
515 515 block_idx = new_idx;
516 516 }
517 517 Ok(())
518 518 }
519 519
520 520 /// Make the whole `NodeTree` logically empty, without touching the
521 521 /// immutable part.
522 522 pub fn invalidate_all(&mut self) {
523 523 self.root = Block::new();
524 524 self.growable = Vec::new();
525 525 self.masked_inner_blocks = self.readonly.len();
526 526 }
527 527
528 528 /// Return the number of blocks in the readonly part that are currently
529 529 /// masked in the mutable part.
530 530 ///
531 531 /// The `NodeTree` structure has no efficient way to know how many blocks
532 532 /// are already unreachable in the readonly part.
533 533 ///
534 534 /// After a call to `invalidate_all()`, the returned number can be actually
535 535 /// bigger than the whole readonly part, a conventional way to mean that
536 536 /// all the readonly blocks have been masked. This is what is really
537 537 /// useful to the caller and does not require to know how many were
538 538 /// actually unreachable to begin with.
539 539 pub fn masked_readonly_blocks(&self) -> usize {
540 540 if let Some(readonly_root) = self.readonly.last() {
541 541 if readonly_root == &self.root {
542 542 return 0;
543 543 }
544 544 } else {
545 545 return 0;
546 546 }
547 547 self.masked_inner_blocks + 1
548 548 }
549 549 }
550 550
/// Byte-buffer-backed, read-only storage of [`Block`]s, usable as `&[Block]`.
pub struct NodeTreeBytes {
    // Raw backing storage (e.g. an mmap); may be longer than needed.
    buffer: Box<dyn Deref<Target = [u8]> + Send>,
    // Number of whole `Block`s actually contained in `buffer`.
    len_in_blocks: usize,
}
555 555
556 556 impl NodeTreeBytes {
557 557 fn new(
558 558 buffer: Box<dyn Deref<Target = [u8]> + Send>,
559 559 amount: usize,
560 560 ) -> Self {
561 561 assert!(buffer.len() >= amount);
562 562 let len_in_blocks = amount / size_of::<Block>();
563 563 NodeTreeBytes {
564 564 buffer,
565 565 len_in_blocks,
566 566 }
567 567 }
568 568 }
569 569
570 570 impl Deref for NodeTreeBytes {
571 571 type Target = [Block];
572 572
573 573 fn deref(&self) -> &[Block] {
574 574 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
575 575 // `NodeTreeBytes::new` already asserted that `self.buffer` is
576 576 // large enough.
577 577 .unwrap()
578 578 .0
579 579 }
580 580 }
581 581
/// Iterator state for walking a [`NodeTree`] along the nybbles of a prefix.
struct NodeTreeVisitor<'n> {
    nt: &'n NodeTree,
    // The prefix whose nybbles drive the descent.
    prefix: NodePrefix,
    // Index of the block to read on the next step (starts at the root).
    visit: usize,
    // Position of the next nybble of `prefix` to consume.
    nybble_idx: usize,
    // Set once a non-block element was yielded: the walk cannot go deeper.
    done: bool,
}
589 589
/// One step of a [`NodeTreeVisitor`] walk: which block was read, with which
/// nybble, and the element that was found there.
#[derive(Debug, PartialEq, Clone)]
struct NodeTreeVisitItem {
    block_idx: usize,
    nybble: u8,
    element: Element,
}
596 596
597 597 impl<'n> Iterator for NodeTreeVisitor<'n> {
598 598 type Item = NodeTreeVisitItem;
599 599
600 600 fn next(&mut self) -> Option<Self::Item> {
601 601 if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
602 602 return None;
603 603 }
604 604
605 605 let nybble = self.prefix.get_nybble(self.nybble_idx);
606 606 self.nybble_idx += 1;
607 607
608 608 let visit = self.visit;
609 609 let element = self.nt[visit].get(nybble);
610 610 if let Element::Block(idx) = element {
611 611 self.visit = idx;
612 612 } else {
613 613 self.done = true;
614 614 }
615 615
616 616 Some(NodeTreeVisitItem {
617 617 block_idx: visit,
618 618 nybble,
619 619 element,
620 620 })
621 621 }
622 622 }
623 623
impl NodeTreeVisitItem {
    // Return `Some(opt)` if this item is final, with `opt` being the
    // `Revision` that it may represent.
    //
    // If the item is not terminal, return `None`
    //
    // In other words: `Block` means "keep descending", `Rev` is a
    // conclusive match and `None` a conclusive miss.
    fn final_revision(&self) -> Option<Option<Revision>> {
        match self.element {
            Element::Block(_) => None,
            Element::Rev(r) => Some(Some(r)),
            Element::None => Some(None),
        }
    }
}
637 637
impl From<Vec<Block>> for NodeTree {
    /// Build a [`NodeTree`] taking ownership of `vec` as its readonly part.
    fn from(vec: Vec<Block>) -> Self {
        Self::new(Box::new(vec))
    }
}
643 643
impl fmt::Debug for NodeTree {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Coerce to a slice: the boxed `dyn Deref` itself has no `Debug`.
        let readonly: &[Block] = &*self.readonly;
        write!(
            f,
            "readonly: {:?}, growable: {:?}, root: {:?}",
            readonly, self.growable, self.root
        )
    }
}
654 654
impl Default for NodeTree {
    /// Create a fully mutable empty NodeTree
    fn default() -> Self {
        // No readonly blocks: the in-memory root is the only block.
        NodeTree::new(Box::new(Vec::new()))
    }
}
661 661
662 662 impl NodeMap for NodeTree {
663 663 fn find_bin<'a>(
664 664 &self,
665 665 idx: &impl RevlogIndex,
666 666 prefix: NodePrefix,
667 667 ) -> Result<Option<Revision>, NodeMapError> {
668 668 validate_candidate(idx, prefix, self.lookup(prefix)?)
669 669 .map(|(opt, _shortest)| opt)
670 670 }
671 671
672 672 fn unique_prefix_len_bin<'a>(
673 673 &self,
674 674 idx: &impl RevlogIndex,
675 675 prefix: NodePrefix,
676 676 ) -> Result<Option<usize>, NodeMapError> {
677 677 validate_candidate(idx, prefix, self.lookup(prefix)?)
678 678 .map(|(opt, shortest)| opt.map(|_rev| shortest))
679 679 }
680 680 }
681 681
682 682 #[cfg(test)]
683 mod tests {
683 pub mod tests {
684 684 use super::NodeMapError::*;
685 685 use super::*;
686 686 use crate::revlog::node::{hex_pad_right, Node};
687 687 use std::collections::HashMap;
688 688
689 689 /// Creates a `Block` using a syntax close to the `Debug` output
690 690 macro_rules! block {
691 691 {$($nybble:tt : $variant:ident($val:tt)),*} => (
692 692 {
693 693 let mut block = Block::new();
694 694 $(block.set($nybble, Element::$variant($val)));*;
695 695 block
696 696 }
697 697 )
698 698 }
699 699
700 700 #[test]
701 701 fn test_block_debug() {
702 702 let mut block = Block::new();
703 703 block.set(1, Element::Rev(3));
704 704 block.set(10, Element::Block(0));
705 705 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
706 706 }
707 707
708 708 #[test]
709 709 fn test_block_macro() {
710 710 let block = block! {5: Block(2)};
711 711 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
712 712
713 713 let block = block! {13: Rev(15), 5: Block(2)};
714 714 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
715 715 }
716 716
717 717 #[test]
718 718 fn test_raw_block() {
719 719 let mut raw = [255u8; 64];
720 720
721 721 let mut counter = 0;
722 722 for val in [0_i32, 15, -2, -1, -3].iter() {
723 723 for byte in val.to_be_bytes().iter() {
724 724 raw[counter] = *byte;
725 725 counter += 1;
726 726 }
727 727 }
728 728 let (block, _) = Block::from_bytes(&raw).unwrap();
729 729 assert_eq!(block.get(0), Element::Block(0));
730 730 assert_eq!(block.get(1), Element::Block(15));
731 731 assert_eq!(block.get(3), Element::None);
732 732 assert_eq!(block.get(2), Element::Rev(0));
733 733 assert_eq!(block.get(4), Element::Rev(1));
734 734 }
735 735
736 736 type TestIndex = HashMap<Revision, Node>;
737 737
738 738 impl RevlogIndex for TestIndex {
739 739 fn node(&self, rev: Revision) -> Option<&Node> {
740 740 self.get(&rev)
741 741 }
742 742
743 743 fn len(&self) -> usize {
744 744 self.len()
745 745 }
746 746 }
747 747
748 748 /// Pad hexadecimal Node prefix with zeros on the right
749 749 ///
750 750 /// This avoids having to repeatedly write very long hexadecimal
751 751 /// strings for test data, and brings actual hash size independency.
752 752 #[cfg(test)]
753 753 fn pad_node(hex: &str) -> Node {
754 754 Node::from_hex(&hex_pad_right(hex)).unwrap()
755 755 }
756 756
757 757 /// Pad hexadecimal Node prefix with zeros on the right, then insert
758 758 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
759 759 idx.insert(rev, pad_node(hex));
760 760 }
761 761
762 762 fn sample_nodetree() -> NodeTree {
763 763 NodeTree::from(vec![
764 764 block![0: Rev(9)],
765 765 block![0: Rev(0), 1: Rev(9)],
766 766 block![0: Block(1), 1:Rev(1)],
767 767 ])
768 768 }
769 769
770 770 fn hex(s: &str) -> NodePrefix {
771 771 NodePrefix::from_hex(s).unwrap()
772 772 }
773 773
774 774 #[test]
775 775 fn test_nt_debug() {
776 776 let nt = sample_nodetree();
777 777 assert_eq!(
778 778 format!("{:?}", nt),
779 779 "readonly: \
780 780 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
781 781 growable: [], \
782 782 root: {0: Block(1), 1: Rev(1)}",
783 783 );
784 784 }
785 785
786 786 #[test]
787 787 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
788 788 let mut idx: TestIndex = HashMap::new();
789 789 pad_insert(&mut idx, 1, "1234deadcafe");
790 790
791 791 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
792 792 assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1));
793 793 assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1));
794 794 assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1));
795 795 assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
796 796 assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
797 797
798 798 // and with full binary Nodes
799 799 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
800 800 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
801 801 assert_eq!(nt.find_node(&idx, &unknown)?, None);
802 802 Ok(())
803 803 }
804 804
805 805 #[test]
806 806 fn test_immutable_find_one_jump() {
807 807 let mut idx = TestIndex::new();
808 808 pad_insert(&mut idx, 9, "012");
809 809 pad_insert(&mut idx, 0, "00a");
810 810
811 811 let nt = sample_nodetree();
812 812
813 813 assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
814 814 assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9)));
815 815 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
816 816 assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0)));
817 817 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
818 818 assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
819 819 }
820 820
821 821 #[test]
822 822 fn test_mutated_find() -> Result<(), NodeMapError> {
823 823 let mut idx = TestIndex::new();
824 824 pad_insert(&mut idx, 9, "012");
825 825 pad_insert(&mut idx, 0, "00a");
826 826 pad_insert(&mut idx, 2, "cafe");
827 827 pad_insert(&mut idx, 3, "15");
828 828 pad_insert(&mut idx, 1, "10");
829 829
830 830 let nt = NodeTree {
831 831 readonly: sample_nodetree().readonly,
832 832 growable: vec![block![0: Rev(1), 5: Rev(3)]],
833 833 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
834 834 masked_inner_blocks: 1,
835 835 };
836 836 assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1));
837 837 assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2));
838 838 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
839 839 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
840 840 assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
841 841 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
842 842 assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9));
843 843 assert_eq!(nt.masked_readonly_blocks(), 2);
844 844 Ok(())
845 845 }
846 846
847 struct TestNtIndex {
848 index: TestIndex,
849 nt: NodeTree,
847 pub struct TestNtIndex {
848 pub index: TestIndex,
849 pub nt: NodeTree,
850 850 }
851 851
852 852 impl TestNtIndex {
853 fn new() -> Self {
853 pub fn new() -> Self {
854 854 TestNtIndex {
855 855 index: HashMap::new(),
856 856 nt: NodeTree::default(),
857 857 }
858 858 }
859 859
860 fn insert(
860 pub fn insert_node(
861 &mut self,
862 rev: Revision,
863 node: Node,
864 ) -> Result<(), NodeMapError> {
865 self.index.insert(rev, node);
866 self.nt.insert(&self.index, &node, rev)?;
867 Ok(())
868 }
869
870 pub fn insert(
861 871 &mut self,
862 872 rev: Revision,
863 873 hex: &str,
864 874 ) -> Result<(), NodeMapError> {
865 let node = pad_node(hex);
866 self.index.insert(rev, node);
867 self.nt.insert(&self.index, &node, rev)?;
868 Ok(())
875 return self.insert_node(rev, pad_node(hex));
869 876 }
870 877
871 878 fn find_hex(
872 879 &self,
873 880 prefix: &str,
874 881 ) -> Result<Option<Revision>, NodeMapError> {
875 882 self.nt.find_bin(&self.index, hex(prefix))
876 883 }
877 884
878 885 fn unique_prefix_len_hex(
879 886 &self,
880 887 prefix: &str,
881 888 ) -> Result<Option<usize>, NodeMapError> {
882 889 self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
883 890 }
884 891
885 892 /// Drain `added` and restart a new one
886 893 fn commit(self) -> Self {
887 894 let mut as_vec: Vec<Block> =
888 895 self.nt.readonly.iter().copied().collect();
889 896 as_vec.extend(self.nt.growable);
890 897 as_vec.push(self.nt.root);
891 898
892 899 Self {
893 900 index: self.index,
894 901 nt: NodeTree::from(as_vec),
895 902 }
896 903 }
897 904 }
898 905
899 906 #[test]
900 907 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
901 908 let mut idx = TestNtIndex::new();
902 909 idx.insert(0, "1234")?;
903 910 assert_eq!(idx.find_hex("1")?, Some(0));
904 911 assert_eq!(idx.find_hex("12")?, Some(0));
905 912
906 913 // let's trigger a simple split
907 914 idx.insert(1, "1a34")?;
908 915 assert_eq!(idx.nt.growable.len(), 1);
909 916 assert_eq!(idx.find_hex("12")?, Some(0));
910 917 assert_eq!(idx.find_hex("1a")?, Some(1));
911 918
912 919 // reinserting is a no_op
913 920 idx.insert(1, "1a34")?;
914 921 assert_eq!(idx.nt.growable.len(), 1);
915 922 assert_eq!(idx.find_hex("12")?, Some(0));
916 923 assert_eq!(idx.find_hex("1a")?, Some(1));
917 924
918 925 idx.insert(2, "1a01")?;
919 926 assert_eq!(idx.nt.growable.len(), 2);
920 927 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
921 928 assert_eq!(idx.find_hex("12")?, Some(0));
922 929 assert_eq!(idx.find_hex("1a3")?, Some(1));
923 930 assert_eq!(idx.find_hex("1a0")?, Some(2));
924 931 assert_eq!(idx.find_hex("1a12")?, None);
925 932
926 933 // now let's make it split and create more than one additional block
927 934 idx.insert(3, "1a345")?;
928 935 assert_eq!(idx.nt.growable.len(), 4);
929 936 assert_eq!(idx.find_hex("1a340")?, Some(1));
930 937 assert_eq!(idx.find_hex("1a345")?, Some(3));
931 938 assert_eq!(idx.find_hex("1a341")?, None);
932 939
933 940 // there's no readonly block to mask
934 941 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
935 942 Ok(())
936 943 }
937 944
938 945 #[test]
939 946 fn test_unique_prefix_len_zero_prefix() {
940 947 let mut idx = TestNtIndex::new();
941 948 idx.insert(0, "00000abcd").unwrap();
942 949
943 950 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
944 951 // in the nodetree proper, this will be found at the first nybble
945 952 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
946 953 // but the first difference with `NULL_NODE`
947 954 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
948 955 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
949 956
950 957 // same with odd result
951 958 idx.insert(1, "00123").unwrap();
952 959 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
953 960 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
954 961
955 962 // these are unchanged of course
956 963 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
957 964 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
958 965 }
959 966
960 967 #[test]
961 968 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
962 969 // check that the splitting loop is long enough
963 970 let mut nt_idx = TestNtIndex::new();
964 971 let nt = &mut nt_idx.nt;
965 972 let idx = &mut nt_idx.index;
966 973
967 974 let node0_hex = hex_pad_right("444444");
968 975 let mut node1_hex = hex_pad_right("444444");
969 976 node1_hex.pop();
970 977 node1_hex.push('5');
971 978 let node0 = Node::from_hex(&node0_hex).unwrap();
972 979 let node1 = Node::from_hex(&node1_hex).unwrap();
973 980
974 981 idx.insert(0, node0);
975 982 nt.insert(idx, &node0, 0)?;
976 983 idx.insert(1, node1);
977 984 nt.insert(idx, &node1, 1)?;
978 985
979 986 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
980 987 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
981 988 Ok(())
982 989 }
983 990
984 991 #[test]
985 992 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
986 993 let mut idx = TestNtIndex::new();
987 994 idx.insert(0, "1234")?;
988 995 idx.insert(1, "1235")?;
989 996 idx.insert(2, "131")?;
990 997 idx.insert(3, "cafe")?;
991 998 let mut idx = idx.commit();
992 999 assert_eq!(idx.find_hex("1234")?, Some(0));
993 1000 assert_eq!(idx.find_hex("1235")?, Some(1));
994 1001 assert_eq!(idx.find_hex("131")?, Some(2));
995 1002 assert_eq!(idx.find_hex("cafe")?, Some(3));
996 1003 // we did not add anything since init from readonly
997 1004 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
998 1005
999 1006 idx.insert(4, "123A")?;
1000 1007 assert_eq!(idx.find_hex("1234")?, Some(0));
1001 1008 assert_eq!(idx.find_hex("1235")?, Some(1));
1002 1009 assert_eq!(idx.find_hex("131")?, Some(2));
1003 1010 assert_eq!(idx.find_hex("cafe")?, Some(3));
1004 1011 assert_eq!(idx.find_hex("123A")?, Some(4));
1005 1012 // we masked blocks for all prefixes of "123", including the root
1006 1013 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1007 1014
1008 1015 eprintln!("{:?}", idx.nt);
1009 1016 idx.insert(5, "c0")?;
1010 1017 assert_eq!(idx.find_hex("cafe")?, Some(3));
1011 1018 assert_eq!(idx.find_hex("c0")?, Some(5));
1012 1019 assert_eq!(idx.find_hex("c1")?, None);
1013 1020 assert_eq!(idx.find_hex("1234")?, Some(0));
1014 1021 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1015 1022 // it doesn't mask anything
1016 1023 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1017 1024
1018 1025 Ok(())
1019 1026 }
1020 1027
1021 1028 #[test]
1022 1029 fn test_invalidate_all() -> Result<(), NodeMapError> {
1023 1030 let mut idx = TestNtIndex::new();
1024 1031 idx.insert(0, "1234")?;
1025 1032 idx.insert(1, "1235")?;
1026 1033 idx.insert(2, "131")?;
1027 1034 idx.insert(3, "cafe")?;
1028 1035 let mut idx = idx.commit();
1029 1036
1030 1037 idx.nt.invalidate_all();
1031 1038
1032 1039 assert_eq!(idx.find_hex("1234")?, None);
1033 1040 assert_eq!(idx.find_hex("1235")?, None);
1034 1041 assert_eq!(idx.find_hex("131")?, None);
1035 1042 assert_eq!(idx.find_hex("cafe")?, None);
1036 1043 // all the readonly blocks have been masked, this is the
1037 1044 // conventional expected response
1038 1045 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1039 1046 Ok(())
1040 1047 }
1041 1048
1042 1049 #[test]
1043 1050 fn test_into_added_empty() {
1044 1051 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1045 1052 assert!(sample_nodetree()
1046 1053 .into_readonly_and_added_bytes()
1047 1054 .1
1048 1055 .is_empty());
1049 1056 }
1050 1057
1051 1058 #[test]
1052 1059 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1053 1060 let mut idx = TestNtIndex::new();
1054 1061 idx.insert(0, "1234")?;
1055 1062 let mut idx = idx.commit();
1056 1063 idx.insert(4, "cafe")?;
1057 1064 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1058 1065
1059 1066 // only the root block has been changed
1060 1067 assert_eq!(bytes.len(), size_of::<Block>());
1061 1068 // big endian for -2
1062 1069 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1063 1070 // big endian for -6
1064 1071 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1065 1072 Ok(())
1066 1073 }
1067 1074 }
General Comments 0
You need to be logged in to leave comments. Login now