##// END OF EJS Templates
rust-revlog: don't try to open the data file if the index is empty...
Raphaël Gomès -
r52759:09ece563 default
parent child Browse files
Show More
@@ -1,1419 +1,1422
1 1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 10 mod nodemap_docket;
11 11 pub mod path_encode;
12 12 pub use node::{FromHexError, Node, NodePrefix};
13 13 pub mod changelog;
14 14 pub mod filelog;
15 15 pub mod index;
16 16 pub mod manifest;
17 17 pub mod patch;
18 18
19 19 use std::borrow::Cow;
20 20 use std::collections::HashSet;
21 21 use std::io::Read;
22 22 use std::ops::Deref;
23 23 use std::path::Path;
24 24
25 25 use flate2::read::ZlibDecoder;
26 26 use sha1::{Digest, Sha1};
27 27 use std::cell::RefCell;
28 28 use zstd;
29 29
30 30 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
31 31 use self::nodemap_docket::NodeMapDocket;
32 32 use super::index::Index;
33 33 use super::index::INDEX_ENTRY_SIZE;
34 34 use super::nodemap::{NodeMap, NodeMapError};
35 35 use crate::config::{Config, ResourceProfileValue};
36 36 use crate::errors::HgError;
37 37 use crate::exit_codes;
38 38 use crate::requirements::{
39 39 GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT, SPARSEREVLOG_REQUIREMENT,
40 40 };
41 41 use crate::vfs::Vfs;
42 42
/// Underlying integer type of a revision number.
///
/// As noted in revlog.c, revision numbers are actually encoded in
/// 4 bytes, and are liberally converted to ints, hence the `i32`.
pub type BaseRevision = i32;
46 46
47 47 /// Mercurial revision numbers
48 48 /// In contrast to the more general [`UncheckedRevision`], these are "checked"
49 49 /// in the sense that they should only be used for revisions that are
50 50 /// valid for a given index (i.e. in bounds).
51 51 #[derive(
52 52 Debug,
53 53 derive_more::Display,
54 54 Clone,
55 55 Copy,
56 56 Hash,
57 57 PartialEq,
58 58 Eq,
59 59 PartialOrd,
60 60 Ord,
61 61 )]
62 62 pub struct Revision(pub BaseRevision);
63 63
64 64 impl format_bytes::DisplayBytes for Revision {
65 65 fn display_bytes(
66 66 &self,
67 67 output: &mut dyn std::io::Write,
68 68 ) -> std::io::Result<()> {
69 69 self.0.display_bytes(output)
70 70 }
71 71 }
72 72
73 73 /// Unchecked Mercurial revision numbers.
74 74 ///
75 75 /// Values of this type have no guarantee of being a valid revision number
76 76 /// in any context. Use method `check_revision` to get a valid revision within
77 77 /// the appropriate index object.
78 78 #[derive(
79 79 Debug,
80 80 derive_more::Display,
81 81 Clone,
82 82 Copy,
83 83 Hash,
84 84 PartialEq,
85 85 Eq,
86 86 PartialOrd,
87 87 Ord,
88 88 )]
89 89 pub struct UncheckedRevision(pub BaseRevision);
90 90
91 91 impl format_bytes::DisplayBytes for UncheckedRevision {
92 92 fn display_bytes(
93 93 &self,
94 94 output: &mut dyn std::io::Write,
95 95 ) -> std::io::Result<()> {
96 96 self.0.display_bytes(output)
97 97 }
98 98 }
99 99
100 100 impl From<Revision> for UncheckedRevision {
101 101 fn from(value: Revision) -> Self {
102 102 Self(value.0)
103 103 }
104 104 }
105 105
106 106 impl From<BaseRevision> for UncheckedRevision {
107 107 fn from(value: BaseRevision) -> Self {
108 108 Self(value)
109 109 }
110 110 }
111 111
112 112 /// Marker expressing the absence of a parent
113 113 ///
114 114 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
115 115 /// to be smaller than all existing revisions.
116 116 pub const NULL_REVISION: Revision = Revision(-1);
117 117
118 118 /// Same as `mercurial.node.wdirrev`
119 119 ///
120 120 /// This is also equal to `i32::max_value()`, but it's better to spell
121 121 /// it out explicitely, same as in `mercurial.node`
122 122 #[allow(clippy::unreadable_literal)]
123 123 pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
124 124 UncheckedRevision(0x7fffffff);
125 125
126 126 pub const WORKING_DIRECTORY_HEX: &str =
127 127 "ffffffffffffffffffffffffffffffffffffffff";
128 128
129 129 /// The simplest expression of what we need of Mercurial DAGs.
130 130 pub trait Graph {
131 131 /// Return the two parents of the given `Revision`.
132 132 ///
133 133 /// Each of the parents can be independently `NULL_REVISION`
134 134 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
135 135 }
136 136
137 137 #[derive(Clone, Debug, PartialEq)]
138 138 pub enum GraphError {
139 139 ParentOutOfRange(Revision),
140 140 }
141 141
142 142 impl<T: Graph> Graph for &T {
143 143 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
144 144 (*self).parents(rev)
145 145 }
146 146 }
147 147
148 148 /// The Mercurial Revlog Index
149 149 ///
150 150 /// This is currently limited to the minimal interface that is needed for
151 151 /// the [`nodemap`](nodemap/index.html) module
152 152 pub trait RevlogIndex {
153 153 /// Total number of Revisions referenced in this index
154 154 fn len(&self) -> usize;
155 155
156 156 fn is_empty(&self) -> bool {
157 157 self.len() == 0
158 158 }
159 159
160 160 /// Return a reference to the Node or `None` for `NULL_REVISION`
161 161 fn node(&self, rev: Revision) -> Option<&Node>;
162 162
163 163 /// Return a [`Revision`] if `rev` is a valid revision number for this
164 164 /// index.
165 165 ///
166 166 /// [`NULL_REVISION`] is considered to be valid.
167 167 #[inline(always)]
168 168 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
169 169 let rev = rev.0;
170 170
171 171 if rev == NULL_REVISION.0 || (rev >= 0 && (rev as usize) < self.len())
172 172 {
173 173 Some(Revision(rev))
174 174 } else {
175 175 None
176 176 }
177 177 }
178 178 }
179 179
// Per-revision flags, stored in the four highest bits of the 16-bit flag
// field.
const REVISION_FLAG_CENSORED: u16 = 0x8000;
const REVISION_FLAG_ELLIPSIS: u16 = 0x4000;
const REVISION_FLAG_EXTSTORED: u16 = 0x2000;
const REVISION_FLAG_HASCOPIESINFO: u16 = 0x1000;

// Keep this in sync with REVIDX_KNOWN_FLAGS in
// mercurial/revlogutils/flagutil.py
const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_HASCOPIESINFO
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_CENSORED;

// Flags carried by the entry built for the null revision: none.
const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
193 193
194 194 #[derive(Debug, derive_more::From, derive_more::Display)]
195 195 pub enum RevlogError {
196 196 InvalidRevision,
197 197 /// Working directory is not supported
198 198 WDirUnsupported,
199 199 /// Found more than one entry whose ID match the requested prefix
200 200 AmbiguousPrefix,
201 201 #[from]
202 202 Other(HgError),
203 203 }
204 204
205 205 impl From<NodeMapError> for RevlogError {
206 206 fn from(error: NodeMapError) -> Self {
207 207 match error {
208 208 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
209 209 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
210 210 format!("nodemap point to revision {} not in index", rev),
211 211 ),
212 212 }
213 213 }
214 214 }
215 215
216 216 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
217 217 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
218 218 }
219 219
220 220 impl RevlogError {
221 221 fn corrupted<S: AsRef<str>>(context: S) -> Self {
222 222 RevlogError::Other(corrupted(context))
223 223 }
224 224 }
225 225
/// The kind of data a revlog stores.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum RevlogType {
    /// Converted from the integer `1001`
    Changelog,
    /// Converted from the integer `1002`
    Manifestlog,
    /// Converted from the integer `1003`
    Filelog,
}
232 232
233 233 impl TryFrom<usize> for RevlogType {
234 234 type Error = HgError;
235 235
236 236 fn try_from(value: usize) -> Result<Self, Self::Error> {
237 237 match value {
238 238 1001 => Ok(Self::Changelog),
239 239 1002 => Ok(Self::Manifestlog),
240 240 1003 => Ok(Self::Filelog),
241 241 t => Err(HgError::abort(
242 242 format!("Unknown revlog type {}", t),
243 243 exit_codes::ABORT,
244 244 None,
245 245 )),
246 246 }
247 247 }
248 248 }
249 249
/// The compression engine used by a revlog, along with its options.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum CompressionEngine {
    /// zlib compression
    Zlib {
        /// Between 0 and 9 included
        level: u32,
    },
    /// zstd compression
    Zstd {
        /// Between 0 and 22 included
        level: u32,
        /// Never used in practice for now
        threads: u32,
    },
    /// No compression is performed
    None,
}
265 265 impl CompressionEngine {
266 266 pub fn set_level(&mut self, new_level: usize) -> Result<(), HgError> {
267 267 match self {
268 268 CompressionEngine::Zlib { level } => {
269 269 if new_level > 9 {
270 270 return Err(HgError::abort(
271 271 format!(
272 272 "invalid compression zlib compression level {}",
273 273 new_level
274 274 ),
275 275 exit_codes::ABORT,
276 276 None,
277 277 ));
278 278 }
279 279 *level = new_level as u32;
280 280 }
281 281 CompressionEngine::Zstd { level, .. } => {
282 282 if new_level > 22 {
283 283 return Err(HgError::abort(
284 284 format!(
285 285 "invalid compression zstd compression level {}",
286 286 new_level
287 287 ),
288 288 exit_codes::ABORT,
289 289 None,
290 290 ));
291 291 }
292 292 *level = new_level as u32;
293 293 }
294 294 CompressionEngine::None => {}
295 295 }
296 296 Ok(())
297 297 }
298 298
299 299 pub fn zstd(
300 300 zstd_level: Option<u32>,
301 301 ) -> Result<CompressionEngine, HgError> {
302 302 let mut engine = CompressionEngine::Zstd {
303 303 level: 3,
304 304 threads: 0,
305 305 };
306 306 if let Some(level) = zstd_level {
307 307 engine.set_level(level as usize)?;
308 308 }
309 309 Ok(engine)
310 310 }
311 311 }
312 312
313 313 impl Default for CompressionEngine {
314 314 fn default() -> Self {
315 315 Self::Zlib { level: 6 }
316 316 }
317 317 }
318 318
/// Holds configuration values about how the revlog data is read
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct RevlogDataConfig {
    /// Should we try to open the "pending" version of the revlog
    pub try_pending: bool,
    /// Should we try to open the "split" version of the revlog
    pub try_split: bool,
    /// When true, `indexfile` should be opened with `checkambig=True` at
    /// writing time, to avoid file stat ambiguity
    pub check_ambig: bool,
    /// If true, use mmap instead of reading to deal with large indexes
    pub mmap_large_index: bool,
    /// How much data is considered large
    pub mmap_index_threshold: Option<u64>,
    /// How much data to read and cache into the raw revlog data cache
    pub chunk_cache_size: u64,
    /// The size of the uncompressed cache compared to the largest revision
    /// seen
    pub uncompressed_cache_factor: Option<f64>,
    /// The number of chunks cached
    pub uncompressed_cache_count: Option<u64>,
    /// Allow sparse reading of the revlog data
    pub with_sparse_read: bool,
    /// Minimal density of a sparse read chunk
    pub sr_density_threshold: f64,
    /// Minimal size of the data we skip when performing sparse reads
    pub sr_min_gap_size: u64,
    /// Whether deltas are encoded against arbitrary bases
    pub general_delta: bool,
}
349 349
350 350 impl RevlogDataConfig {
351 351 pub fn new(
352 352 config: &Config,
353 353 requirements: &HashSet<String>,
354 354 ) -> Result<Self, HgError> {
355 355 let mut data_config = Self::default();
356 356 if let Some(chunk_cache_size) =
357 357 config.get_byte_size(b"format", b"chunkcachesize")?
358 358 {
359 359 data_config.chunk_cache_size = chunk_cache_size;
360 360 }
361 361
362 362 let memory_profile = config.get_resource_profile(Some("memory"));
363 363 if memory_profile.value >= ResourceProfileValue::Medium {
364 364 data_config.uncompressed_cache_count = Some(10_000);
365 365 data_config.uncompressed_cache_factor = Some(4.0);
366 366 if memory_profile.value >= ResourceProfileValue::High {
367 367 data_config.uncompressed_cache_factor = Some(10.0)
368 368 }
369 369 }
370 370
371 371 if let Some(mmap_index_threshold) =
372 372 config.get_byte_size(b"experimental", b"mmapindexthreshold")?
373 373 {
374 374 data_config.mmap_index_threshold = Some(mmap_index_threshold);
375 375 }
376 376
377 377 let with_sparse_read =
378 378 config.get_bool(b"experimental", b"sparse-read")?;
379 379 if let Some(sr_density_threshold) = config
380 380 .get_f64(b"experimental", b"sparse-read.density-threshold")?
381 381 {
382 382 data_config.sr_density_threshold = sr_density_threshold;
383 383 }
384 384 data_config.with_sparse_read = with_sparse_read;
385 385 if let Some(sr_min_gap_size) = config
386 386 .get_byte_size(b"experimental", b"sparse-read.min-gap-size")?
387 387 {
388 388 data_config.sr_min_gap_size = sr_min_gap_size;
389 389 }
390 390
391 391 data_config.with_sparse_read =
392 392 requirements.contains(SPARSEREVLOG_REQUIREMENT);
393 393
394 394 Ok(data_config)
395 395 }
396 396 }
397 397
398 398 impl Default for RevlogDataConfig {
399 399 fn default() -> Self {
400 400 Self {
401 401 chunk_cache_size: 65536,
402 402 sr_density_threshold: 0.50,
403 403 sr_min_gap_size: 262144,
404 404 try_pending: Default::default(),
405 405 try_split: Default::default(),
406 406 check_ambig: Default::default(),
407 407 mmap_large_index: Default::default(),
408 408 mmap_index_threshold: Default::default(),
409 409 uncompressed_cache_factor: Default::default(),
410 410 uncompressed_cache_count: Default::default(),
411 411 with_sparse_read: Default::default(),
412 412 general_delta: Default::default(),
413 413 }
414 414 }
415 415 }
416 416
/// Holds configuration values about how new deltas are computed.
///
/// Some attributes are duplicated from [`RevlogDataConfig`] so that each
/// object stays self-contained.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct RevlogDeltaConfig {
    /// Whether deltas can be encoded against arbitrary bases
    pub general_delta: bool,
    /// Allow sparse writing of the revlog data
    pub sparse_revlog: bool,
    /// Maximum length of a delta chain
    pub max_chain_len: Option<u64>,
    /// Maximum distance between a delta chain's start and end
    pub max_deltachain_span: Option<u64>,
    /// If `upper_bound_comp` is not None, this is the expected maximal
    /// gain from compression for the data content
    pub upper_bound_comp: Option<f64>,
    /// Should we try a delta against both parents
    pub delta_both_parents: bool,
    /// Test delta base candidate groups by chunks of this maximal size
    pub candidate_group_chunk_size: u64,
    /// Should we display debug information about delta computation
    pub debug_delta: bool,
    /// Trust incoming deltas by default
    pub lazy_delta: bool,
    /// Trust the base of incoming deltas by default
    pub lazy_delta_base: bool,
}
445 445 impl RevlogDeltaConfig {
446 446 pub fn new(
447 447 config: &Config,
448 448 requirements: &HashSet<String>,
449 449 revlog_type: RevlogType,
450 450 ) -> Result<Self, HgError> {
451 451 let mut delta_config = Self {
452 452 delta_both_parents: config
453 453 .get_option_no_default(
454 454 b"storage",
455 455 b"revlog.optimize-delta-parent-choice",
456 456 )?
457 457 .unwrap_or(true),
458 458 candidate_group_chunk_size: config
459 459 .get_u64(
460 460 b"storage",
461 461 b"revlog.delta-parent-search.candidate-group-chunk-size",
462 462 )?
463 463 .unwrap_or_default(),
464 464 ..Default::default()
465 465 };
466 466
467 467 delta_config.debug_delta =
468 468 config.get_bool(b"debug", b"revlog.debug-delta")?;
469 469
470 470 delta_config.general_delta =
471 471 requirements.contains(GENERALDELTA_REQUIREMENT);
472 472
473 473 let lazy_delta =
474 474 config.get_bool(b"storage", b"revlog.reuse-external-delta")?;
475 475
476 476 if revlog_type == RevlogType::Manifestlog {
477 477 // upper bound of what we expect from compression
478 478 // (real life value seems to be 3)
479 479 delta_config.upper_bound_comp = Some(3.0)
480 480 }
481 481
482 482 let mut lazy_delta_base = false;
483 483 if lazy_delta {
484 484 lazy_delta_base = match config.get_option_no_default(
485 485 b"storage",
486 486 b"revlog.reuse-external-delta-parent",
487 487 )? {
488 488 Some(base) => base,
489 489 None => config.get_bool(b"format", b"generaldelta")?,
490 490 };
491 491 }
492 492 delta_config.lazy_delta = lazy_delta;
493 493 delta_config.lazy_delta_base = lazy_delta_base;
494 494
495 495 delta_config.max_deltachain_span =
496 496 match config.get_i64(b"experimental", b"maxdeltachainspan")? {
497 497 Some(span) => {
498 498 if span < 0 {
499 499 None
500 500 } else {
501 501 Some(span as u64)
502 502 }
503 503 }
504 504 None => None,
505 505 };
506 506
507 507 delta_config.sparse_revlog =
508 508 requirements.contains(SPARSEREVLOG_REQUIREMENT);
509 509
510 510 delta_config.max_chain_len =
511 511 config.get_byte_size_no_default(b"format", b"maxchainlen")?;
512 512
513 513 Ok(delta_config)
514 514 }
515 515 }
516 516
517 517 impl Default for RevlogDeltaConfig {
518 518 fn default() -> Self {
519 519 Self {
520 520 delta_both_parents: true,
521 521 lazy_delta: true,
522 522 general_delta: Default::default(),
523 523 sparse_revlog: Default::default(),
524 524 max_chain_len: Default::default(),
525 525 max_deltachain_span: Default::default(),
526 526 upper_bound_comp: Default::default(),
527 527 candidate_group_chunk_size: Default::default(),
528 528 debug_delta: Default::default(),
529 529 lazy_delta_base: Default::default(),
530 530 }
531 531 }
532 532 }
533 533
534 534 #[derive(Debug, Default, Clone, Copy, PartialEq)]
535 535 /// Holds configuration values about the available revlog features
536 536 pub struct RevlogFeatureConfig {
537 537 /// The compression engine and its options
538 538 pub compression_engine: CompressionEngine,
539 539 /// Can we use censor on this revlog
540 540 pub censorable: bool,
541 541 /// Does this revlog use the "side data" feature
542 542 pub has_side_data: bool,
543 543 /// Might remove this configuration once the rank computation has no
544 544 /// impact
545 545 pub compute_rank: bool,
546 546 /// Parent order is supposed to be semantically irrelevant, so we
547 547 /// normally re-sort parents to ensure that the first parent is non-null,
548 548 /// if there is a non-null parent at all.
549 549 /// filelog abuses the parent order as a flag to mark some instances of
550 550 /// meta-encoded files, so allow it to disable this behavior.
551 551 pub canonical_parent_order: bool,
552 552 /// Can ellipsis commit be used
553 553 pub enable_ellipsis: bool,
554 554 }
555 555 impl RevlogFeatureConfig {
556 556 pub fn new(
557 557 config: &Config,
558 558 requirements: &HashSet<String>,
559 559 ) -> Result<Self, HgError> {
560 560 let mut feature_config = Self::default();
561 561
562 562 let zlib_level = config.get_u32(b"storage", b"revlog.zlib.level")?;
563 563 let zstd_level = config.get_u32(b"storage", b"revlog.zstd.level")?;
564 564
565 565 feature_config.compression_engine = CompressionEngine::default();
566 566
567 567 for requirement in requirements {
568 568 if requirement.starts_with("revlog-compression-")
569 569 || requirement.starts_with("exp-compression-")
570 570 {
571 571 let split = &mut requirement.splitn(3, '-');
572 572 split.next();
573 573 split.next();
574 574 feature_config.compression_engine = match split.next().unwrap()
575 575 {
576 576 "zstd" => CompressionEngine::zstd(zstd_level)?,
577 577 e => {
578 578 return Err(HgError::UnsupportedFeature(format!(
579 579 "Unsupported compression engine '{e}'"
580 580 )))
581 581 }
582 582 };
583 583 }
584 584 }
585 585 if let Some(level) = zlib_level {
586 586 if matches!(
587 587 feature_config.compression_engine,
588 588 CompressionEngine::Zlib { .. }
589 589 ) {
590 590 feature_config
591 591 .compression_engine
592 592 .set_level(level as usize)?;
593 593 }
594 594 }
595 595
596 596 feature_config.enable_ellipsis =
597 597 requirements.contains(NARROW_REQUIREMENT);
598 598
599 599 Ok(feature_config)
600 600 }
601 601 }
602 602
603 603 /// Read only implementation of revlog.
604 604 pub struct Revlog {
605 605 /// When index and data are not interleaved: bytes of the revlog index.
606 606 /// When index and data are interleaved: bytes of the revlog index and
607 607 /// data.
608 608 index: Index,
609 609 /// When index and data are not interleaved: bytes of the revlog data
610 610 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
611 611 /// When present on disk: the persistent nodemap for this revlog
612 612 nodemap: Option<nodemap::NodeTree>,
613 613 }
614 614
615 615 impl Graph for Revlog {
616 616 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
617 617 self.index.parents(rev)
618 618 }
619 619 }
620 620
/// The supported revlog versions, each with its version-specific options.
#[derive(Debug, Copy, Clone)]
pub enum RevlogVersionOptions {
    V0,
    /// Version 1, with or without the generaldelta flag
    V1 { generaldelta: bool },
    V2,
    ChangelogV2 { compute_rank: bool },
}
628 628
629 629 /// Options to govern how a revlog should be opened, usually from the
630 630 /// repository configuration or requirements.
631 631 #[derive(Debug, Copy, Clone)]
632 632 pub struct RevlogOpenOptions {
633 633 /// The revlog version, along with any option specific to this version
634 634 pub version: RevlogVersionOptions,
635 635 /// Whether the revlog uses a persistent nodemap.
636 636 pub use_nodemap: bool,
637 637 // TODO other non-header/version options,
638 638 }
639 639
640 640 impl RevlogOpenOptions {
641 641 pub fn new() -> Self {
642 642 Self {
643 643 version: RevlogVersionOptions::V1 { generaldelta: true },
644 644 use_nodemap: false,
645 645 }
646 646 }
647 647
648 648 fn default_index_header(&self) -> index::IndexHeader {
649 649 index::IndexHeader {
650 650 header_bytes: match self.version {
651 651 RevlogVersionOptions::V0 => [0, 0, 0, 0],
652 652 RevlogVersionOptions::V1 { generaldelta } => {
653 653 [0, if generaldelta { 3 } else { 1 }, 0, 1]
654 654 }
655 655 RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
656 656 RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
657 657 0xD34Du32.to_be_bytes()
658 658 }
659 659 },
660 660 }
661 661 }
662 662 }
663 663
664 664 impl Default for RevlogOpenOptions {
665 665 fn default() -> Self {
666 666 Self::new()
667 667 }
668 668 }
669 669
670 670 impl Revlog {
671 671 /// Open a revlog index file.
672 672 ///
673 673 /// It will also open the associated data file if index and data are not
674 674 /// interleaved.
675 675 pub fn open(
676 676 store_vfs: &Vfs,
677 677 index_path: impl AsRef<Path>,
678 678 data_path: Option<&Path>,
679 679 options: RevlogOpenOptions,
680 680 ) -> Result<Self, HgError> {
681 681 Self::open_gen(store_vfs, index_path, data_path, options, None)
682 682 }
683 683
684 684 fn open_gen(
685 685 store_vfs: &Vfs,
686 686 index_path: impl AsRef<Path>,
687 687 data_path: Option<&Path>,
688 688 options: RevlogOpenOptions,
689 689 nodemap_for_test: Option<nodemap::NodeTree>,
690 690 ) -> Result<Self, HgError> {
691 691 let index_path = index_path.as_ref();
692 692 let index = {
693 693 match store_vfs.mmap_open_opt(index_path)? {
694 694 None => Index::new(
695 695 Box::<Vec<_>>::default(),
696 696 options.default_index_header(),
697 697 ),
698 698 Some(index_mmap) => {
699 699 let index = Index::new(
700 700 Box::new(index_mmap),
701 701 options.default_index_header(),
702 702 )?;
703 703 Ok(index)
704 704 }
705 705 }
706 706 }?;
707 707
708 708 let default_data_path = index_path.with_extension("d");
709 709
710 710 // type annotation required
711 711 // won't recognize Mmap as Deref<Target = [u8]>
712 712 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
713 713 if index.is_inline() {
714 714 None
715 } else if index.is_empty() {
716 // No need to even try to open the data file then.
717 Some(Box::new(&[][..]))
715 718 } else {
716 719 let data_path = data_path.unwrap_or(&default_data_path);
717 720 let data_mmap = store_vfs.mmap_open(data_path)?;
718 721 Some(Box::new(data_mmap))
719 722 };
720 723
721 724 let nodemap = if index.is_inline() || !options.use_nodemap {
722 725 None
723 726 } else {
724 727 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
725 728 |(docket, data)| {
726 729 nodemap::NodeTree::load_bytes(
727 730 Box::new(data),
728 731 docket.data_length,
729 732 )
730 733 },
731 734 )
732 735 };
733 736
734 737 let nodemap = nodemap_for_test.or(nodemap);
735 738
736 739 Ok(Revlog {
737 740 index,
738 741 data_bytes,
739 742 nodemap,
740 743 })
741 744 }
742 745
743 746 /// Return number of entries of the `Revlog`.
744 747 pub fn len(&self) -> usize {
745 748 self.index.len()
746 749 }
747 750
748 751 /// Returns `true` if the `Revlog` has zero `entries`.
749 752 pub fn is_empty(&self) -> bool {
750 753 self.index.is_empty()
751 754 }
752 755
753 756 /// Returns the node ID for the given revision number, if it exists in this
754 757 /// revlog
755 758 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
756 759 if rev == NULL_REVISION.into() {
757 760 return Some(&NULL_NODE);
758 761 }
759 762 let rev = self.index.check_revision(rev)?;
760 763 Some(self.index.get_entry(rev)?.hash())
761 764 }
762 765
763 766 /// Return the revision number for the given node ID, if it exists in this
764 767 /// revlog
765 768 pub fn rev_from_node(
766 769 &self,
767 770 node: NodePrefix,
768 771 ) -> Result<Revision, RevlogError> {
769 772 if let Some(nodemap) = &self.nodemap {
770 773 nodemap
771 774 .find_bin(&self.index, node)?
772 775 .ok_or(RevlogError::InvalidRevision)
773 776 } else {
774 777 self.rev_from_node_no_persistent_nodemap(node)
775 778 }
776 779 }
777 780
778 781 /// Same as `rev_from_node`, without using a persistent nodemap
779 782 ///
780 783 /// This is used as fallback when a persistent nodemap is not present.
781 784 /// This happens when the persistent-nodemap experimental feature is not
782 785 /// enabled, or for small revlogs.
783 786 fn rev_from_node_no_persistent_nodemap(
784 787 &self,
785 788 node: NodePrefix,
786 789 ) -> Result<Revision, RevlogError> {
787 790 // Linear scan of the revlog
788 791 // TODO: consider building a non-persistent nodemap in memory to
789 792 // optimize these cases.
790 793 let mut found_by_prefix = None;
791 794 for rev in (-1..self.len() as BaseRevision).rev() {
792 795 let rev = Revision(rev as BaseRevision);
793 796 let candidate_node = if rev == Revision(-1) {
794 797 NULL_NODE
795 798 } else {
796 799 let index_entry =
797 800 self.index.get_entry(rev).ok_or_else(|| {
798 801 HgError::corrupted(
799 802 "revlog references a revision not in the index",
800 803 )
801 804 })?;
802 805 *index_entry.hash()
803 806 };
804 807 if node == candidate_node {
805 808 return Ok(rev);
806 809 }
807 810 if node.is_prefix_of(&candidate_node) {
808 811 if found_by_prefix.is_some() {
809 812 return Err(RevlogError::AmbiguousPrefix);
810 813 }
811 814 found_by_prefix = Some(rev)
812 815 }
813 816 }
814 817 found_by_prefix.ok_or(RevlogError::InvalidRevision)
815 818 }
816 819
817 820 /// Returns whether the given revision exists in this revlog.
818 821 pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
819 822 self.index.check_revision(rev).is_some()
820 823 }
821 824
822 825 /// Return the full data associated to a revision.
823 826 ///
824 827 /// All entries required to build the final data out of deltas will be
825 828 /// retrieved as needed, and the deltas will be applied to the inital
826 829 /// snapshot to rebuild the final data.
827 830 pub fn get_rev_data(
828 831 &self,
829 832 rev: UncheckedRevision,
830 833 ) -> Result<Cow<[u8]>, RevlogError> {
831 834 if rev == NULL_REVISION.into() {
832 835 return Ok(Cow::Borrowed(&[]));
833 836 };
834 837 self.get_entry(rev)?.data()
835 838 }
836 839
837 840 /// [`Self::get_rev_data`] for checked revisions.
838 841 pub fn get_rev_data_for_checked_rev(
839 842 &self,
840 843 rev: Revision,
841 844 ) -> Result<Cow<[u8]>, RevlogError> {
842 845 if rev == NULL_REVISION {
843 846 return Ok(Cow::Borrowed(&[]));
844 847 };
845 848 self.get_entry_for_checked_rev(rev)?.data()
846 849 }
847 850
848 851 /// Check the hash of some given data against the recorded hash.
849 852 pub fn check_hash(
850 853 &self,
851 854 p1: Revision,
852 855 p2: Revision,
853 856 expected: &[u8],
854 857 data: &[u8],
855 858 ) -> bool {
856 859 let e1 = self.index.get_entry(p1);
857 860 let h1 = match e1 {
858 861 Some(ref entry) => entry.hash(),
859 862 None => &NULL_NODE,
860 863 };
861 864 let e2 = self.index.get_entry(p2);
862 865 let h2 = match e2 {
863 866 Some(ref entry) => entry.hash(),
864 867 None => &NULL_NODE,
865 868 };
866 869
867 870 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
868 871 }
869 872
870 873 /// Build the full data of a revision out its snapshot
871 874 /// and its deltas.
872 875 fn build_data_from_deltas(
873 876 snapshot: RevlogEntry,
874 877 deltas: &[RevlogEntry],
875 878 ) -> Result<Vec<u8>, HgError> {
876 879 let snapshot = snapshot.data_chunk()?;
877 880 let deltas = deltas
878 881 .iter()
879 882 .rev()
880 883 .map(RevlogEntry::data_chunk)
881 884 .collect::<Result<Vec<_>, _>>()?;
882 885 let patches: Vec<_> =
883 886 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
884 887 let patch = patch::fold_patch_lists(&patches);
885 888 Ok(patch.apply(&snapshot))
886 889 }
887 890
888 891 /// Return the revlog data.
889 892 fn data(&self) -> &[u8] {
890 893 match &self.data_bytes {
891 894 Some(data_bytes) => data_bytes,
892 895 None => panic!(
893 896 "forgot to load the data or trying to access inline data"
894 897 ),
895 898 }
896 899 }
897 900
898 901 pub fn make_null_entry(&self) -> RevlogEntry {
899 902 RevlogEntry {
900 903 revlog: self,
901 904 rev: NULL_REVISION,
902 905 bytes: b"",
903 906 compressed_len: 0,
904 907 uncompressed_len: 0,
905 908 base_rev_or_base_of_delta_chain: None,
906 909 p1: NULL_REVISION,
907 910 p2: NULL_REVISION,
908 911 flags: NULL_REVLOG_ENTRY_FLAGS,
909 912 hash: NULL_NODE,
910 913 }
911 914 }
912 915
913 916 fn get_entry_for_checked_rev(
914 917 &self,
915 918 rev: Revision,
916 919 ) -> Result<RevlogEntry, RevlogError> {
917 920 if rev == NULL_REVISION {
918 921 return Ok(self.make_null_entry());
919 922 }
920 923 let index_entry = self
921 924 .index
922 925 .get_entry(rev)
923 926 .ok_or(RevlogError::InvalidRevision)?;
924 927 let offset = index_entry.offset();
925 928 let start = if self.index.is_inline() {
926 929 offset + ((rev.0 as usize + 1) * INDEX_ENTRY_SIZE)
927 930 } else {
928 931 offset
929 932 };
930 933 let end = start + index_entry.compressed_len() as usize;
931 934 let data = if self.index.is_inline() {
932 935 self.index.data(start, end)
933 936 } else {
934 937 &self.data()[start..end]
935 938 };
936 939 let base_rev = self
937 940 .index
938 941 .check_revision(index_entry.base_revision_or_base_of_delta_chain())
939 942 .ok_or_else(|| {
940 943 RevlogError::corrupted(format!(
941 944 "base revision for rev {} is invalid",
942 945 rev
943 946 ))
944 947 })?;
945 948 let p1 =
946 949 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
947 950 RevlogError::corrupted(format!(
948 951 "p1 for rev {} is invalid",
949 952 rev
950 953 ))
951 954 })?;
952 955 let p2 =
953 956 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
954 957 RevlogError::corrupted(format!(
955 958 "p2 for rev {} is invalid",
956 959 rev
957 960 ))
958 961 })?;
959 962 let entry = RevlogEntry {
960 963 revlog: self,
961 964 rev,
962 965 bytes: data,
963 966 compressed_len: index_entry.compressed_len(),
964 967 uncompressed_len: index_entry.uncompressed_len(),
965 968 base_rev_or_base_of_delta_chain: if base_rev == rev {
966 969 None
967 970 } else {
968 971 Some(base_rev)
969 972 },
970 973 p1,
971 974 p2,
972 975 flags: index_entry.flags(),
973 976 hash: *index_entry.hash(),
974 977 };
975 978 Ok(entry)
976 979 }
977 980
978 981 /// Get an entry of the revlog.
979 982 pub fn get_entry(
980 983 &self,
981 984 rev: UncheckedRevision,
982 985 ) -> Result<RevlogEntry, RevlogError> {
983 986 if rev == NULL_REVISION.into() {
984 987 return Ok(self.make_null_entry());
985 988 }
986 989 let rev = self.index.check_revision(rev).ok_or_else(|| {
987 990 RevlogError::corrupted(format!("rev {} is invalid", rev))
988 991 })?;
989 992 self.get_entry_for_checked_rev(rev)
990 993 }
991 994 }
992 995
993 996 /// The revlog entry's bytes and the necessary informations to extract
994 997 /// the entry's data.
995 998 #[derive(Clone)]
996 999 pub struct RevlogEntry<'revlog> {
997 1000 revlog: &'revlog Revlog,
998 1001 rev: Revision,
999 1002 bytes: &'revlog [u8],
1000 1003 compressed_len: u32,
1001 1004 uncompressed_len: i32,
1002 1005 base_rev_or_base_of_delta_chain: Option<Revision>,
1003 1006 p1: Revision,
1004 1007 p2: Revision,
1005 1008 flags: u16,
1006 1009 hash: Node,
1007 1010 }
1008 1011
1009 1012 thread_local! {
1010 1013 // seems fine to [unwrap] here: this can only fail due to memory allocation
1011 1014 // failing, and it's normal for that to cause panic.
1012 1015 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
1013 1016 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
1014 1017 }
1015 1018
1016 1019 fn zstd_decompress_to_buffer(
1017 1020 bytes: &[u8],
1018 1021 buf: &mut Vec<u8>,
1019 1022 ) -> Result<usize, std::io::Error> {
1020 1023 ZSTD_DECODER
1021 1024 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
1022 1025 }
1023 1026
1024 1027 impl<'revlog> RevlogEntry<'revlog> {
1025 1028 pub fn revision(&self) -> Revision {
1026 1029 self.rev
1027 1030 }
1028 1031
1029 1032 pub fn node(&self) -> &Node {
1030 1033 &self.hash
1031 1034 }
1032 1035
1033 1036 pub fn uncompressed_len(&self) -> Option<u32> {
1034 1037 u32::try_from(self.uncompressed_len).ok()
1035 1038 }
1036 1039
1037 1040 pub fn has_p1(&self) -> bool {
1038 1041 self.p1 != NULL_REVISION
1039 1042 }
1040 1043
1041 1044 pub fn p1_entry(
1042 1045 &self,
1043 1046 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
1044 1047 if self.p1 == NULL_REVISION {
1045 1048 Ok(None)
1046 1049 } else {
1047 1050 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?))
1048 1051 }
1049 1052 }
1050 1053
1051 1054 pub fn p2_entry(
1052 1055 &self,
1053 1056 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
1054 1057 if self.p2 == NULL_REVISION {
1055 1058 Ok(None)
1056 1059 } else {
1057 1060 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?))
1058 1061 }
1059 1062 }
1060 1063
1061 1064 pub fn p1(&self) -> Option<Revision> {
1062 1065 if self.p1 == NULL_REVISION {
1063 1066 None
1064 1067 } else {
1065 1068 Some(self.p1)
1066 1069 }
1067 1070 }
1068 1071
1069 1072 pub fn p2(&self) -> Option<Revision> {
1070 1073 if self.p2 == NULL_REVISION {
1071 1074 None
1072 1075 } else {
1073 1076 Some(self.p2)
1074 1077 }
1075 1078 }
1076 1079
1077 1080 pub fn is_censored(&self) -> bool {
1078 1081 (self.flags & REVISION_FLAG_CENSORED) != 0
1079 1082 }
1080 1083
1081 1084 pub fn has_length_affecting_flag_processor(&self) -> bool {
1082 1085 // Relevant Python code: revlog.size()
1083 1086 // note: ELLIPSIS is known to not change the content
1084 1087 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
1085 1088 }
1086 1089
1087 1090 /// The data for this entry, after resolving deltas if any.
1088 1091 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
1089 1092 let mut entry = self.clone();
1090 1093 let mut delta_chain = vec![];
1091 1094
1092 1095 // The meaning of `base_rev_or_base_of_delta_chain` depends on
1093 1096 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
1094 1097 // `mercurial/revlogutils/constants.py` and the code in
1095 1098 // [_chaininfo] and in [index_deltachain].
1096 1099 let uses_generaldelta = self.revlog.index.uses_generaldelta();
1097 1100 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
1098 1101 entry = if uses_generaldelta {
1099 1102 delta_chain.push(entry);
1100 1103 self.revlog.get_entry_for_checked_rev(base_rev)?
1101 1104 } else {
1102 1105 let base_rev = UncheckedRevision(entry.rev.0 - 1);
1103 1106 delta_chain.push(entry);
1104 1107 self.revlog.get_entry(base_rev)?
1105 1108 };
1106 1109 }
1107 1110
1108 1111 let data = if delta_chain.is_empty() {
1109 1112 entry.data_chunk()?
1110 1113 } else {
1111 1114 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
1112 1115 };
1113 1116
1114 1117 Ok(data)
1115 1118 }
1116 1119
1117 1120 fn check_data(
1118 1121 &self,
1119 1122 data: Cow<'revlog, [u8]>,
1120 1123 ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
1121 1124 if self.revlog.check_hash(
1122 1125 self.p1,
1123 1126 self.p2,
1124 1127 self.hash.as_bytes(),
1125 1128 &data,
1126 1129 ) {
1127 1130 Ok(data)
1128 1131 } else {
1129 1132 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
1130 1133 return Err(HgError::unsupported(
1131 1134 "ellipsis revisions are not supported by rhg",
1132 1135 )
1133 1136 .into());
1134 1137 }
1135 1138 Err(corrupted(format!(
1136 1139 "hash check failed for revision {}",
1137 1140 self.rev
1138 1141 ))
1139 1142 .into())
1140 1143 }
1141 1144 }
1142 1145
1143 1146 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
1144 1147 let data = self.rawdata()?;
1145 1148 if self.rev == NULL_REVISION {
1146 1149 return Ok(data);
1147 1150 }
1148 1151 if self.is_censored() {
1149 1152 return Err(HgError::CensoredNodeError.into());
1150 1153 }
1151 1154 self.check_data(data)
1152 1155 }
1153 1156
1154 1157 /// Extract the data contained in the entry.
1155 1158 /// This may be a delta. (See `is_delta`.)
1156 1159 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
1157 1160 if self.bytes.is_empty() {
1158 1161 return Ok(Cow::Borrowed(&[]));
1159 1162 }
1160 1163 match self.bytes[0] {
1161 1164 // Revision data is the entirety of the entry, including this
1162 1165 // header.
1163 1166 b'\0' => Ok(Cow::Borrowed(self.bytes)),
1164 1167 // Raw revision data follows.
1165 1168 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
1166 1169 // zlib (RFC 1950) data.
1167 1170 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
1168 1171 // zstd data.
1169 1172 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
1170 1173 // A proper new format should have had a repo/store requirement.
1171 1174 format_type => Err(corrupted(format!(
1172 1175 "unknown compression header '{}'",
1173 1176 format_type
1174 1177 ))),
1175 1178 }
1176 1179 }
1177 1180
1178 1181 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
1179 1182 let mut decoder = ZlibDecoder::new(self.bytes);
1180 1183 if self.is_delta() {
1181 1184 let mut buf = Vec::with_capacity(self.compressed_len as usize);
1182 1185 decoder
1183 1186 .read_to_end(&mut buf)
1184 1187 .map_err(|e| corrupted(e.to_string()))?;
1185 1188 Ok(buf)
1186 1189 } else {
1187 1190 let cap = self.uncompressed_len.max(0) as usize;
1188 1191 let mut buf = vec![0; cap];
1189 1192 decoder
1190 1193 .read_exact(&mut buf)
1191 1194 .map_err(|e| corrupted(e.to_string()))?;
1192 1195 Ok(buf)
1193 1196 }
1194 1197 }
1195 1198
1196 1199 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
1197 1200 let cap = self.uncompressed_len.max(0) as usize;
1198 1201 if self.is_delta() {
1199 1202 // [cap] is usually an over-estimate of the space needed because
1200 1203 // it's the length of delta-decoded data, but we're interested
1201 1204 // in the size of the delta.
1202 1205 // This means we have to [shrink_to_fit] to avoid holding on
1203 1206 // to a large chunk of memory, but it also means we must have a
1204 1207 // fallback branch, for the case when the delta is longer than
1205 1208 // the original data (surprisingly, this does happen in practice)
1206 1209 let mut buf = Vec::with_capacity(cap);
1207 1210 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
1208 1211 Ok(_) => buf.shrink_to_fit(),
1209 1212 Err(_) => {
1210 1213 buf.clear();
1211 1214 zstd::stream::copy_decode(self.bytes, &mut buf)
1212 1215 .map_err(|e| corrupted(e.to_string()))?;
1213 1216 }
1214 1217 };
1215 1218 Ok(buf)
1216 1219 } else {
1217 1220 let mut buf = Vec::with_capacity(cap);
1218 1221 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
1219 1222 .map_err(|e| corrupted(e.to_string()))?;
1220 1223 if len != self.uncompressed_len as usize {
1221 1224 Err(corrupted("uncompressed length does not match"))
1222 1225 } else {
1223 1226 Ok(buf)
1224 1227 }
1225 1228 }
1226 1229 }
1227 1230
1228 1231 /// Tell if the entry is a snapshot or a delta
1229 1232 /// (influences on decompression).
1230 1233 fn is_delta(&self) -> bool {
1231 1234 self.base_rev_or_base_of_delta_chain.is_some()
1232 1235 }
1233 1236 }
1234 1237
1235 1238 /// Calculate the hash of a revision given its data and its parents.
1236 1239 fn hash(
1237 1240 data: &[u8],
1238 1241 p1_hash: &[u8],
1239 1242 p2_hash: &[u8],
1240 1243 ) -> [u8; NODE_BYTES_LENGTH] {
1241 1244 let mut hasher = Sha1::new();
1242 1245 let (a, b) = (p1_hash, p2_hash);
1243 1246 if a > b {
1244 1247 hasher.update(b);
1245 1248 hasher.update(a);
1246 1249 } else {
1247 1250 hasher.update(a);
1248 1251 hasher.update(b);
1249 1252 }
1250 1253 hasher.update(data);
1251 1254 *hasher.finalize().as_ref()
1252 1255 }
1253 1256
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::IndexEntryBuilder;
    use itertools::Itertools;

    /// Assert that `entry` reports no parents through every accessor.
    fn assert_parentless(entry: &RevlogEntry) {
        assert!(!entry.has_p1());
        assert_eq!(entry.p1(), None);
        assert_eq!(entry.p2(), None);
        assert!(entry.p1_entry().unwrap().is_none());
        assert!(entry.p2_entry().unwrap().is_none());
    }

    /// An empty index file gives an empty revlog that still knows the null
    /// revision.
    #[test]
    fn test_empty() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        let revlog =
            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
                .unwrap();

        assert!(revlog.is_empty());
        assert_eq!(revlog.len(), 0);
        assert!(revlog.get_entry(0.into()).is_err());
        assert!(!revlog.has_rev(0.into()));

        let null_rev = revlog.rev_from_node(NULL_NODE.into()).unwrap();
        assert_eq!(null_rev, NULL_REVISION);

        let null_entry = revlog.get_entry(NULL_REVISION.into()).unwrap();
        assert_eq!(null_entry.revision(), NULL_REVISION);
        assert!(null_entry.data().unwrap().is_empty());
    }

    /// Three inline entries: two roots and a merge of both.
    #[test]
    fn test_inline() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };

        let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
            .unwrap();

        let root0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_node(node0)
            .build();
        let root1_bytes = IndexEntryBuilder::new().with_node(node1).build();
        let merge_bytes = IndexEntryBuilder::new()
            .with_p1(Revision(0))
            .with_p2(Revision(1))
            .with_node(node2)
            .build();
        let contents = vec![root0_bytes, root1_bytes, merge_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();

        let revlog =
            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
                .unwrap();

        let entry0 = revlog.get_entry(0.into()).unwrap();
        assert_eq!(entry0.revision(), Revision(0));
        assert_eq!(*entry0.node(), node0);
        assert_parentless(&entry0);

        let entry1 = revlog.get_entry(1.into()).unwrap();
        assert_eq!(entry1.revision(), Revision(1));
        assert_eq!(*entry1.node(), node1);
        assert_parentless(&entry1);

        let entry2 = revlog.get_entry(2.into()).unwrap();
        assert_eq!(entry2.revision(), Revision(2));
        assert_eq!(*entry2.node(), node2);
        assert!(entry2.has_p1());
        assert_eq!(entry2.p1(), Some(Revision(0)));
        assert_eq!(entry2.p2(), Some(Revision(1)));

        let p1_entry = entry2
            .p1_entry()
            .unwrap()
            .expect("rev 2 should have a p1 entry");
        assert_eq!(p1_entry.revision(), Revision(0));
        let p2_entry = entry2
            .p2_entry()
            .unwrap()
            .expect("rev 2 should have a p2 entry");
        assert_eq!(p2_entry.revision(), Revision(1));
    }

    /// Node lookups through an explicitly supplied nodemap.
    #[test]
    fn test_nodemap() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };

        // building a revlog with a forced Node starting with zeros
        // This is a corruption, but it does not preclude using the nodemap
        // if we don't try and access the data
        let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();

        let first_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_node(node0)
            .build();
        let second_bytes = IndexEntryBuilder::new().with_node(node1).build();
        let contents = vec![first_bytes, second_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();

        let mut idx = nodemap::tests::TestNtIndex::new();
        idx.insert_node(Revision(0), node0).unwrap();
        idx.insert_node(Revision(1), node1).unwrap();

        let revlog = Revlog::open_gen(
            &vfs,
            "foo.i",
            None,
            RevlogOpenOptions::new(),
            Some(idx.nt),
        )
        .unwrap();

        // accessing the data shows the corruption
        revlog.get_entry(0.into()).unwrap().data().unwrap_err();

        // Full node lookups.
        let null_rev = revlog.rev_from_node(NULL_NODE.into()).unwrap();
        assert_eq!(null_rev, Revision(-1));
        assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), Revision(0));
        assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), Revision(1));

        // Unambiguous prefix lookups.
        let zeros = NodePrefix::from_hex("000").unwrap();
        assert_eq!(revlog.rev_from_node(zeros).unwrap(), Revision(-1));
        let b00 = NodePrefix::from_hex("b00").unwrap();
        assert_eq!(revlog.rev_from_node(b00).unwrap(), Revision(1));

        // RevlogError does not implement PartialEq
        // (ultimately because io::Error does not)
        let err = revlog
            .rev_from_node(NodePrefix::from_hex("00").unwrap())
            .expect_err("Expected to give AmbiguousPrefix error");
        assert!(
            matches!(err, RevlogError::AmbiguousPrefix),
            "Got another error than AmbiguousPrefix: {:?}",
            err
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now