##// END OF EJS Templates
rust-revlog: fix incorrect results with NULL_NODE prefixes...
Georges Racinet -
r51637:bca40373 stable
parent child Browse files
Show More
@@ -1,751 +1,820 b''
1 1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 10 mod nodemap_docket;
11 11 pub mod path_encode;
12 12 pub use node::{FromHexError, Node, NodePrefix};
13 13 pub mod changelog;
14 14 pub mod filelog;
15 15 pub mod index;
16 16 pub mod manifest;
17 17 pub mod patch;
18 18
19 19 use std::borrow::Cow;
20 20 use std::io::Read;
21 21 use std::ops::Deref;
22 22 use std::path::Path;
23 23
24 24 use flate2::read::ZlibDecoder;
25 25 use sha1::{Digest, Sha1};
26 26 use std::cell::RefCell;
27 27 use zstd;
28 28
29 29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 30 use self::nodemap_docket::NodeMapDocket;
31 31 use super::index::Index;
32 32 use super::nodemap::{NodeMap, NodeMapError};
33 33 use crate::errors::HgError;
34 34 use crate::vfs::Vfs;
35 35
/// Mercurial revision numbers
///
/// As noted in revlog.c, revision numbers are actually encoded in
/// 4 bytes, and are liberally converted to ints, whence the i32
pub type Revision = i32;

/// Marker expressing the absence of a parent
///
/// Independently of the actual representation, `NULL_REVISION` is guaranteed
/// to be smaller than all existing revisions.
pub const NULL_REVISION: Revision = -1;

/// Same as `mercurial.node.wdirrev`
///
/// This is also equal to `i32::MAX`, but it's better to spell
/// it out explicitly, same as in `mercurial.node`
#[allow(clippy::unreadable_literal)]
pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;

/// Hexadecimal string associated with the working directory:
/// forty `f` digits.
pub const WORKING_DIRECTORY_HEX: &str =
    "ffffffffffffffffffffffffffffffffffffffff";
57 57
58 58 /// The simplest expression of what we need of Mercurial DAGs.
59 59 pub trait Graph {
60 60 /// Return the two parents of the given `Revision`.
61 61 ///
62 62 /// Each of the parents can be independently `NULL_REVISION`
63 63 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
64 64 }
65 65
66 66 #[derive(Clone, Debug, PartialEq)]
67 67 pub enum GraphError {
68 68 ParentOutOfRange(Revision),
69 69 WorkingDirectoryUnsupported,
70 70 }
71 71
72 72 /// The Mercurial Revlog Index
73 73 ///
74 74 /// This is currently limited to the minimal interface that is needed for
75 75 /// the [`nodemap`](nodemap/index.html) module
76 76 pub trait RevlogIndex {
77 77 /// Total number of Revisions referenced in this index
78 78 fn len(&self) -> usize;
79 79
80 80 fn is_empty(&self) -> bool {
81 81 self.len() == 0
82 82 }
83 83
84 84 /// Return a reference to the Node or `None` if rev is out of bounds
85 85 ///
86 86 /// `NULL_REVISION` is not considered to be out of bounds.
87 87 fn node(&self, rev: Revision) -> Option<&Node>;
88 88 }
89 89
// Per-revision flags, each one being a single bit of a `u16`.
const REVISION_FLAG_CENSORED: u16 = 1 << 15;
const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;

// Keep this in sync with REVIDX_KNOWN_FLAGS in
// mercurial/revlogutils/flagutil.py
const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_HASCOPIESINFO;

// Flags carried by the entry built for the null revision: none set.
const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
103 103
104 104 #[derive(Debug, derive_more::From)]
105 105 pub enum RevlogError {
106 106 InvalidRevision,
107 107 /// Working directory is not supported
108 108 WDirUnsupported,
109 109 /// Found more than one entry whose ID match the requested prefix
110 110 AmbiguousPrefix,
111 111 #[from]
112 112 Other(HgError),
113 113 }
114 114
115 115 impl From<NodeMapError> for RevlogError {
116 116 fn from(error: NodeMapError) -> Self {
117 117 match error {
118 118 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
119 119 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
120 120 format!("nodemap point to revision {} not in index", rev),
121 121 ),
122 122 }
123 123 }
124 124 }
125 125
126 126 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
127 127 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
128 128 }
129 129
130 130 impl RevlogError {
131 131 fn corrupted<S: AsRef<str>>(context: S) -> Self {
132 132 RevlogError::Other(corrupted(context))
133 133 }
134 134 }
135 135
136 136 /// Read only implementation of revlog.
137 137 pub struct Revlog {
138 138 /// When index and data are not interleaved: bytes of the revlog index.
139 139 /// When index and data are interleaved: bytes of the revlog index and
140 140 /// data.
141 141 index: Index,
142 142 /// When index and data are not interleaved: bytes of the revlog data
143 143 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
144 144 /// When present on disk: the persistent nodemap for this revlog
145 145 nodemap: Option<nodemap::NodeTree>,
146 146 }
147 147
148 148 impl Revlog {
149 149 /// Open a revlog index file.
150 150 ///
151 151 /// It will also open the associated data file if index and data are not
152 152 /// interleaved.
153 153 pub fn open(
154 154 store_vfs: &Vfs,
155 155 index_path: impl AsRef<Path>,
156 156 data_path: Option<&Path>,
157 157 use_nodemap: bool,
158 158 ) -> Result<Self, HgError> {
159 159 let index_path = index_path.as_ref();
160 160 let index = {
161 161 match store_vfs.mmap_open_opt(&index_path)? {
162 162 None => Index::new(Box::new(vec![])),
163 163 Some(index_mmap) => {
164 164 let index = Index::new(Box::new(index_mmap))?;
165 165 Ok(index)
166 166 }
167 167 }
168 168 }?;
169 169
170 170 let default_data_path = index_path.with_extension("d");
171 171
172 172 // type annotation required
173 173 // won't recognize Mmap as Deref<Target = [u8]>
174 174 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
175 175 if index.is_inline() {
176 176 None
177 177 } else {
178 178 let data_path = data_path.unwrap_or(&default_data_path);
179 179 let data_mmap = store_vfs.mmap_open(data_path)?;
180 180 Some(Box::new(data_mmap))
181 181 };
182 182
183 183 let nodemap = if index.is_inline() || !use_nodemap {
184 184 None
185 185 } else {
186 186 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
187 187 |(docket, data)| {
188 188 nodemap::NodeTree::load_bytes(
189 189 Box::new(data),
190 190 docket.data_length,
191 191 )
192 192 },
193 193 )
194 194 };
195 195
196 196 Ok(Revlog {
197 197 index,
198 198 data_bytes,
199 199 nodemap,
200 200 })
201 201 }
202 202
203 203 /// Return number of entries of the `Revlog`.
204 204 pub fn len(&self) -> usize {
205 205 self.index.len()
206 206 }
207 207
208 208 /// Returns `true` if the `Revlog` has zero `entries`.
209 209 pub fn is_empty(&self) -> bool {
210 210 self.index.is_empty()
211 211 }
212 212
213 213 /// Returns the node ID for the given revision number, if it exists in this
214 214 /// revlog
215 215 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
216 216 if rev == NULL_REVISION {
217 217 return Some(&NULL_NODE);
218 218 }
219 219 Some(self.index.get_entry(rev)?.hash())
220 220 }
221 221
222 222 /// Return the revision number for the given node ID, if it exists in this
223 223 /// revlog
224 224 pub fn rev_from_node(
225 225 &self,
226 226 node: NodePrefix,
227 227 ) -> Result<Revision, RevlogError> {
228 if node.is_prefix_of(&NULL_NODE) {
229 return Ok(NULL_REVISION);
230 }
228 let looked_up = if let Some(nodemap) = &self.nodemap {
229 nodemap
230 .find_bin(&self.index, node)?
231 .ok_or(RevlogError::InvalidRevision)
232 } else {
233 self.rev_from_node_no_persistent_nodemap(node)
234 };
231 235
232 if let Some(nodemap) = &self.nodemap {
233 return nodemap
234 .find_bin(&self.index, node)?
235 .ok_or(RevlogError::InvalidRevision);
236 }
237 self.rev_from_node_no_persistent_nodemap(node)
236 if node.is_prefix_of(&NULL_NODE) {
237 return match looked_up {
238 Ok(_) => Err(RevlogError::AmbiguousPrefix),
239 Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),
240 res => res,
241 };
242 };
243
244 looked_up
238 245 }
239 246
240 247 /// Same as `rev_from_node`, without using a persistent nodemap
241 248 ///
242 249 /// This is used as fallback when a persistent nodemap is not present.
243 250 /// This happens when the persistent-nodemap experimental feature is not
244 251 /// enabled, or for small revlogs.
245 252 fn rev_from_node_no_persistent_nodemap(
246 253 &self,
247 254 node: NodePrefix,
248 255 ) -> Result<Revision, RevlogError> {
249 256 // Linear scan of the revlog
250 257 // TODO: consider building a non-persistent nodemap in memory to
251 258 // optimize these cases.
252 259 let mut found_by_prefix = None;
253 260 for rev in (0..self.len() as Revision).rev() {
254 261 let index_entry = self.index.get_entry(rev).ok_or_else(|| {
255 262 HgError::corrupted(
256 263 "revlog references a revision not in the index",
257 264 )
258 265 })?;
259 266 if node == *index_entry.hash() {
260 267 return Ok(rev);
261 268 }
262 269 if node.is_prefix_of(index_entry.hash()) {
263 270 if found_by_prefix.is_some() {
264 271 return Err(RevlogError::AmbiguousPrefix);
265 272 }
266 273 found_by_prefix = Some(rev)
267 274 }
268 275 }
269 276 found_by_prefix.ok_or(RevlogError::InvalidRevision)
270 277 }
271 278
272 279 /// Returns whether the given revision exists in this revlog.
273 280 pub fn has_rev(&self, rev: Revision) -> bool {
274 281 self.index.get_entry(rev).is_some()
275 282 }
276 283
277 284 /// Return the full data associated to a revision.
278 285 ///
279 286 /// All entries required to build the final data out of deltas will be
280 287 /// retrieved as needed, and the deltas will be applied to the inital
281 288 /// snapshot to rebuild the final data.
282 289 pub fn get_rev_data(
283 290 &self,
284 291 rev: Revision,
285 292 ) -> Result<Cow<[u8]>, RevlogError> {
286 293 if rev == NULL_REVISION {
287 294 return Ok(Cow::Borrowed(&[]));
288 295 };
289 296 Ok(self.get_entry(rev)?.data()?)
290 297 }
291 298
292 299 /// Check the hash of some given data against the recorded hash.
293 300 pub fn check_hash(
294 301 &self,
295 302 p1: Revision,
296 303 p2: Revision,
297 304 expected: &[u8],
298 305 data: &[u8],
299 306 ) -> bool {
300 307 let e1 = self.index.get_entry(p1);
301 308 let h1 = match e1 {
302 309 Some(ref entry) => entry.hash(),
303 310 None => &NULL_NODE,
304 311 };
305 312 let e2 = self.index.get_entry(p2);
306 313 let h2 = match e2 {
307 314 Some(ref entry) => entry.hash(),
308 315 None => &NULL_NODE,
309 316 };
310 317
311 318 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
312 319 }
313 320
314 321 /// Build the full data of a revision out its snapshot
315 322 /// and its deltas.
316 323 fn build_data_from_deltas(
317 324 snapshot: RevlogEntry,
318 325 deltas: &[RevlogEntry],
319 326 ) -> Result<Vec<u8>, HgError> {
320 327 let snapshot = snapshot.data_chunk()?;
321 328 let deltas = deltas
322 329 .iter()
323 330 .rev()
324 331 .map(RevlogEntry::data_chunk)
325 332 .collect::<Result<Vec<_>, _>>()?;
326 333 let patches: Vec<_> =
327 334 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
328 335 let patch = patch::fold_patch_lists(&patches);
329 336 Ok(patch.apply(&snapshot))
330 337 }
331 338
332 339 /// Return the revlog data.
333 340 fn data(&self) -> &[u8] {
334 341 match &self.data_bytes {
335 342 Some(data_bytes) => data_bytes,
336 343 None => panic!(
337 344 "forgot to load the data or trying to access inline data"
338 345 ),
339 346 }
340 347 }
341 348
342 349 pub fn make_null_entry(&self) -> RevlogEntry {
343 350 RevlogEntry {
344 351 revlog: self,
345 352 rev: NULL_REVISION,
346 353 bytes: b"",
347 354 compressed_len: 0,
348 355 uncompressed_len: 0,
349 356 base_rev_or_base_of_delta_chain: None,
350 357 p1: NULL_REVISION,
351 358 p2: NULL_REVISION,
352 359 flags: NULL_REVLOG_ENTRY_FLAGS,
353 360 hash: NULL_NODE,
354 361 }
355 362 }
356 363
357 364 /// Get an entry of the revlog.
358 365 pub fn get_entry(
359 366 &self,
360 367 rev: Revision,
361 368 ) -> Result<RevlogEntry, RevlogError> {
362 369 if rev == NULL_REVISION {
363 370 return Ok(self.make_null_entry());
364 371 }
365 372 let index_entry = self
366 373 .index
367 374 .get_entry(rev)
368 375 .ok_or(RevlogError::InvalidRevision)?;
369 376 let start = index_entry.offset();
370 377 let end = start + index_entry.compressed_len() as usize;
371 378 let data = if self.index.is_inline() {
372 379 self.index.data(start, end)
373 380 } else {
374 381 &self.data()[start..end]
375 382 };
376 383 let entry = RevlogEntry {
377 384 revlog: self,
378 385 rev,
379 386 bytes: data,
380 387 compressed_len: index_entry.compressed_len(),
381 388 uncompressed_len: index_entry.uncompressed_len(),
382 389 base_rev_or_base_of_delta_chain: if index_entry
383 390 .base_revision_or_base_of_delta_chain()
384 391 == rev
385 392 {
386 393 None
387 394 } else {
388 395 Some(index_entry.base_revision_or_base_of_delta_chain())
389 396 },
390 397 p1: index_entry.p1(),
391 398 p2: index_entry.p2(),
392 399 flags: index_entry.flags(),
393 400 hash: *index_entry.hash(),
394 401 };
395 402 Ok(entry)
396 403 }
397 404
398 405 /// when resolving internal references within revlog, any errors
399 406 /// should be reported as corruption, instead of e.g. "invalid revision"
400 407 fn get_entry_internal(
401 408 &self,
402 409 rev: Revision,
403 410 ) -> Result<RevlogEntry, HgError> {
404 411 self.get_entry(rev)
405 412 .map_err(|_| corrupted(format!("revision {} out of range", rev)))
406 413 }
407 414 }
408 415
409 416 /// The revlog entry's bytes and the necessary informations to extract
410 417 /// the entry's data.
411 418 #[derive(Clone)]
412 419 pub struct RevlogEntry<'revlog> {
413 420 revlog: &'revlog Revlog,
414 421 rev: Revision,
415 422 bytes: &'revlog [u8],
416 423 compressed_len: u32,
417 424 uncompressed_len: i32,
418 425 base_rev_or_base_of_delta_chain: Option<Revision>,
419 426 p1: Revision,
420 427 p2: Revision,
421 428 flags: u16,
422 429 hash: Node,
423 430 }
424 431
425 432 thread_local! {
426 433 // seems fine to [unwrap] here: this can only fail due to memory allocation
427 434 // failing, and it's normal for that to cause panic.
428 435 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
429 436 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
430 437 }
431 438
432 439 fn zstd_decompress_to_buffer(
433 440 bytes: &[u8],
434 441 buf: &mut Vec<u8>,
435 442 ) -> Result<usize, std::io::Error> {
436 443 ZSTD_DECODER
437 444 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
438 445 }
439 446
440 447 impl<'revlog> RevlogEntry<'revlog> {
441 448 pub fn revision(&self) -> Revision {
442 449 self.rev
443 450 }
444 451
445 452 pub fn node(&self) -> &Node {
446 453 &self.hash
447 454 }
448 455
449 456 pub fn uncompressed_len(&self) -> Option<u32> {
450 457 u32::try_from(self.uncompressed_len).ok()
451 458 }
452 459
453 460 pub fn has_p1(&self) -> bool {
454 461 self.p1 != NULL_REVISION
455 462 }
456 463
457 464 pub fn p1_entry(
458 465 &self,
459 466 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
460 467 if self.p1 == NULL_REVISION {
461 468 Ok(None)
462 469 } else {
463 470 Ok(Some(self.revlog.get_entry(self.p1)?))
464 471 }
465 472 }
466 473
467 474 pub fn p2_entry(
468 475 &self,
469 476 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
470 477 if self.p2 == NULL_REVISION {
471 478 Ok(None)
472 479 } else {
473 480 Ok(Some(self.revlog.get_entry(self.p2)?))
474 481 }
475 482 }
476 483
477 484 pub fn p1(&self) -> Option<Revision> {
478 485 if self.p1 == NULL_REVISION {
479 486 None
480 487 } else {
481 488 Some(self.p1)
482 489 }
483 490 }
484 491
485 492 pub fn p2(&self) -> Option<Revision> {
486 493 if self.p2 == NULL_REVISION {
487 494 None
488 495 } else {
489 496 Some(self.p2)
490 497 }
491 498 }
492 499
493 500 pub fn is_censored(&self) -> bool {
494 501 (self.flags & REVISION_FLAG_CENSORED) != 0
495 502 }
496 503
497 504 pub fn has_length_affecting_flag_processor(&self) -> bool {
498 505 // Relevant Python code: revlog.size()
499 506 // note: ELLIPSIS is known to not change the content
500 507 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
501 508 }
502 509
503 510 /// The data for this entry, after resolving deltas if any.
504 511 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
505 512 let mut entry = self.clone();
506 513 let mut delta_chain = vec![];
507 514
508 515 // The meaning of `base_rev_or_base_of_delta_chain` depends on
509 516 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
510 517 // `mercurial/revlogutils/constants.py` and the code in
511 518 // [_chaininfo] and in [index_deltachain].
512 519 let uses_generaldelta = self.revlog.index.uses_generaldelta();
513 520 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
514 521 let base_rev = if uses_generaldelta {
515 522 base_rev
516 523 } else {
517 524 entry.rev - 1
518 525 };
519 526 delta_chain.push(entry);
520 527 entry = self.revlog.get_entry_internal(base_rev)?;
521 528 }
522 529
523 530 let data = if delta_chain.is_empty() {
524 531 entry.data_chunk()?
525 532 } else {
526 533 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
527 534 };
528 535
529 536 Ok(data)
530 537 }
531 538
532 539 fn check_data(
533 540 &self,
534 541 data: Cow<'revlog, [u8]>,
535 542 ) -> Result<Cow<'revlog, [u8]>, HgError> {
536 543 if self.revlog.check_hash(
537 544 self.p1,
538 545 self.p2,
539 546 self.hash.as_bytes(),
540 547 &data,
541 548 ) {
542 549 Ok(data)
543 550 } else {
544 551 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
545 552 return Err(HgError::unsupported(
546 553 "ellipsis revisions are not supported by rhg",
547 554 ));
548 555 }
549 556 Err(corrupted(format!(
550 557 "hash check failed for revision {}",
551 558 self.rev
552 559 )))
553 560 }
554 561 }
555 562
556 563 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
557 564 let data = self.rawdata()?;
558 565 if self.is_censored() {
559 566 return Err(HgError::CensoredNodeError);
560 567 }
561 568 self.check_data(data)
562 569 }
563 570
564 571 /// Extract the data contained in the entry.
565 572 /// This may be a delta. (See `is_delta`.)
566 573 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
567 574 if self.bytes.is_empty() {
568 575 return Ok(Cow::Borrowed(&[]));
569 576 }
570 577 match self.bytes[0] {
571 578 // Revision data is the entirety of the entry, including this
572 579 // header.
573 580 b'\0' => Ok(Cow::Borrowed(self.bytes)),
574 581 // Raw revision data follows.
575 582 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
576 583 // zlib (RFC 1950) data.
577 584 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
578 585 // zstd data.
579 586 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
580 587 // A proper new format should have had a repo/store requirement.
581 588 format_type => Err(corrupted(format!(
582 589 "unknown compression header '{}'",
583 590 format_type
584 591 ))),
585 592 }
586 593 }
587 594
588 595 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
589 596 let mut decoder = ZlibDecoder::new(self.bytes);
590 597 if self.is_delta() {
591 598 let mut buf = Vec::with_capacity(self.compressed_len as usize);
592 599 decoder
593 600 .read_to_end(&mut buf)
594 601 .map_err(|e| corrupted(e.to_string()))?;
595 602 Ok(buf)
596 603 } else {
597 604 let cap = self.uncompressed_len.max(0) as usize;
598 605 let mut buf = vec![0; cap];
599 606 decoder
600 607 .read_exact(&mut buf)
601 608 .map_err(|e| corrupted(e.to_string()))?;
602 609 Ok(buf)
603 610 }
604 611 }
605 612
606 613 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
607 614 let cap = self.uncompressed_len.max(0) as usize;
608 615 if self.is_delta() {
609 616 // [cap] is usually an over-estimate of the space needed because
610 617 // it's the length of delta-decoded data, but we're interested
611 618 // in the size of the delta.
612 619 // This means we have to [shrink_to_fit] to avoid holding on
613 620 // to a large chunk of memory, but it also means we must have a
614 621 // fallback branch, for the case when the delta is longer than
615 622 // the original data (surprisingly, this does happen in practice)
616 623 let mut buf = Vec::with_capacity(cap);
617 624 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
618 625 Ok(_) => buf.shrink_to_fit(),
619 626 Err(_) => {
620 627 buf.clear();
621 628 zstd::stream::copy_decode(self.bytes, &mut buf)
622 629 .map_err(|e| corrupted(e.to_string()))?;
623 630 }
624 631 };
625 632 Ok(buf)
626 633 } else {
627 634 let mut buf = Vec::with_capacity(cap);
628 635 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
629 636 .map_err(|e| corrupted(e.to_string()))?;
630 637 if len != self.uncompressed_len as usize {
631 638 Err(corrupted("uncompressed length does not match"))
632 639 } else {
633 640 Ok(buf)
634 641 }
635 642 }
636 643 }
637 644
638 645 /// Tell if the entry is a snapshot or a delta
639 646 /// (influences on decompression).
640 647 fn is_delta(&self) -> bool {
641 648 self.base_rev_or_base_of_delta_chain.is_some()
642 649 }
643 650 }
644 651
645 652 /// Calculate the hash of a revision given its data and its parents.
646 653 fn hash(
647 654 data: &[u8],
648 655 p1_hash: &[u8],
649 656 p2_hash: &[u8],
650 657 ) -> [u8; NODE_BYTES_LENGTH] {
651 658 let mut hasher = Sha1::new();
652 659 let (a, b) = (p1_hash, p2_hash);
653 660 if a > b {
654 661 hasher.update(b);
655 662 hasher.update(a);
656 663 } else {
657 664 hasher.update(a);
658 665 hasher.update(b);
659 666 }
660 667 hasher.update(data);
661 668 *hasher.finalize().as_ref()
662 669 }
663 670
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
    use itertools::Itertools;

    /// An empty index file gives an empty revlog, in which only the null
    /// node can be resolved.
    #[test]
    fn test_empty() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
        assert!(revlog.is_empty());
        assert_eq!(revlog.len(), 0);
        assert!(revlog.get_entry(0).is_err());
        assert!(!revlog.has_rev(0));
        assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
    }

    /// Entries of an inline revlog expose their revision, node and parents.
    #[test]
    fn test_inline() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
            .unwrap();
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        let entry2_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_p1(0)
            .with_p2(1)
            .with_node(node2)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        let entry0 = revlog.get_entry(0).ok().unwrap();
        assert_eq!(entry0.revision(), 0);
        assert_eq!(*entry0.node(), node0);
        assert!(!entry0.has_p1());
        assert_eq!(entry0.p1(), None);
        assert_eq!(entry0.p2(), None);
        let p1_entry = entry0.p1_entry().unwrap();
        assert!(p1_entry.is_none());
        let p2_entry = entry0.p2_entry().unwrap();
        assert!(p2_entry.is_none());

        let entry1 = revlog.get_entry(1).ok().unwrap();
        assert_eq!(entry1.revision(), 1);
        assert_eq!(*entry1.node(), node1);
        assert!(!entry1.has_p1());
        assert_eq!(entry1.p1(), None);
        assert_eq!(entry1.p2(), None);
        let p1_entry = entry1.p1_entry().unwrap();
        assert!(p1_entry.is_none());
        let p2_entry = entry1.p2_entry().unwrap();
        assert!(p2_entry.is_none());

        let entry2 = revlog.get_entry(2).ok().unwrap();
        assert_eq!(entry2.revision(), 2);
        assert_eq!(*entry2.node(), node2);
        assert!(entry2.has_p1());
        assert_eq!(entry2.p1(), Some(0));
        assert_eq!(entry2.p2(), Some(1));
        let p1_entry = entry2.p1_entry().unwrap();
        assert!(p1_entry.is_some());
        assert_eq!(p1_entry.unwrap().revision(), 0);
        let p2_entry = entry2.p2_entry().unwrap();
        assert!(p2_entry.is_some());
        assert_eq!(p2_entry.unwrap().revision(), 1);
    }

    /// Node lookups, including prefixes shared with `NULL_NODE`.
    #[test]
    fn test_nodemap() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };

        // building a revlog with a forced Node starting with zeros
        // This is a corruption, but it does not preclude using the nodemap
        // if we don't try and access the data
        let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        // accessing the data shows the corruption
        revlog.get_entry(0).unwrap().data().unwrap_err();

        assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
        assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
        assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
        // "000" is a prefix of the null node only
        assert_eq!(
            revlog
                .rev_from_node(NodePrefix::from_hex("000").unwrap())
                .unwrap(),
            -1
        );
        assert_eq!(
            revlog
                .rev_from_node(NodePrefix::from_hex("b00").unwrap())
                .unwrap(),
            1
        );
        // "00" is a prefix of both the null node and node0
        // RevlogError does not implement PartialEq
        // (ultimately because io::Error does not)
        match revlog
            .rev_from_node(NodePrefix::from_hex("00").unwrap())
            .expect_err("Expected to give AmbiguousPrefix error")
        {
            RevlogError::AmbiguousPrefix => (),
            e => {
                panic!("Got another error than AmbiguousPrefix: {:?}", e);
            }
        };
    }
}
General Comments 0
You need to be logged in to leave comments. Login now