##// END OF EJS Templates
rust-revlog: split logic for `rawdata` to prepare for `UncheckedRevision` use...
Raphaël Gomès -
r51869:9929c8a7 default
parent child Browse files
Show More
@@ -1,848 +1,849 b''
1 1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 10 mod nodemap_docket;
11 11 pub mod path_encode;
12 12 pub use node::{FromHexError, Node, NodePrefix};
13 13 pub mod changelog;
14 14 pub mod filelog;
15 15 pub mod index;
16 16 pub mod manifest;
17 17 pub mod patch;
18 18
19 19 use std::borrow::Cow;
20 20 use std::io::Read;
21 21 use std::ops::Deref;
22 22 use std::path::Path;
23 23
24 24 use flate2::read::ZlibDecoder;
25 25 use sha1::{Digest, Sha1};
26 26 use std::cell::RefCell;
27 27 use zstd;
28 28
29 29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 30 use self::nodemap_docket::NodeMapDocket;
31 31 use super::index::Index;
32 32 use super::nodemap::{NodeMap, NodeMapError};
33 33 use crate::errors::HgError;
34 34 use crate::vfs::Vfs;
35 35
/// Mercurial revision numbers
///
/// As noted in revlog.c, revision numbers are actually encoded in
/// 4 bytes, and are liberally converted to ints, whence the i32
pub type Revision = i32;

/// Unchecked Mercurial revision numbers.
///
/// Values of this type have no guarantee of being a valid revision number
/// in any context. Use method `check_revision` to get a valid revision within
/// the appropriate index object.
///
/// As noted in revlog.c, revision numbers are actually encoded in
/// 4 bytes, and are liberally converted to ints, whence the i32
pub type UncheckedRevision = i32;

/// Marker expressing the absence of a parent
///
/// Independently of the actual representation, `NULL_REVISION` is guaranteed
/// to be smaller than all existing revisions.
pub const NULL_REVISION: Revision = -1;

/// Same as `mercurial.node.wdirrev`
///
/// This is also equal to `i32::MAX`, but it's better to spell
/// it out explicitly, same as in `mercurial.node`
#[allow(clippy::unreadable_literal)]
pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;

/// Forty `f` hexadecimal digits, i.e. a 20-byte value with every bit set.
///
/// NOTE(review): judging by the name, this is the hexadecimal node ID standing
/// for the working directory (counterpart of `WORKING_DIRECTORY_REVISION`);
/// confirm against `mercurial.node`.
pub const WORKING_DIRECTORY_HEX: &str =
    "ffffffffffffffffffffffffffffffffffffffff";
67 67
/// The simplest expression of what we need of Mercurial DAGs.
///
/// The only capability required from an implementor is to list the parents
/// of a revision.
pub trait Graph {
    /// Return the two parents of the given `Revision`.
    ///
    /// Each of the parents can be independently `NULL_REVISION`
    ///
    /// # Errors
    ///
    /// Implementors report failures through [`GraphError`].
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
}
75 75
/// Error type returned by [`Graph::parents`].
#[derive(Clone, Debug, PartialEq)]
pub enum GraphError {
    /// Carries a `Revision`.
    ///
    /// NOTE(review): judging by the name, this is the revision that was found
    /// to be outside of the valid range; confirm against implementors.
    ParentOutOfRange(Revision),
}
80 80
81 81 /// The Mercurial Revlog Index
82 82 ///
83 83 /// This is currently limited to the minimal interface that is needed for
84 84 /// the [`nodemap`](nodemap/index.html) module
85 85 pub trait RevlogIndex {
86 86 /// Total number of Revisions referenced in this index
87 87 fn len(&self) -> usize;
88 88
89 89 fn is_empty(&self) -> bool {
90 90 self.len() == 0
91 91 }
92 92
93 93 /// Return a reference to the Node or `None` if rev is out of bounds
94 94 ///
95 95 /// `NULL_REVISION` is not considered to be out of bounds.
96 96 fn node(&self, rev: Revision) -> Option<&Node>;
97 97
98 98 /// Return a [`Revision`] if `rev` is a valid revision number for this
99 99 /// index
100 100 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
101 101 if rev == NULL_REVISION || (rev >= 0 && (rev as usize) < self.len()) {
102 102 Some(rev)
103 103 } else {
104 104 None
105 105 }
106 106 }
107 107 }
108 108
// Per-revision flag bits, stored in a `u16`. Each constant is a single bit,
// from bit 15 (censored) down to bit 12 (has copies info).
const REVISION_FLAG_CENSORED: u16 = 1 << 15;
const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;

// Keep this in sync with REVIDX_KNOWN_FLAGS in
// mercurial/revlogutils/flagutil.py
const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_HASCOPIESINFO;

// Flags given to the entry built by `Revlog::make_null_entry`: none set.
const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
122 122
/// Errors returned by revlog lookups.
#[derive(Debug, derive_more::From)]
pub enum RevlogError {
    /// The requested revision or node could not be found in the revlog
    InvalidRevision,
    /// Working directory is not supported
    WDirUnsupported,
    /// Found more than one entry whose ID match the requested prefix
    AmbiguousPrefix,
    /// Any other error, wrapped as an `HgError` (this includes the
    /// corruption errors built by `RevlogError::corrupted`)
    #[from]
    Other(HgError),
}
133 133
134 134 impl From<NodeMapError> for RevlogError {
135 135 fn from(error: NodeMapError) -> Self {
136 136 match error {
137 137 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
138 138 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
139 139 format!("nodemap point to revision {} not in index", rev),
140 140 ),
141 141 }
142 142 }
143 143 }
144 144
145 145 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
146 146 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
147 147 }
148 148
149 149 impl RevlogError {
150 150 fn corrupted<S: AsRef<str>>(context: S) -> Self {
151 151 RevlogError::Other(corrupted(context))
152 152 }
153 153 }
154 154
/// Read only implementation of revlog.
///
/// Built by [`Revlog::open`], which loads the index, the separate data file
/// (when the index is not inline) and, optionally, the persistent nodemap.
pub struct Revlog {
    /// When index and data are not interleaved: bytes of the revlog index.
    /// When index and data are interleaved: bytes of the revlog index and
    /// data.
    index: Index,
    /// When index and data are not interleaved: bytes of the revlog data
    /// (`None` when the index is inline)
    data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
    /// When present on disk: the persistent nodemap for this revlog
    nodemap: Option<nodemap::NodeTree>,
}
166 166
167 167 impl Revlog {
168 168 /// Open a revlog index file.
169 169 ///
170 170 /// It will also open the associated data file if index and data are not
171 171 /// interleaved.
172 172 pub fn open(
173 173 store_vfs: &Vfs,
174 174 index_path: impl AsRef<Path>,
175 175 data_path: Option<&Path>,
176 176 use_nodemap: bool,
177 177 ) -> Result<Self, HgError> {
178 178 let index_path = index_path.as_ref();
179 179 let index = {
180 180 match store_vfs.mmap_open_opt(&index_path)? {
181 181 None => Index::new(Box::new(vec![])),
182 182 Some(index_mmap) => {
183 183 let index = Index::new(Box::new(index_mmap))?;
184 184 Ok(index)
185 185 }
186 186 }
187 187 }?;
188 188
189 189 let default_data_path = index_path.with_extension("d");
190 190
191 191 // type annotation required
192 192 // won't recognize Mmap as Deref<Target = [u8]>
193 193 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
194 194 if index.is_inline() {
195 195 None
196 196 } else {
197 197 let data_path = data_path.unwrap_or(&default_data_path);
198 198 let data_mmap = store_vfs.mmap_open(data_path)?;
199 199 Some(Box::new(data_mmap))
200 200 };
201 201
202 202 let nodemap = if index.is_inline() || !use_nodemap {
203 203 None
204 204 } else {
205 205 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
206 206 |(docket, data)| {
207 207 nodemap::NodeTree::load_bytes(
208 208 Box::new(data),
209 209 docket.data_length,
210 210 )
211 211 },
212 212 )
213 213 };
214 214
215 215 Ok(Revlog {
216 216 index,
217 217 data_bytes,
218 218 nodemap,
219 219 })
220 220 }
221 221
/// Return number of entries of the `Revlog`.
///
/// This is the length reported by the underlying index.
pub fn len(&self) -> usize {
    self.index.len()
}
226 226
/// Returns `true` if the `Revlog` has zero `entries`.
///
/// This is delegated to the underlying index.
pub fn is_empty(&self) -> bool {
    self.index.is_empty()
}
231 231
232 232 /// Returns the node ID for the given revision number, if it exists in this
233 233 /// revlog
234 234 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
235 235 if rev == NULL_REVISION {
236 236 return Some(&NULL_NODE);
237 237 }
238 238 Some(self.index.get_entry(rev)?.hash())
239 239 }
240 240
241 241 /// Return the revision number for the given node ID, if it exists in this
242 242 /// revlog
243 243 pub fn rev_from_node(
244 244 &self,
245 245 node: NodePrefix,
246 246 ) -> Result<Revision, RevlogError> {
247 247 let looked_up = if let Some(nodemap) = &self.nodemap {
248 248 nodemap
249 249 .find_bin(&self.index, node)?
250 250 .ok_or(RevlogError::InvalidRevision)
251 251 } else {
252 252 self.rev_from_node_no_persistent_nodemap(node)
253 253 };
254 254
255 255 if node.is_prefix_of(&NULL_NODE) {
256 256 return match looked_up {
257 257 Ok(_) => Err(RevlogError::AmbiguousPrefix),
258 258 Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),
259 259 res => res,
260 260 };
261 261 };
262 262
263 263 looked_up
264 264 }
265 265
266 266 /// Same as `rev_from_node`, without using a persistent nodemap
267 267 ///
268 268 /// This is used as fallback when a persistent nodemap is not present.
269 269 /// This happens when the persistent-nodemap experimental feature is not
270 270 /// enabled, or for small revlogs.
271 271 fn rev_from_node_no_persistent_nodemap(
272 272 &self,
273 273 node: NodePrefix,
274 274 ) -> Result<Revision, RevlogError> {
275 275 // Linear scan of the revlog
276 276 // TODO: consider building a non-persistent nodemap in memory to
277 277 // optimize these cases.
278 278 let mut found_by_prefix = None;
279 279 for rev in (0..self.len() as Revision).rev() {
280 280 let index_entry = self.index.get_entry(rev).ok_or_else(|| {
281 281 HgError::corrupted(
282 282 "revlog references a revision not in the index",
283 283 )
284 284 })?;
285 285 if node == *index_entry.hash() {
286 286 return Ok(rev);
287 287 }
288 288 if node.is_prefix_of(index_entry.hash()) {
289 289 if found_by_prefix.is_some() {
290 290 return Err(RevlogError::AmbiguousPrefix);
291 291 }
292 292 found_by_prefix = Some(rev)
293 293 }
294 294 }
295 295 found_by_prefix.ok_or(RevlogError::InvalidRevision)
296 296 }
297 297
/// Returns whether the given revision exists in this revlog.
///
/// This is a plain lookup in the index: no revision data is read.
pub fn has_rev(&self, rev: Revision) -> bool {
    self.index.get_entry(rev).is_some()
}
302 302
303 303 /// Return the full data associated to a revision.
304 304 ///
305 305 /// All entries required to build the final data out of deltas will be
306 306 /// retrieved as needed, and the deltas will be applied to the inital
307 307 /// snapshot to rebuild the final data.
308 308 pub fn get_rev_data(
309 309 &self,
310 310 rev: Revision,
311 311 ) -> Result<Cow<[u8]>, RevlogError> {
312 312 if rev == NULL_REVISION {
313 313 return Ok(Cow::Borrowed(&[]));
314 314 };
315 315 Ok(self.get_entry(rev)?.data()?)
316 316 }
317 317
318 318 /// Check the hash of some given data against the recorded hash.
319 319 pub fn check_hash(
320 320 &self,
321 321 p1: Revision,
322 322 p2: Revision,
323 323 expected: &[u8],
324 324 data: &[u8],
325 325 ) -> bool {
326 326 let e1 = self.index.get_entry(p1);
327 327 let h1 = match e1 {
328 328 Some(ref entry) => entry.hash(),
329 329 None => &NULL_NODE,
330 330 };
331 331 let e2 = self.index.get_entry(p2);
332 332 let h2 = match e2 {
333 333 Some(ref entry) => entry.hash(),
334 334 None => &NULL_NODE,
335 335 };
336 336
337 337 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
338 338 }
339 339
340 340 /// Build the full data of a revision out its snapshot
341 341 /// and its deltas.
342 342 fn build_data_from_deltas(
343 343 snapshot: RevlogEntry,
344 344 deltas: &[RevlogEntry],
345 345 ) -> Result<Vec<u8>, HgError> {
346 346 let snapshot = snapshot.data_chunk()?;
347 347 let deltas = deltas
348 348 .iter()
349 349 .rev()
350 350 .map(RevlogEntry::data_chunk)
351 351 .collect::<Result<Vec<_>, _>>()?;
352 352 let patches: Vec<_> =
353 353 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
354 354 let patch = patch::fold_patch_lists(&patches);
355 355 Ok(patch.apply(&snapshot))
356 356 }
357 357
358 358 /// Return the revlog data.
359 359 fn data(&self) -> &[u8] {
360 360 match &self.data_bytes {
361 361 Some(data_bytes) => data_bytes,
362 362 None => panic!(
363 363 "forgot to load the data or trying to access inline data"
364 364 ),
365 365 }
366 366 }
367 367
/// Build the entry standing for `NULL_REVISION`.
///
/// It has empty bytes, zero lengths, no delta base, null parents, no flags
/// and `NULL_NODE` as its hash.
pub fn make_null_entry(&self) -> RevlogEntry {
    RevlogEntry {
        revlog: self,
        rev: NULL_REVISION,
        bytes: b"",
        compressed_len: 0,
        uncompressed_len: 0,
        base_rev_or_base_of_delta_chain: None,
        p1: NULL_REVISION,
        p2: NULL_REVISION,
        flags: NULL_REVLOG_ENTRY_FLAGS,
        hash: NULL_NODE,
    }
}
382 382
383 383 /// Get an entry of the revlog.
384 384 pub fn get_entry(
385 385 &self,
386 386 rev: Revision,
387 387 ) -> Result<RevlogEntry, RevlogError> {
388 388 if rev == NULL_REVISION {
389 389 return Ok(self.make_null_entry());
390 390 }
391 391 let index_entry = self
392 392 .index
393 393 .get_entry(rev)
394 394 .ok_or(RevlogError::InvalidRevision)?;
395 395 let start = index_entry.offset();
396 396 let end = start + index_entry.compressed_len() as usize;
397 397 let data = if self.index.is_inline() {
398 398 self.index.data(start, end)
399 399 } else {
400 400 &self.data()[start..end]
401 401 };
402 402 let entry = RevlogEntry {
403 403 revlog: self,
404 404 rev,
405 405 bytes: data,
406 406 compressed_len: index_entry.compressed_len(),
407 407 uncompressed_len: index_entry.uncompressed_len(),
408 408 base_rev_or_base_of_delta_chain: if index_entry
409 409 .base_revision_or_base_of_delta_chain()
410 410 == rev
411 411 {
412 412 None
413 413 } else {
414 414 Some(index_entry.base_revision_or_base_of_delta_chain())
415 415 },
416 416 p1: index_entry.p1(),
417 417 p2: index_entry.p2(),
418 418 flags: index_entry.flags(),
419 419 hash: *index_entry.hash(),
420 420 };
421 421 Ok(entry)
422 422 }
423 423
424 424 /// when resolving internal references within revlog, any errors
425 425 /// should be reported as corruption, instead of e.g. "invalid revision"
426 426 fn get_entry_internal(
427 427 &self,
428 428 rev: Revision,
429 429 ) -> Result<RevlogEntry, HgError> {
430 430 self.get_entry(rev)
431 431 .map_err(|_| corrupted(format!("revision {} out of range", rev)))
432 432 }
433 433 }
434 434
/// The revlog entry's bytes and the necessary information to extract
/// the entry's data.
#[derive(Clone)]
pub struct RevlogEntry<'revlog> {
    /// The revlog this entry was read from
    revlog: &'revlog Revlog,
    /// Revision number of this entry
    rev: Revision,
    /// Stored bytes of this entry, as found in the revlog (see `data_chunk`
    /// for how they are decoded)
    bytes: &'revlog [u8],
    /// Length of the stored bytes, as recorded in the index
    compressed_len: u32,
    /// Uncompressed length, as recorded in the index
    uncompressed_len: i32,
    /// `None` for a full snapshot. Otherwise, the value recorded in the
    /// index, whose meaning depends on generaldelta (see `rawdata`).
    base_rev_or_base_of_delta_chain: Option<Revision>,
    /// First parent, `NULL_REVISION` if absent
    p1: Revision,
    /// Second parent, `NULL_REVISION` if absent
    p2: Revision,
    /// Revision flags (see the `REVISION_FLAG_*` constants)
    flags: u16,
    /// Node ID of this revision
    hash: Node,
}
450 450
// Per-thread zstd decompressor. It sits in a `RefCell` so that
// `zstd_decompress_to_buffer` can borrow it mutably.
thread_local! {
    // seems fine to [unwrap] here: this can only fail due to memory allocation
    // failing, and it's normal for that to cause panic.
    static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
        RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
}
457 457
458 458 fn zstd_decompress_to_buffer(
459 459 bytes: &[u8],
460 460 buf: &mut Vec<u8>,
461 461 ) -> Result<usize, std::io::Error> {
462 462 ZSTD_DECODER
463 463 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
464 464 }
465 465
466 466 impl<'revlog> RevlogEntry<'revlog> {
/// The revision number of this entry.
pub fn revision(&self) -> Revision {
    self.rev
}
470 470
/// The node ID of this entry.
pub fn node(&self) -> &Node {
    &self.hash
}
474 474
/// The uncompressed length recorded for this entry.
///
/// Returns `None` when the stored value is negative, since it then cannot
/// be represented as a `u32`.
pub fn uncompressed_len(&self) -> Option<u32> {
    u32::try_from(self.uncompressed_len).ok()
}
478 478
/// Whether this entry has a first parent, i.e. whether `p1` is anything
/// but `NULL_REVISION`.
pub fn has_p1(&self) -> bool {
    self.p1 != NULL_REVISION
}
482 482
483 483 pub fn p1_entry(
484 484 &self,
485 485 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
486 486 if self.p1 == NULL_REVISION {
487 487 Ok(None)
488 488 } else {
489 489 Ok(Some(self.revlog.get_entry(self.p1)?))
490 490 }
491 491 }
492 492
493 493 pub fn p2_entry(
494 494 &self,
495 495 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
496 496 if self.p2 == NULL_REVISION {
497 497 Ok(None)
498 498 } else {
499 499 Ok(Some(self.revlog.get_entry(self.p2)?))
500 500 }
501 501 }
502 502
503 503 pub fn p1(&self) -> Option<Revision> {
504 504 if self.p1 == NULL_REVISION {
505 505 None
506 506 } else {
507 507 Some(self.p1)
508 508 }
509 509 }
510 510
511 511 pub fn p2(&self) -> Option<Revision> {
512 512 if self.p2 == NULL_REVISION {
513 513 None
514 514 } else {
515 515 Some(self.p2)
516 516 }
517 517 }
518 518
/// Whether the `REVISION_FLAG_CENSORED` bit is set in this entry's flags.
pub fn is_censored(&self) -> bool {
    (self.flags & REVISION_FLAG_CENSORED) != 0
}
522 522
523 523 pub fn has_length_affecting_flag_processor(&self) -> bool {
524 524 // Relevant Python code: revlog.size()
525 525 // note: ELLIPSIS is known to not change the content
526 526 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
527 527 }
528 528
529 529 /// The data for this entry, after resolving deltas if any.
530 530 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
531 531 let mut entry = self.clone();
532 532 let mut delta_chain = vec![];
533 533
534 534 // The meaning of `base_rev_or_base_of_delta_chain` depends on
535 535 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
536 536 // `mercurial/revlogutils/constants.py` and the code in
537 537 // [_chaininfo] and in [index_deltachain].
538 538 let uses_generaldelta = self.revlog.index.uses_generaldelta();
539 539 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
540 let base_rev = if uses_generaldelta {
541 base_rev
540 entry = if uses_generaldelta {
541 delta_chain.push(entry);
542 self.revlog.get_entry_internal(base_rev)?
542 543 } else {
543 entry.rev - 1
544 let base_rev = entry.rev - 1;
545 delta_chain.push(entry);
546 self.revlog.get_entry_internal(base_rev)?
544 547 };
545 delta_chain.push(entry);
546 entry = self.revlog.get_entry_internal(base_rev)?;
547 548 }
548 549
549 550 let data = if delta_chain.is_empty() {
550 551 entry.data_chunk()?
551 552 } else {
552 553 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
553 554 };
554 555
555 556 Ok(data)
556 557 }
557 558
558 559 fn check_data(
559 560 &self,
560 561 data: Cow<'revlog, [u8]>,
561 562 ) -> Result<Cow<'revlog, [u8]>, HgError> {
562 563 if self.revlog.check_hash(
563 564 self.p1,
564 565 self.p2,
565 566 self.hash.as_bytes(),
566 567 &data,
567 568 ) {
568 569 Ok(data)
569 570 } else {
570 571 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
571 572 return Err(HgError::unsupported(
572 573 "ellipsis revisions are not supported by rhg",
573 574 ));
574 575 }
575 576 Err(corrupted(format!(
576 577 "hash check failed for revision {}",
577 578 self.rev
578 579 )))
579 580 }
580 581 }
581 582
582 583 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
583 584 let data = self.rawdata()?;
584 585 if self.rev == NULL_REVISION {
585 586 return Ok(data);
586 587 }
587 588 if self.is_censored() {
588 589 return Err(HgError::CensoredNodeError);
589 590 }
590 591 self.check_data(data)
591 592 }
592 593
593 594 /// Extract the data contained in the entry.
594 595 /// This may be a delta. (See `is_delta`.)
595 596 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
596 597 if self.bytes.is_empty() {
597 598 return Ok(Cow::Borrowed(&[]));
598 599 }
599 600 match self.bytes[0] {
600 601 // Revision data is the entirety of the entry, including this
601 602 // header.
602 603 b'\0' => Ok(Cow::Borrowed(self.bytes)),
603 604 // Raw revision data follows.
604 605 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
605 606 // zlib (RFC 1950) data.
606 607 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
607 608 // zstd data.
608 609 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
609 610 // A proper new format should have had a repo/store requirement.
610 611 format_type => Err(corrupted(format!(
611 612 "unknown compression header '{}'",
612 613 format_type
613 614 ))),
614 615 }
615 616 }
616 617
617 618 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
618 619 let mut decoder = ZlibDecoder::new(self.bytes);
619 620 if self.is_delta() {
620 621 let mut buf = Vec::with_capacity(self.compressed_len as usize);
621 622 decoder
622 623 .read_to_end(&mut buf)
623 624 .map_err(|e| corrupted(e.to_string()))?;
624 625 Ok(buf)
625 626 } else {
626 627 let cap = self.uncompressed_len.max(0) as usize;
627 628 let mut buf = vec![0; cap];
628 629 decoder
629 630 .read_exact(&mut buf)
630 631 .map_err(|e| corrupted(e.to_string()))?;
631 632 Ok(buf)
632 633 }
633 634 }
634 635
635 636 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
636 637 let cap = self.uncompressed_len.max(0) as usize;
637 638 if self.is_delta() {
638 639 // [cap] is usually an over-estimate of the space needed because
639 640 // it's the length of delta-decoded data, but we're interested
640 641 // in the size of the delta.
641 642 // This means we have to [shrink_to_fit] to avoid holding on
642 643 // to a large chunk of memory, but it also means we must have a
643 644 // fallback branch, for the case when the delta is longer than
644 645 // the original data (surprisingly, this does happen in practice)
645 646 let mut buf = Vec::with_capacity(cap);
646 647 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
647 648 Ok(_) => buf.shrink_to_fit(),
648 649 Err(_) => {
649 650 buf.clear();
650 651 zstd::stream::copy_decode(self.bytes, &mut buf)
651 652 .map_err(|e| corrupted(e.to_string()))?;
652 653 }
653 654 };
654 655 Ok(buf)
655 656 } else {
656 657 let mut buf = Vec::with_capacity(cap);
657 658 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
658 659 .map_err(|e| corrupted(e.to_string()))?;
659 660 if len != self.uncompressed_len as usize {
660 661 Err(corrupted("uncompressed length does not match"))
661 662 } else {
662 663 Ok(buf)
663 664 }
664 665 }
665 666 }
666 667
/// Tell if the entry is a snapshot or a delta
/// (influences on decompression).
///
/// An entry is a delta when it has a recorded delta base.
fn is_delta(&self) -> bool {
    self.base_rev_or_base_of_delta_chain.is_some()
}
672 673 }
673 674
674 675 /// Calculate the hash of a revision given its data and its parents.
675 676 fn hash(
676 677 data: &[u8],
677 678 p1_hash: &[u8],
678 679 p2_hash: &[u8],
679 680 ) -> [u8; NODE_BYTES_LENGTH] {
680 681 let mut hasher = Sha1::new();
681 682 let (a, b) = (p1_hash, p2_hash);
682 683 if a > b {
683 684 hasher.update(b);
684 685 hasher.update(a);
685 686 } else {
686 687 hasher.update(a);
687 688 hasher.update(b);
688 689 }
689 690 hasher.update(data);
690 691 *hasher.finalize().as_ref()
691 692 }
692 693
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
    use itertools::Itertools;

    /// An empty index file gives an empty revlog in which only the null
    /// revision can be resolved.
    #[test]
    fn test_empty() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
        assert!(revlog.is_empty());
        assert_eq!(revlog.len(), 0);
        assert!(revlog.get_entry(0).is_err());
        assert!(!revlog.has_rev(0));
        assert_eq!(
            revlog.rev_from_node(NULL_NODE.into()).unwrap(),
            NULL_REVISION
        );
        let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap();
        assert_eq!(null_entry.revision(), NULL_REVISION);
        assert!(null_entry.data().unwrap().is_empty());
    }

    /// An inline revlog with three entries: two roots and one merge of both.
    /// Checks revision numbers, nodes and parent accessors.
    #[test]
    fn test_inline() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
            .unwrap();
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        let entry2_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_p1(0)
            .with_p2(1)
            .with_node(node2)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        // Revision 0: a root, no parents.
        let entry0 = revlog.get_entry(0).ok().unwrap();
        assert_eq!(entry0.revision(), 0);
        assert_eq!(*entry0.node(), node0);
        assert!(!entry0.has_p1());
        assert_eq!(entry0.p1(), None);
        assert_eq!(entry0.p2(), None);
        let p1_entry = entry0.p1_entry().unwrap();
        assert!(p1_entry.is_none());
        let p2_entry = entry0.p2_entry().unwrap();
        assert!(p2_entry.is_none());

        // Revision 1: another root, no parents.
        let entry1 = revlog.get_entry(1).ok().unwrap();
        assert_eq!(entry1.revision(), 1);
        assert_eq!(*entry1.node(), node1);
        assert!(!entry1.has_p1());
        assert_eq!(entry1.p1(), None);
        assert_eq!(entry1.p2(), None);
        let p1_entry = entry1.p1_entry().unwrap();
        assert!(p1_entry.is_none());
        let p2_entry = entry1.p2_entry().unwrap();
        assert!(p2_entry.is_none());

        // Revision 2: parents are revisions 0 and 1.
        let entry2 = revlog.get_entry(2).ok().unwrap();
        assert_eq!(entry2.revision(), 2);
        assert_eq!(*entry2.node(), node2);
        assert!(entry2.has_p1());
        assert_eq!(entry2.p1(), Some(0));
        assert_eq!(entry2.p2(), Some(1));
        let p1_entry = entry2.p1_entry().unwrap();
        assert!(p1_entry.is_some());
        assert_eq!(p1_entry.unwrap().revision(), 0);
        let p2_entry = entry2.p2_entry().unwrap();
        assert!(p2_entry.is_some());
        assert_eq!(p2_entry.unwrap().revision(), 1);
    }

    /// Node and prefix lookups, including prefixes shared with the null
    /// node. The revlog is opened with `use_nodemap` set to `false`.
    #[test]
    fn test_nodemap() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };

        // building a revlog with a forced Node starting with zeros
        // This is a corruption, but it does not preclude using the nodemap
        // if we don't try and access the data
        let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        // accessing the data shows the corruption
        revlog.get_entry(0).unwrap().data().unwrap_err();

        assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
        assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
        assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
        // "000" is a prefix of the null node only.
        assert_eq!(
            revlog
                .rev_from_node(NodePrefix::from_hex("000").unwrap())
                .unwrap(),
            -1
        );
        assert_eq!(
            revlog
                .rev_from_node(NodePrefix::from_hex("b00").unwrap())
                .unwrap(),
            1
        );
        // "00" is a prefix of both the null node and node0.
        // RevlogError does not implement PartialEq
        // (ultimately because io::Error does not)
        match revlog
            .rev_from_node(NodePrefix::from_hex("00").unwrap())
            .expect_err("Expected to give AmbiguousPrefix error")
        {
            RevlogError::AmbiguousPrefix => (),
            e => {
                panic!("Got another error than AmbiguousPrefix: {:?}", e);
            }
        };
    }
}
General Comments 0
You need to be logged in to leave comments. Login now