##// END OF EJS Templates
rust: add `UncheckedRevision` type...
Raphaël Gomès -
r51867:c950fdba default
parent child Browse files
Show More
@@ -1,829 +1,849 b''
1 1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 10 mod nodemap_docket;
11 11 pub mod path_encode;
12 12 pub use node::{FromHexError, Node, NodePrefix};
13 13 pub mod changelog;
14 14 pub mod filelog;
15 15 pub mod index;
16 16 pub mod manifest;
17 17 pub mod patch;
18 18
19 19 use std::borrow::Cow;
20 20 use std::io::Read;
21 21 use std::ops::Deref;
22 22 use std::path::Path;
23 23
24 24 use flate2::read::ZlibDecoder;
25 25 use sha1::{Digest, Sha1};
26 26 use std::cell::RefCell;
27 27 use zstd;
28 28
29 29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 30 use self::nodemap_docket::NodeMapDocket;
31 31 use super::index::Index;
32 32 use super::nodemap::{NodeMap, NodeMapError};
33 33 use crate::errors::HgError;
34 34 use crate::vfs::Vfs;
35 35
/// A Mercurial revision number.
///
/// `revlog.c` encodes revision numbers on 4 bytes and liberally converts
/// them to ints, which is why this is an `i32`.
pub type Revision = i32;

/// A revision number that has not been validated against any index.
///
/// Nothing guarantees that a value of this type designates an existing
/// revision in any context. Call `check_revision` on the appropriate index
/// object to obtain a [`Revision`] that is valid for that index.
///
/// This is currently a plain alias of `i32`, exactly like [`Revision`], so
/// the compiler does not tell the two apart.
///
/// As noted in revlog.c, revision numbers are actually encoded in
/// 4 bytes, and are liberally converted to ints, whence the i32
pub type UncheckedRevision = i32;

/// Marker expressing the absence of a parent.
///
/// Whatever the actual representation is, `NULL_REVISION` is guaranteed
/// to be smaller than every existing revision.
pub const NULL_REVISION: Revision = -1;

/// Same as `mercurial.node.wdirrev`.
///
/// This is also equal to `i32::max_value()`, but it's better to spell
/// it out explicitly, same as in `mercurial.node`
#[allow(clippy::unreadable_literal)]
pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;

/// Hexadecimal identifier used for the working directory: forty `f`
/// digits.
pub const WORKING_DIRECTORY_HEX: &str =
    "ffffffffffffffffffffffffffffffffffffffff";
57 67
58 68 /// The simplest expression of what we need of Mercurial DAGs.
59 69 pub trait Graph {
60 70 /// Return the two parents of the given `Revision`.
61 71 ///
62 72 /// Each of the parents can be independently `NULL_REVISION`
63 73 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
64 74 }
65 75
66 76 #[derive(Clone, Debug, PartialEq)]
67 77 pub enum GraphError {
68 78 ParentOutOfRange(Revision),
69 79 WorkingDirectoryUnsupported,
70 80 }
71 81
72 82 /// The Mercurial Revlog Index
73 83 ///
74 84 /// This is currently limited to the minimal interface that is needed for
75 85 /// the [`nodemap`](nodemap/index.html) module
76 86 pub trait RevlogIndex {
77 87 /// Total number of Revisions referenced in this index
78 88 fn len(&self) -> usize;
79 89
80 90 fn is_empty(&self) -> bool {
81 91 self.len() == 0
82 92 }
83 93
84 94 /// Return a reference to the Node or `None` if rev is out of bounds
85 95 ///
86 96 /// `NULL_REVISION` is not considered to be out of bounds.
87 97 fn node(&self, rev: Revision) -> Option<&Node>;
98
99 /// Return a [`Revision`] if `rev` is a valid revision number for this
100 /// index
101 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
102 if rev == NULL_REVISION || (rev >= 0 && (rev as usize) < self.len()) {
103 Some(rev)
104 } else {
105 None
106 }
107 }
88 108 }
89 109
// Per-revision flag bits, stored in a 16-bit field of each index entry.
const REVISION_FLAG_CENSORED: u16 = 1 << 15;
const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;

// Union of every flag declared above.
// Keep this in sync with REVIDX_KNOWN_FLAGS in
// mercurial/revlogutils/flagutil.py
const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_HASCOPIESINFO;

// Flags carried by the synthetic entry built for `NULL_REVISION`: none.
const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
103 123
104 124 #[derive(Debug, derive_more::From)]
105 125 pub enum RevlogError {
106 126 InvalidRevision,
107 127 /// Working directory is not supported
108 128 WDirUnsupported,
109 129 /// Found more than one entry whose ID match the requested prefix
110 130 AmbiguousPrefix,
111 131 #[from]
112 132 Other(HgError),
113 133 }
114 134
115 135 impl From<NodeMapError> for RevlogError {
116 136 fn from(error: NodeMapError) -> Self {
117 137 match error {
118 138 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
119 139 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
120 140 format!("nodemap point to revision {} not in index", rev),
121 141 ),
122 142 }
123 143 }
124 144 }
125 145
126 146 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
127 147 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
128 148 }
129 149
130 150 impl RevlogError {
131 151 fn corrupted<S: AsRef<str>>(context: S) -> Self {
132 152 RevlogError::Other(corrupted(context))
133 153 }
134 154 }
135 155
136 156 /// Read only implementation of revlog.
137 157 pub struct Revlog {
138 158 /// When index and data are not interleaved: bytes of the revlog index.
139 159 /// When index and data are interleaved: bytes of the revlog index and
140 160 /// data.
141 161 index: Index,
142 162 /// When index and data are not interleaved: bytes of the revlog data
143 163 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
144 164 /// When present on disk: the persistent nodemap for this revlog
145 165 nodemap: Option<nodemap::NodeTree>,
146 166 }
147 167
148 168 impl Revlog {
149 169 /// Open a revlog index file.
150 170 ///
151 171 /// It will also open the associated data file if index and data are not
152 172 /// interleaved.
153 173 pub fn open(
154 174 store_vfs: &Vfs,
155 175 index_path: impl AsRef<Path>,
156 176 data_path: Option<&Path>,
157 177 use_nodemap: bool,
158 178 ) -> Result<Self, HgError> {
159 179 let index_path = index_path.as_ref();
160 180 let index = {
161 181 match store_vfs.mmap_open_opt(&index_path)? {
162 182 None => Index::new(Box::new(vec![])),
163 183 Some(index_mmap) => {
164 184 let index = Index::new(Box::new(index_mmap))?;
165 185 Ok(index)
166 186 }
167 187 }
168 188 }?;
169 189
170 190 let default_data_path = index_path.with_extension("d");
171 191
172 192 // type annotation required
173 193 // won't recognize Mmap as Deref<Target = [u8]>
174 194 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
175 195 if index.is_inline() {
176 196 None
177 197 } else {
178 198 let data_path = data_path.unwrap_or(&default_data_path);
179 199 let data_mmap = store_vfs.mmap_open(data_path)?;
180 200 Some(Box::new(data_mmap))
181 201 };
182 202
183 203 let nodemap = if index.is_inline() || !use_nodemap {
184 204 None
185 205 } else {
186 206 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
187 207 |(docket, data)| {
188 208 nodemap::NodeTree::load_bytes(
189 209 Box::new(data),
190 210 docket.data_length,
191 211 )
192 212 },
193 213 )
194 214 };
195 215
196 216 Ok(Revlog {
197 217 index,
198 218 data_bytes,
199 219 nodemap,
200 220 })
201 221 }
202 222
203 223 /// Return number of entries of the `Revlog`.
204 224 pub fn len(&self) -> usize {
205 225 self.index.len()
206 226 }
207 227
208 228 /// Returns `true` if the `Revlog` has zero `entries`.
209 229 pub fn is_empty(&self) -> bool {
210 230 self.index.is_empty()
211 231 }
212 232
213 233 /// Returns the node ID for the given revision number, if it exists in this
214 234 /// revlog
215 235 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
216 236 if rev == NULL_REVISION {
217 237 return Some(&NULL_NODE);
218 238 }
219 239 Some(self.index.get_entry(rev)?.hash())
220 240 }
221 241
222 242 /// Return the revision number for the given node ID, if it exists in this
223 243 /// revlog
224 244 pub fn rev_from_node(
225 245 &self,
226 246 node: NodePrefix,
227 247 ) -> Result<Revision, RevlogError> {
228 248 let looked_up = if let Some(nodemap) = &self.nodemap {
229 249 nodemap
230 250 .find_bin(&self.index, node)?
231 251 .ok_or(RevlogError::InvalidRevision)
232 252 } else {
233 253 self.rev_from_node_no_persistent_nodemap(node)
234 254 };
235 255
236 256 if node.is_prefix_of(&NULL_NODE) {
237 257 return match looked_up {
238 258 Ok(_) => Err(RevlogError::AmbiguousPrefix),
239 259 Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),
240 260 res => res,
241 261 };
242 262 };
243 263
244 264 looked_up
245 265 }
246 266
247 267 /// Same as `rev_from_node`, without using a persistent nodemap
248 268 ///
249 269 /// This is used as fallback when a persistent nodemap is not present.
250 270 /// This happens when the persistent-nodemap experimental feature is not
251 271 /// enabled, or for small revlogs.
252 272 fn rev_from_node_no_persistent_nodemap(
253 273 &self,
254 274 node: NodePrefix,
255 275 ) -> Result<Revision, RevlogError> {
256 276 // Linear scan of the revlog
257 277 // TODO: consider building a non-persistent nodemap in memory to
258 278 // optimize these cases.
259 279 let mut found_by_prefix = None;
260 280 for rev in (0..self.len() as Revision).rev() {
261 281 let index_entry = self.index.get_entry(rev).ok_or_else(|| {
262 282 HgError::corrupted(
263 283 "revlog references a revision not in the index",
264 284 )
265 285 })?;
266 286 if node == *index_entry.hash() {
267 287 return Ok(rev);
268 288 }
269 289 if node.is_prefix_of(index_entry.hash()) {
270 290 if found_by_prefix.is_some() {
271 291 return Err(RevlogError::AmbiguousPrefix);
272 292 }
273 293 found_by_prefix = Some(rev)
274 294 }
275 295 }
276 296 found_by_prefix.ok_or(RevlogError::InvalidRevision)
277 297 }
278 298
279 299 /// Returns whether the given revision exists in this revlog.
280 300 pub fn has_rev(&self, rev: Revision) -> bool {
281 301 self.index.get_entry(rev).is_some()
282 302 }
283 303
284 304 /// Return the full data associated to a revision.
285 305 ///
286 306 /// All entries required to build the final data out of deltas will be
287 307 /// retrieved as needed, and the deltas will be applied to the inital
288 308 /// snapshot to rebuild the final data.
289 309 pub fn get_rev_data(
290 310 &self,
291 311 rev: Revision,
292 312 ) -> Result<Cow<[u8]>, RevlogError> {
293 313 if rev == NULL_REVISION {
294 314 return Ok(Cow::Borrowed(&[]));
295 315 };
296 316 Ok(self.get_entry(rev)?.data()?)
297 317 }
298 318
299 319 /// Check the hash of some given data against the recorded hash.
300 320 pub fn check_hash(
301 321 &self,
302 322 p1: Revision,
303 323 p2: Revision,
304 324 expected: &[u8],
305 325 data: &[u8],
306 326 ) -> bool {
307 327 let e1 = self.index.get_entry(p1);
308 328 let h1 = match e1 {
309 329 Some(ref entry) => entry.hash(),
310 330 None => &NULL_NODE,
311 331 };
312 332 let e2 = self.index.get_entry(p2);
313 333 let h2 = match e2 {
314 334 Some(ref entry) => entry.hash(),
315 335 None => &NULL_NODE,
316 336 };
317 337
318 338 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
319 339 }
320 340
321 341 /// Build the full data of a revision out its snapshot
322 342 /// and its deltas.
323 343 fn build_data_from_deltas(
324 344 snapshot: RevlogEntry,
325 345 deltas: &[RevlogEntry],
326 346 ) -> Result<Vec<u8>, HgError> {
327 347 let snapshot = snapshot.data_chunk()?;
328 348 let deltas = deltas
329 349 .iter()
330 350 .rev()
331 351 .map(RevlogEntry::data_chunk)
332 352 .collect::<Result<Vec<_>, _>>()?;
333 353 let patches: Vec<_> =
334 354 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
335 355 let patch = patch::fold_patch_lists(&patches);
336 356 Ok(patch.apply(&snapshot))
337 357 }
338 358
339 359 /// Return the revlog data.
340 360 fn data(&self) -> &[u8] {
341 361 match &self.data_bytes {
342 362 Some(data_bytes) => data_bytes,
343 363 None => panic!(
344 364 "forgot to load the data or trying to access inline data"
345 365 ),
346 366 }
347 367 }
348 368
349 369 pub fn make_null_entry(&self) -> RevlogEntry {
350 370 RevlogEntry {
351 371 revlog: self,
352 372 rev: NULL_REVISION,
353 373 bytes: b"",
354 374 compressed_len: 0,
355 375 uncompressed_len: 0,
356 376 base_rev_or_base_of_delta_chain: None,
357 377 p1: NULL_REVISION,
358 378 p2: NULL_REVISION,
359 379 flags: NULL_REVLOG_ENTRY_FLAGS,
360 380 hash: NULL_NODE,
361 381 }
362 382 }
363 383
364 384 /// Get an entry of the revlog.
365 385 pub fn get_entry(
366 386 &self,
367 387 rev: Revision,
368 388 ) -> Result<RevlogEntry, RevlogError> {
369 389 if rev == NULL_REVISION {
370 390 return Ok(self.make_null_entry());
371 391 }
372 392 let index_entry = self
373 393 .index
374 394 .get_entry(rev)
375 395 .ok_or(RevlogError::InvalidRevision)?;
376 396 let start = index_entry.offset();
377 397 let end = start + index_entry.compressed_len() as usize;
378 398 let data = if self.index.is_inline() {
379 399 self.index.data(start, end)
380 400 } else {
381 401 &self.data()[start..end]
382 402 };
383 403 let entry = RevlogEntry {
384 404 revlog: self,
385 405 rev,
386 406 bytes: data,
387 407 compressed_len: index_entry.compressed_len(),
388 408 uncompressed_len: index_entry.uncompressed_len(),
389 409 base_rev_or_base_of_delta_chain: if index_entry
390 410 .base_revision_or_base_of_delta_chain()
391 411 == rev
392 412 {
393 413 None
394 414 } else {
395 415 Some(index_entry.base_revision_or_base_of_delta_chain())
396 416 },
397 417 p1: index_entry.p1(),
398 418 p2: index_entry.p2(),
399 419 flags: index_entry.flags(),
400 420 hash: *index_entry.hash(),
401 421 };
402 422 Ok(entry)
403 423 }
404 424
405 425 /// when resolving internal references within revlog, any errors
406 426 /// should be reported as corruption, instead of e.g. "invalid revision"
407 427 fn get_entry_internal(
408 428 &self,
409 429 rev: Revision,
410 430 ) -> Result<RevlogEntry, HgError> {
411 431 self.get_entry(rev)
412 432 .map_err(|_| corrupted(format!("revision {} out of range", rev)))
413 433 }
414 434 }
415 435
416 436 /// The revlog entry's bytes and the necessary informations to extract
417 437 /// the entry's data.
418 438 #[derive(Clone)]
419 439 pub struct RevlogEntry<'revlog> {
420 440 revlog: &'revlog Revlog,
421 441 rev: Revision,
422 442 bytes: &'revlog [u8],
423 443 compressed_len: u32,
424 444 uncompressed_len: i32,
425 445 base_rev_or_base_of_delta_chain: Option<Revision>,
426 446 p1: Revision,
427 447 p2: Revision,
428 448 flags: u16,
429 449 hash: Node,
430 450 }
431 451
432 452 thread_local! {
433 453 // seems fine to [unwrap] here: this can only fail due to memory allocation
434 454 // failing, and it's normal for that to cause panic.
435 455 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
436 456 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
437 457 }
438 458
439 459 fn zstd_decompress_to_buffer(
440 460 bytes: &[u8],
441 461 buf: &mut Vec<u8>,
442 462 ) -> Result<usize, std::io::Error> {
443 463 ZSTD_DECODER
444 464 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
445 465 }
446 466
447 467 impl<'revlog> RevlogEntry<'revlog> {
448 468 pub fn revision(&self) -> Revision {
449 469 self.rev
450 470 }
451 471
452 472 pub fn node(&self) -> &Node {
453 473 &self.hash
454 474 }
455 475
456 476 pub fn uncompressed_len(&self) -> Option<u32> {
457 477 u32::try_from(self.uncompressed_len).ok()
458 478 }
459 479
460 480 pub fn has_p1(&self) -> bool {
461 481 self.p1 != NULL_REVISION
462 482 }
463 483
464 484 pub fn p1_entry(
465 485 &self,
466 486 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
467 487 if self.p1 == NULL_REVISION {
468 488 Ok(None)
469 489 } else {
470 490 Ok(Some(self.revlog.get_entry(self.p1)?))
471 491 }
472 492 }
473 493
474 494 pub fn p2_entry(
475 495 &self,
476 496 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
477 497 if self.p2 == NULL_REVISION {
478 498 Ok(None)
479 499 } else {
480 500 Ok(Some(self.revlog.get_entry(self.p2)?))
481 501 }
482 502 }
483 503
484 504 pub fn p1(&self) -> Option<Revision> {
485 505 if self.p1 == NULL_REVISION {
486 506 None
487 507 } else {
488 508 Some(self.p1)
489 509 }
490 510 }
491 511
492 512 pub fn p2(&self) -> Option<Revision> {
493 513 if self.p2 == NULL_REVISION {
494 514 None
495 515 } else {
496 516 Some(self.p2)
497 517 }
498 518 }
499 519
500 520 pub fn is_censored(&self) -> bool {
501 521 (self.flags & REVISION_FLAG_CENSORED) != 0
502 522 }
503 523
504 524 pub fn has_length_affecting_flag_processor(&self) -> bool {
505 525 // Relevant Python code: revlog.size()
506 526 // note: ELLIPSIS is known to not change the content
507 527 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
508 528 }
509 529
510 530 /// The data for this entry, after resolving deltas if any.
511 531 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
512 532 let mut entry = self.clone();
513 533 let mut delta_chain = vec![];
514 534
515 535 // The meaning of `base_rev_or_base_of_delta_chain` depends on
516 536 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
517 537 // `mercurial/revlogutils/constants.py` and the code in
518 538 // [_chaininfo] and in [index_deltachain].
519 539 let uses_generaldelta = self.revlog.index.uses_generaldelta();
520 540 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
521 541 let base_rev = if uses_generaldelta {
522 542 base_rev
523 543 } else {
524 544 entry.rev - 1
525 545 };
526 546 delta_chain.push(entry);
527 547 entry = self.revlog.get_entry_internal(base_rev)?;
528 548 }
529 549
530 550 let data = if delta_chain.is_empty() {
531 551 entry.data_chunk()?
532 552 } else {
533 553 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
534 554 };
535 555
536 556 Ok(data)
537 557 }
538 558
539 559 fn check_data(
540 560 &self,
541 561 data: Cow<'revlog, [u8]>,
542 562 ) -> Result<Cow<'revlog, [u8]>, HgError> {
543 563 if self.revlog.check_hash(
544 564 self.p1,
545 565 self.p2,
546 566 self.hash.as_bytes(),
547 567 &data,
548 568 ) {
549 569 Ok(data)
550 570 } else {
551 571 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
552 572 return Err(HgError::unsupported(
553 573 "ellipsis revisions are not supported by rhg",
554 574 ));
555 575 }
556 576 Err(corrupted(format!(
557 577 "hash check failed for revision {}",
558 578 self.rev
559 579 )))
560 580 }
561 581 }
562 582
563 583 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
564 584 let data = self.rawdata()?;
565 585 if self.rev == NULL_REVISION {
566 586 return Ok(data);
567 587 }
568 588 if self.is_censored() {
569 589 return Err(HgError::CensoredNodeError);
570 590 }
571 591 self.check_data(data)
572 592 }
573 593
574 594 /// Extract the data contained in the entry.
575 595 /// This may be a delta. (See `is_delta`.)
576 596 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
577 597 if self.bytes.is_empty() {
578 598 return Ok(Cow::Borrowed(&[]));
579 599 }
580 600 match self.bytes[0] {
581 601 // Revision data is the entirety of the entry, including this
582 602 // header.
583 603 b'\0' => Ok(Cow::Borrowed(self.bytes)),
584 604 // Raw revision data follows.
585 605 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
586 606 // zlib (RFC 1950) data.
587 607 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
588 608 // zstd data.
589 609 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
590 610 // A proper new format should have had a repo/store requirement.
591 611 format_type => Err(corrupted(format!(
592 612 "unknown compression header '{}'",
593 613 format_type
594 614 ))),
595 615 }
596 616 }
597 617
598 618 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
599 619 let mut decoder = ZlibDecoder::new(self.bytes);
600 620 if self.is_delta() {
601 621 let mut buf = Vec::with_capacity(self.compressed_len as usize);
602 622 decoder
603 623 .read_to_end(&mut buf)
604 624 .map_err(|e| corrupted(e.to_string()))?;
605 625 Ok(buf)
606 626 } else {
607 627 let cap = self.uncompressed_len.max(0) as usize;
608 628 let mut buf = vec![0; cap];
609 629 decoder
610 630 .read_exact(&mut buf)
611 631 .map_err(|e| corrupted(e.to_string()))?;
612 632 Ok(buf)
613 633 }
614 634 }
615 635
616 636 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
617 637 let cap = self.uncompressed_len.max(0) as usize;
618 638 if self.is_delta() {
619 639 // [cap] is usually an over-estimate of the space needed because
620 640 // it's the length of delta-decoded data, but we're interested
621 641 // in the size of the delta.
622 642 // This means we have to [shrink_to_fit] to avoid holding on
623 643 // to a large chunk of memory, but it also means we must have a
624 644 // fallback branch, for the case when the delta is longer than
625 645 // the original data (surprisingly, this does happen in practice)
626 646 let mut buf = Vec::with_capacity(cap);
627 647 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
628 648 Ok(_) => buf.shrink_to_fit(),
629 649 Err(_) => {
630 650 buf.clear();
631 651 zstd::stream::copy_decode(self.bytes, &mut buf)
632 652 .map_err(|e| corrupted(e.to_string()))?;
633 653 }
634 654 };
635 655 Ok(buf)
636 656 } else {
637 657 let mut buf = Vec::with_capacity(cap);
638 658 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
639 659 .map_err(|e| corrupted(e.to_string()))?;
640 660 if len != self.uncompressed_len as usize {
641 661 Err(corrupted("uncompressed length does not match"))
642 662 } else {
643 663 Ok(buf)
644 664 }
645 665 }
646 666 }
647 667
648 668 /// Tell if the entry is a snapshot or a delta
649 669 /// (influences on decompression).
650 670 fn is_delta(&self) -> bool {
651 671 self.base_rev_or_base_of_delta_chain.is_some()
652 672 }
653 673 }
654 674
655 675 /// Calculate the hash of a revision given its data and its parents.
656 676 fn hash(
657 677 data: &[u8],
658 678 p1_hash: &[u8],
659 679 p2_hash: &[u8],
660 680 ) -> [u8; NODE_BYTES_LENGTH] {
661 681 let mut hasher = Sha1::new();
662 682 let (a, b) = (p1_hash, p2_hash);
663 683 if a > b {
664 684 hasher.update(b);
665 685 hasher.update(a);
666 686 } else {
667 687 hasher.update(a);
668 688 hasher.update(b);
669 689 }
670 690 hasher.update(data);
671 691 *hasher.finalize().as_ref()
672 692 }
673 693
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
    use itertools::Itertools;

    /// Write `contents` as `foo.i` inside `dir`, then open it with no
    /// explicit data path and the nodemap disabled.
    fn open_foo_revlog(dir: &Path, contents: &[u8]) -> Revlog {
        std::fs::write(dir.join("foo.i"), contents).unwrap();
        let vfs = Vfs { base: dir };
        Revlog::open(&vfs, "foo.i", None, false).unwrap()
    }

    /// Check that `rev` is stored with node ID `node` and has no parent.
    fn assert_parentless(revlog: &Revlog, rev: Revision, node: Node) {
        let entry = revlog.get_entry(rev).ok().unwrap();
        assert_eq!(entry.revision(), rev);
        assert_eq!(*entry.node(), node);
        assert!(!entry.has_p1());
        assert_eq!(entry.p1(), None);
        assert_eq!(entry.p2(), None);
        assert!(entry.p1_entry().unwrap().is_none());
        assert!(entry.p2_entry().unwrap().is_none());
    }

    #[test]
    fn test_empty() {
        let temp = tempfile::tempdir().unwrap();
        let revlog = open_foo_revlog(temp.path(), b"");

        assert!(revlog.is_empty());
        assert_eq!(revlog.len(), 0);
        assert!(revlog.get_entry(0).is_err());
        assert!(!revlog.has_rev(0));
        assert_eq!(
            revlog.rev_from_node(NULL_NODE.into()).unwrap(),
            NULL_REVISION
        );

        // The null revision is always available, with empty data.
        let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap();
        assert_eq!(null_entry.revision(), NULL_REVISION);
        assert!(null_entry.data().unwrap().is_empty());
    }

    #[test]
    fn test_inline() {
        let temp = tempfile::tempdir().unwrap();
        let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
            .unwrap();

        // Two parentless revisions, then a merge of both.
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        let entry2_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_p1(0)
            .with_p2(1)
            .with_node(node2)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        let revlog = open_foo_revlog(temp.path(), &contents);

        assert_parentless(&revlog, 0, node0);
        assert_parentless(&revlog, 1, node1);

        let merge = revlog.get_entry(2).ok().unwrap();
        assert_eq!(merge.revision(), 2);
        assert_eq!(*merge.node(), node2);
        assert!(merge.has_p1());
        assert_eq!(merge.p1(), Some(0));
        assert_eq!(merge.p2(), Some(1));
        let first_parent = merge.p1_entry().unwrap();
        assert!(first_parent.is_some());
        assert_eq!(first_parent.unwrap().revision(), 0);
        let second_parent = merge.p2_entry().unwrap();
        assert!(second_parent.is_some());
        assert_eq!(second_parent.unwrap().revision(), 1);
    }

    #[test]
    fn test_nodemap() {
        let temp = tempfile::tempdir().unwrap();

        // building a revlog with a forced Node starting with zeros
        // This is a corruption, but it does not preclude using the nodemap
        // if we don't try and access the data
        let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        let revlog = open_foo_revlog(temp.path(), &contents);

        // accessing the data shows the corruption
        revlog.get_entry(0).unwrap().data().unwrap_err();

        assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
        assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
        assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);

        let null_like = NodePrefix::from_hex("000").unwrap();
        assert_eq!(revlog.rev_from_node(null_like).unwrap(), -1);
        let unique = NodePrefix::from_hex("b00").unwrap();
        assert_eq!(revlog.rev_from_node(unique).unwrap(), 1);

        // RevlogError does not implement PartialEq
        // (ultimately because io::Error does not)
        let error = revlog
            .rev_from_node(NodePrefix::from_hex("00").unwrap())
            .expect_err("Expected to give AmbiguousPrefix error");
        if !matches!(error, RevlogError::AmbiguousPrefix) {
            panic!("Got another error than AmbiguousPrefix: {:?}", error);
        }
    }
}
General Comments 0
You need to be logged in to leave comments. Login now