##// END OF EJS Templates
rust-revlog: split out method for `rev_from_node` without persistent nodemap...
Georges Racinet -
r51636:0159b014 stable
parent child Browse files
Show More
@@ -1,743 +1,751 b''
1 1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 10 mod nodemap_docket;
11 11 pub mod path_encode;
12 12 pub use node::{FromHexError, Node, NodePrefix};
13 13 pub mod changelog;
14 14 pub mod filelog;
15 15 pub mod index;
16 16 pub mod manifest;
17 17 pub mod patch;
18 18
19 19 use std::borrow::Cow;
20 20 use std::io::Read;
21 21 use std::ops::Deref;
22 22 use std::path::Path;
23 23
24 24 use flate2::read::ZlibDecoder;
25 25 use sha1::{Digest, Sha1};
26 26 use std::cell::RefCell;
27 27 use zstd;
28 28
29 29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 30 use self::nodemap_docket::NodeMapDocket;
31 31 use super::index::Index;
32 32 use super::nodemap::{NodeMap, NodeMapError};
33 33 use crate::errors::HgError;
34 34 use crate::vfs::Vfs;
35 35
/// Mercurial revision numbers
///
/// As noted in revlog.c, revision numbers are actually encoded in
/// 4 bytes, and are liberally converted to ints, whence the i32
pub type Revision = i32;

/// Marker expressing the absence of a parent
///
/// Independently of the actual representation, `NULL_REVISION` is guaranteed
/// to be smaller than all existing revisions.
pub const NULL_REVISION: Revision = -1;

/// Same as `mercurial.node.wdirrev`
///
/// This is also equal to `i32::max_value()`, but it's better to spell
/// it out explicitly, same as in `mercurial.node`
#[allow(clippy::unreadable_literal)]
pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;

/// Forty `f` hexadecimal digits.
///
/// NOTE(review): presumably the hexadecimal node ID standing for the working
/// directory, mirroring `WORKING_DIRECTORY_REVISION` -- confirm against
/// `mercurial.node`.
pub const WORKING_DIRECTORY_HEX: &str =
    "ffffffffffffffffffffffffffffffffffffffff";
57 57
/// The simplest expression of what we need of Mercurial DAGs.
///
/// Implementors only have to answer one question: which are the (at most
/// two) parents of a given revision.
pub trait Graph {
    /// Return the two parents of the given `Revision`.
    ///
    /// Each of the parents can be independently `NULL_REVISION`
    ///
    /// Failures are reported as a `GraphError`.
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
}
65 65
/// Errors that a `Graph` implementation can report from `Graph::parents`.
#[derive(Clone, Debug, PartialEq)]
pub enum GraphError {
    /// Carries the offending `Revision`.
    // NOTE(review): presumably raised when a revision (or one of its
    // parents) is not a valid revision of the graph -- confirm with the
    // implementors.
    ParentOutOfRange(Revision),
    // NOTE(review): presumably raised when asked about
    // `WORKING_DIRECTORY_REVISION` -- confirm with the implementors.
    WorkingDirectoryUnsupported,
}
71 71
/// The Mercurial Revlog Index
///
/// This is currently limited to the minimal interface that is needed for
/// the [`nodemap`](nodemap/index.html) module
pub trait RevlogIndex {
    /// Total number of Revisions referenced in this index
    fn len(&self) -> usize;

    /// Return `true` if this index references no revision at all.
    ///
    /// The default implementation compares `len()` with zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Return a reference to the Node or `None` if rev is out of bounds
    ///
    /// `NULL_REVISION` is not considered to be out of bounds.
    fn node(&self, rev: Revision) -> Option<&Node>;
}
89 89
// Per-revision flags, stored as individual bits of a 16-bit field.
// The four known flags occupy the four highest bits (15 down to 12).
const REVISION_FLAG_CENSORED: u16 = 1 << 15;
const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;

// Keep this in sync with REVIDX_KNOWN_FLAGS in
// mercurial/revlogutils/flagutil.py
//
// Bitwise union of all the flags declared above.
const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_HASCOPIESINFO;

// Flags of the synthetic entry built for `NULL_REVISION`: none set.
const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
103 103
/// Errors reported by the read-only revlog implementation.
///
/// Any `HgError` converts into the `Other` variant through the derived
/// `From` implementation, which is what makes `?` work on `HgError` results
/// inside functions returning `RevlogError`.
#[derive(Debug, derive_more::From)]
pub enum RevlogError {
    /// The requested revision (or node ID) is not present in this revlog
    InvalidRevision,
    /// Working directory is not supported
    WDirUnsupported,
    /// Found more than one entry whose ID match the requested prefix
    AmbiguousPrefix,
    /// Any other failure, including revlog corruption
    #[from]
    Other(HgError),
}
114 114
115 115 impl From<NodeMapError> for RevlogError {
116 116 fn from(error: NodeMapError) -> Self {
117 117 match error {
118 118 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
119 119 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
120 120 format!("nodemap point to revision {} not in index", rev),
121 121 ),
122 122 }
123 123 }
124 124 }
125 125
126 126 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
127 127 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
128 128 }
129 129
130 130 impl RevlogError {
131 131 fn corrupted<S: AsRef<str>>(context: S) -> Self {
132 132 RevlogError::Other(corrupted(context))
133 133 }
134 134 }
135 135
/// Read only implementation of revlog.
///
/// Built with `Revlog::open`, which fills the three fields below from the
/// files found in the store.
pub struct Revlog {
    /// When index and data are not interleaved: bytes of the revlog index.
    /// When index and data are interleaved: bytes of the revlog index and
    /// data.
    index: Index,
    /// When index and data are not interleaved: bytes of the revlog data
    ///
    /// `None` when the index is inline.
    data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
    /// When present on disk: the persistent nodemap for this revlog
    ///
    /// Also `None` when the index is inline or when the caller asked not to
    /// use a nodemap.
    nodemap: Option<nodemap::NodeTree>,
}
147 147
148 148 impl Revlog {
149 149 /// Open a revlog index file.
150 150 ///
151 151 /// It will also open the associated data file if index and data are not
152 152 /// interleaved.
153 153 pub fn open(
154 154 store_vfs: &Vfs,
155 155 index_path: impl AsRef<Path>,
156 156 data_path: Option<&Path>,
157 157 use_nodemap: bool,
158 158 ) -> Result<Self, HgError> {
159 159 let index_path = index_path.as_ref();
160 160 let index = {
161 161 match store_vfs.mmap_open_opt(&index_path)? {
162 162 None => Index::new(Box::new(vec![])),
163 163 Some(index_mmap) => {
164 164 let index = Index::new(Box::new(index_mmap))?;
165 165 Ok(index)
166 166 }
167 167 }
168 168 }?;
169 169
170 170 let default_data_path = index_path.with_extension("d");
171 171
172 172 // type annotation required
173 173 // won't recognize Mmap as Deref<Target = [u8]>
174 174 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
175 175 if index.is_inline() {
176 176 None
177 177 } else {
178 178 let data_path = data_path.unwrap_or(&default_data_path);
179 179 let data_mmap = store_vfs.mmap_open(data_path)?;
180 180 Some(Box::new(data_mmap))
181 181 };
182 182
183 183 let nodemap = if index.is_inline() || !use_nodemap {
184 184 None
185 185 } else {
186 186 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
187 187 |(docket, data)| {
188 188 nodemap::NodeTree::load_bytes(
189 189 Box::new(data),
190 190 docket.data_length,
191 191 )
192 192 },
193 193 )
194 194 };
195 195
196 196 Ok(Revlog {
197 197 index,
198 198 data_bytes,
199 199 nodemap,
200 200 })
201 201 }
202 202
/// Return number of entries of the `Revlog`.
///
/// This is the length reported by the underlying index.
pub fn len(&self) -> usize {
    self.index.len()
}

/// Returns `true` if the `Revlog` has zero `entries`.
///
/// Delegates to the underlying index.
pub fn is_empty(&self) -> bool {
    self.index.is_empty()
}
212 212
213 213 /// Returns the node ID for the given revision number, if it exists in this
214 214 /// revlog
215 215 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
216 216 if rev == NULL_REVISION {
217 217 return Some(&NULL_NODE);
218 218 }
219 219 Some(self.index.get_entry(rev)?.hash())
220 220 }
221 221
222 222 /// Return the revision number for the given node ID, if it exists in this
223 223 /// revlog
224 224 pub fn rev_from_node(
225 225 &self,
226 226 node: NodePrefix,
227 227 ) -> Result<Revision, RevlogError> {
228 228 if node.is_prefix_of(&NULL_NODE) {
229 229 return Ok(NULL_REVISION);
230 230 }
231 231
232 232 if let Some(nodemap) = &self.nodemap {
233 233 return nodemap
234 234 .find_bin(&self.index, node)?
235 235 .ok_or(RevlogError::InvalidRevision);
236 236 }
237 self.rev_from_node_no_persistent_nodemap(node)
238 }
237 239
238 // Fallback to linear scan when a persistent nodemap is not present.
239 // This happens when the persistent-nodemap experimental feature is not
240 // enabled, or for small revlogs.
241 //
240 /// Same as `rev_from_node`, without using a persistent nodemap
241 ///
242 /// This is used as fallback when a persistent nodemap is not present.
243 /// This happens when the persistent-nodemap experimental feature is not
244 /// enabled, or for small revlogs.
245 fn rev_from_node_no_persistent_nodemap(
246 &self,
247 node: NodePrefix,
248 ) -> Result<Revision, RevlogError> {
249 // Linear scan of the revlog
242 250 // TODO: consider building a non-persistent nodemap in memory to
243 251 // optimize these cases.
244 252 let mut found_by_prefix = None;
245 253 for rev in (0..self.len() as Revision).rev() {
246 254 let index_entry = self.index.get_entry(rev).ok_or_else(|| {
247 255 HgError::corrupted(
248 256 "revlog references a revision not in the index",
249 257 )
250 258 })?;
251 259 if node == *index_entry.hash() {
252 260 return Ok(rev);
253 261 }
254 262 if node.is_prefix_of(index_entry.hash()) {
255 263 if found_by_prefix.is_some() {
256 264 return Err(RevlogError::AmbiguousPrefix);
257 265 }
258 266 found_by_prefix = Some(rev)
259 267 }
260 268 }
261 269 found_by_prefix.ok_or(RevlogError::InvalidRevision)
262 270 }
263 271
/// Returns whether the given revision exists in this revlog.
///
/// This is `true` exactly when the index returns an entry for `rev`.
pub fn has_rev(&self, rev: Revision) -> bool {
    self.index.get_entry(rev).is_some()
}
268 276
269 277 /// Return the full data associated to a revision.
270 278 ///
271 279 /// All entries required to build the final data out of deltas will be
272 280 /// retrieved as needed, and the deltas will be applied to the inital
273 281 /// snapshot to rebuild the final data.
274 282 pub fn get_rev_data(
275 283 &self,
276 284 rev: Revision,
277 285 ) -> Result<Cow<[u8]>, RevlogError> {
278 286 if rev == NULL_REVISION {
279 287 return Ok(Cow::Borrowed(&[]));
280 288 };
281 289 Ok(self.get_entry(rev)?.data()?)
282 290 }
283 291
284 292 /// Check the hash of some given data against the recorded hash.
285 293 pub fn check_hash(
286 294 &self,
287 295 p1: Revision,
288 296 p2: Revision,
289 297 expected: &[u8],
290 298 data: &[u8],
291 299 ) -> bool {
292 300 let e1 = self.index.get_entry(p1);
293 301 let h1 = match e1 {
294 302 Some(ref entry) => entry.hash(),
295 303 None => &NULL_NODE,
296 304 };
297 305 let e2 = self.index.get_entry(p2);
298 306 let h2 = match e2 {
299 307 Some(ref entry) => entry.hash(),
300 308 None => &NULL_NODE,
301 309 };
302 310
303 311 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
304 312 }
305 313
306 314 /// Build the full data of a revision out its snapshot
307 315 /// and its deltas.
308 316 fn build_data_from_deltas(
309 317 snapshot: RevlogEntry,
310 318 deltas: &[RevlogEntry],
311 319 ) -> Result<Vec<u8>, HgError> {
312 320 let snapshot = snapshot.data_chunk()?;
313 321 let deltas = deltas
314 322 .iter()
315 323 .rev()
316 324 .map(RevlogEntry::data_chunk)
317 325 .collect::<Result<Vec<_>, _>>()?;
318 326 let patches: Vec<_> =
319 327 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
320 328 let patch = patch::fold_patch_lists(&patches);
321 329 Ok(patch.apply(&snapshot))
322 330 }
323 331
324 332 /// Return the revlog data.
325 333 fn data(&self) -> &[u8] {
326 334 match &self.data_bytes {
327 335 Some(data_bytes) => data_bytes,
328 336 None => panic!(
329 337 "forgot to load the data or trying to access inline data"
330 338 ),
331 339 }
332 340 }
333 341
/// Build the entry standing for `NULL_REVISION`.
///
/// It has no bytes, zero lengths, no delta base, null parents, no flags
/// and `NULL_NODE` as its hash.
pub fn make_null_entry(&self) -> RevlogEntry {
    RevlogEntry {
        revlog: self,
        rev: NULL_REVISION,
        bytes: b"",
        compressed_len: 0,
        uncompressed_len: 0,
        base_rev_or_base_of_delta_chain: None,
        p1: NULL_REVISION,
        p2: NULL_REVISION,
        flags: NULL_REVLOG_ENTRY_FLAGS,
        hash: NULL_NODE,
    }
}
348 356
349 357 /// Get an entry of the revlog.
350 358 pub fn get_entry(
351 359 &self,
352 360 rev: Revision,
353 361 ) -> Result<RevlogEntry, RevlogError> {
354 362 if rev == NULL_REVISION {
355 363 return Ok(self.make_null_entry());
356 364 }
357 365 let index_entry = self
358 366 .index
359 367 .get_entry(rev)
360 368 .ok_or(RevlogError::InvalidRevision)?;
361 369 let start = index_entry.offset();
362 370 let end = start + index_entry.compressed_len() as usize;
363 371 let data = if self.index.is_inline() {
364 372 self.index.data(start, end)
365 373 } else {
366 374 &self.data()[start..end]
367 375 };
368 376 let entry = RevlogEntry {
369 377 revlog: self,
370 378 rev,
371 379 bytes: data,
372 380 compressed_len: index_entry.compressed_len(),
373 381 uncompressed_len: index_entry.uncompressed_len(),
374 382 base_rev_or_base_of_delta_chain: if index_entry
375 383 .base_revision_or_base_of_delta_chain()
376 384 == rev
377 385 {
378 386 None
379 387 } else {
380 388 Some(index_entry.base_revision_or_base_of_delta_chain())
381 389 },
382 390 p1: index_entry.p1(),
383 391 p2: index_entry.p2(),
384 392 flags: index_entry.flags(),
385 393 hash: *index_entry.hash(),
386 394 };
387 395 Ok(entry)
388 396 }
389 397
390 398 /// when resolving internal references within revlog, any errors
391 399 /// should be reported as corruption, instead of e.g. "invalid revision"
392 400 fn get_entry_internal(
393 401 &self,
394 402 rev: Revision,
395 403 ) -> Result<RevlogEntry, HgError> {
396 404 self.get_entry(rev)
397 405 .map_err(|_| corrupted(format!("revision {} out of range", rev)))
398 406 }
399 407 }
400 408
/// The revlog entry's bytes and the necessary information to extract
/// the entry's data.
#[derive(Clone)]
pub struct RevlogEntry<'revlog> {
    /// The revlog this entry was read from
    revlog: &'revlog Revlog,
    /// Revision number of this entry
    rev: Revision,
    /// Bytes stored for this revision, as found in the revlog (they start
    /// with a compression header, see `data_chunk`)
    bytes: &'revlog [u8],
    /// Length of the stored bytes, as recorded in the index
    compressed_len: u32,
    /// Length of the full revision data, as recorded in the index
    uncompressed_len: i32,
    /// `None` when the index records the revision itself as its base,
    /// meaning that the entry is a snapshot rather than a delta
    base_rev_or_base_of_delta_chain: Option<Revision>,
    /// First parent, possibly `NULL_REVISION`
    p1: Revision,
    /// Second parent, possibly `NULL_REVISION`
    p2: Revision,
    /// Revision flags (see the `REVISION_FLAG_*` constants)
    flags: u16,
    /// Node ID of this revision
    hash: Node,
}
416 424
// One zstd bulk decompressor per thread, created on first use and reused
// by `zstd_decompress_to_buffer` instead of building one per call.
thread_local! {
  // seems fine to [unwrap] here: this can only fail due to memory allocation
  // failing, and it's normal for that to cause panic.
  static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
    RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
}
423 431
424 432 fn zstd_decompress_to_buffer(
425 433 bytes: &[u8],
426 434 buf: &mut Vec<u8>,
427 435 ) -> Result<usize, std::io::Error> {
428 436 ZSTD_DECODER
429 437 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
430 438 }
431 439
432 440 impl<'revlog> RevlogEntry<'revlog> {
/// Revision number of this entry.
pub fn revision(&self) -> Revision {
    self.rev
}

/// Node ID of this entry.
pub fn node(&self) -> &Node {
    &self.hash
}
440 448
441 449 pub fn uncompressed_len(&self) -> Option<u32> {
442 450 u32::try_from(self.uncompressed_len).ok()
443 451 }
444 452
/// Whether the first parent is something else than `NULL_REVISION`.
pub fn has_p1(&self) -> bool {
    self.p1 != NULL_REVISION
}
448 456
449 457 pub fn p1_entry(
450 458 &self,
451 459 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
452 460 if self.p1 == NULL_REVISION {
453 461 Ok(None)
454 462 } else {
455 463 Ok(Some(self.revlog.get_entry(self.p1)?))
456 464 }
457 465 }
458 466
459 467 pub fn p2_entry(
460 468 &self,
461 469 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
462 470 if self.p2 == NULL_REVISION {
463 471 Ok(None)
464 472 } else {
465 473 Ok(Some(self.revlog.get_entry(self.p2)?))
466 474 }
467 475 }
468 476
469 477 pub fn p1(&self) -> Option<Revision> {
470 478 if self.p1 == NULL_REVISION {
471 479 None
472 480 } else {
473 481 Some(self.p1)
474 482 }
475 483 }
476 484
477 485 pub fn p2(&self) -> Option<Revision> {
478 486 if self.p2 == NULL_REVISION {
479 487 None
480 488 } else {
481 489 Some(self.p2)
482 490 }
483 491 }
484 492
/// Whether `REVISION_FLAG_CENSORED` is set in the flags of this entry.
pub fn is_censored(&self) -> bool {
    (self.flags & REVISION_FLAG_CENSORED) != 0
}
488 496
489 497 pub fn has_length_affecting_flag_processor(&self) -> bool {
490 498 // Relevant Python code: revlog.size()
491 499 // note: ELLIPSIS is known to not change the content
492 500 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
493 501 }
494 502
495 503 /// The data for this entry, after resolving deltas if any.
496 504 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
497 505 let mut entry = self.clone();
498 506 let mut delta_chain = vec![];
499 507
500 508 // The meaning of `base_rev_or_base_of_delta_chain` depends on
501 509 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
502 510 // `mercurial/revlogutils/constants.py` and the code in
503 511 // [_chaininfo] and in [index_deltachain].
504 512 let uses_generaldelta = self.revlog.index.uses_generaldelta();
505 513 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
506 514 let base_rev = if uses_generaldelta {
507 515 base_rev
508 516 } else {
509 517 entry.rev - 1
510 518 };
511 519 delta_chain.push(entry);
512 520 entry = self.revlog.get_entry_internal(base_rev)?;
513 521 }
514 522
515 523 let data = if delta_chain.is_empty() {
516 524 entry.data_chunk()?
517 525 } else {
518 526 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
519 527 };
520 528
521 529 Ok(data)
522 530 }
523 531
524 532 fn check_data(
525 533 &self,
526 534 data: Cow<'revlog, [u8]>,
527 535 ) -> Result<Cow<'revlog, [u8]>, HgError> {
528 536 if self.revlog.check_hash(
529 537 self.p1,
530 538 self.p2,
531 539 self.hash.as_bytes(),
532 540 &data,
533 541 ) {
534 542 Ok(data)
535 543 } else {
536 544 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
537 545 return Err(HgError::unsupported(
538 546 "ellipsis revisions are not supported by rhg",
539 547 ));
540 548 }
541 549 Err(corrupted(format!(
542 550 "hash check failed for revision {}",
543 551 self.rev
544 552 )))
545 553 }
546 554 }
547 555
548 556 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
549 557 let data = self.rawdata()?;
550 558 if self.is_censored() {
551 559 return Err(HgError::CensoredNodeError);
552 560 }
553 561 self.check_data(data)
554 562 }
555 563
556 564 /// Extract the data contained in the entry.
557 565 /// This may be a delta. (See `is_delta`.)
558 566 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
559 567 if self.bytes.is_empty() {
560 568 return Ok(Cow::Borrowed(&[]));
561 569 }
562 570 match self.bytes[0] {
563 571 // Revision data is the entirety of the entry, including this
564 572 // header.
565 573 b'\0' => Ok(Cow::Borrowed(self.bytes)),
566 574 // Raw revision data follows.
567 575 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
568 576 // zlib (RFC 1950) data.
569 577 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
570 578 // zstd data.
571 579 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
572 580 // A proper new format should have had a repo/store requirement.
573 581 format_type => Err(corrupted(format!(
574 582 "unknown compression header '{}'",
575 583 format_type
576 584 ))),
577 585 }
578 586 }
579 587
580 588 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
581 589 let mut decoder = ZlibDecoder::new(self.bytes);
582 590 if self.is_delta() {
583 591 let mut buf = Vec::with_capacity(self.compressed_len as usize);
584 592 decoder
585 593 .read_to_end(&mut buf)
586 594 .map_err(|e| corrupted(e.to_string()))?;
587 595 Ok(buf)
588 596 } else {
589 597 let cap = self.uncompressed_len.max(0) as usize;
590 598 let mut buf = vec![0; cap];
591 599 decoder
592 600 .read_exact(&mut buf)
593 601 .map_err(|e| corrupted(e.to_string()))?;
594 602 Ok(buf)
595 603 }
596 604 }
597 605
598 606 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
599 607 let cap = self.uncompressed_len.max(0) as usize;
600 608 if self.is_delta() {
601 609 // [cap] is usually an over-estimate of the space needed because
602 610 // it's the length of delta-decoded data, but we're interested
603 611 // in the size of the delta.
604 612 // This means we have to [shrink_to_fit] to avoid holding on
605 613 // to a large chunk of memory, but it also means we must have a
606 614 // fallback branch, for the case when the delta is longer than
607 615 // the original data (surprisingly, this does happen in practice)
608 616 let mut buf = Vec::with_capacity(cap);
609 617 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
610 618 Ok(_) => buf.shrink_to_fit(),
611 619 Err(_) => {
612 620 buf.clear();
613 621 zstd::stream::copy_decode(self.bytes, &mut buf)
614 622 .map_err(|e| corrupted(e.to_string()))?;
615 623 }
616 624 };
617 625 Ok(buf)
618 626 } else {
619 627 let mut buf = Vec::with_capacity(cap);
620 628 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
621 629 .map_err(|e| corrupted(e.to_string()))?;
622 630 if len != self.uncompressed_len as usize {
623 631 Err(corrupted("uncompressed length does not match"))
624 632 } else {
625 633 Ok(buf)
626 634 }
627 635 }
628 636 }
629 637
/// Tell if the entry is a snapshot or a delta
/// (influences on decompression).
///
/// Returns `true` for a delta, that is when a base revision is recorded
/// for this entry.
fn is_delta(&self) -> bool {
    self.base_rev_or_base_of_delta_chain.is_some()
}
635 643 }
636 644
637 645 /// Calculate the hash of a revision given its data and its parents.
638 646 fn hash(
639 647 data: &[u8],
640 648 p1_hash: &[u8],
641 649 p2_hash: &[u8],
642 650 ) -> [u8; NODE_BYTES_LENGTH] {
643 651 let mut hasher = Sha1::new();
644 652 let (a, b) = (p1_hash, p2_hash);
645 653 if a > b {
646 654 hasher.update(b);
647 655 hasher.update(a);
648 656 } else {
649 657 hasher.update(a);
650 658 hasher.update(b);
651 659 }
652 660 hasher.update(data);
653 661 *hasher.finalize().as_ref()
654 662 }
655 663
656 664 #[cfg(test)]
657 665 mod tests {
658 666 use super::*;
659 667 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
660 668 use itertools::Itertools;
661 669
// An index file without any content must open as an empty revlog in which
// revision 0 does not exist.
#[test]
fn test_empty() {
    let temp = tempfile::tempdir().unwrap();
    let vfs = Vfs { base: temp.path() };
    std::fs::write(temp.path().join("foo.i"), b"").unwrap();
    let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
    assert!(revlog.is_empty());
    assert_eq!(revlog.len(), 0);
    assert!(revlog.get_entry(0).is_err());
    assert!(!revlog.has_rev(0));
}
673 681
// Three revisions written to an inline revlog: 0 and 1 have no parent,
// 2 has both of them as parents. Each one must be read back with the
// expected revision number, node ID and parents.
#[test]
fn test_inline() {
    let temp = tempfile::tempdir().unwrap();
    let vfs = Vfs { base: temp.path() };
    let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
        .unwrap();
    let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
        .unwrap();
    let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
        .unwrap();
    // Only the first entry is built with the version and inline settings.
    let entry0_bytes = IndexEntryBuilder::new()
        .is_first(true)
        .with_version(1)
        .with_inline(true)
        .with_offset(INDEX_ENTRY_SIZE)
        .with_node(node0)
        .build();
    let entry1_bytes = IndexEntryBuilder::new()
        .with_offset(INDEX_ENTRY_SIZE)
        .with_node(node1)
        .build();
    let entry2_bytes = IndexEntryBuilder::new()
        .with_offset(INDEX_ENTRY_SIZE)
        .with_p1(0)
        .with_p2(1)
        .with_node(node2)
        .build();
    // The index file is the concatenation of the three entries.
    let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
        .into_iter()
        .flatten()
        .collect_vec();
    std::fs::write(temp.path().join("foo.i"), contents).unwrap();
    let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

    // Revision 0: no parent
    let entry0 = revlog.get_entry(0).ok().unwrap();
    assert_eq!(entry0.revision(), 0);
    assert_eq!(*entry0.node(), node0);
    assert!(!entry0.has_p1());
    assert_eq!(entry0.p1(), None);
    assert_eq!(entry0.p2(), None);
    let p1_entry = entry0.p1_entry().unwrap();
    assert!(p1_entry.is_none());
    let p2_entry = entry0.p2_entry().unwrap();
    assert!(p2_entry.is_none());

    // Revision 1: no parent either
    let entry1 = revlog.get_entry(1).ok().unwrap();
    assert_eq!(entry1.revision(), 1);
    assert_eq!(*entry1.node(), node1);
    assert!(!entry1.has_p1());
    assert_eq!(entry1.p1(), None);
    assert_eq!(entry1.p2(), None);
    let p1_entry = entry1.p1_entry().unwrap();
    assert!(p1_entry.is_none());
    let p2_entry = entry1.p2_entry().unwrap();
    assert!(p2_entry.is_none());

    // Revision 2: parents are revisions 0 and 1
    let entry2 = revlog.get_entry(2).ok().unwrap();
    assert_eq!(entry2.revision(), 2);
    assert_eq!(*entry2.node(), node2);
    assert!(entry2.has_p1());
    assert_eq!(entry2.p1(), Some(0));
    assert_eq!(entry2.p2(), Some(1));
    let p1_entry = entry2.p1_entry().unwrap();
    assert!(p1_entry.is_some());
    assert_eq!(p1_entry.unwrap().revision(), 0);
    let p2_entry = entry2.p2_entry().unwrap();
    assert!(p2_entry.is_some());
    assert_eq!(p2_entry.unwrap().revision(), 1);
}
743 751 }
General Comments 0
You need to be logged in to leave comments. Login now