##// END OF EJS Templates
rust-revlog: add file IO helpers...
Raphaël Gomès -
r53052:426696af default
parent child Browse files
Show More
This diff has been collapsed as it changes many lines, (535 lines changed) Show them Hide them
@@ -0,0 +1,535
1 //! Helpers for revlog file reading and writing.
2
3 use std::{
4 cell::RefCell,
5 fs::File,
6 io::{Read, Seek, SeekFrom, Write},
7 path::{Path, PathBuf},
8 sync::{Arc, Mutex},
9 };
10
11 use crate::{
12 errors::{HgError, IoResultExt},
13 vfs::Vfs,
14 };
15
16 /// Wraps accessing arbitrary chunks of data within a file and reusing handles.
17 /// This is currently useful for accessing a revlog's data file, only reading
18 /// the ranges that are currently relevant, like a sort of basic and manual
19 /// file-based mmap.
20 ///
21 /// XXX should this just be replaced with `mmap` + `madvise` ranges?
22 /// The upcoming `UncompressedChunkCache` will make up for most of the slowness
23 /// of re-reading the same chunks, so this might not be as useful. Aside from
24 /// the major benefit of having less code to take care of, using `mmap` will
25 /// allow multiple processes to share the same pages, especially for the
26 /// changelog and manifest, which would make a difference in server contexts.
27 pub struct RandomAccessFile {
28 /// The current store VFS to pass it to [`FileHandle`]
29 vfs: Box<dyn Vfs>,
30 /// Filename of the open file, relative to the vfs root
31 pub filename: PathBuf,
32 /// The current read-only handle on the file, if any
33 pub reading_handle: RefCell<Option<FileHandle>>,
34 /// The current read-write handle on the file, if any
35 pub writing_handle: RefCell<Option<FileHandle>>,
36 }
37
38 impl RandomAccessFile {
39 /// Wrap a file for random access
40 pub fn new(vfs: Box<dyn Vfs>, filename: PathBuf) -> Self {
41 assert!(filename.is_relative());
42 Self {
43 vfs,
44 filename,
45 reading_handle: RefCell::new(None),
46 writing_handle: RefCell::new(None),
47 }
48 }
49
50 /// Read a chunk of bytes from the file.
51 pub fn read_chunk(
52 &self,
53 offset: usize,
54 length: usize,
55 ) -> Result<Vec<u8>, HgError> {
56 let mut handle = self.get_read_handle()?;
57 handle
58 .seek(SeekFrom::Start(offset as u64))
59 .when_reading_file(&self.filename)?;
60 handle.read_exact(length).when_reading_file(&self.filename)
61 }
62
63 /// `pub` only for hg-cpython
64 #[doc(hidden)]
65 pub fn get_read_handle(&self) -> Result<FileHandle, HgError> {
66 if let Some(handle) = &*self.writing_handle.borrow() {
67 // Use a file handle being actively used for writes, if available.
68 // There is some danger to doing this because reads will seek the
69 // file.
70 // However, [`Revlog::write_entry`] performs a `SeekFrom::End(0)`
71 // before all writes, so we should be safe.
72 return Ok(handle.clone());
73 }
74 if let Some(handle) = &*self.reading_handle.borrow() {
75 return Ok(handle.clone());
76 }
77 // early returns done to work around borrowck being overzealous
78 // See https://github.com/rust-lang/rust/issues/103108
79 let new_handle = FileHandle::new(
80 dyn_clone::clone_box(&*self.vfs),
81 &self.filename,
82 false,
83 false,
84 )?;
85 *self.reading_handle.borrow_mut() = Some(new_handle.clone());
86 Ok(new_handle)
87 }
88
89 /// `pub` only for hg-cpython
90 #[doc(hidden)]
91 pub fn exit_reading_context(&self) {
92 self.reading_handle.take();
93 }
94
95 // Returns whether this file currently open
96 pub fn is_open(&self) -> bool {
97 self.reading_handle.borrow().is_some()
98 || self.writing_handle.borrow().is_some()
99 }
100 }
101
102 /// A buffer that holds new changelog index data that needs to be written
103 /// after the manifest and filelogs so that the repo is updated atomically to
104 /// external processes.
105 #[derive(Clone, Debug, Default)]
106 pub struct DelayedBuffer {
107 // The actual in-memory bytes storing the delayed writes
108 pub(super) buffer: Vec<u8>,
109 /// The current offset into the virtual file composed of file + buffer
110 offset: u64,
111 /// The size of the file at the time of opening
112 file_size: u64,
113 }
114
115 impl DelayedBuffer {
116 /// Returns the length of the full data (on-disk + buffer length).
117 pub fn len(&self) -> u64 {
118 self.buffer.len() as u64 + self.file_size
119 }
120
121 pub fn is_empty(&self) -> bool {
122 self.len() == 0
123 }
124 }
125
126 /// Holds an open [`File`] and the related data. This can be used for reading
127 /// and writing. Writes can be delayed to a buffer before touching the disk,
128 /// if relevant (in the changelog case), but reads are transparent.
129 pub struct FileHandle {
130 /// The actual open file
131 pub file: File,
132 /// The VFS with which the file was opened
133 vfs: Box<dyn Vfs>,
134 /// Filename of the open file, relative to the repo root
135 filename: PathBuf,
136 /// Buffer of delayed entry writes to the changelog index. This points
137 /// back to the buffer inside the revlog this handle refers to.
138 delayed_buffer: Option<Arc<Mutex<DelayedBuffer>>>,
139 }
140
141 impl std::fmt::Debug for FileHandle {
142 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
143 f.debug_struct("FileHandle")
144 .field("filename", &self.filename)
145 .field("delayed_buffer", &self.delayed_buffer)
146 .field("file", &self.file)
147 .finish()
148 }
149 }
150
151 impl Clone for FileHandle {
152 fn clone(&self) -> Self {
153 Self {
154 vfs: dyn_clone::clone_box(&*self.vfs),
155 filename: self.filename.clone(),
156 delayed_buffer: self.delayed_buffer.clone(),
157 // This can only fail if the OS doesn't have the file handle
158 // anymore, so we're not going to do anything useful anyway.
159 file: self.file.try_clone().expect("couldn't clone file handle"),
160 }
161 }
162 }
163
164 impl FileHandle {
165 /// Get a (read or write) file handle to `filename`. Only creates the file
166 /// if `create` is `true`.
167 pub fn new(
168 vfs: Box<dyn Vfs>,
169 filename: impl AsRef<Path>,
170 create: bool,
171 write: bool,
172 ) -> Result<Self, HgError> {
173 let file = if create {
174 vfs.create(filename.as_ref())?
175 } else if write {
176 vfs.open(filename.as_ref())?
177 } else {
178 vfs.open_read(filename.as_ref())?
179 };
180 Ok(Self {
181 vfs,
182 filename: filename.as_ref().to_owned(),
183 delayed_buffer: None,
184 file,
185 })
186 }
187
188 /// Get a file handle to `filename`, but writes go to a [`DelayedBuffer`].
189 pub fn new_delayed(
190 vfs: Box<dyn Vfs>,
191 filename: impl AsRef<Path>,
192 create: bool,
193 delayed_buffer: Arc<Mutex<DelayedBuffer>>,
194 ) -> Result<Self, HgError> {
195 let mut file = if create {
196 vfs.create(filename.as_ref())?
197 } else {
198 vfs.open(filename.as_ref())?
199 };
200 let size = vfs.file_size(&file)?;
201 let offset = file
202 .stream_position()
203 .when_reading_file(filename.as_ref())?;
204
205 {
206 let mut buf = delayed_buffer.lock().unwrap();
207 buf.file_size = size;
208 buf.offset = offset;
209 }
210
211 Ok(Self {
212 vfs,
213 filename: filename.as_ref().to_owned(),
214 delayed_buffer: Some(delayed_buffer),
215 file,
216 })
217 }
218
219 /// Wrap an existing [`File`]
220 pub fn from_file(
221 file: File,
222 vfs: Box<dyn Vfs>,
223 filename: impl AsRef<Path>,
224 ) -> Self {
225 Self {
226 vfs,
227 filename: filename.as_ref().to_owned(),
228 delayed_buffer: None,
229 file,
230 }
231 }
232
233 /// Wrap an existing [`File`], but writes go to a [`DelayedBuffer`].
234 pub fn from_file_delayed(
235 mut file: File,
236 vfs: Box<dyn Vfs>,
237 filename: impl AsRef<Path>,
238 delayed_buffer: Arc<Mutex<DelayedBuffer>>,
239 ) -> Result<Self, HgError> {
240 let size = vfs.file_size(&file)?;
241 let offset = file
242 .stream_position()
243 .when_reading_file(filename.as_ref())?;
244
245 {
246 let mut buf = delayed_buffer.lock().unwrap();
247 buf.file_size = size;
248 buf.offset = offset;
249 }
250
251 Ok(Self {
252 vfs,
253 filename: filename.as_ref().to_owned(),
254 delayed_buffer: Some(delayed_buffer),
255 file,
256 })
257 }
258
259 /// Move the position of the handle to `pos`,
260 /// spanning the [`DelayedBuffer`] if defined. Will return an error if
261 /// an invalid seek position is asked, or for any standard io error.
262 pub fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> {
263 if let Some(delay_buf) = &self.delayed_buffer {
264 let mut delay_buf = delay_buf.lock().unwrap();
265 // Virtual file offset spans real file and data
266 match pos {
267 SeekFrom::Start(offset) => delay_buf.offset = offset,
268 SeekFrom::End(offset) => {
269 delay_buf.offset =
270 delay_buf.len().saturating_add_signed(offset)
271 }
272 SeekFrom::Current(offset) => {
273 delay_buf.offset =
274 delay_buf.offset.saturating_add_signed(offset);
275 }
276 }
277 if delay_buf.offset < delay_buf.file_size {
278 self.file.seek(pos)
279 } else {
280 Ok(delay_buf.offset)
281 }
282 } else {
283 self.file.seek(pos)
284 }
285 }
286
287 /// Read exactly `length` bytes from the current position.
288 /// Errors are the same as [`std::io::Read::read_exact`].
289 pub fn read_exact(
290 &mut self,
291 length: usize,
292 ) -> Result<Vec<u8>, std::io::Error> {
293 if let Some(delay_buf) = self.delayed_buffer.as_mut() {
294 let mut delay_buf = delay_buf.lock().unwrap();
295 let mut buf = vec![0; length];
296 let offset: isize =
297 delay_buf.offset.try_into().expect("buffer too large");
298 let file_size: isize =
299 delay_buf.file_size.try_into().expect("file too large");
300 let span: isize = offset - file_size;
301 let length = length.try_into().expect("too large of a length");
302 let absolute_span: u64 =
303 span.unsigned_abs().try_into().expect("length too large");
304 if span < 0 {
305 if length <= absolute_span {
306 // We're only in the file
307 self.file.read_exact(&mut buf)?;
308 } else {
309 // We're spanning file and buffer
310 self.file
311 .read_exact(&mut buf[..absolute_span as usize])?;
312 delay_buf
313 .buffer
314 .take(length - absolute_span)
315 .read_exact(&mut buf[absolute_span as usize..])?;
316 }
317 } else {
318 // We're only in the buffer
319 delay_buf.buffer[absolute_span as usize..]
320 .take(length)
321 .read_exact(&mut buf)?;
322 }
323 delay_buf.offset += length;
324 Ok(buf.to_owned())
325 } else {
326 let mut buf = vec![0; length];
327 self.file.read_exact(&mut buf)?;
328 Ok(buf)
329 }
330 }
331
332 /// Flush the in-memory changes to disk. This does *not* write the
333 /// delayed buffer, only the pending file changes.
334 pub fn flush(&mut self) -> Result<(), HgError> {
335 self.file.flush().when_writing_file(&self.filename)
336 }
337
338 /// Return the current position in the file
339 pub fn position(&mut self) -> Result<u64, HgError> {
340 self.file
341 .stream_position()
342 .when_reading_file(&self.filename)
343 }
344
345 /// Append `data` to the file, or to the [`DelayedBuffer`], if any.
346 pub fn write_all(&mut self, data: &[u8]) -> Result<(), HgError> {
347 if let Some(buf) = &mut self.delayed_buffer {
348 let mut delayed_buffer = buf.lock().expect("propagate the panic");
349 assert_eq!(delayed_buffer.offset, delayed_buffer.len());
350 delayed_buffer.buffer.extend_from_slice(data);
351 delayed_buffer.offset += data.len() as u64;
352 Ok(())
353 } else {
354 self.file
355 .write_all(data)
356 .when_writing_file(&self.filename)?;
357 Ok(())
358 }
359 }
360 }
361
362 /// Write handles to a given revlog (index + maybe data)
363 #[derive(Debug)]
364 pub struct WriteHandles {
365 /// Handle to the index file
366 pub index_handle: FileHandle,
367 /// Handle to the data file, if the revlog is non-inline
368 pub data_handle: Option<FileHandle>,
369 }
370
#[cfg(test)]
mod tests {
    use std::io::ErrorKind;

    use crate::vfs::VfsImpl;

    use super::*;

    /// A [`RandomAccessFile`] is closed until first read, reports a missing
    /// file as `NotFound`, and opens (and caches) a handle once the file
    /// exists.
    #[test]
    fn test_random_access_file() {
        // Keep the guard alive until the end of the test: the directory is
        // removed when it is dropped, instead of being left behind.
        let tmp = tempfile::tempdir().unwrap();
        let base = tmp.path().to_owned();
        let filename = Path::new("a");
        let file_path = base.join(filename);
        let raf = RandomAccessFile::new(
            Box::new(VfsImpl { base }),
            filename.to_owned(),
        );

        assert!(!raf.is_open());
        assert_eq!(&raf.filename, &filename);
        // Should fail to read a non-existing file
        match raf.get_read_handle().unwrap_err() {
            HgError::IoError { error, .. } => match error.kind() {
                std::io::ErrorKind::NotFound => {}
                _ => panic!("should be not found"),
            },
            e => panic!("{}", e),
        }

        std::fs::write(file_path, b"1234567890").unwrap();

        // Should be able to open an existing file
        let mut handle = raf.get_read_handle().unwrap();
        assert!(raf.is_open());
        assert_eq!(handle.read_exact(10).unwrap(), b"1234567890".to_vec());
    }

    /// Exercises [`FileHandle`] as a read-only handle, as a read-write
    /// handle, and as a handle whose writes go to a [`DelayedBuffer`].
    #[test]
    fn test_file_handle() {
        // Keep the guard alive until the end of the test: the directory is
        // removed when it is dropped, instead of being left behind.
        let tmp = tempfile::tempdir().unwrap();
        let base = tmp.path().to_owned();
        let filename = base.join("a");
        // Opening a missing file without `create` should fail
        FileHandle::new(
            Box::new(VfsImpl { base: base.clone() }),
            &filename,
            false,
            false,
        )
        .unwrap_err();
        std::fs::write(&filename, b"1234567890").unwrap();

        let mut read_handle = FileHandle::new(
            Box::new(VfsImpl { base: base.clone() }),
            &filename,
            false,
            false,
        )
        .unwrap();
        assert_eq!(&read_handle.filename, &filename);
        assert_eq!(read_handle.position().unwrap(), 0);

        // Writing to an explicit read handle should fail
        read_handle.write_all(b"some data").unwrap_err();

        // reading exactly n bytes should work
        assert_eq!(read_handle.read_exact(3).unwrap(), b"123".to_vec());
        // and the position should be remembered
        assert_eq!(read_handle.read_exact(2).unwrap(), b"45".to_vec());

        // Seeking should work
        let position = read_handle.position().unwrap();
        read_handle.seek(SeekFrom::Current(-2)).unwrap();
        assert_eq!(position - 2, read_handle.position().unwrap());

        // Reading more data than the file holds should fail
        read_handle.read_exact(1000).unwrap_err();

        // Work around the yet unimplemented VFS for write
        let mut options = std::fs::OpenOptions::new();
        options.read(true);
        options.write(true);
        let file = options.open(&filename).unwrap();
        // Open a write handle
        let mut handle = FileHandle::from_file(
            file,
            Box::new(VfsImpl { base: base.clone() }),
            &filename,
        );

        // Now writing should succeed
        handle.write_all(b"new data").unwrap();
        // Opening does not seek, so the write started at offset 0 and the
        // position is now right after the 8 bytes that were written
        assert_eq!(handle.position().unwrap(), 8);
        // We can still read
        assert_eq!(handle.read_exact(2).unwrap(), b"90".to_vec());
        // Flushing doesn't do anything unexpected
        handle.flush().unwrap();

        let delayed_buffer = Arc::new(Mutex::new(DelayedBuffer::default()));
        let file = options.open(&filename).unwrap();
        let mut handle = FileHandle::from_file_delayed(
            file,
            Box::new(VfsImpl { base: base.clone() }),
            &filename,
            delayed_buffer,
        )
        .unwrap();

        assert_eq!(
            handle
                .delayed_buffer
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .file_size,
            10
        );
        handle.seek(SeekFrom::End(0)).unwrap();
        handle.write_all(b"should go to buffer").unwrap();
        assert_eq!(
            handle
                .delayed_buffer
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .len(),
            29
        );
        read_handle.seek(SeekFrom::Start(0)).unwrap();
        // The delayed write must not have touched the on-disk file
        assert_eq!(
            read_handle.read_exact(10).unwrap(),
            b"new data90".to_vec(),
        );

        assert_eq!(
            read_handle.read_exact(1).unwrap_err().kind(),
            ErrorKind::UnexpectedEof
        );

        handle.flush().unwrap();
        // On-disk file contents should still be unchanged after a flush
        assert_eq!(
            read_handle.read_exact(1).unwrap_err().kind(),
            ErrorKind::UnexpectedEof
        );

        // Read from the buffer only
        handle.seek(SeekFrom::End(-1)).unwrap();
        assert_eq!(handle.read_exact(1).unwrap(), b"r".to_vec());

        // Read from an overlapping section of file and buffer
        handle.seek(SeekFrom::Start(6)).unwrap();
        assert_eq!(
            handle.read_exact(20).unwrap(),
            b"ta90should go to buf".to_vec()
        );

        // Read from file only
        handle.seek(SeekFrom::Start(0)).unwrap();
        assert_eq!(handle.read_exact(8).unwrap(), b"new data".to_vec());
    }
}
@@ -1,1331 +1,1332
1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Mercurial concepts for handling revision history
6 //! Mercurial concepts for handling revision history
7
7
8 pub mod node;
8 pub mod node;
9 pub mod nodemap;
9 pub mod nodemap;
10 mod nodemap_docket;
10 mod nodemap_docket;
11 pub mod path_encode;
11 pub mod path_encode;
12 use compression::{uncompressed_zstd_data, CompressionConfig};
12 use compression::{uncompressed_zstd_data, CompressionConfig};
13 pub use node::{FromHexError, Node, NodePrefix};
13 pub use node::{FromHexError, Node, NodePrefix};
14 pub mod changelog;
14 pub mod changelog;
15 pub mod compression;
15 pub mod compression;
16 pub mod file_io;
16 pub mod filelog;
17 pub mod filelog;
17 pub mod index;
18 pub mod index;
18 pub mod manifest;
19 pub mod manifest;
19 pub mod patch;
20 pub mod patch;
20
21
21 use std::borrow::Cow;
22 use std::borrow::Cow;
22 use std::collections::HashSet;
23 use std::collections::HashSet;
23 use std::io::Read;
24 use std::io::Read;
24 use std::ops::Deref;
25 use std::ops::Deref;
25 use std::path::Path;
26 use std::path::Path;
26
27
27 use flate2::read::ZlibDecoder;
28 use flate2::read::ZlibDecoder;
28 use sha1::{Digest, Sha1};
29 use sha1::{Digest, Sha1};
29
30
30 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
31 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
31 use self::nodemap_docket::NodeMapDocket;
32 use self::nodemap_docket::NodeMapDocket;
32 use super::index::Index;
33 use super::index::Index;
33 use super::index::INDEX_ENTRY_SIZE;
34 use super::index::INDEX_ENTRY_SIZE;
34 use super::nodemap::{NodeMap, NodeMapError};
35 use super::nodemap::{NodeMap, NodeMapError};
35 use crate::config::{Config, ResourceProfileValue};
36 use crate::config::{Config, ResourceProfileValue};
36 use crate::errors::HgError;
37 use crate::errors::HgError;
37 use crate::exit_codes;
38 use crate::exit_codes;
38 use crate::requirements::{
39 use crate::requirements::{
39 GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT, SPARSEREVLOG_REQUIREMENT,
40 GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT, SPARSEREVLOG_REQUIREMENT,
40 };
41 };
41 use crate::vfs::VfsImpl;
42 use crate::vfs::VfsImpl;
42
43
43 /// As noted in revlog.c, revision numbers are actually encoded in
44 /// As noted in revlog.c, revision numbers are actually encoded in
44 /// 4 bytes, and are liberally converted to ints, whence the i32
45 /// 4 bytes, and are liberally converted to ints, whence the i32
45 pub type BaseRevision = i32;
46 pub type BaseRevision = i32;
46
47
47 /// Mercurial revision numbers
48 /// Mercurial revision numbers
48 /// In contrast to the more general [`UncheckedRevision`], these are "checked"
49 /// In contrast to the more general [`UncheckedRevision`], these are "checked"
49 /// in the sense that they should only be used for revisions that are
50 /// in the sense that they should only be used for revisions that are
50 /// valid for a given index (i.e. in bounds).
51 /// valid for a given index (i.e. in bounds).
51 #[derive(
52 #[derive(
52 Debug,
53 Debug,
53 derive_more::Display,
54 derive_more::Display,
54 Clone,
55 Clone,
55 Copy,
56 Copy,
56 Hash,
57 Hash,
57 PartialEq,
58 PartialEq,
58 Eq,
59 Eq,
59 PartialOrd,
60 PartialOrd,
60 Ord,
61 Ord,
61 )]
62 )]
62 pub struct Revision(pub BaseRevision);
63 pub struct Revision(pub BaseRevision);
63
64
64 impl format_bytes::DisplayBytes for Revision {
65 impl format_bytes::DisplayBytes for Revision {
65 fn display_bytes(
66 fn display_bytes(
66 &self,
67 &self,
67 output: &mut dyn std::io::Write,
68 output: &mut dyn std::io::Write,
68 ) -> std::io::Result<()> {
69 ) -> std::io::Result<()> {
69 self.0.display_bytes(output)
70 self.0.display_bytes(output)
70 }
71 }
71 }
72 }
72
73
73 /// Unchecked Mercurial revision numbers.
74 /// Unchecked Mercurial revision numbers.
74 ///
75 ///
75 /// Values of this type have no guarantee of being a valid revision number
76 /// Values of this type have no guarantee of being a valid revision number
76 /// in any context. Use method `check_revision` to get a valid revision within
77 /// in any context. Use method `check_revision` to get a valid revision within
77 /// the appropriate index object.
78 /// the appropriate index object.
78 #[derive(
79 #[derive(
79 Debug,
80 Debug,
80 derive_more::Display,
81 derive_more::Display,
81 Clone,
82 Clone,
82 Copy,
83 Copy,
83 Hash,
84 Hash,
84 PartialEq,
85 PartialEq,
85 Eq,
86 Eq,
86 PartialOrd,
87 PartialOrd,
87 Ord,
88 Ord,
88 )]
89 )]
89 pub struct UncheckedRevision(pub BaseRevision);
90 pub struct UncheckedRevision(pub BaseRevision);
90
91
91 impl format_bytes::DisplayBytes for UncheckedRevision {
92 impl format_bytes::DisplayBytes for UncheckedRevision {
92 fn display_bytes(
93 fn display_bytes(
93 &self,
94 &self,
94 output: &mut dyn std::io::Write,
95 output: &mut dyn std::io::Write,
95 ) -> std::io::Result<()> {
96 ) -> std::io::Result<()> {
96 self.0.display_bytes(output)
97 self.0.display_bytes(output)
97 }
98 }
98 }
99 }
99
100
100 impl From<Revision> for UncheckedRevision {
101 impl From<Revision> for UncheckedRevision {
101 fn from(value: Revision) -> Self {
102 fn from(value: Revision) -> Self {
102 Self(value.0)
103 Self(value.0)
103 }
104 }
104 }
105 }
105
106
106 impl From<BaseRevision> for UncheckedRevision {
107 impl From<BaseRevision> for UncheckedRevision {
107 fn from(value: BaseRevision) -> Self {
108 fn from(value: BaseRevision) -> Self {
108 Self(value)
109 Self(value)
109 }
110 }
110 }
111 }
111
112
112 /// Marker expressing the absence of a parent
113 /// Marker expressing the absence of a parent
113 ///
114 ///
114 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
115 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
115 /// to be smaller than all existing revisions.
116 /// to be smaller than all existing revisions.
116 pub const NULL_REVISION: Revision = Revision(-1);
117 pub const NULL_REVISION: Revision = Revision(-1);
117
118
118 /// Same as `mercurial.node.wdirrev`
119 /// Same as `mercurial.node.wdirrev`
119 ///
120 ///
120 /// This is also equal to `i32::max_value()`, but it's better to spell
121 /// This is also equal to `i32::max_value()`, but it's better to spell
120 /// it out explicitly, same as in `mercurial.node`
121 /// it out explicitly, same as in `mercurial.node`
122 #[allow(clippy::unreadable_literal)]
123 #[allow(clippy::unreadable_literal)]
123 pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
124 pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
124 UncheckedRevision(0x7fffffff);
125 UncheckedRevision(0x7fffffff);
125
126
126 pub const WORKING_DIRECTORY_HEX: &str =
127 pub const WORKING_DIRECTORY_HEX: &str =
127 "ffffffffffffffffffffffffffffffffffffffff";
128 "ffffffffffffffffffffffffffffffffffffffff";
128
129
129 /// The simplest expression of what we need of Mercurial DAGs.
130 /// The simplest expression of what we need of Mercurial DAGs.
130 pub trait Graph {
131 pub trait Graph {
131 /// Return the two parents of the given `Revision`.
132 /// Return the two parents of the given `Revision`.
132 ///
133 ///
133 /// Each of the parents can be independently `NULL_REVISION`
134 /// Each of the parents can be independently `NULL_REVISION`
134 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
135 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
135 }
136 }
136
137
137 #[derive(Clone, Debug, PartialEq)]
138 #[derive(Clone, Debug, PartialEq)]
138 pub enum GraphError {
139 pub enum GraphError {
139 ParentOutOfRange(Revision),
140 ParentOutOfRange(Revision),
140 }
141 }
141
142
142 impl std::fmt::Display for GraphError {
143 impl std::fmt::Display for GraphError {
143 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
144 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
144 match self {
145 match self {
145 GraphError::ParentOutOfRange(revision) => {
146 GraphError::ParentOutOfRange(revision) => {
146 write!(f, "parent out of range ({})", revision)
147 write!(f, "parent out of range ({})", revision)
147 }
148 }
148 }
149 }
149 }
150 }
150 }
151 }
151
152
152 impl<T: Graph> Graph for &T {
153 impl<T: Graph> Graph for &T {
153 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
154 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
154 (*self).parents(rev)
155 (*self).parents(rev)
155 }
156 }
156 }
157 }
157
158
158 /// The Mercurial Revlog Index
159 /// The Mercurial Revlog Index
159 ///
160 ///
160 /// This is currently limited to the minimal interface that is needed for
161 /// This is currently limited to the minimal interface that is needed for
161 /// the [`nodemap`](nodemap/index.html) module
162 /// the [`nodemap`](nodemap/index.html) module
162 pub trait RevlogIndex {
163 pub trait RevlogIndex {
163 /// Total number of Revisions referenced in this index
164 /// Total number of Revisions referenced in this index
164 fn len(&self) -> usize;
165 fn len(&self) -> usize;
165
166
166 fn is_empty(&self) -> bool {
167 fn is_empty(&self) -> bool {
167 self.len() == 0
168 self.len() == 0
168 }
169 }
169
170
170 /// Return a reference to the Node or `None` for `NULL_REVISION`
171 /// Return a reference to the Node or `None` for `NULL_REVISION`
171 fn node(&self, rev: Revision) -> Option<&Node>;
172 fn node(&self, rev: Revision) -> Option<&Node>;
172
173
173 /// Return a [`Revision`] if `rev` is a valid revision number for this
174 /// Return a [`Revision`] if `rev` is a valid revision number for this
174 /// index.
175 /// index.
175 ///
176 ///
176 /// [`NULL_REVISION`] is considered to be valid.
177 /// [`NULL_REVISION`] is considered to be valid.
177 #[inline(always)]
178 #[inline(always)]
178 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
179 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
179 let rev = rev.0;
180 let rev = rev.0;
180
181
181 if rev == NULL_REVISION.0 || (rev >= 0 && (rev as usize) < self.len())
182 if rev == NULL_REVISION.0 || (rev >= 0 && (rev as usize) < self.len())
182 {
183 {
183 Some(Revision(rev))
184 Some(Revision(rev))
184 } else {
185 } else {
185 None
186 None
186 }
187 }
187 }
188 }
188 }
189 }
189
190
190 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
191 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
191 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
192 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
192 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
193 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
193 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
194 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
194
195
195 // Keep this in sync with REVIDX_KNOWN_FLAGS in
196 // Keep this in sync with REVIDX_KNOWN_FLAGS in
196 // mercurial/revlogutils/flagutil.py
197 // mercurial/revlogutils/flagutil.py
197 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
198 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
198 | REVISION_FLAG_ELLIPSIS
199 | REVISION_FLAG_ELLIPSIS
199 | REVISION_FLAG_EXTSTORED
200 | REVISION_FLAG_EXTSTORED
200 | REVISION_FLAG_HASCOPIESINFO;
201 | REVISION_FLAG_HASCOPIESINFO;
201
202
202 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
203 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
203
204
204 #[derive(Debug, derive_more::From, derive_more::Display)]
205 #[derive(Debug, derive_more::From, derive_more::Display)]
205 pub enum RevlogError {
206 pub enum RevlogError {
206 #[display(fmt = "invalid revision identifier: {}", "_0")]
207 #[display(fmt = "invalid revision identifier: {}", "_0")]
207 InvalidRevision(String),
208 InvalidRevision(String),
208 /// Working directory is not supported
209 /// Working directory is not supported
209 WDirUnsupported,
210 WDirUnsupported,
210 /// Found more than one entry whose ID match the requested prefix
211 /// Found more than one entry whose ID match the requested prefix
211 AmbiguousPrefix,
212 AmbiguousPrefix,
212 #[from]
213 #[from]
213 Other(HgError),
214 Other(HgError),
214 }
215 }
215
216
216 impl From<NodeMapError> for RevlogError {
217 impl From<NodeMapError> for RevlogError {
217 fn from(error: NodeMapError) -> Self {
218 fn from(error: NodeMapError) -> Self {
218 match error {
219 match error {
219 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
220 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
220 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
221 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
221 format!("nodemap point to revision {} not in index", rev),
222 format!("nodemap point to revision {} not in index", rev),
222 ),
223 ),
223 }
224 }
224 }
225 }
225 }
226 }
226
227
227 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
228 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
228 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
229 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
229 }
230 }
230
231
231 impl RevlogError {
232 impl RevlogError {
232 fn corrupted<S: AsRef<str>>(context: S) -> Self {
233 fn corrupted<S: AsRef<str>>(context: S) -> Self {
233 RevlogError::Other(corrupted(context))
234 RevlogError::Other(corrupted(context))
234 }
235 }
235 }
236 }
236
237
237 #[derive(derive_more::Display, Debug, Copy, Clone, PartialEq, Eq)]
238 #[derive(derive_more::Display, Debug, Copy, Clone, PartialEq, Eq)]
238 pub enum RevlogType {
239 pub enum RevlogType {
239 Changelog,
240 Changelog,
240 Manifestlog,
241 Manifestlog,
241 Filelog,
242 Filelog,
242 }
243 }
243
244
244 impl TryFrom<usize> for RevlogType {
245 impl TryFrom<usize> for RevlogType {
245 type Error = HgError;
246 type Error = HgError;
246
247
247 fn try_from(value: usize) -> Result<Self, Self::Error> {
248 fn try_from(value: usize) -> Result<Self, Self::Error> {
248 match value {
249 match value {
249 1001 => Ok(Self::Changelog),
250 1001 => Ok(Self::Changelog),
250 1002 => Ok(Self::Manifestlog),
251 1002 => Ok(Self::Manifestlog),
251 1003 => Ok(Self::Filelog),
252 1003 => Ok(Self::Filelog),
252 t => Err(HgError::abort(
253 t => Err(HgError::abort(
253 format!("Unknown revlog type {}", t),
254 format!("Unknown revlog type {}", t),
254 exit_codes::ABORT,
255 exit_codes::ABORT,
255 None,
256 None,
256 )),
257 )),
257 }
258 }
258 }
259 }
259 }
260 }
260
261
/// Configuration values that govern how revlog data is read.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct RevlogDataConfig {
    /// Whether to try opening the "pending" version of the revlog
    pub try_pending: bool,
    /// Whether to try opening the "split" version of the revlog
    pub try_split: bool,
    /// When true, `indexfile` should be opened with `checkambig=True` at
    /// writing time, to avoid file stat ambiguity
    pub check_ambig: bool,
    /// When true, large indexes are accessed through mmap instead of being
    /// read
    pub mmap_large_index: bool,
    /// Amount of data from which an index is considered large
    pub mmap_index_threshold: Option<u64>,
    /// Amount of data to read and cache into the raw revlog data cache
    pub chunk_cache_size: u64,
    /// Size of the uncompressed cache, relative to the largest revision
    /// seen
    pub uncompressed_cache_factor: Option<f64>,
    /// Number of chunks kept in the uncompressed cache
    pub uncompressed_cache_count: Option<u64>,
    /// Whether sparse reading of the revlog data is allowed
    pub with_sparse_read: bool,
    /// Minimal density of a sparse read chunk
    pub sr_density_threshold: f64,
    /// Minimal size of the data skipped when performing sparse reads
    pub sr_min_gap_size: u64,
    /// Whether deltas are encoded against arbitrary bases
    pub general_delta: bool,
}
291
292
292 impl RevlogDataConfig {
293 impl RevlogDataConfig {
293 pub fn new(
294 pub fn new(
294 config: &Config,
295 config: &Config,
295 requirements: &HashSet<String>,
296 requirements: &HashSet<String>,
296 ) -> Result<Self, HgError> {
297 ) -> Result<Self, HgError> {
297 let mut data_config = Self::default();
298 let mut data_config = Self::default();
298 if let Some(chunk_cache_size) =
299 if let Some(chunk_cache_size) =
299 config.get_byte_size(b"format", b"chunkcachesize")?
300 config.get_byte_size(b"format", b"chunkcachesize")?
300 {
301 {
301 data_config.chunk_cache_size = chunk_cache_size;
302 data_config.chunk_cache_size = chunk_cache_size;
302 }
303 }
303
304
304 let memory_profile = config.get_resource_profile(Some("memory"));
305 let memory_profile = config.get_resource_profile(Some("memory"));
305 if memory_profile.value >= ResourceProfileValue::Medium {
306 if memory_profile.value >= ResourceProfileValue::Medium {
306 data_config.uncompressed_cache_count = Some(10_000);
307 data_config.uncompressed_cache_count = Some(10_000);
307 data_config.uncompressed_cache_factor = Some(4.0);
308 data_config.uncompressed_cache_factor = Some(4.0);
308 if memory_profile.value >= ResourceProfileValue::High {
309 if memory_profile.value >= ResourceProfileValue::High {
309 data_config.uncompressed_cache_factor = Some(10.0)
310 data_config.uncompressed_cache_factor = Some(10.0)
310 }
311 }
311 }
312 }
312
313
313 if let Some(mmap_index_threshold) = config
314 if let Some(mmap_index_threshold) = config
314 .get_byte_size(b"storage", b"revlog.mmap.index:size-threshold")?
315 .get_byte_size(b"storage", b"revlog.mmap.index:size-threshold")?
315 {
316 {
316 data_config.mmap_index_threshold = Some(mmap_index_threshold);
317 data_config.mmap_index_threshold = Some(mmap_index_threshold);
317 }
318 }
318
319
319 let with_sparse_read =
320 let with_sparse_read =
320 config.get_bool(b"experimental", b"sparse-read")?;
321 config.get_bool(b"experimental", b"sparse-read")?;
321 if let Some(sr_density_threshold) = config
322 if let Some(sr_density_threshold) = config
322 .get_f64(b"experimental", b"sparse-read.density-threshold")?
323 .get_f64(b"experimental", b"sparse-read.density-threshold")?
323 {
324 {
324 data_config.sr_density_threshold = sr_density_threshold;
325 data_config.sr_density_threshold = sr_density_threshold;
325 }
326 }
326 data_config.with_sparse_read = with_sparse_read;
327 data_config.with_sparse_read = with_sparse_read;
327 if let Some(sr_min_gap_size) = config
328 if let Some(sr_min_gap_size) = config
328 .get_byte_size(b"experimental", b"sparse-read.min-gap-size")?
329 .get_byte_size(b"experimental", b"sparse-read.min-gap-size")?
329 {
330 {
330 data_config.sr_min_gap_size = sr_min_gap_size;
331 data_config.sr_min_gap_size = sr_min_gap_size;
331 }
332 }
332
333
333 data_config.with_sparse_read =
334 data_config.with_sparse_read =
334 requirements.contains(SPARSEREVLOG_REQUIREMENT);
335 requirements.contains(SPARSEREVLOG_REQUIREMENT);
335
336
336 Ok(data_config)
337 Ok(data_config)
337 }
338 }
338 }
339 }
339
340
340 impl Default for RevlogDataConfig {
341 impl Default for RevlogDataConfig {
341 fn default() -> Self {
342 fn default() -> Self {
342 Self {
343 Self {
343 chunk_cache_size: 65536,
344 chunk_cache_size: 65536,
344 sr_density_threshold: 0.50,
345 sr_density_threshold: 0.50,
345 sr_min_gap_size: 262144,
346 sr_min_gap_size: 262144,
346 try_pending: Default::default(),
347 try_pending: Default::default(),
347 try_split: Default::default(),
348 try_split: Default::default(),
348 check_ambig: Default::default(),
349 check_ambig: Default::default(),
349 mmap_large_index: Default::default(),
350 mmap_large_index: Default::default(),
350 mmap_index_threshold: Default::default(),
351 mmap_index_threshold: Default::default(),
351 uncompressed_cache_factor: Default::default(),
352 uncompressed_cache_factor: Default::default(),
352 uncompressed_cache_count: Default::default(),
353 uncompressed_cache_count: Default::default(),
353 with_sparse_read: Default::default(),
354 with_sparse_read: Default::default(),
354 general_delta: Default::default(),
355 general_delta: Default::default(),
355 }
356 }
356 }
357 }
357 }
358 }
358
359
/// Configuration values that govern how new deltas are computed.
///
/// Some attributes are duplicated from [`RevlogDataConfig`] so that each
/// object stays self contained.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct RevlogDeltaConfig {
    /// Whether deltas can be encoded against arbitrary bases
    pub general_delta: bool,
    /// Whether sparse writing of the revlog data is allowed
    pub sparse_revlog: bool,
    /// Maximum length of a delta chain
    pub max_chain_len: Option<u64>,
    /// Maximum distance between the start and the end of a delta chain
    pub max_deltachain_span: Option<u64>,
    /// When not `None`, the expected maximal gain from compression for the
    /// data content
    pub upper_bound_comp: Option<f64>,
    /// Whether a delta against both parents should be tried
    pub delta_both_parents: bool,
    /// Maximal size of the chunks by which delta base candidate groups are
    /// tested
    pub candidate_group_chunk_size: u64,
    /// Whether debug information about delta computation is displayed
    pub debug_delta: bool,
    /// Whether incoming deltas are trusted by default
    pub lazy_delta: bool,
    /// Whether the base of incoming deltas is trusted by default
    pub lazy_delta_base: bool,
}
387 impl RevlogDeltaConfig {
388 impl RevlogDeltaConfig {
388 pub fn new(
389 pub fn new(
389 config: &Config,
390 config: &Config,
390 requirements: &HashSet<String>,
391 requirements: &HashSet<String>,
391 revlog_type: RevlogType,
392 revlog_type: RevlogType,
392 ) -> Result<Self, HgError> {
393 ) -> Result<Self, HgError> {
393 let mut delta_config = Self {
394 let mut delta_config = Self {
394 delta_both_parents: config
395 delta_both_parents: config
395 .get_option_no_default(
396 .get_option_no_default(
396 b"storage",
397 b"storage",
397 b"revlog.optimize-delta-parent-choice",
398 b"revlog.optimize-delta-parent-choice",
398 )?
399 )?
399 .unwrap_or(true),
400 .unwrap_or(true),
400 candidate_group_chunk_size: config
401 candidate_group_chunk_size: config
401 .get_u64(
402 .get_u64(
402 b"storage",
403 b"storage",
403 b"revlog.delta-parent-search.candidate-group-chunk-size",
404 b"revlog.delta-parent-search.candidate-group-chunk-size",
404 )?
405 )?
405 .unwrap_or_default(),
406 .unwrap_or_default(),
406 ..Default::default()
407 ..Default::default()
407 };
408 };
408
409
409 delta_config.debug_delta =
410 delta_config.debug_delta =
410 config.get_bool(b"debug", b"revlog.debug-delta")?;
411 config.get_bool(b"debug", b"revlog.debug-delta")?;
411
412
412 delta_config.general_delta =
413 delta_config.general_delta =
413 requirements.contains(GENERALDELTA_REQUIREMENT);
414 requirements.contains(GENERALDELTA_REQUIREMENT);
414
415
415 let lazy_delta =
416 let lazy_delta =
416 config.get_bool(b"storage", b"revlog.reuse-external-delta")?;
417 config.get_bool(b"storage", b"revlog.reuse-external-delta")?;
417
418
418 if revlog_type == RevlogType::Manifestlog {
419 if revlog_type == RevlogType::Manifestlog {
419 // upper bound of what we expect from compression
420 // upper bound of what we expect from compression
420 // (real life value seems to be 3)
421 // (real life value seems to be 3)
421 delta_config.upper_bound_comp = Some(3.0)
422 delta_config.upper_bound_comp = Some(3.0)
422 }
423 }
423
424
424 let mut lazy_delta_base = false;
425 let mut lazy_delta_base = false;
425 if lazy_delta {
426 if lazy_delta {
426 lazy_delta_base = match config.get_option_no_default(
427 lazy_delta_base = match config.get_option_no_default(
427 b"storage",
428 b"storage",
428 b"revlog.reuse-external-delta-parent",
429 b"revlog.reuse-external-delta-parent",
429 )? {
430 )? {
430 Some(base) => base,
431 Some(base) => base,
431 None => config.get_bool(b"format", b"generaldelta")?,
432 None => config.get_bool(b"format", b"generaldelta")?,
432 };
433 };
433 }
434 }
434 delta_config.lazy_delta = lazy_delta;
435 delta_config.lazy_delta = lazy_delta;
435 delta_config.lazy_delta_base = lazy_delta_base;
436 delta_config.lazy_delta_base = lazy_delta_base;
436
437
437 delta_config.max_deltachain_span =
438 delta_config.max_deltachain_span =
438 match config.get_i64(b"experimental", b"maxdeltachainspan")? {
439 match config.get_i64(b"experimental", b"maxdeltachainspan")? {
439 Some(span) => {
440 Some(span) => {
440 if span < 0 {
441 if span < 0 {
441 None
442 None
442 } else {
443 } else {
443 Some(span as u64)
444 Some(span as u64)
444 }
445 }
445 }
446 }
446 None => None,
447 None => None,
447 };
448 };
448
449
449 delta_config.sparse_revlog =
450 delta_config.sparse_revlog =
450 requirements.contains(SPARSEREVLOG_REQUIREMENT);
451 requirements.contains(SPARSEREVLOG_REQUIREMENT);
451
452
452 delta_config.max_chain_len =
453 delta_config.max_chain_len =
453 config.get_byte_size_no_default(b"format", b"maxchainlen")?;
454 config.get_byte_size_no_default(b"format", b"maxchainlen")?;
454
455
455 Ok(delta_config)
456 Ok(delta_config)
456 }
457 }
457 }
458 }
458
459
459 impl Default for RevlogDeltaConfig {
460 impl Default for RevlogDeltaConfig {
460 fn default() -> Self {
461 fn default() -> Self {
461 Self {
462 Self {
462 delta_both_parents: true,
463 delta_both_parents: true,
463 lazy_delta: true,
464 lazy_delta: true,
464 general_delta: Default::default(),
465 general_delta: Default::default(),
465 sparse_revlog: Default::default(),
466 sparse_revlog: Default::default(),
466 max_chain_len: Default::default(),
467 max_chain_len: Default::default(),
467 max_deltachain_span: Default::default(),
468 max_deltachain_span: Default::default(),
468 upper_bound_comp: Default::default(),
469 upper_bound_comp: Default::default(),
469 candidate_group_chunk_size: Default::default(),
470 candidate_group_chunk_size: Default::default(),
470 debug_delta: Default::default(),
471 debug_delta: Default::default(),
471 lazy_delta_base: Default::default(),
472 lazy_delta_base: Default::default(),
472 }
473 }
473 }
474 }
474 }
475 }
475
476
476 #[derive(Debug, Default, Clone, Copy, PartialEq)]
477 #[derive(Debug, Default, Clone, Copy, PartialEq)]
477 /// Holds configuration values about the available revlog features
478 /// Holds configuration values about the available revlog features
478 pub struct RevlogFeatureConfig {
479 pub struct RevlogFeatureConfig {
479 /// The compression engine and its options
480 /// The compression engine and its options
480 pub compression_engine: CompressionConfig,
481 pub compression_engine: CompressionConfig,
481 /// Can we use censor on this revlog
482 /// Can we use censor on this revlog
482 pub censorable: bool,
483 pub censorable: bool,
483 /// Does this revlog use the "side data" feature
484 /// Does this revlog use the "side data" feature
484 pub has_side_data: bool,
485 pub has_side_data: bool,
485 /// Might remove this configuration once the rank computation has no
486 /// Might remove this configuration once the rank computation has no
486 /// impact
487 /// impact
487 pub compute_rank: bool,
488 pub compute_rank: bool,
488 /// Parent order is supposed to be semantically irrelevant, so we
489 /// Parent order is supposed to be semantically irrelevant, so we
489 /// normally re-sort parents to ensure that the first parent is non-null,
490 /// normally re-sort parents to ensure that the first parent is non-null,
490 /// if there is a non-null parent at all.
491 /// if there is a non-null parent at all.
491 /// filelog abuses the parent order as a flag to mark some instances of
492 /// filelog abuses the parent order as a flag to mark some instances of
492 /// meta-encoded files, so allow it to disable this behavior.
493 /// meta-encoded files, so allow it to disable this behavior.
493 pub canonical_parent_order: bool,
494 pub canonical_parent_order: bool,
494 /// Can ellipsis commit be used
495 /// Can ellipsis commit be used
495 pub enable_ellipsis: bool,
496 pub enable_ellipsis: bool,
496 }
497 }
497 impl RevlogFeatureConfig {
498 impl RevlogFeatureConfig {
498 pub fn new(
499 pub fn new(
499 config: &Config,
500 config: &Config,
500 requirements: &HashSet<String>,
501 requirements: &HashSet<String>,
501 ) -> Result<Self, HgError> {
502 ) -> Result<Self, HgError> {
502 Ok(Self {
503 Ok(Self {
503 compression_engine: CompressionConfig::new(config, requirements)?,
504 compression_engine: CompressionConfig::new(config, requirements)?,
504 enable_ellipsis: requirements.contains(NARROW_REQUIREMENT),
505 enable_ellipsis: requirements.contains(NARROW_REQUIREMENT),
505 ..Default::default()
506 ..Default::default()
506 })
507 })
507 }
508 }
508 }
509 }
509
510
510 /// Read only implementation of revlog.
511 /// Read only implementation of revlog.
511 pub struct Revlog {
512 pub struct Revlog {
512 /// When index and data are not interleaved: bytes of the revlog index.
513 /// When index and data are not interleaved: bytes of the revlog index.
513 /// When index and data are interleaved: bytes of the revlog index and
514 /// When index and data are interleaved: bytes of the revlog index and
514 /// data.
515 /// data.
515 index: Index,
516 index: Index,
516 /// When index and data are not interleaved: bytes of the revlog data
517 /// When index and data are not interleaved: bytes of the revlog data
517 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
518 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
518 /// When present on disk: the persistent nodemap for this revlog
519 /// When present on disk: the persistent nodemap for this revlog
519 nodemap: Option<nodemap::NodeTree>,
520 nodemap: Option<nodemap::NodeTree>,
520 }
521 }
521
522
522 impl Graph for Revlog {
523 impl Graph for Revlog {
523 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
524 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
524 self.index.parents(rev)
525 self.index.parents(rev)
525 }
526 }
526 }
527 }
527
528
/// The revlog version, along with the options specific to that version.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum RevlogVersionOptions {
    /// Revlog version 0
    V0,
    /// Revlog version 1, with its general delta and inline flags
    V1 { general_delta: bool, inline: bool },
    /// Revlog version 2
    V2,
    /// Changelog version 2, with its rank computation flag
    ChangelogV2 { compute_rank: bool },
}
535
536
536 /// Options to govern how a revlog should be opened, usually from the
537 /// Options to govern how a revlog should be opened, usually from the
537 /// repository configuration or requirements.
538 /// repository configuration or requirements.
538 #[derive(Debug, Copy, Clone)]
539 #[derive(Debug, Copy, Clone)]
539 pub struct RevlogOpenOptions {
540 pub struct RevlogOpenOptions {
540 /// The revlog version, along with any option specific to this version
541 /// The revlog version, along with any option specific to this version
541 pub version: RevlogVersionOptions,
542 pub version: RevlogVersionOptions,
542 /// Whether the revlog uses a persistent nodemap.
543 /// Whether the revlog uses a persistent nodemap.
543 pub use_nodemap: bool,
544 pub use_nodemap: bool,
544 pub delta_config: RevlogDeltaConfig,
545 pub delta_config: RevlogDeltaConfig,
545 pub data_config: RevlogDataConfig,
546 pub data_config: RevlogDataConfig,
546 pub feature_config: RevlogFeatureConfig,
547 pub feature_config: RevlogFeatureConfig,
547 }
548 }
548
549
#[cfg(test)]
impl Default for RevlogOpenOptions {
    /// Test-only defaults: a non-inline version 1 revlog with general delta
    /// and the persistent nodemap enabled, and default sub-configurations.
    fn default() -> Self {
        let version = RevlogVersionOptions::V1 {
            general_delta: true,
            inline: false,
        };
        Self {
            version,
            use_nodemap: true,
            delta_config: RevlogDeltaConfig::default(),
            data_config: RevlogDataConfig::default(),
            feature_config: RevlogFeatureConfig::default(),
        }
    }
}
564
565
565 impl RevlogOpenOptions {
566 impl RevlogOpenOptions {
566 pub fn new(
567 pub fn new(
567 inline: bool,
568 inline: bool,
568 data_config: RevlogDataConfig,
569 data_config: RevlogDataConfig,
569 delta_config: RevlogDeltaConfig,
570 delta_config: RevlogDeltaConfig,
570 feature_config: RevlogFeatureConfig,
571 feature_config: RevlogFeatureConfig,
571 ) -> Self {
572 ) -> Self {
572 Self {
573 Self {
573 version: RevlogVersionOptions::V1 {
574 version: RevlogVersionOptions::V1 {
574 general_delta: data_config.general_delta,
575 general_delta: data_config.general_delta,
575 inline,
576 inline,
576 },
577 },
577 use_nodemap: false,
578 use_nodemap: false,
578 data_config,
579 data_config,
579 delta_config,
580 delta_config,
580 feature_config,
581 feature_config,
581 }
582 }
582 }
583 }
583
584
584 pub fn index_header(&self) -> index::IndexHeader {
585 pub fn index_header(&self) -> index::IndexHeader {
585 index::IndexHeader {
586 index::IndexHeader {
586 header_bytes: match self.version {
587 header_bytes: match self.version {
587 RevlogVersionOptions::V0 => [0, 0, 0, 0],
588 RevlogVersionOptions::V0 => [0, 0, 0, 0],
588 RevlogVersionOptions::V1 {
589 RevlogVersionOptions::V1 {
589 general_delta,
590 general_delta,
590 inline,
591 inline,
591 } => [
592 } => [
592 0,
593 0,
593 if general_delta && inline {
594 if general_delta && inline {
594 3
595 3
595 } else if general_delta {
596 } else if general_delta {
596 2
597 2
597 } else {
598 } else {
598 u8::from(inline)
599 u8::from(inline)
599 },
600 },
600 0,
601 0,
601 1,
602 1,
602 ],
603 ],
603 RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
604 RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
604 RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
605 RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
605 0xD34Du32.to_be_bytes()
606 0xD34Du32.to_be_bytes()
606 }
607 }
607 },
608 },
608 }
609 }
609 }
610 }
610 }
611 }
611
612
612 impl Revlog {
613 impl Revlog {
613 /// Open a revlog index file.
614 /// Open a revlog index file.
614 ///
615 ///
615 /// It will also open the associated data file if index and data are not
616 /// It will also open the associated data file if index and data are not
616 /// interleaved.
617 /// interleaved.
617 pub fn open(
618 pub fn open(
618 // Todo use the `Vfs` trait here once we create a function for mmap
619 // Todo use the `Vfs` trait here once we create a function for mmap
619 store_vfs: &VfsImpl,
620 store_vfs: &VfsImpl,
620 index_path: impl AsRef<Path>,
621 index_path: impl AsRef<Path>,
621 data_path: Option<&Path>,
622 data_path: Option<&Path>,
622 options: RevlogOpenOptions,
623 options: RevlogOpenOptions,
623 ) -> Result<Self, HgError> {
624 ) -> Result<Self, HgError> {
624 Self::open_gen(store_vfs, index_path, data_path, options, None)
625 Self::open_gen(store_vfs, index_path, data_path, options, None)
625 }
626 }
626
627
627 fn open_gen(
628 fn open_gen(
628 // Todo use the `Vfs` trait here once we create a function for mmap
629 // Todo use the `Vfs` trait here once we create a function for mmap
629 store_vfs: &VfsImpl,
630 store_vfs: &VfsImpl,
630 index_path: impl AsRef<Path>,
631 index_path: impl AsRef<Path>,
631 data_path: Option<&Path>,
632 data_path: Option<&Path>,
632 options: RevlogOpenOptions,
633 options: RevlogOpenOptions,
633 nodemap_for_test: Option<nodemap::NodeTree>,
634 nodemap_for_test: Option<nodemap::NodeTree>,
634 ) -> Result<Self, HgError> {
635 ) -> Result<Self, HgError> {
635 let index_path = index_path.as_ref();
636 let index_path = index_path.as_ref();
636 let index = {
637 let index = {
637 match store_vfs.mmap_open_opt(index_path)? {
638 match store_vfs.mmap_open_opt(index_path)? {
638 None => Index::new(
639 None => Index::new(
639 Box::<Vec<_>>::default(),
640 Box::<Vec<_>>::default(),
640 options.index_header(),
641 options.index_header(),
641 ),
642 ),
642 Some(index_mmap) => {
643 Some(index_mmap) => {
643 let index = Index::new(
644 let index = Index::new(
644 Box::new(index_mmap),
645 Box::new(index_mmap),
645 options.index_header(),
646 options.index_header(),
646 )?;
647 )?;
647 Ok(index)
648 Ok(index)
648 }
649 }
649 }
650 }
650 }?;
651 }?;
651
652
652 let default_data_path = index_path.with_extension("d");
653 let default_data_path = index_path.with_extension("d");
653
654
654 // type annotation required
655 // type annotation required
655 // won't recognize Mmap as Deref<Target = [u8]>
656 // won't recognize Mmap as Deref<Target = [u8]>
656 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
657 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
657 if index.is_inline() {
658 if index.is_inline() {
658 None
659 None
659 } else if index.is_empty() {
660 } else if index.is_empty() {
660 // No need to even try to open the data file then.
661 // No need to even try to open the data file then.
661 Some(Box::new(&[][..]))
662 Some(Box::new(&[][..]))
662 } else {
663 } else {
663 let data_path = data_path.unwrap_or(&default_data_path);
664 let data_path = data_path.unwrap_or(&default_data_path);
664 let data_mmap = store_vfs.mmap_open(data_path)?;
665 let data_mmap = store_vfs.mmap_open(data_path)?;
665 Some(Box::new(data_mmap))
666 Some(Box::new(data_mmap))
666 };
667 };
667
668
668 let nodemap = if index.is_inline() || !options.use_nodemap {
669 let nodemap = if index.is_inline() || !options.use_nodemap {
669 None
670 None
670 } else {
671 } else {
671 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
672 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
672 |(docket, data)| {
673 |(docket, data)| {
673 nodemap::NodeTree::load_bytes(
674 nodemap::NodeTree::load_bytes(
674 Box::new(data),
675 Box::new(data),
675 docket.data_length,
676 docket.data_length,
676 )
677 )
677 },
678 },
678 )
679 )
679 };
680 };
680
681
681 let nodemap = nodemap_for_test.or(nodemap);
682 let nodemap = nodemap_for_test.or(nodemap);
682
683
683 Ok(Revlog {
684 Ok(Revlog {
684 index,
685 index,
685 data_bytes,
686 data_bytes,
686 nodemap,
687 nodemap,
687 })
688 })
688 }
689 }
689
690
690 /// Return number of entries of the `Revlog`.
691 /// Return number of entries of the `Revlog`.
691 pub fn len(&self) -> usize {
692 pub fn len(&self) -> usize {
692 self.index.len()
693 self.index.len()
693 }
694 }
694
695
695 /// Returns `true` if the `Revlog` has zero `entries`.
696 /// Returns `true` if the `Revlog` has zero `entries`.
696 pub fn is_empty(&self) -> bool {
697 pub fn is_empty(&self) -> bool {
697 self.index.is_empty()
698 self.index.is_empty()
698 }
699 }
699
700
700 /// Returns the node ID for the given revision number, if it exists in this
701 /// Returns the node ID for the given revision number, if it exists in this
701 /// revlog
702 /// revlog
702 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
703 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
703 if rev == NULL_REVISION.into() {
704 if rev == NULL_REVISION.into() {
704 return Some(&NULL_NODE);
705 return Some(&NULL_NODE);
705 }
706 }
706 let rev = self.index.check_revision(rev)?;
707 let rev = self.index.check_revision(rev)?;
707 Some(self.index.get_entry(rev)?.hash())
708 Some(self.index.get_entry(rev)?.hash())
708 }
709 }
709
710
710 /// Return the revision number for the given node ID, if it exists in this
711 /// Return the revision number for the given node ID, if it exists in this
711 /// revlog
712 /// revlog
712 pub fn rev_from_node(
713 pub fn rev_from_node(
713 &self,
714 &self,
714 node: NodePrefix,
715 node: NodePrefix,
715 ) -> Result<Revision, RevlogError> {
716 ) -> Result<Revision, RevlogError> {
716 if let Some(nodemap) = &self.nodemap {
717 if let Some(nodemap) = &self.nodemap {
717 nodemap
718 nodemap
718 .find_bin(&self.index, node)?
719 .find_bin(&self.index, node)?
719 .ok_or(RevlogError::InvalidRevision(format!("{:x}", node)))
720 .ok_or(RevlogError::InvalidRevision(format!("{:x}", node)))
720 } else {
721 } else {
721 self.rev_from_node_no_persistent_nodemap(node)
722 self.rev_from_node_no_persistent_nodemap(node)
722 }
723 }
723 }
724 }
724
725
725 /// Same as `rev_from_node`, without using a persistent nodemap
726 /// Same as `rev_from_node`, without using a persistent nodemap
726 ///
727 ///
727 /// This is used as fallback when a persistent nodemap is not present.
728 /// This is used as fallback when a persistent nodemap is not present.
728 /// This happens when the persistent-nodemap experimental feature is not
729 /// This happens when the persistent-nodemap experimental feature is not
729 /// enabled, or for small revlogs.
730 /// enabled, or for small revlogs.
730 fn rev_from_node_no_persistent_nodemap(
731 fn rev_from_node_no_persistent_nodemap(
731 &self,
732 &self,
732 node: NodePrefix,
733 node: NodePrefix,
733 ) -> Result<Revision, RevlogError> {
734 ) -> Result<Revision, RevlogError> {
734 // Linear scan of the revlog
735 // Linear scan of the revlog
735 // TODO: consider building a non-persistent nodemap in memory to
736 // TODO: consider building a non-persistent nodemap in memory to
736 // optimize these cases.
737 // optimize these cases.
737 let mut found_by_prefix = None;
738 let mut found_by_prefix = None;
738 for rev in (-1..self.len() as BaseRevision).rev() {
739 for rev in (-1..self.len() as BaseRevision).rev() {
739 let rev = Revision(rev as BaseRevision);
740 let rev = Revision(rev as BaseRevision);
740 let candidate_node = if rev == Revision(-1) {
741 let candidate_node = if rev == Revision(-1) {
741 NULL_NODE
742 NULL_NODE
742 } else {
743 } else {
743 let index_entry =
744 let index_entry =
744 self.index.get_entry(rev).ok_or_else(|| {
745 self.index.get_entry(rev).ok_or_else(|| {
745 HgError::corrupted(
746 HgError::corrupted(
746 "revlog references a revision not in the index",
747 "revlog references a revision not in the index",
747 )
748 )
748 })?;
749 })?;
749 *index_entry.hash()
750 *index_entry.hash()
750 };
751 };
751 if node == candidate_node {
752 if node == candidate_node {
752 return Ok(rev);
753 return Ok(rev);
753 }
754 }
754 if node.is_prefix_of(&candidate_node) {
755 if node.is_prefix_of(&candidate_node) {
755 if found_by_prefix.is_some() {
756 if found_by_prefix.is_some() {
756 return Err(RevlogError::AmbiguousPrefix);
757 return Err(RevlogError::AmbiguousPrefix);
757 }
758 }
758 found_by_prefix = Some(rev)
759 found_by_prefix = Some(rev)
759 }
760 }
760 }
761 }
761 found_by_prefix
762 found_by_prefix
762 .ok_or(RevlogError::InvalidRevision(format!("{:x}", node)))
763 .ok_or(RevlogError::InvalidRevision(format!("{:x}", node)))
763 }
764 }
764
765
765 /// Returns whether the given revision exists in this revlog.
766 /// Returns whether the given revision exists in this revlog.
766 pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
767 pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
767 self.index.check_revision(rev).is_some()
768 self.index.check_revision(rev).is_some()
768 }
769 }
769
770
770 /// Return the full data associated to a revision.
771 /// Return the full data associated to a revision.
771 ///
772 ///
772 /// All entries required to build the final data out of deltas will be
773 /// All entries required to build the final data out of deltas will be
773 /// retrieved as needed, and the deltas will be applied to the inital
774 /// retrieved as needed, and the deltas will be applied to the inital
774 /// snapshot to rebuild the final data.
775 /// snapshot to rebuild the final data.
775 pub fn get_rev_data(
776 pub fn get_rev_data(
776 &self,
777 &self,
777 rev: UncheckedRevision,
778 rev: UncheckedRevision,
778 ) -> Result<Cow<[u8]>, RevlogError> {
779 ) -> Result<Cow<[u8]>, RevlogError> {
779 if rev == NULL_REVISION.into() {
780 if rev == NULL_REVISION.into() {
780 return Ok(Cow::Borrowed(&[]));
781 return Ok(Cow::Borrowed(&[]));
781 };
782 };
782 self.get_entry(rev)?.data()
783 self.get_entry(rev)?.data()
783 }
784 }
784
785
785 /// [`Self::get_rev_data`] for checked revisions.
786 /// [`Self::get_rev_data`] for checked revisions.
786 pub fn get_rev_data_for_checked_rev(
787 pub fn get_rev_data_for_checked_rev(
787 &self,
788 &self,
788 rev: Revision,
789 rev: Revision,
789 ) -> Result<Cow<[u8]>, RevlogError> {
790 ) -> Result<Cow<[u8]>, RevlogError> {
790 if rev == NULL_REVISION {
791 if rev == NULL_REVISION {
791 return Ok(Cow::Borrowed(&[]));
792 return Ok(Cow::Borrowed(&[]));
792 };
793 };
793 self.get_entry_for_checked_rev(rev)?.data()
794 self.get_entry_for_checked_rev(rev)?.data()
794 }
795 }
795
796
796 /// Check the hash of some given data against the recorded hash.
797 /// Check the hash of some given data against the recorded hash.
797 pub fn check_hash(
798 pub fn check_hash(
798 &self,
799 &self,
799 p1: Revision,
800 p1: Revision,
800 p2: Revision,
801 p2: Revision,
801 expected: &[u8],
802 expected: &[u8],
802 data: &[u8],
803 data: &[u8],
803 ) -> bool {
804 ) -> bool {
804 let e1 = self.index.get_entry(p1);
805 let e1 = self.index.get_entry(p1);
805 let h1 = match e1 {
806 let h1 = match e1 {
806 Some(ref entry) => entry.hash(),
807 Some(ref entry) => entry.hash(),
807 None => &NULL_NODE,
808 None => &NULL_NODE,
808 };
809 };
809 let e2 = self.index.get_entry(p2);
810 let e2 = self.index.get_entry(p2);
810 let h2 = match e2 {
811 let h2 = match e2 {
811 Some(ref entry) => entry.hash(),
812 Some(ref entry) => entry.hash(),
812 None => &NULL_NODE,
813 None => &NULL_NODE,
813 };
814 };
814
815
815 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
816 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
816 }
817 }
817
818
818 /// Build the full data of a revision out its snapshot
819 /// Build the full data of a revision out its snapshot
819 /// and its deltas.
820 /// and its deltas.
820 fn build_data_from_deltas(
821 fn build_data_from_deltas(
821 snapshot: RevlogEntry,
822 snapshot: RevlogEntry,
822 deltas: &[RevlogEntry],
823 deltas: &[RevlogEntry],
823 ) -> Result<Vec<u8>, HgError> {
824 ) -> Result<Vec<u8>, HgError> {
824 let snapshot = snapshot.data_chunk()?;
825 let snapshot = snapshot.data_chunk()?;
825 let deltas = deltas
826 let deltas = deltas
826 .iter()
827 .iter()
827 .rev()
828 .rev()
828 .map(RevlogEntry::data_chunk)
829 .map(RevlogEntry::data_chunk)
829 .collect::<Result<Vec<_>, _>>()?;
830 .collect::<Result<Vec<_>, _>>()?;
830 let patches: Vec<_> =
831 let patches: Vec<_> =
831 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
832 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
832 let patch = patch::fold_patch_lists(&patches);
833 let patch = patch::fold_patch_lists(&patches);
833 Ok(patch.apply(&snapshot))
834 Ok(patch.apply(&snapshot))
834 }
835 }
835
836
836 /// Return the revlog data.
837 /// Return the revlog data.
837 fn data(&self) -> &[u8] {
838 fn data(&self) -> &[u8] {
838 match &self.data_bytes {
839 match &self.data_bytes {
839 Some(data_bytes) => data_bytes,
840 Some(data_bytes) => data_bytes,
840 None => panic!(
841 None => panic!(
841 "forgot to load the data or trying to access inline data"
842 "forgot to load the data or trying to access inline data"
842 ),
843 ),
843 }
844 }
844 }
845 }
845
846
846 pub fn make_null_entry(&self) -> RevlogEntry {
847 pub fn make_null_entry(&self) -> RevlogEntry {
847 RevlogEntry {
848 RevlogEntry {
848 revlog: self,
849 revlog: self,
849 rev: NULL_REVISION,
850 rev: NULL_REVISION,
850 bytes: b"",
851 bytes: b"",
851 compressed_len: 0,
852 compressed_len: 0,
852 uncompressed_len: 0,
853 uncompressed_len: 0,
853 base_rev_or_base_of_delta_chain: None,
854 base_rev_or_base_of_delta_chain: None,
854 p1: NULL_REVISION,
855 p1: NULL_REVISION,
855 p2: NULL_REVISION,
856 p2: NULL_REVISION,
856 flags: NULL_REVLOG_ENTRY_FLAGS,
857 flags: NULL_REVLOG_ENTRY_FLAGS,
857 hash: NULL_NODE,
858 hash: NULL_NODE,
858 }
859 }
859 }
860 }
860
861
861 fn get_entry_for_checked_rev(
862 fn get_entry_for_checked_rev(
862 &self,
863 &self,
863 rev: Revision,
864 rev: Revision,
864 ) -> Result<RevlogEntry, RevlogError> {
865 ) -> Result<RevlogEntry, RevlogError> {
865 if rev == NULL_REVISION {
866 if rev == NULL_REVISION {
866 return Ok(self.make_null_entry());
867 return Ok(self.make_null_entry());
867 }
868 }
868 let index_entry = self
869 let index_entry = self
869 .index
870 .index
870 .get_entry(rev)
871 .get_entry(rev)
871 .ok_or(RevlogError::InvalidRevision(rev.to_string()))?;
872 .ok_or(RevlogError::InvalidRevision(rev.to_string()))?;
872 let offset = index_entry.offset();
873 let offset = index_entry.offset();
873 let start = if self.index.is_inline() {
874 let start = if self.index.is_inline() {
874 offset + ((rev.0 as usize + 1) * INDEX_ENTRY_SIZE)
875 offset + ((rev.0 as usize + 1) * INDEX_ENTRY_SIZE)
875 } else {
876 } else {
876 offset
877 offset
877 };
878 };
878 let end = start + index_entry.compressed_len() as usize;
879 let end = start + index_entry.compressed_len() as usize;
879 let data = if self.index.is_inline() {
880 let data = if self.index.is_inline() {
880 self.index.data(start, end)
881 self.index.data(start, end)
881 } else {
882 } else {
882 &self.data()[start..end]
883 &self.data()[start..end]
883 };
884 };
884 let base_rev = self
885 let base_rev = self
885 .index
886 .index
886 .check_revision(index_entry.base_revision_or_base_of_delta_chain())
887 .check_revision(index_entry.base_revision_or_base_of_delta_chain())
887 .ok_or_else(|| {
888 .ok_or_else(|| {
888 RevlogError::corrupted(format!(
889 RevlogError::corrupted(format!(
889 "base revision for rev {} is invalid",
890 "base revision for rev {} is invalid",
890 rev
891 rev
891 ))
892 ))
892 })?;
893 })?;
893 let p1 =
894 let p1 =
894 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
895 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
895 RevlogError::corrupted(format!(
896 RevlogError::corrupted(format!(
896 "p1 for rev {} is invalid",
897 "p1 for rev {} is invalid",
897 rev
898 rev
898 ))
899 ))
899 })?;
900 })?;
900 let p2 =
901 let p2 =
901 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
902 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
902 RevlogError::corrupted(format!(
903 RevlogError::corrupted(format!(
903 "p2 for rev {} is invalid",
904 "p2 for rev {} is invalid",
904 rev
905 rev
905 ))
906 ))
906 })?;
907 })?;
907 let entry = RevlogEntry {
908 let entry = RevlogEntry {
908 revlog: self,
909 revlog: self,
909 rev,
910 rev,
910 bytes: data,
911 bytes: data,
911 compressed_len: index_entry.compressed_len(),
912 compressed_len: index_entry.compressed_len(),
912 uncompressed_len: index_entry.uncompressed_len(),
913 uncompressed_len: index_entry.uncompressed_len(),
913 base_rev_or_base_of_delta_chain: if base_rev == rev {
914 base_rev_or_base_of_delta_chain: if base_rev == rev {
914 None
915 None
915 } else {
916 } else {
916 Some(base_rev)
917 Some(base_rev)
917 },
918 },
918 p1,
919 p1,
919 p2,
920 p2,
920 flags: index_entry.flags(),
921 flags: index_entry.flags(),
921 hash: *index_entry.hash(),
922 hash: *index_entry.hash(),
922 };
923 };
923 Ok(entry)
924 Ok(entry)
924 }
925 }
925
926
926 /// Get an entry of the revlog.
927 /// Get an entry of the revlog.
927 pub fn get_entry(
928 pub fn get_entry(
928 &self,
929 &self,
929 rev: UncheckedRevision,
930 rev: UncheckedRevision,
930 ) -> Result<RevlogEntry, RevlogError> {
931 ) -> Result<RevlogEntry, RevlogError> {
931 if rev == NULL_REVISION.into() {
932 if rev == NULL_REVISION.into() {
932 return Ok(self.make_null_entry());
933 return Ok(self.make_null_entry());
933 }
934 }
934 let rev = self.index.check_revision(rev).ok_or_else(|| {
935 let rev = self.index.check_revision(rev).ok_or_else(|| {
935 RevlogError::corrupted(format!("rev {} is invalid", rev))
936 RevlogError::corrupted(format!("rev {} is invalid", rev))
936 })?;
937 })?;
937 self.get_entry_for_checked_rev(rev)
938 self.get_entry_for_checked_rev(rev)
938 }
939 }
939 }
940 }
940
941
941 /// The revlog entry's bytes and the necessary informations to extract
942 /// The revlog entry's bytes and the necessary informations to extract
942 /// the entry's data.
943 /// the entry's data.
943 #[derive(Clone)]
944 #[derive(Clone)]
944 pub struct RevlogEntry<'revlog> {
945 pub struct RevlogEntry<'revlog> {
945 revlog: &'revlog Revlog,
946 revlog: &'revlog Revlog,
946 rev: Revision,
947 rev: Revision,
947 bytes: &'revlog [u8],
948 bytes: &'revlog [u8],
948 compressed_len: u32,
949 compressed_len: u32,
949 uncompressed_len: i32,
950 uncompressed_len: i32,
950 base_rev_or_base_of_delta_chain: Option<Revision>,
951 base_rev_or_base_of_delta_chain: Option<Revision>,
951 p1: Revision,
952 p1: Revision,
952 p2: Revision,
953 p2: Revision,
953 flags: u16,
954 flags: u16,
954 hash: Node,
955 hash: Node,
955 }
956 }
956
957
957 impl<'revlog> RevlogEntry<'revlog> {
958 impl<'revlog> RevlogEntry<'revlog> {
958 pub fn revision(&self) -> Revision {
959 pub fn revision(&self) -> Revision {
959 self.rev
960 self.rev
960 }
961 }
961
962
962 pub fn node(&self) -> &Node {
963 pub fn node(&self) -> &Node {
963 &self.hash
964 &self.hash
964 }
965 }
965
966
966 pub fn uncompressed_len(&self) -> Option<u32> {
967 pub fn uncompressed_len(&self) -> Option<u32> {
967 u32::try_from(self.uncompressed_len).ok()
968 u32::try_from(self.uncompressed_len).ok()
968 }
969 }
969
970
970 pub fn has_p1(&self) -> bool {
971 pub fn has_p1(&self) -> bool {
971 self.p1 != NULL_REVISION
972 self.p1 != NULL_REVISION
972 }
973 }
973
974
974 pub fn p1_entry(
975 pub fn p1_entry(
975 &self,
976 &self,
976 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
977 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
977 if self.p1 == NULL_REVISION {
978 if self.p1 == NULL_REVISION {
978 Ok(None)
979 Ok(None)
979 } else {
980 } else {
980 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?))
981 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?))
981 }
982 }
982 }
983 }
983
984
984 pub fn p2_entry(
985 pub fn p2_entry(
985 &self,
986 &self,
986 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
987 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
987 if self.p2 == NULL_REVISION {
988 if self.p2 == NULL_REVISION {
988 Ok(None)
989 Ok(None)
989 } else {
990 } else {
990 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?))
991 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?))
991 }
992 }
992 }
993 }
993
994
994 pub fn p1(&self) -> Option<Revision> {
995 pub fn p1(&self) -> Option<Revision> {
995 if self.p1 == NULL_REVISION {
996 if self.p1 == NULL_REVISION {
996 None
997 None
997 } else {
998 } else {
998 Some(self.p1)
999 Some(self.p1)
999 }
1000 }
1000 }
1001 }
1001
1002
1002 pub fn p2(&self) -> Option<Revision> {
1003 pub fn p2(&self) -> Option<Revision> {
1003 if self.p2 == NULL_REVISION {
1004 if self.p2 == NULL_REVISION {
1004 None
1005 None
1005 } else {
1006 } else {
1006 Some(self.p2)
1007 Some(self.p2)
1007 }
1008 }
1008 }
1009 }
1009
1010
1010 pub fn is_censored(&self) -> bool {
1011 pub fn is_censored(&self) -> bool {
1011 (self.flags & REVISION_FLAG_CENSORED) != 0
1012 (self.flags & REVISION_FLAG_CENSORED) != 0
1012 }
1013 }
1013
1014
1014 pub fn has_length_affecting_flag_processor(&self) -> bool {
1015 pub fn has_length_affecting_flag_processor(&self) -> bool {
1015 // Relevant Python code: revlog.size()
1016 // Relevant Python code: revlog.size()
1016 // note: ELLIPSIS is known to not change the content
1017 // note: ELLIPSIS is known to not change the content
1017 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
1018 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
1018 }
1019 }
1019
1020
1020 /// The data for this entry, after resolving deltas if any.
1021 /// The data for this entry, after resolving deltas if any.
1021 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
1022 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
1022 let mut entry = self.clone();
1023 let mut entry = self.clone();
1023 let mut delta_chain = vec![];
1024 let mut delta_chain = vec![];
1024
1025
1025 // The meaning of `base_rev_or_base_of_delta_chain` depends on
1026 // The meaning of `base_rev_or_base_of_delta_chain` depends on
1026 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
1027 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
1027 // `mercurial/revlogutils/constants.py` and the code in
1028 // `mercurial/revlogutils/constants.py` and the code in
1028 // [_chaininfo] and in [index_deltachain].
1029 // [_chaininfo] and in [index_deltachain].
1029 let uses_generaldelta = self.revlog.index.uses_generaldelta();
1030 let uses_generaldelta = self.revlog.index.uses_generaldelta();
1030 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
1031 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
1031 entry = if uses_generaldelta {
1032 entry = if uses_generaldelta {
1032 delta_chain.push(entry);
1033 delta_chain.push(entry);
1033 self.revlog.get_entry_for_checked_rev(base_rev)?
1034 self.revlog.get_entry_for_checked_rev(base_rev)?
1034 } else {
1035 } else {
1035 let base_rev = UncheckedRevision(entry.rev.0 - 1);
1036 let base_rev = UncheckedRevision(entry.rev.0 - 1);
1036 delta_chain.push(entry);
1037 delta_chain.push(entry);
1037 self.revlog.get_entry(base_rev)?
1038 self.revlog.get_entry(base_rev)?
1038 };
1039 };
1039 }
1040 }
1040
1041
1041 let data = if delta_chain.is_empty() {
1042 let data = if delta_chain.is_empty() {
1042 entry.data_chunk()?
1043 entry.data_chunk()?
1043 } else {
1044 } else {
1044 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
1045 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
1045 };
1046 };
1046
1047
1047 Ok(data)
1048 Ok(data)
1048 }
1049 }
1049
1050
1050 fn check_data(
1051 fn check_data(
1051 &self,
1052 &self,
1052 data: Cow<'revlog, [u8]>,
1053 data: Cow<'revlog, [u8]>,
1053 ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
1054 ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
1054 if self.revlog.check_hash(
1055 if self.revlog.check_hash(
1055 self.p1,
1056 self.p1,
1056 self.p2,
1057 self.p2,
1057 self.hash.as_bytes(),
1058 self.hash.as_bytes(),
1058 &data,
1059 &data,
1059 ) {
1060 ) {
1060 Ok(data)
1061 Ok(data)
1061 } else {
1062 } else {
1062 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
1063 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
1063 return Err(HgError::unsupported(
1064 return Err(HgError::unsupported(
1064 "support for ellipsis nodes is missing",
1065 "support for ellipsis nodes is missing",
1065 )
1066 )
1066 .into());
1067 .into());
1067 }
1068 }
1068 Err(corrupted(format!(
1069 Err(corrupted(format!(
1069 "hash check failed for revision {}",
1070 "hash check failed for revision {}",
1070 self.rev
1071 self.rev
1071 ))
1072 ))
1072 .into())
1073 .into())
1073 }
1074 }
1074 }
1075 }
1075
1076
1076 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
1077 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
1077 let data = self.rawdata()?;
1078 let data = self.rawdata()?;
1078 if self.rev == NULL_REVISION {
1079 if self.rev == NULL_REVISION {
1079 return Ok(data);
1080 return Ok(data);
1080 }
1081 }
1081 if self.is_censored() {
1082 if self.is_censored() {
1082 return Err(HgError::CensoredNodeError.into());
1083 return Err(HgError::CensoredNodeError.into());
1083 }
1084 }
1084 self.check_data(data)
1085 self.check_data(data)
1085 }
1086 }
1086
1087
1087 /// Extract the data contained in the entry.
1088 /// Extract the data contained in the entry.
1088 /// This may be a delta. (See `is_delta`.)
1089 /// This may be a delta. (See `is_delta`.)
1089 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
1090 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
1090 if self.bytes.is_empty() {
1091 if self.bytes.is_empty() {
1091 return Ok(Cow::Borrowed(&[]));
1092 return Ok(Cow::Borrowed(&[]));
1092 }
1093 }
1093 match self.bytes[0] {
1094 match self.bytes[0] {
1094 // Revision data is the entirety of the entry, including this
1095 // Revision data is the entirety of the entry, including this
1095 // header.
1096 // header.
1096 b'\0' => Ok(Cow::Borrowed(self.bytes)),
1097 b'\0' => Ok(Cow::Borrowed(self.bytes)),
1097 // Raw revision data follows.
1098 // Raw revision data follows.
1098 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
1099 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
1099 // zlib (RFC 1950) data.
1100 // zlib (RFC 1950) data.
1100 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
1101 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
1101 // zstd data.
1102 // zstd data.
1102 b'\x28' => Ok(Cow::Owned(uncompressed_zstd_data(
1103 b'\x28' => Ok(Cow::Owned(uncompressed_zstd_data(
1103 self.bytes,
1104 self.bytes,
1104 self.is_delta(),
1105 self.is_delta(),
1105 self.uncompressed_len.max(0),
1106 self.uncompressed_len.max(0),
1106 )?)),
1107 )?)),
1107 // A proper new format should have had a repo/store requirement.
1108 // A proper new format should have had a repo/store requirement.
1108 format_type => Err(corrupted(format!(
1109 format_type => Err(corrupted(format!(
1109 "unknown compression header '{}'",
1110 "unknown compression header '{}'",
1110 format_type
1111 format_type
1111 ))),
1112 ))),
1112 }
1113 }
1113 }
1114 }
1114
1115
1115 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
1116 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
1116 let mut decoder = ZlibDecoder::new(self.bytes);
1117 let mut decoder = ZlibDecoder::new(self.bytes);
1117 if self.is_delta() {
1118 if self.is_delta() {
1118 let mut buf = Vec::with_capacity(self.compressed_len as usize);
1119 let mut buf = Vec::with_capacity(self.compressed_len as usize);
1119 decoder
1120 decoder
1120 .read_to_end(&mut buf)
1121 .read_to_end(&mut buf)
1121 .map_err(|e| corrupted(e.to_string()))?;
1122 .map_err(|e| corrupted(e.to_string()))?;
1122 Ok(buf)
1123 Ok(buf)
1123 } else {
1124 } else {
1124 let cap = self.uncompressed_len.max(0) as usize;
1125 let cap = self.uncompressed_len.max(0) as usize;
1125 let mut buf = vec![0; cap];
1126 let mut buf = vec![0; cap];
1126 decoder
1127 decoder
1127 .read_exact(&mut buf)
1128 .read_exact(&mut buf)
1128 .map_err(|e| corrupted(e.to_string()))?;
1129 .map_err(|e| corrupted(e.to_string()))?;
1129 Ok(buf)
1130 Ok(buf)
1130 }
1131 }
1131 }
1132 }
1132
1133
1133 /// Tell if the entry is a snapshot or a delta
1134 /// Tell if the entry is a snapshot or a delta
1134 /// (influences on decompression).
1135 /// (influences on decompression).
1135 fn is_delta(&self) -> bool {
1136 fn is_delta(&self) -> bool {
1136 self.base_rev_or_base_of_delta_chain.is_some()
1137 self.base_rev_or_base_of_delta_chain.is_some()
1137 }
1138 }
1138 }
1139 }
1139
1140
1140 /// Calculate the hash of a revision given its data and its parents.
1141 /// Calculate the hash of a revision given its data and its parents.
1141 fn hash(
1142 fn hash(
1142 data: &[u8],
1143 data: &[u8],
1143 p1_hash: &[u8],
1144 p1_hash: &[u8],
1144 p2_hash: &[u8],
1145 p2_hash: &[u8],
1145 ) -> [u8; NODE_BYTES_LENGTH] {
1146 ) -> [u8; NODE_BYTES_LENGTH] {
1146 let mut hasher = Sha1::new();
1147 let mut hasher = Sha1::new();
1147 let (a, b) = (p1_hash, p2_hash);
1148 let (a, b) = (p1_hash, p2_hash);
1148 if a > b {
1149 if a > b {
1149 hasher.update(b);
1150 hasher.update(b);
1150 hasher.update(a);
1151 hasher.update(a);
1151 } else {
1152 } else {
1152 hasher.update(a);
1153 hasher.update(a);
1153 hasher.update(b);
1154 hasher.update(b);
1154 }
1155 }
1155 hasher.update(data);
1156 hasher.update(data);
1156 *hasher.finalize().as_ref()
1157 *hasher.finalize().as_ref()
1157 }
1158 }
1158
1159
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::IndexEntryBuilder;
    use itertools::Itertools;

    /// Build a VFS rooted at the given directory.
    fn vfs_at(dir: &std::path::Path) -> VfsImpl {
        VfsImpl {
            base: dir.to_owned(),
        }
    }

    /// Parse a full hexadecimal node, panicking if it is malformed.
    fn node(hex: &str) -> Node {
        Node::from_hex(hex).unwrap()
    }

    /// Assert that every parent accessor of `entry` reports no parent.
    fn assert_no_parents(entry: &RevlogEntry) {
        assert!(!entry.has_p1());
        assert_eq!(entry.p1(), None);
        assert_eq!(entry.p2(), None);
        assert!(entry.p1_entry().unwrap().is_none());
        assert!(entry.p2_entry().unwrap().is_none());
    }

    #[test]
    fn test_empty() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = vfs_at(temp.path());
        for name in ["foo.i", "foo.d"] {
            std::fs::write(temp.path().join(name), b"").unwrap();
        }
        let revlog =
            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::default())
                .unwrap();

        // No revision at all besides the null one
        assert!(revlog.is_empty());
        assert_eq!(revlog.len(), 0);
        assert!(revlog.get_entry(0.into()).is_err());
        assert!(!revlog.has_rev(0.into()));
        assert_eq!(
            revlog.rev_from_node(NULL_NODE.into()).unwrap(),
            NULL_REVISION
        );

        let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
        assert_eq!(null_entry.revision(), NULL_REVISION);
        assert!(null_entry.data().unwrap().is_empty());
    }

    #[test]
    fn test_inline() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = vfs_at(temp.path());
        let node0 = node("2ed2a3912a0b24502043eae84ee4b279c18b90dd");
        let node1 = node("b004912a8510032a0350a74daa2803dadfb00e12");
        let node2 = node("dd6ad206e907be60927b5a3117b97dffb2590582");

        // Two roots, then a revision having both of them as parents
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new().with_node(node1).build();
        let entry2_bytes = IndexEntryBuilder::new()
            .with_p1(Revision(0))
            .with_p2(Revision(1))
            .with_node(node2)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();
        let revlog =
            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::default())
                .unwrap();

        let entry0 = revlog.get_entry(0.into()).ok().unwrap();
        assert_eq!(entry0.revision(), Revision(0));
        assert_eq!(*entry0.node(), node0);
        assert_no_parents(&entry0);

        let entry1 = revlog.get_entry(1.into()).ok().unwrap();
        assert_eq!(entry1.revision(), Revision(1));
        assert_eq!(*entry1.node(), node1);
        assert_no_parents(&entry1);

        let entry2 = revlog.get_entry(2.into()).ok().unwrap();
        assert_eq!(entry2.revision(), Revision(2));
        assert_eq!(*entry2.node(), node2);
        assert!(entry2.has_p1());
        assert_eq!(entry2.p1(), Some(Revision(0)));
        assert_eq!(entry2.p2(), Some(Revision(1)));
        let p1_entry = entry2.p1_entry().unwrap();
        assert!(p1_entry.is_some());
        assert_eq!(p1_entry.unwrap().revision(), Revision(0));
        let p2_entry = entry2.p2_entry().unwrap();
        assert!(p2_entry.is_some());
        assert_eq!(p2_entry.unwrap().revision(), Revision(1));
    }

    #[test]
    fn test_nodemap() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = vfs_at(temp.path());

        // building a revlog with a forced Node starting with zeros
        // This is a corruption, but it does not preclude using the nodemap
        // if we don't try and access the data
        let node0 = node("00d2a3912a0b24502043eae84ee4b279c18b90dd");
        let node1 = node("b004912a8510032a0350a74daa2803dadfb00e12");
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new().with_node(node1).build();
        let contents = vec![entry0_bytes, entry1_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();

        let mut idx = nodemap::tests::TestNtIndex::new();
        idx.insert_node(Revision(0), node0).unwrap();
        idx.insert_node(Revision(1), node1).unwrap();

        let revlog = Revlog::open_gen(
            &vfs,
            "foo.i",
            None,
            RevlogOpenOptions::default(),
            Some(idx.nt),
        )
        .unwrap();

        // accessing the data shows the corruption
        revlog.get_entry(0.into()).unwrap().data().unwrap_err();

        // Full nodes
        assert_eq!(
            revlog.rev_from_node(NULL_NODE.into()).unwrap(),
            Revision(-1)
        );
        assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), Revision(0));
        assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), Revision(1));

        // Unambiguous prefixes
        let null_prefix = NodePrefix::from_hex("000").unwrap();
        assert_eq!(revlog.rev_from_node(null_prefix).unwrap(), Revision(-1));
        let node1_prefix = NodePrefix::from_hex("b00").unwrap();
        assert_eq!(revlog.rev_from_node(node1_prefix).unwrap(), Revision(1));

        // RevlogError does not implement PartialEq
        // (ultimately because io::Error does not)
        let ambiguous_prefix = NodePrefix::from_hex("00").unwrap();
        match revlog
            .rev_from_node(ambiguous_prefix)
            .expect_err("Expected to give AmbiguousPrefix error")
        {
            RevlogError::AmbiguousPrefix => (),
            e => {
                panic!("Got another error than AmbiguousPrefix: {:?}", e);
            }
        };
    }
}
General Comments 0
You need to be logged in to leave comments. Login now