Show More
This diff has been collapsed as it changes many lines, (535 lines changed) Show them Hide them | |||||
@@ -0,0 +1,535 | |||||
|
1 | //! Helpers for revlog file reading and writing. | |||
|
2 | ||||
|
3 | use std::{ | |||
|
4 | cell::RefCell, | |||
|
5 | fs::File, | |||
|
6 | io::{Read, Seek, SeekFrom, Write}, | |||
|
7 | path::{Path, PathBuf}, | |||
|
8 | sync::{Arc, Mutex}, | |||
|
9 | }; | |||
|
10 | ||||
|
11 | use crate::{ | |||
|
12 | errors::{HgError, IoResultExt}, | |||
|
13 | vfs::Vfs, | |||
|
14 | }; | |||
|
15 | ||||
|
16 | /// Wraps accessing arbitrary chunks of data within a file and reusing handles. | |||
|
17 | /// This is currently useful for accessing a revlog's data file, only reading | |||
|
18 | /// the ranges that are currently relevant, like a sort of basic and manual | |||
|
19 | /// file-based mmap. | |||
|
20 | /// | |||
|
21 | /// XXX should this just be replaced with `mmap` + `madvise` ranges? | |||
|
22 | /// The upcoming `UncompressedChunkCache` will make up for most of the slowness | |||
|
23 | /// of re-reading the same chunks, so this might not be as useful. Aside from | |||
|
24 | /// the major benefit of having less code to take care of, using `mmap` will | |||
|
25 | /// allow multiple processes to share the same pages, especially for the | |||
|
26 | /// changelog and manifest, which would make a difference in server contexts. | |||
|
27 | pub struct RandomAccessFile { | |||
|
28 | /// The current store VFS to pass it to [`FileHandle`] | |||
|
29 | vfs: Box<dyn Vfs>, | |||
|
30 | /// Filename of the open file, relative to the vfs root | |||
|
31 | pub filename: PathBuf, | |||
|
32 | /// The current read-only handle on the file, if any | |||
|
33 | pub reading_handle: RefCell<Option<FileHandle>>, | |||
|
34 | /// The current read-write handle on the file, if any | |||
|
35 | pub writing_handle: RefCell<Option<FileHandle>>, | |||
|
36 | } | |||
|
37 | ||||
|
38 | impl RandomAccessFile { | |||
|
39 | /// Wrap a file for random access | |||
|
40 | pub fn new(vfs: Box<dyn Vfs>, filename: PathBuf) -> Self { | |||
|
41 | assert!(filename.is_relative()); | |||
|
42 | Self { | |||
|
43 | vfs, | |||
|
44 | filename, | |||
|
45 | reading_handle: RefCell::new(None), | |||
|
46 | writing_handle: RefCell::new(None), | |||
|
47 | } | |||
|
48 | } | |||
|
49 | ||||
|
50 | /// Read a chunk of bytes from the file. | |||
|
51 | pub fn read_chunk( | |||
|
52 | &self, | |||
|
53 | offset: usize, | |||
|
54 | length: usize, | |||
|
55 | ) -> Result<Vec<u8>, HgError> { | |||
|
56 | let mut handle = self.get_read_handle()?; | |||
|
57 | handle | |||
|
58 | .seek(SeekFrom::Start(offset as u64)) | |||
|
59 | .when_reading_file(&self.filename)?; | |||
|
60 | handle.read_exact(length).when_reading_file(&self.filename) | |||
|
61 | } | |||
|
62 | ||||
|
63 | /// `pub` only for hg-cpython | |||
|
64 | #[doc(hidden)] | |||
|
65 | pub fn get_read_handle(&self) -> Result<FileHandle, HgError> { | |||
|
66 | if let Some(handle) = &*self.writing_handle.borrow() { | |||
|
67 | // Use a file handle being actively used for writes, if available. | |||
|
68 | // There is some danger to doing this because reads will seek the | |||
|
69 | // file. | |||
|
70 | // However, [`Revlog::write_entry`] performs a `SeekFrom::End(0)` | |||
|
71 | // before all writes, so we should be safe. | |||
|
72 | return Ok(handle.clone()); | |||
|
73 | } | |||
|
74 | if let Some(handle) = &*self.reading_handle.borrow() { | |||
|
75 | return Ok(handle.clone()); | |||
|
76 | } | |||
|
77 | // early returns done to work around borrowck being overzealous | |||
|
78 | // See https://github.com/rust-lang/rust/issues/103108 | |||
|
79 | let new_handle = FileHandle::new( | |||
|
80 | dyn_clone::clone_box(&*self.vfs), | |||
|
81 | &self.filename, | |||
|
82 | false, | |||
|
83 | false, | |||
|
84 | )?; | |||
|
85 | *self.reading_handle.borrow_mut() = Some(new_handle.clone()); | |||
|
86 | Ok(new_handle) | |||
|
87 | } | |||
|
88 | ||||
|
89 | /// `pub` only for hg-cpython | |||
|
90 | #[doc(hidden)] | |||
|
91 | pub fn exit_reading_context(&self) { | |||
|
92 | self.reading_handle.take(); | |||
|
93 | } | |||
|
94 | ||||
|
95 | // Returns whether this file currently open | |||
|
96 | pub fn is_open(&self) -> bool { | |||
|
97 | self.reading_handle.borrow().is_some() | |||
|
98 | || self.writing_handle.borrow().is_some() | |||
|
99 | } | |||
|
100 | } | |||
|
101 | ||||
|
102 | /// A buffer that holds new changelog index data that needs to be written | |||
|
103 | /// after the manifest and filelogs so that the repo is updated atomically to | |||
|
104 | /// external processes. | |||
|
105 | #[derive(Clone, Debug, Default)] | |||
|
106 | pub struct DelayedBuffer { | |||
|
107 | // The actual in-memory bytes storing the delayed writes | |||
|
108 | pub(super) buffer: Vec<u8>, | |||
|
109 | /// The current offset into the virtual file composed of file + buffer | |||
|
110 | offset: u64, | |||
|
111 | /// The size of the file at the time of opening | |||
|
112 | file_size: u64, | |||
|
113 | } | |||
|
114 | ||||
|
115 | impl DelayedBuffer { | |||
|
116 | /// Returns the length of the full data (on-disk + buffer length). | |||
|
117 | pub fn len(&self) -> u64 { | |||
|
118 | self.buffer.len() as u64 + self.file_size | |||
|
119 | } | |||
|
120 | ||||
|
121 | pub fn is_empty(&self) -> bool { | |||
|
122 | self.len() == 0 | |||
|
123 | } | |||
|
124 | } | |||
|
125 | ||||
|
126 | /// Holds an open [`File`] and the related data. This can be used for reading | |||
|
127 | /// and writing. Writes can be delayed to a buffer before touching the disk, | |||
|
128 | /// if relevant (in the changelog case), but reads are transparent. | |||
|
129 | pub struct FileHandle { | |||
|
130 | /// The actual open file | |||
|
131 | pub file: File, | |||
|
132 | /// The VFS with which the file was opened | |||
|
133 | vfs: Box<dyn Vfs>, | |||
|
134 | /// Filename of the open file, relative to the repo root | |||
|
135 | filename: PathBuf, | |||
|
136 | /// Buffer of delayed entry writes to the changelog index. This points | |||
|
137 | /// back to the buffer inside the revlog this handle refers to. | |||
|
138 | delayed_buffer: Option<Arc<Mutex<DelayedBuffer>>>, | |||
|
139 | } | |||
|
140 | ||||
|
141 | impl std::fmt::Debug for FileHandle { | |||
|
142 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |||
|
143 | f.debug_struct("FileHandle") | |||
|
144 | .field("filename", &self.filename) | |||
|
145 | .field("delayed_buffer", &self.delayed_buffer) | |||
|
146 | .field("file", &self.file) | |||
|
147 | .finish() | |||
|
148 | } | |||
|
149 | } | |||
|
150 | ||||
|
151 | impl Clone for FileHandle { | |||
|
152 | fn clone(&self) -> Self { | |||
|
153 | Self { | |||
|
154 | vfs: dyn_clone::clone_box(&*self.vfs), | |||
|
155 | filename: self.filename.clone(), | |||
|
156 | delayed_buffer: self.delayed_buffer.clone(), | |||
|
157 | // This can only fail if the OS doesn't have the file handle | |||
|
158 | // anymore, so we're not going to do anything useful anyway. | |||
|
159 | file: self.file.try_clone().expect("couldn't clone file handle"), | |||
|
160 | } | |||
|
161 | } | |||
|
162 | } | |||
|
163 | ||||
|
164 | impl FileHandle { | |||
|
165 | /// Get a (read or write) file handle to `filename`. Only creates the file | |||
|
166 | /// if `create` is `true`. | |||
|
167 | pub fn new( | |||
|
168 | vfs: Box<dyn Vfs>, | |||
|
169 | filename: impl AsRef<Path>, | |||
|
170 | create: bool, | |||
|
171 | write: bool, | |||
|
172 | ) -> Result<Self, HgError> { | |||
|
173 | let file = if create { | |||
|
174 | vfs.create(filename.as_ref())? | |||
|
175 | } else if write { | |||
|
176 | vfs.open(filename.as_ref())? | |||
|
177 | } else { | |||
|
178 | vfs.open_read(filename.as_ref())? | |||
|
179 | }; | |||
|
180 | Ok(Self { | |||
|
181 | vfs, | |||
|
182 | filename: filename.as_ref().to_owned(), | |||
|
183 | delayed_buffer: None, | |||
|
184 | file, | |||
|
185 | }) | |||
|
186 | } | |||
|
187 | ||||
|
188 | /// Get a file handle to `filename`, but writes go to a [`DelayedBuffer`]. | |||
|
189 | pub fn new_delayed( | |||
|
190 | vfs: Box<dyn Vfs>, | |||
|
191 | filename: impl AsRef<Path>, | |||
|
192 | create: bool, | |||
|
193 | delayed_buffer: Arc<Mutex<DelayedBuffer>>, | |||
|
194 | ) -> Result<Self, HgError> { | |||
|
195 | let mut file = if create { | |||
|
196 | vfs.create(filename.as_ref())? | |||
|
197 | } else { | |||
|
198 | vfs.open(filename.as_ref())? | |||
|
199 | }; | |||
|
200 | let size = vfs.file_size(&file)?; | |||
|
201 | let offset = file | |||
|
202 | .stream_position() | |||
|
203 | .when_reading_file(filename.as_ref())?; | |||
|
204 | ||||
|
205 | { | |||
|
206 | let mut buf = delayed_buffer.lock().unwrap(); | |||
|
207 | buf.file_size = size; | |||
|
208 | buf.offset = offset; | |||
|
209 | } | |||
|
210 | ||||
|
211 | Ok(Self { | |||
|
212 | vfs, | |||
|
213 | filename: filename.as_ref().to_owned(), | |||
|
214 | delayed_buffer: Some(delayed_buffer), | |||
|
215 | file, | |||
|
216 | }) | |||
|
217 | } | |||
|
218 | ||||
|
219 | /// Wrap an existing [`File`] | |||
|
220 | pub fn from_file( | |||
|
221 | file: File, | |||
|
222 | vfs: Box<dyn Vfs>, | |||
|
223 | filename: impl AsRef<Path>, | |||
|
224 | ) -> Self { | |||
|
225 | Self { | |||
|
226 | vfs, | |||
|
227 | filename: filename.as_ref().to_owned(), | |||
|
228 | delayed_buffer: None, | |||
|
229 | file, | |||
|
230 | } | |||
|
231 | } | |||
|
232 | ||||
|
233 | /// Wrap an existing [`File`], but writes go to a [`DelayedBuffer`]. | |||
|
234 | pub fn from_file_delayed( | |||
|
235 | mut file: File, | |||
|
236 | vfs: Box<dyn Vfs>, | |||
|
237 | filename: impl AsRef<Path>, | |||
|
238 | delayed_buffer: Arc<Mutex<DelayedBuffer>>, | |||
|
239 | ) -> Result<Self, HgError> { | |||
|
240 | let size = vfs.file_size(&file)?; | |||
|
241 | let offset = file | |||
|
242 | .stream_position() | |||
|
243 | .when_reading_file(filename.as_ref())?; | |||
|
244 | ||||
|
245 | { | |||
|
246 | let mut buf = delayed_buffer.lock().unwrap(); | |||
|
247 | buf.file_size = size; | |||
|
248 | buf.offset = offset; | |||
|
249 | } | |||
|
250 | ||||
|
251 | Ok(Self { | |||
|
252 | vfs, | |||
|
253 | filename: filename.as_ref().to_owned(), | |||
|
254 | delayed_buffer: Some(delayed_buffer), | |||
|
255 | file, | |||
|
256 | }) | |||
|
257 | } | |||
|
258 | ||||
|
259 | /// Move the position of the handle to `pos`, | |||
|
260 | /// spanning the [`DelayedBuffer`] if defined. Will return an error if | |||
|
261 | /// an invalid seek position is asked, or for any standard io error. | |||
|
262 | pub fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> { | |||
|
263 | if let Some(delay_buf) = &self.delayed_buffer { | |||
|
264 | let mut delay_buf = delay_buf.lock().unwrap(); | |||
|
265 | // Virtual file offset spans real file and data | |||
|
266 | match pos { | |||
|
267 | SeekFrom::Start(offset) => delay_buf.offset = offset, | |||
|
268 | SeekFrom::End(offset) => { | |||
|
269 | delay_buf.offset = | |||
|
270 | delay_buf.len().saturating_add_signed(offset) | |||
|
271 | } | |||
|
272 | SeekFrom::Current(offset) => { | |||
|
273 | delay_buf.offset = | |||
|
274 | delay_buf.offset.saturating_add_signed(offset); | |||
|
275 | } | |||
|
276 | } | |||
|
277 | if delay_buf.offset < delay_buf.file_size { | |||
|
278 | self.file.seek(pos) | |||
|
279 | } else { | |||
|
280 | Ok(delay_buf.offset) | |||
|
281 | } | |||
|
282 | } else { | |||
|
283 | self.file.seek(pos) | |||
|
284 | } | |||
|
285 | } | |||
|
286 | ||||
|
287 | /// Read exactly `length` bytes from the current position. | |||
|
288 | /// Errors are the same as [`std::io::Read::read_exact`]. | |||
|
289 | pub fn read_exact( | |||
|
290 | &mut self, | |||
|
291 | length: usize, | |||
|
292 | ) -> Result<Vec<u8>, std::io::Error> { | |||
|
293 | if let Some(delay_buf) = self.delayed_buffer.as_mut() { | |||
|
294 | let mut delay_buf = delay_buf.lock().unwrap(); | |||
|
295 | let mut buf = vec![0; length]; | |||
|
296 | let offset: isize = | |||
|
297 | delay_buf.offset.try_into().expect("buffer too large"); | |||
|
298 | let file_size: isize = | |||
|
299 | delay_buf.file_size.try_into().expect("file too large"); | |||
|
300 | let span: isize = offset - file_size; | |||
|
301 | let length = length.try_into().expect("too large of a length"); | |||
|
302 | let absolute_span: u64 = | |||
|
303 | span.unsigned_abs().try_into().expect("length too large"); | |||
|
304 | if span < 0 { | |||
|
305 | if length <= absolute_span { | |||
|
306 | // We're only in the file | |||
|
307 | self.file.read_exact(&mut buf)?; | |||
|
308 | } else { | |||
|
309 | // We're spanning file and buffer | |||
|
310 | self.file | |||
|
311 | .read_exact(&mut buf[..absolute_span as usize])?; | |||
|
312 | delay_buf | |||
|
313 | .buffer | |||
|
314 | .take(length - absolute_span) | |||
|
315 | .read_exact(&mut buf[absolute_span as usize..])?; | |||
|
316 | } | |||
|
317 | } else { | |||
|
318 | // We're only in the buffer | |||
|
319 | delay_buf.buffer[absolute_span as usize..] | |||
|
320 | .take(length) | |||
|
321 | .read_exact(&mut buf)?; | |||
|
322 | } | |||
|
323 | delay_buf.offset += length; | |||
|
324 | Ok(buf.to_owned()) | |||
|
325 | } else { | |||
|
326 | let mut buf = vec![0; length]; | |||
|
327 | self.file.read_exact(&mut buf)?; | |||
|
328 | Ok(buf) | |||
|
329 | } | |||
|
330 | } | |||
|
331 | ||||
|
332 | /// Flush the in-memory changes to disk. This does *not* write the | |||
|
333 | /// delayed buffer, only the pending file changes. | |||
|
334 | pub fn flush(&mut self) -> Result<(), HgError> { | |||
|
335 | self.file.flush().when_writing_file(&self.filename) | |||
|
336 | } | |||
|
337 | ||||
|
338 | /// Return the current position in the file | |||
|
339 | pub fn position(&mut self) -> Result<u64, HgError> { | |||
|
340 | self.file | |||
|
341 | .stream_position() | |||
|
342 | .when_reading_file(&self.filename) | |||
|
343 | } | |||
|
344 | ||||
|
345 | /// Append `data` to the file, or to the [`DelayedBuffer`], if any. | |||
|
346 | pub fn write_all(&mut self, data: &[u8]) -> Result<(), HgError> { | |||
|
347 | if let Some(buf) = &mut self.delayed_buffer { | |||
|
348 | let mut delayed_buffer = buf.lock().expect("propagate the panic"); | |||
|
349 | assert_eq!(delayed_buffer.offset, delayed_buffer.len()); | |||
|
350 | delayed_buffer.buffer.extend_from_slice(data); | |||
|
351 | delayed_buffer.offset += data.len() as u64; | |||
|
352 | Ok(()) | |||
|
353 | } else { | |||
|
354 | self.file | |||
|
355 | .write_all(data) | |||
|
356 | .when_writing_file(&self.filename)?; | |||
|
357 | Ok(()) | |||
|
358 | } | |||
|
359 | } | |||
|
360 | } | |||
|
361 | ||||
|
362 | /// Write handles to a given revlog (index + maybe data) | |||
|
363 | #[derive(Debug)] | |||
|
364 | pub struct WriteHandles { | |||
|
365 | /// Handle to the index file | |||
|
366 | pub index_handle: FileHandle, | |||
|
367 | /// Handle to the data file, if the revlog is non-inline | |||
|
368 | pub data_handle: Option<FileHandle>, | |||
|
369 | } | |||
|
370 | ||||
|
#[cfg(test)]
mod tests {
    use std::io::ErrorKind;

    use crate::vfs::VfsImpl;

    use super::*;

    /// Build a boxed VFS rooted at `base`.
    fn vfs_at(base: &Path) -> Box<dyn Vfs> {
        Box::new(VfsImpl {
            base: base.to_owned(),
        })
    }

    /// Snapshot of the delayed buffer attached to `handle`.
    fn delayed_state(handle: &FileHandle) -> DelayedBuffer {
        let shared = handle.delayed_buffer.as_ref().unwrap();
        let state = shared.lock().unwrap();
        state.clone()
    }

    #[test]
    fn test_random_access_file() {
        let base = tempfile::tempdir().unwrap().into_path();
        let filename = Path::new("a");
        let file_path = base.join(filename);
        let raf = RandomAccessFile::new(vfs_at(&base), filename.to_owned());

        assert!(!raf.is_open());
        assert_eq!(&raf.filename, &filename);
        // Asking for a handle on a missing file must fail with `NotFound`
        match raf.get_read_handle().unwrap_err() {
            HgError::IoError { error, .. } => {
                if error.kind() != ErrorKind::NotFound {
                    panic!("should be not found")
                }
            }
            e => panic!("{}", e),
        }

        std::fs::write(file_path, b"1234567890").unwrap();

        // Now that the file exists, opening it works
        let mut handle = raf.get_read_handle().unwrap();
        assert!(raf.is_open());
        assert_eq!(handle.read_exact(10).unwrap(), b"1234567890".to_vec());
    }

    #[test]
    fn test_file_handle() {
        let base = tempfile::tempdir().unwrap().into_path();
        let filename = base.join("a");
        // The file does not exist and `create` is not set: this should fail
        FileHandle::new(vfs_at(&base), &filename, false, false).unwrap_err();
        std::fs::write(&filename, b"1234567890").unwrap();

        let mut read_handle =
            FileHandle::new(vfs_at(&base), &filename, false, false).unwrap();
        assert_eq!(&read_handle.filename, &filename);
        assert_eq!(read_handle.position().unwrap(), 0);

        // Writing to an explicit read handle should fail
        read_handle.write_all(b"some data").unwrap_err();

        // Reading exactly n bytes should work
        assert_eq!(read_handle.read_exact(3).unwrap(), b"123".to_vec());
        // and the position should be remembered
        assert_eq!(read_handle.read_exact(2).unwrap(), b"45".to_vec());

        // Seeking should work
        let position = read_handle.position().unwrap();
        read_handle.seek(SeekFrom::Current(-2)).unwrap();
        assert_eq!(position - 2, read_handle.position().unwrap());

        // Reading more data than is available should fail
        read_handle.read_exact(1000).unwrap_err();

        // Work around the yet unimplemented VFS for write
        let mut options = std::fs::OpenOptions::new();
        options.read(true);
        options.write(true);
        let file = options.open(&filename).unwrap();
        // Open a write handle
        let mut handle = FileHandle::from_file(file, vfs_at(&base), &filename);

        // Now writing should succeed
        handle.write_all(b"new data").unwrap();
        // Opening does not seek, so the write started at offset 0 and left
        // the cursor right after the 8 bytes that were written
        assert_eq!(handle.position().unwrap(), 8);
        // We can still read
        assert_eq!(handle.read_exact(2).unwrap(), b"90".to_vec());
        // Flushing doesn't do anything unexpected
        handle.flush().unwrap();

        let delayed_buffer = Arc::new(Mutex::new(DelayedBuffer::default()));
        let file = options.open(&filename).unwrap();
        let mut handle = FileHandle::from_file_delayed(
            file,
            vfs_at(&base),
            &filename,
            delayed_buffer,
        )
        .unwrap();

        assert_eq!(delayed_state(&handle).file_size, 10);
        handle.seek(SeekFrom::End(0)).unwrap();
        handle.write_all(b"should go to buffer").unwrap();
        assert_eq!(delayed_state(&handle).len(), 29);
        read_handle.seek(SeekFrom::Start(0)).unwrap();
        // On-disk file contents should be unchanged
        assert_eq!(
            read_handle.read_exact(10).unwrap(),
            b"new data90".to_vec(),
        );

        assert_eq!(
            read_handle.read_exact(1).unwrap_err().kind(),
            ErrorKind::UnexpectedEof
        );

        handle.flush().unwrap();
        // On-disk file contents should still be unchanged after a flush
        assert_eq!(
            read_handle.read_exact(1).unwrap_err().kind(),
            ErrorKind::UnexpectedEof
        );

        // Read from the buffer only
        handle.seek(SeekFrom::End(-1)).unwrap();
        assert_eq!(handle.read_exact(1).unwrap(), b"r".to_vec());

        // Read from an overlapping section of file and buffer
        handle.seek(SeekFrom::Start(6)).unwrap();
        assert_eq!(
            handle.read_exact(20).unwrap(),
            b"ta90should go to buf".to_vec()
        );

        // Read from file only
        handle.seek(SeekFrom::Start(0)).unwrap();
        assert_eq!(handle.read_exact(8).unwrap(), b"new data".to_vec());
    }
}
@@ -1,1331 +1,1332 | |||||
1 | // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net> |
|
1 | // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net> | |
2 | // and Mercurial contributors |
|
2 | // and Mercurial contributors | |
3 | // |
|
3 | // | |
4 | // This software may be used and distributed according to the terms of the |
|
4 | // This software may be used and distributed according to the terms of the | |
5 | // GNU General Public License version 2 or any later version. |
|
5 | // GNU General Public License version 2 or any later version. | |
6 | //! Mercurial concepts for handling revision history |
|
6 | //! Mercurial concepts for handling revision history | |
7 |
|
7 | |||
8 | pub mod node; |
|
8 | pub mod node; | |
9 | pub mod nodemap; |
|
9 | pub mod nodemap; | |
10 | mod nodemap_docket; |
|
10 | mod nodemap_docket; | |
11 | pub mod path_encode; |
|
11 | pub mod path_encode; | |
12 | use compression::{uncompressed_zstd_data, CompressionConfig}; |
|
12 | use compression::{uncompressed_zstd_data, CompressionConfig}; | |
13 | pub use node::{FromHexError, Node, NodePrefix}; |
|
13 | pub use node::{FromHexError, Node, NodePrefix}; | |
14 | pub mod changelog; |
|
14 | pub mod changelog; | |
15 | pub mod compression; |
|
15 | pub mod compression; | |
|
16 | pub mod file_io; | |||
16 | pub mod filelog; |
|
17 | pub mod filelog; | |
17 | pub mod index; |
|
18 | pub mod index; | |
18 | pub mod manifest; |
|
19 | pub mod manifest; | |
19 | pub mod patch; |
|
20 | pub mod patch; | |
20 |
|
21 | |||
21 | use std::borrow::Cow; |
|
22 | use std::borrow::Cow; | |
22 | use std::collections::HashSet; |
|
23 | use std::collections::HashSet; | |
23 | use std::io::Read; |
|
24 | use std::io::Read; | |
24 | use std::ops::Deref; |
|
25 | use std::ops::Deref; | |
25 | use std::path::Path; |
|
26 | use std::path::Path; | |
26 |
|
27 | |||
27 | use flate2::read::ZlibDecoder; |
|
28 | use flate2::read::ZlibDecoder; | |
28 | use sha1::{Digest, Sha1}; |
|
29 | use sha1::{Digest, Sha1}; | |
29 |
|
30 | |||
30 | use self::node::{NODE_BYTES_LENGTH, NULL_NODE}; |
|
31 | use self::node::{NODE_BYTES_LENGTH, NULL_NODE}; | |
31 | use self::nodemap_docket::NodeMapDocket; |
|
32 | use self::nodemap_docket::NodeMapDocket; | |
32 | use super::index::Index; |
|
33 | use super::index::Index; | |
33 | use super::index::INDEX_ENTRY_SIZE; |
|
34 | use super::index::INDEX_ENTRY_SIZE; | |
34 | use super::nodemap::{NodeMap, NodeMapError}; |
|
35 | use super::nodemap::{NodeMap, NodeMapError}; | |
35 | use crate::config::{Config, ResourceProfileValue}; |
|
36 | use crate::config::{Config, ResourceProfileValue}; | |
36 | use crate::errors::HgError; |
|
37 | use crate::errors::HgError; | |
37 | use crate::exit_codes; |
|
38 | use crate::exit_codes; | |
38 | use crate::requirements::{ |
|
39 | use crate::requirements::{ | |
39 | GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT, SPARSEREVLOG_REQUIREMENT, |
|
40 | GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT, SPARSEREVLOG_REQUIREMENT, | |
40 | }; |
|
41 | }; | |
41 | use crate::vfs::VfsImpl; |
|
42 | use crate::vfs::VfsImpl; | |
42 |
|
43 | |||
/// The integer type backing revision numbers.
///
/// As noted in revlog.c, revision numbers are actually encoded in
/// 4 bytes, and are liberally converted to ints, whence the i32
pub type BaseRevision = i32;
46 |
|
47 | |||
47 | /// Mercurial revision numbers |
|
48 | /// Mercurial revision numbers | |
48 | /// In contrast to the more general [`UncheckedRevision`], these are "checked" |
|
49 | /// In contrast to the more general [`UncheckedRevision`], these are "checked" | |
49 | /// in the sense that they should only be used for revisions that are |
|
50 | /// in the sense that they should only be used for revisions that are | |
50 | /// valid for a given index (i.e. in bounds). |
|
51 | /// valid for a given index (i.e. in bounds). | |
51 | #[derive( |
|
52 | #[derive( | |
52 | Debug, |
|
53 | Debug, | |
53 | derive_more::Display, |
|
54 | derive_more::Display, | |
54 | Clone, |
|
55 | Clone, | |
55 | Copy, |
|
56 | Copy, | |
56 | Hash, |
|
57 | Hash, | |
57 | PartialEq, |
|
58 | PartialEq, | |
58 | Eq, |
|
59 | Eq, | |
59 | PartialOrd, |
|
60 | PartialOrd, | |
60 | Ord, |
|
61 | Ord, | |
61 | )] |
|
62 | )] | |
62 | pub struct Revision(pub BaseRevision); |
|
63 | pub struct Revision(pub BaseRevision); | |
63 |
|
64 | |||
64 | impl format_bytes::DisplayBytes for Revision { |
|
65 | impl format_bytes::DisplayBytes for Revision { | |
65 | fn display_bytes( |
|
66 | fn display_bytes( | |
66 | &self, |
|
67 | &self, | |
67 | output: &mut dyn std::io::Write, |
|
68 | output: &mut dyn std::io::Write, | |
68 | ) -> std::io::Result<()> { |
|
69 | ) -> std::io::Result<()> { | |
69 | self.0.display_bytes(output) |
|
70 | self.0.display_bytes(output) | |
70 | } |
|
71 | } | |
71 | } |
|
72 | } | |
72 |
|
73 | |||
73 | /// Unchecked Mercurial revision numbers. |
|
74 | /// Unchecked Mercurial revision numbers. | |
74 | /// |
|
75 | /// | |
75 | /// Values of this type have no guarantee of being a valid revision number |
|
76 | /// Values of this type have no guarantee of being a valid revision number | |
76 | /// in any context. Use method `check_revision` to get a valid revision within |
|
77 | /// in any context. Use method `check_revision` to get a valid revision within | |
77 | /// the appropriate index object. |
|
78 | /// the appropriate index object. | |
78 | #[derive( |
|
79 | #[derive( | |
79 | Debug, |
|
80 | Debug, | |
80 | derive_more::Display, |
|
81 | derive_more::Display, | |
81 | Clone, |
|
82 | Clone, | |
82 | Copy, |
|
83 | Copy, | |
83 | Hash, |
|
84 | Hash, | |
84 | PartialEq, |
|
85 | PartialEq, | |
85 | Eq, |
|
86 | Eq, | |
86 | PartialOrd, |
|
87 | PartialOrd, | |
87 | Ord, |
|
88 | Ord, | |
88 | )] |
|
89 | )] | |
89 | pub struct UncheckedRevision(pub BaseRevision); |
|
90 | pub struct UncheckedRevision(pub BaseRevision); | |
90 |
|
91 | |||
91 | impl format_bytes::DisplayBytes for UncheckedRevision { |
|
92 | impl format_bytes::DisplayBytes for UncheckedRevision { | |
92 | fn display_bytes( |
|
93 | fn display_bytes( | |
93 | &self, |
|
94 | &self, | |
94 | output: &mut dyn std::io::Write, |
|
95 | output: &mut dyn std::io::Write, | |
95 | ) -> std::io::Result<()> { |
|
96 | ) -> std::io::Result<()> { | |
96 | self.0.display_bytes(output) |
|
97 | self.0.display_bytes(output) | |
97 | } |
|
98 | } | |
98 | } |
|
99 | } | |
99 |
|
100 | |||
100 | impl From<Revision> for UncheckedRevision { |
|
101 | impl From<Revision> for UncheckedRevision { | |
101 | fn from(value: Revision) -> Self { |
|
102 | fn from(value: Revision) -> Self { | |
102 | Self(value.0) |
|
103 | Self(value.0) | |
103 | } |
|
104 | } | |
104 | } |
|
105 | } | |
105 |
|
106 | |||
106 | impl From<BaseRevision> for UncheckedRevision { |
|
107 | impl From<BaseRevision> for UncheckedRevision { | |
107 | fn from(value: BaseRevision) -> Self { |
|
108 | fn from(value: BaseRevision) -> Self { | |
108 | Self(value) |
|
109 | Self(value) | |
109 | } |
|
110 | } | |
110 | } |
|
111 | } | |
111 |
|
112 | |||
112 | /// Marker expressing the absence of a parent |
|
113 | /// Marker expressing the absence of a parent | |
113 | /// |
|
114 | /// | |
114 | /// Independently of the actual representation, `NULL_REVISION` is guaranteed |
|
115 | /// Independently of the actual representation, `NULL_REVISION` is guaranteed | |
115 | /// to be smaller than all existing revisions. |
|
116 | /// to be smaller than all existing revisions. | |
116 | pub const NULL_REVISION: Revision = Revision(-1); |
|
117 | pub const NULL_REVISION: Revision = Revision(-1); | |
117 |
|
118 | |||
118 | /// Same as `mercurial.node.wdirrev` |
|
119 | /// Same as `mercurial.node.wdirrev` | |
119 | /// |
|
120 | /// | |
120 | /// This is also equal to `i32::max_value()`, but it's better to spell |
|
121 | /// This is also equal to `i32::max_value()`, but it's better to spell | |
121 | /// it out explicitely, same as in `mercurial.node` |
|
122 | /// it out explicitely, same as in `mercurial.node` | |
122 | #[allow(clippy::unreadable_literal)] |
|
123 | #[allow(clippy::unreadable_literal)] | |
123 | pub const WORKING_DIRECTORY_REVISION: UncheckedRevision = |
|
124 | pub const WORKING_DIRECTORY_REVISION: UncheckedRevision = | |
124 | UncheckedRevision(0x7fffffff); |
|
125 | UncheckedRevision(0x7fffffff); | |
125 |
|
126 | |||
126 | pub const WORKING_DIRECTORY_HEX: &str = |
|
127 | pub const WORKING_DIRECTORY_HEX: &str = | |
127 | "ffffffffffffffffffffffffffffffffffffffff"; |
|
128 | "ffffffffffffffffffffffffffffffffffffffff"; | |
128 |
|
129 | |||
129 | /// The simplest expression of what we need of Mercurial DAGs. |
|
130 | /// The simplest expression of what we need of Mercurial DAGs. | |
130 | pub trait Graph { |
|
131 | pub trait Graph { | |
131 | /// Return the two parents of the given `Revision`. |
|
132 | /// Return the two parents of the given `Revision`. | |
132 | /// |
|
133 | /// | |
133 | /// Each of the parents can be independently `NULL_REVISION` |
|
134 | /// Each of the parents can be independently `NULL_REVISION` | |
134 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>; |
|
135 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>; | |
135 | } |
|
136 | } | |
136 |
|
137 | |||
137 | #[derive(Clone, Debug, PartialEq)] |
|
138 | #[derive(Clone, Debug, PartialEq)] | |
138 | pub enum GraphError { |
|
139 | pub enum GraphError { | |
139 | ParentOutOfRange(Revision), |
|
140 | ParentOutOfRange(Revision), | |
140 | } |
|
141 | } | |
141 |
|
142 | |||
142 | impl std::fmt::Display for GraphError { |
|
143 | impl std::fmt::Display for GraphError { | |
143 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
|
144 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
144 | match self { |
|
145 | match self { | |
145 | GraphError::ParentOutOfRange(revision) => { |
|
146 | GraphError::ParentOutOfRange(revision) => { | |
146 | write!(f, "parent out of range ({})", revision) |
|
147 | write!(f, "parent out of range ({})", revision) | |
147 | } |
|
148 | } | |
148 | } |
|
149 | } | |
149 | } |
|
150 | } | |
150 | } |
|
151 | } | |
151 |
|
152 | |||
152 | impl<T: Graph> Graph for &T { |
|
153 | impl<T: Graph> Graph for &T { | |
153 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { |
|
154 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { | |
154 | (*self).parents(rev) |
|
155 | (*self).parents(rev) | |
155 | } |
|
156 | } | |
156 | } |
|
157 | } | |
157 |
|
158 | |||
158 | /// The Mercurial Revlog Index |
|
159 | /// The Mercurial Revlog Index | |
159 | /// |
|
160 | /// | |
160 | /// This is currently limited to the minimal interface that is needed for |
|
161 | /// This is currently limited to the minimal interface that is needed for | |
161 | /// the [`nodemap`](nodemap/index.html) module |
|
162 | /// the [`nodemap`](nodemap/index.html) module | |
162 | pub trait RevlogIndex { |
|
163 | pub trait RevlogIndex { | |
163 | /// Total number of Revisions referenced in this index |
|
164 | /// Total number of Revisions referenced in this index | |
164 | fn len(&self) -> usize; |
|
165 | fn len(&self) -> usize; | |
165 |
|
166 | |||
166 | fn is_empty(&self) -> bool { |
|
167 | fn is_empty(&self) -> bool { | |
167 | self.len() == 0 |
|
168 | self.len() == 0 | |
168 | } |
|
169 | } | |
169 |
|
170 | |||
170 | /// Return a reference to the Node or `None` for `NULL_REVISION` |
|
171 | /// Return a reference to the Node or `None` for `NULL_REVISION` | |
171 | fn node(&self, rev: Revision) -> Option<&Node>; |
|
172 | fn node(&self, rev: Revision) -> Option<&Node>; | |
172 |
|
173 | |||
173 | /// Return a [`Revision`] if `rev` is a valid revision number for this |
|
174 | /// Return a [`Revision`] if `rev` is a valid revision number for this | |
174 | /// index. |
|
175 | /// index. | |
175 | /// |
|
176 | /// | |
176 | /// [`NULL_REVISION`] is considered to be valid. |
|
177 | /// [`NULL_REVISION`] is considered to be valid. | |
177 | #[inline(always)] |
|
178 | #[inline(always)] | |
178 | fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> { |
|
179 | fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> { | |
179 | let rev = rev.0; |
|
180 | let rev = rev.0; | |
180 |
|
181 | |||
181 | if rev == NULL_REVISION.0 || (rev >= 0 && (rev as usize) < self.len()) |
|
182 | if rev == NULL_REVISION.0 || (rev >= 0 && (rev as usize) < self.len()) | |
182 | { |
|
183 | { | |
183 | Some(Revision(rev)) |
|
184 | Some(Revision(rev)) | |
184 | } else { |
|
185 | } else { | |
185 | None |
|
186 | None | |
186 | } |
|
187 | } | |
187 | } |
|
188 | } | |
188 | } |
|
189 | } | |
189 |
|
190 | |||
/// Revision flag "censored": bit 15 (`1 << 15`).
const REVISION_FLAG_CENSORED: u16 = 0x8000;
/// Revision flag "ellipsis": bit 14 (`1 << 14`).
const REVISION_FLAG_ELLIPSIS: u16 = 0x4000;
/// Revision flag "extstored": bit 13 (`1 << 13`).
const REVISION_FLAG_EXTSTORED: u16 = 0x2000;
/// Revision flag "hascopiesinfo": bit 12 (`1 << 12`).
const REVISION_FLAG_HASCOPIESINFO: u16 = 0x1000;
194 |
|
195 | |||
195 | // Keep this in sync with REVIDX_KNOWN_FLAGS in |
|
196 | // Keep this in sync with REVIDX_KNOWN_FLAGS in | |
196 | // mercurial/revlogutils/flagutil.py |
|
197 | // mercurial/revlogutils/flagutil.py | |
197 | const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED |
|
198 | const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED | |
198 | | REVISION_FLAG_ELLIPSIS |
|
199 | | REVISION_FLAG_ELLIPSIS | |
199 | | REVISION_FLAG_EXTSTORED |
|
200 | | REVISION_FLAG_EXTSTORED | |
200 | | REVISION_FLAG_HASCOPIESINFO; |
|
201 | | REVISION_FLAG_HASCOPIESINFO; | |
201 |
|
202 | |||
/// Flags value used for the null revlog entry: no flag bit is set.
const NULL_REVLOG_ENTRY_FLAGS: u16 = 0x0000;
203 |
|
204 | |||
204 | #[derive(Debug, derive_more::From, derive_more::Display)] |
|
205 | #[derive(Debug, derive_more::From, derive_more::Display)] | |
205 | pub enum RevlogError { |
|
206 | pub enum RevlogError { | |
206 | #[display(fmt = "invalid revision identifier: {}", "_0")] |
|
207 | #[display(fmt = "invalid revision identifier: {}", "_0")] | |
207 | InvalidRevision(String), |
|
208 | InvalidRevision(String), | |
208 | /// Working directory is not supported |
|
209 | /// Working directory is not supported | |
209 | WDirUnsupported, |
|
210 | WDirUnsupported, | |
210 | /// Found more than one entry whose ID match the requested prefix |
|
211 | /// Found more than one entry whose ID match the requested prefix | |
211 | AmbiguousPrefix, |
|
212 | AmbiguousPrefix, | |
212 | #[from] |
|
213 | #[from] | |
213 | Other(HgError), |
|
214 | Other(HgError), | |
214 | } |
|
215 | } | |
215 |
|
216 | |||
216 | impl From<NodeMapError> for RevlogError { |
|
217 | impl From<NodeMapError> for RevlogError { | |
217 | fn from(error: NodeMapError) -> Self { |
|
218 | fn from(error: NodeMapError) -> Self { | |
218 | match error { |
|
219 | match error { | |
219 | NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix, |
|
220 | NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix, | |
220 | NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted( |
|
221 | NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted( | |
221 | format!("nodemap point to revision {} not in index", rev), |
|
222 | format!("nodemap point to revision {} not in index", rev), | |
222 | ), |
|
223 | ), | |
223 | } |
|
224 | } | |
224 | } |
|
225 | } | |
225 | } |
|
226 | } | |
226 |
|
227 | |||
227 | fn corrupted<S: AsRef<str>>(context: S) -> HgError { |
|
228 | fn corrupted<S: AsRef<str>>(context: S) -> HgError { | |
228 | HgError::corrupted(format!("corrupted revlog, {}", context.as_ref())) |
|
229 | HgError::corrupted(format!("corrupted revlog, {}", context.as_ref())) | |
229 | } |
|
230 | } | |
230 |
|
231 | |||
231 | impl RevlogError { |
|
232 | impl RevlogError { | |
232 | fn corrupted<S: AsRef<str>>(context: S) -> Self { |
|
233 | fn corrupted<S: AsRef<str>>(context: S) -> Self { | |
233 | RevlogError::Other(corrupted(context)) |
|
234 | RevlogError::Other(corrupted(context)) | |
234 | } |
|
235 | } | |
235 | } |
|
236 | } | |
236 |
|
237 | |||
237 | #[derive(derive_more::Display, Debug, Copy, Clone, PartialEq, Eq)] |
|
238 | #[derive(derive_more::Display, Debug, Copy, Clone, PartialEq, Eq)] | |
238 | pub enum RevlogType { |
|
239 | pub enum RevlogType { | |
239 | Changelog, |
|
240 | Changelog, | |
240 | Manifestlog, |
|
241 | Manifestlog, | |
241 | Filelog, |
|
242 | Filelog, | |
242 | } |
|
243 | } | |
243 |
|
244 | |||
244 | impl TryFrom<usize> for RevlogType { |
|
245 | impl TryFrom<usize> for RevlogType { | |
245 | type Error = HgError; |
|
246 | type Error = HgError; | |
246 |
|
247 | |||
247 | fn try_from(value: usize) -> Result<Self, Self::Error> { |
|
248 | fn try_from(value: usize) -> Result<Self, Self::Error> { | |
248 | match value { |
|
249 | match value { | |
249 | 1001 => Ok(Self::Changelog), |
|
250 | 1001 => Ok(Self::Changelog), | |
250 | 1002 => Ok(Self::Manifestlog), |
|
251 | 1002 => Ok(Self::Manifestlog), | |
251 | 1003 => Ok(Self::Filelog), |
|
252 | 1003 => Ok(Self::Filelog), | |
252 | t => Err(HgError::abort( |
|
253 | t => Err(HgError::abort( | |
253 | format!("Unknown revlog type {}", t), |
|
254 | format!("Unknown revlog type {}", t), | |
254 | exit_codes::ABORT, |
|
255 | exit_codes::ABORT, | |
255 | None, |
|
256 | None, | |
256 | )), |
|
257 | )), | |
257 | } |
|
258 | } | |
258 | } |
|
259 | } | |
259 | } |
|
260 | } | |
260 |
|
261 | |||
/// Configuration values that control how the revlog data is read
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct RevlogDataConfig {
    /// Whether to try opening the "pending" version of the revlog
    pub try_pending: bool,
    /// Whether to try opening the "split" version of the revlog
    pub try_split: bool,
    /// When true, `indexfile` should be opened with `checkambig=True` at
    /// writing time, to avoid file stat ambiguity
    pub check_ambig: bool,
    /// When true, large indexes are accessed through mmap rather than read
    pub mmap_large_index: bool,
    /// Amount of data from which an index is considered large
    pub mmap_index_threshold: Option<u64>,
    /// Amount of data to read and cache into the raw revlog data cache
    pub chunk_cache_size: u64,
    /// Size of the uncompressed cache, relative to the largest revision
    /// seen
    pub uncompressed_cache_factor: Option<f64>,
    /// Number of chunks cached
    pub uncompressed_cache_count: Option<u64>,
    /// Whether sparse reading of the revlog data is allowed
    pub with_sparse_read: bool,
    /// Minimal density of a sparse read chunk
    pub sr_density_threshold: f64,
    /// Minimal size of the data skipped when performing sparse reads
    pub sr_min_gap_size: u64,
    /// Whether deltas are encoded against arbitrary bases
    pub general_delta: bool,
}
291 |
|
292 | |||
292 | impl RevlogDataConfig { |
|
293 | impl RevlogDataConfig { | |
293 | pub fn new( |
|
294 | pub fn new( | |
294 | config: &Config, |
|
295 | config: &Config, | |
295 | requirements: &HashSet<String>, |
|
296 | requirements: &HashSet<String>, | |
296 | ) -> Result<Self, HgError> { |
|
297 | ) -> Result<Self, HgError> { | |
297 | let mut data_config = Self::default(); |
|
298 | let mut data_config = Self::default(); | |
298 | if let Some(chunk_cache_size) = |
|
299 | if let Some(chunk_cache_size) = | |
299 | config.get_byte_size(b"format", b"chunkcachesize")? |
|
300 | config.get_byte_size(b"format", b"chunkcachesize")? | |
300 | { |
|
301 | { | |
301 | data_config.chunk_cache_size = chunk_cache_size; |
|
302 | data_config.chunk_cache_size = chunk_cache_size; | |
302 | } |
|
303 | } | |
303 |
|
304 | |||
304 | let memory_profile = config.get_resource_profile(Some("memory")); |
|
305 | let memory_profile = config.get_resource_profile(Some("memory")); | |
305 | if memory_profile.value >= ResourceProfileValue::Medium { |
|
306 | if memory_profile.value >= ResourceProfileValue::Medium { | |
306 | data_config.uncompressed_cache_count = Some(10_000); |
|
307 | data_config.uncompressed_cache_count = Some(10_000); | |
307 | data_config.uncompressed_cache_factor = Some(4.0); |
|
308 | data_config.uncompressed_cache_factor = Some(4.0); | |
308 | if memory_profile.value >= ResourceProfileValue::High { |
|
309 | if memory_profile.value >= ResourceProfileValue::High { | |
309 | data_config.uncompressed_cache_factor = Some(10.0) |
|
310 | data_config.uncompressed_cache_factor = Some(10.0) | |
310 | } |
|
311 | } | |
311 | } |
|
312 | } | |
312 |
|
313 | |||
313 | if let Some(mmap_index_threshold) = config |
|
314 | if let Some(mmap_index_threshold) = config | |
314 | .get_byte_size(b"storage", b"revlog.mmap.index:size-threshold")? |
|
315 | .get_byte_size(b"storage", b"revlog.mmap.index:size-threshold")? | |
315 | { |
|
316 | { | |
316 | data_config.mmap_index_threshold = Some(mmap_index_threshold); |
|
317 | data_config.mmap_index_threshold = Some(mmap_index_threshold); | |
317 | } |
|
318 | } | |
318 |
|
319 | |||
319 | let with_sparse_read = |
|
320 | let with_sparse_read = | |
320 | config.get_bool(b"experimental", b"sparse-read")?; |
|
321 | config.get_bool(b"experimental", b"sparse-read")?; | |
321 | if let Some(sr_density_threshold) = config |
|
322 | if let Some(sr_density_threshold) = config | |
322 | .get_f64(b"experimental", b"sparse-read.density-threshold")? |
|
323 | .get_f64(b"experimental", b"sparse-read.density-threshold")? | |
323 | { |
|
324 | { | |
324 | data_config.sr_density_threshold = sr_density_threshold; |
|
325 | data_config.sr_density_threshold = sr_density_threshold; | |
325 | } |
|
326 | } | |
326 | data_config.with_sparse_read = with_sparse_read; |
|
327 | data_config.with_sparse_read = with_sparse_read; | |
327 | if let Some(sr_min_gap_size) = config |
|
328 | if let Some(sr_min_gap_size) = config | |
328 | .get_byte_size(b"experimental", b"sparse-read.min-gap-size")? |
|
329 | .get_byte_size(b"experimental", b"sparse-read.min-gap-size")? | |
329 | { |
|
330 | { | |
330 | data_config.sr_min_gap_size = sr_min_gap_size; |
|
331 | data_config.sr_min_gap_size = sr_min_gap_size; | |
331 | } |
|
332 | } | |
332 |
|
333 | |||
333 | data_config.with_sparse_read = |
|
334 | data_config.with_sparse_read = | |
334 | requirements.contains(SPARSEREVLOG_REQUIREMENT); |
|
335 | requirements.contains(SPARSEREVLOG_REQUIREMENT); | |
335 |
|
336 | |||
336 | Ok(data_config) |
|
337 | Ok(data_config) | |
337 | } |
|
338 | } | |
338 | } |
|
339 | } | |
339 |
|
340 | |||
340 | impl Default for RevlogDataConfig { |
|
341 | impl Default for RevlogDataConfig { | |
341 | fn default() -> Self { |
|
342 | fn default() -> Self { | |
342 | Self { |
|
343 | Self { | |
343 | chunk_cache_size: 65536, |
|
344 | chunk_cache_size: 65536, | |
344 | sr_density_threshold: 0.50, |
|
345 | sr_density_threshold: 0.50, | |
345 | sr_min_gap_size: 262144, |
|
346 | sr_min_gap_size: 262144, | |
346 | try_pending: Default::default(), |
|
347 | try_pending: Default::default(), | |
347 | try_split: Default::default(), |
|
348 | try_split: Default::default(), | |
348 | check_ambig: Default::default(), |
|
349 | check_ambig: Default::default(), | |
349 | mmap_large_index: Default::default(), |
|
350 | mmap_large_index: Default::default(), | |
350 | mmap_index_threshold: Default::default(), |
|
351 | mmap_index_threshold: Default::default(), | |
351 | uncompressed_cache_factor: Default::default(), |
|
352 | uncompressed_cache_factor: Default::default(), | |
352 | uncompressed_cache_count: Default::default(), |
|
353 | uncompressed_cache_count: Default::default(), | |
353 | with_sparse_read: Default::default(), |
|
354 | with_sparse_read: Default::default(), | |
354 | general_delta: Default::default(), |
|
355 | general_delta: Default::default(), | |
355 | } |
|
356 | } | |
356 | } |
|
357 | } | |
357 | } |
|
358 | } | |
358 |
|
359 | |||
/// Configuration values that control how new deltas are computed.
///
/// A few attributes duplicate those of [`RevlogDataConfig`] so that each
/// object stays self contained.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct RevlogDeltaConfig {
    /// Whether deltas can be encoded against arbitrary bases
    pub general_delta: bool,
    /// Whether sparse writing of the revlog data is allowed
    pub sparse_revlog: bool,
    /// Maximum length of a delta chain
    pub max_chain_len: Option<u64>,
    /// Maximum distance between a delta chain's start and end
    pub max_deltachain_span: Option<u64>,
    /// When not `None`, the expected maximal gain from compression for
    /// the data content
    pub upper_bound_comp: Option<f64>,
    /// Whether to try a delta against both parents
    pub delta_both_parents: bool,
    /// Test delta base candidate groups by chunks of this maximal size
    pub candidate_group_chunk_size: u64,
    /// Whether to display debug information about delta computation
    pub debug_delta: bool,
    /// Trust incoming deltas by default
    pub lazy_delta: bool,
    /// Trust the base of incoming deltas by default
    pub lazy_delta_base: bool,
}
387 | impl RevlogDeltaConfig { |
|
388 | impl RevlogDeltaConfig { | |
388 | pub fn new( |
|
389 | pub fn new( | |
389 | config: &Config, |
|
390 | config: &Config, | |
390 | requirements: &HashSet<String>, |
|
391 | requirements: &HashSet<String>, | |
391 | revlog_type: RevlogType, |
|
392 | revlog_type: RevlogType, | |
392 | ) -> Result<Self, HgError> { |
|
393 | ) -> Result<Self, HgError> { | |
393 | let mut delta_config = Self { |
|
394 | let mut delta_config = Self { | |
394 | delta_both_parents: config |
|
395 | delta_both_parents: config | |
395 | .get_option_no_default( |
|
396 | .get_option_no_default( | |
396 | b"storage", |
|
397 | b"storage", | |
397 | b"revlog.optimize-delta-parent-choice", |
|
398 | b"revlog.optimize-delta-parent-choice", | |
398 | )? |
|
399 | )? | |
399 | .unwrap_or(true), |
|
400 | .unwrap_or(true), | |
400 | candidate_group_chunk_size: config |
|
401 | candidate_group_chunk_size: config | |
401 | .get_u64( |
|
402 | .get_u64( | |
402 | b"storage", |
|
403 | b"storage", | |
403 | b"revlog.delta-parent-search.candidate-group-chunk-size", |
|
404 | b"revlog.delta-parent-search.candidate-group-chunk-size", | |
404 | )? |
|
405 | )? | |
405 | .unwrap_or_default(), |
|
406 | .unwrap_or_default(), | |
406 | ..Default::default() |
|
407 | ..Default::default() | |
407 | }; |
|
408 | }; | |
408 |
|
409 | |||
409 | delta_config.debug_delta = |
|
410 | delta_config.debug_delta = | |
410 | config.get_bool(b"debug", b"revlog.debug-delta")?; |
|
411 | config.get_bool(b"debug", b"revlog.debug-delta")?; | |
411 |
|
412 | |||
412 | delta_config.general_delta = |
|
413 | delta_config.general_delta = | |
413 | requirements.contains(GENERALDELTA_REQUIREMENT); |
|
414 | requirements.contains(GENERALDELTA_REQUIREMENT); | |
414 |
|
415 | |||
415 | let lazy_delta = |
|
416 | let lazy_delta = | |
416 | config.get_bool(b"storage", b"revlog.reuse-external-delta")?; |
|
417 | config.get_bool(b"storage", b"revlog.reuse-external-delta")?; | |
417 |
|
418 | |||
418 | if revlog_type == RevlogType::Manifestlog { |
|
419 | if revlog_type == RevlogType::Manifestlog { | |
419 | // upper bound of what we expect from compression |
|
420 | // upper bound of what we expect from compression | |
420 | // (real life value seems to be 3) |
|
421 | // (real life value seems to be 3) | |
421 | delta_config.upper_bound_comp = Some(3.0) |
|
422 | delta_config.upper_bound_comp = Some(3.0) | |
422 | } |
|
423 | } | |
423 |
|
424 | |||
424 | let mut lazy_delta_base = false; |
|
425 | let mut lazy_delta_base = false; | |
425 | if lazy_delta { |
|
426 | if lazy_delta { | |
426 | lazy_delta_base = match config.get_option_no_default( |
|
427 | lazy_delta_base = match config.get_option_no_default( | |
427 | b"storage", |
|
428 | b"storage", | |
428 | b"revlog.reuse-external-delta-parent", |
|
429 | b"revlog.reuse-external-delta-parent", | |
429 | )? { |
|
430 | )? { | |
430 | Some(base) => base, |
|
431 | Some(base) => base, | |
431 | None => config.get_bool(b"format", b"generaldelta")?, |
|
432 | None => config.get_bool(b"format", b"generaldelta")?, | |
432 | }; |
|
433 | }; | |
433 | } |
|
434 | } | |
434 | delta_config.lazy_delta = lazy_delta; |
|
435 | delta_config.lazy_delta = lazy_delta; | |
435 | delta_config.lazy_delta_base = lazy_delta_base; |
|
436 | delta_config.lazy_delta_base = lazy_delta_base; | |
436 |
|
437 | |||
437 | delta_config.max_deltachain_span = |
|
438 | delta_config.max_deltachain_span = | |
438 | match config.get_i64(b"experimental", b"maxdeltachainspan")? { |
|
439 | match config.get_i64(b"experimental", b"maxdeltachainspan")? { | |
439 | Some(span) => { |
|
440 | Some(span) => { | |
440 | if span < 0 { |
|
441 | if span < 0 { | |
441 | None |
|
442 | None | |
442 | } else { |
|
443 | } else { | |
443 | Some(span as u64) |
|
444 | Some(span as u64) | |
444 | } |
|
445 | } | |
445 | } |
|
446 | } | |
446 | None => None, |
|
447 | None => None, | |
447 | }; |
|
448 | }; | |
448 |
|
449 | |||
449 | delta_config.sparse_revlog = |
|
450 | delta_config.sparse_revlog = | |
450 | requirements.contains(SPARSEREVLOG_REQUIREMENT); |
|
451 | requirements.contains(SPARSEREVLOG_REQUIREMENT); | |
451 |
|
452 | |||
452 | delta_config.max_chain_len = |
|
453 | delta_config.max_chain_len = | |
453 | config.get_byte_size_no_default(b"format", b"maxchainlen")?; |
|
454 | config.get_byte_size_no_default(b"format", b"maxchainlen")?; | |
454 |
|
455 | |||
455 | Ok(delta_config) |
|
456 | Ok(delta_config) | |
456 | } |
|
457 | } | |
457 | } |
|
458 | } | |
458 |
|
459 | |||
459 | impl Default for RevlogDeltaConfig { |
|
460 | impl Default for RevlogDeltaConfig { | |
460 | fn default() -> Self { |
|
461 | fn default() -> Self { | |
461 | Self { |
|
462 | Self { | |
462 | delta_both_parents: true, |
|
463 | delta_both_parents: true, | |
463 | lazy_delta: true, |
|
464 | lazy_delta: true, | |
464 | general_delta: Default::default(), |
|
465 | general_delta: Default::default(), | |
465 | sparse_revlog: Default::default(), |
|
466 | sparse_revlog: Default::default(), | |
466 | max_chain_len: Default::default(), |
|
467 | max_chain_len: Default::default(), | |
467 | max_deltachain_span: Default::default(), |
|
468 | max_deltachain_span: Default::default(), | |
468 | upper_bound_comp: Default::default(), |
|
469 | upper_bound_comp: Default::default(), | |
469 | candidate_group_chunk_size: Default::default(), |
|
470 | candidate_group_chunk_size: Default::default(), | |
470 | debug_delta: Default::default(), |
|
471 | debug_delta: Default::default(), | |
471 | lazy_delta_base: Default::default(), |
|
472 | lazy_delta_base: Default::default(), | |
472 | } |
|
473 | } | |
473 | } |
|
474 | } | |
474 | } |
|
475 | } | |
475 |
|
476 | |||
476 | #[derive(Debug, Default, Clone, Copy, PartialEq)] |
|
477 | #[derive(Debug, Default, Clone, Copy, PartialEq)] | |
477 | /// Holds configuration values about the available revlog features |
|
478 | /// Holds configuration values about the available revlog features | |
478 | pub struct RevlogFeatureConfig { |
|
479 | pub struct RevlogFeatureConfig { | |
479 | /// The compression engine and its options |
|
480 | /// The compression engine and its options | |
480 | pub compression_engine: CompressionConfig, |
|
481 | pub compression_engine: CompressionConfig, | |
481 | /// Can we use censor on this revlog |
|
482 | /// Can we use censor on this revlog | |
482 | pub censorable: bool, |
|
483 | pub censorable: bool, | |
483 | /// Does this revlog use the "side data" feature |
|
484 | /// Does this revlog use the "side data" feature | |
484 | pub has_side_data: bool, |
|
485 | pub has_side_data: bool, | |
485 | /// Might remove this configuration once the rank computation has no |
|
486 | /// Might remove this configuration once the rank computation has no | |
486 | /// impact |
|
487 | /// impact | |
487 | pub compute_rank: bool, |
|
488 | pub compute_rank: bool, | |
488 | /// Parent order is supposed to be semantically irrelevant, so we |
|
489 | /// Parent order is supposed to be semantically irrelevant, so we | |
489 | /// normally re-sort parents to ensure that the first parent is non-null, |
|
490 | /// normally re-sort parents to ensure that the first parent is non-null, | |
490 | /// if there is a non-null parent at all. |
|
491 | /// if there is a non-null parent at all. | |
491 | /// filelog abuses the parent order as a flag to mark some instances of |
|
492 | /// filelog abuses the parent order as a flag to mark some instances of | |
492 | /// meta-encoded files, so allow it to disable this behavior. |
|
493 | /// meta-encoded files, so allow it to disable this behavior. | |
493 | pub canonical_parent_order: bool, |
|
494 | pub canonical_parent_order: bool, | |
494 | /// Can ellipsis commit be used |
|
495 | /// Can ellipsis commit be used | |
495 | pub enable_ellipsis: bool, |
|
496 | pub enable_ellipsis: bool, | |
496 | } |
|
497 | } | |
497 | impl RevlogFeatureConfig { |
|
498 | impl RevlogFeatureConfig { | |
498 | pub fn new( |
|
499 | pub fn new( | |
499 | config: &Config, |
|
500 | config: &Config, | |
500 | requirements: &HashSet<String>, |
|
501 | requirements: &HashSet<String>, | |
501 | ) -> Result<Self, HgError> { |
|
502 | ) -> Result<Self, HgError> { | |
502 | Ok(Self { |
|
503 | Ok(Self { | |
503 | compression_engine: CompressionConfig::new(config, requirements)?, |
|
504 | compression_engine: CompressionConfig::new(config, requirements)?, | |
504 | enable_ellipsis: requirements.contains(NARROW_REQUIREMENT), |
|
505 | enable_ellipsis: requirements.contains(NARROW_REQUIREMENT), | |
505 | ..Default::default() |
|
506 | ..Default::default() | |
506 | }) |
|
507 | }) | |
507 | } |
|
508 | } | |
508 | } |
|
509 | } | |
509 |
|
510 | |||
510 | /// Read only implementation of revlog. |
|
511 | /// Read only implementation of revlog. | |
511 | pub struct Revlog { |
|
512 | pub struct Revlog { | |
512 | /// When index and data are not interleaved: bytes of the revlog index. |
|
513 | /// When index and data are not interleaved: bytes of the revlog index. | |
513 | /// When index and data are interleaved: bytes of the revlog index and |
|
514 | /// When index and data are interleaved: bytes of the revlog index and | |
514 | /// data. |
|
515 | /// data. | |
515 | index: Index, |
|
516 | index: Index, | |
516 | /// When index and data are not interleaved: bytes of the revlog data |
|
517 | /// When index and data are not interleaved: bytes of the revlog data | |
517 | data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>, |
|
518 | data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>, | |
518 | /// When present on disk: the persistent nodemap for this revlog |
|
519 | /// When present on disk: the persistent nodemap for this revlog | |
519 | nodemap: Option<nodemap::NodeTree>, |
|
520 | nodemap: Option<nodemap::NodeTree>, | |
520 | } |
|
521 | } | |
521 |
|
522 | |||
522 | impl Graph for Revlog { |
|
523 | impl Graph for Revlog { | |
523 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { |
|
524 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { | |
524 | self.index.parents(rev) |
|
525 | self.index.parents(rev) | |
525 | } |
|
526 | } | |
526 | } |
|
527 | } | |
527 |
|
528 | |||
/// The revlog format version, together with the options that only exist
/// for that version.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum RevlogVersionOptions {
    /// Version 0, no option
    V0,
    /// Version 1, with its general-delta and inline options
    V1 { general_delta: bool, inline: bool },
    /// Version 2, no option
    V2,
    /// Changelog version 2, with its rank computation option
    ChangelogV2 { compute_rank: bool },
}
535 |
|
536 | |||
536 | /// Options to govern how a revlog should be opened, usually from the |
|
537 | /// Options to govern how a revlog should be opened, usually from the | |
537 | /// repository configuration or requirements. |
|
538 | /// repository configuration or requirements. | |
538 | #[derive(Debug, Copy, Clone)] |
|
539 | #[derive(Debug, Copy, Clone)] | |
539 | pub struct RevlogOpenOptions { |
|
540 | pub struct RevlogOpenOptions { | |
540 | /// The revlog version, along with any option specific to this version |
|
541 | /// The revlog version, along with any option specific to this version | |
541 | pub version: RevlogVersionOptions, |
|
542 | pub version: RevlogVersionOptions, | |
542 | /// Whether the revlog uses a persistent nodemap. |
|
543 | /// Whether the revlog uses a persistent nodemap. | |
543 | pub use_nodemap: bool, |
|
544 | pub use_nodemap: bool, | |
544 | pub delta_config: RevlogDeltaConfig, |
|
545 | pub delta_config: RevlogDeltaConfig, | |
545 | pub data_config: RevlogDataConfig, |
|
546 | pub data_config: RevlogDataConfig, | |
546 | pub feature_config: RevlogFeatureConfig, |
|
547 | pub feature_config: RevlogFeatureConfig, | |
547 | } |
|
548 | } | |
548 |
|
549 | |||
/// Test-only defaults: a non-inline version 1 revlog with general delta,
/// a persistent nodemap and default sub-configurations.
#[cfg(test)]
impl Default for RevlogOpenOptions {
    fn default() -> Self {
        let version = RevlogVersionOptions::V1 {
            general_delta: true,
            inline: false,
        };
        Self {
            version,
            use_nodemap: true,
            delta_config: RevlogDeltaConfig::default(),
            data_config: RevlogDataConfig::default(),
            feature_config: RevlogFeatureConfig::default(),
        }
    }
}
564 |
|
565 | |||
565 | impl RevlogOpenOptions { |
|
566 | impl RevlogOpenOptions { | |
566 | pub fn new( |
|
567 | pub fn new( | |
567 | inline: bool, |
|
568 | inline: bool, | |
568 | data_config: RevlogDataConfig, |
|
569 | data_config: RevlogDataConfig, | |
569 | delta_config: RevlogDeltaConfig, |
|
570 | delta_config: RevlogDeltaConfig, | |
570 | feature_config: RevlogFeatureConfig, |
|
571 | feature_config: RevlogFeatureConfig, | |
571 | ) -> Self { |
|
572 | ) -> Self { | |
572 | Self { |
|
573 | Self { | |
573 | version: RevlogVersionOptions::V1 { |
|
574 | version: RevlogVersionOptions::V1 { | |
574 | general_delta: data_config.general_delta, |
|
575 | general_delta: data_config.general_delta, | |
575 | inline, |
|
576 | inline, | |
576 | }, |
|
577 | }, | |
577 | use_nodemap: false, |
|
578 | use_nodemap: false, | |
578 | data_config, |
|
579 | data_config, | |
579 | delta_config, |
|
580 | delta_config, | |
580 | feature_config, |
|
581 | feature_config, | |
581 | } |
|
582 | } | |
582 | } |
|
583 | } | |
583 |
|
584 | |||
584 | pub fn index_header(&self) -> index::IndexHeader { |
|
585 | pub fn index_header(&self) -> index::IndexHeader { | |
585 | index::IndexHeader { |
|
586 | index::IndexHeader { | |
586 | header_bytes: match self.version { |
|
587 | header_bytes: match self.version { | |
587 | RevlogVersionOptions::V0 => [0, 0, 0, 0], |
|
588 | RevlogVersionOptions::V0 => [0, 0, 0, 0], | |
588 | RevlogVersionOptions::V1 { |
|
589 | RevlogVersionOptions::V1 { | |
589 | general_delta, |
|
590 | general_delta, | |
590 | inline, |
|
591 | inline, | |
591 | } => [ |
|
592 | } => [ | |
592 | 0, |
|
593 | 0, | |
593 | if general_delta && inline { |
|
594 | if general_delta && inline { | |
594 | 3 |
|
595 | 3 | |
595 | } else if general_delta { |
|
596 | } else if general_delta { | |
596 | 2 |
|
597 | 2 | |
597 | } else { |
|
598 | } else { | |
598 | u8::from(inline) |
|
599 | u8::from(inline) | |
599 | }, |
|
600 | }, | |
600 | 0, |
|
601 | 0, | |
601 | 1, |
|
602 | 1, | |
602 | ], |
|
603 | ], | |
603 | RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(), |
|
604 | RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(), | |
604 | RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => { |
|
605 | RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => { | |
605 | 0xD34Du32.to_be_bytes() |
|
606 | 0xD34Du32.to_be_bytes() | |
606 | } |
|
607 | } | |
607 | }, |
|
608 | }, | |
608 | } |
|
609 | } | |
609 | } |
|
610 | } | |
610 | } |
|
611 | } | |
611 |
|
612 | |||
612 | impl Revlog { |
|
613 | impl Revlog { | |
613 | /// Open a revlog index file. |
|
614 | /// Open a revlog index file. | |
614 | /// |
|
615 | /// | |
615 | /// It will also open the associated data file if index and data are not |
|
616 | /// It will also open the associated data file if index and data are not | |
616 | /// interleaved. |
|
617 | /// interleaved. | |
617 | pub fn open( |
|
618 | pub fn open( | |
618 | // Todo use the `Vfs` trait here once we create a function for mmap |
|
619 | // Todo use the `Vfs` trait here once we create a function for mmap | |
619 | store_vfs: &VfsImpl, |
|
620 | store_vfs: &VfsImpl, | |
620 | index_path: impl AsRef<Path>, |
|
621 | index_path: impl AsRef<Path>, | |
621 | data_path: Option<&Path>, |
|
622 | data_path: Option<&Path>, | |
622 | options: RevlogOpenOptions, |
|
623 | options: RevlogOpenOptions, | |
623 | ) -> Result<Self, HgError> { |
|
624 | ) -> Result<Self, HgError> { | |
624 | Self::open_gen(store_vfs, index_path, data_path, options, None) |
|
625 | Self::open_gen(store_vfs, index_path, data_path, options, None) | |
625 | } |
|
626 | } | |
626 |
|
627 | |||
627 | fn open_gen( |
|
628 | fn open_gen( | |
628 | // Todo use the `Vfs` trait here once we create a function for mmap |
|
629 | // Todo use the `Vfs` trait here once we create a function for mmap | |
629 | store_vfs: &VfsImpl, |
|
630 | store_vfs: &VfsImpl, | |
630 | index_path: impl AsRef<Path>, |
|
631 | index_path: impl AsRef<Path>, | |
631 | data_path: Option<&Path>, |
|
632 | data_path: Option<&Path>, | |
632 | options: RevlogOpenOptions, |
|
633 | options: RevlogOpenOptions, | |
633 | nodemap_for_test: Option<nodemap::NodeTree>, |
|
634 | nodemap_for_test: Option<nodemap::NodeTree>, | |
634 | ) -> Result<Self, HgError> { |
|
635 | ) -> Result<Self, HgError> { | |
635 | let index_path = index_path.as_ref(); |
|
636 | let index_path = index_path.as_ref(); | |
636 | let index = { |
|
637 | let index = { | |
637 | match store_vfs.mmap_open_opt(index_path)? { |
|
638 | match store_vfs.mmap_open_opt(index_path)? { | |
638 | None => Index::new( |
|
639 | None => Index::new( | |
639 | Box::<Vec<_>>::default(), |
|
640 | Box::<Vec<_>>::default(), | |
640 | options.index_header(), |
|
641 | options.index_header(), | |
641 | ), |
|
642 | ), | |
642 | Some(index_mmap) => { |
|
643 | Some(index_mmap) => { | |
643 | let index = Index::new( |
|
644 | let index = Index::new( | |
644 | Box::new(index_mmap), |
|
645 | Box::new(index_mmap), | |
645 | options.index_header(), |
|
646 | options.index_header(), | |
646 | )?; |
|
647 | )?; | |
647 | Ok(index) |
|
648 | Ok(index) | |
648 | } |
|
649 | } | |
649 | } |
|
650 | } | |
650 | }?; |
|
651 | }?; | |
651 |
|
652 | |||
652 | let default_data_path = index_path.with_extension("d"); |
|
653 | let default_data_path = index_path.with_extension("d"); | |
653 |
|
654 | |||
654 | // type annotation required |
|
655 | // type annotation required | |
655 | // won't recognize Mmap as Deref<Target = [u8]> |
|
656 | // won't recognize Mmap as Deref<Target = [u8]> | |
656 | let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> = |
|
657 | let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> = | |
657 | if index.is_inline() { |
|
658 | if index.is_inline() { | |
658 | None |
|
659 | None | |
659 | } else if index.is_empty() { |
|
660 | } else if index.is_empty() { | |
660 | // No need to even try to open the data file then. |
|
661 | // No need to even try to open the data file then. | |
661 | Some(Box::new(&[][..])) |
|
662 | Some(Box::new(&[][..])) | |
662 | } else { |
|
663 | } else { | |
663 | let data_path = data_path.unwrap_or(&default_data_path); |
|
664 | let data_path = data_path.unwrap_or(&default_data_path); | |
664 | let data_mmap = store_vfs.mmap_open(data_path)?; |
|
665 | let data_mmap = store_vfs.mmap_open(data_path)?; | |
665 | Some(Box::new(data_mmap)) |
|
666 | Some(Box::new(data_mmap)) | |
666 | }; |
|
667 | }; | |
667 |
|
668 | |||
668 | let nodemap = if index.is_inline() || !options.use_nodemap { |
|
669 | let nodemap = if index.is_inline() || !options.use_nodemap { | |
669 | None |
|
670 | None | |
670 | } else { |
|
671 | } else { | |
671 | NodeMapDocket::read_from_file(store_vfs, index_path)?.map( |
|
672 | NodeMapDocket::read_from_file(store_vfs, index_path)?.map( | |
672 | |(docket, data)| { |
|
673 | |(docket, data)| { | |
673 | nodemap::NodeTree::load_bytes( |
|
674 | nodemap::NodeTree::load_bytes( | |
674 | Box::new(data), |
|
675 | Box::new(data), | |
675 | docket.data_length, |
|
676 | docket.data_length, | |
676 | ) |
|
677 | ) | |
677 | }, |
|
678 | }, | |
678 | ) |
|
679 | ) | |
679 | }; |
|
680 | }; | |
680 |
|
681 | |||
681 | let nodemap = nodemap_for_test.or(nodemap); |
|
682 | let nodemap = nodemap_for_test.or(nodemap); | |
682 |
|
683 | |||
683 | Ok(Revlog { |
|
684 | Ok(Revlog { | |
684 | index, |
|
685 | index, | |
685 | data_bytes, |
|
686 | data_bytes, | |
686 | nodemap, |
|
687 | nodemap, | |
687 | }) |
|
688 | }) | |
688 | } |
|
689 | } | |
689 |
|
690 | |||
690 | /// Return number of entries of the `Revlog`. |
|
691 | /// Return number of entries of the `Revlog`. | |
691 | pub fn len(&self) -> usize { |
|
692 | pub fn len(&self) -> usize { | |
692 | self.index.len() |
|
693 | self.index.len() | |
693 | } |
|
694 | } | |
694 |
|
695 | |||
695 | /// Returns `true` if the `Revlog` has zero `entries`. |
|
696 | /// Returns `true` if the `Revlog` has zero `entries`. | |
696 | pub fn is_empty(&self) -> bool { |
|
697 | pub fn is_empty(&self) -> bool { | |
697 | self.index.is_empty() |
|
698 | self.index.is_empty() | |
698 | } |
|
699 | } | |
699 |
|
700 | |||
700 | /// Returns the node ID for the given revision number, if it exists in this |
|
701 | /// Returns the node ID for the given revision number, if it exists in this | |
701 | /// revlog |
|
702 | /// revlog | |
702 | pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> { |
|
703 | pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> { | |
703 | if rev == NULL_REVISION.into() { |
|
704 | if rev == NULL_REVISION.into() { | |
704 | return Some(&NULL_NODE); |
|
705 | return Some(&NULL_NODE); | |
705 | } |
|
706 | } | |
706 | let rev = self.index.check_revision(rev)?; |
|
707 | let rev = self.index.check_revision(rev)?; | |
707 | Some(self.index.get_entry(rev)?.hash()) |
|
708 | Some(self.index.get_entry(rev)?.hash()) | |
708 | } |
|
709 | } | |
709 |
|
710 | |||
710 | /// Return the revision number for the given node ID, if it exists in this |
|
711 | /// Return the revision number for the given node ID, if it exists in this | |
711 | /// revlog |
|
712 | /// revlog | |
712 | pub fn rev_from_node( |
|
713 | pub fn rev_from_node( | |
713 | &self, |
|
714 | &self, | |
714 | node: NodePrefix, |
|
715 | node: NodePrefix, | |
715 | ) -> Result<Revision, RevlogError> { |
|
716 | ) -> Result<Revision, RevlogError> { | |
716 | if let Some(nodemap) = &self.nodemap { |
|
717 | if let Some(nodemap) = &self.nodemap { | |
717 | nodemap |
|
718 | nodemap | |
718 | .find_bin(&self.index, node)? |
|
719 | .find_bin(&self.index, node)? | |
719 | .ok_or(RevlogError::InvalidRevision(format!("{:x}", node))) |
|
720 | .ok_or(RevlogError::InvalidRevision(format!("{:x}", node))) | |
720 | } else { |
|
721 | } else { | |
721 | self.rev_from_node_no_persistent_nodemap(node) |
|
722 | self.rev_from_node_no_persistent_nodemap(node) | |
722 | } |
|
723 | } | |
723 | } |
|
724 | } | |
724 |
|
725 | |||
725 | /// Same as `rev_from_node`, without using a persistent nodemap |
|
726 | /// Same as `rev_from_node`, without using a persistent nodemap | |
726 | /// |
|
727 | /// | |
727 | /// This is used as fallback when a persistent nodemap is not present. |
|
728 | /// This is used as fallback when a persistent nodemap is not present. | |
728 | /// This happens when the persistent-nodemap experimental feature is not |
|
729 | /// This happens when the persistent-nodemap experimental feature is not | |
729 | /// enabled, or for small revlogs. |
|
730 | /// enabled, or for small revlogs. | |
730 | fn rev_from_node_no_persistent_nodemap( |
|
731 | fn rev_from_node_no_persistent_nodemap( | |
731 | &self, |
|
732 | &self, | |
732 | node: NodePrefix, |
|
733 | node: NodePrefix, | |
733 | ) -> Result<Revision, RevlogError> { |
|
734 | ) -> Result<Revision, RevlogError> { | |
734 | // Linear scan of the revlog |
|
735 | // Linear scan of the revlog | |
735 | // TODO: consider building a non-persistent nodemap in memory to |
|
736 | // TODO: consider building a non-persistent nodemap in memory to | |
736 | // optimize these cases. |
|
737 | // optimize these cases. | |
737 | let mut found_by_prefix = None; |
|
738 | let mut found_by_prefix = None; | |
738 | for rev in (-1..self.len() as BaseRevision).rev() { |
|
739 | for rev in (-1..self.len() as BaseRevision).rev() { | |
739 | let rev = Revision(rev as BaseRevision); |
|
740 | let rev = Revision(rev as BaseRevision); | |
740 | let candidate_node = if rev == Revision(-1) { |
|
741 | let candidate_node = if rev == Revision(-1) { | |
741 | NULL_NODE |
|
742 | NULL_NODE | |
742 | } else { |
|
743 | } else { | |
743 | let index_entry = |
|
744 | let index_entry = | |
744 | self.index.get_entry(rev).ok_or_else(|| { |
|
745 | self.index.get_entry(rev).ok_or_else(|| { | |
745 | HgError::corrupted( |
|
746 | HgError::corrupted( | |
746 | "revlog references a revision not in the index", |
|
747 | "revlog references a revision not in the index", | |
747 | ) |
|
748 | ) | |
748 | })?; |
|
749 | })?; | |
749 | *index_entry.hash() |
|
750 | *index_entry.hash() | |
750 | }; |
|
751 | }; | |
751 | if node == candidate_node { |
|
752 | if node == candidate_node { | |
752 | return Ok(rev); |
|
753 | return Ok(rev); | |
753 | } |
|
754 | } | |
754 | if node.is_prefix_of(&candidate_node) { |
|
755 | if node.is_prefix_of(&candidate_node) { | |
755 | if found_by_prefix.is_some() { |
|
756 | if found_by_prefix.is_some() { | |
756 | return Err(RevlogError::AmbiguousPrefix); |
|
757 | return Err(RevlogError::AmbiguousPrefix); | |
757 | } |
|
758 | } | |
758 | found_by_prefix = Some(rev) |
|
759 | found_by_prefix = Some(rev) | |
759 | } |
|
760 | } | |
760 | } |
|
761 | } | |
761 | found_by_prefix |
|
762 | found_by_prefix | |
762 | .ok_or(RevlogError::InvalidRevision(format!("{:x}", node))) |
|
763 | .ok_or(RevlogError::InvalidRevision(format!("{:x}", node))) | |
763 | } |
|
764 | } | |
764 |
|
765 | |||
765 | /// Returns whether the given revision exists in this revlog. |
|
766 | /// Returns whether the given revision exists in this revlog. | |
766 | pub fn has_rev(&self, rev: UncheckedRevision) -> bool { |
|
767 | pub fn has_rev(&self, rev: UncheckedRevision) -> bool { | |
767 | self.index.check_revision(rev).is_some() |
|
768 | self.index.check_revision(rev).is_some() | |
768 | } |
|
769 | } | |
769 |
|
770 | |||
770 | /// Return the full data associated to a revision. |
|
771 | /// Return the full data associated to a revision. | |
771 | /// |
|
772 | /// | |
772 | /// All entries required to build the final data out of deltas will be |
|
773 | /// All entries required to build the final data out of deltas will be | |
773 | /// retrieved as needed, and the deltas will be applied to the inital |
|
774 | /// retrieved as needed, and the deltas will be applied to the inital | |
774 | /// snapshot to rebuild the final data. |
|
775 | /// snapshot to rebuild the final data. | |
775 | pub fn get_rev_data( |
|
776 | pub fn get_rev_data( | |
776 | &self, |
|
777 | &self, | |
777 | rev: UncheckedRevision, |
|
778 | rev: UncheckedRevision, | |
778 | ) -> Result<Cow<[u8]>, RevlogError> { |
|
779 | ) -> Result<Cow<[u8]>, RevlogError> { | |
779 | if rev == NULL_REVISION.into() { |
|
780 | if rev == NULL_REVISION.into() { | |
780 | return Ok(Cow::Borrowed(&[])); |
|
781 | return Ok(Cow::Borrowed(&[])); | |
781 | }; |
|
782 | }; | |
782 | self.get_entry(rev)?.data() |
|
783 | self.get_entry(rev)?.data() | |
783 | } |
|
784 | } | |
784 |
|
785 | |||
785 | /// [`Self::get_rev_data`] for checked revisions. |
|
786 | /// [`Self::get_rev_data`] for checked revisions. | |
786 | pub fn get_rev_data_for_checked_rev( |
|
787 | pub fn get_rev_data_for_checked_rev( | |
787 | &self, |
|
788 | &self, | |
788 | rev: Revision, |
|
789 | rev: Revision, | |
789 | ) -> Result<Cow<[u8]>, RevlogError> { |
|
790 | ) -> Result<Cow<[u8]>, RevlogError> { | |
790 | if rev == NULL_REVISION { |
|
791 | if rev == NULL_REVISION { | |
791 | return Ok(Cow::Borrowed(&[])); |
|
792 | return Ok(Cow::Borrowed(&[])); | |
792 | }; |
|
793 | }; | |
793 | self.get_entry_for_checked_rev(rev)?.data() |
|
794 | self.get_entry_for_checked_rev(rev)?.data() | |
794 | } |
|
795 | } | |
795 |
|
796 | |||
796 | /// Check the hash of some given data against the recorded hash. |
|
797 | /// Check the hash of some given data against the recorded hash. | |
797 | pub fn check_hash( |
|
798 | pub fn check_hash( | |
798 | &self, |
|
799 | &self, | |
799 | p1: Revision, |
|
800 | p1: Revision, | |
800 | p2: Revision, |
|
801 | p2: Revision, | |
801 | expected: &[u8], |
|
802 | expected: &[u8], | |
802 | data: &[u8], |
|
803 | data: &[u8], | |
803 | ) -> bool { |
|
804 | ) -> bool { | |
804 | let e1 = self.index.get_entry(p1); |
|
805 | let e1 = self.index.get_entry(p1); | |
805 | let h1 = match e1 { |
|
806 | let h1 = match e1 { | |
806 | Some(ref entry) => entry.hash(), |
|
807 | Some(ref entry) => entry.hash(), | |
807 | None => &NULL_NODE, |
|
808 | None => &NULL_NODE, | |
808 | }; |
|
809 | }; | |
809 | let e2 = self.index.get_entry(p2); |
|
810 | let e2 = self.index.get_entry(p2); | |
810 | let h2 = match e2 { |
|
811 | let h2 = match e2 { | |
811 | Some(ref entry) => entry.hash(), |
|
812 | Some(ref entry) => entry.hash(), | |
812 | None => &NULL_NODE, |
|
813 | None => &NULL_NODE, | |
813 | }; |
|
814 | }; | |
814 |
|
815 | |||
815 | hash(data, h1.as_bytes(), h2.as_bytes()) == expected |
|
816 | hash(data, h1.as_bytes(), h2.as_bytes()) == expected | |
816 | } |
|
817 | } | |
817 |
|
818 | |||
818 | /// Build the full data of a revision out its snapshot |
|
819 | /// Build the full data of a revision out its snapshot | |
819 | /// and its deltas. |
|
820 | /// and its deltas. | |
820 | fn build_data_from_deltas( |
|
821 | fn build_data_from_deltas( | |
821 | snapshot: RevlogEntry, |
|
822 | snapshot: RevlogEntry, | |
822 | deltas: &[RevlogEntry], |
|
823 | deltas: &[RevlogEntry], | |
823 | ) -> Result<Vec<u8>, HgError> { |
|
824 | ) -> Result<Vec<u8>, HgError> { | |
824 | let snapshot = snapshot.data_chunk()?; |
|
825 | let snapshot = snapshot.data_chunk()?; | |
825 | let deltas = deltas |
|
826 | let deltas = deltas | |
826 | .iter() |
|
827 | .iter() | |
827 | .rev() |
|
828 | .rev() | |
828 | .map(RevlogEntry::data_chunk) |
|
829 | .map(RevlogEntry::data_chunk) | |
829 | .collect::<Result<Vec<_>, _>>()?; |
|
830 | .collect::<Result<Vec<_>, _>>()?; | |
830 | let patches: Vec<_> = |
|
831 | let patches: Vec<_> = | |
831 | deltas.iter().map(|d| patch::PatchList::new(d)).collect(); |
|
832 | deltas.iter().map(|d| patch::PatchList::new(d)).collect(); | |
832 | let patch = patch::fold_patch_lists(&patches); |
|
833 | let patch = patch::fold_patch_lists(&patches); | |
833 | Ok(patch.apply(&snapshot)) |
|
834 | Ok(patch.apply(&snapshot)) | |
834 | } |
|
835 | } | |
835 |
|
836 | |||
836 | /// Return the revlog data. |
|
837 | /// Return the revlog data. | |
837 | fn data(&self) -> &[u8] { |
|
838 | fn data(&self) -> &[u8] { | |
838 | match &self.data_bytes { |
|
839 | match &self.data_bytes { | |
839 | Some(data_bytes) => data_bytes, |
|
840 | Some(data_bytes) => data_bytes, | |
840 | None => panic!( |
|
841 | None => panic!( | |
841 | "forgot to load the data or trying to access inline data" |
|
842 | "forgot to load the data or trying to access inline data" | |
842 | ), |
|
843 | ), | |
843 | } |
|
844 | } | |
844 | } |
|
845 | } | |
845 |
|
846 | |||
846 | pub fn make_null_entry(&self) -> RevlogEntry { |
|
847 | pub fn make_null_entry(&self) -> RevlogEntry { | |
847 | RevlogEntry { |
|
848 | RevlogEntry { | |
848 | revlog: self, |
|
849 | revlog: self, | |
849 | rev: NULL_REVISION, |
|
850 | rev: NULL_REVISION, | |
850 | bytes: b"", |
|
851 | bytes: b"", | |
851 | compressed_len: 0, |
|
852 | compressed_len: 0, | |
852 | uncompressed_len: 0, |
|
853 | uncompressed_len: 0, | |
853 | base_rev_or_base_of_delta_chain: None, |
|
854 | base_rev_or_base_of_delta_chain: None, | |
854 | p1: NULL_REVISION, |
|
855 | p1: NULL_REVISION, | |
855 | p2: NULL_REVISION, |
|
856 | p2: NULL_REVISION, | |
856 | flags: NULL_REVLOG_ENTRY_FLAGS, |
|
857 | flags: NULL_REVLOG_ENTRY_FLAGS, | |
857 | hash: NULL_NODE, |
|
858 | hash: NULL_NODE, | |
858 | } |
|
859 | } | |
859 | } |
|
860 | } | |
860 |
|
861 | |||
861 | fn get_entry_for_checked_rev( |
|
862 | fn get_entry_for_checked_rev( | |
862 | &self, |
|
863 | &self, | |
863 | rev: Revision, |
|
864 | rev: Revision, | |
864 | ) -> Result<RevlogEntry, RevlogError> { |
|
865 | ) -> Result<RevlogEntry, RevlogError> { | |
865 | if rev == NULL_REVISION { |
|
866 | if rev == NULL_REVISION { | |
866 | return Ok(self.make_null_entry()); |
|
867 | return Ok(self.make_null_entry()); | |
867 | } |
|
868 | } | |
868 | let index_entry = self |
|
869 | let index_entry = self | |
869 | .index |
|
870 | .index | |
870 | .get_entry(rev) |
|
871 | .get_entry(rev) | |
871 | .ok_or(RevlogError::InvalidRevision(rev.to_string()))?; |
|
872 | .ok_or(RevlogError::InvalidRevision(rev.to_string()))?; | |
872 | let offset = index_entry.offset(); |
|
873 | let offset = index_entry.offset(); | |
873 | let start = if self.index.is_inline() { |
|
874 | let start = if self.index.is_inline() { | |
874 | offset + ((rev.0 as usize + 1) * INDEX_ENTRY_SIZE) |
|
875 | offset + ((rev.0 as usize + 1) * INDEX_ENTRY_SIZE) | |
875 | } else { |
|
876 | } else { | |
876 | offset |
|
877 | offset | |
877 | }; |
|
878 | }; | |
878 | let end = start + index_entry.compressed_len() as usize; |
|
879 | let end = start + index_entry.compressed_len() as usize; | |
879 | let data = if self.index.is_inline() { |
|
880 | let data = if self.index.is_inline() { | |
880 | self.index.data(start, end) |
|
881 | self.index.data(start, end) | |
881 | } else { |
|
882 | } else { | |
882 | &self.data()[start..end] |
|
883 | &self.data()[start..end] | |
883 | }; |
|
884 | }; | |
884 | let base_rev = self |
|
885 | let base_rev = self | |
885 | .index |
|
886 | .index | |
886 | .check_revision(index_entry.base_revision_or_base_of_delta_chain()) |
|
887 | .check_revision(index_entry.base_revision_or_base_of_delta_chain()) | |
887 | .ok_or_else(|| { |
|
888 | .ok_or_else(|| { | |
888 | RevlogError::corrupted(format!( |
|
889 | RevlogError::corrupted(format!( | |
889 | "base revision for rev {} is invalid", |
|
890 | "base revision for rev {} is invalid", | |
890 | rev |
|
891 | rev | |
891 | )) |
|
892 | )) | |
892 | })?; |
|
893 | })?; | |
893 | let p1 = |
|
894 | let p1 = | |
894 | self.index.check_revision(index_entry.p1()).ok_or_else(|| { |
|
895 | self.index.check_revision(index_entry.p1()).ok_or_else(|| { | |
895 | RevlogError::corrupted(format!( |
|
896 | RevlogError::corrupted(format!( | |
896 | "p1 for rev {} is invalid", |
|
897 | "p1 for rev {} is invalid", | |
897 | rev |
|
898 | rev | |
898 | )) |
|
899 | )) | |
899 | })?; |
|
900 | })?; | |
900 | let p2 = |
|
901 | let p2 = | |
901 | self.index.check_revision(index_entry.p2()).ok_or_else(|| { |
|
902 | self.index.check_revision(index_entry.p2()).ok_or_else(|| { | |
902 | RevlogError::corrupted(format!( |
|
903 | RevlogError::corrupted(format!( | |
903 | "p2 for rev {} is invalid", |
|
904 | "p2 for rev {} is invalid", | |
904 | rev |
|
905 | rev | |
905 | )) |
|
906 | )) | |
906 | })?; |
|
907 | })?; | |
907 | let entry = RevlogEntry { |
|
908 | let entry = RevlogEntry { | |
908 | revlog: self, |
|
909 | revlog: self, | |
909 | rev, |
|
910 | rev, | |
910 | bytes: data, |
|
911 | bytes: data, | |
911 | compressed_len: index_entry.compressed_len(), |
|
912 | compressed_len: index_entry.compressed_len(), | |
912 | uncompressed_len: index_entry.uncompressed_len(), |
|
913 | uncompressed_len: index_entry.uncompressed_len(), | |
913 | base_rev_or_base_of_delta_chain: if base_rev == rev { |
|
914 | base_rev_or_base_of_delta_chain: if base_rev == rev { | |
914 | None |
|
915 | None | |
915 | } else { |
|
916 | } else { | |
916 | Some(base_rev) |
|
917 | Some(base_rev) | |
917 | }, |
|
918 | }, | |
918 | p1, |
|
919 | p1, | |
919 | p2, |
|
920 | p2, | |
920 | flags: index_entry.flags(), |
|
921 | flags: index_entry.flags(), | |
921 | hash: *index_entry.hash(), |
|
922 | hash: *index_entry.hash(), | |
922 | }; |
|
923 | }; | |
923 | Ok(entry) |
|
924 | Ok(entry) | |
924 | } |
|
925 | } | |
925 |
|
926 | |||
926 | /// Get an entry of the revlog. |
|
927 | /// Get an entry of the revlog. | |
927 | pub fn get_entry( |
|
928 | pub fn get_entry( | |
928 | &self, |
|
929 | &self, | |
929 | rev: UncheckedRevision, |
|
930 | rev: UncheckedRevision, | |
930 | ) -> Result<RevlogEntry, RevlogError> { |
|
931 | ) -> Result<RevlogEntry, RevlogError> { | |
931 | if rev == NULL_REVISION.into() { |
|
932 | if rev == NULL_REVISION.into() { | |
932 | return Ok(self.make_null_entry()); |
|
933 | return Ok(self.make_null_entry()); | |
933 | } |
|
934 | } | |
934 | let rev = self.index.check_revision(rev).ok_or_else(|| { |
|
935 | let rev = self.index.check_revision(rev).ok_or_else(|| { | |
935 | RevlogError::corrupted(format!("rev {} is invalid", rev)) |
|
936 | RevlogError::corrupted(format!("rev {} is invalid", rev)) | |
936 | })?; |
|
937 | })?; | |
937 | self.get_entry_for_checked_rev(rev) |
|
938 | self.get_entry_for_checked_rev(rev) | |
938 | } |
|
939 | } | |
939 | } |
|
940 | } | |
940 |
|
941 | |||
941 | /// The revlog entry's bytes and the necessary informations to extract |
|
942 | /// The revlog entry's bytes and the necessary informations to extract | |
942 | /// the entry's data. |
|
943 | /// the entry's data. | |
943 | #[derive(Clone)] |
|
944 | #[derive(Clone)] | |
944 | pub struct RevlogEntry<'revlog> { |
|
945 | pub struct RevlogEntry<'revlog> { | |
945 | revlog: &'revlog Revlog, |
|
946 | revlog: &'revlog Revlog, | |
946 | rev: Revision, |
|
947 | rev: Revision, | |
947 | bytes: &'revlog [u8], |
|
948 | bytes: &'revlog [u8], | |
948 | compressed_len: u32, |
|
949 | compressed_len: u32, | |
949 | uncompressed_len: i32, |
|
950 | uncompressed_len: i32, | |
950 | base_rev_or_base_of_delta_chain: Option<Revision>, |
|
951 | base_rev_or_base_of_delta_chain: Option<Revision>, | |
951 | p1: Revision, |
|
952 | p1: Revision, | |
952 | p2: Revision, |
|
953 | p2: Revision, | |
953 | flags: u16, |
|
954 | flags: u16, | |
954 | hash: Node, |
|
955 | hash: Node, | |
955 | } |
|
956 | } | |
956 |
|
957 | |||
957 | impl<'revlog> RevlogEntry<'revlog> { |
|
958 | impl<'revlog> RevlogEntry<'revlog> { | |
958 | pub fn revision(&self) -> Revision { |
|
959 | pub fn revision(&self) -> Revision { | |
959 | self.rev |
|
960 | self.rev | |
960 | } |
|
961 | } | |
961 |
|
962 | |||
962 | pub fn node(&self) -> &Node { |
|
963 | pub fn node(&self) -> &Node { | |
963 | &self.hash |
|
964 | &self.hash | |
964 | } |
|
965 | } | |
965 |
|
966 | |||
966 | pub fn uncompressed_len(&self) -> Option<u32> { |
|
967 | pub fn uncompressed_len(&self) -> Option<u32> { | |
967 | u32::try_from(self.uncompressed_len).ok() |
|
968 | u32::try_from(self.uncompressed_len).ok() | |
968 | } |
|
969 | } | |
969 |
|
970 | |||
970 | pub fn has_p1(&self) -> bool { |
|
971 | pub fn has_p1(&self) -> bool { | |
971 | self.p1 != NULL_REVISION |
|
972 | self.p1 != NULL_REVISION | |
972 | } |
|
973 | } | |
973 |
|
974 | |||
974 | pub fn p1_entry( |
|
975 | pub fn p1_entry( | |
975 | &self, |
|
976 | &self, | |
976 | ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> { |
|
977 | ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> { | |
977 | if self.p1 == NULL_REVISION { |
|
978 | if self.p1 == NULL_REVISION { | |
978 | Ok(None) |
|
979 | Ok(None) | |
979 | } else { |
|
980 | } else { | |
980 | Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?)) |
|
981 | Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?)) | |
981 | } |
|
982 | } | |
982 | } |
|
983 | } | |
983 |
|
984 | |||
984 | pub fn p2_entry( |
|
985 | pub fn p2_entry( | |
985 | &self, |
|
986 | &self, | |
986 | ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> { |
|
987 | ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> { | |
987 | if self.p2 == NULL_REVISION { |
|
988 | if self.p2 == NULL_REVISION { | |
988 | Ok(None) |
|
989 | Ok(None) | |
989 | } else { |
|
990 | } else { | |
990 | Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?)) |
|
991 | Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?)) | |
991 | } |
|
992 | } | |
992 | } |
|
993 | } | |
993 |
|
994 | |||
994 | pub fn p1(&self) -> Option<Revision> { |
|
995 | pub fn p1(&self) -> Option<Revision> { | |
995 | if self.p1 == NULL_REVISION { |
|
996 | if self.p1 == NULL_REVISION { | |
996 | None |
|
997 | None | |
997 | } else { |
|
998 | } else { | |
998 | Some(self.p1) |
|
999 | Some(self.p1) | |
999 | } |
|
1000 | } | |
1000 | } |
|
1001 | } | |
1001 |
|
1002 | |||
1002 | pub fn p2(&self) -> Option<Revision> { |
|
1003 | pub fn p2(&self) -> Option<Revision> { | |
1003 | if self.p2 == NULL_REVISION { |
|
1004 | if self.p2 == NULL_REVISION { | |
1004 | None |
|
1005 | None | |
1005 | } else { |
|
1006 | } else { | |
1006 | Some(self.p2) |
|
1007 | Some(self.p2) | |
1007 | } |
|
1008 | } | |
1008 | } |
|
1009 | } | |
1009 |
|
1010 | |||
1010 | pub fn is_censored(&self) -> bool { |
|
1011 | pub fn is_censored(&self) -> bool { | |
1011 | (self.flags & REVISION_FLAG_CENSORED) != 0 |
|
1012 | (self.flags & REVISION_FLAG_CENSORED) != 0 | |
1012 | } |
|
1013 | } | |
1013 |
|
1014 | |||
1014 | pub fn has_length_affecting_flag_processor(&self) -> bool { |
|
1015 | pub fn has_length_affecting_flag_processor(&self) -> bool { | |
1015 | // Relevant Python code: revlog.size() |
|
1016 | // Relevant Python code: revlog.size() | |
1016 | // note: ELLIPSIS is known to not change the content |
|
1017 | // note: ELLIPSIS is known to not change the content | |
1017 | (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0 |
|
1018 | (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0 | |
1018 | } |
|
1019 | } | |
1019 |
|
1020 | |||
1020 | /// The data for this entry, after resolving deltas if any. |
|
1021 | /// The data for this entry, after resolving deltas if any. | |
1021 | pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> { |
|
1022 | pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> { | |
1022 | let mut entry = self.clone(); |
|
1023 | let mut entry = self.clone(); | |
1023 | let mut delta_chain = vec![]; |
|
1024 | let mut delta_chain = vec![]; | |
1024 |
|
1025 | |||
1025 | // The meaning of `base_rev_or_base_of_delta_chain` depends on |
|
1026 | // The meaning of `base_rev_or_base_of_delta_chain` depends on | |
1026 | // generaldelta. See the doc on `ENTRY_DELTA_BASE` in |
|
1027 | // generaldelta. See the doc on `ENTRY_DELTA_BASE` in | |
1027 | // `mercurial/revlogutils/constants.py` and the code in |
|
1028 | // `mercurial/revlogutils/constants.py` and the code in | |
1028 | // [_chaininfo] and in [index_deltachain]. |
|
1029 | // [_chaininfo] and in [index_deltachain]. | |
1029 | let uses_generaldelta = self.revlog.index.uses_generaldelta(); |
|
1030 | let uses_generaldelta = self.revlog.index.uses_generaldelta(); | |
1030 | while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain { |
|
1031 | while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain { | |
1031 | entry = if uses_generaldelta { |
|
1032 | entry = if uses_generaldelta { | |
1032 | delta_chain.push(entry); |
|
1033 | delta_chain.push(entry); | |
1033 | self.revlog.get_entry_for_checked_rev(base_rev)? |
|
1034 | self.revlog.get_entry_for_checked_rev(base_rev)? | |
1034 | } else { |
|
1035 | } else { | |
1035 | let base_rev = UncheckedRevision(entry.rev.0 - 1); |
|
1036 | let base_rev = UncheckedRevision(entry.rev.0 - 1); | |
1036 | delta_chain.push(entry); |
|
1037 | delta_chain.push(entry); | |
1037 | self.revlog.get_entry(base_rev)? |
|
1038 | self.revlog.get_entry(base_rev)? | |
1038 | }; |
|
1039 | }; | |
1039 | } |
|
1040 | } | |
1040 |
|
1041 | |||
1041 | let data = if delta_chain.is_empty() { |
|
1042 | let data = if delta_chain.is_empty() { | |
1042 | entry.data_chunk()? |
|
1043 | entry.data_chunk()? | |
1043 | } else { |
|
1044 | } else { | |
1044 | Revlog::build_data_from_deltas(entry, &delta_chain)?.into() |
|
1045 | Revlog::build_data_from_deltas(entry, &delta_chain)?.into() | |
1045 | }; |
|
1046 | }; | |
1046 |
|
1047 | |||
1047 | Ok(data) |
|
1048 | Ok(data) | |
1048 | } |
|
1049 | } | |
1049 |
|
1050 | |||
1050 | fn check_data( |
|
1051 | fn check_data( | |
1051 | &self, |
|
1052 | &self, | |
1052 | data: Cow<'revlog, [u8]>, |
|
1053 | data: Cow<'revlog, [u8]>, | |
1053 | ) -> Result<Cow<'revlog, [u8]>, RevlogError> { |
|
1054 | ) -> Result<Cow<'revlog, [u8]>, RevlogError> { | |
1054 | if self.revlog.check_hash( |
|
1055 | if self.revlog.check_hash( | |
1055 | self.p1, |
|
1056 | self.p1, | |
1056 | self.p2, |
|
1057 | self.p2, | |
1057 | self.hash.as_bytes(), |
|
1058 | self.hash.as_bytes(), | |
1058 | &data, |
|
1059 | &data, | |
1059 | ) { |
|
1060 | ) { | |
1060 | Ok(data) |
|
1061 | Ok(data) | |
1061 | } else { |
|
1062 | } else { | |
1062 | if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 { |
|
1063 | if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 { | |
1063 | return Err(HgError::unsupported( |
|
1064 | return Err(HgError::unsupported( | |
1064 | "support for ellipsis nodes is missing", |
|
1065 | "support for ellipsis nodes is missing", | |
1065 | ) |
|
1066 | ) | |
1066 | .into()); |
|
1067 | .into()); | |
1067 | } |
|
1068 | } | |
1068 | Err(corrupted(format!( |
|
1069 | Err(corrupted(format!( | |
1069 | "hash check failed for revision {}", |
|
1070 | "hash check failed for revision {}", | |
1070 | self.rev |
|
1071 | self.rev | |
1071 | )) |
|
1072 | )) | |
1072 | .into()) |
|
1073 | .into()) | |
1073 | } |
|
1074 | } | |
1074 | } |
|
1075 | } | |
1075 |
|
1076 | |||
1076 | pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> { |
|
1077 | pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> { | |
1077 | let data = self.rawdata()?; |
|
1078 | let data = self.rawdata()?; | |
1078 | if self.rev == NULL_REVISION { |
|
1079 | if self.rev == NULL_REVISION { | |
1079 | return Ok(data); |
|
1080 | return Ok(data); | |
1080 | } |
|
1081 | } | |
1081 | if self.is_censored() { |
|
1082 | if self.is_censored() { | |
1082 | return Err(HgError::CensoredNodeError.into()); |
|
1083 | return Err(HgError::CensoredNodeError.into()); | |
1083 | } |
|
1084 | } | |
1084 | self.check_data(data) |
|
1085 | self.check_data(data) | |
1085 | } |
|
1086 | } | |
1086 |
|
1087 | |||
1087 | /// Extract the data contained in the entry. |
|
1088 | /// Extract the data contained in the entry. | |
1088 | /// This may be a delta. (See `is_delta`.) |
|
1089 | /// This may be a delta. (See `is_delta`.) | |
1089 | fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> { |
|
1090 | fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> { | |
1090 | if self.bytes.is_empty() { |
|
1091 | if self.bytes.is_empty() { | |
1091 | return Ok(Cow::Borrowed(&[])); |
|
1092 | return Ok(Cow::Borrowed(&[])); | |
1092 | } |
|
1093 | } | |
1093 | match self.bytes[0] { |
|
1094 | match self.bytes[0] { | |
1094 | // Revision data is the entirety of the entry, including this |
|
1095 | // Revision data is the entirety of the entry, including this | |
1095 | // header. |
|
1096 | // header. | |
1096 | b'\0' => Ok(Cow::Borrowed(self.bytes)), |
|
1097 | b'\0' => Ok(Cow::Borrowed(self.bytes)), | |
1097 | // Raw revision data follows. |
|
1098 | // Raw revision data follows. | |
1098 | b'u' => Ok(Cow::Borrowed(&self.bytes[1..])), |
|
1099 | b'u' => Ok(Cow::Borrowed(&self.bytes[1..])), | |
1099 | // zlib (RFC 1950) data. |
|
1100 | // zlib (RFC 1950) data. | |
1100 | b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)), |
|
1101 | b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)), | |
1101 | // zstd data. |
|
1102 | // zstd data. | |
1102 | b'\x28' => Ok(Cow::Owned(uncompressed_zstd_data( |
|
1103 | b'\x28' => Ok(Cow::Owned(uncompressed_zstd_data( | |
1103 | self.bytes, |
|
1104 | self.bytes, | |
1104 | self.is_delta(), |
|
1105 | self.is_delta(), | |
1105 | self.uncompressed_len.max(0), |
|
1106 | self.uncompressed_len.max(0), | |
1106 | )?)), |
|
1107 | )?)), | |
1107 | // A proper new format should have had a repo/store requirement. |
|
1108 | // A proper new format should have had a repo/store requirement. | |
1108 | format_type => Err(corrupted(format!( |
|
1109 | format_type => Err(corrupted(format!( | |
1109 | "unknown compression header '{}'", |
|
1110 | "unknown compression header '{}'", | |
1110 | format_type |
|
1111 | format_type | |
1111 | ))), |
|
1112 | ))), | |
1112 | } |
|
1113 | } | |
1113 | } |
|
1114 | } | |
1114 |
|
1115 | |||
1115 | fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> { |
|
1116 | fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> { | |
1116 | let mut decoder = ZlibDecoder::new(self.bytes); |
|
1117 | let mut decoder = ZlibDecoder::new(self.bytes); | |
1117 | if self.is_delta() { |
|
1118 | if self.is_delta() { | |
1118 | let mut buf = Vec::with_capacity(self.compressed_len as usize); |
|
1119 | let mut buf = Vec::with_capacity(self.compressed_len as usize); | |
1119 | decoder |
|
1120 | decoder | |
1120 | .read_to_end(&mut buf) |
|
1121 | .read_to_end(&mut buf) | |
1121 | .map_err(|e| corrupted(e.to_string()))?; |
|
1122 | .map_err(|e| corrupted(e.to_string()))?; | |
1122 | Ok(buf) |
|
1123 | Ok(buf) | |
1123 | } else { |
|
1124 | } else { | |
1124 | let cap = self.uncompressed_len.max(0) as usize; |
|
1125 | let cap = self.uncompressed_len.max(0) as usize; | |
1125 | let mut buf = vec![0; cap]; |
|
1126 | let mut buf = vec![0; cap]; | |
1126 | decoder |
|
1127 | decoder | |
1127 | .read_exact(&mut buf) |
|
1128 | .read_exact(&mut buf) | |
1128 | .map_err(|e| corrupted(e.to_string()))?; |
|
1129 | .map_err(|e| corrupted(e.to_string()))?; | |
1129 | Ok(buf) |
|
1130 | Ok(buf) | |
1130 | } |
|
1131 | } | |
1131 | } |
|
1132 | } | |
1132 |
|
1133 | |||
1133 | /// Tell if the entry is a snapshot or a delta |
|
1134 | /// Tell if the entry is a snapshot or a delta | |
1134 | /// (influences on decompression). |
|
1135 | /// (influences on decompression). | |
1135 | fn is_delta(&self) -> bool { |
|
1136 | fn is_delta(&self) -> bool { | |
1136 | self.base_rev_or_base_of_delta_chain.is_some() |
|
1137 | self.base_rev_or_base_of_delta_chain.is_some() | |
1137 | } |
|
1138 | } | |
1138 | } |
|
1139 | } | |
1139 |
|
1140 | |||
1140 | /// Calculate the hash of a revision given its data and its parents. |
|
1141 | /// Calculate the hash of a revision given its data and its parents. | |
1141 | fn hash( |
|
1142 | fn hash( | |
1142 | data: &[u8], |
|
1143 | data: &[u8], | |
1143 | p1_hash: &[u8], |
|
1144 | p1_hash: &[u8], | |
1144 | p2_hash: &[u8], |
|
1145 | p2_hash: &[u8], | |
1145 | ) -> [u8; NODE_BYTES_LENGTH] { |
|
1146 | ) -> [u8; NODE_BYTES_LENGTH] { | |
1146 | let mut hasher = Sha1::new(); |
|
1147 | let mut hasher = Sha1::new(); | |
1147 | let (a, b) = (p1_hash, p2_hash); |
|
1148 | let (a, b) = (p1_hash, p2_hash); | |
1148 | if a > b { |
|
1149 | if a > b { | |
1149 | hasher.update(b); |
|
1150 | hasher.update(b); | |
1150 | hasher.update(a); |
|
1151 | hasher.update(a); | |
1151 | } else { |
|
1152 | } else { | |
1152 | hasher.update(a); |
|
1153 | hasher.update(a); | |
1153 | hasher.update(b); |
|
1154 | hasher.update(b); | |
1154 | } |
|
1155 | } | |
1155 | hasher.update(data); |
|
1156 | hasher.update(data); | |
1156 | *hasher.finalize().as_ref() |
|
1157 | *hasher.finalize().as_ref() | |
1157 | } |
|
1158 | } | |
1158 |
|
1159 | |||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::IndexEntryBuilder;
    use itertools::Itertools;

    /// An index and data file that are both empty give a revlog holding
    /// nothing but the null revision.
    #[test]
    fn test_empty() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = VfsImpl {
            base: temp.path().to_owned(),
        };
        for &name in ["foo.i", "foo.d"].iter() {
            std::fs::write(temp.path().join(name), b"").unwrap();
        }
        let revlog =
            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::default())
                .unwrap();

        assert!(revlog.is_empty());
        assert_eq!(revlog.len(), 0);
        assert!(revlog.get_entry(0.into()).is_err());
        assert!(!revlog.has_rev(0.into()));

        let null_rev = revlog.rev_from_node(NULL_NODE.into()).unwrap();
        assert_eq!(null_rev, NULL_REVISION);

        let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
        assert_eq!(null_entry.revision(), NULL_REVISION);
        assert!(null_entry.data().unwrap().is_empty());
    }

    /// Three index entries written to an inline revlog: two roots and one
    /// revision that has both of them as parents.
    #[test]
    fn test_inline() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = VfsImpl {
            base: temp.path().to_owned(),
        };
        let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
            .unwrap();

        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new().with_node(node1).build();
        let entry2_bytes = IndexEntryBuilder::new()
            .with_p1(Revision(0))
            .with_p2(Revision(1))
            .with_node(node2)
            .build();
        let contents = entry0_bytes
            .into_iter()
            .chain(entry1_bytes)
            .chain(entry2_bytes)
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();
        let revlog =
            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::default())
                .unwrap();

        // Revisions 0 and 1 have no parents at all.
        for &(rev, node) in [(0, node0), (1, node1)].iter() {
            let entry = revlog.get_entry(rev.into()).ok().unwrap();
            assert_eq!(entry.revision(), Revision(rev));
            assert_eq!(*entry.node(), node);
            assert!(!entry.has_p1());
            assert_eq!(entry.p1(), None);
            assert_eq!(entry.p2(), None);
            assert!(entry.p1_entry().unwrap().is_none());
            assert!(entry.p2_entry().unwrap().is_none());
        }

        // Revision 2 has revisions 0 and 1 as its parents.
        let merge = revlog.get_entry(2.into()).ok().unwrap();
        assert_eq!(merge.revision(), Revision(2));
        assert_eq!(*merge.node(), node2);
        assert!(merge.has_p1());
        assert_eq!(merge.p1(), Some(Revision(0)));
        assert_eq!(merge.p2(), Some(Revision(1)));
        let first_parent = merge.p1_entry().unwrap();
        assert!(first_parent.is_some());
        assert_eq!(first_parent.unwrap().revision(), Revision(0));
        let second_parent = merge.p2_entry().unwrap();
        assert!(second_parent.is_some());
        assert_eq!(second_parent.unwrap().revision(), Revision(1));
    }

    /// Node lookups through an explicitly supplied nodemap.
    #[test]
    fn test_nodemap() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = VfsImpl {
            base: temp.path().to_owned(),
        };

        // building a revlog with a forced Node starting with zeros
        // This is a corruption, but it does not preclude using the nodemap
        // if we don't try and access the data
        let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new().with_node(node1).build();
        let contents =
            entry0_bytes.into_iter().chain(entry1_bytes).collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();

        let mut idx = nodemap::tests::TestNtIndex::new();
        idx.insert_node(Revision(0), node0).unwrap();
        idx.insert_node(Revision(1), node1).unwrap();

        let revlog = Revlog::open_gen(
            &vfs,
            "foo.i",
            None,
            RevlogOpenOptions::default(),
            Some(idx.nt),
        )
        .unwrap();

        // accessing the data shows the corruption
        revlog.get_entry(0.into()).unwrap().data().unwrap_err();

        // Full-node lookups.
        assert_eq!(
            revlog.rev_from_node(NULL_NODE.into()).unwrap(),
            Revision(-1)
        );
        assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), Revision(0));
        assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), Revision(1));

        // Prefix lookups that resolve to a single revision.
        let by_zero_prefix = revlog
            .rev_from_node(NodePrefix::from_hex("000").unwrap())
            .unwrap();
        assert_eq!(by_zero_prefix, Revision(-1));
        let by_b00_prefix = revlog
            .rev_from_node(NodePrefix::from_hex("b00").unwrap())
            .unwrap();
        assert_eq!(by_b00_prefix, Revision(1));

        // RevlogError does not implement PartialEq
        // (ultimately because io::Error does not)
        let err = revlog
            .rev_from_node(NodePrefix::from_hex("00").unwrap())
            .expect_err("Expected to give AmbiguousPrefix error");
        assert!(
            matches!(err, RevlogError::AmbiguousPrefix),
            "Got another error than AmbiguousPrefix: {:?}",
            err
        );
    }
}
General Comments 0
You need to be logged in to leave comments.
Login now