##// END OF EJS Templates
rust-revlog: don't try to open the data file if the index is empty...
Raphaël Gomès -
r52759:09ece563 default
parent child Browse files
Show More
@@ -1,1419 +1,1422
1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Mercurial concepts for handling revision history
6 //! Mercurial concepts for handling revision history
7
7
8 pub mod node;
8 pub mod node;
9 pub mod nodemap;
9 pub mod nodemap;
10 mod nodemap_docket;
10 mod nodemap_docket;
11 pub mod path_encode;
11 pub mod path_encode;
12 pub use node::{FromHexError, Node, NodePrefix};
12 pub use node::{FromHexError, Node, NodePrefix};
13 pub mod changelog;
13 pub mod changelog;
14 pub mod filelog;
14 pub mod filelog;
15 pub mod index;
15 pub mod index;
16 pub mod manifest;
16 pub mod manifest;
17 pub mod patch;
17 pub mod patch;
18
18
19 use std::borrow::Cow;
19 use std::borrow::Cow;
20 use std::collections::HashSet;
20 use std::collections::HashSet;
21 use std::io::Read;
21 use std::io::Read;
22 use std::ops::Deref;
22 use std::ops::Deref;
23 use std::path::Path;
23 use std::path::Path;
24
24
25 use flate2::read::ZlibDecoder;
25 use flate2::read::ZlibDecoder;
26 use sha1::{Digest, Sha1};
26 use sha1::{Digest, Sha1};
27 use std::cell::RefCell;
27 use std::cell::RefCell;
28 use zstd;
28 use zstd;
29
29
30 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
31 use self::nodemap_docket::NodeMapDocket;
31 use self::nodemap_docket::NodeMapDocket;
32 use super::index::Index;
32 use super::index::Index;
33 use super::index::INDEX_ENTRY_SIZE;
33 use super::index::INDEX_ENTRY_SIZE;
34 use super::nodemap::{NodeMap, NodeMapError};
34 use super::nodemap::{NodeMap, NodeMapError};
35 use crate::config::{Config, ResourceProfileValue};
35 use crate::config::{Config, ResourceProfileValue};
36 use crate::errors::HgError;
36 use crate::errors::HgError;
37 use crate::exit_codes;
37 use crate::exit_codes;
38 use crate::requirements::{
38 use crate::requirements::{
39 GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT, SPARSEREVLOG_REQUIREMENT,
39 GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT, SPARSEREVLOG_REQUIREMENT,
40 };
40 };
41 use crate::vfs::Vfs;
41 use crate::vfs::Vfs;
42
42
43 /// As noted in revlog.c, revision numbers are actually encoded in
43 /// As noted in revlog.c, revision numbers are actually encoded in
44 /// 4 bytes, and are liberally converted to ints, whence the i32
44 /// 4 bytes, and are liberally converted to ints, whence the i32
45 pub type BaseRevision = i32;
45 pub type BaseRevision = i32;
46
46
47 /// Mercurial revision numbers
47 /// Mercurial revision numbers
48 /// In contrast to the more general [`UncheckedRevision`], these are "checked"
48 /// In contrast to the more general [`UncheckedRevision`], these are "checked"
49 /// in the sense that they should only be used for revisions that are
49 /// in the sense that they should only be used for revisions that are
50 /// valid for a given index (i.e. in bounds).
50 /// valid for a given index (i.e. in bounds).
51 #[derive(
51 #[derive(
52 Debug,
52 Debug,
53 derive_more::Display,
53 derive_more::Display,
54 Clone,
54 Clone,
55 Copy,
55 Copy,
56 Hash,
56 Hash,
57 PartialEq,
57 PartialEq,
58 Eq,
58 Eq,
59 PartialOrd,
59 PartialOrd,
60 Ord,
60 Ord,
61 )]
61 )]
62 pub struct Revision(pub BaseRevision);
62 pub struct Revision(pub BaseRevision);
63
63
64 impl format_bytes::DisplayBytes for Revision {
64 impl format_bytes::DisplayBytes for Revision {
65 fn display_bytes(
65 fn display_bytes(
66 &self,
66 &self,
67 output: &mut dyn std::io::Write,
67 output: &mut dyn std::io::Write,
68 ) -> std::io::Result<()> {
68 ) -> std::io::Result<()> {
69 self.0.display_bytes(output)
69 self.0.display_bytes(output)
70 }
70 }
71 }
71 }
72
72
73 /// Unchecked Mercurial revision numbers.
73 /// Unchecked Mercurial revision numbers.
74 ///
74 ///
75 /// Values of this type have no guarantee of being a valid revision number
75 /// Values of this type have no guarantee of being a valid revision number
76 /// in any context. Use method `check_revision` to get a valid revision within
76 /// in any context. Use method `check_revision` to get a valid revision within
77 /// the appropriate index object.
77 /// the appropriate index object.
78 #[derive(
78 #[derive(
79 Debug,
79 Debug,
80 derive_more::Display,
80 derive_more::Display,
81 Clone,
81 Clone,
82 Copy,
82 Copy,
83 Hash,
83 Hash,
84 PartialEq,
84 PartialEq,
85 Eq,
85 Eq,
86 PartialOrd,
86 PartialOrd,
87 Ord,
87 Ord,
88 )]
88 )]
89 pub struct UncheckedRevision(pub BaseRevision);
89 pub struct UncheckedRevision(pub BaseRevision);
90
90
91 impl format_bytes::DisplayBytes for UncheckedRevision {
91 impl format_bytes::DisplayBytes for UncheckedRevision {
92 fn display_bytes(
92 fn display_bytes(
93 &self,
93 &self,
94 output: &mut dyn std::io::Write,
94 output: &mut dyn std::io::Write,
95 ) -> std::io::Result<()> {
95 ) -> std::io::Result<()> {
96 self.0.display_bytes(output)
96 self.0.display_bytes(output)
97 }
97 }
98 }
98 }
99
99
100 impl From<Revision> for UncheckedRevision {
100 impl From<Revision> for UncheckedRevision {
101 fn from(value: Revision) -> Self {
101 fn from(value: Revision) -> Self {
102 Self(value.0)
102 Self(value.0)
103 }
103 }
104 }
104 }
105
105
106 impl From<BaseRevision> for UncheckedRevision {
106 impl From<BaseRevision> for UncheckedRevision {
107 fn from(value: BaseRevision) -> Self {
107 fn from(value: BaseRevision) -> Self {
108 Self(value)
108 Self(value)
109 }
109 }
110 }
110 }
111
111
112 /// Marker expressing the absence of a parent
112 /// Marker expressing the absence of a parent
113 ///
113 ///
114 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
114 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
115 /// to be smaller than all existing revisions.
115 /// to be smaller than all existing revisions.
116 pub const NULL_REVISION: Revision = Revision(-1);
116 pub const NULL_REVISION: Revision = Revision(-1);
117
117
118 /// Same as `mercurial.node.wdirrev`
118 /// Same as `mercurial.node.wdirrev`
119 ///
119 ///
120 /// This is also equal to `i32::max_value()`, but it's better to spell
120 /// This is also equal to `i32::max_value()`, but it's better to spell
121 /// it out explicitely, same as in `mercurial.node`
121 /// it out explicitely, same as in `mercurial.node`
122 #[allow(clippy::unreadable_literal)]
122 #[allow(clippy::unreadable_literal)]
123 pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
123 pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
124 UncheckedRevision(0x7fffffff);
124 UncheckedRevision(0x7fffffff);
125
125
126 pub const WORKING_DIRECTORY_HEX: &str =
126 pub const WORKING_DIRECTORY_HEX: &str =
127 "ffffffffffffffffffffffffffffffffffffffff";
127 "ffffffffffffffffffffffffffffffffffffffff";
128
128
129 /// The simplest expression of what we need of Mercurial DAGs.
129 /// The simplest expression of what we need of Mercurial DAGs.
130 pub trait Graph {
130 pub trait Graph {
131 /// Return the two parents of the given `Revision`.
131 /// Return the two parents of the given `Revision`.
132 ///
132 ///
133 /// Each of the parents can be independently `NULL_REVISION`
133 /// Each of the parents can be independently `NULL_REVISION`
134 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
134 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
135 }
135 }
136
136
137 #[derive(Clone, Debug, PartialEq)]
137 #[derive(Clone, Debug, PartialEq)]
138 pub enum GraphError {
138 pub enum GraphError {
139 ParentOutOfRange(Revision),
139 ParentOutOfRange(Revision),
140 }
140 }
141
141
142 impl<T: Graph> Graph for &T {
142 impl<T: Graph> Graph for &T {
143 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
143 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
144 (*self).parents(rev)
144 (*self).parents(rev)
145 }
145 }
146 }
146 }
147
147
148 /// The Mercurial Revlog Index
148 /// The Mercurial Revlog Index
149 ///
149 ///
150 /// This is currently limited to the minimal interface that is needed for
150 /// This is currently limited to the minimal interface that is needed for
151 /// the [`nodemap`](nodemap/index.html) module
151 /// the [`nodemap`](nodemap/index.html) module
152 pub trait RevlogIndex {
152 pub trait RevlogIndex {
153 /// Total number of Revisions referenced in this index
153 /// Total number of Revisions referenced in this index
154 fn len(&self) -> usize;
154 fn len(&self) -> usize;
155
155
156 fn is_empty(&self) -> bool {
156 fn is_empty(&self) -> bool {
157 self.len() == 0
157 self.len() == 0
158 }
158 }
159
159
160 /// Return a reference to the Node or `None` for `NULL_REVISION`
160 /// Return a reference to the Node or `None` for `NULL_REVISION`
161 fn node(&self, rev: Revision) -> Option<&Node>;
161 fn node(&self, rev: Revision) -> Option<&Node>;
162
162
163 /// Return a [`Revision`] if `rev` is a valid revision number for this
163 /// Return a [`Revision`] if `rev` is a valid revision number for this
164 /// index.
164 /// index.
165 ///
165 ///
166 /// [`NULL_REVISION`] is considered to be valid.
166 /// [`NULL_REVISION`] is considered to be valid.
167 #[inline(always)]
167 #[inline(always)]
168 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
168 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
169 let rev = rev.0;
169 let rev = rev.0;
170
170
171 if rev == NULL_REVISION.0 || (rev >= 0 && (rev as usize) < self.len())
171 if rev == NULL_REVISION.0 || (rev >= 0 && (rev as usize) < self.len())
172 {
172 {
173 Some(Revision(rev))
173 Some(Revision(rev))
174 } else {
174 } else {
175 None
175 None
176 }
176 }
177 }
177 }
178 }
178 }
179
179
// Per-revision flag bits, occupying the four highest bits of a `u16`.
const REVISION_FLAG_CENSORED: u16 = 1 << 15;
const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;

// Union of every flag bit declared above.
// Keep this in sync with REVIDX_KNOWN_FLAGS in
// mercurial/revlogutils/flagutil.py
const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_HASCOPIESINFO
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_CENSORED;

// Flags value with no bit set.
const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
193
193
194 #[derive(Debug, derive_more::From, derive_more::Display)]
194 #[derive(Debug, derive_more::From, derive_more::Display)]
195 pub enum RevlogError {
195 pub enum RevlogError {
196 InvalidRevision,
196 InvalidRevision,
197 /// Working directory is not supported
197 /// Working directory is not supported
198 WDirUnsupported,
198 WDirUnsupported,
199 /// Found more than one entry whose ID match the requested prefix
199 /// Found more than one entry whose ID match the requested prefix
200 AmbiguousPrefix,
200 AmbiguousPrefix,
201 #[from]
201 #[from]
202 Other(HgError),
202 Other(HgError),
203 }
203 }
204
204
205 impl From<NodeMapError> for RevlogError {
205 impl From<NodeMapError> for RevlogError {
206 fn from(error: NodeMapError) -> Self {
206 fn from(error: NodeMapError) -> Self {
207 match error {
207 match error {
208 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
208 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
209 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
209 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
210 format!("nodemap point to revision {} not in index", rev),
210 format!("nodemap point to revision {} not in index", rev),
211 ),
211 ),
212 }
212 }
213 }
213 }
214 }
214 }
215
215
216 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
216 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
217 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
217 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
218 }
218 }
219
219
220 impl RevlogError {
220 impl RevlogError {
221 fn corrupted<S: AsRef<str>>(context: S) -> Self {
221 fn corrupted<S: AsRef<str>>(context: S) -> Self {
222 RevlogError::Other(corrupted(context))
222 RevlogError::Other(corrupted(context))
223 }
223 }
224 }
224 }
225
225
/// The kinds of revlog distinguished by this module.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RevlogType {
    Changelog,
    Manifestlog,
    Filelog,
}
232
232
233 impl TryFrom<usize> for RevlogType {
233 impl TryFrom<usize> for RevlogType {
234 type Error = HgError;
234 type Error = HgError;
235
235
236 fn try_from(value: usize) -> Result<Self, Self::Error> {
236 fn try_from(value: usize) -> Result<Self, Self::Error> {
237 match value {
237 match value {
238 1001 => Ok(Self::Changelog),
238 1001 => Ok(Self::Changelog),
239 1002 => Ok(Self::Manifestlog),
239 1002 => Ok(Self::Manifestlog),
240 1003 => Ok(Self::Filelog),
240 1003 => Ok(Self::Filelog),
241 t => Err(HgError::abort(
241 t => Err(HgError::abort(
242 format!("Unknown revlog type {}", t),
242 format!("Unknown revlog type {}", t),
243 exit_codes::ABORT,
243 exit_codes::ABORT,
244 None,
244 None,
245 )),
245 )),
246 }
246 }
247 }
247 }
248 }
248 }
249
249
/// The compression engines that can be selected, with their options.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CompressionEngine {
    /// zlib compression
    Zlib {
        /// Between 0 and 9 included
        level: u32,
    },
    /// zstd compression
    Zstd {
        /// Between 0 and 22 included
        level: u32,
        /// Never used in practice for now
        threads: u32,
    },
    /// No compression is performed
    None,
}
265 impl CompressionEngine {
265 impl CompressionEngine {
266 pub fn set_level(&mut self, new_level: usize) -> Result<(), HgError> {
266 pub fn set_level(&mut self, new_level: usize) -> Result<(), HgError> {
267 match self {
267 match self {
268 CompressionEngine::Zlib { level } => {
268 CompressionEngine::Zlib { level } => {
269 if new_level > 9 {
269 if new_level > 9 {
270 return Err(HgError::abort(
270 return Err(HgError::abort(
271 format!(
271 format!(
272 "invalid compression zlib compression level {}",
272 "invalid compression zlib compression level {}",
273 new_level
273 new_level
274 ),
274 ),
275 exit_codes::ABORT,
275 exit_codes::ABORT,
276 None,
276 None,
277 ));
277 ));
278 }
278 }
279 *level = new_level as u32;
279 *level = new_level as u32;
280 }
280 }
281 CompressionEngine::Zstd { level, .. } => {
281 CompressionEngine::Zstd { level, .. } => {
282 if new_level > 22 {
282 if new_level > 22 {
283 return Err(HgError::abort(
283 return Err(HgError::abort(
284 format!(
284 format!(
285 "invalid compression zstd compression level {}",
285 "invalid compression zstd compression level {}",
286 new_level
286 new_level
287 ),
287 ),
288 exit_codes::ABORT,
288 exit_codes::ABORT,
289 None,
289 None,
290 ));
290 ));
291 }
291 }
292 *level = new_level as u32;
292 *level = new_level as u32;
293 }
293 }
294 CompressionEngine::None => {}
294 CompressionEngine::None => {}
295 }
295 }
296 Ok(())
296 Ok(())
297 }
297 }
298
298
299 pub fn zstd(
299 pub fn zstd(
300 zstd_level: Option<u32>,
300 zstd_level: Option<u32>,
301 ) -> Result<CompressionEngine, HgError> {
301 ) -> Result<CompressionEngine, HgError> {
302 let mut engine = CompressionEngine::Zstd {
302 let mut engine = CompressionEngine::Zstd {
303 level: 3,
303 level: 3,
304 threads: 0,
304 threads: 0,
305 };
305 };
306 if let Some(level) = zstd_level {
306 if let Some(level) = zstd_level {
307 engine.set_level(level as usize)?;
307 engine.set_level(level as usize)?;
308 }
308 }
309 Ok(engine)
309 Ok(engine)
310 }
310 }
311 }
311 }
312
312
313 impl Default for CompressionEngine {
313 impl Default for CompressionEngine {
314 fn default() -> Self {
314 fn default() -> Self {
315 Self::Zlib { level: 6 }
315 Self::Zlib { level: 6 }
316 }
316 }
317 }
317 }
318
318
/// Holds configuration values about how the revlog data is read
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct RevlogDataConfig {
    /// Should we try to open the "pending" version of the revlog
    pub try_pending: bool,
    /// Should we try to open the "split" version of the revlog
    pub try_split: bool,
    /// When True, `indexfile` should be opened with `checkambig=True` at
    /// writing time, to avoid file stat ambiguity
    pub check_ambig: bool,
    /// If true, use mmap instead of reading to deal with large indexes
    pub mmap_large_index: bool,
    /// How much data is considered large
    pub mmap_index_threshold: Option<u64>,
    /// How much data to read and cache into the raw revlog data cache
    pub chunk_cache_size: u64,
    /// The size of the uncompressed cache compared to the largest revision
    /// seen
    pub uncompressed_cache_factor: Option<f64>,
    /// The number of chunks cached
    pub uncompressed_cache_count: Option<u64>,
    /// Allow sparse reading of the revlog data
    pub with_sparse_read: bool,
    /// Minimal density of a sparse read chunk
    pub sr_density_threshold: f64,
    /// Minimal size of the data we skip when performing sparse reads
    pub sr_min_gap_size: u64,
    /// Whether deltas are encoded against arbitrary bases
    pub general_delta: bool,
}
349
349
350 impl RevlogDataConfig {
350 impl RevlogDataConfig {
351 pub fn new(
351 pub fn new(
352 config: &Config,
352 config: &Config,
353 requirements: &HashSet<String>,
353 requirements: &HashSet<String>,
354 ) -> Result<Self, HgError> {
354 ) -> Result<Self, HgError> {
355 let mut data_config = Self::default();
355 let mut data_config = Self::default();
356 if let Some(chunk_cache_size) =
356 if let Some(chunk_cache_size) =
357 config.get_byte_size(b"format", b"chunkcachesize")?
357 config.get_byte_size(b"format", b"chunkcachesize")?
358 {
358 {
359 data_config.chunk_cache_size = chunk_cache_size;
359 data_config.chunk_cache_size = chunk_cache_size;
360 }
360 }
361
361
362 let memory_profile = config.get_resource_profile(Some("memory"));
362 let memory_profile = config.get_resource_profile(Some("memory"));
363 if memory_profile.value >= ResourceProfileValue::Medium {
363 if memory_profile.value >= ResourceProfileValue::Medium {
364 data_config.uncompressed_cache_count = Some(10_000);
364 data_config.uncompressed_cache_count = Some(10_000);
365 data_config.uncompressed_cache_factor = Some(4.0);
365 data_config.uncompressed_cache_factor = Some(4.0);
366 if memory_profile.value >= ResourceProfileValue::High {
366 if memory_profile.value >= ResourceProfileValue::High {
367 data_config.uncompressed_cache_factor = Some(10.0)
367 data_config.uncompressed_cache_factor = Some(10.0)
368 }
368 }
369 }
369 }
370
370
371 if let Some(mmap_index_threshold) =
371 if let Some(mmap_index_threshold) =
372 config.get_byte_size(b"experimental", b"mmapindexthreshold")?
372 config.get_byte_size(b"experimental", b"mmapindexthreshold")?
373 {
373 {
374 data_config.mmap_index_threshold = Some(mmap_index_threshold);
374 data_config.mmap_index_threshold = Some(mmap_index_threshold);
375 }
375 }
376
376
377 let with_sparse_read =
377 let with_sparse_read =
378 config.get_bool(b"experimental", b"sparse-read")?;
378 config.get_bool(b"experimental", b"sparse-read")?;
379 if let Some(sr_density_threshold) = config
379 if let Some(sr_density_threshold) = config
380 .get_f64(b"experimental", b"sparse-read.density-threshold")?
380 .get_f64(b"experimental", b"sparse-read.density-threshold")?
381 {
381 {
382 data_config.sr_density_threshold = sr_density_threshold;
382 data_config.sr_density_threshold = sr_density_threshold;
383 }
383 }
384 data_config.with_sparse_read = with_sparse_read;
384 data_config.with_sparse_read = with_sparse_read;
385 if let Some(sr_min_gap_size) = config
385 if let Some(sr_min_gap_size) = config
386 .get_byte_size(b"experimental", b"sparse-read.min-gap-size")?
386 .get_byte_size(b"experimental", b"sparse-read.min-gap-size")?
387 {
387 {
388 data_config.sr_min_gap_size = sr_min_gap_size;
388 data_config.sr_min_gap_size = sr_min_gap_size;
389 }
389 }
390
390
391 data_config.with_sparse_read =
391 data_config.with_sparse_read =
392 requirements.contains(SPARSEREVLOG_REQUIREMENT);
392 requirements.contains(SPARSEREVLOG_REQUIREMENT);
393
393
394 Ok(data_config)
394 Ok(data_config)
395 }
395 }
396 }
396 }
397
397
398 impl Default for RevlogDataConfig {
398 impl Default for RevlogDataConfig {
399 fn default() -> Self {
399 fn default() -> Self {
400 Self {
400 Self {
401 chunk_cache_size: 65536,
401 chunk_cache_size: 65536,
402 sr_density_threshold: 0.50,
402 sr_density_threshold: 0.50,
403 sr_min_gap_size: 262144,
403 sr_min_gap_size: 262144,
404 try_pending: Default::default(),
404 try_pending: Default::default(),
405 try_split: Default::default(),
405 try_split: Default::default(),
406 check_ambig: Default::default(),
406 check_ambig: Default::default(),
407 mmap_large_index: Default::default(),
407 mmap_large_index: Default::default(),
408 mmap_index_threshold: Default::default(),
408 mmap_index_threshold: Default::default(),
409 uncompressed_cache_factor: Default::default(),
409 uncompressed_cache_factor: Default::default(),
410 uncompressed_cache_count: Default::default(),
410 uncompressed_cache_count: Default::default(),
411 with_sparse_read: Default::default(),
411 with_sparse_read: Default::default(),
412 general_delta: Default::default(),
412 general_delta: Default::default(),
413 }
413 }
414 }
414 }
415 }
415 }
416
416
/// Holds configuration values about how new deltas are computed.
///
/// Some attributes are duplicated from [`RevlogDataConfig`] to help having
/// each object self contained.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct RevlogDeltaConfig {
    /// Whether deltas can be encoded against arbitrary bases
    pub general_delta: bool,
    /// Allow sparse writing of the revlog data
    pub sparse_revlog: bool,
    /// Maximum length of a delta chain
    pub max_chain_len: Option<u64>,
    /// Maximum distance between a delta chain's start and end
    pub max_deltachain_span: Option<u64>,
    /// If `upper_bound_comp` is not None, this is the expected maximal
    /// gain from compression for the data content
    pub upper_bound_comp: Option<f64>,
    /// Should we try a delta against both parents
    pub delta_both_parents: bool,
    /// Test delta base candidate groups by chunks of this maximal size
    pub candidate_group_chunk_size: u64,
    /// Should we display debug information about delta computation
    pub debug_delta: bool,
    /// Trust incoming deltas by default
    pub lazy_delta: bool,
    /// Trust the base of incoming deltas by default
    pub lazy_delta_base: bool,
}
445 impl RevlogDeltaConfig {
445 impl RevlogDeltaConfig {
446 pub fn new(
446 pub fn new(
447 config: &Config,
447 config: &Config,
448 requirements: &HashSet<String>,
448 requirements: &HashSet<String>,
449 revlog_type: RevlogType,
449 revlog_type: RevlogType,
450 ) -> Result<Self, HgError> {
450 ) -> Result<Self, HgError> {
451 let mut delta_config = Self {
451 let mut delta_config = Self {
452 delta_both_parents: config
452 delta_both_parents: config
453 .get_option_no_default(
453 .get_option_no_default(
454 b"storage",
454 b"storage",
455 b"revlog.optimize-delta-parent-choice",
455 b"revlog.optimize-delta-parent-choice",
456 )?
456 )?
457 .unwrap_or(true),
457 .unwrap_or(true),
458 candidate_group_chunk_size: config
458 candidate_group_chunk_size: config
459 .get_u64(
459 .get_u64(
460 b"storage",
460 b"storage",
461 b"revlog.delta-parent-search.candidate-group-chunk-size",
461 b"revlog.delta-parent-search.candidate-group-chunk-size",
462 )?
462 )?
463 .unwrap_or_default(),
463 .unwrap_or_default(),
464 ..Default::default()
464 ..Default::default()
465 };
465 };
466
466
467 delta_config.debug_delta =
467 delta_config.debug_delta =
468 config.get_bool(b"debug", b"revlog.debug-delta")?;
468 config.get_bool(b"debug", b"revlog.debug-delta")?;
469
469
470 delta_config.general_delta =
470 delta_config.general_delta =
471 requirements.contains(GENERALDELTA_REQUIREMENT);
471 requirements.contains(GENERALDELTA_REQUIREMENT);
472
472
473 let lazy_delta =
473 let lazy_delta =
474 config.get_bool(b"storage", b"revlog.reuse-external-delta")?;
474 config.get_bool(b"storage", b"revlog.reuse-external-delta")?;
475
475
476 if revlog_type == RevlogType::Manifestlog {
476 if revlog_type == RevlogType::Manifestlog {
477 // upper bound of what we expect from compression
477 // upper bound of what we expect from compression
478 // (real life value seems to be 3)
478 // (real life value seems to be 3)
479 delta_config.upper_bound_comp = Some(3.0)
479 delta_config.upper_bound_comp = Some(3.0)
480 }
480 }
481
481
482 let mut lazy_delta_base = false;
482 let mut lazy_delta_base = false;
483 if lazy_delta {
483 if lazy_delta {
484 lazy_delta_base = match config.get_option_no_default(
484 lazy_delta_base = match config.get_option_no_default(
485 b"storage",
485 b"storage",
486 b"revlog.reuse-external-delta-parent",
486 b"revlog.reuse-external-delta-parent",
487 )? {
487 )? {
488 Some(base) => base,
488 Some(base) => base,
489 None => config.get_bool(b"format", b"generaldelta")?,
489 None => config.get_bool(b"format", b"generaldelta")?,
490 };
490 };
491 }
491 }
492 delta_config.lazy_delta = lazy_delta;
492 delta_config.lazy_delta = lazy_delta;
493 delta_config.lazy_delta_base = lazy_delta_base;
493 delta_config.lazy_delta_base = lazy_delta_base;
494
494
495 delta_config.max_deltachain_span =
495 delta_config.max_deltachain_span =
496 match config.get_i64(b"experimental", b"maxdeltachainspan")? {
496 match config.get_i64(b"experimental", b"maxdeltachainspan")? {
497 Some(span) => {
497 Some(span) => {
498 if span < 0 {
498 if span < 0 {
499 None
499 None
500 } else {
500 } else {
501 Some(span as u64)
501 Some(span as u64)
502 }
502 }
503 }
503 }
504 None => None,
504 None => None,
505 };
505 };
506
506
507 delta_config.sparse_revlog =
507 delta_config.sparse_revlog =
508 requirements.contains(SPARSEREVLOG_REQUIREMENT);
508 requirements.contains(SPARSEREVLOG_REQUIREMENT);
509
509
510 delta_config.max_chain_len =
510 delta_config.max_chain_len =
511 config.get_byte_size_no_default(b"format", b"maxchainlen")?;
511 config.get_byte_size_no_default(b"format", b"maxchainlen")?;
512
512
513 Ok(delta_config)
513 Ok(delta_config)
514 }
514 }
515 }
515 }
516
516
517 impl Default for RevlogDeltaConfig {
517 impl Default for RevlogDeltaConfig {
518 fn default() -> Self {
518 fn default() -> Self {
519 Self {
519 Self {
520 delta_both_parents: true,
520 delta_both_parents: true,
521 lazy_delta: true,
521 lazy_delta: true,
522 general_delta: Default::default(),
522 general_delta: Default::default(),
523 sparse_revlog: Default::default(),
523 sparse_revlog: Default::default(),
524 max_chain_len: Default::default(),
524 max_chain_len: Default::default(),
525 max_deltachain_span: Default::default(),
525 max_deltachain_span: Default::default(),
526 upper_bound_comp: Default::default(),
526 upper_bound_comp: Default::default(),
527 candidate_group_chunk_size: Default::default(),
527 candidate_group_chunk_size: Default::default(),
528 debug_delta: Default::default(),
528 debug_delta: Default::default(),
529 lazy_delta_base: Default::default(),
529 lazy_delta_base: Default::default(),
530 }
530 }
531 }
531 }
532 }
532 }
533
533
534 #[derive(Debug, Default, Clone, Copy, PartialEq)]
534 #[derive(Debug, Default, Clone, Copy, PartialEq)]
535 /// Holds configuration values about the available revlog features
535 /// Holds configuration values about the available revlog features
536 pub struct RevlogFeatureConfig {
536 pub struct RevlogFeatureConfig {
537 /// The compression engine and its options
537 /// The compression engine and its options
538 pub compression_engine: CompressionEngine,
538 pub compression_engine: CompressionEngine,
539 /// Can we use censor on this revlog
539 /// Can we use censor on this revlog
540 pub censorable: bool,
540 pub censorable: bool,
541 /// Does this revlog use the "side data" feature
541 /// Does this revlog use the "side data" feature
542 pub has_side_data: bool,
542 pub has_side_data: bool,
543 /// Might remove this configuration once the rank computation has no
543 /// Might remove this configuration once the rank computation has no
544 /// impact
544 /// impact
545 pub compute_rank: bool,
545 pub compute_rank: bool,
546 /// Parent order is supposed to be semantically irrelevant, so we
546 /// Parent order is supposed to be semantically irrelevant, so we
547 /// normally re-sort parents to ensure that the first parent is non-null,
547 /// normally re-sort parents to ensure that the first parent is non-null,
548 /// if there is a non-null parent at all.
548 /// if there is a non-null parent at all.
549 /// filelog abuses the parent order as a flag to mark some instances of
549 /// filelog abuses the parent order as a flag to mark some instances of
550 /// meta-encoded files, so allow it to disable this behavior.
550 /// meta-encoded files, so allow it to disable this behavior.
551 pub canonical_parent_order: bool,
551 pub canonical_parent_order: bool,
552 /// Can ellipsis commit be used
552 /// Can ellipsis commit be used
553 pub enable_ellipsis: bool,
553 pub enable_ellipsis: bool,
554 }
554 }
555 impl RevlogFeatureConfig {
555 impl RevlogFeatureConfig {
556 pub fn new(
556 pub fn new(
557 config: &Config,
557 config: &Config,
558 requirements: &HashSet<String>,
558 requirements: &HashSet<String>,
559 ) -> Result<Self, HgError> {
559 ) -> Result<Self, HgError> {
560 let mut feature_config = Self::default();
560 let mut feature_config = Self::default();
561
561
562 let zlib_level = config.get_u32(b"storage", b"revlog.zlib.level")?;
562 let zlib_level = config.get_u32(b"storage", b"revlog.zlib.level")?;
563 let zstd_level = config.get_u32(b"storage", b"revlog.zstd.level")?;
563 let zstd_level = config.get_u32(b"storage", b"revlog.zstd.level")?;
564
564
565 feature_config.compression_engine = CompressionEngine::default();
565 feature_config.compression_engine = CompressionEngine::default();
566
566
567 for requirement in requirements {
567 for requirement in requirements {
568 if requirement.starts_with("revlog-compression-")
568 if requirement.starts_with("revlog-compression-")
569 || requirement.starts_with("exp-compression-")
569 || requirement.starts_with("exp-compression-")
570 {
570 {
571 let split = &mut requirement.splitn(3, '-');
571 let split = &mut requirement.splitn(3, '-');
572 split.next();
572 split.next();
573 split.next();
573 split.next();
574 feature_config.compression_engine = match split.next().unwrap()
574 feature_config.compression_engine = match split.next().unwrap()
575 {
575 {
576 "zstd" => CompressionEngine::zstd(zstd_level)?,
576 "zstd" => CompressionEngine::zstd(zstd_level)?,
577 e => {
577 e => {
578 return Err(HgError::UnsupportedFeature(format!(
578 return Err(HgError::UnsupportedFeature(format!(
579 "Unsupported compression engine '{e}'"
579 "Unsupported compression engine '{e}'"
580 )))
580 )))
581 }
581 }
582 };
582 };
583 }
583 }
584 }
584 }
585 if let Some(level) = zlib_level {
585 if let Some(level) = zlib_level {
586 if matches!(
586 if matches!(
587 feature_config.compression_engine,
587 feature_config.compression_engine,
588 CompressionEngine::Zlib { .. }
588 CompressionEngine::Zlib { .. }
589 ) {
589 ) {
590 feature_config
590 feature_config
591 .compression_engine
591 .compression_engine
592 .set_level(level as usize)?;
592 .set_level(level as usize)?;
593 }
593 }
594 }
594 }
595
595
596 feature_config.enable_ellipsis =
596 feature_config.enable_ellipsis =
597 requirements.contains(NARROW_REQUIREMENT);
597 requirements.contains(NARROW_REQUIREMENT);
598
598
599 Ok(feature_config)
599 Ok(feature_config)
600 }
600 }
601 }
601 }
602
602
603 /// Read only implementation of revlog.
603 /// Read only implementation of revlog.
604 pub struct Revlog {
604 pub struct Revlog {
605 /// When index and data are not interleaved: bytes of the revlog index.
605 /// When index and data are not interleaved: bytes of the revlog index.
606 /// When index and data are interleaved: bytes of the revlog index and
606 /// When index and data are interleaved: bytes of the revlog index and
607 /// data.
607 /// data.
608 index: Index,
608 index: Index,
609 /// When index and data are not interleaved: bytes of the revlog data
609 /// When index and data are not interleaved: bytes of the revlog data
610 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
610 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
611 /// When present on disk: the persistent nodemap for this revlog
611 /// When present on disk: the persistent nodemap for this revlog
612 nodemap: Option<nodemap::NodeTree>,
612 nodemap: Option<nodemap::NodeTree>,
613 }
613 }
614
614
615 impl Graph for Revlog {
615 impl Graph for Revlog {
616 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
616 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
617 self.index.parents(rev)
617 self.index.parents(rev)
618 }
618 }
619 }
619 }
620
620
621 #[derive(Debug, Copy, Clone)]
621 #[derive(Debug, Copy, Clone)]
622 pub enum RevlogVersionOptions {
622 pub enum RevlogVersionOptions {
623 V0,
623 V0,
624 V1 { generaldelta: bool },
624 V1 { generaldelta: bool },
625 V2,
625 V2,
626 ChangelogV2 { compute_rank: bool },
626 ChangelogV2 { compute_rank: bool },
627 }
627 }
628
628
629 /// Options to govern how a revlog should be opened, usually from the
629 /// Options to govern how a revlog should be opened, usually from the
630 /// repository configuration or requirements.
630 /// repository configuration or requirements.
631 #[derive(Debug, Copy, Clone)]
631 #[derive(Debug, Copy, Clone)]
632 pub struct RevlogOpenOptions {
632 pub struct RevlogOpenOptions {
633 /// The revlog version, along with any option specific to this version
633 /// The revlog version, along with any option specific to this version
634 pub version: RevlogVersionOptions,
634 pub version: RevlogVersionOptions,
635 /// Whether the revlog uses a persistent nodemap.
635 /// Whether the revlog uses a persistent nodemap.
636 pub use_nodemap: bool,
636 pub use_nodemap: bool,
637 // TODO other non-header/version options,
637 // TODO other non-header/version options,
638 }
638 }
639
639
640 impl RevlogOpenOptions {
640 impl RevlogOpenOptions {
641 pub fn new() -> Self {
641 pub fn new() -> Self {
642 Self {
642 Self {
643 version: RevlogVersionOptions::V1 { generaldelta: true },
643 version: RevlogVersionOptions::V1 { generaldelta: true },
644 use_nodemap: false,
644 use_nodemap: false,
645 }
645 }
646 }
646 }
647
647
648 fn default_index_header(&self) -> index::IndexHeader {
648 fn default_index_header(&self) -> index::IndexHeader {
649 index::IndexHeader {
649 index::IndexHeader {
650 header_bytes: match self.version {
650 header_bytes: match self.version {
651 RevlogVersionOptions::V0 => [0, 0, 0, 0],
651 RevlogVersionOptions::V0 => [0, 0, 0, 0],
652 RevlogVersionOptions::V1 { generaldelta } => {
652 RevlogVersionOptions::V1 { generaldelta } => {
653 [0, if generaldelta { 3 } else { 1 }, 0, 1]
653 [0, if generaldelta { 3 } else { 1 }, 0, 1]
654 }
654 }
655 RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
655 RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
656 RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
656 RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
657 0xD34Du32.to_be_bytes()
657 0xD34Du32.to_be_bytes()
658 }
658 }
659 },
659 },
660 }
660 }
661 }
661 }
662 }
662 }
663
663
664 impl Default for RevlogOpenOptions {
664 impl Default for RevlogOpenOptions {
665 fn default() -> Self {
665 fn default() -> Self {
666 Self::new()
666 Self::new()
667 }
667 }
668 }
668 }
669
669
670 impl Revlog {
670 impl Revlog {
671 /// Open a revlog index file.
671 /// Open a revlog index file.
672 ///
672 ///
673 /// It will also open the associated data file if index and data are not
673 /// It will also open the associated data file if index and data are not
674 /// interleaved.
674 /// interleaved.
675 pub fn open(
675 pub fn open(
676 store_vfs: &Vfs,
676 store_vfs: &Vfs,
677 index_path: impl AsRef<Path>,
677 index_path: impl AsRef<Path>,
678 data_path: Option<&Path>,
678 data_path: Option<&Path>,
679 options: RevlogOpenOptions,
679 options: RevlogOpenOptions,
680 ) -> Result<Self, HgError> {
680 ) -> Result<Self, HgError> {
681 Self::open_gen(store_vfs, index_path, data_path, options, None)
681 Self::open_gen(store_vfs, index_path, data_path, options, None)
682 }
682 }
683
683
684 fn open_gen(
684 fn open_gen(
685 store_vfs: &Vfs,
685 store_vfs: &Vfs,
686 index_path: impl AsRef<Path>,
686 index_path: impl AsRef<Path>,
687 data_path: Option<&Path>,
687 data_path: Option<&Path>,
688 options: RevlogOpenOptions,
688 options: RevlogOpenOptions,
689 nodemap_for_test: Option<nodemap::NodeTree>,
689 nodemap_for_test: Option<nodemap::NodeTree>,
690 ) -> Result<Self, HgError> {
690 ) -> Result<Self, HgError> {
691 let index_path = index_path.as_ref();
691 let index_path = index_path.as_ref();
692 let index = {
692 let index = {
693 match store_vfs.mmap_open_opt(index_path)? {
693 match store_vfs.mmap_open_opt(index_path)? {
694 None => Index::new(
694 None => Index::new(
695 Box::<Vec<_>>::default(),
695 Box::<Vec<_>>::default(),
696 options.default_index_header(),
696 options.default_index_header(),
697 ),
697 ),
698 Some(index_mmap) => {
698 Some(index_mmap) => {
699 let index = Index::new(
699 let index = Index::new(
700 Box::new(index_mmap),
700 Box::new(index_mmap),
701 options.default_index_header(),
701 options.default_index_header(),
702 )?;
702 )?;
703 Ok(index)
703 Ok(index)
704 }
704 }
705 }
705 }
706 }?;
706 }?;
707
707
708 let default_data_path = index_path.with_extension("d");
708 let default_data_path = index_path.with_extension("d");
709
709
710 // type annotation required
710 // type annotation required
711 // won't recognize Mmap as Deref<Target = [u8]>
711 // won't recognize Mmap as Deref<Target = [u8]>
712 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
712 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
713 if index.is_inline() {
713 if index.is_inline() {
714 None
714 None
715 } else if index.is_empty() {
716 // No need to even try to open the data file then.
717 Some(Box::new(&[][..]))
715 } else {
718 } else {
716 let data_path = data_path.unwrap_or(&default_data_path);
719 let data_path = data_path.unwrap_or(&default_data_path);
717 let data_mmap = store_vfs.mmap_open(data_path)?;
720 let data_mmap = store_vfs.mmap_open(data_path)?;
718 Some(Box::new(data_mmap))
721 Some(Box::new(data_mmap))
719 };
722 };
720
723
721 let nodemap = if index.is_inline() || !options.use_nodemap {
724 let nodemap = if index.is_inline() || !options.use_nodemap {
722 None
725 None
723 } else {
726 } else {
724 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
727 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
725 |(docket, data)| {
728 |(docket, data)| {
726 nodemap::NodeTree::load_bytes(
729 nodemap::NodeTree::load_bytes(
727 Box::new(data),
730 Box::new(data),
728 docket.data_length,
731 docket.data_length,
729 )
732 )
730 },
733 },
731 )
734 )
732 };
735 };
733
736
734 let nodemap = nodemap_for_test.or(nodemap);
737 let nodemap = nodemap_for_test.or(nodemap);
735
738
736 Ok(Revlog {
739 Ok(Revlog {
737 index,
740 index,
738 data_bytes,
741 data_bytes,
739 nodemap,
742 nodemap,
740 })
743 })
741 }
744 }
742
745
743 /// Return number of entries of the `Revlog`.
746 /// Return number of entries of the `Revlog`.
744 pub fn len(&self) -> usize {
747 pub fn len(&self) -> usize {
745 self.index.len()
748 self.index.len()
746 }
749 }
747
750
748 /// Returns `true` if the `Revlog` has zero `entries`.
751 /// Returns `true` if the `Revlog` has zero `entries`.
749 pub fn is_empty(&self) -> bool {
752 pub fn is_empty(&self) -> bool {
750 self.index.is_empty()
753 self.index.is_empty()
751 }
754 }
752
755
753 /// Returns the node ID for the given revision number, if it exists in this
756 /// Returns the node ID for the given revision number, if it exists in this
754 /// revlog
757 /// revlog
755 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
758 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
756 if rev == NULL_REVISION.into() {
759 if rev == NULL_REVISION.into() {
757 return Some(&NULL_NODE);
760 return Some(&NULL_NODE);
758 }
761 }
759 let rev = self.index.check_revision(rev)?;
762 let rev = self.index.check_revision(rev)?;
760 Some(self.index.get_entry(rev)?.hash())
763 Some(self.index.get_entry(rev)?.hash())
761 }
764 }
762
765
763 /// Return the revision number for the given node ID, if it exists in this
766 /// Return the revision number for the given node ID, if it exists in this
764 /// revlog
767 /// revlog
765 pub fn rev_from_node(
768 pub fn rev_from_node(
766 &self,
769 &self,
767 node: NodePrefix,
770 node: NodePrefix,
768 ) -> Result<Revision, RevlogError> {
771 ) -> Result<Revision, RevlogError> {
769 if let Some(nodemap) = &self.nodemap {
772 if let Some(nodemap) = &self.nodemap {
770 nodemap
773 nodemap
771 .find_bin(&self.index, node)?
774 .find_bin(&self.index, node)?
772 .ok_or(RevlogError::InvalidRevision)
775 .ok_or(RevlogError::InvalidRevision)
773 } else {
776 } else {
774 self.rev_from_node_no_persistent_nodemap(node)
777 self.rev_from_node_no_persistent_nodemap(node)
775 }
778 }
776 }
779 }
777
780
778 /// Same as `rev_from_node`, without using a persistent nodemap
781 /// Same as `rev_from_node`, without using a persistent nodemap
779 ///
782 ///
780 /// This is used as fallback when a persistent nodemap is not present.
783 /// This is used as fallback when a persistent nodemap is not present.
781 /// This happens when the persistent-nodemap experimental feature is not
784 /// This happens when the persistent-nodemap experimental feature is not
782 /// enabled, or for small revlogs.
785 /// enabled, or for small revlogs.
783 fn rev_from_node_no_persistent_nodemap(
786 fn rev_from_node_no_persistent_nodemap(
784 &self,
787 &self,
785 node: NodePrefix,
788 node: NodePrefix,
786 ) -> Result<Revision, RevlogError> {
789 ) -> Result<Revision, RevlogError> {
787 // Linear scan of the revlog
790 // Linear scan of the revlog
788 // TODO: consider building a non-persistent nodemap in memory to
791 // TODO: consider building a non-persistent nodemap in memory to
789 // optimize these cases.
792 // optimize these cases.
790 let mut found_by_prefix = None;
793 let mut found_by_prefix = None;
791 for rev in (-1..self.len() as BaseRevision).rev() {
794 for rev in (-1..self.len() as BaseRevision).rev() {
792 let rev = Revision(rev as BaseRevision);
795 let rev = Revision(rev as BaseRevision);
793 let candidate_node = if rev == Revision(-1) {
796 let candidate_node = if rev == Revision(-1) {
794 NULL_NODE
797 NULL_NODE
795 } else {
798 } else {
796 let index_entry =
799 let index_entry =
797 self.index.get_entry(rev).ok_or_else(|| {
800 self.index.get_entry(rev).ok_or_else(|| {
798 HgError::corrupted(
801 HgError::corrupted(
799 "revlog references a revision not in the index",
802 "revlog references a revision not in the index",
800 )
803 )
801 })?;
804 })?;
802 *index_entry.hash()
805 *index_entry.hash()
803 };
806 };
804 if node == candidate_node {
807 if node == candidate_node {
805 return Ok(rev);
808 return Ok(rev);
806 }
809 }
807 if node.is_prefix_of(&candidate_node) {
810 if node.is_prefix_of(&candidate_node) {
808 if found_by_prefix.is_some() {
811 if found_by_prefix.is_some() {
809 return Err(RevlogError::AmbiguousPrefix);
812 return Err(RevlogError::AmbiguousPrefix);
810 }
813 }
811 found_by_prefix = Some(rev)
814 found_by_prefix = Some(rev)
812 }
815 }
813 }
816 }
814 found_by_prefix.ok_or(RevlogError::InvalidRevision)
817 found_by_prefix.ok_or(RevlogError::InvalidRevision)
815 }
818 }
816
819
817 /// Returns whether the given revision exists in this revlog.
820 /// Returns whether the given revision exists in this revlog.
818 pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
821 pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
819 self.index.check_revision(rev).is_some()
822 self.index.check_revision(rev).is_some()
820 }
823 }
821
824
822 /// Return the full data associated to a revision.
825 /// Return the full data associated to a revision.
823 ///
826 ///
824 /// All entries required to build the final data out of deltas will be
827 /// All entries required to build the final data out of deltas will be
825 /// retrieved as needed, and the deltas will be applied to the inital
828 /// retrieved as needed, and the deltas will be applied to the inital
826 /// snapshot to rebuild the final data.
829 /// snapshot to rebuild the final data.
827 pub fn get_rev_data(
830 pub fn get_rev_data(
828 &self,
831 &self,
829 rev: UncheckedRevision,
832 rev: UncheckedRevision,
830 ) -> Result<Cow<[u8]>, RevlogError> {
833 ) -> Result<Cow<[u8]>, RevlogError> {
831 if rev == NULL_REVISION.into() {
834 if rev == NULL_REVISION.into() {
832 return Ok(Cow::Borrowed(&[]));
835 return Ok(Cow::Borrowed(&[]));
833 };
836 };
834 self.get_entry(rev)?.data()
837 self.get_entry(rev)?.data()
835 }
838 }
836
839
837 /// [`Self::get_rev_data`] for checked revisions.
840 /// [`Self::get_rev_data`] for checked revisions.
838 pub fn get_rev_data_for_checked_rev(
841 pub fn get_rev_data_for_checked_rev(
839 &self,
842 &self,
840 rev: Revision,
843 rev: Revision,
841 ) -> Result<Cow<[u8]>, RevlogError> {
844 ) -> Result<Cow<[u8]>, RevlogError> {
842 if rev == NULL_REVISION {
845 if rev == NULL_REVISION {
843 return Ok(Cow::Borrowed(&[]));
846 return Ok(Cow::Borrowed(&[]));
844 };
847 };
845 self.get_entry_for_checked_rev(rev)?.data()
848 self.get_entry_for_checked_rev(rev)?.data()
846 }
849 }
847
850
848 /// Check the hash of some given data against the recorded hash.
851 /// Check the hash of some given data against the recorded hash.
849 pub fn check_hash(
852 pub fn check_hash(
850 &self,
853 &self,
851 p1: Revision,
854 p1: Revision,
852 p2: Revision,
855 p2: Revision,
853 expected: &[u8],
856 expected: &[u8],
854 data: &[u8],
857 data: &[u8],
855 ) -> bool {
858 ) -> bool {
856 let e1 = self.index.get_entry(p1);
859 let e1 = self.index.get_entry(p1);
857 let h1 = match e1 {
860 let h1 = match e1 {
858 Some(ref entry) => entry.hash(),
861 Some(ref entry) => entry.hash(),
859 None => &NULL_NODE,
862 None => &NULL_NODE,
860 };
863 };
861 let e2 = self.index.get_entry(p2);
864 let e2 = self.index.get_entry(p2);
862 let h2 = match e2 {
865 let h2 = match e2 {
863 Some(ref entry) => entry.hash(),
866 Some(ref entry) => entry.hash(),
864 None => &NULL_NODE,
867 None => &NULL_NODE,
865 };
868 };
866
869
867 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
870 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
868 }
871 }
869
872
870 /// Build the full data of a revision out its snapshot
873 /// Build the full data of a revision out its snapshot
871 /// and its deltas.
874 /// and its deltas.
872 fn build_data_from_deltas(
875 fn build_data_from_deltas(
873 snapshot: RevlogEntry,
876 snapshot: RevlogEntry,
874 deltas: &[RevlogEntry],
877 deltas: &[RevlogEntry],
875 ) -> Result<Vec<u8>, HgError> {
878 ) -> Result<Vec<u8>, HgError> {
876 let snapshot = snapshot.data_chunk()?;
879 let snapshot = snapshot.data_chunk()?;
877 let deltas = deltas
880 let deltas = deltas
878 .iter()
881 .iter()
879 .rev()
882 .rev()
880 .map(RevlogEntry::data_chunk)
883 .map(RevlogEntry::data_chunk)
881 .collect::<Result<Vec<_>, _>>()?;
884 .collect::<Result<Vec<_>, _>>()?;
882 let patches: Vec<_> =
885 let patches: Vec<_> =
883 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
886 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
884 let patch = patch::fold_patch_lists(&patches);
887 let patch = patch::fold_patch_lists(&patches);
885 Ok(patch.apply(&snapshot))
888 Ok(patch.apply(&snapshot))
886 }
889 }
887
890
888 /// Return the revlog data.
891 /// Return the revlog data.
889 fn data(&self) -> &[u8] {
892 fn data(&self) -> &[u8] {
890 match &self.data_bytes {
893 match &self.data_bytes {
891 Some(data_bytes) => data_bytes,
894 Some(data_bytes) => data_bytes,
892 None => panic!(
895 None => panic!(
893 "forgot to load the data or trying to access inline data"
896 "forgot to load the data or trying to access inline data"
894 ),
897 ),
895 }
898 }
896 }
899 }
897
900
898 pub fn make_null_entry(&self) -> RevlogEntry {
901 pub fn make_null_entry(&self) -> RevlogEntry {
899 RevlogEntry {
902 RevlogEntry {
900 revlog: self,
903 revlog: self,
901 rev: NULL_REVISION,
904 rev: NULL_REVISION,
902 bytes: b"",
905 bytes: b"",
903 compressed_len: 0,
906 compressed_len: 0,
904 uncompressed_len: 0,
907 uncompressed_len: 0,
905 base_rev_or_base_of_delta_chain: None,
908 base_rev_or_base_of_delta_chain: None,
906 p1: NULL_REVISION,
909 p1: NULL_REVISION,
907 p2: NULL_REVISION,
910 p2: NULL_REVISION,
908 flags: NULL_REVLOG_ENTRY_FLAGS,
911 flags: NULL_REVLOG_ENTRY_FLAGS,
909 hash: NULL_NODE,
912 hash: NULL_NODE,
910 }
913 }
911 }
914 }
912
915
913 fn get_entry_for_checked_rev(
916 fn get_entry_for_checked_rev(
914 &self,
917 &self,
915 rev: Revision,
918 rev: Revision,
916 ) -> Result<RevlogEntry, RevlogError> {
919 ) -> Result<RevlogEntry, RevlogError> {
917 if rev == NULL_REVISION {
920 if rev == NULL_REVISION {
918 return Ok(self.make_null_entry());
921 return Ok(self.make_null_entry());
919 }
922 }
920 let index_entry = self
923 let index_entry = self
921 .index
924 .index
922 .get_entry(rev)
925 .get_entry(rev)
923 .ok_or(RevlogError::InvalidRevision)?;
926 .ok_or(RevlogError::InvalidRevision)?;
924 let offset = index_entry.offset();
927 let offset = index_entry.offset();
925 let start = if self.index.is_inline() {
928 let start = if self.index.is_inline() {
926 offset + ((rev.0 as usize + 1) * INDEX_ENTRY_SIZE)
929 offset + ((rev.0 as usize + 1) * INDEX_ENTRY_SIZE)
927 } else {
930 } else {
928 offset
931 offset
929 };
932 };
930 let end = start + index_entry.compressed_len() as usize;
933 let end = start + index_entry.compressed_len() as usize;
931 let data = if self.index.is_inline() {
934 let data = if self.index.is_inline() {
932 self.index.data(start, end)
935 self.index.data(start, end)
933 } else {
936 } else {
934 &self.data()[start..end]
937 &self.data()[start..end]
935 };
938 };
936 let base_rev = self
939 let base_rev = self
937 .index
940 .index
938 .check_revision(index_entry.base_revision_or_base_of_delta_chain())
941 .check_revision(index_entry.base_revision_or_base_of_delta_chain())
939 .ok_or_else(|| {
942 .ok_or_else(|| {
940 RevlogError::corrupted(format!(
943 RevlogError::corrupted(format!(
941 "base revision for rev {} is invalid",
944 "base revision for rev {} is invalid",
942 rev
945 rev
943 ))
946 ))
944 })?;
947 })?;
945 let p1 =
948 let p1 =
946 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
949 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
947 RevlogError::corrupted(format!(
950 RevlogError::corrupted(format!(
948 "p1 for rev {} is invalid",
951 "p1 for rev {} is invalid",
949 rev
952 rev
950 ))
953 ))
951 })?;
954 })?;
952 let p2 =
955 let p2 =
953 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
956 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
954 RevlogError::corrupted(format!(
957 RevlogError::corrupted(format!(
955 "p2 for rev {} is invalid",
958 "p2 for rev {} is invalid",
956 rev
959 rev
957 ))
960 ))
958 })?;
961 })?;
959 let entry = RevlogEntry {
962 let entry = RevlogEntry {
960 revlog: self,
963 revlog: self,
961 rev,
964 rev,
962 bytes: data,
965 bytes: data,
963 compressed_len: index_entry.compressed_len(),
966 compressed_len: index_entry.compressed_len(),
964 uncompressed_len: index_entry.uncompressed_len(),
967 uncompressed_len: index_entry.uncompressed_len(),
965 base_rev_or_base_of_delta_chain: if base_rev == rev {
968 base_rev_or_base_of_delta_chain: if base_rev == rev {
966 None
969 None
967 } else {
970 } else {
968 Some(base_rev)
971 Some(base_rev)
969 },
972 },
970 p1,
973 p1,
971 p2,
974 p2,
972 flags: index_entry.flags(),
975 flags: index_entry.flags(),
973 hash: *index_entry.hash(),
976 hash: *index_entry.hash(),
974 };
977 };
975 Ok(entry)
978 Ok(entry)
976 }
979 }
977
980
978 /// Get an entry of the revlog.
981 /// Get an entry of the revlog.
979 pub fn get_entry(
982 pub fn get_entry(
980 &self,
983 &self,
981 rev: UncheckedRevision,
984 rev: UncheckedRevision,
982 ) -> Result<RevlogEntry, RevlogError> {
985 ) -> Result<RevlogEntry, RevlogError> {
983 if rev == NULL_REVISION.into() {
986 if rev == NULL_REVISION.into() {
984 return Ok(self.make_null_entry());
987 return Ok(self.make_null_entry());
985 }
988 }
986 let rev = self.index.check_revision(rev).ok_or_else(|| {
989 let rev = self.index.check_revision(rev).ok_or_else(|| {
987 RevlogError::corrupted(format!("rev {} is invalid", rev))
990 RevlogError::corrupted(format!("rev {} is invalid", rev))
988 })?;
991 })?;
989 self.get_entry_for_checked_rev(rev)
992 self.get_entry_for_checked_rev(rev)
990 }
993 }
991 }
994 }
992
995
993 /// The revlog entry's bytes and the necessary informations to extract
996 /// The revlog entry's bytes and the necessary informations to extract
994 /// the entry's data.
997 /// the entry's data.
995 #[derive(Clone)]
998 #[derive(Clone)]
996 pub struct RevlogEntry<'revlog> {
999 pub struct RevlogEntry<'revlog> {
997 revlog: &'revlog Revlog,
1000 revlog: &'revlog Revlog,
998 rev: Revision,
1001 rev: Revision,
999 bytes: &'revlog [u8],
1002 bytes: &'revlog [u8],
1000 compressed_len: u32,
1003 compressed_len: u32,
1001 uncompressed_len: i32,
1004 uncompressed_len: i32,
1002 base_rev_or_base_of_delta_chain: Option<Revision>,
1005 base_rev_or_base_of_delta_chain: Option<Revision>,
1003 p1: Revision,
1006 p1: Revision,
1004 p2: Revision,
1007 p2: Revision,
1005 flags: u16,
1008 flags: u16,
1006 hash: Node,
1009 hash: Node,
1007 }
1010 }
1008
1011
1009 thread_local! {
1012 thread_local! {
1010 // seems fine to [unwrap] here: this can only fail due to memory allocation
1013 // seems fine to [unwrap] here: this can only fail due to memory allocation
1011 // failing, and it's normal for that to cause panic.
1014 // failing, and it's normal for that to cause panic.
1012 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
1015 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
1013 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
1016 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
1014 }
1017 }
1015
1018
1016 fn zstd_decompress_to_buffer(
1019 fn zstd_decompress_to_buffer(
1017 bytes: &[u8],
1020 bytes: &[u8],
1018 buf: &mut Vec<u8>,
1021 buf: &mut Vec<u8>,
1019 ) -> Result<usize, std::io::Error> {
1022 ) -> Result<usize, std::io::Error> {
1020 ZSTD_DECODER
1023 ZSTD_DECODER
1021 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
1024 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
1022 }
1025 }
1023
1026
1024 impl<'revlog> RevlogEntry<'revlog> {
1027 impl<'revlog> RevlogEntry<'revlog> {
1025 pub fn revision(&self) -> Revision {
1028 pub fn revision(&self) -> Revision {
1026 self.rev
1029 self.rev
1027 }
1030 }
1028
1031
1029 pub fn node(&self) -> &Node {
1032 pub fn node(&self) -> &Node {
1030 &self.hash
1033 &self.hash
1031 }
1034 }
1032
1035
1033 pub fn uncompressed_len(&self) -> Option<u32> {
1036 pub fn uncompressed_len(&self) -> Option<u32> {
1034 u32::try_from(self.uncompressed_len).ok()
1037 u32::try_from(self.uncompressed_len).ok()
1035 }
1038 }
1036
1039
1037 pub fn has_p1(&self) -> bool {
1040 pub fn has_p1(&self) -> bool {
1038 self.p1 != NULL_REVISION
1041 self.p1 != NULL_REVISION
1039 }
1042 }
1040
1043
1041 pub fn p1_entry(
1044 pub fn p1_entry(
1042 &self,
1045 &self,
1043 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
1046 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
1044 if self.p1 == NULL_REVISION {
1047 if self.p1 == NULL_REVISION {
1045 Ok(None)
1048 Ok(None)
1046 } else {
1049 } else {
1047 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?))
1050 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?))
1048 }
1051 }
1049 }
1052 }
1050
1053
1051 pub fn p2_entry(
1054 pub fn p2_entry(
1052 &self,
1055 &self,
1053 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
1056 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
1054 if self.p2 == NULL_REVISION {
1057 if self.p2 == NULL_REVISION {
1055 Ok(None)
1058 Ok(None)
1056 } else {
1059 } else {
1057 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?))
1060 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?))
1058 }
1061 }
1059 }
1062 }
1060
1063
1061 pub fn p1(&self) -> Option<Revision> {
1064 pub fn p1(&self) -> Option<Revision> {
1062 if self.p1 == NULL_REVISION {
1065 if self.p1 == NULL_REVISION {
1063 None
1066 None
1064 } else {
1067 } else {
1065 Some(self.p1)
1068 Some(self.p1)
1066 }
1069 }
1067 }
1070 }
1068
1071
1069 pub fn p2(&self) -> Option<Revision> {
1072 pub fn p2(&self) -> Option<Revision> {
1070 if self.p2 == NULL_REVISION {
1073 if self.p2 == NULL_REVISION {
1071 None
1074 None
1072 } else {
1075 } else {
1073 Some(self.p2)
1076 Some(self.p2)
1074 }
1077 }
1075 }
1078 }
1076
1079
1077 pub fn is_censored(&self) -> bool {
1080 pub fn is_censored(&self) -> bool {
1078 (self.flags & REVISION_FLAG_CENSORED) != 0
1081 (self.flags & REVISION_FLAG_CENSORED) != 0
1079 }
1082 }
1080
1083
1081 pub fn has_length_affecting_flag_processor(&self) -> bool {
1084 pub fn has_length_affecting_flag_processor(&self) -> bool {
1082 // Relevant Python code: revlog.size()
1085 // Relevant Python code: revlog.size()
1083 // note: ELLIPSIS is known to not change the content
1086 // note: ELLIPSIS is known to not change the content
1084 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
1087 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
1085 }
1088 }
1086
1089
1087 /// The data for this entry, after resolving deltas if any.
1090 /// The data for this entry, after resolving deltas if any.
1088 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
1091 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
1089 let mut entry = self.clone();
1092 let mut entry = self.clone();
1090 let mut delta_chain = vec![];
1093 let mut delta_chain = vec![];
1091
1094
1092 // The meaning of `base_rev_or_base_of_delta_chain` depends on
1095 // The meaning of `base_rev_or_base_of_delta_chain` depends on
1093 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
1096 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
1094 // `mercurial/revlogutils/constants.py` and the code in
1097 // `mercurial/revlogutils/constants.py` and the code in
1095 // [_chaininfo] and in [index_deltachain].
1098 // [_chaininfo] and in [index_deltachain].
1096 let uses_generaldelta = self.revlog.index.uses_generaldelta();
1099 let uses_generaldelta = self.revlog.index.uses_generaldelta();
1097 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
1100 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
1098 entry = if uses_generaldelta {
1101 entry = if uses_generaldelta {
1099 delta_chain.push(entry);
1102 delta_chain.push(entry);
1100 self.revlog.get_entry_for_checked_rev(base_rev)?
1103 self.revlog.get_entry_for_checked_rev(base_rev)?
1101 } else {
1104 } else {
1102 let base_rev = UncheckedRevision(entry.rev.0 - 1);
1105 let base_rev = UncheckedRevision(entry.rev.0 - 1);
1103 delta_chain.push(entry);
1106 delta_chain.push(entry);
1104 self.revlog.get_entry(base_rev)?
1107 self.revlog.get_entry(base_rev)?
1105 };
1108 };
1106 }
1109 }
1107
1110
1108 let data = if delta_chain.is_empty() {
1111 let data = if delta_chain.is_empty() {
1109 entry.data_chunk()?
1112 entry.data_chunk()?
1110 } else {
1113 } else {
1111 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
1114 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
1112 };
1115 };
1113
1116
1114 Ok(data)
1117 Ok(data)
1115 }
1118 }
1116
1119
1117 fn check_data(
1120 fn check_data(
1118 &self,
1121 &self,
1119 data: Cow<'revlog, [u8]>,
1122 data: Cow<'revlog, [u8]>,
1120 ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
1123 ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
1121 if self.revlog.check_hash(
1124 if self.revlog.check_hash(
1122 self.p1,
1125 self.p1,
1123 self.p2,
1126 self.p2,
1124 self.hash.as_bytes(),
1127 self.hash.as_bytes(),
1125 &data,
1128 &data,
1126 ) {
1129 ) {
1127 Ok(data)
1130 Ok(data)
1128 } else {
1131 } else {
1129 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
1132 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
1130 return Err(HgError::unsupported(
1133 return Err(HgError::unsupported(
1131 "ellipsis revisions are not supported by rhg",
1134 "ellipsis revisions are not supported by rhg",
1132 )
1135 )
1133 .into());
1136 .into());
1134 }
1137 }
1135 Err(corrupted(format!(
1138 Err(corrupted(format!(
1136 "hash check failed for revision {}",
1139 "hash check failed for revision {}",
1137 self.rev
1140 self.rev
1138 ))
1141 ))
1139 .into())
1142 .into())
1140 }
1143 }
1141 }
1144 }
1142
1145
1143 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
1146 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
1144 let data = self.rawdata()?;
1147 let data = self.rawdata()?;
1145 if self.rev == NULL_REVISION {
1148 if self.rev == NULL_REVISION {
1146 return Ok(data);
1149 return Ok(data);
1147 }
1150 }
1148 if self.is_censored() {
1151 if self.is_censored() {
1149 return Err(HgError::CensoredNodeError.into());
1152 return Err(HgError::CensoredNodeError.into());
1150 }
1153 }
1151 self.check_data(data)
1154 self.check_data(data)
1152 }
1155 }
1153
1156
1154 /// Extract the data contained in the entry.
1157 /// Extract the data contained in the entry.
1155 /// This may be a delta. (See `is_delta`.)
1158 /// This may be a delta. (See `is_delta`.)
1156 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
1159 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
1157 if self.bytes.is_empty() {
1160 if self.bytes.is_empty() {
1158 return Ok(Cow::Borrowed(&[]));
1161 return Ok(Cow::Borrowed(&[]));
1159 }
1162 }
1160 match self.bytes[0] {
1163 match self.bytes[0] {
1161 // Revision data is the entirety of the entry, including this
1164 // Revision data is the entirety of the entry, including this
1162 // header.
1165 // header.
1163 b'\0' => Ok(Cow::Borrowed(self.bytes)),
1166 b'\0' => Ok(Cow::Borrowed(self.bytes)),
1164 // Raw revision data follows.
1167 // Raw revision data follows.
1165 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
1168 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
1166 // zlib (RFC 1950) data.
1169 // zlib (RFC 1950) data.
1167 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
1170 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
1168 // zstd data.
1171 // zstd data.
1169 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
1172 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
1170 // A proper new format should have had a repo/store requirement.
1173 // A proper new format should have had a repo/store requirement.
1171 format_type => Err(corrupted(format!(
1174 format_type => Err(corrupted(format!(
1172 "unknown compression header '{}'",
1175 "unknown compression header '{}'",
1173 format_type
1176 format_type
1174 ))),
1177 ))),
1175 }
1178 }
1176 }
1179 }
1177
1180
1178 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
1181 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
1179 let mut decoder = ZlibDecoder::new(self.bytes);
1182 let mut decoder = ZlibDecoder::new(self.bytes);
1180 if self.is_delta() {
1183 if self.is_delta() {
1181 let mut buf = Vec::with_capacity(self.compressed_len as usize);
1184 let mut buf = Vec::with_capacity(self.compressed_len as usize);
1182 decoder
1185 decoder
1183 .read_to_end(&mut buf)
1186 .read_to_end(&mut buf)
1184 .map_err(|e| corrupted(e.to_string()))?;
1187 .map_err(|e| corrupted(e.to_string()))?;
1185 Ok(buf)
1188 Ok(buf)
1186 } else {
1189 } else {
1187 let cap = self.uncompressed_len.max(0) as usize;
1190 let cap = self.uncompressed_len.max(0) as usize;
1188 let mut buf = vec![0; cap];
1191 let mut buf = vec![0; cap];
1189 decoder
1192 decoder
1190 .read_exact(&mut buf)
1193 .read_exact(&mut buf)
1191 .map_err(|e| corrupted(e.to_string()))?;
1194 .map_err(|e| corrupted(e.to_string()))?;
1192 Ok(buf)
1195 Ok(buf)
1193 }
1196 }
1194 }
1197 }
1195
1198
1196 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
1199 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
1197 let cap = self.uncompressed_len.max(0) as usize;
1200 let cap = self.uncompressed_len.max(0) as usize;
1198 if self.is_delta() {
1201 if self.is_delta() {
1199 // [cap] is usually an over-estimate of the space needed because
1202 // [cap] is usually an over-estimate of the space needed because
1200 // it's the length of delta-decoded data, but we're interested
1203 // it's the length of delta-decoded data, but we're interested
1201 // in the size of the delta.
1204 // in the size of the delta.
1202 // This means we have to [shrink_to_fit] to avoid holding on
1205 // This means we have to [shrink_to_fit] to avoid holding on
1203 // to a large chunk of memory, but it also means we must have a
1206 // to a large chunk of memory, but it also means we must have a
1204 // fallback branch, for the case when the delta is longer than
1207 // fallback branch, for the case when the delta is longer than
1205 // the original data (surprisingly, this does happen in practice)
1208 // the original data (surprisingly, this does happen in practice)
1206 let mut buf = Vec::with_capacity(cap);
1209 let mut buf = Vec::with_capacity(cap);
1207 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
1210 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
1208 Ok(_) => buf.shrink_to_fit(),
1211 Ok(_) => buf.shrink_to_fit(),
1209 Err(_) => {
1212 Err(_) => {
1210 buf.clear();
1213 buf.clear();
1211 zstd::stream::copy_decode(self.bytes, &mut buf)
1214 zstd::stream::copy_decode(self.bytes, &mut buf)
1212 .map_err(|e| corrupted(e.to_string()))?;
1215 .map_err(|e| corrupted(e.to_string()))?;
1213 }
1216 }
1214 };
1217 };
1215 Ok(buf)
1218 Ok(buf)
1216 } else {
1219 } else {
1217 let mut buf = Vec::with_capacity(cap);
1220 let mut buf = Vec::with_capacity(cap);
1218 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
1221 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
1219 .map_err(|e| corrupted(e.to_string()))?;
1222 .map_err(|e| corrupted(e.to_string()))?;
1220 if len != self.uncompressed_len as usize {
1223 if len != self.uncompressed_len as usize {
1221 Err(corrupted("uncompressed length does not match"))
1224 Err(corrupted("uncompressed length does not match"))
1222 } else {
1225 } else {
1223 Ok(buf)
1226 Ok(buf)
1224 }
1227 }
1225 }
1228 }
1226 }
1229 }
1227
1230
1228 /// Tell if the entry is a snapshot or a delta
1231 /// Tell if the entry is a snapshot or a delta
1229 /// (influences on decompression).
1232 /// (influences on decompression).
1230 fn is_delta(&self) -> bool {
1233 fn is_delta(&self) -> bool {
1231 self.base_rev_or_base_of_delta_chain.is_some()
1234 self.base_rev_or_base_of_delta_chain.is_some()
1232 }
1235 }
1233 }
1236 }
1234
1237
1235 /// Calculate the hash of a revision given its data and its parents.
1238 /// Calculate the hash of a revision given its data and its parents.
1236 fn hash(
1239 fn hash(
1237 data: &[u8],
1240 data: &[u8],
1238 p1_hash: &[u8],
1241 p1_hash: &[u8],
1239 p2_hash: &[u8],
1242 p2_hash: &[u8],
1240 ) -> [u8; NODE_BYTES_LENGTH] {
1243 ) -> [u8; NODE_BYTES_LENGTH] {
1241 let mut hasher = Sha1::new();
1244 let mut hasher = Sha1::new();
1242 let (a, b) = (p1_hash, p2_hash);
1245 let (a, b) = (p1_hash, p2_hash);
1243 if a > b {
1246 if a > b {
1244 hasher.update(b);
1247 hasher.update(b);
1245 hasher.update(a);
1248 hasher.update(a);
1246 } else {
1249 } else {
1247 hasher.update(a);
1250 hasher.update(a);
1248 hasher.update(b);
1251 hasher.update(b);
1249 }
1252 }
1250 hasher.update(data);
1253 hasher.update(data);
1251 *hasher.finalize().as_ref()
1254 *hasher.finalize().as_ref()
1252 }
1255 }
1253
1256
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::IndexEntryBuilder;
    use itertools::Itertools;

    /// An empty index file opens as an empty revlog in which only the
    /// null node and null revision can be resolved.
    #[test]
    fn test_empty() {
        let tmp_dir = tempfile::tempdir().unwrap();
        std::fs::write(tmp_dir.path().join("foo.i"), b"").unwrap();
        let vfs = Vfs {
            base: tmp_dir.path(),
        };
        let revlog =
            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
                .unwrap();

        assert!(revlog.is_empty());
        assert_eq!(revlog.len(), 0);
        assert!(revlog.get_entry(0.into()).is_err());
        assert!(!revlog.has_rev(0.into()));

        let null_rev = revlog.rev_from_node(NULL_NODE.into()).unwrap();
        assert_eq!(null_rev, NULL_REVISION);

        let null_entry =
            revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
        assert_eq!(null_entry.revision(), NULL_REVISION);
        assert!(null_entry.data().unwrap().is_empty());
    }

    /// An inline index with three entries exposes the expected
    /// revisions, nodes and parents.
    #[test]
    fn test_inline() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let vfs = Vfs {
            base: tmp_dir.path(),
        };

        let node0 =
            Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
                .unwrap();
        let node1 =
            Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
                .unwrap();
        let node2 =
            Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
                .unwrap();

        // Revisions 0 and 1 have no parents; revision 2 merges them.
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new().with_node(node1).build();
        let entry2_bytes = IndexEntryBuilder::new()
            .with_p1(Revision(0))
            .with_p2(Revision(1))
            .with_node(node2)
            .build();
        let index_bytes = vec![entry0_bytes, entry1_bytes, entry2_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(tmp_dir.path().join("foo.i"), index_bytes).unwrap();

        let revlog =
            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
                .unwrap();

        let entry0 = revlog.get_entry(0.into()).ok().unwrap();
        assert_eq!(entry0.revision(), Revision(0));
        assert_eq!(*entry0.node(), node0);
        assert!(!entry0.has_p1());
        assert_eq!(entry0.p1(), None);
        assert_eq!(entry0.p2(), None);
        assert!(entry0.p1_entry().unwrap().is_none());
        assert!(entry0.p2_entry().unwrap().is_none());

        let entry1 = revlog.get_entry(1.into()).ok().unwrap();
        assert_eq!(entry1.revision(), Revision(1));
        assert_eq!(*entry1.node(), node1);
        assert!(!entry1.has_p1());
        assert_eq!(entry1.p1(), None);
        assert_eq!(entry1.p2(), None);
        assert!(entry1.p1_entry().unwrap().is_none());
        assert!(entry1.p2_entry().unwrap().is_none());

        let entry2 = revlog.get_entry(2.into()).ok().unwrap();
        assert_eq!(entry2.revision(), Revision(2));
        assert_eq!(*entry2.node(), node2);
        assert!(entry2.has_p1());
        assert_eq!(entry2.p1(), Some(Revision(0)));
        assert_eq!(entry2.p2(), Some(Revision(1)));
        let first_parent = entry2.p1_entry().unwrap();
        assert!(first_parent.is_some());
        assert_eq!(first_parent.unwrap().revision(), Revision(0));
        let second_parent = entry2.p2_entry().unwrap();
        assert!(second_parent.is_some());
        assert_eq!(second_parent.unwrap().revision(), Revision(1));
    }

    /// Node and prefix lookups work through an explicitly supplied
    /// nodemap, including the ambiguous-prefix case.
    #[test]
    fn test_nodemap() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let vfs = Vfs {
            base: tmp_dir.path(),
        };

        // building a revlog with a forced Node starting with zeros
        // This is a corruption, but it does not preclude using the nodemap
        // if we don't try and access the data
        let node0 =
            Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
                .unwrap();
        let node1 =
            Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
                .unwrap();
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new().with_node(node1).build();
        let index_bytes = vec![entry0_bytes, entry1_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(tmp_dir.path().join("foo.i"), index_bytes).unwrap();

        let mut nt_index = nodemap::tests::TestNtIndex::new();
        nt_index.insert_node(Revision(0), node0).unwrap();
        nt_index.insert_node(Revision(1), node1).unwrap();

        let revlog = Revlog::open_gen(
            &vfs,
            "foo.i",
            None,
            RevlogOpenOptions::new(),
            Some(nt_index.nt),
        )
        .unwrap();

        // accessing the data shows the corruption
        revlog.get_entry(0.into()).unwrap().data().unwrap_err();

        // Full-node lookups.
        assert_eq!(
            revlog.rev_from_node(NULL_NODE.into()).unwrap(),
            Revision(-1)
        );
        assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), Revision(0));
        assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), Revision(1));

        // Prefix lookups.
        let zeros_prefix = NodePrefix::from_hex("000").unwrap();
        assert_eq!(revlog.rev_from_node(zeros_prefix).unwrap(), Revision(-1));
        let b00_prefix = NodePrefix::from_hex("b00").unwrap();
        assert_eq!(revlog.rev_from_node(b00_prefix).unwrap(), Revision(1));

        // RevlogError does not implement PartialEq
        // (ultimately because io::Error does not)
        let err = revlog
            .rev_from_node(NodePrefix::from_hex("00").unwrap())
            .expect_err("Expected to give AmbiguousPrefix error");
        if !matches!(err, RevlogError::AmbiguousPrefix) {
            panic!("Got another error than AmbiguousPrefix: {:?}", err);
        }
    }
}
General Comments 0
You need to be logged in to leave comments. Login now