rust-revlog: rename `start` to `data_start`...
Raphaël Gomès
r53193:e2319309 default

@@ -1,1351 +1,1351 @@
 //! A layer of lower-level revlog functionality to encapsulate most of the
 //! IO work and expensive operations.
 use std::{
     borrow::Cow,
     cell::RefCell,
     io::{ErrorKind, Seek, SeekFrom, Write},
     ops::Deref,
     path::PathBuf,
     sync::{Arc, Mutex},
 };
 
 use schnellru::{ByMemoryUsage, LruMap};
 use sha1::{Digest, Sha1};
 
 use crate::{
     errors::{HgError, IoResultExt},
     exit_codes,
     transaction::Transaction,
     vfs::Vfs,
 };
 
 use super::{
     compression::{
         uncompressed_zstd_data, CompressionConfig, Compressor, NoneCompressor,
         ZlibCompressor, ZstdCompressor, ZLIB_BYTE, ZSTD_BYTE,
     },
     file_io::{DelayedBuffer, FileHandle, RandomAccessFile, WriteHandles},
     index::{Index, IndexHeader, INDEX_ENTRY_SIZE},
     node::{NODE_BYTES_LENGTH, NULL_NODE},
     options::{RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig},
     BaseRevision, Node, Revision, RevlogEntry, RevlogError, RevlogIndex,
     UncheckedRevision, NULL_REVISION, NULL_REVLOG_ENTRY_FLAGS,
 };
 
 /// Matches the `_InnerRevlog` class in the Python code, as an arbitrary
 /// boundary to incrementally rewrite higher-level revlog functionality in
 /// Rust.
 pub struct InnerRevlog {
     /// When index and data are not interleaved: bytes of the revlog index.
     /// When index and data are interleaved (inline revlog): bytes of the
     /// revlog index and data.
     pub index: Index,
     /// The store vfs that is used to interact with the filesystem
     vfs: Box<dyn Vfs>,
     /// The index file path, relative to the vfs root
     pub index_file: PathBuf,
     /// The data file path, relative to the vfs root (same as `index_file`
     /// if inline)
     data_file: PathBuf,
     /// Data config that applies to this revlog
     data_config: RevlogDataConfig,
     /// Delta config that applies to this revlog
     delta_config: RevlogDeltaConfig,
     /// Feature config that applies to this revlog
     feature_config: RevlogFeatureConfig,
     /// A view into this revlog's data file
     segment_file: RandomAccessFile,
     /// A cache of uncompressed chunks that have previously been restored.
     /// Its eviction policy is defined in [`Self::new`].
     uncompressed_chunk_cache: Option<UncompressedChunkCache>,
     /// Used to keep track of the actual target during diverted writes
     /// for the changelog
     original_index_file: Option<PathBuf>,
     /// Write handles to the index and data files
     /// XXX why duplicate from `index` and `segment_file`?
     writing_handles: Option<WriteHandles>,
     /// See [`DelayedBuffer`].
     delayed_buffer: Option<Arc<Mutex<DelayedBuffer>>>,
     /// Whether this revlog is inline. XXX why duplicate from `index`?
     pub inline: bool,
     /// A cache of the last revision, which is usually accessed multiple
     /// times.
     pub last_revision_cache: Mutex<Option<SingleRevisionCache>>,
 }
 
 impl InnerRevlog {
     pub fn new(
         vfs: Box<dyn Vfs>,
         index: Index,
         index_file: PathBuf,
         data_file: PathBuf,
         data_config: RevlogDataConfig,
         delta_config: RevlogDeltaConfig,
         feature_config: RevlogFeatureConfig,
     ) -> Self {
         assert!(index_file.is_relative());
         assert!(data_file.is_relative());
         let segment_file = RandomAccessFile::new(
             dyn_clone::clone_box(&*vfs),
             if index.is_inline() {
                 index_file.to_owned()
             } else {
                 data_file.to_owned()
             },
         );
 
         let uncompressed_chunk_cache =
             data_config.uncompressed_cache_factor.map(
                 // Arbitrary initial value
                 // TODO check if using a hasher specific to integers is useful
                 |_factor| RefCell::new(LruMap::with_memory_budget(65536)),
             );
 
         let inline = index.is_inline();
         Self {
             index,
             vfs,
             index_file,
             data_file,
             data_config,
             delta_config,
             feature_config,
             segment_file,
             uncompressed_chunk_cache,
             original_index_file: None,
             writing_handles: None,
             delayed_buffer: None,
             inline,
             last_revision_cache: Mutex::new(None),
         }
     }
 
     /// Return number of entries of the revlog index
     pub fn len(&self) -> usize {
         self.index.len()
     }
 
     /// Return `true` if this revlog has no entries
     pub fn is_empty(&self) -> bool {
         self.len() == 0
     }
 
     /// Return whether this revlog is inline (mixed index and data)
     pub fn is_inline(&self) -> bool {
         self.inline
     }
 
     /// Clear all caches from this revlog
     pub fn clear_cache(&mut self) {
         assert!(!self.is_delaying());
         if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
             // We don't clear the allocation here because it's probably faster.
             // We could change our minds later if this ends up being a problem
             // with regards to memory consumption.
             cache.borrow_mut().clear();
         }
     }
 
     /// Return an entry for the null revision
     pub fn make_null_entry(&self) -> RevlogEntry {
         RevlogEntry {
             revlog: self,
             rev: NULL_REVISION,
             uncompressed_len: 0,
             p1: NULL_REVISION,
             p2: NULL_REVISION,
             flags: NULL_REVLOG_ENTRY_FLAGS,
             hash: NULL_NODE,
         }
     }
 
     /// Return the [`RevlogEntry`] for a [`Revision`] that is known to exist
     pub fn get_entry(
         &self,
         rev: Revision,
     ) -> Result<RevlogEntry, RevlogError> {
         if rev == NULL_REVISION {
             return Ok(self.make_null_entry());
         }
         let index_entry = self
             .index
             .get_entry(rev)
             .ok_or_else(|| RevlogError::InvalidRevision(rev.to_string()))?;
         let p1 =
             self.index.check_revision(index_entry.p1()).ok_or_else(|| {
                 RevlogError::corrupted(format!(
                     "p1 for rev {} is invalid",
                     rev
                 ))
             })?;
         let p2 =
             self.index.check_revision(index_entry.p2()).ok_or_else(|| {
                 RevlogError::corrupted(format!(
                     "p2 for rev {} is invalid",
                     rev
                 ))
             })?;
         let entry = RevlogEntry {
             revlog: self,
             rev,
             uncompressed_len: index_entry.uncompressed_len(),
             p1,
             p2,
             flags: index_entry.flags(),
             hash: *index_entry.hash(),
         };
         Ok(entry)
     }
 
     /// Return the [`RevlogEntry`] for `rev`. If `rev` fails to check, this
     /// returns a [`RevlogError`].
     pub fn get_entry_for_unchecked_rev(
         &self,
         rev: UncheckedRevision,
     ) -> Result<RevlogEntry, RevlogError> {
         if rev == NULL_REVISION.into() {
             return Ok(self.make_null_entry());
         }
         let rev = self.index.check_revision(rev).ok_or_else(|| {
             RevlogError::corrupted(format!("rev {} is invalid", rev))
         })?;
         self.get_entry(rev)
     }
 
     /// Is the revlog currently delaying the visibility of written data?
     ///
     /// The delaying mechanism can be either in-memory or written on disk in a
     /// side-file.
     pub fn is_delaying(&self) -> bool {
         self.delayed_buffer.is_some() || self.original_index_file.is_some()
     }
 
     /// The offset of the data chunk for this revision
     #[inline(always)]
-    pub fn start(&self, rev: Revision) -> usize {
+    pub fn data_start(&self, rev: Revision) -> usize {
         self.index.start(
             rev,
             &self
                 .index
                 .get_entry(rev)
                 .unwrap_or_else(|| self.index.make_null_entry()),
         )
     }
 
     /// The length of the data chunk for this revision
     #[inline(always)]
     pub fn data_compressed_length(&self, rev: Revision) -> usize {
         self.index
             .get_entry(rev)
             .unwrap_or_else(|| self.index.make_null_entry())
             .compressed_len() as usize
     }
 
     /// The end of the data chunk for this revision
     #[inline(always)]
     pub fn end(&self, rev: Revision) -> usize {
-        self.start(rev) + self.data_compressed_length(rev)
+        self.data_start(rev) + self.data_compressed_length(rev)
     }
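
As an illustration of the renamed API (a sketch, not part of the patch): a
revision's compressed chunk occupies the half-open byte range
`[data_start(rev), end(rev))` of the data (or inline index) file, so the two
methods above are all a caller needs to locate it. The `chunk_range` helper
below is hypothetical:

    // Hypothetical helper: byte range of a revision's compressed chunk.
    fn chunk_range(
        revlog: &InnerRevlog,
        rev: Revision,
    ) -> std::ops::Range<usize> {
        // end(rev) == data_start(rev) + data_compressed_length(rev)
        revlog.data_start(rev)..revlog.end(rev)
    }
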
 
     /// Return the delta parent of the given revision
     pub fn delta_parent(&self, rev: Revision) -> Revision {
         let base = self
             .index
             .get_entry(rev)
             .unwrap()
             .base_revision_or_base_of_delta_chain();
         if base.0 == rev.0 {
             NULL_REVISION
         } else if self.delta_config.general_delta {
             Revision(base.0)
         } else {
             Revision(rev.0 - 1)
         }
     }
 
     /// Return whether `rev` points to a snapshot revision (i.e. does not have
     /// a delta base).
     pub fn is_snapshot(&self, rev: Revision) -> Result<bool, RevlogError> {
         if !self.delta_config.sparse_revlog {
             return Ok(self.delta_parent(rev) == NULL_REVISION);
         }
         self.index.is_snapshot_unchecked(rev)
     }
 
     /// Return the delta chain for `rev` according to this revlog's config.
     /// See [`Index::delta_chain`] for more information.
     pub fn delta_chain(
         &self,
         rev: Revision,
         stop_rev: Option<Revision>,
     ) -> Result<(Vec<Revision>, bool), HgError> {
         self.index.delta_chain(
             rev,
             stop_rev,
             self.delta_config.general_delta.into(),
         )
     }
 
     fn compressor(&self) -> Result<Box<dyn Compressor>, HgError> {
         // TODO cache the compressor?
         Ok(match self.feature_config.compression_engine {
             CompressionConfig::Zlib { level } => {
                 Box::new(ZlibCompressor::new(level))
             }
             CompressionConfig::Zstd { level, threads } => {
                 Box::new(ZstdCompressor::new(level, threads))
             }
             CompressionConfig::None => Box::new(NoneCompressor),
         })
     }
 
     /// Generate a possibly-compressed representation of data.
     /// Returns `None` if the data was not compressed.
     pub fn compress<'data>(
         &self,
         data: &'data [u8],
     ) -> Result<Option<Cow<'data, [u8]>>, RevlogError> {
         if data.is_empty() {
             return Ok(Some(data.into()));
         }
         let res = self.compressor()?.compress(data)?;
         if let Some(compressed) = res {
             // The revlog compressor added the header in the returned data.
             return Ok(Some(compressed.into()));
         }
 
         if data[0] == b'\0' {
             return Ok(Some(data.into()));
         }
         Ok(None)
     }
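
A hedged sketch of what a caller does when `compress` returns `None` (the
convention is visible in `decompress` below, which strips a literal `u`
marker; the surrounding write path is not shown in this hunk):

    // Assumed helper: bytes to store for `text`, adding the `u` marker
    // when `compress` declined to compress, so that `decompress` can tell
    // raw data apart from zlib/zstd headers.
    fn storable(
        revlog: &InnerRevlog,
        text: &[u8],
    ) -> Result<Vec<u8>, RevlogError> {
        Ok(match revlog.compress(text)? {
            Some(compressed) => compressed.into_owned(),
            None => {
                let mut raw = Vec::with_capacity(text.len() + 1);
                raw.push(b'u'); // uncompressed-with-marker header
                raw.extend_from_slice(text);
                raw
            }
        })
    }
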
 
     /// Decompress a revlog chunk.
     ///
     /// The chunk is expected to begin with a header identifying the
     /// format type so it can be routed to an appropriate decompressor.
     pub fn decompress<'a>(
         &'a self,
         data: &'a [u8],
     ) -> Result<Cow<[u8]>, RevlogError> {
         if data.is_empty() {
             return Ok(data.into());
         }
 
         // Revlogs are read much more frequently than they are written and many
         // chunks only take microseconds to decompress, so performance is
         // important here.
 
         let header = data[0];
         match header {
             // Settings don't matter as they only affect compression
             ZLIB_BYTE => Ok(ZlibCompressor::new(0).decompress(data)?.into()),
             // Settings don't matter as they only affect compression
             ZSTD_BYTE => {
                 Ok(ZstdCompressor::new(0, 0).decompress(data)?.into())
             }
             b'\0' => Ok(data.into()),
             b'u' => Ok((&data[1..]).into()),
             other => Err(HgError::UnsupportedFeature(format!(
                 "unknown compression header '{}'",
                 other
             ))
             .into()),
         }
     }
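
Putting the two halves together: `ZLIB_BYTE` and `ZSTD_BYTE` are the natural
first bytes of the respective compressed streams, `b'u'` marks raw data, and
`b'\0'` (e.g. an empty diff) is returned as-is. A small round-trip sketch,
assumed rather than taken from this file, and reusing the hypothetical
`storable` helper above:

    // Round-trip property: whatever was stored, `decompress` must give
    // the original text back.
    fn roundtrip(
        revlog: &InnerRevlog,
        text: &[u8],
    ) -> Result<bool, RevlogError> {
        let stored = storable(revlog, text)?;
        Ok(revlog.decompress(&stored)?.as_ref() == text)
    }
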
 
     /// Obtain a segment of raw data corresponding to a range of revisions.
     ///
     /// Requests for data may be satisfied by a cache.
     ///
     /// Returns a 2-tuple of (offset, data) for the requested range of
     /// revisions. Offset is the integer offset from the beginning of the
     /// revlog and data is a slice of the raw byte data.
     ///
     /// Callers will need to call `self.data_start(rev)` and
     /// `self.data_compressed_length(rev)` to determine where each
     /// revision's data begins and ends.
     pub fn get_segment_for_revs(
         &self,
         start_rev: Revision,
         end_rev: Revision,
     ) -> Result<(usize, Vec<u8>), HgError> {
         let start = if start_rev == NULL_REVISION {
             0
         } else {
             let start_entry = self
                 .index
                 .get_entry(start_rev)
                 .expect("null revision segment");
             self.index.start(start_rev, &start_entry)
         };
         let end_entry = self
             .index
             .get_entry(end_rev)
             .expect("null revision segment");
         let end = self.index.start(end_rev, &end_entry)
             + self.data_compressed_length(end_rev);
 
         let length = end - start;
 
         // XXX should we use mmap instead of doing this for platforms that
         // support madvise/populate?
         Ok((start, self.segment_file.read_chunk(start, length)?))
     }
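
A usage sketch for multi-revision reads (assumed caller code, mirroring the
loop in `chunks` below): the returned `offset` rebases the absolute file
offsets produced by `data_start` onto the fetched segment.

    // Assumed helper: carve each revision's compressed chunk out of one
    // contiguous segment read.
    fn chunks_in_segment(
        revlog: &InnerRevlog,
        revs: &[Revision],
    ) -> Result<Vec<Vec<u8>>, HgError> {
        let (offset, data) =
            revlog.get_segment_for_revs(revs[0], revs[revs.len() - 1])?;
        Ok(revs
            .iter()
            .map(|rev| {
                let start = revlog.data_start(*rev) - offset;
                let len = revlog.data_compressed_length(*rev);
                data[start..start + len].to_vec()
            })
            .collect())
    }
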
 
     /// Return the uncompressed raw data for `rev`
     pub fn chunk_for_rev(&self, rev: Revision) -> Result<Arc<[u8]>, HgError> {
         if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
             if let Some(chunk) = cache.borrow_mut().get(&rev) {
                 return Ok(chunk.clone());
             }
         }
         // TODO revlogv2 should check the compression mode
         let data = self.get_segment_for_revs(rev, rev)?.1;
         let uncompressed = self.decompress(&data).map_err(|e| {
             HgError::abort(
                 format!("revlog decompression error: {}", e),
                 exit_codes::ABORT,
                 None,
             )
         })?;
         let uncompressed: Arc<[u8]> = Arc::from(uncompressed.into_owned());
         if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
             cache.borrow_mut().insert(rev, uncompressed.clone());
         }
         Ok(uncompressed)
     }
 
     /// Execute `func` within a read context for the data file, meaning that
     /// the read handle will be taken and discarded after the operation.
     pub fn with_read<R>(
         &self,
         func: impl FnOnce() -> Result<R, RevlogError>,
     ) -> Result<R, RevlogError> {
         self.enter_reading_context()?;
         let res = func();
         self.exit_reading_context();
         res.map_err(Into::into)
     }
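
Assumed usage of the read context: one read handle is acquired for the whole
closure rather than one per chunk access, and the closure's `HgError` is
converted into `RevlogError` by `?` (the same conversion `decompress` relies
on above).

    use std::sync::Arc;

    fn read_one(
        revlog: &InnerRevlog,
        rev: Revision,
    ) -> Result<Arc<[u8]>, RevlogError> {
        revlog.with_read(|| Ok(revlog.chunk_for_rev(rev)?))
    }
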
 
     /// `pub` only for use in hg-cpython
     #[doc(hidden)]
     pub fn enter_reading_context(&self) -> Result<(), HgError> {
         if self.is_empty() {
             // Nothing to be read
             return Ok(());
         }
         if self.delayed_buffer.is_some() && self.is_inline() {
             return Err(HgError::abort(
                 "revlog with delayed write should not be inline",
                 exit_codes::ABORT,
                 None,
             ));
         }
         self.segment_file.get_read_handle()?;
         Ok(())
     }
 
     /// `pub` only for use in hg-cpython
     #[doc(hidden)]
     pub fn exit_reading_context(&self) {
         self.segment_file.exit_reading_context()
     }
 
     /// Fill the buffer returned by `get_buffer` with the possibly un-validated
     /// raw text for a revision. It can be already validated if it comes
     /// from the cache.
     pub fn raw_text<G, T>(
         &self,
         rev: Revision,
         get_buffer: G,
     ) -> Result<(), RevlogError>
     where
         G: FnOnce(
             usize,
             &mut dyn FnMut(
                 &mut dyn RevisionBuffer<Target = T>,
             ) -> Result<(), RevlogError>,
         ) -> Result<(), RevlogError>,
     {
         let entry = &self.get_entry(rev)?;
         let raw_size = entry.uncompressed_len();
         let mut mutex_guard = self
             .last_revision_cache
             .lock()
             .expect("lock should not be held");
         let cached_rev = if let Some((_node, rev, data)) = &*mutex_guard {
             Some((*rev, data.deref().as_ref()))
         } else {
             None
         };
         if let Some(cache) = &self.uncompressed_chunk_cache {
             let cache = &mut cache.borrow_mut();
             if let Some(size) = raw_size {
                 // Dynamically update the uncompressed_chunk_cache size to the
                 // largest revision we've seen in this revlog.
                 // Do it *before* restoration in case the current revision
                 // is the largest.
                 let factor = self
                     .data_config
                     .uncompressed_cache_factor
                     .expect("cache should not exist without factor");
                 let candidate_size = (size as f64 * factor) as usize;
                 let limiter_mut = cache.limiter_mut();
                 if candidate_size > limiter_mut.max_memory_usage() {
                     std::mem::swap(
                         limiter_mut,
                         &mut ByMemoryUsage::new(candidate_size),
                     );
                 }
             }
         }
         entry.rawdata(cached_rev, get_buffer)?;
         // drop cache to save memory, the caller is expected to update
         // the revision cache after validating the text
         mutex_guard.take();
         Ok(())
     }
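
The limiter swap above is the whole resizing mechanism: the memory budget
only ever grows, to fit a configured factor of the largest revision seen so
far. A standalone sketch using the same `schnellru` calls:

    use schnellru::{ByMemoryUsage, LruMap};

    // Grow (never shrink) the memory budget of an LRU map.
    fn grow_budget(
        cache: &mut LruMap<i32, Vec<u8>, ByMemoryUsage>,
        candidate_size: usize,
    ) {
        let limiter = cache.limiter_mut();
        if candidate_size > limiter.max_memory_usage() {
            std::mem::swap(limiter, &mut ByMemoryUsage::new(candidate_size));
        }
    }
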
 
     /// Only `pub` for `hg-cpython`.
     /// Obtain decompressed raw data for the specified revisions that are
     /// assumed to be in ascending order.
     ///
     /// Returns a list with decompressed data for each requested revision.
     #[doc(hidden)]
     pub fn chunks(
         &self,
         revs: Vec<Revision>,
         target_size: Option<u64>,
     ) -> Result<Vec<Arc<[u8]>>, RevlogError> {
         if revs.is_empty() {
             return Ok(vec![]);
         }
         let mut fetched_revs = vec![];
         let mut chunks = Vec::with_capacity(revs.len());
 
         match self.uncompressed_chunk_cache.as_ref() {
             Some(cache) => {
                 let mut cache = cache.borrow_mut();
                 for rev in revs.iter() {
                     match cache.get(rev) {
                         Some(hit) => chunks.push((*rev, hit.to_owned())),
                         None => fetched_revs.push(*rev),
                     }
                 }
             }
             None => fetched_revs = revs,
         }
 
         let already_cached = chunks.len();
 
         let sliced_chunks = if fetched_revs.is_empty() {
             vec![]
         } else if !self.data_config.with_sparse_read || self.is_inline() {
             vec![fetched_revs]
         } else {
             self.slice_chunk(&fetched_revs, target_size)?
         };
 
         self.with_read(|| {
             for revs_chunk in sliced_chunks {
                 let first_rev = revs_chunk[0];
                 // Skip trailing revisions with empty diff
                 let last_rev_idx = revs_chunk
                     .iter()
                     .rposition(|r| self.data_compressed_length(*r) != 0)
                     .unwrap_or(revs_chunk.len() - 1);
 
                 let last_rev = revs_chunk[last_rev_idx];
 
                 let (offset, data) =
                     self.get_segment_for_revs(first_rev, last_rev)?;
 
                 let revs_chunk = &revs_chunk[..=last_rev_idx];
 
                 for rev in revs_chunk {
-                    let chunk_start = self.start(*rev);
+                    let chunk_start = self.data_start(*rev);
                     let chunk_length = self.data_compressed_length(*rev);
                     // TODO revlogv2 should check the compression mode
                     let bytes = &data[chunk_start - offset..][..chunk_length];
                     let chunk = if !bytes.is_empty() && bytes[0] == ZSTD_BYTE {
                         // If we're using `zstd`, we want to try a more
                         // specialized decompression
                         let entry = self.index.get_entry(*rev).unwrap();
                         let is_delta = entry
                             .base_revision_or_base_of_delta_chain()
                             != (*rev).into();
                         let uncompressed = uncompressed_zstd_data(
                             bytes,
                             is_delta,
                             entry.uncompressed_len(),
                         )?;
                         Cow::Owned(uncompressed)
                     } else {
                         // Otherwise just fallback to generic decompression.
                         self.decompress(bytes)?
                     };
 
                     chunks.push((*rev, chunk.into()));
                 }
             }
             Ok(())
         })?;
 
         if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
             let mut cache = cache.borrow_mut();
             for (rev, chunk) in chunks.iter().skip(already_cached) {
                 cache.insert(*rev, chunk.clone());
             }
         }
         // Use stable sort here since it's *mostly* sorted
         chunks.sort_by(|a, b| a.0.cmp(&b.0));
         Ok(chunks.into_iter().map(|(_r, chunk)| chunk).collect())
     }
 
     /// Slice revs to reduce the amount of unrelated data to be read from disk.
     ///
     /// ``revs`` is sliced into groups that should be read in one time.
     /// Assume that revs are sorted.
     ///
     /// The initial chunk is sliced until the overall density
     /// (payload/chunks-span ratio) is above
     /// `revlog.data_config.sr_density_threshold`.
     /// No gap smaller than `revlog.data_config.sr_min_gap_size` is skipped.
     ///
     /// If `target_size` is set, no chunk larger than `target_size`
     /// will be returned.
     /// For consistency with other slicing choices, this limit won't go lower
     /// than `revlog.data_config.sr_min_gap_size`.
     ///
     /// If individual revision chunks are larger than this limit, they will
     /// still be raised individually.
     pub fn slice_chunk(
         &self,
         revs: &[Revision],
         target_size: Option<u64>,
     ) -> Result<Vec<Vec<Revision>>, RevlogError> {
         let target_size =
             target_size.map(|size| size.max(self.data_config.sr_min_gap_size));
 
         let target_density = self.data_config.sr_density_threshold;
         let min_gap_size = self.data_config.sr_min_gap_size as usize;
         let to_density = self.index.slice_chunk_to_density(
             revs,
             target_density,
             min_gap_size,
         );
 
         let mut sliced = vec![];
 
         for chunk in to_density {
             sliced.extend(
                 self.slice_chunk_to_size(&chunk, target_size)?
                     .into_iter()
                     .map(ToOwned::to_owned),
             );
         }
 
         Ok(sliced)
     }
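
A hedged usage sketch tying slicing back to reading (this is what `chunks`
above does when sparse-read is enabled): each returned group is dense enough
to be fetched with a single contiguous segment read.

    fn read_sparse(
        revlog: &InnerRevlog,
        revs: &[Revision],
        target_size: Option<u64>,
    ) -> Result<(), RevlogError> {
        for group in revlog.slice_chunk(revs, target_size)? {
            let (_offset, _data) = revlog
                .get_segment_for_revs(group[0], group[group.len() - 1])?;
            // ... carve and decompress individual chunks as in `chunks` ...
        }
        Ok(())
    }
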
 
     /// Slice revs to match the target size
     ///
     /// This is intended to be used on chunks that density slicing selected,
     /// but that are still too large compared to the read guarantee of revlogs.
     /// This might happen when the "minimal gap size" interrupted the slicing
     /// or when chains are built in a way that create large blocks next to
     /// each other.
     fn slice_chunk_to_size<'a>(
         &self,
         revs: &'a [Revision],
         target_size: Option<u64>,
     ) -> Result<Vec<&'a [Revision]>, RevlogError> {
-        let mut start_data = self.start(revs[0]);
+        let mut start_data = self.data_start(revs[0]);
         let end_data = self.end(revs[revs.len() - 1]);
         let full_span = end_data - start_data;
 
         let nothing_to_do = target_size
             .map(|size| full_span <= size as usize)
             .unwrap_or(true);
 
         if nothing_to_do {
             return Ok(vec![revs]);
         }
         let target_size = target_size.expect("target_size is set") as usize;
 
         let mut start_rev_idx = 0;
         let mut end_rev_idx = 1;
         let mut chunks = vec![];
 
         for (idx, rev) in revs.iter().enumerate().skip(1) {
             let span = self.end(*rev) - start_data;
             let is_snapshot = self.is_snapshot(*rev)?;
             if span <= target_size && is_snapshot {
                 end_rev_idx = idx + 1;
             } else {
                 let chunk =
                     self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
                 if !chunk.is_empty() {
                     chunks.push(chunk);
                 }
                 start_rev_idx = idx;
-                start_data = self.start(*rev);
+                start_data = self.data_start(*rev);
                 end_rev_idx = idx + 1;
             }
             if !is_snapshot {
                 break;
             }
         }
 
         // For the others, we use binary slicing to quickly converge towards
         // valid chunks (otherwise, we might end up looking for the start/end
         // of many revisions). This logic is not looking for the perfect
         // slicing point, it quickly converges towards valid chunks.
         let number_of_items = revs.len();
 
         while (end_data - start_data) > target_size {
             end_rev_idx = number_of_items;
             if number_of_items - start_rev_idx <= 1 {
                 // Protect against individual chunks larger than the limit
                 break;
             }
             let mut local_end_data = self.end(revs[end_rev_idx - 1]);
             let mut span = local_end_data - start_data;
             while span > target_size {
                 if end_rev_idx - start_rev_idx <= 1 {
                     // Protect against individual chunks larger than the limit
                     break;
                 }
                 end_rev_idx -= (end_rev_idx - start_rev_idx) / 2;
                 local_end_data = self.end(revs[end_rev_idx - 1]);
                 span = local_end_data - start_data;
             }
             let chunk =
                 self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
             if !chunk.is_empty() {
                 chunks.push(chunk);
             }
             start_rev_idx = end_rev_idx;
-            start_data = self.start(revs[start_rev_idx]);
+            start_data = self.data_start(revs[start_rev_idx]);
         }
 
         let chunk = self.trim_chunk(revs, start_rev_idx, None);
         if !chunk.is_empty() {
             chunks.push(chunk);
         }
 
         Ok(chunks)
     }
 
     /// Returns `revs[startidx..endidx]` without empty trailing revs
     fn trim_chunk<'a>(
         &self,
         revs: &'a [Revision],
         start_rev_idx: usize,
         end_rev_idx: Option<usize>,
     ) -> &'a [Revision] {
         let mut end_rev_idx = end_rev_idx.unwrap_or(revs.len());
 
         // If we have a non-empty delta candidate, there is nothing to trim
         if revs[end_rev_idx - 1].0 < self.len() as BaseRevision {
             // Trim empty revs at the end, except the very first rev of a chain
             while end_rev_idx > 1
                 && end_rev_idx > start_rev_idx
                 && self.data_compressed_length(revs[end_rev_idx - 1]) == 0
             {
                 end_rev_idx -= 1
             }
         }
 
         &revs[start_rev_idx..end_rev_idx]
     }
 
     /// Check the hash of some given data against the recorded hash.
     pub fn check_hash(
         &self,
         p1: Revision,
         p2: Revision,
         expected: &[u8],
         data: &[u8],
     ) -> bool {
         let e1 = self.index.get_entry(p1);
         let h1 = match e1 {
             Some(ref entry) => entry.hash(),
             None => &NULL_NODE,
         };
         let e2 = self.index.get_entry(p2);
         let h2 = match e2 {
             Some(ref entry) => entry.hash(),
             None => &NULL_NODE,
         };
 
         hash(data, h1.as_bytes(), h2.as_bytes()) == expected
     }
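
The `hash` helper called here is defined outside this hunk; Mercurial's node
hash is SHA-1 over the two parent nodes in bytewise-sorted order followed by
the revision text. A sketch consistent with that definition (assumed, the
real helper lives elsewhere in this module):

    use sha1::{Digest, Sha1};

    // SHA-1 of sorted parents then data, as used for revlog node ids.
    fn node_hash(data: &[u8], p1: &[u8], p2: &[u8]) -> [u8; 20] {
        let mut hasher = Sha1::new();
        let (a, b) = if p1 < p2 { (p1, p2) } else { (p2, p1) };
        hasher.update(a);
        hasher.update(b);
        hasher.update(data);
        hasher.finalize().into()
    }
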
784
784
785 /// Returns whether we are currently in a [`Self::with_write`] context
785 /// Returns whether we are currently in a [`Self::with_write`] context
786 pub fn is_writing(&self) -> bool {
786 pub fn is_writing(&self) -> bool {
787 self.writing_handles.is_some()
787 self.writing_handles.is_some()
788 }
788 }
789
789
790 /// Open the revlog files for writing
790 /// Open the revlog files for writing
791 ///
791 ///
792 /// Adding content to a revlog should be done within this context.
792 /// Adding content to a revlog should be done within this context.
793 /// TODO try using `BufRead` and `BufWrite` and see if performance improves
793 /// TODO try using `BufRead` and `BufWrite` and see if performance improves
794 pub fn with_write<R>(
794 pub fn with_write<R>(
795 &mut self,
795 &mut self,
796 transaction: &mut impl Transaction,
796 transaction: &mut impl Transaction,
797 data_end: Option<usize>,
797 data_end: Option<usize>,
798 func: impl FnOnce() -> R,
798 func: impl FnOnce() -> R,
799 ) -> Result<R, HgError> {
799 ) -> Result<R, HgError> {
800 if self.is_writing() {
800 if self.is_writing() {
801 return Ok(func());
801 return Ok(func());
802 }
802 }
803 self.enter_writing_context(data_end, transaction)
803 self.enter_writing_context(data_end, transaction)
804 .inspect_err(|_| {
804 .inspect_err(|_| {
805 self.exit_writing_context();
805 self.exit_writing_context();
806 })?;
806 })?;
807 let res = func();
807 let res = func();
808 self.exit_writing_context();
808 self.exit_writing_context();
809 Ok(res)
809 Ok(res)
810 }
810 }
811
811
812 /// `pub` only for use in hg-cpython
812 /// `pub` only for use in hg-cpython
813 #[doc(hidden)]
813 #[doc(hidden)]
814 pub fn exit_writing_context(&mut self) {
814 pub fn exit_writing_context(&mut self) {
815 self.writing_handles.take();
815 self.writing_handles.take();
816 self.segment_file.writing_handle.take();
816 self.segment_file.writing_handle.take();
817 self.segment_file.reading_handle.take();
817 self.segment_file.reading_handle.take();
818 }
818 }
819
819
820 /// `pub` only for use in hg-cpython
820 /// `pub` only for use in hg-cpython
821 #[doc(hidden)]
821 #[doc(hidden)]
822 pub fn python_writing_handles(&self) -> Option<&WriteHandles> {
822 pub fn python_writing_handles(&self) -> Option<&WriteHandles> {
823 self.writing_handles.as_ref()
823 self.writing_handles.as_ref()
824 }
824 }
825
825
826 /// `pub` only for use in hg-cpython
826 /// `pub` only for use in hg-cpython
827 #[doc(hidden)]
827 #[doc(hidden)]
828 pub fn enter_writing_context(
828 pub fn enter_writing_context(
829 &mut self,
829 &mut self,
830 data_end: Option<usize>,
830 data_end: Option<usize>,
831 transaction: &mut impl Transaction,
831 transaction: &mut impl Transaction,
832 ) -> Result<(), HgError> {
832 ) -> Result<(), HgError> {
833 let data_size = if self.is_empty() {
833 let data_size = if self.is_empty() {
834 0
834 0
835 } else {
835 } else {
836 self.end(Revision((self.len() - 1) as BaseRevision))
836 self.end(Revision((self.len() - 1) as BaseRevision))
837 };
837 };
838 let data_handle = if !self.is_inline() {
838 let data_handle = if !self.is_inline() {
839 let data_handle = match self.vfs.open_write(&self.data_file) {
839 let data_handle = match self.vfs.open_write(&self.data_file) {
840 Ok(mut f) => {
840 Ok(mut f) => {
841 if let Some(end) = data_end {
841 if let Some(end) = data_end {
842 f.seek(SeekFrom::Start(end as u64))
842 f.seek(SeekFrom::Start(end as u64))
843 .when_reading_file(&self.data_file)?;
843 .when_reading_file(&self.data_file)?;
844 } else {
844 } else {
845 f.seek(SeekFrom::End(0))
845 f.seek(SeekFrom::End(0))
846 .when_reading_file(&self.data_file)?;
846 .when_reading_file(&self.data_file)?;
847 }
847 }
848 f
848 f
849 }
849 }
850 Err(e) => match e {
850 Err(e) => match e {
851 HgError::IoError { error, context } => {
851 HgError::IoError { error, context } => {
852 if error.kind() != ErrorKind::NotFound {
852 if error.kind() != ErrorKind::NotFound {
853 return Err(HgError::IoError { error, context });
853 return Err(HgError::IoError { error, context });
854 }
854 }
855 self.vfs.create(&self.data_file, true)?
855 self.vfs.create(&self.data_file, true)?
856 }
856 }
857 e => return Err(e),
857 e => return Err(e),
858 },
858 },
859 };
859 };
860 transaction.add(&self.data_file, data_size);
860 transaction.add(&self.data_file, data_size);
861 Some(FileHandle::from_file(
861 Some(FileHandle::from_file(
862 data_handle,
862 data_handle,
863 dyn_clone::clone_box(&*self.vfs),
863 dyn_clone::clone_box(&*self.vfs),
864 &self.data_file,
864 &self.data_file,
865 ))
865 ))
866 } else {
866 } else {
867 None
867 None
868 };
868 };
869 let index_size = self.len() * INDEX_ENTRY_SIZE;
869 let index_size = self.len() * INDEX_ENTRY_SIZE;
870 let index_handle = self.index_write_handle()?;
870 let index_handle = self.index_write_handle()?;
871 if self.is_inline() {
871 if self.is_inline() {
872 transaction.add(&self.index_file, data_size);
872 transaction.add(&self.index_file, data_size);
873 } else {
873 } else {
874 transaction.add(&self.index_file, index_size);
874 transaction.add(&self.index_file, index_size);
875 }
875 }
876 self.writing_handles = Some(WriteHandles {
876 self.writing_handles = Some(WriteHandles {
877 index_handle: index_handle.clone(),
877 index_handle: index_handle.clone(),
878 data_handle: data_handle.clone(),
878 data_handle: data_handle.clone(),
879 });
879 });
880 *self.segment_file.reading_handle.borrow_mut() = if self.is_inline() {
880 *self.segment_file.reading_handle.borrow_mut() = if self.is_inline() {
881 Some(index_handle)
881 Some(index_handle)
882 } else {
882 } else {
883 data_handle
883 data_handle
884 };
884 };
885 Ok(())
885 Ok(())
886 }
886 }
887
887
888 /// Get a write handle to the index, sought to the end of its data.
888 /// Get a write handle to the index, sought to the end of its data.
889 fn index_write_handle(&self) -> Result<FileHandle, HgError> {
889 fn index_write_handle(&self) -> Result<FileHandle, HgError> {
890 let res = if self.delayed_buffer.is_none() {
890 let res = if self.delayed_buffer.is_none() {
891 if self.data_config.check_ambig {
891 if self.data_config.check_ambig {
892 self.vfs.open_check_ambig(&self.index_file)
892 self.vfs.open_check_ambig(&self.index_file)
893 } else {
893 } else {
894 self.vfs.open_write(&self.index_file)
894 self.vfs.open_write(&self.index_file)
895 }
895 }
896 } else {
896 } else {
897 self.vfs.open_write(&self.index_file)
897 self.vfs.open_write(&self.index_file)
898 };
898 };
899 match res {
899 match res {
900 Ok(mut handle) => {
900 Ok(mut handle) => {
901 handle
901 handle
902 .seek(SeekFrom::End(0))
902 .seek(SeekFrom::End(0))
903 .when_reading_file(&self.index_file)?;
903 .when_reading_file(&self.index_file)?;
904 Ok(
904 Ok(
905 if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
905 if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
906 {
906 {
907 FileHandle::from_file_delayed(
907 FileHandle::from_file_delayed(
908 handle,
908 handle,
909 dyn_clone::clone_box(&*self.vfs),
909 dyn_clone::clone_box(&*self.vfs),
910 &self.index_file,
910 &self.index_file,
911 delayed_buffer.clone(),
911 delayed_buffer.clone(),
912 )?
912 )?
913 } else {
913 } else {
914 FileHandle::from_file(
914 FileHandle::from_file(
915 handle,
915 handle,
916 dyn_clone::clone_box(&*self.vfs),
916 dyn_clone::clone_box(&*self.vfs),
917 &self.index_file,
917 &self.index_file,
918 )
918 )
919 },
919 },
920 )
920 )
921 }
921 }
922 Err(e) => match e {
922 Err(e) => match e {
923 HgError::IoError { error, context } => {
923 HgError::IoError { error, context } => {
924 if error.kind() != ErrorKind::NotFound {
924 if error.kind() != ErrorKind::NotFound {
925 return Err(HgError::IoError { error, context });
925 return Err(HgError::IoError { error, context });
926 };
926 };
927 if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
927 if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
928 {
928 {
929 FileHandle::new_delayed(
929 FileHandle::new_delayed(
930 dyn_clone::clone_box(&*self.vfs),
930 dyn_clone::clone_box(&*self.vfs),
931 &self.index_file,
931 &self.index_file,
932 true,
932 true,
933 delayed_buffer.clone(),
933 delayed_buffer.clone(),
934 )
934 )
935 } else {
935 } else {
936 FileHandle::new(
936 FileHandle::new(
937 dyn_clone::clone_box(&*self.vfs),
937 dyn_clone::clone_box(&*self.vfs),
938 &self.index_file,
938 &self.index_file,
939 true,
939 true,
940 true,
940 true,
941 )
941 )
942 }
942 }
943 }
943 }
944 e => Err(e),
944 e => Err(e),
945 },
945 },
946 }
946 }
947 }
947 }
948
948
    /// Split the data of an inline revlog into an index and a data file
    pub fn split_inline(
        &mut self,
        header: IndexHeader,
        new_index_file_path: Option<PathBuf>,
    ) -> Result<PathBuf, RevlogError> {
        assert!(self.delayed_buffer.is_none());
        let existing_handles = self.writing_handles.is_some();
        if let Some(handles) = &mut self.writing_handles {
            handles.index_handle.flush()?;
            self.writing_handles.take();
            self.segment_file.writing_handle.take();
        }
        let mut new_data_file_handle =
            self.vfs.create(&self.data_file, true)?;
        // Drop any potential data, possibly redundant with the VFS impl.
        new_data_file_handle
            .set_len(0)
            .when_writing_file(&self.data_file)?;

        self.with_read(|| -> Result<(), RevlogError> {
            for r in 0..self.index.len() {
                let rev = Revision(r as BaseRevision);
                let rev_segment = self.get_segment_for_revs(rev, rev)?.1;
                new_data_file_handle
                    .write_all(&rev_segment)
                    .when_writing_file(&self.data_file)?;
            }
            new_data_file_handle
                .flush()
                .when_writing_file(&self.data_file)?;
            Ok(())
        })?;

        if let Some(index_path) = new_index_file_path {
            self.index_file = index_path
        }

        let mut new_index_handle = self.vfs.create(&self.index_file, true)?;
        let mut new_data = Vec::with_capacity(self.len() * INDEX_ENTRY_SIZE);
        for r in 0..self.len() {
            let rev = Revision(r as BaseRevision);
            let entry = self.index.entry_binary(rev).unwrap_or_else(|| {
                panic!(
                    "entry {} should exist in {}",
                    r,
                    self.index_file.display()
                )
            });
            if r == 0 {
                new_data.extend(header.header_bytes);
            }
            new_data.extend(entry);
        }
        new_index_handle
            .write_all(&new_data)
            .when_writing_file(&self.index_file)?;
        // Replace the index with a new one because the buffer contains inline
        // data
        self.index = Index::new(Box::new(new_data), header)?;
        self.inline = false;

        self.segment_file = RandomAccessFile::new(
            dyn_clone::clone_box(&*self.vfs),
            self.data_file.to_owned(),
        );
        if existing_handles {
            // Switched from inline to conventional, reopen the index
            let new_data_handle = Some(FileHandle::from_file(
                new_data_file_handle,
                dyn_clone::clone_box(&*self.vfs),
                &self.data_file,
            ));
            self.writing_handles = Some(WriteHandles {
                index_handle: self.index_write_handle()?,
                data_handle: new_data_handle.clone(),
            });
            *self.segment_file.writing_handle.borrow_mut() = new_data_handle;
        }

        Ok(self.index_file.to_owned())
    }

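    // Illustrative call (an assumption, not in the original file): given a
    // suitable `header: IndexHeader`, a caller splits once the inline revlog
    // has grown too large, keeping the same index path:
    //
    //     let new_index_path = inner.split_inline(header, None)?;
    //     assert!(!inner.is_inline());
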
    /// Write a new entry to this revlog.
    /// - `entry` is the index bytes
    /// - `header_and_data` is the compression header and the revision data
    /// - `offset` is the position in the data file to write to
    /// - `index_end` is the overwritten position in the index in revlog-v2,
    ///   since the format may allow a rewrite of garbage data at the end.
    /// - `data_end` is the overwritten position in the data-file in
    ///   revlog-v2, since the format may allow a rewrite of garbage data at
    ///   the end.
    ///
    /// XXX Why do we have `data_end` *and* `offset`? Same question in Python
    pub fn write_entry(
        &mut self,
        mut transaction: impl Transaction,
        entry: &[u8],
        header_and_data: (&[u8], &[u8]),
        mut offset: usize,
        index_end: Option<u64>,
        data_end: Option<u64>,
    ) -> Result<(u64, Option<u64>), HgError> {
        let current_revision = self.len() - 1;
        let canonical_index_file = self.canonical_index_file();

        let is_inline = self.is_inline();
        let handles = match &mut self.writing_handles {
            None => {
                return Err(HgError::abort(
                    "adding revision outside of the `with_write` context",
                    exit_codes::ABORT,
                    None,
                ));
            }
            Some(handles) => handles,
        };
        let index_handle = &mut handles.index_handle;
        let data_handle = &mut handles.data_handle;
        if let Some(end) = index_end {
            index_handle
                .seek(SeekFrom::Start(end))
                .when_reading_file(&self.index_file)?;
        } else {
            index_handle
                .seek(SeekFrom::End(0))
                .when_reading_file(&self.index_file)?;
        }
        if let Some(data_handle) = data_handle {
            if let Some(end) = data_end {
                data_handle
                    .seek(SeekFrom::Start(end))
                    .when_reading_file(&self.data_file)?;
            } else {
                data_handle
                    .seek(SeekFrom::End(0))
                    .when_reading_file(&self.data_file)?;
            }
        }
        let (header, data) = header_and_data;

        if !is_inline {
            transaction.add(&self.data_file, offset);
            transaction
                .add(&canonical_index_file, current_revision * entry.len());
            let data_handle = data_handle
                .as_mut()
                .expect("data handle should exist when not inline");
            if !header.is_empty() {
                data_handle.write_all(header)?;
            }
            data_handle.write_all(data)?;
            match &mut self.delayed_buffer {
                Some(buf) => {
                    buf.lock()
                        .expect("propagate the panic")
                        .buffer
                        .write_all(entry)
                        .expect("write to delay buffer should succeed");
                }
                None => index_handle.write_all(entry)?,
            }
        } else if self.delayed_buffer.is_some() {
            return Err(HgError::abort(
                "invalid delayed write on inline revlog",
                exit_codes::ABORT,
                None,
            ));
        } else {
            offset += current_revision * entry.len();
            transaction.add(&canonical_index_file, offset);
            index_handle.write_all(entry)?;
            index_handle.write_all(header)?;
            index_handle.write_all(data)?;
        }
        let data_position = match data_handle {
            Some(h) => Some(h.position()?),
            None => None,
        };
        Ok((index_handle.position()?, data_position))
    }

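    // Illustrative call (an assumption, not in the original file): appending
    // a new revision at the end of both files, with no v2-style overwrite:
    //
    //     let (index_pos, data_pos) = inner.write_entry(
    //         transaction,
    //         &entry_bytes,
    //         (&compression_header, &compressed_data),
    //         data_offset,
    //         None, // index_end: append to the index
    //         None, // data_end: append to the data file
    //     )?;
    //
    // `inner`, `transaction` and the byte buffers are hypothetical names.
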
    /// Return the real target index file and not the temporary when diverting
    pub fn canonical_index_file(&self) -> PathBuf {
        self.original_index_file
            .as_ref()
            .map(ToOwned::to_owned)
            .unwrap_or_else(|| self.index_file.to_owned())
    }

    /// Return the path to the diverted index
    fn diverted_index(&self) -> PathBuf {
        self.index_file.with_extension("i.a")
    }
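    // For example (illustrative, not in the original file): an index named
    // "00changelog.i" is diverted to "00changelog.i.a", the conventional
    // suffix for a pending index file.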

    /// True if we're in a [`Self::with_write`] or [`Self::with_read`] context
    pub fn is_open(&self) -> bool {
        self.segment_file.is_open()
    }

    /// Set this revlog to delay its writes to a buffer
    pub fn delay(&mut self) -> Result<Option<PathBuf>, HgError> {
        assert!(!self.is_open());
        if self.is_inline() {
            return Err(HgError::abort(
                "revlog with delayed write should not be inline",
                exit_codes::ABORT,
                None,
            ));
        }
        if self.delayed_buffer.is_some() || self.original_index_file.is_some()
        {
            // Delay or divert already happening
            return Ok(None);
        }
        if self.is_empty() {
            self.original_index_file = Some(self.index_file.to_owned());
            self.index_file = self.diverted_index();
            if self.vfs.exists(&self.index_file) {
                self.vfs.unlink(&self.index_file)?;
            }
            Ok(Some(self.index_file.to_owned()))
        } else {
            self.delayed_buffer =
                Some(Arc::new(Mutex::new(DelayedBuffer::default())));
            Ok(None)
        }
    }

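    // Illustrative summary (not in the original file): an empty revlog is
    // *diverted* (all writes go straight to the temporary "<name>.i.a"
    // file), while a non-empty one is *delayed* (new index entries
    // accumulate in an in-memory `DelayedBuffer`).
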
    /// Write the pending data (in memory), if any, to the diverted index
    /// file (an on-disk temporary file).
    pub fn write_pending(
        &mut self,
    ) -> Result<(Option<PathBuf>, bool), HgError> {
        assert!(!self.is_open());
        if self.is_inline() {
            return Err(HgError::abort(
                "revlog with delayed write should not be inline",
                exit_codes::ABORT,
                None,
            ));
        }
        if self.original_index_file.is_some() {
            return Ok((None, true));
        }
        let mut any_pending = false;
        let pending_index_file = self.diverted_index();
        if self.vfs.exists(&pending_index_file) {
            self.vfs.unlink(&pending_index_file)?;
        }
        self.vfs.copy(&self.index_file, &pending_index_file)?;
        if let Some(delayed_buffer) = self.delayed_buffer.take() {
            let mut index_file_handle =
                self.vfs.open_write(&pending_index_file)?;
            index_file_handle
                .seek(SeekFrom::End(0))
                .when_writing_file(&pending_index_file)?;
            let delayed_data =
                &delayed_buffer.lock().expect("propagate the panic").buffer;
            index_file_handle
                .write_all(delayed_data)
                .when_writing_file(&pending_index_file)?;
            any_pending = true;
        }
        self.original_index_file = Some(self.index_file.to_owned());
        self.index_file = pending_index_file;
        Ok((Some(self.index_file.to_owned()), any_pending))
    }

    /// Overwrite the canonical file with the diverted file, or write out the
    /// delayed buffer.
    /// Returns an error if the revlog is neither diverted nor delayed.
    pub fn finalize_pending(&mut self) -> Result<PathBuf, HgError> {
        assert!(!self.is_open());
        if self.is_inline() {
            return Err(HgError::abort(
                "revlog with delayed write should not be inline",
                exit_codes::ABORT,
                None,
            ));
        }
        match (
            self.delayed_buffer.as_ref(),
            self.original_index_file.as_ref(),
        ) {
            (None, None) => {
                return Err(HgError::abort(
                    "neither delay nor divert found on this revlog",
                    exit_codes::ABORT,
                    None,
                ));
            }
            (Some(delay), None) => {
                let mut index_file_handle =
                    self.vfs.open_write(&self.index_file)?;
                index_file_handle
                    .seek(SeekFrom::End(0))
                    .when_writing_file(&self.index_file)?;
                index_file_handle
                    .write_all(
                        &delay.lock().expect("propagate the panic").buffer,
                    )
                    .when_writing_file(&self.index_file)?;
                self.delayed_buffer = None;
            }
            (None, Some(divert)) => {
                if self.vfs.exists(&self.index_file) {
                    self.vfs.rename(&self.index_file, divert, true)?;
                }
                divert.clone_into(&mut self.index_file);
                self.original_index_file = None;
            }
            (Some(_), Some(_)) => unreachable!(
                "{} is in an inconsistent state of both delay and divert",
                self.canonical_index_file().display(),
            ),
        }
        Ok(self.canonical_index_file())
    }

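    // Illustrative call order (an assumption drawn from the methods above,
    // not from the original file): transaction code is expected to drive the
    // delay/divert cycle roughly as
    //
    //     revlog.delay()?;             // start buffering or diverting
    //     // ... revisions are written ...
    //     revlog.write_pending()?;     // expose the pending "<name>.i.a"
    //     revlog.finalize_pending()?;  // fold changes into the real index
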
    /// `pub` only for `hg-cpython`. This is made a different method than
    /// [`Revlog::index`] in case there is a different invariant that pops up
    /// later.
    #[doc(hidden)]
    pub fn shared_index(&self) -> &Index {
        &self.index
    }
}

/// The use of a [`RefCell`] assumes that a given revlog will only
/// be accessed (read or write) by a single thread.
type UncompressedChunkCache =
    RefCell<LruMap<Revision, Arc<[u8]>, ByMemoryUsage>>;

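// Illustrative construction (not in the original file; assumes `schnellru`'s
// `ByMemoryUsage::new` takes a byte budget):
//
//     let cache: UncompressedChunkCache =
//         RefCell::new(LruMap::new(ByMemoryUsage::new(16 * 1024 * 1024)));
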
/// The node, revision and data for the last revision we've seen. Speeds up
/// a lot of sequential operations of the revlog.
///
/// The data is not just bytes since it can come from Python and we want to
/// avoid copies if possible.
type SingleRevisionCache =
    (Node, Revision, Box<dyn Deref<Target = [u8]> + Send>);

/// A way of progressively filling a buffer with revision data, then returning
/// that buffer. Used to abstract away Python-allocated buffers to reduce
/// copying for performance reasons.
pub trait RevisionBuffer {
    /// The owned buffer type to return
    type Target;
    /// Copies the slice into the buffer
    fn extend_from_slice(&mut self, slice: &[u8]);
    /// Returns the now finished owned buffer
    fn finish(self) -> Self::Target;
}

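// A minimal sketch (not in the original file) showing that any owned buffer
// can implement the trait; `PlainBuffer` is a hypothetical example type.
#[cfg(test)]
mod revision_buffer_example {
    use super::*;

    struct PlainBuffer(Vec<u8>);

    impl RevisionBuffer for PlainBuffer {
        type Target = Vec<u8>;

        fn extend_from_slice(&mut self, slice: &[u8]) {
            self.0.extend_from_slice(slice);
        }

        fn finish(self) -> Self::Target {
            self.0
        }
    }

    #[test]
    fn plain_buffer_roundtrip() {
        let mut buf = PlainBuffer(Vec::new());
        buf.extend_from_slice(b"abc");
        assert_eq!(buf.finish(), b"abc".to_vec());
    }
}
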
/// A simple vec-based buffer. This is uselessly complicated for the pure Rust
/// case, but it's the price to pay for Python compatibility.
#[derive(Debug)]
pub(super) struct CoreRevisionBuffer {
    buf: Vec<u8>,
}

impl CoreRevisionBuffer {
    pub fn new() -> Self {
        Self { buf: vec![] }
    }

    #[inline]
    pub fn resize(&mut self, size: usize) {
        // `saturating_sub` avoids an underflow panic when the buffer has
        // already reserved more than `size`.
        self.buf
            .reserve_exact(size.saturating_sub(self.buf.capacity()));
    }
}

impl RevisionBuffer for CoreRevisionBuffer {
    type Target = Vec<u8>;

    #[inline]
    fn extend_from_slice(&mut self, slice: &[u8]) {
        self.buf.extend_from_slice(slice);
    }

    #[inline]
    fn finish(self) -> Self::Target {
        self.buf
    }
}

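// A tiny usage check (not in the original file): chunks are appended in
// order and `finish` hands back the underlying `Vec<u8>`.
#[cfg(test)]
mod core_revision_buffer_example {
    use super::*;

    #[test]
    fn assembles_chunks_in_order() {
        let mut buf = CoreRevisionBuffer::new();
        // Pre-allocate for the expected revision size (five bytes here).
        buf.resize(5);
        buf.extend_from_slice(b"he");
        buf.extend_from_slice(b"llo");
        assert_eq!(buf.finish(), b"hello".to_vec());
    }
}
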
/// Calculate the hash of a revision given its data and its parents.
pub fn hash(
    data: &[u8],
    p1_hash: &[u8],
    p2_hash: &[u8],
) -> [u8; NODE_BYTES_LENGTH] {
    let mut hasher = Sha1::new();
    let (a, b) = (p1_hash, p2_hash);
    if a > b {
        hasher.update(b);
        hasher.update(a);
    } else {
        hasher.update(a);
        hasher.update(b);
    }
    hasher.update(data);
    *hasher.finalize().as_ref()
}
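
// A minimal sanity check (not in the original file): parent hashes are fed
// to SHA-1 in sorted order, so swapping `p1` and `p2` must not change the
// resulting node.
#[cfg(test)]
mod hash_example {
    use super::*;

    #[test]
    fn parent_order_does_not_matter() {
        let p1 = [1u8; NODE_BYTES_LENGTH];
        let p2 = [2u8; NODE_BYTES_LENGTH];
        assert_eq!(hash(b"data", &p1, &p2), hash(b"data", &p2, &p1));
    }
}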