Show More
@@ -1,6 +1,7 | |||||
1 | use crate::errors::HgError; |
|
1 | use crate::errors::HgError; | |
2 | use crate::repo::Repo; |
|
2 | use crate::repo::Repo; | |
3 | use crate::revlog::path_encode::path_encode; |
|
3 | use crate::revlog::path_encode::path_encode; | |
|
4 | use crate::revlog::revlog::RevlogEntry; | |||
4 | use crate::revlog::revlog::{Revlog, RevlogError}; |
|
5 | use crate::revlog::revlog::{Revlog, RevlogError}; | |
5 | use crate::revlog::NodePrefix; |
|
6 | use crate::revlog::NodePrefix; | |
6 | use crate::revlog::Revision; |
|
7 | use crate::revlog::Revision; | |
@@ -23,7 +24,7 impl Filelog { | |||||
23 | Ok(Self { revlog }) |
|
24 | Ok(Self { revlog }) | |
24 | } |
|
25 | } | |
25 |
|
26 | |||
26 |
/// The given node ID is that of the file as found in a |
|
27 | /// The given node ID is that of the file as found in a filelog, not of a | |
27 | /// changeset. |
|
28 | /// changeset. | |
28 | pub fn data_for_node( |
|
29 | pub fn data_for_node( | |
29 | &self, |
|
30 | &self, | |
@@ -33,7 +34,7 impl Filelog { | |||||
33 | self.data_for_rev(file_rev) |
|
34 | self.data_for_rev(file_rev) | |
34 | } |
|
35 | } | |
35 |
|
36 | |||
36 |
/// The given revision is that of the file as found in a |
|
37 | /// The given revision is that of the file as found in a filelog, not of a | |
37 | /// changeset. |
|
38 | /// changeset. | |
38 | pub fn data_for_rev( |
|
39 | pub fn data_for_rev( | |
39 | &self, |
|
40 | &self, | |
@@ -42,6 +43,25 impl Filelog { | |||||
42 | let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned(); |
|
43 | let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned(); | |
43 | Ok(FilelogRevisionData(data.into())) |
|
44 | Ok(FilelogRevisionData(data.into())) | |
44 | } |
|
45 | } | |
|
46 | ||||
|
47 | /// The given node ID is that of the file as found in a filelog, not of a | |||
|
48 | /// changeset. | |||
|
49 | pub fn entry_for_node( | |||
|
50 | &self, | |||
|
51 | file_node: impl Into<NodePrefix>, | |||
|
52 | ) -> Result<FilelogEntry, RevlogError> { | |||
|
53 | let file_rev = self.revlog.rev_from_node(file_node.into())?; | |||
|
54 | self.entry_for_rev(file_rev) | |||
|
55 | } | |||
|
56 | ||||
|
57 | /// The given revision is that of the file as found in a filelog, not of a | |||
|
58 | /// changeset. | |||
|
59 | pub fn entry_for_rev( | |||
|
60 | &self, | |||
|
61 | file_rev: Revision, | |||
|
62 | ) -> Result<FilelogEntry, RevlogError> { | |||
|
63 | Ok(FilelogEntry(self.revlog.get_entry(file_rev)?)) | |||
|
64 | } | |||
45 | } |
|
65 | } | |
46 |
|
66 | |||
47 | fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf { |
|
67 | fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf { | |
@@ -50,6 +70,14 fn store_path(hg_path: &HgPath, suffix: | |||||
50 | get_path_from_bytes(&encoded_bytes).into() |
|
70 | get_path_from_bytes(&encoded_bytes).into() | |
51 | } |
|
71 | } | |
52 |
|
72 | |||
|
73 | pub struct FilelogEntry<'a>(RevlogEntry<'a>); | |||
|
74 | ||||
|
75 | impl FilelogEntry<'_> { | |||
|
76 | pub fn data(&self) -> Result<FilelogRevisionData, HgError> { | |||
|
77 | Ok(FilelogRevisionData(self.0.data()?.into_owned())) | |||
|
78 | } | |||
|
79 | } | |||
|
80 | ||||
53 | /// The data for one revision in a filelog, uncompressed and delta-resolved. |
|
81 | /// The data for one revision in a filelog, uncompressed and delta-resolved. | |
54 | pub struct FilelogRevisionData(Vec<u8>); |
|
82 | pub struct FilelogRevisionData(Vec<u8>); | |
55 |
|
83 |
@@ -39,9 +39,13 impl From<NodeMapError> for RevlogError | |||||
39 | } |
|
39 | } | |
40 | } |
|
40 | } | |
41 |
|
41 | |||
|
42 | fn corrupted() -> HgError { | |||
|
43 | HgError::corrupted("corrupted revlog") | |||
|
44 | } | |||
|
45 | ||||
42 | impl RevlogError { |
|
46 | impl RevlogError { | |
43 | fn corrupted() -> Self { |
|
47 | fn corrupted() -> Self { | |
44 |
RevlogError::Other( |
|
48 | RevlogError::Other(corrupted()) | |
45 | } |
|
49 | } | |
46 | } |
|
50 | } | |
47 |
|
51 | |||
@@ -191,7 +195,7 impl Revlog { | |||||
191 | if rev == NULL_REVISION { |
|
195 | if rev == NULL_REVISION { | |
192 | return Ok(Cow::Borrowed(&[])); |
|
196 | return Ok(Cow::Borrowed(&[])); | |
193 | }; |
|
197 | }; | |
194 | self.get_entry(rev)?.data() |
|
198 | Ok(self.get_entry(rev)?.data()?) | |
195 | } |
|
199 | } | |
196 |
|
200 | |||
197 | /// Check the hash of some given data against the recorded hash. |
|
201 | /// Check the hash of some given data against the recorded hash. | |
@@ -222,13 +226,13 impl Revlog { | |||||
222 | fn build_data_from_deltas( |
|
226 | fn build_data_from_deltas( | |
223 | snapshot: RevlogEntry, |
|
227 | snapshot: RevlogEntry, | |
224 | deltas: &[RevlogEntry], |
|
228 | deltas: &[RevlogEntry], | |
225 |
) -> Result<Vec<u8>, |
|
229 | ) -> Result<Vec<u8>, HgError> { | |
226 | let snapshot = snapshot.data_chunk()?; |
|
230 | let snapshot = snapshot.data_chunk()?; | |
227 | let deltas = deltas |
|
231 | let deltas = deltas | |
228 | .iter() |
|
232 | .iter() | |
229 | .rev() |
|
233 | .rev() | |
230 | .map(RevlogEntry::data_chunk) |
|
234 | .map(RevlogEntry::data_chunk) | |
231 |
.collect::<Result<Vec< |
|
235 | .collect::<Result<Vec<_>, _>>()?; | |
232 | let patches: Vec<_> = |
|
236 | let patches: Vec<_> = | |
233 | deltas.iter().map(|d| patch::PatchList::new(d)).collect(); |
|
237 | deltas.iter().map(|d| patch::PatchList::new(d)).collect(); | |
234 | let patch = patch::fold_patch_lists(&patches); |
|
238 | let patch = patch::fold_patch_lists(&patches); | |
@@ -246,7 +250,10 impl Revlog { | |||||
246 | } |
|
250 | } | |
247 |
|
251 | |||
248 | /// Get an entry of the revlog. |
|
252 | /// Get an entry of the revlog. | |
249 | fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> { |
|
253 | pub fn get_entry( | |
|
254 | &self, | |||
|
255 | rev: Revision, | |||
|
256 | ) -> Result<RevlogEntry, RevlogError> { | |||
250 | let index_entry = self |
|
257 | let index_entry = self | |
251 | .index |
|
258 | .index | |
252 | .get_entry(rev) |
|
259 | .get_entry(rev) | |
@@ -281,8 +288,8 impl Revlog { | |||||
281 | fn get_entry_internal( |
|
288 | fn get_entry_internal( | |
282 | &self, |
|
289 | &self, | |
283 | rev: Revision, |
|
290 | rev: Revision, | |
284 |
) -> Result<RevlogEntry, |
|
291 | ) -> Result<RevlogEntry, HgError> { | |
285 |
return self.get_entry(rev).map_err(|_| |
|
292 | return self.get_entry(rev).map_err(|_| corrupted()); | |
286 | } |
|
293 | } | |
287 | } |
|
294 | } | |
288 |
|
295 | |||
@@ -304,7 +311,7 impl<'a> RevlogEntry<'a> { | |||||
304 | } |
|
311 | } | |
305 |
|
312 | |||
306 | /// The data for this entry, after resolving deltas if any. |
|
313 | /// The data for this entry, after resolving deltas if any. | |
307 |
pub fn data(&self) -> Result<Cow<'a, [u8]>, |
|
314 | pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> { | |
308 | let mut entry = self.clone(); |
|
315 | let mut entry = self.clone(); | |
309 | let mut delta_chain = vec![]; |
|
316 | let mut delta_chain = vec![]; | |
310 |
|
317 | |||
@@ -328,7 +335,7 impl<'a> RevlogEntry<'a> { | |||||
328 | .revlog |
|
335 | .revlog | |
329 | .index |
|
336 | .index | |
330 | .get_entry(self.rev) |
|
337 | .get_entry(self.rev) | |
331 |
.ok_or( |
|
338 | .ok_or_else(corrupted)?; | |
332 |
|
339 | |||
333 | let data = if delta_chain.is_empty() { |
|
340 | let data = if delta_chain.is_empty() { | |
334 | entry.data_chunk()? |
|
341 | entry.data_chunk()? | |
@@ -344,13 +351,13 impl<'a> RevlogEntry<'a> { | |||||
344 | ) { |
|
351 | ) { | |
345 | Ok(data) |
|
352 | Ok(data) | |
346 | } else { |
|
353 | } else { | |
347 |
Err( |
|
354 | Err(corrupted()) | |
348 | } |
|
355 | } | |
349 | } |
|
356 | } | |
350 |
|
357 | |||
351 | /// Extract the data contained in the entry. |
|
358 | /// Extract the data contained in the entry. | |
352 | /// This may be a delta. (See `is_delta`.) |
|
359 | /// This may be a delta. (See `is_delta`.) | |
353 |
fn data_chunk(&self) -> Result<Cow<'a, [u8]>, |
|
360 | fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> { | |
354 | if self.bytes.is_empty() { |
|
361 | if self.bytes.is_empty() { | |
355 | return Ok(Cow::Borrowed(&[])); |
|
362 | return Ok(Cow::Borrowed(&[])); | |
356 | } |
|
363 | } | |
@@ -365,39 +372,35 impl<'a> RevlogEntry<'a> { | |||||
365 | // zstd data. |
|
372 | // zstd data. | |
366 | b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)), |
|
373 | b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)), | |
367 | // A proper new format should have had a repo/store requirement. |
|
374 | // A proper new format should have had a repo/store requirement. | |
368 |
_format_type => Err( |
|
375 | _format_type => Err(corrupted()), | |
369 | } |
|
376 | } | |
370 | } |
|
377 | } | |
371 |
|
378 | |||
372 |
fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, |
|
379 | fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> { | |
373 | let mut decoder = ZlibDecoder::new(self.bytes); |
|
380 | let mut decoder = ZlibDecoder::new(self.bytes); | |
374 | if self.is_delta() { |
|
381 | if self.is_delta() { | |
375 | let mut buf = Vec::with_capacity(self.compressed_len); |
|
382 | let mut buf = Vec::with_capacity(self.compressed_len); | |
376 | decoder |
|
383 | decoder.read_to_end(&mut buf).map_err(|_| corrupted())?; | |
377 | .read_to_end(&mut buf) |
|
|||
378 | .map_err(|_| RevlogError::corrupted())?; |
|
|||
379 | Ok(buf) |
|
384 | Ok(buf) | |
380 | } else { |
|
385 | } else { | |
381 | let mut buf = vec![0; self.uncompressed_len]; |
|
386 | let mut buf = vec![0; self.uncompressed_len]; | |
382 | decoder |
|
387 | decoder.read_exact(&mut buf).map_err(|_| corrupted())?; | |
383 | .read_exact(&mut buf) |
|
|||
384 | .map_err(|_| RevlogError::corrupted())?; |
|
|||
385 | Ok(buf) |
|
388 | Ok(buf) | |
386 | } |
|
389 | } | |
387 | } |
|
390 | } | |
388 |
|
391 | |||
389 |
fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, |
|
392 | fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> { | |
390 | if self.is_delta() { |
|
393 | if self.is_delta() { | |
391 | let mut buf = Vec::with_capacity(self.compressed_len); |
|
394 | let mut buf = Vec::with_capacity(self.compressed_len); | |
392 | zstd::stream::copy_decode(self.bytes, &mut buf) |
|
395 | zstd::stream::copy_decode(self.bytes, &mut buf) | |
393 |
.map_err(|_| |
|
396 | .map_err(|_| corrupted())?; | |
394 | Ok(buf) |
|
397 | Ok(buf) | |
395 | } else { |
|
398 | } else { | |
396 | let mut buf = vec![0; self.uncompressed_len]; |
|
399 | let mut buf = vec![0; self.uncompressed_len]; | |
397 | let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf) |
|
400 | let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf) | |
398 |
.map_err(|_| |
|
401 | .map_err(|_| corrupted())?; | |
399 | if len != self.uncompressed_len { |
|
402 | if len != self.uncompressed_len { | |
400 |
Err( |
|
403 | Err(corrupted()) | |
401 | } else { |
|
404 | } else { | |
402 | Ok(buf) |
|
405 | Ok(buf) | |
403 | } |
|
406 | } |
@@ -512,17 +512,18 fn unsure_is_modified( | |||||
512 | } |
|
512 | } | |
513 | let filelog = repo.filelog(hg_path)?; |
|
513 | let filelog = repo.filelog(hg_path)?; | |
514 | let fs_len = fs_metadata.len(); |
|
514 | let fs_len = fs_metadata.len(); | |
|
515 | let filelog_entry = | |||
|
516 | filelog.entry_for_node(entry.node_id()?).map_err(|_| { | |||
|
517 | HgError::corrupted("filelog missing node from manifest") | |||
|
518 | })?; | |||
515 | // TODO: check `fs_len` here like below, but based on |
|
519 | // TODO: check `fs_len` here like below, but based on | |
516 | // `RevlogEntry::uncompressed_len` without decompressing the full filelog |
|
520 | // `RevlogEntry::uncompressed_len` without decompressing the full filelog | |
517 | // contents where possible. This is only valid if the revlog data does not |
|
521 | // contents where possible. This is only valid if the revlog data does not | |
518 | // contain metadata. See how Python’s `revlog.rawsize` calls |
|
522 | // contain metadata. See how Python’s `revlog.rawsize` calls | |
519 | // `storageutil.filerevisioncopied`. |
|
523 | // `storageutil.filerevisioncopied`. | |
520 | // (Maybe also check for content-modifying flags? See `revlog.size`.) |
|
524 | // (Maybe also check for content-modifying flags? See `revlog.size`.) | |
521 |
let filelog_ |
|
525 | let filelog_data = filelog_entry.data()?; | |
522 | filelog.data_for_node(entry.node_id()?).map_err(|_| { |
|
526 | let contents_in_p1 = filelog_data.file_data()?; | |
523 | HgError::corrupted("filelog missing node from manifest") |
|
|||
524 | })?; |
|
|||
525 | let contents_in_p1 = filelog_entry.file_data()?; |
|
|||
526 | if contents_in_p1.len() as u64 != fs_len { |
|
527 | if contents_in_p1.len() as u64 != fs_len { | |
527 | // No need to read the file contents: |
|
528 | // No need to read the file contents: | |
528 | // it cannot be equal if it has a different length. |
|
529 | // it cannot be equal if it has a different length. |
General Comments 0
You need to be logged in to leave comments.
Login now