Show More
@@ -32,6 +32,7 b' REVIDX_DEFAULT_FLAGS' | |||||
32 | REVIDX_FLAGS_ORDER |
|
32 | REVIDX_FLAGS_ORDER | |
33 | REVIDX_RAWTEXT_CHANGING_FLAGS |
|
33 | REVIDX_RAWTEXT_CHANGING_FLAGS | |
34 |
|
34 | |||
|
35 | # Keep this in sync with REVIDX_KNOWN_FLAGS in rust/hg-core/src/revlog/revlog.rs | |||
35 | REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER) |
|
36 | REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER) | |
36 |
|
37 | |||
37 | # Store flag processors (cf. 'addflagprocessor()' to register) |
|
38 | # Store flag processors (cf. 'addflagprocessor()' to register) |
@@ -73,6 +73,89 b' fn store_path(hg_path: &HgPath, suffix: ' | |||||
73 | pub struct FilelogEntry<'a>(RevlogEntry<'a>); |
|
73 | pub struct FilelogEntry<'a>(RevlogEntry<'a>); | |
74 |
|
74 | |||
75 | impl FilelogEntry<'_> { |
|
75 | impl FilelogEntry<'_> { | |
|
76 | /// `self.data()` can be expensive, with decompression and delta | |||
|
77 | /// resolution. | |||
|
78 | /// | |||
|
79 | /// *Without* paying this cost, based on revlog index information | |||
|
80 | /// including `RevlogEntry::uncompressed_len`: | |||
|
81 | /// | |||
|
82 | /// * Returns `true` if the length that `self.data().file_data().len()` | |||
|
83 | /// would return is definitely **not equal** to `other_len`. | |||
|
84 | /// * Returns `false` if available information is inconclusive. | |||
|
85 | pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool { | |||
|
86 | // Relevant code that implements this behavior in Python code: | |||
|
87 | // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied, | |||
|
88 | // revlog.size, revlog.rawsize | |||
|
89 | ||||
|
90 | // Let’s call `file_data_len` what would be returned by | |||
|
91 | // `self.data().file_data().len()`. | |||
|
92 | ||||
|
93 | if self.0.is_cencored() { | |||
|
94 | let file_data_len = 0; | |||
|
95 | return other_len != file_data_len; | |||
|
96 | } | |||
|
97 | ||||
|
98 | if self.0.has_length_affecting_flag_processor() { | |||
|
99 | // We can’t conclude anything about `file_data_len`. | |||
|
100 | return false; | |||
|
101 | } | |||
|
102 | ||||
|
103 | // Revlog revisions (usually) have metadata for the size of | |||
|
104 | // their data after decompression and delta resolution | |||
|
105 | // as would be returned by `Revlog::get_rev_data`. | |||
|
106 | // | |||
|
107 | // For filelogs this is the file’s contents preceded by an optional | |||
|
108 | // metadata block. | |||
|
109 | let uncompressed_len = if let Some(l) = self.0.uncompressed_len() { | |||
|
110 | l as u64 | |||
|
111 | } else { | |||
|
112 | // The field was set to -1, the actual uncompressed len is unknown. | |||
|
113 | // We need to decompress to say more. | |||
|
114 | return false; | |||
|
115 | }; | |||
|
116 | // `uncompressed_len = file_data_len + optional_metadata_len`, | |||
|
117 | // so `file_data_len <= uncompressed_len`. | |||
|
118 | if uncompressed_len < other_len { | |||
|
119 | // Transitively, `file_data_len < other_len`. | |||
|
120 | // So `other_len != file_data_len` definitely. | |||
|
121 | return true; | |||
|
122 | } | |||
|
123 | ||||
|
124 | if uncompressed_len == other_len + 4 { | |||
|
125 | // It’s possible that `file_data_len == other_len` with an empty | |||
|
126 | // metadata block (2 start marker bytes + 2 end marker bytes). | |||
|
127 | // This happens when there wouldn’t otherwise be metadata, but | |||
|
128 | // the first 2 bytes of file data happen to match a start marker | |||
|
129 | // and would be ambiguous. | |||
|
130 | return false; | |||
|
131 | } | |||
|
132 | ||||
|
133 | if !self.0.has_p1() { | |||
|
134 | // There may or may not be copy metadata, so we can’t deduce more | |||
|
135 | // about `file_data_len` without computing file data. | |||
|
136 | return false; | |||
|
137 | } | |||
|
138 | ||||
|
139 | // Filelog ancestry is not meaningful in the way changelog ancestry is. | |||
|
140 | // It only provides hints to delta generation. | |||
|
141 | // p1 and p2 are set to null when making a copy or rename since | |||
|
142 | // contents are likely unrelated to what might have previously existed | |||
|
143 | // at the destination path. | |||
|
144 | // | |||
|
145 | // Conversely, since here p1 is non-null, there is no copy metadata. | |||
|
146 | // Note that this reasoning may be invalidated in the presence of | |||
|
147 | // merges made by some previous versions of Mercurial that | |||
|
148 | // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528> | |||
|
149 | // and `tests/test-issue6528.t`. | |||
|
150 | // | |||
|
151 | // Since copy metadata is currently the only kind of metadata | |||
|
152 | // kept in revlog data of filelogs, | |||
|
153 | // this `FilelogEntry` does not have such metadata: | |||
|
154 | let file_data_len = uncompressed_len; | |||
|
155 | ||||
|
156 | return file_data_len != other_len; | |||
|
157 | } | |||
|
158 | ||||
76 | pub fn data(&self) -> Result<FilelogRevisionData, HgError> { |
|
159 | pub fn data(&self) -> Result<FilelogRevisionData, HgError> { | |
77 | Ok(FilelogRevisionData(self.0.data()?.into_owned())) |
|
160 | Ok(FilelogRevisionData(self.0.data()?.into_owned())) | |
78 | } |
|
161 | } |
@@ -260,6 +260,10 b" impl<'a> IndexEntry<'a> {" | |||||
260 | } |
|
260 | } | |
261 | } |
|
261 | } | |
262 |
|
262 | |||
|
263 | pub fn flags(&self) -> u16 { | |||
|
264 | BigEndian::read_u16(&self.bytes[6..=7]) | |||
|
265 | } | |||
|
266 | ||||
263 | /// Return the compressed length of the data. |
|
267 | /// Return the compressed length of the data. | |
264 | pub fn compressed_len(&self) -> u32 { |
|
268 | pub fn compressed_len(&self) -> u32 { | |
265 | BigEndian::read_u32(&self.bytes[8..=11]) |
|
269 | BigEndian::read_u32(&self.bytes[8..=11]) |
@@ -20,6 +20,18 b' use crate::repo::Repo;' | |||||
20 | use crate::revlog::Revision; |
|
20 | use crate::revlog::Revision; | |
21 | use crate::{Node, NULL_REVISION}; |
|
21 | use crate::{Node, NULL_REVISION}; | |
22 |
|
22 | |||
|
23 | const REVISION_FLAG_CENSORED: u16 = 1 << 15; | |||
|
24 | const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14; | |||
|
25 | const REVISION_FLAG_EXTSTORED: u16 = 1 << 13; | |||
|
26 | const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12; | |||
|
27 | ||||
|
28 | // Keep this in sync with REVIDX_KNOWN_FLAGS in | |||
|
29 | // mercurial/revlogutils/flagutil.py | |||
|
30 | const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED | |||
|
31 | | REVISION_FLAG_ELLIPSIS | |||
|
32 | | REVISION_FLAG_EXTSTORED | |||
|
33 | | REVISION_FLAG_HASCOPIESINFO; | |||
|
34 | ||||
23 | #[derive(derive_more::From)] |
|
35 | #[derive(derive_more::From)] | |
24 | pub enum RevlogError { |
|
36 | pub enum RevlogError { | |
25 | InvalidRevision, |
|
37 | InvalidRevision, | |
@@ -282,6 +294,7 b' impl Revlog {' | |||||
282 | }, |
|
294 | }, | |
283 | p1: index_entry.p1(), |
|
295 | p1: index_entry.p1(), | |
284 | p2: index_entry.p2(), |
|
296 | p2: index_entry.p2(), | |
|
297 | flags: index_entry.flags(), | |||
285 | hash: *index_entry.hash(), |
|
298 | hash: *index_entry.hash(), | |
286 | }; |
|
299 | }; | |
287 | Ok(entry) |
|
300 | Ok(entry) | |
@@ -309,6 +322,7 b" pub struct RevlogEntry<'a> {" | |||||
309 | base_rev_or_base_of_delta_chain: Option<Revision>, |
|
322 | base_rev_or_base_of_delta_chain: Option<Revision>, | |
310 | p1: Revision, |
|
323 | p1: Revision, | |
311 | p2: Revision, |
|
324 | p2: Revision, | |
|
325 | flags: u16, | |||
312 | hash: Node, |
|
326 | hash: Node, | |
313 | } |
|
327 | } | |
314 |
|
328 | |||
@@ -321,6 +335,20 b" impl<'a> RevlogEntry<'a> {" | |||||
321 | u32::try_from(self.uncompressed_len).ok() |
|
335 | u32::try_from(self.uncompressed_len).ok() | |
322 | } |
|
336 | } | |
323 |
|
337 | |||
|
338 | pub fn has_p1(&self) -> bool { | |||
|
339 | self.p1 != NULL_REVISION | |||
|
340 | } | |||
|
341 | ||||
|
342 | pub fn is_cencored(&self) -> bool { | |||
|
343 | (self.flags & REVISION_FLAG_CENSORED) != 0 | |||
|
344 | } | |||
|
345 | ||||
|
346 | pub fn has_length_affecting_flag_processor(&self) -> bool { | |||
|
347 | // Relevant Python code: revlog.size() | |||
|
348 | // note: ELLIPSIS is known to not change the content | |||
|
349 | (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0 | |||
|
350 | } | |||
|
351 | ||||
324 | /// The data for this entry, after resolving deltas if any. |
|
352 | /// The data for this entry, after resolving deltas if any. | |
325 | pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> { |
|
353 | pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> { | |
326 | let mut entry = self.clone(); |
|
354 | let mut entry = self.clone(); |
@@ -516,16 +516,16 b' fn unsure_is_modified(' | |||||
516 | filelog.entry_for_node(entry.node_id()?).map_err(|_| { |
|
516 | filelog.entry_for_node(entry.node_id()?).map_err(|_| { | |
517 | HgError::corrupted("filelog missing node from manifest") |
|
517 | HgError::corrupted("filelog missing node from manifest") | |
518 | })?; |
|
518 | })?; | |
519 | // TODO: check `fs_len` here like below, but based on |
|
519 | if filelog_entry.file_data_len_not_equal_to(fs_len) { | |
520 | // `RevlogEntry::uncompressed_len` without decompressing the full filelog |
|
520 | // No need to read file contents: | |
521 | // contents where possible. This is only valid if the revlog data does not |
|
521 | // it cannot be equal if it has a different length. | |
522 | // contain metadata. See how Python’s `revlog.rawsize` calls |
|
522 | return Ok(true); | |
523 | // `storageutil.filerevisioncopied`. |
|
523 | } | |
524 | // (Maybe also check for content-modifying flags? See `revlog.size`.) |
|
524 | ||
525 | let filelog_data = filelog_entry.data()?; |
|
525 | let p1_filelog_data = filelog_entry.data()?; | |
526 |
let contents |
|
526 | let p1_contents = p1_filelog_data.file_data()?; | |
527 |
if contents |
|
527 | if p1_contents.len() as u64 != fs_len { | |
528 |
// No need to read |
|
528 | // No need to read file contents: | |
529 | // it cannot be equal if it has a different length. |
|
529 | // it cannot be equal if it has a different length. | |
530 | return Ok(true); |
|
530 | return Ok(true); | |
531 | } |
|
531 | } | |
@@ -535,5 +535,5 b' fn unsure_is_modified(' | |||||
535 | } else { |
|
535 | } else { | |
536 | vfs.read(fs_path)? |
|
536 | vfs.read(fs_path)? | |
537 | }; |
|
537 | }; | |
538 |
Ok(contents |
|
538 | Ok(p1_contents != &*fs_contents) | |
539 | } |
|
539 | } |
@@ -193,8 +193,8 b' if the size differs, and reading the exp' | |||||
193 | deltas where possible.) |
|
193 | deltas where possible.) | |
194 |
|
194 | |||
195 | $ hg st |
|
195 | $ hg st | |
196 |
M D.txt |
|
196 | M D.txt | |
197 |
M b.txt |
|
197 | M b.txt | |
198 | $ hg debugrevlogindex b.txt |
|
198 | $ hg debugrevlogindex b.txt | |
199 | rev linkrev nodeid p1 p2 |
|
199 | rev linkrev nodeid p1 p2 | |
200 | 0 2 05b806ebe5ea 000000000000 000000000000 |
|
200 | 0 2 05b806ebe5ea 000000000000 000000000000 | |
@@ -212,8 +212,8 b' Dry-run the fix' | |||||
212 | found affected revision 1 for filelog 'data/b.txt.i' |
|
212 | found affected revision 1 for filelog 'data/b.txt.i' | |
213 | found affected revision 3 for filelog 'data/b.txt.i' |
|
213 | found affected revision 3 for filelog 'data/b.txt.i' | |
214 | $ hg st |
|
214 | $ hg st | |
215 |
M D.txt |
|
215 | M D.txt | |
216 |
M b.txt |
|
216 | M b.txt | |
217 | $ hg debugrevlogindex b.txt |
|
217 | $ hg debugrevlogindex b.txt | |
218 | rev linkrev nodeid p1 p2 |
|
218 | rev linkrev nodeid p1 p2 | |
219 | 0 2 05b806ebe5ea 000000000000 000000000000 |
|
219 | 0 2 05b806ebe5ea 000000000000 000000000000 | |
@@ -231,8 +231,8 b' Test the --paranoid option' | |||||
231 | found affected revision 1 for filelog 'data/b.txt.i' |
|
231 | found affected revision 1 for filelog 'data/b.txt.i' | |
232 | found affected revision 3 for filelog 'data/b.txt.i' |
|
232 | found affected revision 3 for filelog 'data/b.txt.i' | |
233 | $ hg st |
|
233 | $ hg st | |
234 |
M D.txt |
|
234 | M D.txt | |
235 |
M b.txt |
|
235 | M b.txt | |
236 | $ hg debugrevlogindex b.txt |
|
236 | $ hg debugrevlogindex b.txt | |
237 | rev linkrev nodeid p1 p2 |
|
237 | rev linkrev nodeid p1 p2 | |
238 | 0 2 05b806ebe5ea 000000000000 000000000000 |
|
238 | 0 2 05b806ebe5ea 000000000000 000000000000 | |
@@ -308,8 +308,8 b" only since some versions of tar don't ha" | |||||
308 | found affected revision 1 for filelog 'b.txt' |
|
308 | found affected revision 1 for filelog 'b.txt' | |
309 | found affected revision 3 for filelog 'b.txt' |
|
309 | found affected revision 3 for filelog 'b.txt' | |
310 | $ hg st |
|
310 | $ hg st | |
311 |
M D.txt |
|
311 | M D.txt | |
312 |
M b.txt |
|
312 | M b.txt | |
313 | $ hg debugrevlogindex b.txt |
|
313 | $ hg debugrevlogindex b.txt | |
314 | rev linkrev nodeid p1 p2 |
|
314 | rev linkrev nodeid p1 p2 | |
315 | 0 2 05b806ebe5ea 000000000000 000000000000 |
|
315 | 0 2 05b806ebe5ea 000000000000 000000000000 |
General Comments 0
You need to be logged in to leave comments.
Login now