Show More
@@ -32,6 +32,7 b' REVIDX_DEFAULT_FLAGS' | |||
|
32 | 32 | REVIDX_FLAGS_ORDER |
|
33 | 33 | REVIDX_RAWTEXT_CHANGING_FLAGS |
|
34 | 34 | |
|
35 | # Keep this in sync with REVIDX_KNOWN_FLAGS in rust/hg-core/src/revlog/revlog.rs | |
|
35 | 36 | REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER) |
|
36 | 37 | |
|
37 | 38 | # Store flag processors (cf. 'addflagprocessor()' to register) |
@@ -73,6 +73,89 b' fn store_path(hg_path: &HgPath, suffix: ' | |||
|
73 | 73 | pub struct FilelogEntry<'a>(RevlogEntry<'a>); |
|
74 | 74 | |
|
75 | 75 | impl FilelogEntry<'_> { |
|
76 | /// `self.data()` can be expensive, with decompression and delta | |
|
77 | /// resolution. | |
|
78 | /// | |
|
79 | /// *Without* paying this cost, based on revlog index information | |
|
80 | /// including `RevlogEntry::uncompressed_len`: | |
|
81 | /// | |
|
82 | /// * Returns `true` if the length that `self.data().file_data().len()` | |
|
83 | /// would return is definitely **not equal** to `other_len`. | |
|
84 | /// * Returns `false` if available information is inconclusive. | |
|
85 | pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool { | |
|
86 | // Relevant code that implements this behavior in Python code: | |
|
87 | // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied, | |
|
88 | // revlog.size, revlog.rawsize | |
|
89 | ||
|
90 | // Let’s call `file_data_len` what would be returned by | |
|
91 | // `self.data().file_data().len()`. | |
|
92 | ||
|
93 | if self.0.is_cencored() { | |
|
94 | let file_data_len = 0; | |
|
95 | return other_len != file_data_len; | |
|
96 | } | |
|
97 | ||
|
98 | if self.0.has_length_affecting_flag_processor() { | |
|
99 | // We can’t conclude anything about `file_data_len`. | |
|
100 | return false; | |
|
101 | } | |
|
102 | ||
|
103 | // Revlog revisions (usually) have metadata for the size of | |
|
104 | // their data after decompression and delta resolution | |
|
105 | // as would be returned by `Revlog::get_rev_data`. | |
|
106 | // | |
|
107 | // For filelogs this is the file’s contents preceded by an optional | |
|
108 | // metadata block. | |
|
109 | let uncompressed_len = if let Some(l) = self.0.uncompressed_len() { | |
|
110 | l as u64 | |
|
111 | } else { | |
|
112 | // The field was set to -1, the actual uncompressed len is unknown. | |
|
113 | // We need to decompress to say more. | |
|
114 | return false; | |
|
115 | }; | |
|
116 | // `uncompressed_len = file_data_len + optional_metadata_len`, | |
|
117 | // so `file_data_len <= uncompressed_len`. | |
|
118 | if uncompressed_len < other_len { | |
|
119 | // Transitively, `file_data_len < other_len`. | |
|
120 | // So `other_len != file_data_len` definitely. | |
|
121 | return true; | |
|
122 | } | |
|
123 | ||
|
124 | if uncompressed_len == other_len + 4 { | |
|
125 | // It’s possible that `file_data_len == other_len` with an empty | |
|
126 | // metadata block (2 start marker bytes + 2 end marker bytes). | |
|
127 | // This happens when there wouldn’t otherwise be metadata, but | |
|
128 | // the first 2 bytes of file data happen to match a start marker | |
|
129 | // and would be ambiguous. | |
|
130 | return false; | |
|
131 | } | |
|
132 | ||
|
133 | if !self.0.has_p1() { | |
|
134 | // There may or may not be copy metadata, so we can’t deduce more | |
|
135 | // about `file_data_len` without computing file data. | |
|
136 | return false; | |
|
137 | } | |
|
138 | ||
|
139 | // Filelog ancestry is not meaningful in the way changelog ancestry is. | |
|
140 | // It only provides hints to delta generation. | |
|
141 | // p1 and p2 are set to null when making a copy or rename since | |
|
142 | // contents are likely unrelated to what might have previously existed | |
|
143 | // at the destination path. | |
|
144 | // | |
|
145 | // Conversely, since here p1 is non-null, there is no copy metadata. | |
|
146 | // Note that this reasoning may be invalidated in the presence of | |
|
147 | // merges made by some previous versions of Mercurial that | |
|
148 | // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528> | |
|
149 | // and `tests/test-issue6528.t`. | |
|
150 | // | |
|
151 | // Since copy metadata is currently the only kind of metadata | |
|
152 | // kept in revlog data of filelogs, | |
|
153 | // this `FilelogEntry` does not have such metadata: | |
|
154 | let file_data_len = uncompressed_len; | |
|
155 | ||
|
156 | return file_data_len != other_len; | |
|
157 | } | |
|
158 | ||
|
76 | 159 | pub fn data(&self) -> Result<FilelogRevisionData, HgError> { |
|
77 | 160 | Ok(FilelogRevisionData(self.0.data()?.into_owned())) |
|
78 | 161 | } |
@@ -260,6 +260,10 b" impl<'a> IndexEntry<'a> {" | |||
|
260 | 260 | } |
|
261 | 261 | } |
|
262 | 262 | |
|
263 | pub fn flags(&self) -> u16 { | |
|
264 | BigEndian::read_u16(&self.bytes[6..=7]) | |
|
265 | } | |
|
266 | ||
|
263 | 267 | /// Return the compressed length of the data. |
|
264 | 268 | pub fn compressed_len(&self) -> u32 { |
|
265 | 269 | BigEndian::read_u32(&self.bytes[8..=11]) |
@@ -20,6 +20,18 b' use crate::repo::Repo;' | |||
|
20 | 20 | use crate::revlog::Revision; |
|
21 | 21 | use crate::{Node, NULL_REVISION}; |
|
22 | 22 | |
|
23 | const REVISION_FLAG_CENSORED: u16 = 1 << 15; | |
|
24 | const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14; | |
|
25 | const REVISION_FLAG_EXTSTORED: u16 = 1 << 13; | |
|
26 | const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12; | |
|
27 | ||
|
28 | // Keep this in sync with REVIDX_KNOWN_FLAGS in | |
|
29 | // mercurial/revlogutils/flagutil.py | |
|
30 | const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED | |
|
31 | | REVISION_FLAG_ELLIPSIS | |
|
32 | | REVISION_FLAG_EXTSTORED | |
|
33 | | REVISION_FLAG_HASCOPIESINFO; | |
|
34 | ||
|
23 | 35 | #[derive(derive_more::From)] |
|
24 | 36 | pub enum RevlogError { |
|
25 | 37 | InvalidRevision, |
@@ -282,6 +294,7 b' impl Revlog {' | |||
|
282 | 294 | }, |
|
283 | 295 | p1: index_entry.p1(), |
|
284 | 296 | p2: index_entry.p2(), |
|
297 | flags: index_entry.flags(), | |
|
285 | 298 | hash: *index_entry.hash(), |
|
286 | 299 | }; |
|
287 | 300 | Ok(entry) |
@@ -309,6 +322,7 b" pub struct RevlogEntry<'a> {" | |||
|
309 | 322 | base_rev_or_base_of_delta_chain: Option<Revision>, |
|
310 | 323 | p1: Revision, |
|
311 | 324 | p2: Revision, |
|
325 | flags: u16, | |
|
312 | 326 | hash: Node, |
|
313 | 327 | } |
|
314 | 328 | |
@@ -321,6 +335,20 b" impl<'a> RevlogEntry<'a> {" | |||
|
321 | 335 | u32::try_from(self.uncompressed_len).ok() |
|
322 | 336 | } |
|
323 | 337 | |
|
338 | pub fn has_p1(&self) -> bool { | |
|
339 | self.p1 != NULL_REVISION | |
|
340 | } | |
|
341 | ||
|
342 | pub fn is_cencored(&self) -> bool { | |
|
343 | (self.flags & REVISION_FLAG_CENSORED) != 0 | |
|
344 | } | |
|
345 | ||
|
346 | pub fn has_length_affecting_flag_processor(&self) -> bool { | |
|
347 | // Relevant Python code: revlog.size() | |
|
348 | // note: ELLIPSIS is known to not change the content | |
|
349 | (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0 | |
|
350 | } | |
|
351 | ||
|
324 | 352 | /// The data for this entry, after resolving deltas if any. |
|
325 | 353 | pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> { |
|
326 | 354 | let mut entry = self.clone(); |
@@ -516,16 +516,16 b' fn unsure_is_modified(' | |||
|
516 | 516 | filelog.entry_for_node(entry.node_id()?).map_err(|_| { |
|
517 | 517 | HgError::corrupted("filelog missing node from manifest") |
|
518 | 518 | })?; |
|
519 | // TODO: check `fs_len` here like below, but based on | |
|
520 | // `RevlogEntry::uncompressed_len` without decompressing the full filelog | |
|
521 | // contents where possible. This is only valid if the revlog data does not | |
|
522 | // contain metadata. See how Python’s `revlog.rawsize` calls | |
|
523 | // `storageutil.filerevisioncopied`. | |
|
524 | // (Maybe also check for content-modifying flags? See `revlog.size`.) | |
|
525 | let filelog_data = filelog_entry.data()?; | |
|
526 |
let contents |
|
|
527 |
if contents |
|
|
528 |
// No need to read |
|
|
519 | if filelog_entry.file_data_len_not_equal_to(fs_len) { | |
|
520 | // No need to read file contents: | |
|
521 | // it cannot be equal if it has a different length. | |
|
522 | return Ok(true); | |
|
523 | } | |
|
524 | ||
|
525 | let p1_filelog_data = filelog_entry.data()?; | |
|
526 | let p1_contents = p1_filelog_data.file_data()?; | |
|
527 | if p1_contents.len() as u64 != fs_len { | |
|
528 | // No need to read file contents: | |
|
529 | 529 | // it cannot be equal if it has a different length. |
|
530 | 530 | return Ok(true); |
|
531 | 531 | } |
@@ -535,5 +535,5 b' fn unsure_is_modified(' | |||
|
535 | 535 | } else { |
|
536 | 536 | vfs.read(fs_path)? |
|
537 | 537 | }; |
|
538 |
Ok(contents |
|
|
538 | Ok(p1_contents != &*fs_contents) | |
|
539 | 539 | } |
@@ -193,8 +193,8 b' if the size differs, and reading the exp' | |||
|
193 | 193 | deltas where possible.) |
|
194 | 194 | |
|
195 | 195 | $ hg st |
|
196 |
M D.txt |
|
|
197 |
M b.txt |
|
|
196 | M D.txt | |
|
197 | M b.txt | |
|
198 | 198 | $ hg debugrevlogindex b.txt |
|
199 | 199 | rev linkrev nodeid p1 p2 |
|
200 | 200 | 0 2 05b806ebe5ea 000000000000 000000000000 |
@@ -212,8 +212,8 b' Dry-run the fix' | |||
|
212 | 212 | found affected revision 1 for filelog 'data/b.txt.i' |
|
213 | 213 | found affected revision 3 for filelog 'data/b.txt.i' |
|
214 | 214 | $ hg st |
|
215 |
M D.txt |
|
|
216 |
M b.txt |
|
|
215 | M D.txt | |
|
216 | M b.txt | |
|
217 | 217 | $ hg debugrevlogindex b.txt |
|
218 | 218 | rev linkrev nodeid p1 p2 |
|
219 | 219 | 0 2 05b806ebe5ea 000000000000 000000000000 |
@@ -231,8 +231,8 b' Test the --paranoid option' | |||
|
231 | 231 | found affected revision 1 for filelog 'data/b.txt.i' |
|
232 | 232 | found affected revision 3 for filelog 'data/b.txt.i' |
|
233 | 233 | $ hg st |
|
234 |
M D.txt |
|
|
235 |
M b.txt |
|
|
234 | M D.txt | |
|
235 | M b.txt | |
|
236 | 236 | $ hg debugrevlogindex b.txt |
|
237 | 237 | rev linkrev nodeid p1 p2 |
|
238 | 238 | 0 2 05b806ebe5ea 000000000000 000000000000 |
@@ -308,8 +308,8 b" only since some versions of tar don't ha" | |||
|
308 | 308 | found affected revision 1 for filelog 'b.txt' |
|
309 | 309 | found affected revision 3 for filelog 'b.txt' |
|
310 | 310 | $ hg st |
|
311 |
M D.txt |
|
|
312 |
M b.txt |
|
|
311 | M D.txt | |
|
312 | M b.txt | |
|
313 | 313 | $ hg debugrevlogindex b.txt |
|
314 | 314 | rev linkrev nodeid p1 p2 |
|
315 | 315 | 0 2 05b806ebe5ea 000000000000 000000000000 |
General Comments 0
You need to be logged in to leave comments.
Login now