##// END OF EJS Templates
rhg: disambiguate status without decompressing filelog if possible...
Simon Sapin -
r49378:e91aa800 default
parent child Browse files
Show More
@@ -32,6 +32,7 b' REVIDX_DEFAULT_FLAGS'
32 32 REVIDX_FLAGS_ORDER
33 33 REVIDX_RAWTEXT_CHANGING_FLAGS
34 34
35 # Keep this in sync with REVIDX_KNOWN_FLAGS in rust/hg-core/src/revlog/revlog.rs
35 36 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
36 37
37 38 # Store flag processors (cf. 'addflagprocessor()' to register)
@@ -73,6 +73,89 b' fn store_path(hg_path: &HgPath, suffix: '
73 73 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
74 74
75 75 impl FilelogEntry<'_> {
76 /// `self.data()` can be expensive, with decompression and delta
77 /// resolution.
78 ///
79 /// *Without* paying this cost, based on revlog index information
80 /// including `RevlogEntry::uncompressed_len`:
81 ///
82 /// * Returns `true` if the length that `self.data().file_data().len()`
83 /// would return is definitely **not equal** to `other_len`.
84 /// * Returns `false` if available information is inconclusive.
85 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
86 // Relevant code that implements this behavior in Python:
87 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
88 // revlog.size, revlog.rawsize
89
90 // Let’s call `file_data_len` what would be returned by
91 // `self.data().file_data().len()`.
92
93 if self.0.is_cencored() {
94 let file_data_len = 0;
95 return other_len != file_data_len;
96 }
97
98 if self.0.has_length_affecting_flag_processor() {
99 // We can’t conclude anything about `file_data_len`.
100 return false;
101 }
102
103 // Revlog revisions (usually) have metadata for the size of
104 // their data after decompression and delta resolution
105 // as would be returned by `Revlog::get_rev_data`.
106 //
107 // For filelogs this is the file’s contents preceded by an optional
108 // metadata block.
109 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
110 l as u64
111 } else {
112 // The field was set to -1, so the actual uncompressed length is unknown.
113 // We need to decompress to say more.
114 return false;
115 };
116 // `uncompressed_len = file_data_len + optional_metadata_len`,
117 // so `file_data_len <= uncompressed_len`.
118 if uncompressed_len < other_len {
119 // Transitively, `file_data_len < other_len`.
120 // So `other_len != file_data_len` definitely.
121 return true;
122 }
123
124 if uncompressed_len == other_len + 4 {
125 // It’s possible that `file_data_len == other_len` with an empty
126 // metadata block (2 start marker bytes + 2 end marker bytes).
127 // This happens when there wouldn’t otherwise be metadata, but
128 // the first 2 bytes of file data happen to match a start marker
129 // and would be ambiguous.
130 return false;
131 }
132
133 if !self.0.has_p1() {
134 // There may or may not be copy metadata, so we can’t deduce more
135 // about `file_data_len` without computing file data.
136 return false;
137 }
138
139 // Filelog ancestry is not meaningful in the way changelog ancestry is.
140 // It only provides hints to delta generation.
141 // p1 and p2 are set to null when making a copy or rename since
142 // contents are likely unrelated to what might have previously existed
143 // at the destination path.
144 //
145 // Conversely, since here p1 is non-null, there is no copy metadata.
146 // Note that this reasoning may be invalidated in the presence of
147 // merges made by some previous versions of Mercurial that
148 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
149 // and `tests/test-issue6528.t`.
150 //
151 // Since copy metadata is currently the only kind of metadata
152 // kept in revlog data of filelogs,
153 // this `FilelogEntry` does not have such metadata:
154 let file_data_len = uncompressed_len;
155
156 return file_data_len != other_len;
157 }
158
76 159 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
77 160 Ok(FilelogRevisionData(self.0.data()?.into_owned()))
78 161 }
@@ -260,6 +260,10 b" impl<'a> IndexEntry<'a> {"
260 260 }
261 261 }
262 262
263 pub fn flags(&self) -> u16 {
264 BigEndian::read_u16(&self.bytes[6..=7])
265 }
266
263 267 /// Return the compressed length of the data.
264 268 pub fn compressed_len(&self) -> u32 {
265 269 BigEndian::read_u32(&self.bytes[8..=11])
@@ -20,6 +20,18 b' use crate::repo::Repo;'
20 20 use crate::revlog::Revision;
21 21 use crate::{Node, NULL_REVISION};
22 22
23 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
24 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
25 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
26 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
27
28 // Keep this in sync with REVIDX_KNOWN_FLAGS in
29 // mercurial/revlogutils/flagutil.py
30 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
31 | REVISION_FLAG_ELLIPSIS
32 | REVISION_FLAG_EXTSTORED
33 | REVISION_FLAG_HASCOPIESINFO;
34
23 35 #[derive(derive_more::From)]
24 36 pub enum RevlogError {
25 37 InvalidRevision,
@@ -282,6 +294,7 b' impl Revlog {'
282 294 },
283 295 p1: index_entry.p1(),
284 296 p2: index_entry.p2(),
297 flags: index_entry.flags(),
285 298 hash: *index_entry.hash(),
286 299 };
287 300 Ok(entry)
@@ -309,6 +322,7 b" pub struct RevlogEntry<'a> {"
309 322 base_rev_or_base_of_delta_chain: Option<Revision>,
310 323 p1: Revision,
311 324 p2: Revision,
325 flags: u16,
312 326 hash: Node,
313 327 }
314 328
@@ -321,6 +335,20 b" impl<'a> RevlogEntry<'a> {"
321 335 u32::try_from(self.uncompressed_len).ok()
322 336 }
323 337
338 pub fn has_p1(&self) -> bool {
339 self.p1 != NULL_REVISION
340 }
341
342 pub fn is_cencored(&self) -> bool {
343 (self.flags & REVISION_FLAG_CENSORED) != 0
344 }
345
346 pub fn has_length_affecting_flag_processor(&self) -> bool {
347 // Relevant Python code: revlog.size()
348 // note: ELLIPSIS is known to not change the content
349 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
350 }
351
324 352 /// The data for this entry, after resolving deltas if any.
325 353 pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
326 354 let mut entry = self.clone();
@@ -516,16 +516,16 b' fn unsure_is_modified('
516 516 filelog.entry_for_node(entry.node_id()?).map_err(|_| {
517 517 HgError::corrupted("filelog missing node from manifest")
518 518 })?;
519 // TODO: check `fs_len` here like below, but based on
520 // `RevlogEntry::uncompressed_len` without decompressing the full filelog
521 // contents where possible. This is only valid if the revlog data does not
522 // contain metadata. See how Python’s `revlog.rawsize` calls
523 // `storageutil.filerevisioncopied`.
524 // (Maybe also check for content-modifying flags? See `revlog.size`.)
525 let filelog_data = filelog_entry.data()?;
526 let contents_in_p1 = filelog_data.file_data()?;
527 if contents_in_p1.len() as u64 != fs_len {
528 // No need to read the file contents:
519 if filelog_entry.file_data_len_not_equal_to(fs_len) {
520 // No need to read file contents:
521 // it cannot be equal if it has a different length.
522 return Ok(true);
523 }
524
525 let p1_filelog_data = filelog_entry.data()?;
526 let p1_contents = p1_filelog_data.file_data()?;
527 if p1_contents.len() as u64 != fs_len {
528 // No need to read file contents:
529 529 // it cannot be equal if it has a different length.
530 530 return Ok(true);
531 531 }
@@ -535,5 +535,5 b' fn unsure_is_modified('
535 535 } else {
536 536 vfs.read(fs_path)?
537 537 };
538 Ok(contents_in_p1 != &*fs_contents)
538 Ok(p1_contents != &*fs_contents)
539 539 }
@@ -193,8 +193,8 b' if the size differs, and reading the exp'
193 193 deltas where possible.)
194 194
195 195 $ hg st
196 M D.txt (no-rhg !)
197 M b.txt (no-rhg !)
196 M D.txt
197 M b.txt
198 198 $ hg debugrevlogindex b.txt
199 199 rev linkrev nodeid p1 p2
200 200 0 2 05b806ebe5ea 000000000000 000000000000
@@ -212,8 +212,8 b' Dry-run the fix'
212 212 found affected revision 1 for filelog 'data/b.txt.i'
213 213 found affected revision 3 for filelog 'data/b.txt.i'
214 214 $ hg st
215 M D.txt (no-rhg !)
216 M b.txt (no-rhg !)
215 M D.txt
216 M b.txt
217 217 $ hg debugrevlogindex b.txt
218 218 rev linkrev nodeid p1 p2
219 219 0 2 05b806ebe5ea 000000000000 000000000000
@@ -231,8 +231,8 b' Test the --paranoid option'
231 231 found affected revision 1 for filelog 'data/b.txt.i'
232 232 found affected revision 3 for filelog 'data/b.txt.i'
233 233 $ hg st
234 M D.txt (no-rhg !)
235 M b.txt (no-rhg !)
234 M D.txt
235 M b.txt
236 236 $ hg debugrevlogindex b.txt
237 237 rev linkrev nodeid p1 p2
238 238 0 2 05b806ebe5ea 000000000000 000000000000
@@ -308,8 +308,8 b" only since some versions of tar don't ha"
308 308 found affected revision 1 for filelog 'b.txt'
309 309 found affected revision 3 for filelog 'b.txt'
310 310 $ hg st
311 M D.txt (no-rhg !)
312 M b.txt (no-rhg !)
311 M D.txt
312 M b.txt
313 313 $ hg debugrevlogindex b.txt
314 314 rev linkrev nodeid p1 p2
315 315 0 2 05b806ebe5ea 000000000000 000000000000
General Comments 0
You need to be logged in to leave comments. Login now