##// END OF EJS Templates
rhg: disambiguate status without decompressing filelog if possible...
Simon Sapin -
r49378:e91aa800 default
parent child Browse files
Show More
@@ -32,6 +32,7 b' REVIDX_DEFAULT_FLAGS'
32 REVIDX_FLAGS_ORDER
32 REVIDX_FLAGS_ORDER
33 REVIDX_RAWTEXT_CHANGING_FLAGS
33 REVIDX_RAWTEXT_CHANGING_FLAGS
34
34
35 # Keep this in sync with REVIDX_KNOWN_FLAGS in rust/hg-core/src/revlog/revlog.rs
35 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
36 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
36
37
37 # Store flag processors (cf. 'addflagprocessor()' to register)
38 # Store flag processors (cf. 'addflagprocessor()' to register)
@@ -73,6 +73,89 b' fn store_path(hg_path: &HgPath, suffix: '
73 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
73 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
74
74
75 impl FilelogEntry<'_> {
75 impl FilelogEntry<'_> {
76 /// `self.data()` can be expensive, with decompression and delta
77 /// resolution.
78 ///
79 /// *Without* paying this cost, based on revlog index information
80 /// including `RevlogEntry::uncompressed_len`:
81 ///
82 /// * Returns `true` if the length that `self.data().file_data().len()`
83 /// would return is definitely **not equal** to `other_len`.
84 /// * Returns `false` if available information is inconclusive.
85 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
86 // Relevant code that implements this behavior in Python code:
87 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
88 // revlog.size, revlog.rawsize
89
90 // Let’s call `file_data_len` what would be returned by
91 // `self.data().file_data().len()`.
92
93 if self.0.is_cencored() {
94 let file_data_len = 0;
95 return other_len != file_data_len;
96 }
97
98 if self.0.has_length_affecting_flag_processor() {
99 // We can’t conclude anything about `file_data_len`.
100 return false;
101 }
102
103 // Revlog revisions (usually) have metadata for the size of
104 // their data after decompression and delta resolution
105 // as would be returned by `Revlog::get_rev_data`.
106 //
107 // For filelogs this is the file’s contents preceded by an optional
108 // metadata block.
109 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
110 l as u64
111 } else {
112 // The field was set to -1, the actual uncompressed len is unknown.
113 // We need to decompress to say more.
114 return false;
115 };
116 // `uncompressed_len = file_data_len + optional_metadata_len`,
117 // so `file_data_len <= uncompressed_len`.
118 if uncompressed_len < other_len {
119 // Transitively, `file_data_len < other_len`.
120 // So `other_len != file_data_len` definitely.
121 return true;
122 }
123
124 if uncompressed_len == other_len + 4 {
125 // It’s possible that `file_data_len == other_len` with an empty
126 // metadata block (2 start marker bytes + 2 end marker bytes).
127 // This happens when there wouldn’t otherwise be metadata, but
128 // the first 2 bytes of file data happen to match a start marker
129 // and would be ambiguous.
130 return false;
131 }
132
133 if !self.0.has_p1() {
134 // There may or may not be copy metadata, so we can’t deduce more
135 // about `file_data_len` without computing file data.
136 return false;
137 }
138
139 // Filelog ancestry is not meaningful in the way changelog ancestry is.
140 // It only provides hints to delta generation.
141 // p1 and p2 are set to null when making a copy or rename since
142 // contents are likely unrelated to what might have previously existed
143 // at the destination path.
144 //
145 // Conversely, since here p1 is non-null, there is no copy metadata.
146 // Note that this reasoning may be invalidated in the presence of
147 // merges made by some previous versions of Mercurial that
148 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
149 // and `tests/test-issue6528.t`.
150 //
151 // Since copy metadata is currently the only kind of metadata
152 // kept in revlog data of filelogs,
153 // this `FilelogEntry` does not have such metadata:
154 let file_data_len = uncompressed_len;
155
156 return file_data_len != other_len;
157 }
158
76 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
159 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
77 Ok(FilelogRevisionData(self.0.data()?.into_owned()))
160 Ok(FilelogRevisionData(self.0.data()?.into_owned()))
78 }
161 }
@@ -260,6 +260,10 b" impl<'a> IndexEntry<'a> {"
260 }
260 }
261 }
261 }
262
262
263 pub fn flags(&self) -> u16 {
264 BigEndian::read_u16(&self.bytes[6..=7])
265 }
266
263 /// Return the compressed length of the data.
267 /// Return the compressed length of the data.
264 pub fn compressed_len(&self) -> u32 {
268 pub fn compressed_len(&self) -> u32 {
265 BigEndian::read_u32(&self.bytes[8..=11])
269 BigEndian::read_u32(&self.bytes[8..=11])
@@ -20,6 +20,18 b' use crate::repo::Repo;'
20 use crate::revlog::Revision;
20 use crate::revlog::Revision;
21 use crate::{Node, NULL_REVISION};
21 use crate::{Node, NULL_REVISION};
22
22
23 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
24 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
25 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
26 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
27
28 // Keep this in sync with REVIDX_KNOWN_FLAGS in
29 // mercurial/revlogutils/flagutil.py
30 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
31 | REVISION_FLAG_ELLIPSIS
32 | REVISION_FLAG_EXTSTORED
33 | REVISION_FLAG_HASCOPIESINFO;
34
23 #[derive(derive_more::From)]
35 #[derive(derive_more::From)]
24 pub enum RevlogError {
36 pub enum RevlogError {
25 InvalidRevision,
37 InvalidRevision,
@@ -282,6 +294,7 b' impl Revlog {'
282 },
294 },
283 p1: index_entry.p1(),
295 p1: index_entry.p1(),
284 p2: index_entry.p2(),
296 p2: index_entry.p2(),
297 flags: index_entry.flags(),
285 hash: *index_entry.hash(),
298 hash: *index_entry.hash(),
286 };
299 };
287 Ok(entry)
300 Ok(entry)
@@ -309,6 +322,7 b" pub struct RevlogEntry<'a> {"
309 base_rev_or_base_of_delta_chain: Option<Revision>,
322 base_rev_or_base_of_delta_chain: Option<Revision>,
310 p1: Revision,
323 p1: Revision,
311 p2: Revision,
324 p2: Revision,
325 flags: u16,
312 hash: Node,
326 hash: Node,
313 }
327 }
314
328
@@ -321,6 +335,20 b" impl<'a> RevlogEntry<'a> {"
321 u32::try_from(self.uncompressed_len).ok()
335 u32::try_from(self.uncompressed_len).ok()
322 }
336 }
323
337
338 pub fn has_p1(&self) -> bool {
339 self.p1 != NULL_REVISION
340 }
341
342 pub fn is_cencored(&self) -> bool {
343 (self.flags & REVISION_FLAG_CENSORED) != 0
344 }
345
346 pub fn has_length_affecting_flag_processor(&self) -> bool {
347 // Relevant Python code: revlog.size()
348 // note: ELLIPSIS is known to not change the content
349 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
350 }
351
324 /// The data for this entry, after resolving deltas if any.
352 /// The data for this entry, after resolving deltas if any.
325 pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
353 pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
326 let mut entry = self.clone();
354 let mut entry = self.clone();
@@ -516,16 +516,16 b' fn unsure_is_modified('
516 filelog.entry_for_node(entry.node_id()?).map_err(|_| {
516 filelog.entry_for_node(entry.node_id()?).map_err(|_| {
517 HgError::corrupted("filelog missing node from manifest")
517 HgError::corrupted("filelog missing node from manifest")
518 })?;
518 })?;
519 // TODO: check `fs_len` here like below, but based on
519 if filelog_entry.file_data_len_not_equal_to(fs_len) {
520 // `RevlogEntry::uncompressed_len` without decompressing the full filelog
520 // No need to read file contents:
521 // contents where possible. This is only valid if the revlog data does not
521 // it cannot be equal if it has a different length.
522 // contain metadata. See how Python’s `revlog.rawsize` calls
522 return Ok(true);
523 // `storageutil.filerevisioncopied`.
523 }
524 // (Maybe also check for content-modifying flags? See `revlog.size`.)
524
525 let filelog_data = filelog_entry.data()?;
525 let p1_filelog_data = filelog_entry.data()?;
526 let contents_in_p1 = filelog_data.file_data()?;
526 let p1_contents = p1_filelog_data.file_data()?;
527 if contents_in_p1.len() as u64 != fs_len {
527 if p1_contents.len() as u64 != fs_len {
528 // No need to read the file contents:
528 // No need to read file contents:
529 // it cannot be equal if it has a different length.
529 // it cannot be equal if it has a different length.
530 return Ok(true);
530 return Ok(true);
531 }
531 }
@@ -535,5 +535,5 b' fn unsure_is_modified('
535 } else {
535 } else {
536 vfs.read(fs_path)?
536 vfs.read(fs_path)?
537 };
537 };
538 Ok(contents_in_p1 != &*fs_contents)
538 Ok(p1_contents != &*fs_contents)
539 }
539 }
@@ -193,8 +193,8 b' if the size differs, and reading the exp'
193 deltas where possible.)
193 deltas where possible.)
194
194
195 $ hg st
195 $ hg st
196 M D.txt (no-rhg !)
196 M D.txt
197 M b.txt (no-rhg !)
197 M b.txt
198 $ hg debugrevlogindex b.txt
198 $ hg debugrevlogindex b.txt
199 rev linkrev nodeid p1 p2
199 rev linkrev nodeid p1 p2
200 0 2 05b806ebe5ea 000000000000 000000000000
200 0 2 05b806ebe5ea 000000000000 000000000000
@@ -212,8 +212,8 b' Dry-run the fix'
212 found affected revision 1 for filelog 'data/b.txt.i'
212 found affected revision 1 for filelog 'data/b.txt.i'
213 found affected revision 3 for filelog 'data/b.txt.i'
213 found affected revision 3 for filelog 'data/b.txt.i'
214 $ hg st
214 $ hg st
215 M D.txt (no-rhg !)
215 M D.txt
216 M b.txt (no-rhg !)
216 M b.txt
217 $ hg debugrevlogindex b.txt
217 $ hg debugrevlogindex b.txt
218 rev linkrev nodeid p1 p2
218 rev linkrev nodeid p1 p2
219 0 2 05b806ebe5ea 000000000000 000000000000
219 0 2 05b806ebe5ea 000000000000 000000000000
@@ -231,8 +231,8 b' Test the --paranoid option'
231 found affected revision 1 for filelog 'data/b.txt.i'
231 found affected revision 1 for filelog 'data/b.txt.i'
232 found affected revision 3 for filelog 'data/b.txt.i'
232 found affected revision 3 for filelog 'data/b.txt.i'
233 $ hg st
233 $ hg st
234 M D.txt (no-rhg !)
234 M D.txt
235 M b.txt (no-rhg !)
235 M b.txt
236 $ hg debugrevlogindex b.txt
236 $ hg debugrevlogindex b.txt
237 rev linkrev nodeid p1 p2
237 rev linkrev nodeid p1 p2
238 0 2 05b806ebe5ea 000000000000 000000000000
238 0 2 05b806ebe5ea 000000000000 000000000000
@@ -308,8 +308,8 b" only since some versions of tar don't ha"
308 found affected revision 1 for filelog 'b.txt'
308 found affected revision 1 for filelog 'b.txt'
309 found affected revision 3 for filelog 'b.txt'
309 found affected revision 3 for filelog 'b.txt'
310 $ hg st
310 $ hg st
311 M D.txt (no-rhg !)
311 M D.txt
312 M b.txt (no-rhg !)
312 M b.txt
313 $ hg debugrevlogindex b.txt
313 $ hg debugrevlogindex b.txt
314 rev linkrev nodeid p1 p2
314 rev linkrev nodeid p1 p2
315 0 2 05b806ebe5ea 000000000000 000000000000
315 0 2 05b806ebe5ea 000000000000 000000000000
General Comments 0
You need to be logged in to leave comments. Login now