##// END OF EJS Templates
rust: implement the `Graph` trait for all revlogs...
Raphaël Gomès -
r51871:27e773aa default
parent child Browse files
Show More
@@ -1,353 +1,359 b''
1 1 use crate::errors::HgError;
2 2 use crate::revlog::Revision;
3 3 use crate::revlog::{Node, NodePrefix};
4 4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
5 5 use crate::utils::hg_path::HgPath;
6 6 use crate::vfs::Vfs;
7 use crate::UncheckedRevision;
7 use crate::{Graph, GraphError, UncheckedRevision};
8 8 use itertools::Itertools;
9 9 use std::ascii::escape_default;
10 10 use std::borrow::Cow;
11 11 use std::fmt::{Debug, Formatter};
12 12
13 13 /// A specialized `Revlog` to work with changelog data format.
14 14 pub struct Changelog {
15 15 /// The generic `revlog` format.
16 16 pub(crate) revlog: Revlog,
17 17 }
18 18
19 19 impl Changelog {
20 20 /// Open the `changelog` of a repository given by its root.
21 21 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
22 22 let revlog =
23 23 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
24 24 Ok(Self { revlog })
25 25 }
26 26
27 27 /// Return the `ChangelogRevisionData` for the given node ID.
28 28 pub fn data_for_node(
29 29 &self,
30 30 node: NodePrefix,
31 31 ) -> Result<ChangelogRevisionData, RevlogError> {
32 32 let rev = self.revlog.rev_from_node(node)?;
33 33 self.entry_for_checked_rev(rev)?.data()
34 34 }
35 35
36 36 /// Return the [`ChangelogEntry`] for the given revision number.
37 37 pub fn entry_for_rev(
38 38 &self,
39 39 rev: UncheckedRevision,
40 40 ) -> Result<ChangelogEntry, RevlogError> {
41 41 let revlog_entry = self.revlog.get_entry(rev)?;
42 42 Ok(ChangelogEntry { revlog_entry })
43 43 }
44 44
45 45 /// Same as [`Self::entry_for_rev`] for checked revisions.
46 46 fn entry_for_checked_rev(
47 47 &self,
48 48 rev: Revision,
49 49 ) -> Result<ChangelogEntry, RevlogError> {
50 50 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
51 51 Ok(ChangelogEntry { revlog_entry })
52 52 }
53 53
54 54 /// Return the [`ChangelogRevisionData`] for the given revision number.
55 55 ///
56 56 /// This is a useful shortcut in case the caller does not need the
57 57 /// generic revlog information (parents, hashes etc). Otherwise
58 58 /// consider taking a [`ChangelogEntry`] with
59 59 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
60 60 pub fn data_for_rev(
61 61 &self,
62 62 rev: UncheckedRevision,
63 63 ) -> Result<ChangelogRevisionData, RevlogError> {
64 64 self.entry_for_rev(rev)?.data()
65 65 }
66 66
67 67 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
68 68 self.revlog.node_from_rev(rev)
69 69 }
70 70
71 71 pub fn rev_from_node(
72 72 &self,
73 73 node: NodePrefix,
74 74 ) -> Result<Revision, RevlogError> {
75 75 self.revlog.rev_from_node(node)
76 76 }
77 77 }
78 78
79 impl Graph for Changelog {
80 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
81 self.revlog.parents(rev)
82 }
83 }
84
79 85 /// A specialized `RevlogEntry` for `changelog` data format
80 86 ///
81 87 /// This is a `RevlogEntry` with the added semantics that the associated
82 88 /// data should meet the requirements for `changelog`, materialized by
83 89 /// the fact that `data()` constructs a `ChangelogRevisionData`.
84 90 /// In case that promise would be broken, the `data` method returns an error.
85 91 #[derive(Clone)]
86 92 pub struct ChangelogEntry<'changelog> {
87 93 /// Same data, as a generic `RevlogEntry`.
88 94 pub(crate) revlog_entry: RevlogEntry<'changelog>,
89 95 }
90 96
91 97 impl<'changelog> ChangelogEntry<'changelog> {
92 98 pub fn data<'a>(
93 99 &'a self,
94 100 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
95 101 let bytes = self.revlog_entry.data()?;
96 102 if bytes.is_empty() {
97 103 Ok(ChangelogRevisionData::null())
98 104 } else {
99 105 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
100 106 RevlogError::Other(HgError::CorruptedRepository(format!(
101 107 "Invalid changelog data for revision {}: {:?}",
102 108 self.revlog_entry.revision(),
103 109 err
104 110 )))
105 111 })?)
106 112 }
107 113 }
108 114
109 115 /// Obtain a reference to the underlying `RevlogEntry`.
110 116 ///
111 117 /// This allows the caller to access the information that is common
112 118 /// to all revlog entries: revision number, node id, parent revisions etc.
113 119 pub fn as_revlog_entry(&self) -> &RevlogEntry {
114 120 &self.revlog_entry
115 121 }
116 122
117 123 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
118 124 Ok(self
119 125 .revlog_entry
120 126 .p1_entry()?
121 127 .map(|revlog_entry| Self { revlog_entry }))
122 128 }
123 129
124 130 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
125 131 Ok(self
126 132 .revlog_entry
127 133 .p2_entry()?
128 134 .map(|revlog_entry| Self { revlog_entry }))
129 135 }
130 136 }
131 137
132 138 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
133 139 #[derive(PartialEq)]
134 140 pub struct ChangelogRevisionData<'changelog> {
135 141 /// The data bytes of the `changelog` entry.
136 142 bytes: Cow<'changelog, [u8]>,
137 143 /// The end offset for the hex manifest (not including the newline)
138 144 manifest_end: usize,
139 145 /// The end offset for the user+email (not including the newline)
140 146 user_end: usize,
141 147 /// The end offset for the timestamp+timezone+extras (not including the
142 148 /// newline)
143 149 timestamp_end: usize,
144 150 /// The end offset for the file list (not including the newline)
145 151 files_end: usize,
146 152 }
147 153
148 154 impl<'changelog> ChangelogRevisionData<'changelog> {
149 155 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
150 156 let mut line_iter = bytes.split(|b| b == &b'\n');
151 157 let manifest_end = line_iter
152 158 .next()
153 159 .expect("Empty iterator from split()?")
154 160 .len();
155 161 let user_slice = line_iter.next().ok_or_else(|| {
156 162 HgError::corrupted("Changeset data truncated after manifest line")
157 163 })?;
158 164 let user_end = manifest_end + 1 + user_slice.len();
159 165 let timestamp_slice = line_iter.next().ok_or_else(|| {
160 166 HgError::corrupted("Changeset data truncated after user line")
161 167 })?;
162 168 let timestamp_end = user_end + 1 + timestamp_slice.len();
163 169 let mut files_end = timestamp_end + 1;
164 170 loop {
165 171 let line = line_iter.next().ok_or_else(|| {
166 172 HgError::corrupted("Changeset data truncated in files list")
167 173 })?;
168 174 if line.is_empty() {
169 175 if files_end == bytes.len() {
170 176 // The list of files ended with a single newline (there
171 177 // should be two)
172 178 return Err(HgError::corrupted(
173 179 "Changeset data truncated after files list",
174 180 ));
175 181 }
176 182 files_end -= 1;
177 183 break;
178 184 }
179 185 files_end += line.len() + 1;
180 186 }
181 187
182 188 Ok(Self {
183 189 bytes,
184 190 manifest_end,
185 191 user_end,
186 192 timestamp_end,
187 193 files_end,
188 194 })
189 195 }
190 196
191 197 fn null() -> Self {
192 198 Self::new(Cow::Borrowed(
193 199 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
194 200 ))
195 201 .unwrap()
196 202 }
197 203
198 204 /// Return an iterator over the lines of the entry.
199 205 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
200 206 self.bytes.split(|b| b == &b'\n')
201 207 }
202 208
203 209 /// Return the node id of the `manifest` referenced by this `changelog`
204 210 /// entry.
205 211 pub fn manifest_node(&self) -> Result<Node, HgError> {
206 212 let manifest_node_hex = &self.bytes[..self.manifest_end];
207 213 Node::from_hex_for_repo(manifest_node_hex)
208 214 }
209 215
210 216 /// The full user string (usually a name followed by an email enclosed in
211 217 /// angle brackets)
212 218 pub fn user(&self) -> &[u8] {
213 219 &self.bytes[self.manifest_end + 1..self.user_end]
214 220 }
215 221
216 222 /// The full timestamp line (timestamp in seconds, offset in seconds, and
217 223 /// possibly extras)
218 224 // TODO: We should expose this in a more useful way
219 225 pub fn timestamp_line(&self) -> &[u8] {
220 226 &self.bytes[self.user_end + 1..self.timestamp_end]
221 227 }
222 228
223 229 /// The files changed in this revision.
224 230 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
225 231 self.bytes[self.timestamp_end + 1..self.files_end]
226 232 .split(|b| b == &b'\n')
227 233 .map(HgPath::new)
228 234 }
229 235
230 236 /// The change description.
231 237 pub fn description(&self) -> &[u8] {
232 238 &self.bytes[self.files_end + 2..]
233 239 }
234 240 }
235 241
236 242 impl Debug for ChangelogRevisionData<'_> {
237 243 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
238 244 f.debug_struct("ChangelogRevisionData")
239 245 .field("bytes", &debug_bytes(&self.bytes))
240 246 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
241 247 .field(
242 248 "user",
243 249 &debug_bytes(
244 250 &self.bytes[self.manifest_end + 1..self.user_end],
245 251 ),
246 252 )
247 253 .field(
248 254 "timestamp",
249 255 &debug_bytes(
250 256 &self.bytes[self.user_end + 1..self.timestamp_end],
251 257 ),
252 258 )
253 259 .field(
254 260 "files",
255 261 &debug_bytes(
256 262 &self.bytes[self.timestamp_end + 1..self.files_end],
257 263 ),
258 264 )
259 265 .field(
260 266 "description",
261 267 &debug_bytes(&self.bytes[self.files_end + 2..]),
262 268 )
263 269 .finish()
264 270 }
265 271 }
266 272
267 273 fn debug_bytes(bytes: &[u8]) -> String {
268 274 String::from_utf8_lossy(
269 275 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
270 276 )
271 277 .to_string()
272 278 }
273 279
274 280 #[cfg(test)]
275 281 mod tests {
276 282 use super::*;
277 283 use crate::vfs::Vfs;
278 284 use crate::NULL_REVISION;
279 285 use pretty_assertions::assert_eq;
280 286
281 287 #[test]
282 288 fn test_create_changelogrevisiondata_invalid() {
283 289 // Completely empty
284 290 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
285 291 // No newline after manifest
286 292 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
287 293 // No newline after user
288 294 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
289 295 // No newline after timestamp
290 296 assert!(
291 297 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
292 298 );
293 299 // Missing newline after files
294 300 assert!(ChangelogRevisionData::new(Cow::Borrowed(
295 301 b"abcd\n\n0 0\nfile1\nfile2"
296 302 ))
297 303 .is_err(),);
298 304 // Only one newline after files
299 305 assert!(ChangelogRevisionData::new(Cow::Borrowed(
300 306 b"abcd\n\n0 0\nfile1\nfile2\n"
301 307 ))
302 308 .is_err(),);
303 309 }
304 310
305 311 #[test]
306 312 fn test_create_changelogrevisiondata() {
307 313 let data = ChangelogRevisionData::new(Cow::Borrowed(
308 314 b"0123456789abcdef0123456789abcdef01234567
309 315 Some One <someone@example.com>
310 316 0 0
311 317 file1
312 318 file2
313 319
314 320 some
315 321 commit
316 322 message",
317 323 ))
318 324 .unwrap();
319 325 assert_eq!(
320 326 data.manifest_node().unwrap(),
321 327 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
322 328 .unwrap()
323 329 );
324 330 assert_eq!(data.user(), b"Some One <someone@example.com>");
325 331 assert_eq!(data.timestamp_line(), b"0 0");
326 332 assert_eq!(
327 333 data.files().collect_vec(),
328 334 vec![HgPath::new("file1"), HgPath::new("file2")]
329 335 );
330 336 assert_eq!(data.description(), b"some\ncommit\nmessage");
331 337 }
332 338
333 339 #[test]
334 340 fn test_data_from_rev_null() -> Result<(), RevlogError> {
335 341 // an empty revlog will be enough for this case
336 342 let temp = tempfile::tempdir().unwrap();
337 343 let vfs = Vfs { base: temp.path() };
338 344 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
339 345 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
340 346
341 347 let changelog = Changelog { revlog };
342 348 assert_eq!(
343 349 changelog.data_for_rev(NULL_REVISION.into())?,
344 350 ChangelogRevisionData::null()
345 351 );
346 352 // same with the intermediate entry object
347 353 assert_eq!(
348 354 changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
349 355 ChangelogRevisionData::null()
350 356 );
351 357 Ok(())
352 358 }
353 359 }
@@ -1,231 +1,239 b''
1 1 use crate::errors::HgError;
2 2 use crate::exit_codes;
3 3 use crate::repo::Repo;
4 4 use crate::revlog::path_encode::path_encode;
5 5 use crate::revlog::NodePrefix;
6 6 use crate::revlog::Revision;
7 7 use crate::revlog::RevlogEntry;
8 8 use crate::revlog::{Revlog, RevlogError};
9 9 use crate::utils::files::get_path_from_bytes;
10 10 use crate::utils::hg_path::HgPath;
11 11 use crate::utils::SliceExt;
12 use crate::Graph;
13 use crate::GraphError;
12 14 use crate::UncheckedRevision;
13 15 use std::path::PathBuf;
14 16
15 17 /// A specialized `Revlog` to work with file data logs.
16 18 pub struct Filelog {
17 19 /// The generic `revlog` format.
18 20 revlog: Revlog,
19 21 }
20 22
23 impl Graph for Filelog {
24 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
25 self.revlog.parents(rev)
26 }
27 }
28
21 29 impl Filelog {
22 30 pub fn open_vfs(
23 31 store_vfs: &crate::vfs::Vfs<'_>,
24 32 file_path: &HgPath,
25 33 ) -> Result<Self, HgError> {
26 34 let index_path = store_path(file_path, b".i");
27 35 let data_path = store_path(file_path, b".d");
28 36 let revlog =
29 37 Revlog::open(store_vfs, index_path, Some(&data_path), false)?;
30 38 Ok(Self { revlog })
31 39 }
32 40
33 41 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
34 42 Self::open_vfs(&repo.store_vfs(), file_path)
35 43 }
36 44
37 45 /// The given node ID is that of the file as found in a filelog, not of a
38 46 /// changeset.
39 47 pub fn data_for_node(
40 48 &self,
41 49 file_node: impl Into<NodePrefix>,
42 50 ) -> Result<FilelogRevisionData, RevlogError> {
43 51 let file_rev = self.revlog.rev_from_node(file_node.into())?;
44 52 self.data_for_rev(file_rev.into())
45 53 }
46 54
47 55 /// The given revision is that of the file as found in a filelog, not of a
48 56 /// changeset.
49 57 pub fn data_for_rev(
50 58 &self,
51 59 file_rev: UncheckedRevision,
52 60 ) -> Result<FilelogRevisionData, RevlogError> {
53 61 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
54 62 Ok(FilelogRevisionData(data))
55 63 }
56 64
57 65 /// The given node ID is that of the file as found in a filelog, not of a
58 66 /// changeset.
59 67 pub fn entry_for_node(
60 68 &self,
61 69 file_node: impl Into<NodePrefix>,
62 70 ) -> Result<FilelogEntry, RevlogError> {
63 71 let file_rev = self.revlog.rev_from_node(file_node.into())?;
64 72 self.entry_for_checked_rev(file_rev)
65 73 }
66 74
67 75 /// The given revision is that of the file as found in a filelog, not of a
68 76 /// changeset.
69 77 pub fn entry_for_rev(
70 78 &self,
71 79 file_rev: UncheckedRevision,
72 80 ) -> Result<FilelogEntry, RevlogError> {
73 81 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
74 82 }
75 83
76 84 fn entry_for_checked_rev(
77 85 &self,
78 86 file_rev: Revision,
79 87 ) -> Result<FilelogEntry, RevlogError> {
80 88 Ok(FilelogEntry(
81 89 self.revlog.get_entry_for_checked_rev(file_rev)?,
82 90 ))
83 91 }
84 92 }
85 93
86 94 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
87 95 let encoded_bytes =
88 96 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
89 97 get_path_from_bytes(&encoded_bytes).into()
90 98 }
91 99
92 100 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
93 101
94 102 impl FilelogEntry<'_> {
95 103 /// `self.data()` can be expensive, with decompression and delta
96 104 /// resolution.
97 105 ///
98 106 /// *Without* paying this cost, based on revlog index information
99 107 /// including `RevlogEntry::uncompressed_len`:
100 108 ///
101 109 /// * Returns `true` if the length that `self.data().file_data().len()`
102 110 /// would return is definitely **not equal** to `other_len`.
103 111 /// * Returns `false` if available information is inconclusive.
104 112 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
105 113 // Relevant code that implement this behavior in Python code:
106 114 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
107 115 // revlog.size, revlog.rawsize
108 116
109 117 // Let’s call `file_data_len` what would be returned by
110 118 // `self.data().file_data().len()`.
111 119
112 120 if self.0.is_censored() {
113 121 let file_data_len = 0;
114 122 return other_len != file_data_len;
115 123 }
116 124
117 125 if self.0.has_length_affecting_flag_processor() {
118 126 // We can’t conclude anything about `file_data_len`.
119 127 return false;
120 128 }
121 129
122 130 // Revlog revisions (usually) have metadata for the size of
123 131 // their data after decompression and delta resolution
124 132 // as would be returned by `Revlog::get_rev_data`.
125 133 //
126 134 // For filelogs this is the file’s contents preceded by an optional
127 135 // metadata block.
128 136 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
129 137 l as u64
130 138 } else {
131 139 // The field was set to -1, the actual uncompressed len is unknown.
132 140 // We need to decompress to say more.
133 141 return false;
134 142 };
135 143 // `uncompressed_len = file_data_len + optional_metadata_len`,
136 144 // so `file_data_len <= uncompressed_len`.
137 145 if uncompressed_len < other_len {
138 146 // Transitively, `file_data_len < other_len`.
139 147 // So `other_len != file_data_len` definitely.
140 148 return true;
141 149 }
142 150
143 151 if uncompressed_len == other_len + 4 {
144 152 // It’s possible that `file_data_len == other_len` with an empty
145 153 // metadata block (2 start marker bytes + 2 end marker bytes).
146 154 // This happens when there wouldn’t otherwise be metadata, but
147 155 // the first 2 bytes of file data happen to match a start marker
148 156 // and would be ambiguous.
149 157 return false;
150 158 }
151 159
152 160 if !self.0.has_p1() {
153 161 // There may or may not be copy metadata, so we can’t deduce more
154 162 // about `file_data_len` without computing file data.
155 163 return false;
156 164 }
157 165
158 166 // Filelog ancestry is not meaningful in the way changelog ancestry is.
159 167 // It only provides hints to delta generation.
160 168 // p1 and p2 are set to null when making a copy or rename since
161 169 // contents are likely unrelatedto what might have previously existed
162 170 // at the destination path.
163 171 //
164 172 // Conversely, since here p1 is non-null, there is no copy metadata.
165 173 // Note that this reasoning may be invalidated in the presence of
166 174 // merges made by some previous versions of Mercurial that
167 175 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
168 176 // and `tests/test-issue6528.t`.
169 177 //
170 178 // Since copy metadata is currently the only kind of metadata
171 179 // kept in revlog data of filelogs,
172 180 // this `FilelogEntry` does not have such metadata:
173 181 let file_data_len = uncompressed_len;
174 182
175 183 file_data_len != other_len
176 184 }
177 185
178 186 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
179 187 let data = self.0.data();
180 188 match data {
181 189 Ok(data) => Ok(FilelogRevisionData(data.into_owned())),
182 190 // Errors other than `HgError` should not happen at this point
183 191 Err(e) => match e {
184 192 RevlogError::Other(hg_error) => Err(hg_error),
185 193 revlog_error => Err(HgError::abort(
186 194 revlog_error.to_string(),
187 195 exit_codes::ABORT,
188 196 None,
189 197 )),
190 198 },
191 199 }
192 200 }
193 201 }
194 202
195 203 /// The data for one revision in a filelog, uncompressed and delta-resolved.
196 204 pub struct FilelogRevisionData(Vec<u8>);
197 205
198 206 impl FilelogRevisionData {
199 207 /// Split into metadata and data
200 208 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
201 209 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
202 210
203 211 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
204 212 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
205 213 Ok((Some(metadata), data))
206 214 } else {
207 215 Err(HgError::corrupted(
208 216 "Missing metadata end delimiter in filelog entry",
209 217 ))
210 218 }
211 219 } else {
212 220 Ok((None, &self.0))
213 221 }
214 222 }
215 223
216 224 /// Returns the file contents at this revision, stripped of any metadata
217 225 pub fn file_data(&self) -> Result<&[u8], HgError> {
218 226 let (_metadata, data) = self.split()?;
219 227 Ok(data)
220 228 }
221 229
222 230 /// Consume the entry, and convert it into data, discarding any metadata,
223 231 /// if present.
224 232 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
225 233 if let (Some(_metadata), data) = self.split()? {
226 234 Ok(data.to_owned())
227 235 } else {
228 236 Ok(self.0)
229 237 }
230 238 }
231 239 }
@@ -1,622 +1,639 b''
1 1 use std::fmt::Debug;
2 2 use std::ops::Deref;
3 3
4 4 use byteorder::{BigEndian, ByteOrder};
5 5
6 6 use crate::errors::HgError;
7 7 use crate::revlog::node::Node;
8 8 use crate::revlog::{Revision, NULL_REVISION};
9 use crate::UncheckedRevision;
9 use crate::{Graph, GraphError, RevlogIndex, UncheckedRevision};
10 10
11 11 pub const INDEX_ENTRY_SIZE: usize = 64;
12 12
13 13 pub struct IndexHeader {
14 14 header_bytes: [u8; 4],
15 15 }
16 16
17 17 #[derive(Copy, Clone)]
18 18 pub struct IndexHeaderFlags {
19 19 flags: u16,
20 20 }
21 21
22 22 /// Corresponds to the high bits of `_format_flags` in python
23 23 impl IndexHeaderFlags {
24 24 /// Corresponds to FLAG_INLINE_DATA in python
25 25 pub fn is_inline(self) -> bool {
26 26 self.flags & 1 != 0
27 27 }
28 28 /// Corresponds to FLAG_GENERALDELTA in python
29 29 pub fn uses_generaldelta(self) -> bool {
30 30 self.flags & 2 != 0
31 31 }
32 32 }
33 33
34 34 /// Corresponds to the INDEX_HEADER structure,
35 35 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
36 36 impl IndexHeader {
37 37 fn format_flags(&self) -> IndexHeaderFlags {
38 38 // No "unknown flags" check here, unlike in python. Maybe there should
39 39 // be.
40 40 IndexHeaderFlags {
41 41 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
42 42 }
43 43 }
44 44
45 45 /// The only revlog version currently supported by rhg.
46 46 const REVLOGV1: u16 = 1;
47 47
48 48 /// Corresponds to `_format_version` in Python.
49 49 fn format_version(&self) -> u16 {
50 50 BigEndian::read_u16(&self.header_bytes[2..4])
51 51 }
52 52
53 53 const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
54 54 // We treat an empty file as a valid index with no entries.
55 55 // Here we make an arbitrary choice of what we assume the format of the
56 56 // index to be (V1, using generaldelta).
57 57 // This doesn't matter too much, since we're only doing read-only
58 58 // access. but the value corresponds to the `new_header` variable in
59 59 // `revlog.py`, `_loadindex`
60 60 header_bytes: [0, 3, 0, 1],
61 61 };
62 62
63 63 fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
64 64 if index_bytes.is_empty() {
65 65 return Ok(IndexHeader::EMPTY_INDEX_HEADER);
66 66 }
67 67 if index_bytes.len() < 4 {
68 68 return Err(HgError::corrupted(
69 69 "corrupted revlog: can't read the index format header",
70 70 ));
71 71 }
72 72 Ok(IndexHeader {
73 73 header_bytes: {
74 74 let bytes: [u8; 4] =
75 75 index_bytes[0..4].try_into().expect("impossible");
76 76 bytes
77 77 },
78 78 })
79 79 }
80 80 }
81 81
82 82 /// A Revlog index
83 83 pub struct Index {
84 84 bytes: Box<dyn Deref<Target = [u8]> + Send>,
85 85 /// Offsets of starts of index blocks.
86 86 /// Only needed when the index is interleaved with data.
87 87 offsets: Option<Vec<usize>>,
88 88 uses_generaldelta: bool,
89 89 }
90 90
91 91 impl Debug for Index {
92 92 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
93 93 f.debug_struct("Index")
94 94 .field("offsets", &self.offsets)
95 95 .field("uses_generaldelta", &self.uses_generaldelta)
96 96 .finish()
97 97 }
98 98 }
99 99
100 impl Graph for Index {
101 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
102 let err = || GraphError::ParentOutOfRange(rev);
103 match self.get_entry(rev) {
104 Some(entry) => {
105 // The C implementation checks that the parents are valid
106 // before returning
107 Ok([
108 self.check_revision(entry.p1()).ok_or_else(err)?,
109 self.check_revision(entry.p2()).ok_or_else(err)?,
110 ])
111 }
112 None => Ok([NULL_REVISION, NULL_REVISION]),
113 }
114 }
115 }
116
100 117 impl Index {
101 118 /// Create an index from bytes.
102 119 /// Calculate the start of each entry when is_inline is true.
103 120 pub fn new(
104 121 bytes: Box<dyn Deref<Target = [u8]> + Send>,
105 122 ) -> Result<Self, HgError> {
106 123 let header = IndexHeader::parse(bytes.as_ref())?;
107 124
108 125 if header.format_version() != IndexHeader::REVLOGV1 {
109 126 // A proper new version should have had a repo/store
110 127 // requirement.
111 128 return Err(HgError::corrupted("unsupported revlog version"));
112 129 }
113 130
114 131 // This is only correct because we know version is REVLOGV1.
115 132 // In v2 we always use generaldelta, while in v0 we never use
116 133 // generaldelta. Similar for [is_inline] (it's only used in v1).
117 134 let uses_generaldelta = header.format_flags().uses_generaldelta();
118 135
119 136 if header.format_flags().is_inline() {
120 137 let mut offset: usize = 0;
121 138 let mut offsets = Vec::new();
122 139
123 140 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
124 141 offsets.push(offset);
125 142 let end = offset + INDEX_ENTRY_SIZE;
126 143 let entry = IndexEntry {
127 144 bytes: &bytes[offset..end],
128 145 offset_override: None,
129 146 };
130 147
131 148 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
132 149 }
133 150
134 151 if offset == bytes.len() {
135 152 Ok(Self {
136 153 bytes,
137 154 offsets: Some(offsets),
138 155 uses_generaldelta,
139 156 })
140 157 } else {
141 158 Err(HgError::corrupted("unexpected inline revlog length"))
142 159 }
143 160 } else {
144 161 Ok(Self {
145 162 bytes,
146 163 offsets: None,
147 164 uses_generaldelta,
148 165 })
149 166 }
150 167 }
151 168
152 169 pub fn uses_generaldelta(&self) -> bool {
153 170 self.uses_generaldelta
154 171 }
155 172
156 173 /// Value of the inline flag.
157 174 pub fn is_inline(&self) -> bool {
158 175 self.offsets.is_some()
159 176 }
160 177
161 178 /// Return a slice of bytes if `revlog` is inline. Panic if not.
162 179 pub fn data(&self, start: usize, end: usize) -> &[u8] {
163 180 if !self.is_inline() {
164 181 panic!("tried to access data in the index of a revlog that is not inline");
165 182 }
166 183 &self.bytes[start..end]
167 184 }
168 185
169 186 /// Return number of entries of the revlog index.
170 187 pub fn len(&self) -> usize {
171 188 if let Some(offsets) = &self.offsets {
172 189 offsets.len()
173 190 } else {
174 191 self.bytes.len() / INDEX_ENTRY_SIZE
175 192 }
176 193 }
177 194
178 195 /// Returns `true` if the `Index` has zero `entries`.
179 196 pub fn is_empty(&self) -> bool {
180 197 self.len() == 0
181 198 }
182 199
183 200 /// Return the index entry corresponding to the given revision if it
184 201 /// exists.
185 202 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
186 203 if rev == NULL_REVISION {
187 204 return None;
188 205 }
189 206 Some(if let Some(offsets) = &self.offsets {
190 207 self.get_entry_inline(rev, offsets)
191 208 } else {
192 209 self.get_entry_separated(rev)
193 210 })
194 211 }
195 212
196 213 fn get_entry_inline(
197 214 &self,
198 215 rev: Revision,
199 216 offsets: &[usize],
200 217 ) -> IndexEntry {
201 218 let start = offsets[rev as usize];
202 219 let end = start + INDEX_ENTRY_SIZE;
203 220 let bytes = &self.bytes[start..end];
204 221
205 222 // See IndexEntry for an explanation of this override.
206 223 let offset_override = Some(end);
207 224
208 225 IndexEntry {
209 226 bytes,
210 227 offset_override,
211 228 }
212 229 }
213 230
214 231 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
215 232 let start = rev as usize * INDEX_ENTRY_SIZE;
216 233 let end = start + INDEX_ENTRY_SIZE;
217 234 let bytes = &self.bytes[start..end];
218 235
219 236 // Override the offset of the first revision as its bytes are used
220 237 // for the index's metadata (saving space because it is always 0)
221 238 let offset_override = if rev == 0 { Some(0) } else { None };
222 239
223 240 IndexEntry {
224 241 bytes,
225 242 offset_override,
226 243 }
227 244 }
228 245 }
229 246
230 247 impl super::RevlogIndex for Index {
231 248 fn len(&self) -> usize {
232 249 self.len()
233 250 }
234 251
235 252 fn node(&self, rev: Revision) -> Option<&Node> {
236 253 self.get_entry(rev).map(|entry| entry.hash())
237 254 }
238 255 }
239 256
240 257 #[derive(Debug)]
241 258 pub struct IndexEntry<'a> {
242 259 bytes: &'a [u8],
243 260 /// Allows to override the offset value of the entry.
244 261 ///
245 262 /// For interleaved index and data, the offset stored in the index
246 263 /// corresponds to the separated data offset.
247 264 /// It has to be overridden with the actual offset in the interleaved
248 265 /// index which is just after the index block.
249 266 ///
250 267 /// For separated index and data, the offset stored in the first index
251 268 /// entry is mixed with the index headers.
252 269 /// It has to be overridden with 0.
253 270 offset_override: Option<usize>,
254 271 }
255 272
256 273 impl<'a> IndexEntry<'a> {
257 274 /// Return the offset of the data.
258 275 pub fn offset(&self) -> usize {
259 276 if let Some(offset_override) = self.offset_override {
260 277 offset_override
261 278 } else {
262 279 let mut bytes = [0; 8];
263 280 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
264 281 BigEndian::read_u64(&bytes[..]) as usize
265 282 }
266 283 }
267 284
268 285 pub fn flags(&self) -> u16 {
269 286 BigEndian::read_u16(&self.bytes[6..=7])
270 287 }
271 288
272 289 /// Return the compressed length of the data.
273 290 pub fn compressed_len(&self) -> u32 {
274 291 BigEndian::read_u32(&self.bytes[8..=11])
275 292 }
276 293
277 294 /// Return the uncompressed length of the data.
278 295 pub fn uncompressed_len(&self) -> i32 {
279 296 BigEndian::read_i32(&self.bytes[12..=15])
280 297 }
281 298
282 299 /// Return the revision upon which the data has been derived.
283 300 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
284 301 // TODO Maybe return an Option when base_revision == rev?
285 302 // Requires to add rev to IndexEntry
286 303
287 304 BigEndian::read_i32(&self.bytes[16..]).into()
288 305 }
289 306
290 307 pub fn link_revision(&self) -> UncheckedRevision {
291 308 BigEndian::read_i32(&self.bytes[20..]).into()
292 309 }
293 310
294 311 pub fn p1(&self) -> UncheckedRevision {
295 312 BigEndian::read_i32(&self.bytes[24..]).into()
296 313 }
297 314
298 315 pub fn p2(&self) -> UncheckedRevision {
299 316 BigEndian::read_i32(&self.bytes[28..]).into()
300 317 }
301 318
302 319 /// Return the hash of revision's full text.
303 320 ///
304 321 /// Currently, SHA-1 is used and only the first 20 bytes of this field
305 322 /// are used.
306 323 pub fn hash(&self) -> &'a Node {
307 324 (&self.bytes[32..52]).try_into().unwrap()
308 325 }
309 326 }
310 327
311 328 #[cfg(test)]
312 329 mod tests {
313 330 use super::*;
314 331 use crate::node::NULL_NODE;
315 332
316 333 #[cfg(test)]
317 334 #[derive(Debug, Copy, Clone)]
318 335 pub struct IndexEntryBuilder {
319 336 is_first: bool,
320 337 is_inline: bool,
321 338 is_general_delta: bool,
322 339 version: u16,
323 340 offset: usize,
324 341 compressed_len: usize,
325 342 uncompressed_len: usize,
326 343 base_revision_or_base_of_delta_chain: Revision,
327 344 link_revision: Revision,
328 345 p1: Revision,
329 346 p2: Revision,
330 347 node: Node,
331 348 }
332 349
333 350 #[cfg(test)]
334 351 impl IndexEntryBuilder {
335 352 #[allow(clippy::new_without_default)]
336 353 pub fn new() -> Self {
337 354 Self {
338 355 is_first: false,
339 356 is_inline: false,
340 357 is_general_delta: true,
341 358 version: 1,
342 359 offset: 0,
343 360 compressed_len: 0,
344 361 uncompressed_len: 0,
345 362 base_revision_or_base_of_delta_chain: 0,
346 363 link_revision: 0,
347 364 p1: NULL_REVISION,
348 365 p2: NULL_REVISION,
349 366 node: NULL_NODE,
350 367 }
351 368 }
352 369
353 370 pub fn is_first(&mut self, value: bool) -> &mut Self {
354 371 self.is_first = value;
355 372 self
356 373 }
357 374
358 375 pub fn with_inline(&mut self, value: bool) -> &mut Self {
359 376 self.is_inline = value;
360 377 self
361 378 }
362 379
363 380 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
364 381 self.is_general_delta = value;
365 382 self
366 383 }
367 384
368 385 pub fn with_version(&mut self, value: u16) -> &mut Self {
369 386 self.version = value;
370 387 self
371 388 }
372 389
373 390 pub fn with_offset(&mut self, value: usize) -> &mut Self {
374 391 self.offset = value;
375 392 self
376 393 }
377 394
378 395 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
379 396 self.compressed_len = value;
380 397 self
381 398 }
382 399
383 400 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
384 401 self.uncompressed_len = value;
385 402 self
386 403 }
387 404
388 405 pub fn with_base_revision_or_base_of_delta_chain(
389 406 &mut self,
390 407 value: Revision,
391 408 ) -> &mut Self {
392 409 self.base_revision_or_base_of_delta_chain = value;
393 410 self
394 411 }
395 412
396 413 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
397 414 self.link_revision = value;
398 415 self
399 416 }
400 417
401 418 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
402 419 self.p1 = value;
403 420 self
404 421 }
405 422
406 423 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
407 424 self.p2 = value;
408 425 self
409 426 }
410 427
411 428 pub fn with_node(&mut self, value: Node) -> &mut Self {
412 429 self.node = value;
413 430 self
414 431 }
415 432
416 433 pub fn build(&self) -> Vec<u8> {
417 434 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
418 435 if self.is_first {
419 436 bytes.extend(&match (self.is_general_delta, self.is_inline) {
420 437 (false, false) => [0u8, 0],
421 438 (false, true) => [0u8, 1],
422 439 (true, false) => [0u8, 2],
423 440 (true, true) => [0u8, 3],
424 441 });
425 442 bytes.extend(&self.version.to_be_bytes());
426 443 // Remaining offset bytes.
427 444 bytes.extend(&[0u8; 2]);
428 445 } else {
429 446 // Offset stored on 48 bits (6 bytes)
430 447 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
431 448 }
432 449 bytes.extend(&[0u8; 2]); // Revision flags.
433 450 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
434 451 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
435 452 bytes.extend(
436 453 &self.base_revision_or_base_of_delta_chain.to_be_bytes(),
437 454 );
438 455 bytes.extend(&self.link_revision.to_be_bytes());
439 456 bytes.extend(&self.p1.to_be_bytes());
440 457 bytes.extend(&self.p2.to_be_bytes());
441 458 bytes.extend(self.node.as_bytes());
442 459 bytes.extend(vec![0u8; 12]);
443 460 bytes
444 461 }
445 462 }
446 463
447 464 pub fn is_inline(index_bytes: &[u8]) -> bool {
448 465 IndexHeader::parse(index_bytes)
449 466 .expect("too short")
450 467 .format_flags()
451 468 .is_inline()
452 469 }
453 470
454 471 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
455 472 IndexHeader::parse(index_bytes)
456 473 .expect("too short")
457 474 .format_flags()
458 475 .uses_generaldelta()
459 476 }
460 477
461 478 pub fn get_version(index_bytes: &[u8]) -> u16 {
462 479 IndexHeader::parse(index_bytes)
463 480 .expect("too short")
464 481 .format_version()
465 482 }
466 483
467 484 #[test]
468 485 fn flags_when_no_inline_flag_test() {
469 486 let bytes = IndexEntryBuilder::new()
470 487 .is_first(true)
471 488 .with_general_delta(false)
472 489 .with_inline(false)
473 490 .build();
474 491
475 492 assert!(!is_inline(&bytes));
476 493 assert!(!uses_generaldelta(&bytes));
477 494 }
478 495
479 496 #[test]
480 497 fn flags_when_inline_flag_test() {
481 498 let bytes = IndexEntryBuilder::new()
482 499 .is_first(true)
483 500 .with_general_delta(false)
484 501 .with_inline(true)
485 502 .build();
486 503
487 504 assert!(is_inline(&bytes));
488 505 assert!(!uses_generaldelta(&bytes));
489 506 }
490 507
491 508 #[test]
492 509 fn flags_when_inline_and_generaldelta_flags_test() {
493 510 let bytes = IndexEntryBuilder::new()
494 511 .is_first(true)
495 512 .with_general_delta(true)
496 513 .with_inline(true)
497 514 .build();
498 515
499 516 assert!(is_inline(&bytes));
500 517 assert!(uses_generaldelta(&bytes));
501 518 }
502 519
503 520 #[test]
504 521 fn test_offset() {
505 522 let bytes = IndexEntryBuilder::new().with_offset(1).build();
506 523 let entry = IndexEntry {
507 524 bytes: &bytes,
508 525 offset_override: None,
509 526 };
510 527
511 528 assert_eq!(entry.offset(), 1)
512 529 }
513 530
514 531 #[test]
515 532 fn test_with_overridden_offset() {
516 533 let bytes = IndexEntryBuilder::new().with_offset(1).build();
517 534 let entry = IndexEntry {
518 535 bytes: &bytes,
519 536 offset_override: Some(2),
520 537 };
521 538
522 539 assert_eq!(entry.offset(), 2)
523 540 }
524 541
525 542 #[test]
526 543 fn test_compressed_len() {
527 544 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
528 545 let entry = IndexEntry {
529 546 bytes: &bytes,
530 547 offset_override: None,
531 548 };
532 549
533 550 assert_eq!(entry.compressed_len(), 1)
534 551 }
535 552
536 553 #[test]
537 554 fn test_uncompressed_len() {
538 555 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
539 556 let entry = IndexEntry {
540 557 bytes: &bytes,
541 558 offset_override: None,
542 559 };
543 560
544 561 assert_eq!(entry.uncompressed_len(), 1)
545 562 }
546 563
547 564 #[test]
548 565 fn test_base_revision_or_base_of_delta_chain() {
549 566 let bytes = IndexEntryBuilder::new()
550 567 .with_base_revision_or_base_of_delta_chain(1)
551 568 .build();
552 569 let entry = IndexEntry {
553 570 bytes: &bytes,
554 571 offset_override: None,
555 572 };
556 573
557 574 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
558 575 }
559 576
560 577 #[test]
561 578 fn link_revision_test() {
562 579 let bytes = IndexEntryBuilder::new().with_link_revision(123).build();
563 580
564 581 let entry = IndexEntry {
565 582 bytes: &bytes,
566 583 offset_override: None,
567 584 };
568 585
569 586 assert_eq!(entry.link_revision(), 123.into());
570 587 }
571 588
572 589 #[test]
573 590 fn p1_test() {
574 591 let bytes = IndexEntryBuilder::new().with_p1(123).build();
575 592
576 593 let entry = IndexEntry {
577 594 bytes: &bytes,
578 595 offset_override: None,
579 596 };
580 597
581 598 assert_eq!(entry.p1(), 123.into());
582 599 }
583 600
584 601 #[test]
585 602 fn p2_test() {
586 603 let bytes = IndexEntryBuilder::new().with_p2(123).build();
587 604
588 605 let entry = IndexEntry {
589 606 bytes: &bytes,
590 607 offset_override: None,
591 608 };
592 609
593 610 assert_eq!(entry.p2(), 123.into());
594 611 }
595 612
596 613 #[test]
597 614 fn node_test() {
598 615 let node = Node::from_hex("0123456789012345678901234567890123456789")
599 616 .unwrap();
600 617 let bytes = IndexEntryBuilder::new().with_node(node).build();
601 618
602 619 let entry = IndexEntry {
603 620 bytes: &bytes,
604 621 offset_override: None,
605 622 };
606 623
607 624 assert_eq!(*entry.hash(), node);
608 625 }
609 626
610 627 #[test]
611 628 fn version_test() {
612 629 let bytes = IndexEntryBuilder::new()
613 630 .is_first(true)
614 631 .with_version(2)
615 632 .build();
616 633
617 634 assert_eq!(get_version(&bytes), 2)
618 635 }
619 636 }
620 637
621 638 #[cfg(test)]
622 639 pub use tests::IndexEntryBuilder;
@@ -1,203 +1,209 b''
1 1 use crate::errors::HgError;
2 2 use crate::revlog::{Node, NodePrefix};
3 3 use crate::revlog::{Revlog, RevlogError};
4 4 use crate::utils::hg_path::HgPath;
5 5 use crate::utils::SliceExt;
6 6 use crate::vfs::Vfs;
7 use crate::{Revision, UncheckedRevision};
7 use crate::{Graph, GraphError, Revision, UncheckedRevision};
8 8
9 9 /// A specialized `Revlog` to work with `manifest` data format.
10 10 pub struct Manifestlog {
11 11 /// The generic `revlog` format.
12 12 revlog: Revlog,
13 13 }
14 14
15 impl Graph for Manifestlog {
16 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
17 self.revlog.parents(rev)
18 }
19 }
20
15 21 impl Manifestlog {
16 22 /// Open the `manifest` of a repository given by its root.
17 23 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
18 24 let revlog =
19 25 Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?;
20 26 Ok(Self { revlog })
21 27 }
22 28
23 29 /// Return the `Manifest` for the given node ID.
24 30 ///
25 31 /// Note: this is a node ID in the manifestlog, typically found through
26 32 /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any
27 33 /// changeset.
28 34 ///
29 35 /// See also `Repo::manifest_for_node`
30 36 pub fn data_for_node(
31 37 &self,
32 38 node: NodePrefix,
33 39 ) -> Result<Manifest, RevlogError> {
34 40 let rev = self.revlog.rev_from_node(node)?;
35 41 self.data_for_checked_rev(rev)
36 42 }
37 43
38 44 /// Return the `Manifest` of a given revision number.
39 45 ///
40 46 /// Note: this is a revision number in the manifestlog, *not* of any
41 47 /// changeset.
42 48 ///
43 49 /// See also `Repo::manifest_for_rev`
44 50 pub fn data_for_rev(
45 51 &self,
46 52 rev: UncheckedRevision,
47 53 ) -> Result<Manifest, RevlogError> {
48 54 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
49 55 Ok(Manifest { bytes })
50 56 }
51 57
52 58 pub fn data_for_checked_rev(
53 59 &self,
54 60 rev: Revision,
55 61 ) -> Result<Manifest, RevlogError> {
56 62 let bytes =
57 63 self.revlog.get_rev_data_for_checked_rev(rev)?.into_owned();
58 64 Ok(Manifest { bytes })
59 65 }
60 66 }
61 67
62 68 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
63 69 #[derive(Debug)]
64 70 pub struct Manifest {
65 71 /// Format for a manifest: flat sequence of variable-size entries,
66 72 /// sorted by path, each as:
67 73 ///
68 74 /// ```text
69 75 /// <path> \0 <hex_node_id> <flags> \n
70 76 /// ```
71 77 ///
72 78 /// The last entry is also terminated by a newline character.
73 79 /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
74 80 bytes: Vec<u8>,
75 81 }
76 82
77 83 impl Manifest {
78 84 pub fn iter(
79 85 &self,
80 86 ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
81 87 self.bytes
82 88 .split(|b| b == &b'\n')
83 89 .filter(|line| !line.is_empty())
84 90 .map(ManifestEntry::from_raw)
85 91 }
86 92
87 93 /// If the given path is in this manifest, return its filelog node ID
88 94 pub fn find_by_path(
89 95 &self,
90 96 path: &HgPath,
91 97 ) -> Result<Option<ManifestEntry>, HgError> {
92 98 use std::cmp::Ordering::*;
93 99 let path = path.as_bytes();
94 100 // Both boundaries of this `&[u8]` slice are always at the boundary of
95 101 // an entry
96 102 let mut bytes = &*self.bytes;
97 103
98 104 // Binary search algorithm derived from `[T]::binary_search_by`
99 105 // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
100 106 // except we don’t have a slice of entries. Instead we jump to the
101 107 // middle of the byte slice and look around for entry delimiters
102 108 // (newlines).
103 109 while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? {
104 110 let (entry_path, rest) =
105 111 ManifestEntry::split_path(&bytes[entry_range.clone()])?;
106 112 let cmp = entry_path.cmp(path);
107 113 if cmp == Less {
108 114 let after_newline = entry_range.end + 1;
109 115 bytes = &bytes[after_newline..];
110 116 } else if cmp == Greater {
111 117 bytes = &bytes[..entry_range.start];
112 118 } else {
113 119 return Ok(Some(ManifestEntry::from_path_and_rest(
114 120 entry_path, rest,
115 121 )));
116 122 }
117 123 }
118 124 Ok(None)
119 125 }
120 126
121 127 /// If there is at least one, return the byte range of an entry *excluding*
122 128 /// the final newline.
123 129 fn find_entry_near_middle_of(
124 130 bytes: &[u8],
125 131 ) -> Result<Option<std::ops::Range<usize>>, HgError> {
126 132 let len = bytes.len();
127 133 if len > 0 {
128 134 let middle = bytes.len() / 2;
129 135 // Integer division rounds down, so `middle < len`.
130 136 let (before, after) = bytes.split_at(middle);
131 137 let is_newline = |&byte: &u8| byte == b'\n';
132 138 let entry_start = match before.iter().rposition(is_newline) {
133 139 Some(i) => i + 1,
134 140 None => 0, // We choose the first entry in `bytes`
135 141 };
136 142 let entry_end = match after.iter().position(is_newline) {
137 143 Some(i) => {
138 144 // No `+ 1` here to exclude this newline from the range
139 145 middle + i
140 146 }
141 147 None => {
142 148 // In a well-formed manifest:
143 149 //
144 150 // * Since `len > 0`, `bytes` contains at least one entry
145 151 // * Every entry ends with a newline
146 152 // * Since `middle < len`, `after` contains at least the
147 153 // newline at the end of the last entry of `bytes`.
148 154 //
149 155 // We didn’t find a newline, so this manifest is not
150 156 // well-formed.
151 157 return Err(HgError::corrupted(
152 158 "manifest entry without \\n delimiter",
153 159 ));
154 160 }
155 161 };
156 162 Ok(Some(entry_start..entry_end))
157 163 } else {
158 164 // len == 0
159 165 Ok(None)
160 166 }
161 167 }
162 168 }
163 169
164 170 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
165 171 #[derive(Debug)]
166 172 pub struct ManifestEntry<'manifest> {
167 173 pub path: &'manifest HgPath,
168 174 pub hex_node_id: &'manifest [u8],
169 175
170 176 /// `Some` values are b'x', b'l', or 't'
171 177 pub flags: Option<u8>,
172 178 }
173 179
174 180 impl<'a> ManifestEntry<'a> {
175 181 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
176 182 bytes.split_2(b'\0').ok_or_else(|| {
177 183 HgError::corrupted("manifest entry without \\0 delimiter")
178 184 })
179 185 }
180 186
181 187 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
182 188 let (hex_node_id, flags) = match rest.split_last() {
183 189 Some((&b'x', rest)) => (rest, Some(b'x')),
184 190 Some((&b'l', rest)) => (rest, Some(b'l')),
185 191 Some((&b't', rest)) => (rest, Some(b't')),
186 192 _ => (rest, None),
187 193 };
188 194 Self {
189 195 path: HgPath::new(path),
190 196 hex_node_id,
191 197 flags,
192 198 }
193 199 }
194 200
195 201 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
196 202 let (path, rest) = Self::split_path(bytes)?;
197 203 Ok(Self::from_path_and_rest(path, rest))
198 204 }
199 205
200 206 pub fn node_id(&self) -> Result<Node, HgError> {
201 207 Node::from_hex_for_repo(self.hex_node_id)
202 208 }
203 209 }
@@ -1,904 +1,910 b''
1 1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 10 mod nodemap_docket;
11 11 pub mod path_encode;
12 12 pub use node::{FromHexError, Node, NodePrefix};
13 13 pub mod changelog;
14 14 pub mod filelog;
15 15 pub mod index;
16 16 pub mod manifest;
17 17 pub mod patch;
18 18
19 19 use std::borrow::Cow;
20 20 use std::io::Read;
21 21 use std::ops::Deref;
22 22 use std::path::Path;
23 23
24 24 use flate2::read::ZlibDecoder;
25 25 use sha1::{Digest, Sha1};
26 26 use std::cell::RefCell;
27 27 use zstd;
28 28
29 29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 30 use self::nodemap_docket::NodeMapDocket;
31 31 use super::index::Index;
32 32 use super::nodemap::{NodeMap, NodeMapError};
33 33 use crate::errors::HgError;
34 34 use crate::vfs::Vfs;
35 35
36 36 /// Mercurial revision numbers
37 37 ///
38 38 /// As noted in revlog.c, revision numbers are actually encoded in
39 39 /// 4 bytes, and are liberally converted to ints, whence the i32
40 40 pub type Revision = i32;
41 41
42 42 /// Unchecked Mercurial revision numbers.
43 43 ///
44 44 /// Values of this type have no guarantee of being a valid revision number
45 45 /// in any context. Use method `check_revision` to get a valid revision within
46 46 /// the appropriate index object.
47 47 ///
48 48 /// As noted in revlog.c, revision numbers are actually encoded in
49 49 /// 4 bytes, and are liberally converted to ints, whence the i32
50 50 #[derive(
51 51 Debug,
52 52 derive_more::Display,
53 53 Clone,
54 54 Copy,
55 55 Hash,
56 56 PartialEq,
57 57 Eq,
58 58 PartialOrd,
59 59 Ord,
60 60 )]
61 61 pub struct UncheckedRevision(i32);
62 62
63 63 impl From<Revision> for UncheckedRevision {
64 64 fn from(value: Revision) -> Self {
65 65 Self(value)
66 66 }
67 67 }
68 68
69 69 /// Marker expressing the absence of a parent
70 70 ///
71 71 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
72 72 /// to be smaller than all existing revisions.
73 73 pub const NULL_REVISION: Revision = -1;
74 74
75 75 /// Same as `mercurial.node.wdirrev`
76 76 ///
77 77 /// This is also equal to `i32::max_value()`, but it's better to spell
78 78 /// it out explicitely, same as in `mercurial.node`
79 79 #[allow(clippy::unreadable_literal)]
80 80 pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
81 81 UncheckedRevision(0x7fffffff);
82 82
83 83 pub const WORKING_DIRECTORY_HEX: &str =
84 84 "ffffffffffffffffffffffffffffffffffffffff";
85 85
86 86 /// The simplest expression of what we need of Mercurial DAGs.
87 87 pub trait Graph {
88 88 /// Return the two parents of the given `Revision`.
89 89 ///
90 90 /// Each of the parents can be independently `NULL_REVISION`
91 91 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
92 92 }
93 93
94 94 #[derive(Clone, Debug, PartialEq)]
95 95 pub enum GraphError {
96 96 ParentOutOfRange(Revision),
97 97 }
98 98
99 99 /// The Mercurial Revlog Index
100 100 ///
101 101 /// This is currently limited to the minimal interface that is needed for
102 102 /// the [`nodemap`](nodemap/index.html) module
103 103 pub trait RevlogIndex {
104 104 /// Total number of Revisions referenced in this index
105 105 fn len(&self) -> usize;
106 106
107 107 fn is_empty(&self) -> bool {
108 108 self.len() == 0
109 109 }
110 110
111 111 /// Return a reference to the Node or `None` for `NULL_REVISION`
112 112 fn node(&self, rev: Revision) -> Option<&Node>;
113 113
114 114 /// Return a [`Revision`] if `rev` is a valid revision number for this
115 115 /// index
116 116 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
117 117 let rev = rev.0;
118 118
119 119 if rev == NULL_REVISION || (rev >= 0 && (rev as usize) < self.len()) {
120 120 Some(rev)
121 121 } else {
122 122 None
123 123 }
124 124 }
125 125 }
126 126
127 127 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
128 128 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
129 129 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
130 130 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
131 131
132 132 // Keep this in sync with REVIDX_KNOWN_FLAGS in
133 133 // mercurial/revlogutils/flagutil.py
134 134 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
135 135 | REVISION_FLAG_ELLIPSIS
136 136 | REVISION_FLAG_EXTSTORED
137 137 | REVISION_FLAG_HASCOPIESINFO;
138 138
139 139 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
140 140
141 141 #[derive(Debug, derive_more::From, derive_more::Display)]
142 142 pub enum RevlogError {
143 143 InvalidRevision,
144 144 /// Working directory is not supported
145 145 WDirUnsupported,
146 146 /// Found more than one entry whose ID match the requested prefix
147 147 AmbiguousPrefix,
148 148 #[from]
149 149 Other(HgError),
150 150 }
151 151
152 152 impl From<NodeMapError> for RevlogError {
153 153 fn from(error: NodeMapError) -> Self {
154 154 match error {
155 155 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
156 156 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
157 157 format!("nodemap point to revision {} not in index", rev),
158 158 ),
159 159 }
160 160 }
161 161 }
162 162
163 163 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
164 164 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
165 165 }
166 166
167 167 impl RevlogError {
168 168 fn corrupted<S: AsRef<str>>(context: S) -> Self {
169 169 RevlogError::Other(corrupted(context))
170 170 }
171 171 }
172 172
173 173 /// Read only implementation of revlog.
174 174 pub struct Revlog {
175 175 /// When index and data are not interleaved: bytes of the revlog index.
176 176 /// When index and data are interleaved: bytes of the revlog index and
177 177 /// data.
178 178 index: Index,
179 179 /// When index and data are not interleaved: bytes of the revlog data
180 180 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
181 181 /// When present on disk: the persistent nodemap for this revlog
182 182 nodemap: Option<nodemap::NodeTree>,
183 183 }
184 184
185 impl Graph for Revlog {
186 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
187 self.index.parents(rev)
188 }
189 }
190
185 191 impl Revlog {
186 192 /// Open a revlog index file.
187 193 ///
188 194 /// It will also open the associated data file if index and data are not
189 195 /// interleaved.
190 196 pub fn open(
191 197 store_vfs: &Vfs,
192 198 index_path: impl AsRef<Path>,
193 199 data_path: Option<&Path>,
194 200 use_nodemap: bool,
195 201 ) -> Result<Self, HgError> {
196 202 let index_path = index_path.as_ref();
197 203 let index = {
198 204 match store_vfs.mmap_open_opt(&index_path)? {
199 205 None => Index::new(Box::new(vec![])),
200 206 Some(index_mmap) => {
201 207 let index = Index::new(Box::new(index_mmap))?;
202 208 Ok(index)
203 209 }
204 210 }
205 211 }?;
206 212
207 213 let default_data_path = index_path.with_extension("d");
208 214
209 215 // type annotation required
210 216 // won't recognize Mmap as Deref<Target = [u8]>
211 217 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
212 218 if index.is_inline() {
213 219 None
214 220 } else {
215 221 let data_path = data_path.unwrap_or(&default_data_path);
216 222 let data_mmap = store_vfs.mmap_open(data_path)?;
217 223 Some(Box::new(data_mmap))
218 224 };
219 225
220 226 let nodemap = if index.is_inline() || !use_nodemap {
221 227 None
222 228 } else {
223 229 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
224 230 |(docket, data)| {
225 231 nodemap::NodeTree::load_bytes(
226 232 Box::new(data),
227 233 docket.data_length,
228 234 )
229 235 },
230 236 )
231 237 };
232 238
233 239 Ok(Revlog {
234 240 index,
235 241 data_bytes,
236 242 nodemap,
237 243 })
238 244 }
239 245
240 246 /// Return number of entries of the `Revlog`.
241 247 pub fn len(&self) -> usize {
242 248 self.index.len()
243 249 }
244 250
245 251 /// Returns `true` if the `Revlog` has zero `entries`.
246 252 pub fn is_empty(&self) -> bool {
247 253 self.index.is_empty()
248 254 }
249 255
250 256 /// Returns the node ID for the given revision number, if it exists in this
251 257 /// revlog
252 258 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
253 259 if rev == NULL_REVISION.into() {
254 260 return Some(&NULL_NODE);
255 261 }
256 262 let rev = self.index.check_revision(rev)?;
257 263 Some(self.index.get_entry(rev)?.hash())
258 264 }
259 265
260 266 /// Return the revision number for the given node ID, if it exists in this
261 267 /// revlog
262 268 pub fn rev_from_node(
263 269 &self,
264 270 node: NodePrefix,
265 271 ) -> Result<Revision, RevlogError> {
266 272 let looked_up = if let Some(nodemap) = &self.nodemap {
267 273 nodemap
268 274 .find_bin(&self.index, node)?
269 275 .ok_or(RevlogError::InvalidRevision)
270 276 } else {
271 277 self.rev_from_node_no_persistent_nodemap(node)
272 278 };
273 279
274 280 if node.is_prefix_of(&NULL_NODE) {
275 281 return match looked_up {
276 282 Ok(_) => Err(RevlogError::AmbiguousPrefix),
277 283 Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),
278 284 res => res,
279 285 };
280 286 };
281 287
282 288 looked_up
283 289 }
284 290
285 291 /// Same as `rev_from_node`, without using a persistent nodemap
286 292 ///
287 293 /// This is used as fallback when a persistent nodemap is not present.
288 294 /// This happens when the persistent-nodemap experimental feature is not
289 295 /// enabled, or for small revlogs.
290 296 fn rev_from_node_no_persistent_nodemap(
291 297 &self,
292 298 node: NodePrefix,
293 299 ) -> Result<Revision, RevlogError> {
294 300 // Linear scan of the revlog
295 301 // TODO: consider building a non-persistent nodemap in memory to
296 302 // optimize these cases.
297 303 let mut found_by_prefix = None;
298 304 for rev in (0..self.len() as Revision).rev() {
299 305 let index_entry = self.index.get_entry(rev).ok_or_else(|| {
300 306 HgError::corrupted(
301 307 "revlog references a revision not in the index",
302 308 )
303 309 })?;
304 310 if node == *index_entry.hash() {
305 311 return Ok(rev);
306 312 }
307 313 if node.is_prefix_of(index_entry.hash()) {
308 314 if found_by_prefix.is_some() {
309 315 return Err(RevlogError::AmbiguousPrefix);
310 316 }
311 317 found_by_prefix = Some(rev)
312 318 }
313 319 }
314 320 found_by_prefix.ok_or(RevlogError::InvalidRevision)
315 321 }
316 322
317 323 /// Returns whether the given revision exists in this revlog.
318 324 pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
319 325 self.index.check_revision(rev).is_some()
320 326 }
321 327
322 328 /// Return the full data associated to a revision.
323 329 ///
324 330 /// All entries required to build the final data out of deltas will be
325 331 /// retrieved as needed, and the deltas will be applied to the inital
326 332 /// snapshot to rebuild the final data.
327 333 pub fn get_rev_data(
328 334 &self,
329 335 rev: UncheckedRevision,
330 336 ) -> Result<Cow<[u8]>, RevlogError> {
331 337 if rev == NULL_REVISION.into() {
332 338 return Ok(Cow::Borrowed(&[]));
333 339 };
334 340 self.get_entry(rev)?.data()
335 341 }
336 342
337 343 /// [`Self::get_rev_data`] for checked revisions.
338 344 pub fn get_rev_data_for_checked_rev(
339 345 &self,
340 346 rev: Revision,
341 347 ) -> Result<Cow<[u8]>, RevlogError> {
342 348 if rev == NULL_REVISION {
343 349 return Ok(Cow::Borrowed(&[]));
344 350 };
345 351 self.get_entry_for_checked_rev(rev)?.data()
346 352 }
347 353
348 354 /// Check the hash of some given data against the recorded hash.
349 355 pub fn check_hash(
350 356 &self,
351 357 p1: Revision,
352 358 p2: Revision,
353 359 expected: &[u8],
354 360 data: &[u8],
355 361 ) -> bool {
356 362 let e1 = self.index.get_entry(p1);
357 363 let h1 = match e1 {
358 364 Some(ref entry) => entry.hash(),
359 365 None => &NULL_NODE,
360 366 };
361 367 let e2 = self.index.get_entry(p2);
362 368 let h2 = match e2 {
363 369 Some(ref entry) => entry.hash(),
364 370 None => &NULL_NODE,
365 371 };
366 372
367 373 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
368 374 }
369 375
370 376 /// Build the full data of a revision out its snapshot
371 377 /// and its deltas.
372 378 fn build_data_from_deltas(
373 379 snapshot: RevlogEntry,
374 380 deltas: &[RevlogEntry],
375 381 ) -> Result<Vec<u8>, HgError> {
376 382 let snapshot = snapshot.data_chunk()?;
377 383 let deltas = deltas
378 384 .iter()
379 385 .rev()
380 386 .map(RevlogEntry::data_chunk)
381 387 .collect::<Result<Vec<_>, _>>()?;
382 388 let patches: Vec<_> =
383 389 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
384 390 let patch = patch::fold_patch_lists(&patches);
385 391 Ok(patch.apply(&snapshot))
386 392 }
387 393
388 394 /// Return the revlog data.
389 395 fn data(&self) -> &[u8] {
390 396 match &self.data_bytes {
391 397 Some(data_bytes) => data_bytes,
392 398 None => panic!(
393 399 "forgot to load the data or trying to access inline data"
394 400 ),
395 401 }
396 402 }
397 403
398 404 pub fn make_null_entry(&self) -> RevlogEntry {
399 405 RevlogEntry {
400 406 revlog: self,
401 407 rev: NULL_REVISION,
402 408 bytes: b"",
403 409 compressed_len: 0,
404 410 uncompressed_len: 0,
405 411 base_rev_or_base_of_delta_chain: None,
406 412 p1: NULL_REVISION,
407 413 p2: NULL_REVISION,
408 414 flags: NULL_REVLOG_ENTRY_FLAGS,
409 415 hash: NULL_NODE,
410 416 }
411 417 }
412 418
413 419 fn get_entry_for_checked_rev(
414 420 &self,
415 421 rev: Revision,
416 422 ) -> Result<RevlogEntry, RevlogError> {
417 423 if rev == NULL_REVISION {
418 424 return Ok(self.make_null_entry());
419 425 }
420 426 let index_entry = self
421 427 .index
422 428 .get_entry(rev)
423 429 .ok_or(RevlogError::InvalidRevision)?;
424 430 let start = index_entry.offset();
425 431 let end = start + index_entry.compressed_len() as usize;
426 432 let data = if self.index.is_inline() {
427 433 self.index.data(start, end)
428 434 } else {
429 435 &self.data()[start..end]
430 436 };
431 437 let base_rev = self
432 438 .index
433 439 .check_revision(index_entry.base_revision_or_base_of_delta_chain())
434 440 .ok_or_else(|| {
435 441 RevlogError::corrupted(format!(
436 442 "base revision for rev {} is invalid",
437 443 rev
438 444 ))
439 445 })?;
440 446 let p1 =
441 447 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
442 448 RevlogError::corrupted(format!(
443 449 "p1 for rev {} is invalid",
444 450 rev
445 451 ))
446 452 })?;
447 453 let p2 =
448 454 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
449 455 RevlogError::corrupted(format!(
450 456 "p2 for rev {} is invalid",
451 457 rev
452 458 ))
453 459 })?;
454 460 let entry = RevlogEntry {
455 461 revlog: self,
456 462 rev,
457 463 bytes: data,
458 464 compressed_len: index_entry.compressed_len(),
459 465 uncompressed_len: index_entry.uncompressed_len(),
460 466 base_rev_or_base_of_delta_chain: if base_rev == rev {
461 467 None
462 468 } else {
463 469 Some(base_rev)
464 470 },
465 471 p1,
466 472 p2,
467 473 flags: index_entry.flags(),
468 474 hash: *index_entry.hash(),
469 475 };
470 476 Ok(entry)
471 477 }
472 478
473 479 /// Get an entry of the revlog.
474 480 pub fn get_entry(
475 481 &self,
476 482 rev: UncheckedRevision,
477 483 ) -> Result<RevlogEntry, RevlogError> {
478 484 if rev == NULL_REVISION.into() {
479 485 return Ok(self.make_null_entry());
480 486 }
481 487 let rev = self.index.check_revision(rev).ok_or_else(|| {
482 488 RevlogError::corrupted(format!("rev {} is invalid", rev))
483 489 })?;
484 490 self.get_entry_for_checked_rev(rev)
485 491 }
486 492 }
487 493
488 494 /// The revlog entry's bytes and the necessary informations to extract
489 495 /// the entry's data.
490 496 #[derive(Clone)]
491 497 pub struct RevlogEntry<'revlog> {
492 498 revlog: &'revlog Revlog,
493 499 rev: Revision,
494 500 bytes: &'revlog [u8],
495 501 compressed_len: u32,
496 502 uncompressed_len: i32,
497 503 base_rev_or_base_of_delta_chain: Option<Revision>,
498 504 p1: Revision,
499 505 p2: Revision,
500 506 flags: u16,
501 507 hash: Node,
502 508 }
503 509
504 510 thread_local! {
505 511 // seems fine to [unwrap] here: this can only fail due to memory allocation
506 512 // failing, and it's normal for that to cause panic.
507 513 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
508 514 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
509 515 }
510 516
511 517 fn zstd_decompress_to_buffer(
512 518 bytes: &[u8],
513 519 buf: &mut Vec<u8>,
514 520 ) -> Result<usize, std::io::Error> {
515 521 ZSTD_DECODER
516 522 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
517 523 }
518 524
519 525 impl<'revlog> RevlogEntry<'revlog> {
520 526 pub fn revision(&self) -> Revision {
521 527 self.rev
522 528 }
523 529
524 530 pub fn node(&self) -> &Node {
525 531 &self.hash
526 532 }
527 533
528 534 pub fn uncompressed_len(&self) -> Option<u32> {
529 535 u32::try_from(self.uncompressed_len).ok()
530 536 }
531 537
532 538 pub fn has_p1(&self) -> bool {
533 539 self.p1 != NULL_REVISION
534 540 }
535 541
536 542 pub fn p1_entry(
537 543 &self,
538 544 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
539 545 if self.p1 == NULL_REVISION {
540 546 Ok(None)
541 547 } else {
542 548 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?))
543 549 }
544 550 }
545 551
546 552 pub fn p2_entry(
547 553 &self,
548 554 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
549 555 if self.p2 == NULL_REVISION {
550 556 Ok(None)
551 557 } else {
552 558 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?))
553 559 }
554 560 }
555 561
556 562 pub fn p1(&self) -> Option<Revision> {
557 563 if self.p1 == NULL_REVISION {
558 564 None
559 565 } else {
560 566 Some(self.p1)
561 567 }
562 568 }
563 569
564 570 pub fn p2(&self) -> Option<Revision> {
565 571 if self.p2 == NULL_REVISION {
566 572 None
567 573 } else {
568 574 Some(self.p2)
569 575 }
570 576 }
571 577
572 578 pub fn is_censored(&self) -> bool {
573 579 (self.flags & REVISION_FLAG_CENSORED) != 0
574 580 }
575 581
576 582 pub fn has_length_affecting_flag_processor(&self) -> bool {
577 583 // Relevant Python code: revlog.size()
578 584 // note: ELLIPSIS is known to not change the content
579 585 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
580 586 }
581 587
582 588 /// The data for this entry, after resolving deltas if any.
583 589 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
584 590 let mut entry = self.clone();
585 591 let mut delta_chain = vec![];
586 592
587 593 // The meaning of `base_rev_or_base_of_delta_chain` depends on
588 594 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
589 595 // `mercurial/revlogutils/constants.py` and the code in
590 596 // [_chaininfo] and in [index_deltachain].
591 597 let uses_generaldelta = self.revlog.index.uses_generaldelta();
592 598 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
593 599 entry = if uses_generaldelta {
594 600 delta_chain.push(entry);
595 601 self.revlog.get_entry_for_checked_rev(base_rev)?
596 602 } else {
597 603 let base_rev = UncheckedRevision(entry.rev - 1);
598 604 delta_chain.push(entry);
599 605 self.revlog.get_entry(base_rev)?
600 606 };
601 607 }
602 608
603 609 let data = if delta_chain.is_empty() {
604 610 entry.data_chunk()?
605 611 } else {
606 612 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
607 613 };
608 614
609 615 Ok(data)
610 616 }
611 617
612 618 fn check_data(
613 619 &self,
614 620 data: Cow<'revlog, [u8]>,
615 621 ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
616 622 if self.revlog.check_hash(
617 623 self.p1,
618 624 self.p2,
619 625 self.hash.as_bytes(),
620 626 &data,
621 627 ) {
622 628 Ok(data)
623 629 } else {
624 630 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
625 631 return Err(HgError::unsupported(
626 632 "ellipsis revisions are not supported by rhg",
627 633 )
628 634 .into());
629 635 }
630 636 Err(corrupted(format!(
631 637 "hash check failed for revision {}",
632 638 self.rev
633 639 ))
634 640 .into())
635 641 }
636 642 }
637 643
638 644 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
639 645 let data = self.rawdata()?;
640 646 if self.rev == NULL_REVISION {
641 647 return Ok(data);
642 648 }
643 649 if self.is_censored() {
644 650 return Err(HgError::CensoredNodeError.into());
645 651 }
646 652 self.check_data(data)
647 653 }
648 654
649 655 /// Extract the data contained in the entry.
650 656 /// This may be a delta. (See `is_delta`.)
651 657 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
652 658 if self.bytes.is_empty() {
653 659 return Ok(Cow::Borrowed(&[]));
654 660 }
655 661 match self.bytes[0] {
656 662 // Revision data is the entirety of the entry, including this
657 663 // header.
658 664 b'\0' => Ok(Cow::Borrowed(self.bytes)),
659 665 // Raw revision data follows.
660 666 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
661 667 // zlib (RFC 1950) data.
662 668 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
663 669 // zstd data.
664 670 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
665 671 // A proper new format should have had a repo/store requirement.
666 672 format_type => Err(corrupted(format!(
667 673 "unknown compression header '{}'",
668 674 format_type
669 675 ))),
670 676 }
671 677 }
672 678
673 679 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
674 680 let mut decoder = ZlibDecoder::new(self.bytes);
675 681 if self.is_delta() {
676 682 let mut buf = Vec::with_capacity(self.compressed_len as usize);
677 683 decoder
678 684 .read_to_end(&mut buf)
679 685 .map_err(|e| corrupted(e.to_string()))?;
680 686 Ok(buf)
681 687 } else {
682 688 let cap = self.uncompressed_len.max(0) as usize;
683 689 let mut buf = vec![0; cap];
684 690 decoder
685 691 .read_exact(&mut buf)
686 692 .map_err(|e| corrupted(e.to_string()))?;
687 693 Ok(buf)
688 694 }
689 695 }
690 696
691 697 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
692 698 let cap = self.uncompressed_len.max(0) as usize;
693 699 if self.is_delta() {
694 700 // [cap] is usually an over-estimate of the space needed because
695 701 // it's the length of delta-decoded data, but we're interested
696 702 // in the size of the delta.
697 703 // This means we have to [shrink_to_fit] to avoid holding on
698 704 // to a large chunk of memory, but it also means we must have a
699 705 // fallback branch, for the case when the delta is longer than
700 706 // the original data (surprisingly, this does happen in practice)
701 707 let mut buf = Vec::with_capacity(cap);
702 708 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
703 709 Ok(_) => buf.shrink_to_fit(),
704 710 Err(_) => {
705 711 buf.clear();
706 712 zstd::stream::copy_decode(self.bytes, &mut buf)
707 713 .map_err(|e| corrupted(e.to_string()))?;
708 714 }
709 715 };
710 716 Ok(buf)
711 717 } else {
712 718 let mut buf = Vec::with_capacity(cap);
713 719 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
714 720 .map_err(|e| corrupted(e.to_string()))?;
715 721 if len != self.uncompressed_len as usize {
716 722 Err(corrupted("uncompressed length does not match"))
717 723 } else {
718 724 Ok(buf)
719 725 }
720 726 }
721 727 }
722 728
723 729 /// Tell if the entry is a snapshot or a delta
724 730 /// (influences on decompression).
725 731 fn is_delta(&self) -> bool {
726 732 self.base_rev_or_base_of_delta_chain.is_some()
727 733 }
728 734 }
729 735
730 736 /// Calculate the hash of a revision given its data and its parents.
731 737 fn hash(
732 738 data: &[u8],
733 739 p1_hash: &[u8],
734 740 p2_hash: &[u8],
735 741 ) -> [u8; NODE_BYTES_LENGTH] {
736 742 let mut hasher = Sha1::new();
737 743 let (a, b) = (p1_hash, p2_hash);
738 744 if a > b {
739 745 hasher.update(b);
740 746 hasher.update(a);
741 747 } else {
742 748 hasher.update(a);
743 749 hasher.update(b);
744 750 }
745 751 hasher.update(data);
746 752 *hasher.finalize().as_ref()
747 753 }
748 754
749 755 #[cfg(test)]
750 756 mod tests {
751 757 use super::*;
752 758 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
753 759 use itertools::Itertools;
754 760
755 761 #[test]
756 762 fn test_empty() {
757 763 let temp = tempfile::tempdir().unwrap();
758 764 let vfs = Vfs { base: temp.path() };
759 765 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
760 766 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
761 767 assert!(revlog.is_empty());
762 768 assert_eq!(revlog.len(), 0);
763 769 assert!(revlog.get_entry(0.into()).is_err());
764 770 assert!(!revlog.has_rev(0.into()));
765 771 assert_eq!(
766 772 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
767 773 NULL_REVISION
768 774 );
769 775 let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
770 776 assert_eq!(null_entry.revision(), NULL_REVISION);
771 777 assert!(null_entry.data().unwrap().is_empty());
772 778 }
773 779
774 780 #[test]
775 781 fn test_inline() {
776 782 let temp = tempfile::tempdir().unwrap();
777 783 let vfs = Vfs { base: temp.path() };
778 784 let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
779 785 .unwrap();
780 786 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
781 787 .unwrap();
782 788 let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
783 789 .unwrap();
784 790 let entry0_bytes = IndexEntryBuilder::new()
785 791 .is_first(true)
786 792 .with_version(1)
787 793 .with_inline(true)
788 794 .with_offset(INDEX_ENTRY_SIZE)
789 795 .with_node(node0)
790 796 .build();
791 797 let entry1_bytes = IndexEntryBuilder::new()
792 798 .with_offset(INDEX_ENTRY_SIZE)
793 799 .with_node(node1)
794 800 .build();
795 801 let entry2_bytes = IndexEntryBuilder::new()
796 802 .with_offset(INDEX_ENTRY_SIZE)
797 803 .with_p1(0)
798 804 .with_p2(1)
799 805 .with_node(node2)
800 806 .build();
801 807 let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
802 808 .into_iter()
803 809 .flatten()
804 810 .collect_vec();
805 811 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
806 812 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
807 813
808 814 let entry0 = revlog.get_entry(0.into()).ok().unwrap();
809 815 assert_eq!(entry0.revision(), 0);
810 816 assert_eq!(*entry0.node(), node0);
811 817 assert!(!entry0.has_p1());
812 818 assert_eq!(entry0.p1(), None);
813 819 assert_eq!(entry0.p2(), None);
814 820 let p1_entry = entry0.p1_entry().unwrap();
815 821 assert!(p1_entry.is_none());
816 822 let p2_entry = entry0.p2_entry().unwrap();
817 823 assert!(p2_entry.is_none());
818 824
819 825 let entry1 = revlog.get_entry(1.into()).ok().unwrap();
820 826 assert_eq!(entry1.revision(), 1);
821 827 assert_eq!(*entry1.node(), node1);
822 828 assert!(!entry1.has_p1());
823 829 assert_eq!(entry1.p1(), None);
824 830 assert_eq!(entry1.p2(), None);
825 831 let p1_entry = entry1.p1_entry().unwrap();
826 832 assert!(p1_entry.is_none());
827 833 let p2_entry = entry1.p2_entry().unwrap();
828 834 assert!(p2_entry.is_none());
829 835
830 836 let entry2 = revlog.get_entry(2.into()).ok().unwrap();
831 837 assert_eq!(entry2.revision(), 2);
832 838 assert_eq!(*entry2.node(), node2);
833 839 assert!(entry2.has_p1());
834 840 assert_eq!(entry2.p1(), Some(0));
835 841 assert_eq!(entry2.p2(), Some(1));
836 842 let p1_entry = entry2.p1_entry().unwrap();
837 843 assert!(p1_entry.is_some());
838 844 assert_eq!(p1_entry.unwrap().revision(), 0);
839 845 let p2_entry = entry2.p2_entry().unwrap();
840 846 assert!(p2_entry.is_some());
841 847 assert_eq!(p2_entry.unwrap().revision(), 1);
842 848 }
843 849
844 850 #[test]
845 851 fn test_nodemap() {
846 852 let temp = tempfile::tempdir().unwrap();
847 853 let vfs = Vfs { base: temp.path() };
848 854
849 855 // building a revlog with a forced Node starting with zeros
850 856 // This is a corruption, but it does not preclude using the nodemap
851 857 // if we don't try and access the data
852 858 let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
853 859 .unwrap();
854 860 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
855 861 .unwrap();
856 862 let entry0_bytes = IndexEntryBuilder::new()
857 863 .is_first(true)
858 864 .with_version(1)
859 865 .with_inline(true)
860 866 .with_offset(INDEX_ENTRY_SIZE)
861 867 .with_node(node0)
862 868 .build();
863 869 let entry1_bytes = IndexEntryBuilder::new()
864 870 .with_offset(INDEX_ENTRY_SIZE)
865 871 .with_node(node1)
866 872 .build();
867 873 let contents = vec![entry0_bytes, entry1_bytes]
868 874 .into_iter()
869 875 .flatten()
870 876 .collect_vec();
871 877 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
872 878 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
873 879
874 880 // accessing the data shows the corruption
875 881 revlog.get_entry(0.into()).unwrap().data().unwrap_err();
876 882
877 883 assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
878 884 assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
879 885 assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
880 886 assert_eq!(
881 887 revlog
882 888 .rev_from_node(NodePrefix::from_hex("000").unwrap())
883 889 .unwrap(),
884 890 -1
885 891 );
886 892 assert_eq!(
887 893 revlog
888 894 .rev_from_node(NodePrefix::from_hex("b00").unwrap())
889 895 .unwrap(),
890 896 1
891 897 );
892 898 // RevlogError does not implement PartialEq
893 899 // (ultimately because io::Error does not)
894 900 match revlog
895 901 .rev_from_node(NodePrefix::from_hex("00").unwrap())
896 902 .expect_err("Expected to give AmbiguousPrefix error")
897 903 {
898 904 RevlogError::AmbiguousPrefix => (),
899 905 e => {
900 906 panic!("Got another error than AmbiguousPrefix: {:?}", e);
901 907 }
902 908 };
903 909 }
904 910 }
General Comments 0
You need to be logged in to leave comments. Login now