rust: implement the `Graph` trait for all revlogs...
Raphaël Gomès
r51871:27e773aa default
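The change adds `Graph` implementations to the three revlog flavours shown in the hunks below, which appear to correspond to the changelog, filelog, and revlog index modules of `hg-core`. In each case the import list gains `Graph` and `GraphError` from the crate root, and a small `impl Graph for ...` block is added: `Changelog` and `Filelog` delegate `parents()` to their underlying `Revlog`, while `Index` resolves and validates the parent revisions stored in its entries. The trait definition itself is not part of this diff; the sketch below reconstructs its presumed shape from the signatures of those `impl` blocks.

// Presumed shape of the trait implemented in this change, reconstructed
// from the `impl Graph for ...` blocks below (not copied from the source).
pub trait Graph {
    /// Return the two parents of `rev`, using `NULL_REVISION` for a
    /// missing parent.
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
}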
@@ -1,353 +1,359 @@
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::revlog::Revision;
2 use crate::revlog::Revision;
3 use crate::revlog::{Node, NodePrefix};
3 use crate::revlog::{Node, NodePrefix};
4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
5 use crate::utils::hg_path::HgPath;
5 use crate::utils::hg_path::HgPath;
6 use crate::vfs::Vfs;
6 use crate::vfs::Vfs;
7 use crate::UncheckedRevision;
7 use crate::{Graph, GraphError, UncheckedRevision};
8 use itertools::Itertools;
8 use itertools::Itertools;
9 use std::ascii::escape_default;
9 use std::ascii::escape_default;
10 use std::borrow::Cow;
10 use std::borrow::Cow;
11 use std::fmt::{Debug, Formatter};
11 use std::fmt::{Debug, Formatter};
12
12
13 /// A specialized `Revlog` to work with changelog data format.
13 /// A specialized `Revlog` to work with changelog data format.
14 pub struct Changelog {
14 pub struct Changelog {
15 /// The generic `revlog` format.
15 /// The generic `revlog` format.
16 pub(crate) revlog: Revlog,
16 pub(crate) revlog: Revlog,
17 }
17 }
18
18
19 impl Changelog {
19 impl Changelog {
20 /// Open the `changelog` of a repository given by its root.
20 /// Open the `changelog` of a repository given by its root.
21 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
21 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
22 let revlog =
22 let revlog =
23 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
23 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
24 Ok(Self { revlog })
24 Ok(Self { revlog })
25 }
25 }
26
26
27 /// Return the `ChangelogRevisionData` for the given node ID.
27 /// Return the `ChangelogRevisionData` for the given node ID.
28 pub fn data_for_node(
28 pub fn data_for_node(
29 &self,
29 &self,
30 node: NodePrefix,
30 node: NodePrefix,
31 ) -> Result<ChangelogRevisionData, RevlogError> {
31 ) -> Result<ChangelogRevisionData, RevlogError> {
32 let rev = self.revlog.rev_from_node(node)?;
32 let rev = self.revlog.rev_from_node(node)?;
33 self.entry_for_checked_rev(rev)?.data()
33 self.entry_for_checked_rev(rev)?.data()
34 }
34 }
35
35
36 /// Return the [`ChangelogEntry`] for the given revision number.
36 /// Return the [`ChangelogEntry`] for the given revision number.
37 pub fn entry_for_rev(
37 pub fn entry_for_rev(
38 &self,
38 &self,
39 rev: UncheckedRevision,
39 rev: UncheckedRevision,
40 ) -> Result<ChangelogEntry, RevlogError> {
40 ) -> Result<ChangelogEntry, RevlogError> {
41 let revlog_entry = self.revlog.get_entry(rev)?;
41 let revlog_entry = self.revlog.get_entry(rev)?;
42 Ok(ChangelogEntry { revlog_entry })
42 Ok(ChangelogEntry { revlog_entry })
43 }
43 }
44
44
45 /// Same as [`Self::entry_for_rev`] for checked revisions.
45 /// Same as [`Self::entry_for_rev`] for checked revisions.
46 fn entry_for_checked_rev(
46 fn entry_for_checked_rev(
47 &self,
47 &self,
48 rev: Revision,
48 rev: Revision,
49 ) -> Result<ChangelogEntry, RevlogError> {
49 ) -> Result<ChangelogEntry, RevlogError> {
50 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
50 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
51 Ok(ChangelogEntry { revlog_entry })
51 Ok(ChangelogEntry { revlog_entry })
52 }
52 }
53
53
54 /// Return the [`ChangelogRevisionData`] for the given revision number.
54 /// Return the [`ChangelogRevisionData`] for the given revision number.
55 ///
55 ///
56 /// This is a useful shortcut in case the caller does not need the
56 /// This is a useful shortcut in case the caller does not need the
57 /// generic revlog information (parents, hashes etc). Otherwise
57 /// generic revlog information (parents, hashes etc). Otherwise
58 /// consider taking a [`ChangelogEntry`] with
58 /// consider taking a [`ChangelogEntry`] with
59 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
59 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
60 pub fn data_for_rev(
60 pub fn data_for_rev(
61 &self,
61 &self,
62 rev: UncheckedRevision,
62 rev: UncheckedRevision,
63 ) -> Result<ChangelogRevisionData, RevlogError> {
63 ) -> Result<ChangelogRevisionData, RevlogError> {
64 self.entry_for_rev(rev)?.data()
64 self.entry_for_rev(rev)?.data()
65 }
65 }
66
66
67 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
67 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
68 self.revlog.node_from_rev(rev)
68 self.revlog.node_from_rev(rev)
69 }
69 }
70
70
71 pub fn rev_from_node(
71 pub fn rev_from_node(
72 &self,
72 &self,
73 node: NodePrefix,
73 node: NodePrefix,
74 ) -> Result<Revision, RevlogError> {
74 ) -> Result<Revision, RevlogError> {
75 self.revlog.rev_from_node(node)
75 self.revlog.rev_from_node(node)
76 }
76 }
77 }
77 }
78
78
79 impl Graph for Changelog {
80 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
81 self.revlog.parents(rev)
82 }
83 }
84
79 /// A specialized `RevlogEntry` for `changelog` data format
85 /// A specialized `RevlogEntry` for `changelog` data format
80 ///
86 ///
81 /// This is a `RevlogEntry` with the added semantics that the associated
87 /// This is a `RevlogEntry` with the added semantics that the associated
82 /// data should meet the requirements for `changelog`, materialized by
88 /// data should meet the requirements for `changelog`, materialized by
83 /// the fact that `data()` constructs a `ChangelogRevisionData`.
89 /// the fact that `data()` constructs a `ChangelogRevisionData`.
84 /// In case that promise would be broken, the `data` method returns an error.
90 /// In case that promise would be broken, the `data` method returns an error.
85 #[derive(Clone)]
91 #[derive(Clone)]
86 pub struct ChangelogEntry<'changelog> {
92 pub struct ChangelogEntry<'changelog> {
87 /// Same data, as a generic `RevlogEntry`.
93 /// Same data, as a generic `RevlogEntry`.
88 pub(crate) revlog_entry: RevlogEntry<'changelog>,
94 pub(crate) revlog_entry: RevlogEntry<'changelog>,
89 }
95 }
90
96
91 impl<'changelog> ChangelogEntry<'changelog> {
97 impl<'changelog> ChangelogEntry<'changelog> {
92 pub fn data<'a>(
98 pub fn data<'a>(
93 &'a self,
99 &'a self,
94 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
100 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
95 let bytes = self.revlog_entry.data()?;
101 let bytes = self.revlog_entry.data()?;
96 if bytes.is_empty() {
102 if bytes.is_empty() {
97 Ok(ChangelogRevisionData::null())
103 Ok(ChangelogRevisionData::null())
98 } else {
104 } else {
99 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
105 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
100 RevlogError::Other(HgError::CorruptedRepository(format!(
106 RevlogError::Other(HgError::CorruptedRepository(format!(
101 "Invalid changelog data for revision {}: {:?}",
107 "Invalid changelog data for revision {}: {:?}",
102 self.revlog_entry.revision(),
108 self.revlog_entry.revision(),
103 err
109 err
104 )))
110 )))
105 })?)
111 })?)
106 }
112 }
107 }
113 }
108
114
109 /// Obtain a reference to the underlying `RevlogEntry`.
115 /// Obtain a reference to the underlying `RevlogEntry`.
110 ///
116 ///
111 /// This allows the caller to access the information that is common
117 /// This allows the caller to access the information that is common
112 /// to all revlog entries: revision number, node id, parent revisions etc.
118 /// to all revlog entries: revision number, node id, parent revisions etc.
113 pub fn as_revlog_entry(&self) -> &RevlogEntry {
119 pub fn as_revlog_entry(&self) -> &RevlogEntry {
114 &self.revlog_entry
120 &self.revlog_entry
115 }
121 }
116
122
117 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
123 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
118 Ok(self
124 Ok(self
119 .revlog_entry
125 .revlog_entry
120 .p1_entry()?
126 .p1_entry()?
121 .map(|revlog_entry| Self { revlog_entry }))
127 .map(|revlog_entry| Self { revlog_entry }))
122 }
128 }
123
129
124 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
130 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
125 Ok(self
131 Ok(self
126 .revlog_entry
132 .revlog_entry
127 .p2_entry()?
133 .p2_entry()?
128 .map(|revlog_entry| Self { revlog_entry }))
134 .map(|revlog_entry| Self { revlog_entry }))
129 }
135 }
130 }
136 }
131
137
132 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
138 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
133 #[derive(PartialEq)]
139 #[derive(PartialEq)]
134 pub struct ChangelogRevisionData<'changelog> {
140 pub struct ChangelogRevisionData<'changelog> {
135 /// The data bytes of the `changelog` entry.
141 /// The data bytes of the `changelog` entry.
136 bytes: Cow<'changelog, [u8]>,
142 bytes: Cow<'changelog, [u8]>,
137 /// The end offset for the hex manifest (not including the newline)
143 /// The end offset for the hex manifest (not including the newline)
138 manifest_end: usize,
144 manifest_end: usize,
139 /// The end offset for the user+email (not including the newline)
145 /// The end offset for the user+email (not including the newline)
140 user_end: usize,
146 user_end: usize,
141 /// The end offset for the timestamp+timezone+extras (not including the
147 /// The end offset for the timestamp+timezone+extras (not including the
142 /// newline)
148 /// newline)
143 timestamp_end: usize,
149 timestamp_end: usize,
144 /// The end offset for the file list (not including the newline)
150 /// The end offset for the file list (not including the newline)
145 files_end: usize,
151 files_end: usize,
146 }
152 }
147
153
148 impl<'changelog> ChangelogRevisionData<'changelog> {
154 impl<'changelog> ChangelogRevisionData<'changelog> {
149 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
155 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
150 let mut line_iter = bytes.split(|b| b == &b'\n');
156 let mut line_iter = bytes.split(|b| b == &b'\n');
151 let manifest_end = line_iter
157 let manifest_end = line_iter
152 .next()
158 .next()
153 .expect("Empty iterator from split()?")
159 .expect("Empty iterator from split()?")
154 .len();
160 .len();
155 let user_slice = line_iter.next().ok_or_else(|| {
161 let user_slice = line_iter.next().ok_or_else(|| {
156 HgError::corrupted("Changeset data truncated after manifest line")
162 HgError::corrupted("Changeset data truncated after manifest line")
157 })?;
163 })?;
158 let user_end = manifest_end + 1 + user_slice.len();
164 let user_end = manifest_end + 1 + user_slice.len();
159 let timestamp_slice = line_iter.next().ok_or_else(|| {
165 let timestamp_slice = line_iter.next().ok_or_else(|| {
160 HgError::corrupted("Changeset data truncated after user line")
166 HgError::corrupted("Changeset data truncated after user line")
161 })?;
167 })?;
162 let timestamp_end = user_end + 1 + timestamp_slice.len();
168 let timestamp_end = user_end + 1 + timestamp_slice.len();
163 let mut files_end = timestamp_end + 1;
169 let mut files_end = timestamp_end + 1;
164 loop {
170 loop {
165 let line = line_iter.next().ok_or_else(|| {
171 let line = line_iter.next().ok_or_else(|| {
166 HgError::corrupted("Changeset data truncated in files list")
172 HgError::corrupted("Changeset data truncated in files list")
167 })?;
173 })?;
168 if line.is_empty() {
174 if line.is_empty() {
169 if files_end == bytes.len() {
175 if files_end == bytes.len() {
170 // The list of files ended with a single newline (there
176 // The list of files ended with a single newline (there
171 // should be two)
177 // should be two)
172 return Err(HgError::corrupted(
178 return Err(HgError::corrupted(
173 "Changeset data truncated after files list",
179 "Changeset data truncated after files list",
174 ));
180 ));
175 }
181 }
176 files_end -= 1;
182 files_end -= 1;
177 break;
183 break;
178 }
184 }
179 files_end += line.len() + 1;
185 files_end += line.len() + 1;
180 }
186 }
181
187
182 Ok(Self {
188 Ok(Self {
183 bytes,
189 bytes,
184 manifest_end,
190 manifest_end,
185 user_end,
191 user_end,
186 timestamp_end,
192 timestamp_end,
187 files_end,
193 files_end,
188 })
194 })
189 }
195 }
190
196
191 fn null() -> Self {
197 fn null() -> Self {
192 Self::new(Cow::Borrowed(
198 Self::new(Cow::Borrowed(
193 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
199 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
194 ))
200 ))
195 .unwrap()
201 .unwrap()
196 }
202 }
197
203
198 /// Return an iterator over the lines of the entry.
204 /// Return an iterator over the lines of the entry.
199 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
205 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
200 self.bytes.split(|b| b == &b'\n')
206 self.bytes.split(|b| b == &b'\n')
201 }
207 }
202
208
203 /// Return the node id of the `manifest` referenced by this `changelog`
209 /// Return the node id of the `manifest` referenced by this `changelog`
204 /// entry.
210 /// entry.
205 pub fn manifest_node(&self) -> Result<Node, HgError> {
211 pub fn manifest_node(&self) -> Result<Node, HgError> {
206 let manifest_node_hex = &self.bytes[..self.manifest_end];
212 let manifest_node_hex = &self.bytes[..self.manifest_end];
207 Node::from_hex_for_repo(manifest_node_hex)
213 Node::from_hex_for_repo(manifest_node_hex)
208 }
214 }
209
215
210 /// The full user string (usually a name followed by an email enclosed in
216 /// The full user string (usually a name followed by an email enclosed in
211 /// angle brackets)
217 /// angle brackets)
212 pub fn user(&self) -> &[u8] {
218 pub fn user(&self) -> &[u8] {
213 &self.bytes[self.manifest_end + 1..self.user_end]
219 &self.bytes[self.manifest_end + 1..self.user_end]
214 }
220 }
215
221
216 /// The full timestamp line (timestamp in seconds, offset in seconds, and
222 /// The full timestamp line (timestamp in seconds, offset in seconds, and
217 /// possibly extras)
223 /// possibly extras)
218 // TODO: We should expose this in a more useful way
224 // TODO: We should expose this in a more useful way
219 pub fn timestamp_line(&self) -> &[u8] {
225 pub fn timestamp_line(&self) -> &[u8] {
220 &self.bytes[self.user_end + 1..self.timestamp_end]
226 &self.bytes[self.user_end + 1..self.timestamp_end]
221 }
227 }
222
228
223 /// The files changed in this revision.
229 /// The files changed in this revision.
224 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
230 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
225 self.bytes[self.timestamp_end + 1..self.files_end]
231 self.bytes[self.timestamp_end + 1..self.files_end]
226 .split(|b| b == &b'\n')
232 .split(|b| b == &b'\n')
227 .map(HgPath::new)
233 .map(HgPath::new)
228 }
234 }
229
235
230 /// The change description.
236 /// The change description.
231 pub fn description(&self) -> &[u8] {
237 pub fn description(&self) -> &[u8] {
232 &self.bytes[self.files_end + 2..]
238 &self.bytes[self.files_end + 2..]
233 }
239 }
234 }
240 }
235
241
236 impl Debug for ChangelogRevisionData<'_> {
242 impl Debug for ChangelogRevisionData<'_> {
237 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
243 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
238 f.debug_struct("ChangelogRevisionData")
244 f.debug_struct("ChangelogRevisionData")
239 .field("bytes", &debug_bytes(&self.bytes))
245 .field("bytes", &debug_bytes(&self.bytes))
240 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
246 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
241 .field(
247 .field(
242 "user",
248 "user",
243 &debug_bytes(
249 &debug_bytes(
244 &self.bytes[self.manifest_end + 1..self.user_end],
250 &self.bytes[self.manifest_end + 1..self.user_end],
245 ),
251 ),
246 )
252 )
247 .field(
253 .field(
248 "timestamp",
254 "timestamp",
249 &debug_bytes(
255 &debug_bytes(
250 &self.bytes[self.user_end + 1..self.timestamp_end],
256 &self.bytes[self.user_end + 1..self.timestamp_end],
251 ),
257 ),
252 )
258 )
253 .field(
259 .field(
254 "files",
260 "files",
255 &debug_bytes(
261 &debug_bytes(
256 &self.bytes[self.timestamp_end + 1..self.files_end],
262 &self.bytes[self.timestamp_end + 1..self.files_end],
257 ),
263 ),
258 )
264 )
259 .field(
265 .field(
260 "description",
266 "description",
261 &debug_bytes(&self.bytes[self.files_end + 2..]),
267 &debug_bytes(&self.bytes[self.files_end + 2..]),
262 )
268 )
263 .finish()
269 .finish()
264 }
270 }
265 }
271 }
266
272
267 fn debug_bytes(bytes: &[u8]) -> String {
273 fn debug_bytes(bytes: &[u8]) -> String {
268 String::from_utf8_lossy(
274 String::from_utf8_lossy(
269 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
275 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
270 )
276 )
271 .to_string()
277 .to_string()
272 }
278 }
273
279
274 #[cfg(test)]
280 #[cfg(test)]
275 mod tests {
281 mod tests {
276 use super::*;
282 use super::*;
277 use crate::vfs::Vfs;
283 use crate::vfs::Vfs;
278 use crate::NULL_REVISION;
284 use crate::NULL_REVISION;
279 use pretty_assertions::assert_eq;
285 use pretty_assertions::assert_eq;
280
286
281 #[test]
287 #[test]
282 fn test_create_changelogrevisiondata_invalid() {
288 fn test_create_changelogrevisiondata_invalid() {
283 // Completely empty
289 // Completely empty
284 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
290 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
285 // No newline after manifest
291 // No newline after manifest
286 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
292 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
287 // No newline after user
293 // No newline after user
288 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
294 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
289 // No newline after timestamp
295 // No newline after timestamp
290 assert!(
296 assert!(
291 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
297 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
292 );
298 );
293 // Missing newline after files
299 // Missing newline after files
294 assert!(ChangelogRevisionData::new(Cow::Borrowed(
300 assert!(ChangelogRevisionData::new(Cow::Borrowed(
295 b"abcd\n\n0 0\nfile1\nfile2"
301 b"abcd\n\n0 0\nfile1\nfile2"
296 ))
302 ))
297 .is_err(),);
303 .is_err(),);
298 // Only one newline after files
304 // Only one newline after files
299 assert!(ChangelogRevisionData::new(Cow::Borrowed(
305 assert!(ChangelogRevisionData::new(Cow::Borrowed(
300 b"abcd\n\n0 0\nfile1\nfile2\n"
306 b"abcd\n\n0 0\nfile1\nfile2\n"
301 ))
307 ))
302 .is_err(),);
308 .is_err(),);
303 }
309 }
304
310
305 #[test]
311 #[test]
306 fn test_create_changelogrevisiondata() {
312 fn test_create_changelogrevisiondata() {
307 let data = ChangelogRevisionData::new(Cow::Borrowed(
313 let data = ChangelogRevisionData::new(Cow::Borrowed(
308 b"0123456789abcdef0123456789abcdef01234567
314 b"0123456789abcdef0123456789abcdef01234567
309 Some One <someone@example.com>
315 Some One <someone@example.com>
310 0 0
316 0 0
311 file1
317 file1
312 file2
318 file2
313
319
314 some
320 some
315 commit
321 commit
316 message",
322 message",
317 ))
323 ))
318 .unwrap();
324 .unwrap();
319 assert_eq!(
325 assert_eq!(
320 data.manifest_node().unwrap(),
326 data.manifest_node().unwrap(),
321 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
327 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
322 .unwrap()
328 .unwrap()
323 );
329 );
324 assert_eq!(data.user(), b"Some One <someone@example.com>");
330 assert_eq!(data.user(), b"Some One <someone@example.com>");
325 assert_eq!(data.timestamp_line(), b"0 0");
331 assert_eq!(data.timestamp_line(), b"0 0");
326 assert_eq!(
332 assert_eq!(
327 data.files().collect_vec(),
333 data.files().collect_vec(),
328 vec![HgPath::new("file1"), HgPath::new("file2")]
334 vec![HgPath::new("file1"), HgPath::new("file2")]
329 );
335 );
330 assert_eq!(data.description(), b"some\ncommit\nmessage");
336 assert_eq!(data.description(), b"some\ncommit\nmessage");
331 }
337 }
332
338
333 #[test]
339 #[test]
334 fn test_data_from_rev_null() -> Result<(), RevlogError> {
340 fn test_data_from_rev_null() -> Result<(), RevlogError> {
335 // an empty revlog will be enough for this case
341 // an empty revlog will be enough for this case
336 let temp = tempfile::tempdir().unwrap();
342 let temp = tempfile::tempdir().unwrap();
337 let vfs = Vfs { base: temp.path() };
343 let vfs = Vfs { base: temp.path() };
338 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
344 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
339 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
345 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
340
346
341 let changelog = Changelog { revlog };
347 let changelog = Changelog { revlog };
342 assert_eq!(
348 assert_eq!(
343 changelog.data_for_rev(NULL_REVISION.into())?,
349 changelog.data_for_rev(NULL_REVISION.into())?,
344 ChangelogRevisionData::null()
350 ChangelogRevisionData::null()
345 );
351 );
346 // same with the intermediate entry object
352 // same with the intermediate entry object
347 assert_eq!(
353 assert_eq!(
348 changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
354 changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
349 ChangelogRevisionData::null()
355 ChangelogRevisionData::null()
350 );
356 );
351 Ok(())
357 Ok(())
352 }
358 }
353 }
359 }
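With `Graph` implemented directly on `Changelog` above (and on `Filelog` and `Index` in the hunks that follow), generic graph code can accept any of them. A minimal usage sketch, assuming the `Graph`, `GraphError`, `Revision`, and `NULL_REVISION` items exactly as they are imported in this diff; `first_parent_chain` is a made-up helper for illustration, not part of the patch:

use crate::revlog::Revision;
use crate::{Graph, GraphError, NULL_REVISION};

/// Collect `rev` and its chain of first parents down to the root.
fn first_parent_chain(
    graph: &impl Graph,
    mut rev: Revision,
) -> Result<Vec<Revision>, GraphError> {
    let mut chain = vec![rev];
    loop {
        let [p1, _p2] = graph.parents(rev)?;
        if p1 == NULL_REVISION {
            return Ok(chain);
        }
        chain.push(p1);
        rev = p1;
    }
}

The same function works unchanged whether it is handed a `&Changelog`, a `&Filelog`, or a bare `&Index`, which is the point of pushing the trait down to all revlog types.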
@@ -1,231 +1,239 @@
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::exit_codes;
2 use crate::exit_codes;
3 use crate::repo::Repo;
3 use crate::repo::Repo;
4 use crate::revlog::path_encode::path_encode;
4 use crate::revlog::path_encode::path_encode;
5 use crate::revlog::NodePrefix;
5 use crate::revlog::NodePrefix;
6 use crate::revlog::Revision;
6 use crate::revlog::Revision;
7 use crate::revlog::RevlogEntry;
7 use crate::revlog::RevlogEntry;
8 use crate::revlog::{Revlog, RevlogError};
8 use crate::revlog::{Revlog, RevlogError};
9 use crate::utils::files::get_path_from_bytes;
9 use crate::utils::files::get_path_from_bytes;
10 use crate::utils::hg_path::HgPath;
10 use crate::utils::hg_path::HgPath;
11 use crate::utils::SliceExt;
11 use crate::utils::SliceExt;
12 use crate::Graph;
13 use crate::GraphError;
12 use crate::UncheckedRevision;
14 use crate::UncheckedRevision;
13 use std::path::PathBuf;
15 use std::path::PathBuf;
14
16
15 /// A specialized `Revlog` to work with file data logs.
17 /// A specialized `Revlog` to work with file data logs.
16 pub struct Filelog {
18 pub struct Filelog {
17 /// The generic `revlog` format.
19 /// The generic `revlog` format.
18 revlog: Revlog,
20 revlog: Revlog,
19 }
21 }
20
22
23 impl Graph for Filelog {
24 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
25 self.revlog.parents(rev)
26 }
27 }
28
21 impl Filelog {
29 impl Filelog {
22 pub fn open_vfs(
30 pub fn open_vfs(
23 store_vfs: &crate::vfs::Vfs<'_>,
31 store_vfs: &crate::vfs::Vfs<'_>,
24 file_path: &HgPath,
32 file_path: &HgPath,
25 ) -> Result<Self, HgError> {
33 ) -> Result<Self, HgError> {
26 let index_path = store_path(file_path, b".i");
34 let index_path = store_path(file_path, b".i");
27 let data_path = store_path(file_path, b".d");
35 let data_path = store_path(file_path, b".d");
28 let revlog =
36 let revlog =
29 Revlog::open(store_vfs, index_path, Some(&data_path), false)?;
37 Revlog::open(store_vfs, index_path, Some(&data_path), false)?;
30 Ok(Self { revlog })
38 Ok(Self { revlog })
31 }
39 }
32
40
33 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
41 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
34 Self::open_vfs(&repo.store_vfs(), file_path)
42 Self::open_vfs(&repo.store_vfs(), file_path)
35 }
43 }
36
44
37 /// The given node ID is that of the file as found in a filelog, not of a
45 /// The given node ID is that of the file as found in a filelog, not of a
38 /// changeset.
46 /// changeset.
39 pub fn data_for_node(
47 pub fn data_for_node(
40 &self,
48 &self,
41 file_node: impl Into<NodePrefix>,
49 file_node: impl Into<NodePrefix>,
42 ) -> Result<FilelogRevisionData, RevlogError> {
50 ) -> Result<FilelogRevisionData, RevlogError> {
43 let file_rev = self.revlog.rev_from_node(file_node.into())?;
51 let file_rev = self.revlog.rev_from_node(file_node.into())?;
44 self.data_for_rev(file_rev.into())
52 self.data_for_rev(file_rev.into())
45 }
53 }
46
54
47 /// The given revision is that of the file as found in a filelog, not of a
55 /// The given revision is that of the file as found in a filelog, not of a
48 /// changeset.
56 /// changeset.
49 pub fn data_for_rev(
57 pub fn data_for_rev(
50 &self,
58 &self,
51 file_rev: UncheckedRevision,
59 file_rev: UncheckedRevision,
52 ) -> Result<FilelogRevisionData, RevlogError> {
60 ) -> Result<FilelogRevisionData, RevlogError> {
53 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
61 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
54 Ok(FilelogRevisionData(data))
62 Ok(FilelogRevisionData(data))
55 }
63 }
56
64
57 /// The given node ID is that of the file as found in a filelog, not of a
65 /// The given node ID is that of the file as found in a filelog, not of a
58 /// changeset.
66 /// changeset.
59 pub fn entry_for_node(
67 pub fn entry_for_node(
60 &self,
68 &self,
61 file_node: impl Into<NodePrefix>,
69 file_node: impl Into<NodePrefix>,
62 ) -> Result<FilelogEntry, RevlogError> {
70 ) -> Result<FilelogEntry, RevlogError> {
63 let file_rev = self.revlog.rev_from_node(file_node.into())?;
71 let file_rev = self.revlog.rev_from_node(file_node.into())?;
64 self.entry_for_checked_rev(file_rev)
72 self.entry_for_checked_rev(file_rev)
65 }
73 }
66
74
67 /// The given revision is that of the file as found in a filelog, not of a
75 /// The given revision is that of the file as found in a filelog, not of a
68 /// changeset.
76 /// changeset.
69 pub fn entry_for_rev(
77 pub fn entry_for_rev(
70 &self,
78 &self,
71 file_rev: UncheckedRevision,
79 file_rev: UncheckedRevision,
72 ) -> Result<FilelogEntry, RevlogError> {
80 ) -> Result<FilelogEntry, RevlogError> {
73 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
81 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
74 }
82 }
75
83
76 fn entry_for_checked_rev(
84 fn entry_for_checked_rev(
77 &self,
85 &self,
78 file_rev: Revision,
86 file_rev: Revision,
79 ) -> Result<FilelogEntry, RevlogError> {
87 ) -> Result<FilelogEntry, RevlogError> {
80 Ok(FilelogEntry(
88 Ok(FilelogEntry(
81 self.revlog.get_entry_for_checked_rev(file_rev)?,
89 self.revlog.get_entry_for_checked_rev(file_rev)?,
82 ))
90 ))
83 }
91 }
84 }
92 }
85
93
86 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
94 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
87 let encoded_bytes =
95 let encoded_bytes =
88 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
96 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
89 get_path_from_bytes(&encoded_bytes).into()
97 get_path_from_bytes(&encoded_bytes).into()
90 }
98 }
91
99
92 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
100 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
93
101
94 impl FilelogEntry<'_> {
102 impl FilelogEntry<'_> {
95 /// `self.data()` can be expensive, with decompression and delta
103 /// `self.data()` can be expensive, with decompression and delta
96 /// resolution.
104 /// resolution.
97 ///
105 ///
98 /// *Without* paying this cost, based on revlog index information
106 /// *Without* paying this cost, based on revlog index information
99 /// including `RevlogEntry::uncompressed_len`:
107 /// including `RevlogEntry::uncompressed_len`:
100 ///
108 ///
101 /// * Returns `true` if the length that `self.data().file_data().len()`
109 /// * Returns `true` if the length that `self.data().file_data().len()`
102 /// would return is definitely **not equal** to `other_len`.
110 /// would return is definitely **not equal** to `other_len`.
103 /// * Returns `false` if available information is inconclusive.
111 /// * Returns `false` if available information is inconclusive.
104 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
112 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
105 // Relevant code that implements this behavior in Python:
113 // Relevant code that implements this behavior in Python:
106 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
114 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
107 // revlog.size, revlog.rawsize
115 // revlog.size, revlog.rawsize
108
116
109 // Let’s call `file_data_len` what would be returned by
117 // Let’s call `file_data_len` what would be returned by
110 // `self.data().file_data().len()`.
118 // `self.data().file_data().len()`.
111
119
112 if self.0.is_censored() {
120 if self.0.is_censored() {
113 let file_data_len = 0;
121 let file_data_len = 0;
114 return other_len != file_data_len;
122 return other_len != file_data_len;
115 }
123 }
116
124
117 if self.0.has_length_affecting_flag_processor() {
125 if self.0.has_length_affecting_flag_processor() {
118 // We can’t conclude anything about `file_data_len`.
126 // We can’t conclude anything about `file_data_len`.
119 return false;
127 return false;
120 }
128 }
121
129
122 // Revlog revisions (usually) have metadata for the size of
130 // Revlog revisions (usually) have metadata for the size of
123 // their data after decompression and delta resolution
131 // their data after decompression and delta resolution
124 // as would be returned by `Revlog::get_rev_data`.
132 // as would be returned by `Revlog::get_rev_data`.
125 //
133 //
126 // For filelogs this is the file’s contents preceded by an optional
134 // For filelogs this is the file’s contents preceded by an optional
127 // metadata block.
135 // metadata block.
128 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
136 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
129 l as u64
137 l as u64
130 } else {
138 } else {
131 // The field was set to -1, the actual uncompressed len is unknown.
139 // The field was set to -1, the actual uncompressed len is unknown.
132 // We need to decompress to say more.
140 // We need to decompress to say more.
133 return false;
141 return false;
134 };
142 };
135 // `uncompressed_len = file_data_len + optional_metadata_len`,
143 // `uncompressed_len = file_data_len + optional_metadata_len`,
136 // so `file_data_len <= uncompressed_len`.
144 // so `file_data_len <= uncompressed_len`.
137 if uncompressed_len < other_len {
145 if uncompressed_len < other_len {
138 // Transitively, `file_data_len < other_len`.
146 // Transitively, `file_data_len < other_len`.
139 // So `other_len != file_data_len` definitely.
147 // So `other_len != file_data_len` definitely.
140 return true;
148 return true;
141 }
149 }
142
150
143 if uncompressed_len == other_len + 4 {
151 if uncompressed_len == other_len + 4 {
144 // It’s possible that `file_data_len == other_len` with an empty
152 // It’s possible that `file_data_len == other_len` with an empty
145 // metadata block (2 start marker bytes + 2 end marker bytes).
153 // metadata block (2 start marker bytes + 2 end marker bytes).
146 // This happens when there wouldn’t otherwise be metadata, but
154 // This happens when there wouldn’t otherwise be metadata, but
147 // the first 2 bytes of file data happen to match a start marker
155 // the first 2 bytes of file data happen to match a start marker
148 // and would be ambiguous.
156 // and would be ambiguous.
149 return false;
157 return false;
150 }
158 }
151
159
152 if !self.0.has_p1() {
160 if !self.0.has_p1() {
153 // There may or may not be copy metadata, so we can’t deduce more
161 // There may or may not be copy metadata, so we can’t deduce more
154 // about `file_data_len` without computing file data.
162 // about `file_data_len` without computing file data.
155 return false;
163 return false;
156 }
164 }
157
165
158 // Filelog ancestry is not meaningful in the way changelog ancestry is.
166 // Filelog ancestry is not meaningful in the way changelog ancestry is.
159 // It only provides hints to delta generation.
167 // It only provides hints to delta generation.
160 // p1 and p2 are set to null when making a copy or rename since
168 // p1 and p2 are set to null when making a copy or rename since
161 // contents are likely unrelated to what might have previously existed
169 // contents are likely unrelated to what might have previously existed
162 // at the destination path.
170 // at the destination path.
163 //
171 //
164 // Conversely, since here p1 is non-null, there is no copy metadata.
172 // Conversely, since here p1 is non-null, there is no copy metadata.
165 // Note that this reasoning may be invalidated in the presence of
173 // Note that this reasoning may be invalidated in the presence of
166 // merges made by some previous versions of Mercurial that
174 // merges made by some previous versions of Mercurial that
167 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
175 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
168 // and `tests/test-issue6528.t`.
176 // and `tests/test-issue6528.t`.
169 //
177 //
170 // Since copy metadata is currently the only kind of metadata
178 // Since copy metadata is currently the only kind of metadata
171 // kept in revlog data of filelogs,
179 // kept in revlog data of filelogs,
172 // this `FilelogEntry` does not have such metadata:
180 // this `FilelogEntry` does not have such metadata:
173 let file_data_len = uncompressed_len;
181 let file_data_len = uncompressed_len;
174
182
175 file_data_len != other_len
183 file_data_len != other_len
176 }
184 }
177
185
178 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
186 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
179 let data = self.0.data();
187 let data = self.0.data();
180 match data {
188 match data {
181 Ok(data) => Ok(FilelogRevisionData(data.into_owned())),
189 Ok(data) => Ok(FilelogRevisionData(data.into_owned())),
182 // Errors other than `HgError` should not happen at this point
190 // Errors other than `HgError` should not happen at this point
183 Err(e) => match e {
191 Err(e) => match e {
184 RevlogError::Other(hg_error) => Err(hg_error),
192 RevlogError::Other(hg_error) => Err(hg_error),
185 revlog_error => Err(HgError::abort(
193 revlog_error => Err(HgError::abort(
186 revlog_error.to_string(),
194 revlog_error.to_string(),
187 exit_codes::ABORT,
195 exit_codes::ABORT,
188 None,
196 None,
189 )),
197 )),
190 },
198 },
191 }
199 }
192 }
200 }
193 }
201 }
194
202
195 /// The data for one revision in a filelog, uncompressed and delta-resolved.
203 /// The data for one revision in a filelog, uncompressed and delta-resolved.
196 pub struct FilelogRevisionData(Vec<u8>);
204 pub struct FilelogRevisionData(Vec<u8>);
197
205
198 impl FilelogRevisionData {
206 impl FilelogRevisionData {
199 /// Split into metadata and data
207 /// Split into metadata and data
200 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
208 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
201 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
209 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
202
210
203 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
211 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
204 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
212 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
205 Ok((Some(metadata), data))
213 Ok((Some(metadata), data))
206 } else {
214 } else {
207 Err(HgError::corrupted(
215 Err(HgError::corrupted(
208 "Missing metadata end delimiter in filelog entry",
216 "Missing metadata end delimiter in filelog entry",
209 ))
217 ))
210 }
218 }
211 } else {
219 } else {
212 Ok((None, &self.0))
220 Ok((None, &self.0))
213 }
221 }
214 }
222 }
215
223
216 /// Returns the file contents at this revision, stripped of any metadata
224 /// Returns the file contents at this revision, stripped of any metadata
217 pub fn file_data(&self) -> Result<&[u8], HgError> {
225 pub fn file_data(&self) -> Result<&[u8], HgError> {
218 let (_metadata, data) = self.split()?;
226 let (_metadata, data) = self.split()?;
219 Ok(data)
227 Ok(data)
220 }
228 }
221
229
222 /// Consume the entry, and convert it into data, discarding any metadata,
230 /// Consume the entry, and convert it into data, discarding any metadata,
223 /// if present.
231 /// if present.
224 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
232 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
225 if let (Some(_metadata), data) = self.split()? {
233 if let (Some(_metadata), data) = self.split()? {
226 Ok(data.to_owned())
234 Ok(data.to_owned())
227 } else {
235 } else {
228 Ok(self.0)
236 Ok(self.0)
229 }
237 }
230 }
238 }
231 }
239 }
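`FilelogRevisionData::split` above peels an optional copy-metadata block off the revision data: the block, when present, is wrapped between two `\x01\n` delimiters, and everything after the closing delimiter is the actual file contents. The following standalone sketch re-implements that layout with plain std slices purely for illustration (it is not the crate's implementation, which goes through the `SliceExt` helpers):

/// Illustrative re-implementation of the split performed by
/// `FilelogRevisionData::split`: an optional metadata block wrapped in a
/// pair of b"\x01\n" delimiters, followed by the file data.
fn split_filelog_data(raw: &[u8]) -> Option<(Option<&[u8]>, &[u8])> {
    const DELIMITER: &[u8] = b"\x01\n";
    match raw.strip_prefix(DELIMITER) {
        // No leading delimiter: the whole payload is file data.
        None => Some((None, raw)),
        Some(rest) => {
            // Find the closing delimiter; `None` here mirrors the
            // "missing metadata end delimiter" corruption error.
            let end = rest
                .windows(DELIMITER.len())
                .position(|w| w == DELIMITER)?;
            Some((Some(&rest[..end]), &rest[end + DELIMITER.len()..]))
        }
    }
}

// A revision recording a copy might look like (metadata keys as used by
// Mercurial's copy tracing, shown here only as an example):
// b"\x01\ncopy: old/path\ncopyrev: <40 hex digits>\n\x01\nfile contents"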
@@ -1,622 +1,639 @@
1 use std::fmt::Debug;
1 use std::fmt::Debug;
2 use std::ops::Deref;
2 use std::ops::Deref;
3
3
4 use byteorder::{BigEndian, ByteOrder};
4 use byteorder::{BigEndian, ByteOrder};
5
5
6 use crate::errors::HgError;
6 use crate::errors::HgError;
7 use crate::revlog::node::Node;
7 use crate::revlog::node::Node;
8 use crate::revlog::{Revision, NULL_REVISION};
8 use crate::revlog::{Revision, NULL_REVISION};
9 use crate::UncheckedRevision;
9 use crate::{Graph, GraphError, RevlogIndex, UncheckedRevision};
10
10
11 pub const INDEX_ENTRY_SIZE: usize = 64;
11 pub const INDEX_ENTRY_SIZE: usize = 64;
12
12
13 pub struct IndexHeader {
13 pub struct IndexHeader {
14 header_bytes: [u8; 4],
14 header_bytes: [u8; 4],
15 }
15 }
16
16
17 #[derive(Copy, Clone)]
17 #[derive(Copy, Clone)]
18 pub struct IndexHeaderFlags {
18 pub struct IndexHeaderFlags {
19 flags: u16,
19 flags: u16,
20 }
20 }
21
21
22 /// Corresponds to the high bits of `_format_flags` in python
22 /// Corresponds to the high bits of `_format_flags` in python
23 impl IndexHeaderFlags {
23 impl IndexHeaderFlags {
24 /// Corresponds to FLAG_INLINE_DATA in python
24 /// Corresponds to FLAG_INLINE_DATA in python
25 pub fn is_inline(self) -> bool {
25 pub fn is_inline(self) -> bool {
26 self.flags & 1 != 0
26 self.flags & 1 != 0
27 }
27 }
28 /// Corresponds to FLAG_GENERALDELTA in python
28 /// Corresponds to FLAG_GENERALDELTA in python
29 pub fn uses_generaldelta(self) -> bool {
29 pub fn uses_generaldelta(self) -> bool {
30 self.flags & 2 != 0
30 self.flags & 2 != 0
31 }
31 }
32 }
32 }
33
33
34 /// Corresponds to the INDEX_HEADER structure,
34 /// Corresponds to the INDEX_HEADER structure,
35 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
35 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
36 impl IndexHeader {
36 impl IndexHeader {
37 fn format_flags(&self) -> IndexHeaderFlags {
37 fn format_flags(&self) -> IndexHeaderFlags {
38 // No "unknown flags" check here, unlike in python. Maybe there should
38 // No "unknown flags" check here, unlike in python. Maybe there should
39 // be.
39 // be.
40 IndexHeaderFlags {
40 IndexHeaderFlags {
41 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
41 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
42 }
42 }
43 }
43 }
44
44
45 /// The only revlog version currently supported by rhg.
45 /// The only revlog version currently supported by rhg.
46 const REVLOGV1: u16 = 1;
46 const REVLOGV1: u16 = 1;
47
47
48 /// Corresponds to `_format_version` in Python.
48 /// Corresponds to `_format_version` in Python.
49 fn format_version(&self) -> u16 {
49 fn format_version(&self) -> u16 {
50 BigEndian::read_u16(&self.header_bytes[2..4])
50 BigEndian::read_u16(&self.header_bytes[2..4])
51 }
51 }
52
52
53 const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
53 const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
54 // We treat an empty file as a valid index with no entries.
54 // We treat an empty file as a valid index with no entries.
55 // Here we make an arbitrary choice of what we assume the format of the
55 // Here we make an arbitrary choice of what we assume the format of the
56 // index to be (V1, using generaldelta).
56 // index to be (V1, using generaldelta).
57 // This doesn't matter too much, since we're only doing read-only
57 // This doesn't matter too much, since we're only doing read-only
58 // access, but the value corresponds to the `new_header` variable in
58 // access, but the value corresponds to the `new_header` variable in
59 // `revlog.py`, `_loadindex`
59 // `revlog.py`, `_loadindex`
60 header_bytes: [0, 3, 0, 1],
60 header_bytes: [0, 3, 0, 1],
61 };
61 };
62
62
63 fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
63 fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
64 if index_bytes.is_empty() {
64 if index_bytes.is_empty() {
65 return Ok(IndexHeader::EMPTY_INDEX_HEADER);
65 return Ok(IndexHeader::EMPTY_INDEX_HEADER);
66 }
66 }
67 if index_bytes.len() < 4 {
67 if index_bytes.len() < 4 {
68 return Err(HgError::corrupted(
68 return Err(HgError::corrupted(
69 "corrupted revlog: can't read the index format header",
69 "corrupted revlog: can't read the index format header",
70 ));
70 ));
71 }
71 }
72 Ok(IndexHeader {
72 Ok(IndexHeader {
73 header_bytes: {
73 header_bytes: {
74 let bytes: [u8; 4] =
74 let bytes: [u8; 4] =
75 index_bytes[0..4].try_into().expect("impossible");
75 index_bytes[0..4].try_into().expect("impossible");
76 bytes
76 bytes
77 },
77 },
78 })
78 })
79 }
79 }
80 }
80 }
81
81
82 /// A Revlog index
82 /// A Revlog index
83 pub struct Index {
83 pub struct Index {
84 bytes: Box<dyn Deref<Target = [u8]> + Send>,
84 bytes: Box<dyn Deref<Target = [u8]> + Send>,
85 /// Offsets of starts of index blocks.
85 /// Offsets of starts of index blocks.
86 /// Only needed when the index is interleaved with data.
86 /// Only needed when the index is interleaved with data.
87 offsets: Option<Vec<usize>>,
87 offsets: Option<Vec<usize>>,
88 uses_generaldelta: bool,
88 uses_generaldelta: bool,
89 }
89 }
90
90
91 impl Debug for Index {
91 impl Debug for Index {
92 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
92 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
93 f.debug_struct("Index")
93 f.debug_struct("Index")
94 .field("offsets", &self.offsets)
94 .field("offsets", &self.offsets)
95 .field("uses_generaldelta", &self.uses_generaldelta)
95 .field("uses_generaldelta", &self.uses_generaldelta)
96 .finish()
96 .finish()
97 }
97 }
98 }
98 }
99
99
100 impl Graph for Index {
101 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
102 let err = || GraphError::ParentOutOfRange(rev);
103 match self.get_entry(rev) {
104 Some(entry) => {
105 // The C implementation checks that the parents are valid
106 // before returning
107 Ok([
108 self.check_revision(entry.p1()).ok_or_else(err)?,
109 self.check_revision(entry.p2()).ok_or_else(err)?,
110 ])
111 }
112 None => Ok([NULL_REVISION, NULL_REVISION]),
113 }
114 }
115 }
116
100 impl Index {
117 impl Index {
101 /// Create an index from bytes.
118 /// Create an index from bytes.
102 /// Calculate the start of each entry when is_inline is true.
119 /// Calculate the start of each entry when is_inline is true.
103 pub fn new(
120 pub fn new(
104 bytes: Box<dyn Deref<Target = [u8]> + Send>,
121 bytes: Box<dyn Deref<Target = [u8]> + Send>,
105 ) -> Result<Self, HgError> {
122 ) -> Result<Self, HgError> {
106 let header = IndexHeader::parse(bytes.as_ref())?;
123 let header = IndexHeader::parse(bytes.as_ref())?;
107
124
108 if header.format_version() != IndexHeader::REVLOGV1 {
125 if header.format_version() != IndexHeader::REVLOGV1 {
109 // A proper new version should have had a repo/store
126 // A proper new version should have had a repo/store
110 // requirement.
127 // requirement.
111 return Err(HgError::corrupted("unsupported revlog version"));
128 return Err(HgError::corrupted("unsupported revlog version"));
112 }
129 }
113
130
114 // This is only correct because we know version is REVLOGV1.
131 // This is only correct because we know version is REVLOGV1.
115 // In v2 we always use generaldelta, while in v0 we never use
132 // In v2 we always use generaldelta, while in v0 we never use
116 // generaldelta. Similar for [is_inline] (it's only used in v1).
133 // generaldelta. Similar for [is_inline] (it's only used in v1).
117 let uses_generaldelta = header.format_flags().uses_generaldelta();
134 let uses_generaldelta = header.format_flags().uses_generaldelta();
118
135
119 if header.format_flags().is_inline() {
136 if header.format_flags().is_inline() {
120 let mut offset: usize = 0;
137 let mut offset: usize = 0;
121 let mut offsets = Vec::new();
138 let mut offsets = Vec::new();
122
139
123 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
140 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
124 offsets.push(offset);
141 offsets.push(offset);
125 let end = offset + INDEX_ENTRY_SIZE;
142 let end = offset + INDEX_ENTRY_SIZE;
126 let entry = IndexEntry {
143 let entry = IndexEntry {
127 bytes: &bytes[offset..end],
144 bytes: &bytes[offset..end],
128 offset_override: None,
145 offset_override: None,
129 };
146 };
130
147
131 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
148 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
132 }
149 }
133
150
134 if offset == bytes.len() {
151 if offset == bytes.len() {
135 Ok(Self {
152 Ok(Self {
136 bytes,
153 bytes,
137 offsets: Some(offsets),
154 offsets: Some(offsets),
138 uses_generaldelta,
155 uses_generaldelta,
139 })
156 })
140 } else {
157 } else {
141 Err(HgError::corrupted("unexpected inline revlog length"))
158 Err(HgError::corrupted("unexpected inline revlog length"))
142 }
159 }
143 } else {
160 } else {
144 Ok(Self {
161 Ok(Self {
145 bytes,
162 bytes,
146 offsets: None,
163 offsets: None,
147 uses_generaldelta,
164 uses_generaldelta,
148 })
165 })
149 }
166 }
150 }
167 }
151
168
152 pub fn uses_generaldelta(&self) -> bool {
169 pub fn uses_generaldelta(&self) -> bool {
153 self.uses_generaldelta
170 self.uses_generaldelta
154 }
171 }
155
172
156 /// Value of the inline flag.
173 /// Value of the inline flag.
157 pub fn is_inline(&self) -> bool {
174 pub fn is_inline(&self) -> bool {
158 self.offsets.is_some()
175 self.offsets.is_some()
159 }
176 }
160
177
161 /// Return a slice of bytes if `revlog` is inline. Panic if not.
178 /// Return a slice of bytes if `revlog` is inline. Panic if not.
162 pub fn data(&self, start: usize, end: usize) -> &[u8] {
179 pub fn data(&self, start: usize, end: usize) -> &[u8] {
163 if !self.is_inline() {
180 if !self.is_inline() {
164 panic!("tried to access data in the index of a revlog that is not inline");
181 panic!("tried to access data in the index of a revlog that is not inline");
165 }
182 }
166 &self.bytes[start..end]
183 &self.bytes[start..end]
167 }
184 }
168
185
169 /// Return number of entries of the revlog index.
186 /// Return number of entries of the revlog index.
170 pub fn len(&self) -> usize {
187 pub fn len(&self) -> usize {
171 if let Some(offsets) = &self.offsets {
188 if let Some(offsets) = &self.offsets {
172 offsets.len()
189 offsets.len()
173 } else {
190 } else {
174 self.bytes.len() / INDEX_ENTRY_SIZE
191 self.bytes.len() / INDEX_ENTRY_SIZE
175 }
192 }
176 }
193 }
177
194
178 /// Returns `true` if the `Index` has zero `entries`.
195 /// Returns `true` if the `Index` has zero `entries`.
179 pub fn is_empty(&self) -> bool {
196 pub fn is_empty(&self) -> bool {
180 self.len() == 0
197 self.len() == 0
181 }
198 }
182
199
183 /// Return the index entry corresponding to the given revision if it
200 /// Return the index entry corresponding to the given revision if it
184 /// exists.
201 /// exists.
185 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
202 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
186 if rev == NULL_REVISION {
203 if rev == NULL_REVISION {
187 return None;
204 return None;
188 }
205 }
189 Some(if let Some(offsets) = &self.offsets {
206 Some(if let Some(offsets) = &self.offsets {
190 self.get_entry_inline(rev, offsets)
207 self.get_entry_inline(rev, offsets)
191 } else {
208 } else {
192 self.get_entry_separated(rev)
209 self.get_entry_separated(rev)
193 })
210 })
194 }
211 }
195
212
196 fn get_entry_inline(
213 fn get_entry_inline(
197 &self,
214 &self,
198 rev: Revision,
215 rev: Revision,
199 offsets: &[usize],
216 offsets: &[usize],
200 ) -> IndexEntry {
217 ) -> IndexEntry {
201 let start = offsets[rev as usize];
218 let start = offsets[rev as usize];
202 let end = start + INDEX_ENTRY_SIZE;
219 let end = start + INDEX_ENTRY_SIZE;
203 let bytes = &self.bytes[start..end];
220 let bytes = &self.bytes[start..end];
204
221
205 // See IndexEntry for an explanation of this override.
222 // See IndexEntry for an explanation of this override.
206 let offset_override = Some(end);
223 let offset_override = Some(end);
207
224
208 IndexEntry {
225 IndexEntry {
209 bytes,
226 bytes,
210 offset_override,
227 offset_override,
211 }
228 }
212 }
229 }
213
230
214 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
231 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
215 let start = rev as usize * INDEX_ENTRY_SIZE;
232 let start = rev as usize * INDEX_ENTRY_SIZE;
216 let end = start + INDEX_ENTRY_SIZE;
233 let end = start + INDEX_ENTRY_SIZE;
217 let bytes = &self.bytes[start..end];
234 let bytes = &self.bytes[start..end];
218
235
219 // Override the offset of the first revision as its bytes are used
236 // Override the offset of the first revision as its bytes are used
220 // for the index's metadata (saving space because it is always 0)
237 // for the index's metadata (saving space because it is always 0)
221 let offset_override = if rev == 0 { Some(0) } else { None };
238 let offset_override = if rev == 0 { Some(0) } else { None };
222
239
223 IndexEntry {
240 IndexEntry {
224 bytes,
241 bytes,
225 offset_override,
242 offset_override,
226 }
243 }
227 }
244 }
228 }
245 }
229
246
230 impl super::RevlogIndex for Index {
247 impl super::RevlogIndex for Index {
231 fn len(&self) -> usize {
248 fn len(&self) -> usize {
232 self.len()
249 self.len()
233 }
250 }
234
251
235 fn node(&self, rev: Revision) -> Option<&Node> {
252 fn node(&self, rev: Revision) -> Option<&Node> {
236 self.get_entry(rev).map(|entry| entry.hash())
253 self.get_entry(rev).map(|entry| entry.hash())
237 }
254 }
238 }
255 }
239
256
240 #[derive(Debug)]
257 #[derive(Debug)]
241 pub struct IndexEntry<'a> {
258 pub struct IndexEntry<'a> {
242 bytes: &'a [u8],
259 bytes: &'a [u8],
243 /// Allows overriding the offset value of the entry.
260 /// Allows overriding the offset value of the entry.
244 ///
261 ///
245 /// For interleaved index and data, the offset stored in the index
262 /// For interleaved index and data, the offset stored in the index
246 /// corresponds to the separated data offset.
263 /// corresponds to the separated data offset.
247 /// It has to be overridden with the actual offset in the interleaved
264 /// It has to be overridden with the actual offset in the interleaved
248 /// index which is just after the index block.
265 /// index which is just after the index block.
249 ///
266 ///
250 /// For separated index and data, the offset stored in the first index
267 /// For separated index and data, the offset stored in the first index
251 /// entry is mixed with the index headers.
268 /// entry is mixed with the index headers.
252 /// It has to be overridden with 0.
269 /// It has to be overridden with 0.
253 offset_override: Option<usize>,
270 offset_override: Option<usize>,
254 }
271 }
255
272
256 impl<'a> IndexEntry<'a> {
273 impl<'a> IndexEntry<'a> {
257 /// Return the offset of the data.
274 /// Return the offset of the data.
258 pub fn offset(&self) -> usize {
275 pub fn offset(&self) -> usize {
259 if let Some(offset_override) = self.offset_override {
276 if let Some(offset_override) = self.offset_override {
260 offset_override
277 offset_override
261 } else {
278 } else {
262 let mut bytes = [0; 8];
279 let mut bytes = [0; 8];
263 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
280 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
264 BigEndian::read_u64(&bytes[..]) as usize
281 BigEndian::read_u64(&bytes[..]) as usize
265 }
282 }
266 }
283 }
267
284
268 pub fn flags(&self) -> u16 {
285 pub fn flags(&self) -> u16 {
269 BigEndian::read_u16(&self.bytes[6..=7])
286 BigEndian::read_u16(&self.bytes[6..=7])
270 }
287 }
271
288
272 /// Return the compressed length of the data.
289 /// Return the compressed length of the data.
273 pub fn compressed_len(&self) -> u32 {
290 pub fn compressed_len(&self) -> u32 {
274 BigEndian::read_u32(&self.bytes[8..=11])
291 BigEndian::read_u32(&self.bytes[8..=11])
275 }
292 }
276
293
277 /// Return the uncompressed length of the data.
294 /// Return the uncompressed length of the data.
278 pub fn uncompressed_len(&self) -> i32 {
295 pub fn uncompressed_len(&self) -> i32 {
279 BigEndian::read_i32(&self.bytes[12..=15])
296 BigEndian::read_i32(&self.bytes[12..=15])
280 }
297 }
281
298
282 /// Return the revision upon which the data has been derived.
299 /// Return the revision upon which the data has been derived.
283 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
300 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
284 // TODO Maybe return an Option when base_revision == rev?
301 // TODO Maybe return an Option when base_revision == rev?
285 // Requires to add rev to IndexEntry
302 // Requires to add rev to IndexEntry
286
303
287 BigEndian::read_i32(&self.bytes[16..]).into()
304 BigEndian::read_i32(&self.bytes[16..]).into()
288 }
305 }
289
306
290 pub fn link_revision(&self) -> UncheckedRevision {
307 pub fn link_revision(&self) -> UncheckedRevision {
291 BigEndian::read_i32(&self.bytes[20..]).into()
308 BigEndian::read_i32(&self.bytes[20..]).into()
292 }
309 }
293
310
294 pub fn p1(&self) -> UncheckedRevision {
311 pub fn p1(&self) -> UncheckedRevision {
295 BigEndian::read_i32(&self.bytes[24..]).into()
312 BigEndian::read_i32(&self.bytes[24..]).into()
296 }
313 }
297
314
298 pub fn p2(&self) -> UncheckedRevision {
315 pub fn p2(&self) -> UncheckedRevision {
299 BigEndian::read_i32(&self.bytes[28..]).into()
316 BigEndian::read_i32(&self.bytes[28..]).into()
300 }
317 }
301
318
302 /// Return the hash of revision's full text.
319 /// Return the hash of revision's full text.
303 ///
320 ///
304 /// Currently, SHA-1 is used and only the first 20 bytes of this field
321 /// Currently, SHA-1 is used and only the first 20 bytes of this field
305 /// are used.
322 /// are used.
306 pub fn hash(&self) -> &'a Node {
323 pub fn hash(&self) -> &'a Node {
307 (&self.bytes[32..52]).try_into().unwrap()
324 (&self.bytes[32..52]).try_into().unwrap()
308 }
325 }
309 }
326 }
310
327
311 #[cfg(test)]
328 #[cfg(test)]
312 mod tests {
329 mod tests {
313 use super::*;
330 use super::*;
314 use crate::node::NULL_NODE;
331 use crate::node::NULL_NODE;
315
332
316 #[cfg(test)]
333 #[cfg(test)]
317 #[derive(Debug, Copy, Clone)]
334 #[derive(Debug, Copy, Clone)]
318 pub struct IndexEntryBuilder {
335 pub struct IndexEntryBuilder {
319 is_first: bool,
336 is_first: bool,
320 is_inline: bool,
337 is_inline: bool,
321 is_general_delta: bool,
338 is_general_delta: bool,
322 version: u16,
339 version: u16,
323 offset: usize,
340 offset: usize,
324 compressed_len: usize,
341 compressed_len: usize,
325 uncompressed_len: usize,
342 uncompressed_len: usize,
326 base_revision_or_base_of_delta_chain: Revision,
343 base_revision_or_base_of_delta_chain: Revision,
327 link_revision: Revision,
344 link_revision: Revision,
328 p1: Revision,
345 p1: Revision,
329 p2: Revision,
346 p2: Revision,
330 node: Node,
347 node: Node,
331 }
348 }
332
349
333 #[cfg(test)]
350 #[cfg(test)]
334 impl IndexEntryBuilder {
351 impl IndexEntryBuilder {
335 #[allow(clippy::new_without_default)]
352 #[allow(clippy::new_without_default)]
336 pub fn new() -> Self {
353 pub fn new() -> Self {
337 Self {
354 Self {
338 is_first: false,
355 is_first: false,
339 is_inline: false,
356 is_inline: false,
340 is_general_delta: true,
357 is_general_delta: true,
341 version: 1,
358 version: 1,
342 offset: 0,
359 offset: 0,
343 compressed_len: 0,
360 compressed_len: 0,
344 uncompressed_len: 0,
361 uncompressed_len: 0,
345 base_revision_or_base_of_delta_chain: 0,
362 base_revision_or_base_of_delta_chain: 0,
346 link_revision: 0,
363 link_revision: 0,
347 p1: NULL_REVISION,
364 p1: NULL_REVISION,
348 p2: NULL_REVISION,
365 p2: NULL_REVISION,
349 node: NULL_NODE,
366 node: NULL_NODE,
350 }
367 }
351 }
368 }
352
369
353 pub fn is_first(&mut self, value: bool) -> &mut Self {
370 pub fn is_first(&mut self, value: bool) -> &mut Self {
354 self.is_first = value;
371 self.is_first = value;
355 self
372 self
356 }
373 }
357
374
358 pub fn with_inline(&mut self, value: bool) -> &mut Self {
375 pub fn with_inline(&mut self, value: bool) -> &mut Self {
359 self.is_inline = value;
376 self.is_inline = value;
360 self
377 self
361 }
378 }
362
379
363 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
380 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
364 self.is_general_delta = value;
381 self.is_general_delta = value;
365 self
382 self
366 }
383 }
367
384
368 pub fn with_version(&mut self, value: u16) -> &mut Self {
385 pub fn with_version(&mut self, value: u16) -> &mut Self {
369 self.version = value;
386 self.version = value;
370 self
387 self
371 }
388 }
372
389
373 pub fn with_offset(&mut self, value: usize) -> &mut Self {
390 pub fn with_offset(&mut self, value: usize) -> &mut Self {
374 self.offset = value;
391 self.offset = value;
375 self
392 self
376 }
393 }
377
394
378 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
395 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
379 self.compressed_len = value;
396 self.compressed_len = value;
380 self
397 self
381 }
398 }
382
399
383 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
400 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
384 self.uncompressed_len = value;
401 self.uncompressed_len = value;
385 self
402 self
386 }
403 }
387
404
388 pub fn with_base_revision_or_base_of_delta_chain(
405 pub fn with_base_revision_or_base_of_delta_chain(
389 &mut self,
406 &mut self,
390 value: Revision,
407 value: Revision,
391 ) -> &mut Self {
408 ) -> &mut Self {
392 self.base_revision_or_base_of_delta_chain = value;
409 self.base_revision_or_base_of_delta_chain = value;
393 self
410 self
394 }
411 }
395
412
396 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
413 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
397 self.link_revision = value;
414 self.link_revision = value;
398 self
415 self
399 }
416 }
400
417
401 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
418 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
402 self.p1 = value;
419 self.p1 = value;
403 self
420 self
404 }
421 }
405
422
406 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
423 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
407 self.p2 = value;
424 self.p2 = value;
408 self
425 self
409 }
426 }
410
427
411 pub fn with_node(&mut self, value: Node) -> &mut Self {
428 pub fn with_node(&mut self, value: Node) -> &mut Self {
412 self.node = value;
429 self.node = value;
413 self
430 self
414 }
431 }
415
432
416 pub fn build(&self) -> Vec<u8> {
433 pub fn build(&self) -> Vec<u8> {
417 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
434 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
418 if self.is_first {
435 if self.is_first {
419 bytes.extend(&match (self.is_general_delta, self.is_inline) {
436 bytes.extend(&match (self.is_general_delta, self.is_inline) {
420 (false, false) => [0u8, 0],
437 (false, false) => [0u8, 0],
421 (false, true) => [0u8, 1],
438 (false, true) => [0u8, 1],
422 (true, false) => [0u8, 2],
439 (true, false) => [0u8, 2],
423 (true, true) => [0u8, 3],
440 (true, true) => [0u8, 3],
424 });
441 });
425 bytes.extend(&self.version.to_be_bytes());
442 bytes.extend(&self.version.to_be_bytes());
426 // Remaining offset bytes.
443 // Remaining offset bytes.
427 bytes.extend(&[0u8; 2]);
444 bytes.extend(&[0u8; 2]);
428 } else {
445 } else {
429 // Offset stored on 48 bits (6 bytes)
446 // Offset stored on 48 bits (6 bytes)
430 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
447 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
431 }
448 }
432 bytes.extend(&[0u8; 2]); // Revision flags.
449 bytes.extend(&[0u8; 2]); // Revision flags.
433 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
450 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
434 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
451 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
435 bytes.extend(
452 bytes.extend(
436 &self.base_revision_or_base_of_delta_chain.to_be_bytes(),
453 &self.base_revision_or_base_of_delta_chain.to_be_bytes(),
437 );
454 );
438 bytes.extend(&self.link_revision.to_be_bytes());
455 bytes.extend(&self.link_revision.to_be_bytes());
439 bytes.extend(&self.p1.to_be_bytes());
456 bytes.extend(&self.p1.to_be_bytes());
440 bytes.extend(&self.p2.to_be_bytes());
457 bytes.extend(&self.p2.to_be_bytes());
441 bytes.extend(self.node.as_bytes());
458 bytes.extend(self.node.as_bytes());
442 bytes.extend(vec![0u8; 12]);
459 bytes.extend(vec![0u8; 12]);
443 bytes
460 bytes
444 }
461 }
445 }
462 }
446
463
447 pub fn is_inline(index_bytes: &[u8]) -> bool {
464 pub fn is_inline(index_bytes: &[u8]) -> bool {
448 IndexHeader::parse(index_bytes)
465 IndexHeader::parse(index_bytes)
449 .expect("too short")
466 .expect("too short")
450 .format_flags()
467 .format_flags()
451 .is_inline()
468 .is_inline()
452 }
469 }
453
470
454 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
471 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
455 IndexHeader::parse(index_bytes)
472 IndexHeader::parse(index_bytes)
456 .expect("too short")
473 .expect("too short")
457 .format_flags()
474 .format_flags()
458 .uses_generaldelta()
475 .uses_generaldelta()
459 }
476 }
460
477
461 pub fn get_version(index_bytes: &[u8]) -> u16 {
478 pub fn get_version(index_bytes: &[u8]) -> u16 {
462 IndexHeader::parse(index_bytes)
479 IndexHeader::parse(index_bytes)
463 .expect("too short")
480 .expect("too short")
464 .format_version()
481 .format_version()
465 }
482 }
466
483
467 #[test]
484 #[test]
468 fn flags_when_no_inline_flag_test() {
485 fn flags_when_no_inline_flag_test() {
469 let bytes = IndexEntryBuilder::new()
486 let bytes = IndexEntryBuilder::new()
470 .is_first(true)
487 .is_first(true)
471 .with_general_delta(false)
488 .with_general_delta(false)
472 .with_inline(false)
489 .with_inline(false)
473 .build();
490 .build();
474
491
475 assert!(!is_inline(&bytes));
492 assert!(!is_inline(&bytes));
476 assert!(!uses_generaldelta(&bytes));
493 assert!(!uses_generaldelta(&bytes));
477 }
494 }
478
495
479 #[test]
496 #[test]
480 fn flags_when_inline_flag_test() {
497 fn flags_when_inline_flag_test() {
481 let bytes = IndexEntryBuilder::new()
498 let bytes = IndexEntryBuilder::new()
482 .is_first(true)
499 .is_first(true)
483 .with_general_delta(false)
500 .with_general_delta(false)
484 .with_inline(true)
501 .with_inline(true)
485 .build();
502 .build();
486
503
487 assert!(is_inline(&bytes));
504 assert!(is_inline(&bytes));
488 assert!(!uses_generaldelta(&bytes));
505 assert!(!uses_generaldelta(&bytes));
489 }
506 }
490
507
491 #[test]
508 #[test]
492 fn flags_when_inline_and_generaldelta_flags_test() {
509 fn flags_when_inline_and_generaldelta_flags_test() {
493 let bytes = IndexEntryBuilder::new()
510 let bytes = IndexEntryBuilder::new()
494 .is_first(true)
511 .is_first(true)
495 .with_general_delta(true)
512 .with_general_delta(true)
496 .with_inline(true)
513 .with_inline(true)
497 .build();
514 .build();
498
515
499 assert!(is_inline(&bytes));
516 assert!(is_inline(&bytes));
500 assert!(uses_generaldelta(&bytes));
517 assert!(uses_generaldelta(&bytes));
501 }
518 }
502
519
503 #[test]
520 #[test]
504 fn test_offset() {
521 fn test_offset() {
505 let bytes = IndexEntryBuilder::new().with_offset(1).build();
522 let bytes = IndexEntryBuilder::new().with_offset(1).build();
506 let entry = IndexEntry {
523 let entry = IndexEntry {
507 bytes: &bytes,
524 bytes: &bytes,
508 offset_override: None,
525 offset_override: None,
509 };
526 };
510
527
511 assert_eq!(entry.offset(), 1)
528 assert_eq!(entry.offset(), 1)
512 }
529 }
513
530
514 #[test]
531 #[test]
515 fn test_with_overridden_offset() {
532 fn test_with_overridden_offset() {
516 let bytes = IndexEntryBuilder::new().with_offset(1).build();
533 let bytes = IndexEntryBuilder::new().with_offset(1).build();
517 let entry = IndexEntry {
534 let entry = IndexEntry {
518 bytes: &bytes,
535 bytes: &bytes,
519 offset_override: Some(2),
536 offset_override: Some(2),
520 };
537 };
521
538
522 assert_eq!(entry.offset(), 2)
539 assert_eq!(entry.offset(), 2)
523 }
540 }
524
541
525 #[test]
542 #[test]
526 fn test_compressed_len() {
543 fn test_compressed_len() {
527 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
544 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
528 let entry = IndexEntry {
545 let entry = IndexEntry {
529 bytes: &bytes,
546 bytes: &bytes,
530 offset_override: None,
547 offset_override: None,
531 };
548 };
532
549
533 assert_eq!(entry.compressed_len(), 1)
550 assert_eq!(entry.compressed_len(), 1)
534 }
551 }
535
552
536 #[test]
553 #[test]
537 fn test_uncompressed_len() {
554 fn test_uncompressed_len() {
538 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
555 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
539 let entry = IndexEntry {
556 let entry = IndexEntry {
540 bytes: &bytes,
557 bytes: &bytes,
541 offset_override: None,
558 offset_override: None,
542 };
559 };
543
560
544 assert_eq!(entry.uncompressed_len(), 1)
561 assert_eq!(entry.uncompressed_len(), 1)
545 }
562 }
546
563
547 #[test]
564 #[test]
548 fn test_base_revision_or_base_of_delta_chain() {
565 fn test_base_revision_or_base_of_delta_chain() {
549 let bytes = IndexEntryBuilder::new()
566 let bytes = IndexEntryBuilder::new()
550 .with_base_revision_or_base_of_delta_chain(1)
567 .with_base_revision_or_base_of_delta_chain(1)
551 .build();
568 .build();
552 let entry = IndexEntry {
569 let entry = IndexEntry {
553 bytes: &bytes,
570 bytes: &bytes,
554 offset_override: None,
571 offset_override: None,
555 };
572 };
556
573
557 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
574 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
558 }
575 }
559
576
560 #[test]
577 #[test]
561 fn link_revision_test() {
578 fn link_revision_test() {
562 let bytes = IndexEntryBuilder::new().with_link_revision(123).build();
579 let bytes = IndexEntryBuilder::new().with_link_revision(123).build();
563
580
564 let entry = IndexEntry {
581 let entry = IndexEntry {
565 bytes: &bytes,
582 bytes: &bytes,
566 offset_override: None,
583 offset_override: None,
567 };
584 };
568
585
569 assert_eq!(entry.link_revision(), 123.into());
586 assert_eq!(entry.link_revision(), 123.into());
570 }
587 }
571
588
572 #[test]
589 #[test]
573 fn p1_test() {
590 fn p1_test() {
574 let bytes = IndexEntryBuilder::new().with_p1(123).build();
591 let bytes = IndexEntryBuilder::new().with_p1(123).build();
575
592
576 let entry = IndexEntry {
593 let entry = IndexEntry {
577 bytes: &bytes,
594 bytes: &bytes,
578 offset_override: None,
595 offset_override: None,
579 };
596 };
580
597
581 assert_eq!(entry.p1(), 123.into());
598 assert_eq!(entry.p1(), 123.into());
582 }
599 }
583
600
584 #[test]
601 #[test]
585 fn p2_test() {
602 fn p2_test() {
586 let bytes = IndexEntryBuilder::new().with_p2(123).build();
603 let bytes = IndexEntryBuilder::new().with_p2(123).build();
587
604
588 let entry = IndexEntry {
605 let entry = IndexEntry {
589 bytes: &bytes,
606 bytes: &bytes,
590 offset_override: None,
607 offset_override: None,
591 };
608 };
592
609
593 assert_eq!(entry.p2(), 123.into());
610 assert_eq!(entry.p2(), 123.into());
594 }
611 }
595
612
596 #[test]
613 #[test]
597 fn node_test() {
614 fn node_test() {
598 let node = Node::from_hex("0123456789012345678901234567890123456789")
615 let node = Node::from_hex("0123456789012345678901234567890123456789")
599 .unwrap();
616 .unwrap();
600 let bytes = IndexEntryBuilder::new().with_node(node).build();
617 let bytes = IndexEntryBuilder::new().with_node(node).build();
601
618
602 let entry = IndexEntry {
619 let entry = IndexEntry {
603 bytes: &bytes,
620 bytes: &bytes,
604 offset_override: None,
621 offset_override: None,
605 };
622 };
606
623
607 assert_eq!(*entry.hash(), node);
624 assert_eq!(*entry.hash(), node);
608 }
625 }
609
626
610 #[test]
627 #[test]
611 fn version_test() {
628 fn version_test() {
612 let bytes = IndexEntryBuilder::new()
629 let bytes = IndexEntryBuilder::new()
613 .is_first(true)
630 .is_first(true)
614 .with_version(2)
631 .with_version(2)
615 .build();
632 .build();
616
633
617 assert_eq!(get_version(&bytes), 2)
634 assert_eq!(get_version(&bytes), 2)
618 }
635 }
619 }
636 }
620
637
621 #[cfg(test)]
638 #[cfg(test)]
622 pub use tests::IndexEntryBuilder;
639 pub use tests::IndexEntryBuilder;
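As a reading aid for the layout that `build` produces, here is a minimal decoding sketch. The offsets are inferred from the builder above and from the `IndexEntry` accessors earlier in this hunk; the first 8 bytes (offset/header and revision flags) are skipped, and `INDEX_ENTRY_SIZE` is assumed to be 64. This is an editorial sketch, not part of the patch.

```rust
use byteorder::{BigEndian, ByteOrder};

/// Fixed-width fields of one index entry, as laid out by
/// `IndexEntryBuilder::build`: bytes 0..6 hold the offset (or the header for
/// the first entry), 6..8 the revision flags, then the fields below,
/// zero-padded to 64 bytes.
struct RawEntryFields {
    compressed_len: u32,   // bytes 8..12
    uncompressed_len: i32, // bytes 12..16
    base_rev: i32,         // bytes 16..20
    link_rev: i32,         // bytes 20..24
    p1: i32,               // bytes 24..28
    p2: i32,               // bytes 28..32
    node: [u8; 20],        // bytes 32..52
}

fn decode_entry(bytes: &[u8]) -> RawEntryFields {
    let mut node = [0u8; 20];
    node.copy_from_slice(&bytes[32..52]);
    RawEntryFields {
        compressed_len: BigEndian::read_u32(&bytes[8..12]),
        uncompressed_len: BigEndian::read_i32(&bytes[12..16]),
        base_rev: BigEndian::read_i32(&bytes[16..20]),
        link_rev: BigEndian::read_i32(&bytes[20..24]),
        p1: BigEndian::read_i32(&bytes[24..28]),
        p2: BigEndian::read_i32(&bytes[28..32]),
        node,
    }
}
```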
@@ -1,203 +1,209 b''
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::revlog::{Node, NodePrefix};
2 use crate::revlog::{Node, NodePrefix};
3 use crate::revlog::{Revlog, RevlogError};
3 use crate::revlog::{Revlog, RevlogError};
4 use crate::utils::hg_path::HgPath;
4 use crate::utils::hg_path::HgPath;
5 use crate::utils::SliceExt;
5 use crate::utils::SliceExt;
6 use crate::vfs::Vfs;
6 use crate::vfs::Vfs;
7 use crate::{Revision, UncheckedRevision};
7 use crate::{Graph, GraphError, Revision, UncheckedRevision};
8
8
9 /// A specialized `Revlog` to work with `manifest` data format.
9 /// A specialized `Revlog` to work with `manifest` data format.
10 pub struct Manifestlog {
10 pub struct Manifestlog {
11 /// The generic `revlog` format.
11 /// The generic `revlog` format.
12 revlog: Revlog,
12 revlog: Revlog,
13 }
13 }
14
14
15 impl Graph for Manifestlog {
16 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
17 self.revlog.parents(rev)
18 }
19 }
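With the `Graph` impl above, a `Manifestlog` can be handed to anything generic over `Graph`. A minimal usage sketch (not part of the patch), assuming `NULL_REVISION` is in scope:

```rust
// Editorial sketch: resolving the first parent of a manifest revision
// through the `Graph` trait.
fn first_parent(
    manifestlog: &Manifestlog,
    rev: Revision,
) -> Result<Option<Revision>, GraphError> {
    let [p1, _p2] = manifestlog.parents(rev)?;
    Ok(if p1 == NULL_REVISION { None } else { Some(p1) })
}
```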
20
15 impl Manifestlog {
21 impl Manifestlog {
16 /// Open the `manifest` of a repository given by its root.
22 /// Open the `manifest` of a repository given by its root.
17 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
23 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
18 let revlog =
24 let revlog =
19 Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?;
25 Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?;
20 Ok(Self { revlog })
26 Ok(Self { revlog })
21 }
27 }
22
28
23 /// Return the `Manifest` for the given node ID.
29 /// Return the `Manifest` for the given node ID.
24 ///
30 ///
25 /// Note: this is a node ID in the manifestlog, typically found through
31 /// Note: this is a node ID in the manifestlog, typically found through
26 /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any
32 /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any
27 /// changeset.
33 /// changeset.
28 ///
34 ///
29 /// See also `Repo::manifest_for_node`
35 /// See also `Repo::manifest_for_node`
30 pub fn data_for_node(
36 pub fn data_for_node(
31 &self,
37 &self,
32 node: NodePrefix,
38 node: NodePrefix,
33 ) -> Result<Manifest, RevlogError> {
39 ) -> Result<Manifest, RevlogError> {
34 let rev = self.revlog.rev_from_node(node)?;
40 let rev = self.revlog.rev_from_node(node)?;
35 self.data_for_checked_rev(rev)
41 self.data_for_checked_rev(rev)
36 }
42 }
37
43
38 /// Return the `Manifest` of a given revision number.
44 /// Return the `Manifest` of a given revision number.
39 ///
45 ///
40 /// Note: this is a revision number in the manifestlog, *not* of any
46 /// Note: this is a revision number in the manifestlog, *not* of any
41 /// changeset.
47 /// changeset.
42 ///
48 ///
43 /// See also `Repo::manifest_for_rev`
49 /// See also `Repo::manifest_for_rev`
44 pub fn data_for_rev(
50 pub fn data_for_rev(
45 &self,
51 &self,
46 rev: UncheckedRevision,
52 rev: UncheckedRevision,
47 ) -> Result<Manifest, RevlogError> {
53 ) -> Result<Manifest, RevlogError> {
48 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
54 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
49 Ok(Manifest { bytes })
55 Ok(Manifest { bytes })
50 }
56 }
51
57
52 pub fn data_for_checked_rev(
58 pub fn data_for_checked_rev(
53 &self,
59 &self,
54 rev: Revision,
60 rev: Revision,
55 ) -> Result<Manifest, RevlogError> {
61 ) -> Result<Manifest, RevlogError> {
56 let bytes =
62 let bytes =
57 self.revlog.get_rev_data_for_checked_rev(rev)?.into_owned();
63 self.revlog.get_rev_data_for_checked_rev(rev)?.into_owned();
58 Ok(Manifest { bytes })
64 Ok(Manifest { bytes })
59 }
65 }
60 }
66 }
61
67
62 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
68 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
63 #[derive(Debug)]
69 #[derive(Debug)]
64 pub struct Manifest {
70 pub struct Manifest {
65 /// Format for a manifest: flat sequence of variable-size entries,
71 /// Format for a manifest: flat sequence of variable-size entries,
66 /// sorted by path, each as:
72 /// sorted by path, each as:
67 ///
73 ///
68 /// ```text
74 /// ```text
69 /// <path> \0 <hex_node_id> <flags> \n
75 /// <path> \0 <hex_node_id> <flags> \n
70 /// ```
76 /// ```
71 ///
77 ///
72 /// The last entry is also terminated by a newline character.
78 /// The last entry is also terminated by a newline character.
73 /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
79 /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
74 bytes: Vec<u8>,
80 bytes: Vec<u8>,
75 }
81 }
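To make the format comment above concrete, a single entry for a hypothetical executable file could read as follows, reusing the spaced notation of the comment (the spaces are not part of the encoding):

```text
src/main.rs \0 0123456789012345678901234567890123456789 x \n
```

A regular file simply leaves the flag field empty.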
76
82
77 impl Manifest {
83 impl Manifest {
78 pub fn iter(
84 pub fn iter(
79 &self,
85 &self,
80 ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
86 ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
81 self.bytes
87 self.bytes
82 .split(|b| b == &b'\n')
88 .split(|b| b == &b'\n')
83 .filter(|line| !line.is_empty())
89 .filter(|line| !line.is_empty())
84 .map(ManifestEntry::from_raw)
90 .map(ManifestEntry::from_raw)
85 }
91 }
86
92
87 /// If the given path is in this manifest, return its entry, from which the filelog node ID can be read
93 /// If the given path is in this manifest, return its entry, from which the filelog node ID can be read
88 pub fn find_by_path(
94 pub fn find_by_path(
89 &self,
95 &self,
90 path: &HgPath,
96 path: &HgPath,
91 ) -> Result<Option<ManifestEntry>, HgError> {
97 ) -> Result<Option<ManifestEntry>, HgError> {
92 use std::cmp::Ordering::*;
98 use std::cmp::Ordering::*;
93 let path = path.as_bytes();
99 let path = path.as_bytes();
94 // Both boundaries of this `&[u8]` slice are always at the boundary of
100 // Both boundaries of this `&[u8]` slice are always at the boundary of
95 // an entry
101 // an entry
96 let mut bytes = &*self.bytes;
102 let mut bytes = &*self.bytes;
97
103
98 // Binary search algorithm derived from `[T]::binary_search_by`
104 // Binary search algorithm derived from `[T]::binary_search_by`
99 // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
105 // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
100 // except we don’t have a slice of entries. Instead we jump to the
106 // except we don’t have a slice of entries. Instead we jump to the
101 // middle of the byte slice and look around for entry delimiters
107 // middle of the byte slice and look around for entry delimiters
102 // (newlines).
108 // (newlines).
103 while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? {
109 while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? {
104 let (entry_path, rest) =
110 let (entry_path, rest) =
105 ManifestEntry::split_path(&bytes[entry_range.clone()])?;
111 ManifestEntry::split_path(&bytes[entry_range.clone()])?;
106 let cmp = entry_path.cmp(path);
112 let cmp = entry_path.cmp(path);
107 if cmp == Less {
113 if cmp == Less {
108 let after_newline = entry_range.end + 1;
114 let after_newline = entry_range.end + 1;
109 bytes = &bytes[after_newline..];
115 bytes = &bytes[after_newline..];
110 } else if cmp == Greater {
116 } else if cmp == Greater {
111 bytes = &bytes[..entry_range.start];
117 bytes = &bytes[..entry_range.start];
112 } else {
118 } else {
113 return Ok(Some(ManifestEntry::from_path_and_rest(
119 return Ok(Some(ManifestEntry::from_path_and_rest(
114 entry_path, rest,
120 entry_path, rest,
115 )));
121 )));
116 }
122 }
117 }
123 }
118 Ok(None)
124 Ok(None)
119 }
125 }
120
126
121 /// If there is at least one, return the byte range of an entry *excluding*
127 /// If there is at least one, return the byte range of an entry *excluding*
122 /// the final newline.
128 /// the final newline.
123 fn find_entry_near_middle_of(
129 fn find_entry_near_middle_of(
124 bytes: &[u8],
130 bytes: &[u8],
125 ) -> Result<Option<std::ops::Range<usize>>, HgError> {
131 ) -> Result<Option<std::ops::Range<usize>>, HgError> {
126 let len = bytes.len();
132 let len = bytes.len();
127 if len > 0 {
133 if len > 0 {
128 let middle = bytes.len() / 2;
134 let middle = bytes.len() / 2;
129 // Integer division rounds down, so `middle < len`.
135 // Integer division rounds down, so `middle < len`.
130 let (before, after) = bytes.split_at(middle);
136 let (before, after) = bytes.split_at(middle);
131 let is_newline = |&byte: &u8| byte == b'\n';
137 let is_newline = |&byte: &u8| byte == b'\n';
132 let entry_start = match before.iter().rposition(is_newline) {
138 let entry_start = match before.iter().rposition(is_newline) {
133 Some(i) => i + 1,
139 Some(i) => i + 1,
134 None => 0, // We choose the first entry in `bytes`
140 None => 0, // We choose the first entry in `bytes`
135 };
141 };
136 let entry_end = match after.iter().position(is_newline) {
142 let entry_end = match after.iter().position(is_newline) {
137 Some(i) => {
143 Some(i) => {
138 // No `+ 1` here to exclude this newline from the range
144 // No `+ 1` here to exclude this newline from the range
139 middle + i
145 middle + i
140 }
146 }
141 None => {
147 None => {
142 // In a well-formed manifest:
148 // In a well-formed manifest:
143 //
149 //
144 // * Since `len > 0`, `bytes` contains at least one entry
150 // * Since `len > 0`, `bytes` contains at least one entry
145 // * Every entry ends with a newline
151 // * Every entry ends with a newline
146 // * Since `middle < len`, `after` contains at least the
152 // * Since `middle < len`, `after` contains at least the
147 // newline at the end of the last entry of `bytes`.
153 // newline at the end of the last entry of `bytes`.
148 //
154 //
149 // We didn’t find a newline, so this manifest is not
155 // We didn’t find a newline, so this manifest is not
150 // well-formed.
156 // well-formed.
151 return Err(HgError::corrupted(
157 return Err(HgError::corrupted(
152 "manifest entry without \\n delimiter",
158 "manifest entry without \\n delimiter",
153 ));
159 ));
154 }
160 }
155 };
161 };
156 Ok(Some(entry_start..entry_end))
162 Ok(Some(entry_start..entry_end))
157 } else {
163 } else {
158 // len == 0
164 // len == 0
159 Ok(None)
165 Ok(None)
160 }
166 }
161 }
167 }
162 }
168 }
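A sketch of how the two lookup paths above are typically consumed; the `manifest` value is assumed to come from `Manifestlog::data_for_rev` or `data_for_node` (editorial sketch, not part of the patch):

```rust
// Editorial sketch: resolve one path to its filelog node, and list all paths.
fn node_for_path(
    manifest: &Manifest,
    path: &HgPath,
) -> Result<Option<Node>, HgError> {
    Ok(match manifest.find_by_path(path)? {
        Some(entry) => Some(entry.node_id()?),
        None => None,
    })
}

fn all_paths(manifest: &Manifest) -> Result<Vec<&HgPath>, HgError> {
    manifest.iter().map(|entry| Ok(entry?.path)).collect()
}
```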
163
169
164 /// A single `Manifest` entry: a path with its hex node ID and optional flag.
170 /// A single `Manifest` entry: a path with its hex node ID and optional flag.
165 #[derive(Debug)]
171 #[derive(Debug)]
166 pub struct ManifestEntry<'manifest> {
172 pub struct ManifestEntry<'manifest> {
167 pub path: &'manifest HgPath,
173 pub path: &'manifest HgPath,
168 pub hex_node_id: &'manifest [u8],
174 pub hex_node_id: &'manifest [u8],
169
175
170 /// `Some` values are b'x', b'l', or b't'
176 /// `Some` values are b'x', b'l', or b't'
171 pub flags: Option<u8>,
177 pub flags: Option<u8>,
172 }
178 }
173
179
174 impl<'a> ManifestEntry<'a> {
180 impl<'a> ManifestEntry<'a> {
175 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
181 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
176 bytes.split_2(b'\0').ok_or_else(|| {
182 bytes.split_2(b'\0').ok_or_else(|| {
177 HgError::corrupted("manifest entry without \\0 delimiter")
183 HgError::corrupted("manifest entry without \\0 delimiter")
178 })
184 })
179 }
185 }
180
186
181 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
187 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
182 let (hex_node_id, flags) = match rest.split_last() {
188 let (hex_node_id, flags) = match rest.split_last() {
183 Some((&b'x', rest)) => (rest, Some(b'x')),
189 Some((&b'x', rest)) => (rest, Some(b'x')),
184 Some((&b'l', rest)) => (rest, Some(b'l')),
190 Some((&b'l', rest)) => (rest, Some(b'l')),
185 Some((&b't', rest)) => (rest, Some(b't')),
191 Some((&b't', rest)) => (rest, Some(b't')),
186 _ => (rest, None),
192 _ => (rest, None),
187 };
193 };
188 Self {
194 Self {
189 path: HgPath::new(path),
195 path: HgPath::new(path),
190 hex_node_id,
196 hex_node_id,
191 flags,
197 flags,
192 }
198 }
193 }
199 }
194
200
195 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
201 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
196 let (path, rest) = Self::split_path(bytes)?;
202 let (path, rest) = Self::split_path(bytes)?;
197 Ok(Self::from_path_and_rest(path, rest))
203 Ok(Self::from_path_and_rest(path, rest))
198 }
204 }
199
205
200 pub fn node_id(&self) -> Result<Node, HgError> {
206 pub fn node_id(&self) -> Result<Node, HgError> {
201 Node::from_hex_for_repo(self.hex_node_id)
207 Node::from_hex_for_repo(self.hex_node_id)
202 }
208 }
203 }
209 }
@@ -1,904 +1,910 b''
1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Mercurial concepts for handling revision history
6 //! Mercurial concepts for handling revision history
7
7
8 pub mod node;
8 pub mod node;
9 pub mod nodemap;
9 pub mod nodemap;
10 mod nodemap_docket;
10 mod nodemap_docket;
11 pub mod path_encode;
11 pub mod path_encode;
12 pub use node::{FromHexError, Node, NodePrefix};
12 pub use node::{FromHexError, Node, NodePrefix};
13 pub mod changelog;
13 pub mod changelog;
14 pub mod filelog;
14 pub mod filelog;
15 pub mod index;
15 pub mod index;
16 pub mod manifest;
16 pub mod manifest;
17 pub mod patch;
17 pub mod patch;
18
18
19 use std::borrow::Cow;
19 use std::borrow::Cow;
20 use std::io::Read;
20 use std::io::Read;
21 use std::ops::Deref;
21 use std::ops::Deref;
22 use std::path::Path;
22 use std::path::Path;
23
23
24 use flate2::read::ZlibDecoder;
24 use flate2::read::ZlibDecoder;
25 use sha1::{Digest, Sha1};
25 use sha1::{Digest, Sha1};
26 use std::cell::RefCell;
26 use std::cell::RefCell;
27 use zstd;
27 use zstd;
28
28
29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 use self::nodemap_docket::NodeMapDocket;
30 use self::nodemap_docket::NodeMapDocket;
31 use super::index::Index;
31 use super::index::Index;
32 use super::nodemap::{NodeMap, NodeMapError};
32 use super::nodemap::{NodeMap, NodeMapError};
33 use crate::errors::HgError;
33 use crate::errors::HgError;
34 use crate::vfs::Vfs;
34 use crate::vfs::Vfs;
35
35
36 /// Mercurial revision numbers
36 /// Mercurial revision numbers
37 ///
37 ///
38 /// As noted in revlog.c, revision numbers are actually encoded in
38 /// As noted in revlog.c, revision numbers are actually encoded in
39 /// 4 bytes, and are liberally converted to ints, whence the i32
39 /// 4 bytes, and are liberally converted to ints, whence the i32
40 pub type Revision = i32;
40 pub type Revision = i32;
41
41
42 /// Unchecked Mercurial revision numbers.
42 /// Unchecked Mercurial revision numbers.
43 ///
43 ///
44 /// Values of this type have no guarantee of being a valid revision number
44 /// Values of this type have no guarantee of being a valid revision number
45 /// in any context. Use method `check_revision` to get a valid revision within
45 /// in any context. Use method `check_revision` to get a valid revision within
46 /// the appropriate index object.
46 /// the appropriate index object.
47 ///
47 ///
48 /// As noted in revlog.c, revision numbers are actually encoded in
48 /// As noted in revlog.c, revision numbers are actually encoded in
49 /// 4 bytes, and are liberally converted to ints, whence the i32
49 /// 4 bytes, and are liberally converted to ints, whence the i32
50 #[derive(
50 #[derive(
51 Debug,
51 Debug,
52 derive_more::Display,
52 derive_more::Display,
53 Clone,
53 Clone,
54 Copy,
54 Copy,
55 Hash,
55 Hash,
56 PartialEq,
56 PartialEq,
57 Eq,
57 Eq,
58 PartialOrd,
58 PartialOrd,
59 Ord,
59 Ord,
60 )]
60 )]
61 pub struct UncheckedRevision(i32);
61 pub struct UncheckedRevision(i32);
62
62
63 impl From<Revision> for UncheckedRevision {
63 impl From<Revision> for UncheckedRevision {
64 fn from(value: Revision) -> Self {
64 fn from(value: Revision) -> Self {
65 Self(value)
65 Self(value)
66 }
66 }
67 }
67 }
68
68
69 /// Marker expressing the absence of a parent
69 /// Marker expressing the absence of a parent
70 ///
70 ///
71 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
71 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
72 /// to be smaller than all existing revisions.
72 /// to be smaller than all existing revisions.
73 pub const NULL_REVISION: Revision = -1;
73 pub const NULL_REVISION: Revision = -1;
74
74
75 /// Same as `mercurial.node.wdirrev`
75 /// Same as `mercurial.node.wdirrev`
76 ///
76 ///
77 /// This is also equal to `i32::max_value()`, but it's better to spell
77 /// This is also equal to `i32::max_value()`, but it's better to spell
78 /// it out explicitly, same as in `mercurial.node`
78 /// it out explicitly, same as in `mercurial.node`
79 #[allow(clippy::unreadable_literal)]
79 #[allow(clippy::unreadable_literal)]
80 pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
80 pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
81 UncheckedRevision(0x7fffffff);
81 UncheckedRevision(0x7fffffff);
82
82
83 pub const WORKING_DIRECTORY_HEX: &str =
83 pub const WORKING_DIRECTORY_HEX: &str =
84 "ffffffffffffffffffffffffffffffffffffffff";
84 "ffffffffffffffffffffffffffffffffffffffff";
85
85
86 /// The simplest expression of what we need of Mercurial DAGs.
86 /// The simplest expression of what we need of Mercurial DAGs.
87 pub trait Graph {
87 pub trait Graph {
88 /// Return the two parents of the given `Revision`.
88 /// Return the two parents of the given `Revision`.
89 ///
89 ///
90 /// Each of the parents can be independently `NULL_REVISION`
90 /// Each of the parents can be independently `NULL_REVISION`
91 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
91 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
92 }
92 }
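As a reference point for what implementors of this trait need to provide, a minimal in-memory sketch (not part of the patch):

```rust
// Editorial sketch: a toy DAG where `parents[i]` holds the two parents of
// revision `i`, with absent parents encoded as `NULL_REVISION`.
struct VecGraph {
    parents: Vec<[Revision; 2]>,
}

impl Graph for VecGraph {
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
        self.parents
            .get(rev as usize)
            .copied()
            // `ParentOutOfRange` is the only variant available; a real
            // implementation may want a dedicated error for unknown revisions.
            .ok_or(GraphError::ParentOutOfRange(rev))
    }
}
```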
93
93
94 #[derive(Clone, Debug, PartialEq)]
94 #[derive(Clone, Debug, PartialEq)]
95 pub enum GraphError {
95 pub enum GraphError {
96 ParentOutOfRange(Revision),
96 ParentOutOfRange(Revision),
97 }
97 }
98
98
99 /// The Mercurial Revlog Index
99 /// The Mercurial Revlog Index
100 ///
100 ///
101 /// This is currently limited to the minimal interface that is needed for
101 /// This is currently limited to the minimal interface that is needed for
102 /// the [`nodemap`](nodemap/index.html) module
102 /// the [`nodemap`](nodemap/index.html) module
103 pub trait RevlogIndex {
103 pub trait RevlogIndex {
104 /// Total number of Revisions referenced in this index
104 /// Total number of Revisions referenced in this index
105 fn len(&self) -> usize;
105 fn len(&self) -> usize;
106
106
107 fn is_empty(&self) -> bool {
107 fn is_empty(&self) -> bool {
108 self.len() == 0
108 self.len() == 0
109 }
109 }
110
110
111 /// Return a reference to the Node or `None` for `NULL_REVISION`
111 /// Return a reference to the Node or `None` for `NULL_REVISION`
112 fn node(&self, rev: Revision) -> Option<&Node>;
112 fn node(&self, rev: Revision) -> Option<&Node>;
113
113
114 /// Return a [`Revision`] if `rev` is a valid revision number for this
114 /// Return a [`Revision`] if `rev` is a valid revision number for this
115 /// index
115 /// index
116 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
116 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
117 let rev = rev.0;
117 let rev = rev.0;
118
118
119 if rev == NULL_REVISION || (rev >= 0 && (rev as usize) < self.len()) {
119 if rev == NULL_REVISION || (rev >= 0 && (rev as usize) < self.len()) {
120 Some(rev)
120 Some(rev)
121 } else {
121 } else {
122 None
122 None
123 }
123 }
124 }
124 }
125 }
125 }
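A stub implementation, purely to illustrate the default `check_revision` contract above (`FakeIndex` is a made-up name, not part of the patch):

```rust
// Editorial sketch: `FakeIndex(n)` pretends to hold `n` revisions and never
// resolves nodes, which is enough to exercise `check_revision`.
struct FakeIndex(usize);

impl RevlogIndex for FakeIndex {
    fn len(&self) -> usize {
        self.0
    }
    fn node(&self, _rev: Revision) -> Option<&Node> {
        None
    }
}

// With `FakeIndex(3)`: revisions 0..=2 and NULL_REVISION (-1) are accepted,
// any other input yields `None`.
```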
126
126
127 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
127 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
128 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
128 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
129 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
129 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
130 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
130 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
131
131
132 // Keep this in sync with REVIDX_KNOWN_FLAGS in
132 // Keep this in sync with REVIDX_KNOWN_FLAGS in
133 // mercurial/revlogutils/flagutil.py
133 // mercurial/revlogutils/flagutil.py
134 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
134 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
135 | REVISION_FLAG_ELLIPSIS
135 | REVISION_FLAG_ELLIPSIS
136 | REVISION_FLAG_EXTSTORED
136 | REVISION_FLAG_EXTSTORED
137 | REVISION_FLAG_HASCOPIESINFO;
137 | REVISION_FLAG_HASCOPIESINFO;
138
138
139 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
139 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
140
140
141 #[derive(Debug, derive_more::From, derive_more::Display)]
141 #[derive(Debug, derive_more::From, derive_more::Display)]
142 pub enum RevlogError {
142 pub enum RevlogError {
143 InvalidRevision,
143 InvalidRevision,
144 /// Working directory is not supported
144 /// Working directory is not supported
145 WDirUnsupported,
145 WDirUnsupported,
146 /// Found more than one entry whose ID matches the requested prefix
146 /// Found more than one entry whose ID matches the requested prefix
147 AmbiguousPrefix,
147 AmbiguousPrefix,
148 #[from]
148 #[from]
149 Other(HgError),
149 Other(HgError),
150 }
150 }
151
151
152 impl From<NodeMapError> for RevlogError {
152 impl From<NodeMapError> for RevlogError {
153 fn from(error: NodeMapError) -> Self {
153 fn from(error: NodeMapError) -> Self {
154 match error {
154 match error {
155 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
155 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
156 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
156 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
157 format!("nodemap point to revision {} not in index", rev),
157 format!("nodemap point to revision {} not in index", rev),
158 ),
158 ),
159 }
159 }
160 }
160 }
161 }
161 }
162
162
163 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
163 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
164 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
164 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
165 }
165 }
166
166
167 impl RevlogError {
167 impl RevlogError {
168 fn corrupted<S: AsRef<str>>(context: S) -> Self {
168 fn corrupted<S: AsRef<str>>(context: S) -> Self {
169 RevlogError::Other(corrupted(context))
169 RevlogError::Other(corrupted(context))
170 }
170 }
171 }
171 }
172
172
173 /// Read-only implementation of revlog.
173 /// Read-only implementation of revlog.
174 pub struct Revlog {
174 pub struct Revlog {
175 /// When index and data are not interleaved: bytes of the revlog index.
175 /// When index and data are not interleaved: bytes of the revlog index.
176 /// When index and data are interleaved: bytes of the revlog index and
176 /// When index and data are interleaved: bytes of the revlog index and
177 /// data.
177 /// data.
178 index: Index,
178 index: Index,
179 /// When index and data are not interleaved: bytes of the revlog data
179 /// When index and data are not interleaved: bytes of the revlog data
180 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
180 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
181 /// When present on disk: the persistent nodemap for this revlog
181 /// When present on disk: the persistent nodemap for this revlog
182 nodemap: Option<nodemap::NodeTree>,
182 nodemap: Option<nodemap::NodeTree>,
183 }
183 }
184
184
185 impl Graph for Revlog {
186 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
187 self.index.parents(rev)
188 }
189 }
190
185 impl Revlog {
191 impl Revlog {
186 /// Open a revlog index file.
192 /// Open a revlog index file.
187 ///
193 ///
188 /// It will also open the associated data file if index and data are not
194 /// It will also open the associated data file if index and data are not
189 /// interleaved.
195 /// interleaved.
190 pub fn open(
196 pub fn open(
191 store_vfs: &Vfs,
197 store_vfs: &Vfs,
192 index_path: impl AsRef<Path>,
198 index_path: impl AsRef<Path>,
193 data_path: Option<&Path>,
199 data_path: Option<&Path>,
194 use_nodemap: bool,
200 use_nodemap: bool,
195 ) -> Result<Self, HgError> {
201 ) -> Result<Self, HgError> {
196 let index_path = index_path.as_ref();
202 let index_path = index_path.as_ref();
197 let index = {
203 let index = {
198 match store_vfs.mmap_open_opt(&index_path)? {
204 match store_vfs.mmap_open_opt(&index_path)? {
199 None => Index::new(Box::new(vec![])),
205 None => Index::new(Box::new(vec![])),
200 Some(index_mmap) => {
206 Some(index_mmap) => {
201 let index = Index::new(Box::new(index_mmap))?;
207 let index = Index::new(Box::new(index_mmap))?;
202 Ok(index)
208 Ok(index)
203 }
209 }
204 }
210 }
205 }?;
211 }?;
206
212
207 let default_data_path = index_path.with_extension("d");
213 let default_data_path = index_path.with_extension("d");
208
214
209 // type annotation required
215 // type annotation required
210 // won't recognize Mmap as Deref<Target = [u8]>
216 // won't recognize Mmap as Deref<Target = [u8]>
211 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
217 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
212 if index.is_inline() {
218 if index.is_inline() {
213 None
219 None
214 } else {
220 } else {
215 let data_path = data_path.unwrap_or(&default_data_path);
221 let data_path = data_path.unwrap_or(&default_data_path);
216 let data_mmap = store_vfs.mmap_open(data_path)?;
222 let data_mmap = store_vfs.mmap_open(data_path)?;
217 Some(Box::new(data_mmap))
223 Some(Box::new(data_mmap))
218 };
224 };
219
225
220 let nodemap = if index.is_inline() || !use_nodemap {
226 let nodemap = if index.is_inline() || !use_nodemap {
221 None
227 None
222 } else {
228 } else {
223 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
229 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
224 |(docket, data)| {
230 |(docket, data)| {
225 nodemap::NodeTree::load_bytes(
231 nodemap::NodeTree::load_bytes(
226 Box::new(data),
232 Box::new(data),
227 docket.data_length,
233 docket.data_length,
228 )
234 )
229 },
235 },
230 )
236 )
231 };
237 };
232
238
233 Ok(Revlog {
239 Ok(Revlog {
234 index,
240 index,
235 data_bytes,
241 data_bytes,
236 nodemap,
242 nodemap,
237 })
243 })
238 }
244 }
239
245
240 /// Return number of entries of the `Revlog`.
246 /// Return number of entries of the `Revlog`.
241 pub fn len(&self) -> usize {
247 pub fn len(&self) -> usize {
242 self.index.len()
248 self.index.len()
243 }
249 }
244
250
245 /// Returns `true` if the `Revlog` has zero `entries`.
251 /// Returns `true` if the `Revlog` has zero `entries`.
246 pub fn is_empty(&self) -> bool {
252 pub fn is_empty(&self) -> bool {
247 self.index.is_empty()
253 self.index.is_empty()
248 }
254 }
249
255
250 /// Returns the node ID for the given revision number, if it exists in this
256 /// Returns the node ID for the given revision number, if it exists in this
251 /// revlog
257 /// revlog
252 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
258 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
253 if rev == NULL_REVISION.into() {
259 if rev == NULL_REVISION.into() {
254 return Some(&NULL_NODE);
260 return Some(&NULL_NODE);
255 }
261 }
256 let rev = self.index.check_revision(rev)?;
262 let rev = self.index.check_revision(rev)?;
257 Some(self.index.get_entry(rev)?.hash())
263 Some(self.index.get_entry(rev)?.hash())
258 }
264 }
259
265
260 /// Return the revision number for the given node ID, if it exists in this
266 /// Return the revision number for the given node ID, if it exists in this
261 /// revlog
267 /// revlog
262 pub fn rev_from_node(
268 pub fn rev_from_node(
263 &self,
269 &self,
264 node: NodePrefix,
270 node: NodePrefix,
265 ) -> Result<Revision, RevlogError> {
271 ) -> Result<Revision, RevlogError> {
266 let looked_up = if let Some(nodemap) = &self.nodemap {
272 let looked_up = if let Some(nodemap) = &self.nodemap {
267 nodemap
273 nodemap
268 .find_bin(&self.index, node)?
274 .find_bin(&self.index, node)?
269 .ok_or(RevlogError::InvalidRevision)
275 .ok_or(RevlogError::InvalidRevision)
270 } else {
276 } else {
271 self.rev_from_node_no_persistent_nodemap(node)
277 self.rev_from_node_no_persistent_nodemap(node)
272 };
278 };
273
279
274 if node.is_prefix_of(&NULL_NODE) {
280 if node.is_prefix_of(&NULL_NODE) {
275 return match looked_up {
281 return match looked_up {
276 Ok(_) => Err(RevlogError::AmbiguousPrefix),
282 Ok(_) => Err(RevlogError::AmbiguousPrefix),
277 Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),
283 Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),
278 res => res,
284 res => res,
279 };
285 };
280 };
286 };
281
287
282 looked_up
288 looked_up
283 }
289 }
284
290
285 /// Same as `rev_from_node`, without using a persistent nodemap
291 /// Same as `rev_from_node`, without using a persistent nodemap
286 ///
292 ///
287 /// This is used as fallback when a persistent nodemap is not present.
293 /// This is used as fallback when a persistent nodemap is not present.
288 /// This happens when the persistent-nodemap experimental feature is not
294 /// This happens when the persistent-nodemap experimental feature is not
289 /// enabled, or for small revlogs.
295 /// enabled, or for small revlogs.
290 fn rev_from_node_no_persistent_nodemap(
296 fn rev_from_node_no_persistent_nodemap(
291 &self,
297 &self,
292 node: NodePrefix,
298 node: NodePrefix,
293 ) -> Result<Revision, RevlogError> {
299 ) -> Result<Revision, RevlogError> {
294 // Linear scan of the revlog
300 // Linear scan of the revlog
295 // TODO: consider building a non-persistent nodemap in memory to
301 // TODO: consider building a non-persistent nodemap in memory to
296 // optimize these cases.
302 // optimize these cases.
297 let mut found_by_prefix = None;
303 let mut found_by_prefix = None;
298 for rev in (0..self.len() as Revision).rev() {
304 for rev in (0..self.len() as Revision).rev() {
299 let index_entry = self.index.get_entry(rev).ok_or_else(|| {
305 let index_entry = self.index.get_entry(rev).ok_or_else(|| {
300 HgError::corrupted(
306 HgError::corrupted(
301 "revlog references a revision not in the index",
307 "revlog references a revision not in the index",
302 )
308 )
303 })?;
309 })?;
304 if node == *index_entry.hash() {
310 if node == *index_entry.hash() {
305 return Ok(rev);
311 return Ok(rev);
306 }
312 }
307 if node.is_prefix_of(index_entry.hash()) {
313 if node.is_prefix_of(index_entry.hash()) {
308 if found_by_prefix.is_some() {
314 if found_by_prefix.is_some() {
309 return Err(RevlogError::AmbiguousPrefix);
315 return Err(RevlogError::AmbiguousPrefix);
310 }
316 }
311 found_by_prefix = Some(rev)
317 found_by_prefix = Some(rev)
312 }
318 }
313 }
319 }
314 found_by_prefix.ok_or(RevlogError::InvalidRevision)
320 found_by_prefix.ok_or(RevlogError::InvalidRevision)
315 }
321 }
316
322
317 /// Returns whether the given revision exists in this revlog.
323 /// Returns whether the given revision exists in this revlog.
318 pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
324 pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
319 self.index.check_revision(rev).is_some()
325 self.index.check_revision(rev).is_some()
320 }
326 }
321
327
322 /// Return the full data associated to a revision.
328 /// Return the full data associated to a revision.
323 ///
329 ///
324 /// All entries required to build the final data out of deltas will be
330 /// All entries required to build the final data out of deltas will be
325 /// retrieved as needed, and the deltas will be applied to the initial
331 /// retrieved as needed, and the deltas will be applied to the initial
326 /// snapshot to rebuild the final data.
332 /// snapshot to rebuild the final data.
327 pub fn get_rev_data(
333 pub fn get_rev_data(
328 &self,
334 &self,
329 rev: UncheckedRevision,
335 rev: UncheckedRevision,
330 ) -> Result<Cow<[u8]>, RevlogError> {
336 ) -> Result<Cow<[u8]>, RevlogError> {
331 if rev == NULL_REVISION.into() {
337 if rev == NULL_REVISION.into() {
332 return Ok(Cow::Borrowed(&[]));
338 return Ok(Cow::Borrowed(&[]));
333 };
339 };
334 self.get_entry(rev)?.data()
340 self.get_entry(rev)?.data()
335 }
341 }
336
342
337 /// [`Self::get_rev_data`] for checked revisions.
343 /// [`Self::get_rev_data`] for checked revisions.
338 pub fn get_rev_data_for_checked_rev(
344 pub fn get_rev_data_for_checked_rev(
339 &self,
345 &self,
340 rev: Revision,
346 rev: Revision,
341 ) -> Result<Cow<[u8]>, RevlogError> {
347 ) -> Result<Cow<[u8]>, RevlogError> {
342 if rev == NULL_REVISION {
348 if rev == NULL_REVISION {
343 return Ok(Cow::Borrowed(&[]));
349 return Ok(Cow::Borrowed(&[]));
344 };
350 };
345 self.get_entry_for_checked_rev(rev)?.data()
351 self.get_entry_for_checked_rev(rev)?.data()
346 }
352 }
347
353
348 /// Check the hash of some given data against the recorded hash.
354 /// Check the hash of some given data against the recorded hash.
349 pub fn check_hash(
355 pub fn check_hash(
350 &self,
356 &self,
351 p1: Revision,
357 p1: Revision,
352 p2: Revision,
358 p2: Revision,
353 expected: &[u8],
359 expected: &[u8],
354 data: &[u8],
360 data: &[u8],
355 ) -> bool {
361 ) -> bool {
356 let e1 = self.index.get_entry(p1);
362 let e1 = self.index.get_entry(p1);
357 let h1 = match e1 {
363 let h1 = match e1 {
358 Some(ref entry) => entry.hash(),
364 Some(ref entry) => entry.hash(),
359 None => &NULL_NODE,
365 None => &NULL_NODE,
360 };
366 };
361 let e2 = self.index.get_entry(p2);
367 let e2 = self.index.get_entry(p2);
362 let h2 = match e2 {
368 let h2 = match e2 {
363 Some(ref entry) => entry.hash(),
369 Some(ref entry) => entry.hash(),
364 None => &NULL_NODE,
370 None => &NULL_NODE,
365 };
371 };
366
372
367 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
373 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
368 }
374 }
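The `hash` helper used above is defined outside this hunk; as far as can be told from context it computes the SHA-1 of the two parent hashes in ascending byte order followed by the full text, along the lines of the following sketch (an assumption, not the actual helper):

```rust
// Editorial sketch of the node hash checked above: SHA-1 over the sorted
// parent hashes, then the revision data.
fn node_hash_sketch(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
    let mut hasher = Sha1::new();
    let (first, second) = if p1_hash <= p2_hash {
        (p1_hash, p2_hash)
    } else {
        (p2_hash, p1_hash)
    };
    hasher.update(first);
    hasher.update(second);
    hasher.update(data);
    hasher.finalize().to_vec()
}
```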
369
375
370 /// Build the full data of a revision out of its snapshot
376 /// Build the full data of a revision out of its snapshot
371 /// and its deltas.
377 /// and its deltas.
372 fn build_data_from_deltas(
378 fn build_data_from_deltas(
373 snapshot: RevlogEntry,
379 snapshot: RevlogEntry,
374 deltas: &[RevlogEntry],
380 deltas: &[RevlogEntry],
375 ) -> Result<Vec<u8>, HgError> {
381 ) -> Result<Vec<u8>, HgError> {
376 let snapshot = snapshot.data_chunk()?;
382 let snapshot = snapshot.data_chunk()?;
377 let deltas = deltas
383 let deltas = deltas
378 .iter()
384 .iter()
379 .rev()
385 .rev()
380 .map(RevlogEntry::data_chunk)
386 .map(RevlogEntry::data_chunk)
381 .collect::<Result<Vec<_>, _>>()?;
387 .collect::<Result<Vec<_>, _>>()?;
382 let patches: Vec<_> =
388 let patches: Vec<_> =
383 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
389 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
384 let patch = patch::fold_patch_lists(&patches);
390 let patch = patch::fold_patch_lists(&patches);
385 Ok(patch.apply(&snapshot))
391 Ok(patch.apply(&snapshot))
386 }
392 }
387
393
388 /// Return the revlog data.
394 /// Return the revlog data.
389 fn data(&self) -> &[u8] {
395 fn data(&self) -> &[u8] {
390 match &self.data_bytes {
396 match &self.data_bytes {
391 Some(data_bytes) => data_bytes,
397 Some(data_bytes) => data_bytes,
392 None => panic!(
398 None => panic!(
393 "forgot to load the data or trying to access inline data"
399 "forgot to load the data or trying to access inline data"
394 ),
400 ),
395 }
401 }
396 }
402 }
397
403
398 pub fn make_null_entry(&self) -> RevlogEntry {
404 pub fn make_null_entry(&self) -> RevlogEntry {
399 RevlogEntry {
405 RevlogEntry {
400 revlog: self,
406 revlog: self,
401 rev: NULL_REVISION,
407 rev: NULL_REVISION,
402 bytes: b"",
408 bytes: b"",
403 compressed_len: 0,
409 compressed_len: 0,
404 uncompressed_len: 0,
410 uncompressed_len: 0,
405 base_rev_or_base_of_delta_chain: None,
411 base_rev_or_base_of_delta_chain: None,
406 p1: NULL_REVISION,
412 p1: NULL_REVISION,
407 p2: NULL_REVISION,
413 p2: NULL_REVISION,
408 flags: NULL_REVLOG_ENTRY_FLAGS,
414 flags: NULL_REVLOG_ENTRY_FLAGS,
409 hash: NULL_NODE,
415 hash: NULL_NODE,
410 }
416 }
411 }
417 }
412
418
413 fn get_entry_for_checked_rev(
419 fn get_entry_for_checked_rev(
414 &self,
420 &self,
415 rev: Revision,
421 rev: Revision,
416 ) -> Result<RevlogEntry, RevlogError> {
422 ) -> Result<RevlogEntry, RevlogError> {
417 if rev == NULL_REVISION {
423 if rev == NULL_REVISION {
418 return Ok(self.make_null_entry());
424 return Ok(self.make_null_entry());
419 }
425 }
420 let index_entry = self
426 let index_entry = self
421 .index
427 .index
422 .get_entry(rev)
428 .get_entry(rev)
423 .ok_or(RevlogError::InvalidRevision)?;
429 .ok_or(RevlogError::InvalidRevision)?;
424 let start = index_entry.offset();
430 let start = index_entry.offset();
425 let end = start + index_entry.compressed_len() as usize;
431 let end = start + index_entry.compressed_len() as usize;
426 let data = if self.index.is_inline() {
432 let data = if self.index.is_inline() {
427 self.index.data(start, end)
433 self.index.data(start, end)
428 } else {
434 } else {
429 &self.data()[start..end]
435 &self.data()[start..end]
430 };
436 };
431 let base_rev = self
437 let base_rev = self
432 .index
438 .index
433 .check_revision(index_entry.base_revision_or_base_of_delta_chain())
439 .check_revision(index_entry.base_revision_or_base_of_delta_chain())
434 .ok_or_else(|| {
440 .ok_or_else(|| {
435 RevlogError::corrupted(format!(
441 RevlogError::corrupted(format!(
436 "base revision for rev {} is invalid",
442 "base revision for rev {} is invalid",
437 rev
443 rev
438 ))
444 ))
439 })?;
445 })?;
440 let p1 =
446 let p1 =
441 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
447 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
442 RevlogError::corrupted(format!(
448 RevlogError::corrupted(format!(
443 "p1 for rev {} is invalid",
449 "p1 for rev {} is invalid",
444 rev
450 rev
445 ))
451 ))
446 })?;
452 })?;
447 let p2 =
453 let p2 =
448 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
454 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
449 RevlogError::corrupted(format!(
455 RevlogError::corrupted(format!(
450 "p2 for rev {} is invalid",
456 "p2 for rev {} is invalid",
451 rev
457 rev
452 ))
458 ))
453 })?;
459 })?;
454 let entry = RevlogEntry {
460 let entry = RevlogEntry {
455 revlog: self,
461 revlog: self,
456 rev,
462 rev,
457 bytes: data,
463 bytes: data,
458 compressed_len: index_entry.compressed_len(),
464 compressed_len: index_entry.compressed_len(),
459 uncompressed_len: index_entry.uncompressed_len(),
465 uncompressed_len: index_entry.uncompressed_len(),
460 base_rev_or_base_of_delta_chain: if base_rev == rev {
466 base_rev_or_base_of_delta_chain: if base_rev == rev {
461 None
467 None
462 } else {
468 } else {
463 Some(base_rev)
469 Some(base_rev)
464 },
470 },
465 p1,
471 p1,
466 p2,
472 p2,
467 flags: index_entry.flags(),
473 flags: index_entry.flags(),
468 hash: *index_entry.hash(),
474 hash: *index_entry.hash(),
469 };
475 };
470 Ok(entry)
476 Ok(entry)
471 }
477 }
472
478
473 /// Get an entry of the revlog.
479 /// Get an entry of the revlog.
474 pub fn get_entry(
480 pub fn get_entry(
475 &self,
481 &self,
476 rev: UncheckedRevision,
482 rev: UncheckedRevision,
477 ) -> Result<RevlogEntry, RevlogError> {
483 ) -> Result<RevlogEntry, RevlogError> {
478 if rev == NULL_REVISION.into() {
484 if rev == NULL_REVISION.into() {
479 return Ok(self.make_null_entry());
485 return Ok(self.make_null_entry());
480 }
486 }
481 let rev = self.index.check_revision(rev).ok_or_else(|| {
487 let rev = self.index.check_revision(rev).ok_or_else(|| {
482 RevlogError::corrupted(format!("rev {} is invalid", rev))
488 RevlogError::corrupted(format!("rev {} is invalid", rev))
483 })?;
489 })?;
484 self.get_entry_for_checked_rev(rev)
490 self.get_entry_for_checked_rev(rev)
485 }
491 }
486 }
492 }
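Putting this impl block together, a typical read path looks roughly like the following sketch; the `store_vfs` value is assumed to come from the repository object (editorial sketch, not part of the patch):

```rust
// Editorial sketch: open a revlog and read one revision's full text.
fn read_changelog_text(
    store_vfs: &Vfs,
    rev: UncheckedRevision,
) -> Result<Vec<u8>, RevlogError> {
    let revlog = Revlog::open(store_vfs, "00changelog.i", None, false)?;
    Ok(revlog.get_rev_data(rev)?.into_owned())
}
```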
487
493
488 /// The revlog entry's bytes and the necessary information to extract
494 /// The revlog entry's bytes and the necessary information to extract
489 /// the entry's data.
495 /// the entry's data.
490 #[derive(Clone)]
496 #[derive(Clone)]
491 pub struct RevlogEntry<'revlog> {
497 pub struct RevlogEntry<'revlog> {
492 revlog: &'revlog Revlog,
498 revlog: &'revlog Revlog,
493 rev: Revision,
499 rev: Revision,
494 bytes: &'revlog [u8],
500 bytes: &'revlog [u8],
495 compressed_len: u32,
501 compressed_len: u32,
496 uncompressed_len: i32,
502 uncompressed_len: i32,
497 base_rev_or_base_of_delta_chain: Option<Revision>,
503 base_rev_or_base_of_delta_chain: Option<Revision>,
498 p1: Revision,
504 p1: Revision,
499 p2: Revision,
505 p2: Revision,
500 flags: u16,
506 flags: u16,
501 hash: Node,
507 hash: Node,
502 }
508 }
503
509
504 thread_local! {
510 thread_local! {
505 // seems fine to [unwrap] here: this can only fail due to memory allocation
511 // seems fine to [unwrap] here: this can only fail due to memory allocation
506 // failing, and it's normal for that to cause panic.
512 // failing, and it's normal for that to cause panic.
507 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
513 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
508 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
514 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
509 }
515 }
510
516
511 fn zstd_decompress_to_buffer(
517 fn zstd_decompress_to_buffer(
512 bytes: &[u8],
518 bytes: &[u8],
513 buf: &mut Vec<u8>,
519 buf: &mut Vec<u8>,
514 ) -> Result<usize, std::io::Error> {
520 ) -> Result<usize, std::io::Error> {
515 ZSTD_DECODER
521 ZSTD_DECODER
516 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
522 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
517 }
523 }
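The thread-local decoder above avoids recreating a zstd context for every chunk. A usage sketch (not part of the patch), assuming the destination buffer must be pre-sized with the expected uncompressed length from the index entry:

```rust
// Editorial sketch: decompress one chunk into a freshly sized buffer.
fn decompress_chunk(
    compressed: &[u8],
    expected_size: usize,
) -> Result<Vec<u8>, std::io::Error> {
    let mut buf = Vec::with_capacity(expected_size);
    zstd_decompress_to_buffer(compressed, &mut buf)?;
    Ok(buf)
}
```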
518
524
519 impl<'revlog> RevlogEntry<'revlog> {
525 impl<'revlog> RevlogEntry<'revlog> {
520 pub fn revision(&self) -> Revision {
526 pub fn revision(&self) -> Revision {
521 self.rev
527 self.rev
522 }
528 }
523
529
524 pub fn node(&self) -> &Node {
530 pub fn node(&self) -> &Node {
525 &self.hash
531 &self.hash
526 }
532 }
527
533
528 pub fn uncompressed_len(&self) -> Option<u32> {
534 pub fn uncompressed_len(&self) -> Option<u32> {
529 u32::try_from(self.uncompressed_len).ok()
535 u32::try_from(self.uncompressed_len).ok()
530 }
536 }
531
537
532 pub fn has_p1(&self) -> bool {
538 pub fn has_p1(&self) -> bool {
533 self.p1 != NULL_REVISION
539 self.p1 != NULL_REVISION
534 }
540 }
535
541
536 pub fn p1_entry(
542 pub fn p1_entry(
537 &self,
543 &self,
538 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
544 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
539 if self.p1 == NULL_REVISION {
545 if self.p1 == NULL_REVISION {
540 Ok(None)
546 Ok(None)
541 } else {
547 } else {
542 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?))
548 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?))
543 }
549 }
544 }
550 }
545
551
546 pub fn p2_entry(
552 pub fn p2_entry(
547 &self,
553 &self,
548 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
554 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
549 if self.p2 == NULL_REVISION {
555 if self.p2 == NULL_REVISION {
550 Ok(None)
556 Ok(None)
551 } else {
557 } else {
552 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?))
558 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?))
553 }
559 }
554 }
560 }
555
561
556 pub fn p1(&self) -> Option<Revision> {
562 pub fn p1(&self) -> Option<Revision> {
557 if self.p1 == NULL_REVISION {
563 if self.p1 == NULL_REVISION {
558 None
564 None
559 } else {
565 } else {
560 Some(self.p1)
566 Some(self.p1)
561 }
567 }
562 }
568 }
563
569
564 pub fn p2(&self) -> Option<Revision> {
570 pub fn p2(&self) -> Option<Revision> {
565 if self.p2 == NULL_REVISION {
571 if self.p2 == NULL_REVISION {
566 None
572 None
567 } else {
573 } else {
568 Some(self.p2)
574 Some(self.p2)
569 }
575 }
570 }
576 }
571
577
572 pub fn is_censored(&self) -> bool {
578 pub fn is_censored(&self) -> bool {
573 (self.flags & REVISION_FLAG_CENSORED) != 0
579 (self.flags & REVISION_FLAG_CENSORED) != 0
574 }
580 }
575
581
576 pub fn has_length_affecting_flag_processor(&self) -> bool {
582 pub fn has_length_affecting_flag_processor(&self) -> bool {
577 // Relevant Python code: revlog.size()
583 // Relevant Python code: revlog.size()
578 // note: ELLIPSIS is known to not change the content
584 // note: ELLIPSIS is known to not change the content
579 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
585 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
580 }
586 }
581
587
582 /// The data for this entry, after resolving deltas if any.
588 /// The data for this entry, after resolving deltas if any.
583 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
589 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
584 let mut entry = self.clone();
590 let mut entry = self.clone();
585 let mut delta_chain = vec![];
591 let mut delta_chain = vec![];
586
592
587 // The meaning of `base_rev_or_base_of_delta_chain` depends on
593 // The meaning of `base_rev_or_base_of_delta_chain` depends on
588 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
594 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
589 // `mercurial/revlogutils/constants.py` and the code in
595 // `mercurial/revlogutils/constants.py` and the code in
590 // [_chaininfo] and in [index_deltachain].
596 // [_chaininfo] and in [index_deltachain].
591 let uses_generaldelta = self.revlog.index.uses_generaldelta();
597 let uses_generaldelta = self.revlog.index.uses_generaldelta();
592 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
598 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
593 entry = if uses_generaldelta {
599 entry = if uses_generaldelta {
594 delta_chain.push(entry);
600 delta_chain.push(entry);
595 self.revlog.get_entry_for_checked_rev(base_rev)?
601 self.revlog.get_entry_for_checked_rev(base_rev)?
596 } else {
602 } else {
597 let base_rev = UncheckedRevision(entry.rev - 1);
603 let base_rev = UncheckedRevision(entry.rev - 1);
598 delta_chain.push(entry);
604 delta_chain.push(entry);
599 self.revlog.get_entry(base_rev)?
605 self.revlog.get_entry(base_rev)?
600 };
606 };
601 }
607 }
602
608
603 let data = if delta_chain.is_empty() {
609 let data = if delta_chain.is_empty() {
604 entry.data_chunk()?
610 entry.data_chunk()?
605 } else {
611 } else {
606 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
612 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
607 };
613 };
608
614
609 Ok(data)
615 Ok(data)
610 }
616 }
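The loop above is easier to follow with a toy model. The sketch below (the `ToyEntry`, `resolve` and `apply` names are hypothetical, not the real hg-core types) shows the same idea: follow delta bases until a full snapshot is reached, remembering that without generaldelta the base is implicitly the previous revision, then replay the collected deltas from oldest to newest.

struct ToyEntry {
    /// `None` means this entry stores a full snapshot; `Some(base)` means the
    /// payload is a delta against `base`. With generaldelta the base is stored
    /// explicitly; without it, the base is implicitly `rev - 1`.
    delta_base: Option<usize>,
    payload: Vec<u8>,
}

/// Resolve the full text of `rev`, given a delta-application function.
fn resolve(
    entries: &[ToyEntry],
    rev: usize,
    apply: impl Fn(&[u8], &[u8]) -> Vec<u8>,
) -> Vec<u8> {
    let mut chain = Vec::new();
    let mut current = &entries[rev];
    // Walk towards the snapshot, collecting the deltas on the way.
    while let Some(base) = current.delta_base {
        chain.push(current);
        current = &entries[base];
    }
    // `current` is now the snapshot; apply the deltas oldest-first.
    chain.iter().rev().fold(current.payload.clone(), |acc, e| {
        apply(&acc[..], &e.payload[..])
    })
}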
611
617
612 fn check_data(
618 fn check_data(
613 &self,
619 &self,
614 data: Cow<'revlog, [u8]>,
620 data: Cow<'revlog, [u8]>,
615 ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
621 ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
616 if self.revlog.check_hash(
622 if self.revlog.check_hash(
617 self.p1,
623 self.p1,
618 self.p2,
624 self.p2,
619 self.hash.as_bytes(),
625 self.hash.as_bytes(),
620 &data,
626 &data,
621 ) {
627 ) {
622 Ok(data)
628 Ok(data)
623 } else {
629 } else {
624 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
630 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
625 return Err(HgError::unsupported(
631 return Err(HgError::unsupported(
626 "ellipsis revisions are not supported by rhg",
632 "ellipsis revisions are not supported by rhg",
627 )
633 )
628 .into());
634 .into());
629 }
635 }
630 Err(corrupted(format!(
636 Err(corrupted(format!(
631 "hash check failed for revision {}",
637 "hash check failed for revision {}",
632 self.rev
638 self.rev
633 ))
639 ))
634 .into())
640 .into())
635 }
641 }
636 }
642 }
637
643
638 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
644 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
639 let data = self.rawdata()?;
645 let data = self.rawdata()?;
640 if self.rev == NULL_REVISION {
646 if self.rev == NULL_REVISION {
641 return Ok(data);
647 return Ok(data);
642 }
648 }
643 if self.is_censored() {
649 if self.is_censored() {
644 return Err(HgError::CensoredNodeError.into());
650 return Err(HgError::CensoredNodeError.into());
645 }
651 }
646 self.check_data(data)
652 self.check_data(data)
647 }
653 }
648
654
649 /// Extract the data contained in the entry.
655 /// Extract the data contained in the entry.
650 /// This may be a delta. (See `is_delta`.)
656 /// This may be a delta. (See `is_delta`.)
651 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
657 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
652 if self.bytes.is_empty() {
658 if self.bytes.is_empty() {
653 return Ok(Cow::Borrowed(&[]));
659 return Ok(Cow::Borrowed(&[]));
654 }
660 }
655 match self.bytes[0] {
661 match self.bytes[0] {
656 // Revision data is the entirety of the entry, including this
662 // Revision data is the entirety of the entry, including this
657 // header.
663 // header.
658 b'\0' => Ok(Cow::Borrowed(self.bytes)),
664 b'\0' => Ok(Cow::Borrowed(self.bytes)),
659 // Raw revision data follows.
665 // Raw revision data follows.
660 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
666 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
661 // zlib (RFC 1950) data.
667 // zlib (RFC 1950) data.
662 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
668 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
663 // zstd data.
669 // zstd data.
664 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
670 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
665 // A proper new format should have had a repo/store requirement.
671 // A proper new format should have had a repo/store requirement.
666 format_type => Err(corrupted(format!(
672 format_type => Err(corrupted(format!(
667 "unknown compression header '{}'",
673 "unknown compression header '{}'",
668 format_type
674 format_type
669 ))),
675 ))),
670 }
676 }
671 }
677 }
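As a reference for the match above, the header byte of each chunk doubles as a format tag. This small hedged classifier (illustrative only, not an API offered by hg-core) makes the mapping explicit; note that b'x' (0x78) is simply the first byte of a standard zlib stream and 0x28 is the first byte of the little-endian zstd frame magic number.

#[derive(Debug)]
enum ChunkKind {
    /// b'\0': the revision data is the whole chunk, header byte included.
    Plain,
    /// b'u': raw, uncompressed data follows the header byte.
    Uncompressed,
    /// b'x' (0x78): a zlib (RFC 1950) stream.
    Zlib,
    /// 0x28: first byte of the zstd frame magic number.
    Zstd,
    /// Anything else is treated as corruption by `data_chunk`.
    Unknown(u8),
}

fn classify(chunk: &[u8]) -> Option<ChunkKind> {
    Some(match *chunk.first()? {
        b'\0' => ChunkKind::Plain,
        b'u' => ChunkKind::Uncompressed,
        b'x' => ChunkKind::Zlib,
        0x28 => ChunkKind::Zstd,
        other => ChunkKind::Unknown(other),
    })
}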
672
678
673 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
679 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
674 let mut decoder = ZlibDecoder::new(self.bytes);
680 let mut decoder = ZlibDecoder::new(self.bytes);
675 if self.is_delta() {
681 if self.is_delta() {
676 let mut buf = Vec::with_capacity(self.compressed_len as usize);
682 let mut buf = Vec::with_capacity(self.compressed_len as usize);
677 decoder
683 decoder
678 .read_to_end(&mut buf)
684 .read_to_end(&mut buf)
679 .map_err(|e| corrupted(e.to_string()))?;
685 .map_err(|e| corrupted(e.to_string()))?;
680 Ok(buf)
686 Ok(buf)
681 } else {
687 } else {
682 let cap = self.uncompressed_len.max(0) as usize;
688 let cap = self.uncompressed_len.max(0) as usize;
683 let mut buf = vec![0; cap];
689 let mut buf = vec![0; cap];
684 decoder
690 decoder
685 .read_exact(&mut buf)
691 .read_exact(&mut buf)
686 .map_err(|e| corrupted(e.to_string()))?;
692 .map_err(|e| corrupted(e.to_string()))?;
687 Ok(buf)
693 Ok(buf)
688 }
694 }
689 }
695 }
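For comparison, here is a hedged, standalone sketch of the two zlib read strategies used above (assuming the `flate2` crate, which provides `ZlibDecoder`; `zlib_decode` is an illustrative name): `read_exact` when the decompressed length is known from the index, `read_to_end` when it is not, as for a delta.

use std::io::Read;

fn zlib_decode(
    bytes: &[u8],
    known_len: Option<usize>,
) -> std::io::Result<Vec<u8>> {
    let mut decoder = flate2::read::ZlibDecoder::new(bytes);
    match known_len {
        // The index told us the decompressed size: read exactly that much.
        Some(len) => {
            let mut buf = vec![0; len];
            decoder.read_exact(&mut buf)?;
            Ok(buf)
        }
        // Size unknown (e.g. a delta): let the buffer grow as needed.
        None => {
            let mut buf = Vec::new();
            decoder.read_to_end(&mut buf)?;
            Ok(buf)
        }
    }
}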
690
696
691 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
697 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
692 let cap = self.uncompressed_len.max(0) as usize;
698 let cap = self.uncompressed_len.max(0) as usize;
693 if self.is_delta() {
699 if self.is_delta() {
694 // [cap] is usually an over-estimate of the space needed because
700 // [cap] is usually an over-estimate of the space needed because
695 // it's the length of delta-decoded data, but we're interested
701 // it's the length of delta-decoded data, but we're interested
696 // in the size of the delta.
702 // in the size of the delta.
697 // This means we have to [shrink_to_fit] to avoid holding on
703 // This means we have to [shrink_to_fit] to avoid holding on
698 // to a large chunk of memory, but it also means we must have a
704 // to a large chunk of memory, but it also means we must have a
699 // fallback branch, for the case when the delta is longer than
705 // fallback branch, for the case when the delta is longer than
700 // the original data (surprisingly, this does happen in practice)
706 // the original data (surprisingly, this does happen in practice)
701 let mut buf = Vec::with_capacity(cap);
707 let mut buf = Vec::with_capacity(cap);
702 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
708 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
703 Ok(_) => buf.shrink_to_fit(),
709 Ok(_) => buf.shrink_to_fit(),
704 Err(_) => {
710 Err(_) => {
705 buf.clear();
711 buf.clear();
706 zstd::stream::copy_decode(self.bytes, &mut buf)
712 zstd::stream::copy_decode(self.bytes, &mut buf)
707 .map_err(|e| corrupted(e.to_string()))?;
713 .map_err(|e| corrupted(e.to_string()))?;
708 }
714 }
709 };
715 };
710 Ok(buf)
716 Ok(buf)
711 } else {
717 } else {
712 let mut buf = Vec::with_capacity(cap);
718 let mut buf = Vec::with_capacity(cap);
713 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
719 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
714 .map_err(|e| corrupted(e.to_string()))?;
720 .map_err(|e| corrupted(e.to_string()))?;
715 if len != self.uncompressed_len as usize {
721 if len != self.uncompressed_len as usize {
716 Err(corrupted("uncompressed length does not match"))
722 Err(corrupted("uncompressed length does not match"))
717 } else {
723 } else {
718 Ok(buf)
724 Ok(buf)
719 }
725 }
720 }
726 }
721 }
727 }
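The capacity-hint-plus-fallback logic in the delta branch is reusable on its own. Below is a hedged restatement of that pattern (the `decompress_with_hint` name is illustrative, and it uses plain `zstd` crate calls instead of the thread-local helper): try the fast bulk path into a pre-sized buffer, then fall back to streaming decompression, which grows the buffer itself, when the hint turns out to be too small.

fn decompress_with_hint(
    frame: &[u8],
    size_hint: usize,
) -> std::io::Result<Vec<u8>> {
    let mut buf = Vec::with_capacity(size_hint);
    match zstd::bulk::Decompressor::new()?.decompress_to_buffer(frame, &mut buf)
    {
        // The hint was large enough; give back the over-reserved memory.
        Ok(_) => buf.shrink_to_fit(),
        // The hint was too small (a delta can be larger than the text it
        // produces): start over with the streaming decoder.
        Err(_) => {
            buf.clear();
            zstd::stream::copy_decode(frame, &mut buf)?;
        }
    }
    Ok(buf)
}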
722
728
723 /// Tell whether the entry is a snapshot or a delta
729 /// Tell whether the entry is a snapshot or a delta
724 /// (this influences decompression).
730 /// (this influences decompression).
725 fn is_delta(&self) -> bool {
731 fn is_delta(&self) -> bool {
726 self.base_rev_or_base_of_delta_chain.is_some()
732 self.base_rev_or_base_of_delta_chain.is_some()
727 }
733 }
728 }
734 }
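As a usage illustration of the accessors above, the following hedged sketch (the `first_parent_chain` helper is hypothetical, written as if it lived next to this impl) walks first parents back to a root revision using only `revision()` and `p1_entry()`.

fn first_parent_chain(
    mut entry: RevlogEntry<'_>,
) -> Result<Vec<Revision>, RevlogError> {
    let mut revs = vec![entry.revision()];
    // `p1_entry` returns `Ok(None)` once the parent is the null revision.
    while let Some(parent) = entry.p1_entry()? {
        revs.push(parent.revision());
        entry = parent;
    }
    Ok(revs)
}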
729
735
730 /// Calculate the hash of a revision given its data and its parents.
736 /// Calculate the hash of a revision given its data and its parents.
731 fn hash(
737 fn hash(
732 data: &[u8],
738 data: &[u8],
733 p1_hash: &[u8],
739 p1_hash: &[u8],
734 p2_hash: &[u8],
740 p2_hash: &[u8],
735 ) -> [u8; NODE_BYTES_LENGTH] {
741 ) -> [u8; NODE_BYTES_LENGTH] {
736 let mut hasher = Sha1::new();
742 let mut hasher = Sha1::new();
737 let (a, b) = (p1_hash, p2_hash);
743 let (a, b) = (p1_hash, p2_hash);
738 if a > b {
744 if a > b {
739 hasher.update(b);
745 hasher.update(b);
740 hasher.update(a);
746 hasher.update(a);
741 } else {
747 } else {
742 hasher.update(a);
748 hasher.update(a);
743 hasher.update(b);
749 hasher.update(b);
744 }
750 }
745 hasher.update(data);
751 hasher.update(data);
746 *hasher.finalize().as_ref()
752 *hasher.finalize().as_ref()
747 }
753 }
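A worked, standalone version of this hashing rule may help: the node id is the SHA-1 of the two parent node ids in ascending byte order, followed by the revision data. The sketch assumes the RustCrypto `sha1` crate (the `Sha1`/`Digest` API used in this file) and redefines the 20-byte constants locally; `node_id` is an illustrative name.

use sha1::{Digest, Sha1};

const NODE_BYTES_LENGTH: usize = 20;
const NULL_HASH: [u8; NODE_BYTES_LENGTH] = [0; NODE_BYTES_LENGTH];

fn node_id(
    data: &[u8],
    p1: &[u8; NODE_BYTES_LENGTH],
    p2: &[u8; NODE_BYTES_LENGTH],
) -> [u8; NODE_BYTES_LENGTH] {
    // Parents are hashed in ascending byte order, so swapping p1 and p2
    // yields the same node id.
    let (lo, hi) = if p1 <= p2 { (p1, p2) } else { (p2, p1) };
    let mut hasher = Sha1::new();
    hasher.update(lo);
    hasher.update(hi);
    hasher.update(data);
    hasher.finalize().into()
}

fn main() {
    // A root revision has two null parents:
    // node = SHA-1(null_hash || null_hash || data).
    let node = node_id(b"file content", &NULL_HASH, &NULL_HASH);
    println!(
        "{}",
        node.iter().map(|b| format!("{:02x}", b)).collect::<String>()
    );
}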
748
754
749 #[cfg(test)]
755 #[cfg(test)]
750 mod tests {
756 mod tests {
751 use super::*;
757 use super::*;
752 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
758 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
753 use itertools::Itertools;
759 use itertools::Itertools;
754
760
755 #[test]
761 #[test]
756 fn test_empty() {
762 fn test_empty() {
757 let temp = tempfile::tempdir().unwrap();
763 let temp = tempfile::tempdir().unwrap();
758 let vfs = Vfs { base: temp.path() };
764 let vfs = Vfs { base: temp.path() };
759 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
765 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
760 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
766 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
761 assert!(revlog.is_empty());
767 assert!(revlog.is_empty());
762 assert_eq!(revlog.len(), 0);
768 assert_eq!(revlog.len(), 0);
763 assert!(revlog.get_entry(0.into()).is_err());
769 assert!(revlog.get_entry(0.into()).is_err());
764 assert!(!revlog.has_rev(0.into()));
770 assert!(!revlog.has_rev(0.into()));
765 assert_eq!(
771 assert_eq!(
766 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
772 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
767 NULL_REVISION
773 NULL_REVISION
768 );
774 );
769 let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
775 let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
770 assert_eq!(null_entry.revision(), NULL_REVISION);
776 assert_eq!(null_entry.revision(), NULL_REVISION);
771 assert!(null_entry.data().unwrap().is_empty());
777 assert!(null_entry.data().unwrap().is_empty());
772 }
778 }
773
779
774 #[test]
780 #[test]
775 fn test_inline() {
781 fn test_inline() {
776 let temp = tempfile::tempdir().unwrap();
782 let temp = tempfile::tempdir().unwrap();
777 let vfs = Vfs { base: temp.path() };
783 let vfs = Vfs { base: temp.path() };
778 let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
784 let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
779 .unwrap();
785 .unwrap();
780 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
786 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
781 .unwrap();
787 .unwrap();
782 let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
788 let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
783 .unwrap();
789 .unwrap();
784 let entry0_bytes = IndexEntryBuilder::new()
790 let entry0_bytes = IndexEntryBuilder::new()
785 .is_first(true)
791 .is_first(true)
786 .with_version(1)
792 .with_version(1)
787 .with_inline(true)
793 .with_inline(true)
788 .with_offset(INDEX_ENTRY_SIZE)
794 .with_offset(INDEX_ENTRY_SIZE)
789 .with_node(node0)
795 .with_node(node0)
790 .build();
796 .build();
791 let entry1_bytes = IndexEntryBuilder::new()
797 let entry1_bytes = IndexEntryBuilder::new()
792 .with_offset(INDEX_ENTRY_SIZE)
798 .with_offset(INDEX_ENTRY_SIZE)
793 .with_node(node1)
799 .with_node(node1)
794 .build();
800 .build();
795 let entry2_bytes = IndexEntryBuilder::new()
801 let entry2_bytes = IndexEntryBuilder::new()
796 .with_offset(INDEX_ENTRY_SIZE)
802 .with_offset(INDEX_ENTRY_SIZE)
797 .with_p1(0)
803 .with_p1(0)
798 .with_p2(1)
804 .with_p2(1)
799 .with_node(node2)
805 .with_node(node2)
800 .build();
806 .build();
801 let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
807 let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
802 .into_iter()
808 .into_iter()
803 .flatten()
809 .flatten()
804 .collect_vec();
810 .collect_vec();
805 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
811 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
806 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
812 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
807
813
808 let entry0 = revlog.get_entry(0.into()).ok().unwrap();
814 let entry0 = revlog.get_entry(0.into()).ok().unwrap();
809 assert_eq!(entry0.revision(), 0);
815 assert_eq!(entry0.revision(), 0);
810 assert_eq!(*entry0.node(), node0);
816 assert_eq!(*entry0.node(), node0);
811 assert!(!entry0.has_p1());
817 assert!(!entry0.has_p1());
812 assert_eq!(entry0.p1(), None);
818 assert_eq!(entry0.p1(), None);
813 assert_eq!(entry0.p2(), None);
819 assert_eq!(entry0.p2(), None);
814 let p1_entry = entry0.p1_entry().unwrap();
820 let p1_entry = entry0.p1_entry().unwrap();
815 assert!(p1_entry.is_none());
821 assert!(p1_entry.is_none());
816 let p2_entry = entry0.p2_entry().unwrap();
822 let p2_entry = entry0.p2_entry().unwrap();
817 assert!(p2_entry.is_none());
823 assert!(p2_entry.is_none());
818
824
819 let entry1 = revlog.get_entry(1.into()).ok().unwrap();
825 let entry1 = revlog.get_entry(1.into()).ok().unwrap();
820 assert_eq!(entry1.revision(), 1);
826 assert_eq!(entry1.revision(), 1);
821 assert_eq!(*entry1.node(), node1);
827 assert_eq!(*entry1.node(), node1);
822 assert!(!entry1.has_p1());
828 assert!(!entry1.has_p1());
823 assert_eq!(entry1.p1(), None);
829 assert_eq!(entry1.p1(), None);
824 assert_eq!(entry1.p2(), None);
830 assert_eq!(entry1.p2(), None);
825 let p1_entry = entry1.p1_entry().unwrap();
831 let p1_entry = entry1.p1_entry().unwrap();
826 assert!(p1_entry.is_none());
832 assert!(p1_entry.is_none());
827 let p2_entry = entry1.p2_entry().unwrap();
833 let p2_entry = entry1.p2_entry().unwrap();
828 assert!(p2_entry.is_none());
834 assert!(p2_entry.is_none());
829
835
830 let entry2 = revlog.get_entry(2.into()).ok().unwrap();
836 let entry2 = revlog.get_entry(2.into()).ok().unwrap();
831 assert_eq!(entry2.revision(), 2);
837 assert_eq!(entry2.revision(), 2);
832 assert_eq!(*entry2.node(), node2);
838 assert_eq!(*entry2.node(), node2);
833 assert!(entry2.has_p1());
839 assert!(entry2.has_p1());
834 assert_eq!(entry2.p1(), Some(0));
840 assert_eq!(entry2.p1(), Some(0));
835 assert_eq!(entry2.p2(), Some(1));
841 assert_eq!(entry2.p2(), Some(1));
836 let p1_entry = entry2.p1_entry().unwrap();
842 let p1_entry = entry2.p1_entry().unwrap();
837 assert!(p1_entry.is_some());
843 assert!(p1_entry.is_some());
838 assert_eq!(p1_entry.unwrap().revision(), 0);
844 assert_eq!(p1_entry.unwrap().revision(), 0);
839 let p2_entry = entry2.p2_entry().unwrap();
845 let p2_entry = entry2.p2_entry().unwrap();
840 assert!(p2_entry.is_some());
846 assert!(p2_entry.is_some());
841 assert_eq!(p2_entry.unwrap().revision(), 1);
847 assert_eq!(p2_entry.unwrap().revision(), 1);
842 }
848 }
843
849
844 #[test]
850 #[test]
845 fn test_nodemap() {
851 fn test_nodemap() {
846 let temp = tempfile::tempdir().unwrap();
852 let temp = tempfile::tempdir().unwrap();
847 let vfs = Vfs { base: temp.path() };
853 let vfs = Vfs { base: temp.path() };
848
854
849 // building a revlog with a forced Node starting with zeros
855 // building a revlog with a forced Node starting with zeros
850 // This is a corruption, but it does not preclude using the nodemap
856 // This is a corruption, but it does not preclude using the nodemap
851 // if we don't try and access the data
857 // if we don't try and access the data
852 let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
858 let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
853 .unwrap();
859 .unwrap();
854 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
860 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
855 .unwrap();
861 .unwrap();
856 let entry0_bytes = IndexEntryBuilder::new()
862 let entry0_bytes = IndexEntryBuilder::new()
857 .is_first(true)
863 .is_first(true)
858 .with_version(1)
864 .with_version(1)
859 .with_inline(true)
865 .with_inline(true)
860 .with_offset(INDEX_ENTRY_SIZE)
866 .with_offset(INDEX_ENTRY_SIZE)
861 .with_node(node0)
867 .with_node(node0)
862 .build();
868 .build();
863 let entry1_bytes = IndexEntryBuilder::new()
869 let entry1_bytes = IndexEntryBuilder::new()
864 .with_offset(INDEX_ENTRY_SIZE)
870 .with_offset(INDEX_ENTRY_SIZE)
865 .with_node(node1)
871 .with_node(node1)
866 .build();
872 .build();
867 let contents = vec![entry0_bytes, entry1_bytes]
873 let contents = vec![entry0_bytes, entry1_bytes]
868 .into_iter()
874 .into_iter()
869 .flatten()
875 .flatten()
870 .collect_vec();
876 .collect_vec();
871 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
877 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
872 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
878 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
873
879
874 // accessing the data shows the corruption
880 // accessing the data shows the corruption
875 revlog.get_entry(0.into()).unwrap().data().unwrap_err();
881 revlog.get_entry(0.into()).unwrap().data().unwrap_err();
876
882
877 assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
883 assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
878 assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
884 assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
879 assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
885 assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
880 assert_eq!(
886 assert_eq!(
881 revlog
887 revlog
882 .rev_from_node(NodePrefix::from_hex("000").unwrap())
888 .rev_from_node(NodePrefix::from_hex("000").unwrap())
883 .unwrap(),
889 .unwrap(),
884 -1
890 -1
885 );
891 );
886 assert_eq!(
892 assert_eq!(
887 revlog
893 revlog
888 .rev_from_node(NodePrefix::from_hex("b00").unwrap())
894 .rev_from_node(NodePrefix::from_hex("b00").unwrap())
889 .unwrap(),
895 .unwrap(),
890 1
896 1
891 );
897 );
892 // RevlogError does not implement PartialEq
898 // RevlogError does not implement PartialEq
893 // (ultimately because io::Error does not)
899 // (ultimately because io::Error does not)
894 match revlog
900 match revlog
895 .rev_from_node(NodePrefix::from_hex("00").unwrap())
901 .rev_from_node(NodePrefix::from_hex("00").unwrap())
896 .expect_err("Expected to give AmbiguousPrefix error")
902 .expect_err("Expected to give AmbiguousPrefix error")
897 {
903 {
898 RevlogError::AmbiguousPrefix => (),
904 RevlogError::AmbiguousPrefix => (),
899 e => {
905 e => {
900 panic!("Got another error than AmbiguousPrefix: {:?}", e);
906 panic!("Got another error than AmbiguousPrefix: {:?}", e);
901 }
907 }
902 };
908 };
903 }
909 }
904 }
910 }
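Finally, a hedged end-to-end sketch in the spirit of these tests (the "foo.i" file name and the `None`/`false` arguments simply mirror the calls above; `first_revision_data_len` is an illustrative helper, not part of the API): open a revlog from a store directory and return the size of its first revision's data, if any.

fn first_revision_data_len(
    store: &std::path::Path,
) -> Result<Option<usize>, RevlogError> {
    let vfs = Vfs { base: store };
    let revlog = Revlog::open(&vfs, "foo.i", None, false)?;
    if revlog.is_empty() {
        return Ok(None);
    }
    // Revision numbers are `UncheckedRevision` until the revlog validates them.
    let entry = revlog.get_entry(0.into())?;
    Ok(Some(entry.data()?.len()))
}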