##// END OF EJS Templates
rust-revlog: fix RevlogEntry.data() for NULL_REVISION...
Georges Racinet -
r51639:124c44b5 stable
parent child Browse files
Show More
@@ -1,341 +1,346 b''
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::revlog::{Node, NodePrefix};
2 use crate::revlog::{Node, NodePrefix};
3 use crate::revlog::{Revision, NULL_REVISION};
3 use crate::revlog::{Revision, NULL_REVISION};
4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
5 use crate::utils::hg_path::HgPath;
5 use crate::utils::hg_path::HgPath;
6 use crate::vfs::Vfs;
6 use crate::vfs::Vfs;
7 use itertools::Itertools;
7 use itertools::Itertools;
8 use std::ascii::escape_default;
8 use std::ascii::escape_default;
9 use std::borrow::Cow;
9 use std::borrow::Cow;
10 use std::fmt::{Debug, Formatter};
10 use std::fmt::{Debug, Formatter};
11
11
12 /// A specialized `Revlog` to work with changelog data format.
12 /// A specialized `Revlog` to work with changelog data format.
13 pub struct Changelog {
13 pub struct Changelog {
14 /// The generic `revlog` format.
14 /// The generic `revlog` format.
15 pub(crate) revlog: Revlog,
15 pub(crate) revlog: Revlog,
16 }
16 }
17
17
18 impl Changelog {
18 impl Changelog {
19 /// Open the `changelog` of a repository given by its root.
19 /// Open the `changelog` of a repository given by its root.
20 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
20 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
21 let revlog =
21 let revlog =
22 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
22 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
23 Ok(Self { revlog })
23 Ok(Self { revlog })
24 }
24 }
25
25
26 /// Return the `ChangelogRevisionData` for the given node ID.
26 /// Return the `ChangelogRevisionData` for the given node ID.
27 pub fn data_for_node(
27 pub fn data_for_node(
28 &self,
28 &self,
29 node: NodePrefix,
29 node: NodePrefix,
30 ) -> Result<ChangelogRevisionData, RevlogError> {
30 ) -> Result<ChangelogRevisionData, RevlogError> {
31 let rev = self.revlog.rev_from_node(node)?;
31 let rev = self.revlog.rev_from_node(node)?;
32 self.data_for_rev(rev)
32 self.data_for_rev(rev)
33 }
33 }
34
34
35 /// Return the [`ChangelogEntry`] for the given revision number.
35 /// Return the [`ChangelogEntry`] for the given revision number.
36 pub fn entry_for_rev(
36 pub fn entry_for_rev(
37 &self,
37 &self,
38 rev: Revision,
38 rev: Revision,
39 ) -> Result<ChangelogEntry, RevlogError> {
39 ) -> Result<ChangelogEntry, RevlogError> {
40 let revlog_entry = self.revlog.get_entry(rev)?;
40 let revlog_entry = self.revlog.get_entry(rev)?;
41 Ok(ChangelogEntry { revlog_entry })
41 Ok(ChangelogEntry { revlog_entry })
42 }
42 }
43
43
44 /// Return the [`ChangelogRevisionData`] for the given revision number.
44 /// Return the [`ChangelogRevisionData`] for the given revision number.
45 ///
45 ///
46 /// This is a useful shortcut in case the caller does not need the
46 /// This is a useful shortcut in case the caller does not need the
47 /// generic revlog information (parents, hashes etc). Otherwise
47 /// generic revlog information (parents, hashes etc). Otherwise
48 /// consider taking a [`ChangelogEntry`] with
48 /// consider taking a [`ChangelogEntry`] with
49 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
49 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
50 pub fn data_for_rev(
50 pub fn data_for_rev(
51 &self,
51 &self,
52 rev: Revision,
52 rev: Revision,
53 ) -> Result<ChangelogRevisionData, RevlogError> {
53 ) -> Result<ChangelogRevisionData, RevlogError> {
54 if rev == NULL_REVISION {
54 if rev == NULL_REVISION {
55 return Ok(ChangelogRevisionData::null());
55 return Ok(ChangelogRevisionData::null());
56 }
56 }
57 self.entry_for_rev(rev)?.data()
57 self.entry_for_rev(rev)?.data()
58 }
58 }
59
59
60 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
60 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
61 self.revlog.node_from_rev(rev)
61 self.revlog.node_from_rev(rev)
62 }
62 }
63
63
64 pub fn rev_from_node(
64 pub fn rev_from_node(
65 &self,
65 &self,
66 node: NodePrefix,
66 node: NodePrefix,
67 ) -> Result<Revision, RevlogError> {
67 ) -> Result<Revision, RevlogError> {
68 self.revlog.rev_from_node(node)
68 self.revlog.rev_from_node(node)
69 }
69 }
70 }
70 }
71
71
72 /// A specialized `RevlogEntry` for `changelog` data format
72 /// A specialized `RevlogEntry` for `changelog` data format
73 ///
73 ///
74 /// This is a `RevlogEntry` with the added semantics that the associated
74 /// This is a `RevlogEntry` with the added semantics that the associated
75 /// data should meet the requirements for `changelog`, materialized by
75 /// data should meet the requirements for `changelog`, materialized by
76 /// the fact that `data()` constructs a `ChangelogRevisionData`.
76 /// the fact that `data()` constructs a `ChangelogRevisionData`.
77 /// In case that promise would be broken, the `data` method returns an error.
77 /// In case that promise would be broken, the `data` method returns an error.
78 #[derive(Clone)]
78 #[derive(Clone)]
79 pub struct ChangelogEntry<'changelog> {
79 pub struct ChangelogEntry<'changelog> {
80 /// Same data, as a generic `RevlogEntry`.
80 /// Same data, as a generic `RevlogEntry`.
81 pub(crate) revlog_entry: RevlogEntry<'changelog>,
81 pub(crate) revlog_entry: RevlogEntry<'changelog>,
82 }
82 }
83
83
84 impl<'changelog> ChangelogEntry<'changelog> {
84 impl<'changelog> ChangelogEntry<'changelog> {
85 pub fn data<'a>(
85 pub fn data<'a>(
86 &'a self,
86 &'a self,
87 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
87 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
88 let bytes = self.revlog_entry.data()?;
88 let bytes = self.revlog_entry.data()?;
89 if bytes.is_empty() {
89 if bytes.is_empty() {
90 Ok(ChangelogRevisionData::null())
90 Ok(ChangelogRevisionData::null())
91 } else {
91 } else {
92 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
92 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
93 RevlogError::Other(HgError::CorruptedRepository(format!(
93 RevlogError::Other(HgError::CorruptedRepository(format!(
94 "Invalid changelog data for revision {}: {:?}",
94 "Invalid changelog data for revision {}: {:?}",
95 self.revlog_entry.revision(),
95 self.revlog_entry.revision(),
96 err
96 err
97 )))
97 )))
98 })?)
98 })?)
99 }
99 }
100 }
100 }
101
101
102 /// Obtain a reference to the underlying `RevlogEntry`.
102 /// Obtain a reference to the underlying `RevlogEntry`.
103 ///
103 ///
104 /// This allows the caller to access the information that is common
104 /// This allows the caller to access the information that is common
105 /// to all revlog entries: revision number, node id, parent revisions etc.
105 /// to all revlog entries: revision number, node id, parent revisions etc.
106 pub fn as_revlog_entry(&self) -> &RevlogEntry {
106 pub fn as_revlog_entry(&self) -> &RevlogEntry {
107 &self.revlog_entry
107 &self.revlog_entry
108 }
108 }
109
109
110 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
110 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
111 Ok(self
111 Ok(self
112 .revlog_entry
112 .revlog_entry
113 .p1_entry()?
113 .p1_entry()?
114 .map(|revlog_entry| Self { revlog_entry }))
114 .map(|revlog_entry| Self { revlog_entry }))
115 }
115 }
116
116
117 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
117 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
118 Ok(self
118 Ok(self
119 .revlog_entry
119 .revlog_entry
120 .p2_entry()?
120 .p2_entry()?
121 .map(|revlog_entry| Self { revlog_entry }))
121 .map(|revlog_entry| Self { revlog_entry }))
122 }
122 }
123 }
123 }
124
124
125 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
125 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
126 #[derive(PartialEq)]
126 #[derive(PartialEq)]
127 pub struct ChangelogRevisionData<'changelog> {
127 pub struct ChangelogRevisionData<'changelog> {
128 /// The data bytes of the `changelog` entry.
128 /// The data bytes of the `changelog` entry.
129 bytes: Cow<'changelog, [u8]>,
129 bytes: Cow<'changelog, [u8]>,
130 /// The end offset for the hex manifest (not including the newline)
130 /// The end offset for the hex manifest (not including the newline)
131 manifest_end: usize,
131 manifest_end: usize,
132 /// The end offset for the user+email (not including the newline)
132 /// The end offset for the user+email (not including the newline)
133 user_end: usize,
133 user_end: usize,
134 /// The end offset for the timestamp+timezone+extras (not including the
134 /// The end offset for the timestamp+timezone+extras (not including the
135 /// newline)
135 /// newline)
136 timestamp_end: usize,
136 timestamp_end: usize,
137 /// The end offset for the file list (not including the newline)
137 /// The end offset for the file list (not including the newline)
138 files_end: usize,
138 files_end: usize,
139 }
139 }
140
140
141 impl<'changelog> ChangelogRevisionData<'changelog> {
141 impl<'changelog> ChangelogRevisionData<'changelog> {
142 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
142 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
143 let mut line_iter = bytes.split(|b| b == &b'\n');
143 let mut line_iter = bytes.split(|b| b == &b'\n');
144 let manifest_end = line_iter
144 let manifest_end = line_iter
145 .next()
145 .next()
146 .expect("Empty iterator from split()?")
146 .expect("Empty iterator from split()?")
147 .len();
147 .len();
148 let user_slice = line_iter.next().ok_or_else(|| {
148 let user_slice = line_iter.next().ok_or_else(|| {
149 HgError::corrupted("Changeset data truncated after manifest line")
149 HgError::corrupted("Changeset data truncated after manifest line")
150 })?;
150 })?;
151 let user_end = manifest_end + 1 + user_slice.len();
151 let user_end = manifest_end + 1 + user_slice.len();
152 let timestamp_slice = line_iter.next().ok_or_else(|| {
152 let timestamp_slice = line_iter.next().ok_or_else(|| {
153 HgError::corrupted("Changeset data truncated after user line")
153 HgError::corrupted("Changeset data truncated after user line")
154 })?;
154 })?;
155 let timestamp_end = user_end + 1 + timestamp_slice.len();
155 let timestamp_end = user_end + 1 + timestamp_slice.len();
156 let mut files_end = timestamp_end + 1;
156 let mut files_end = timestamp_end + 1;
157 loop {
157 loop {
158 let line = line_iter.next().ok_or_else(|| {
158 let line = line_iter.next().ok_or_else(|| {
159 HgError::corrupted("Changeset data truncated in files list")
159 HgError::corrupted("Changeset data truncated in files list")
160 })?;
160 })?;
161 if line.is_empty() {
161 if line.is_empty() {
162 if files_end == bytes.len() {
162 if files_end == bytes.len() {
163 // The list of files ended with a single newline (there
163 // The list of files ended with a single newline (there
164 // should be two)
164 // should be two)
165 return Err(HgError::corrupted(
165 return Err(HgError::corrupted(
166 "Changeset data truncated after files list",
166 "Changeset data truncated after files list",
167 ));
167 ));
168 }
168 }
169 files_end -= 1;
169 files_end -= 1;
170 break;
170 break;
171 }
171 }
172 files_end += line.len() + 1;
172 files_end += line.len() + 1;
173 }
173 }
174
174
175 Ok(Self {
175 Ok(Self {
176 bytes,
176 bytes,
177 manifest_end,
177 manifest_end,
178 user_end,
178 user_end,
179 timestamp_end,
179 timestamp_end,
180 files_end,
180 files_end,
181 })
181 })
182 }
182 }
183
183
184 fn null() -> Self {
184 fn null() -> Self {
185 Self::new(Cow::Borrowed(
185 Self::new(Cow::Borrowed(
186 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
186 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
187 ))
187 ))
188 .unwrap()
188 .unwrap()
189 }
189 }
190
190
191 /// Return an iterator over the lines of the entry.
191 /// Return an iterator over the lines of the entry.
192 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
192 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
193 self.bytes.split(|b| b == &b'\n')
193 self.bytes.split(|b| b == &b'\n')
194 }
194 }
195
195
196 /// Return the node id of the `manifest` referenced by this `changelog`
196 /// Return the node id of the `manifest` referenced by this `changelog`
197 /// entry.
197 /// entry.
198 pub fn manifest_node(&self) -> Result<Node, HgError> {
198 pub fn manifest_node(&self) -> Result<Node, HgError> {
199 let manifest_node_hex = &self.bytes[..self.manifest_end];
199 let manifest_node_hex = &self.bytes[..self.manifest_end];
200 Node::from_hex_for_repo(manifest_node_hex)
200 Node::from_hex_for_repo(manifest_node_hex)
201 }
201 }
202
202
203 /// The full user string (usually a name followed by an email enclosed in
203 /// The full user string (usually a name followed by an email enclosed in
204 /// angle brackets)
204 /// angle brackets)
205 pub fn user(&self) -> &[u8] {
205 pub fn user(&self) -> &[u8] {
206 &self.bytes[self.manifest_end + 1..self.user_end]
206 &self.bytes[self.manifest_end + 1..self.user_end]
207 }
207 }
208
208
209 /// The full timestamp line (timestamp in seconds, offset in seconds, and
209 /// The full timestamp line (timestamp in seconds, offset in seconds, and
210 /// possibly extras)
210 /// possibly extras)
211 // TODO: We should expose this in a more useful way
211 // TODO: We should expose this in a more useful way
212 pub fn timestamp_line(&self) -> &[u8] {
212 pub fn timestamp_line(&self) -> &[u8] {
213 &self.bytes[self.user_end + 1..self.timestamp_end]
213 &self.bytes[self.user_end + 1..self.timestamp_end]
214 }
214 }
215
215
216 /// The files changed in this revision.
216 /// The files changed in this revision.
217 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
217 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
218 self.bytes[self.timestamp_end + 1..self.files_end]
218 self.bytes[self.timestamp_end + 1..self.files_end]
219 .split(|b| b == &b'\n')
219 .split(|b| b == &b'\n')
220 .map(HgPath::new)
220 .map(HgPath::new)
221 }
221 }
222
222
223 /// The change description.
223 /// The change description.
224 pub fn description(&self) -> &[u8] {
224 pub fn description(&self) -> &[u8] {
225 &self.bytes[self.files_end + 2..]
225 &self.bytes[self.files_end + 2..]
226 }
226 }
227 }
227 }
228
228
229 impl Debug for ChangelogRevisionData<'_> {
229 impl Debug for ChangelogRevisionData<'_> {
230 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
230 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
231 f.debug_struct("ChangelogRevisionData")
231 f.debug_struct("ChangelogRevisionData")
232 .field("bytes", &debug_bytes(&self.bytes))
232 .field("bytes", &debug_bytes(&self.bytes))
233 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
233 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
234 .field(
234 .field(
235 "user",
235 "user",
236 &debug_bytes(
236 &debug_bytes(
237 &self.bytes[self.manifest_end + 1..self.user_end],
237 &self.bytes[self.manifest_end + 1..self.user_end],
238 ),
238 ),
239 )
239 )
240 .field(
240 .field(
241 "timestamp",
241 "timestamp",
242 &debug_bytes(
242 &debug_bytes(
243 &self.bytes[self.user_end + 1..self.timestamp_end],
243 &self.bytes[self.user_end + 1..self.timestamp_end],
244 ),
244 ),
245 )
245 )
246 .field(
246 .field(
247 "files",
247 "files",
248 &debug_bytes(
248 &debug_bytes(
249 &self.bytes[self.timestamp_end + 1..self.files_end],
249 &self.bytes[self.timestamp_end + 1..self.files_end],
250 ),
250 ),
251 )
251 )
252 .field(
252 .field(
253 "description",
253 "description",
254 &debug_bytes(&self.bytes[self.files_end + 2..]),
254 &debug_bytes(&self.bytes[self.files_end + 2..]),
255 )
255 )
256 .finish()
256 .finish()
257 }
257 }
258 }
258 }
259
259
260 fn debug_bytes(bytes: &[u8]) -> String {
260 fn debug_bytes(bytes: &[u8]) -> String {
261 String::from_utf8_lossy(
261 String::from_utf8_lossy(
262 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
262 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
263 )
263 )
264 .to_string()
264 .to_string()
265 }
265 }
266
266
267 #[cfg(test)]
267 #[cfg(test)]
268 mod tests {
268 mod tests {
269 use super::*;
269 use super::*;
270 use crate::vfs::Vfs;
270 use crate::vfs::Vfs;
271 use crate::NULL_REVISION;
271 use crate::NULL_REVISION;
272 use pretty_assertions::assert_eq;
272 use pretty_assertions::assert_eq;
273
273
274 #[test]
274 #[test]
275 fn test_create_changelogrevisiondata_invalid() {
275 fn test_create_changelogrevisiondata_invalid() {
276 // Completely empty
276 // Completely empty
277 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
277 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
278 // No newline after manifest
278 // No newline after manifest
279 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
279 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
280 // No newline after user
280 // No newline after user
281 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
281 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
282 // No newline after timestamp
282 // No newline after timestamp
283 assert!(
283 assert!(
284 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
284 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
285 );
285 );
286 // Missing newline after files
286 // Missing newline after files
287 assert!(ChangelogRevisionData::new(Cow::Borrowed(
287 assert!(ChangelogRevisionData::new(Cow::Borrowed(
288 b"abcd\n\n0 0\nfile1\nfile2"
288 b"abcd\n\n0 0\nfile1\nfile2"
289 ))
289 ))
290 .is_err(),);
290 .is_err(),);
291 // Only one newline after files
291 // Only one newline after files
292 assert!(ChangelogRevisionData::new(Cow::Borrowed(
292 assert!(ChangelogRevisionData::new(Cow::Borrowed(
293 b"abcd\n\n0 0\nfile1\nfile2\n"
293 b"abcd\n\n0 0\nfile1\nfile2\n"
294 ))
294 ))
295 .is_err(),);
295 .is_err(),);
296 }
296 }
297
297
298 #[test]
298 #[test]
299 fn test_create_changelogrevisiondata() {
299 fn test_create_changelogrevisiondata() {
300 let data = ChangelogRevisionData::new(Cow::Borrowed(
300 let data = ChangelogRevisionData::new(Cow::Borrowed(
301 b"0123456789abcdef0123456789abcdef01234567
301 b"0123456789abcdef0123456789abcdef01234567
302 Some One <someone@example.com>
302 Some One <someone@example.com>
303 0 0
303 0 0
304 file1
304 file1
305 file2
305 file2
306
306
307 some
307 some
308 commit
308 commit
309 message",
309 message",
310 ))
310 ))
311 .unwrap();
311 .unwrap();
312 assert_eq!(
312 assert_eq!(
313 data.manifest_node().unwrap(),
313 data.manifest_node().unwrap(),
314 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
314 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
315 .unwrap()
315 .unwrap()
316 );
316 );
317 assert_eq!(data.user(), b"Some One <someone@example.com>");
317 assert_eq!(data.user(), b"Some One <someone@example.com>");
318 assert_eq!(data.timestamp_line(), b"0 0");
318 assert_eq!(data.timestamp_line(), b"0 0");
319 assert_eq!(
319 assert_eq!(
320 data.files().collect_vec(),
320 data.files().collect_vec(),
321 vec![HgPath::new("file1"), HgPath::new("file2")]
321 vec![HgPath::new("file1"), HgPath::new("file2")]
322 );
322 );
323 assert_eq!(data.description(), b"some\ncommit\nmessage");
323 assert_eq!(data.description(), b"some\ncommit\nmessage");
324 }
324 }
325
325
326 #[test]
326 #[test]
327 fn test_data_from_rev_null() -> Result<(), RevlogError> {
327 fn test_data_from_rev_null() -> Result<(), RevlogError> {
328 // an empty revlog will be enough for this case
328 // an empty revlog will be enough for this case
329 let temp = tempfile::tempdir().unwrap();
329 let temp = tempfile::tempdir().unwrap();
330 let vfs = Vfs { base: temp.path() };
330 let vfs = Vfs { base: temp.path() };
331 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
331 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
332 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
332 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
333
333
334 let changelog = Changelog { revlog };
334 let changelog = Changelog { revlog };
335 assert_eq!(
335 assert_eq!(
336 changelog.data_for_rev(NULL_REVISION)?,
336 changelog.data_for_rev(NULL_REVISION)?,
337 ChangelogRevisionData::null()
337 ChangelogRevisionData::null()
338 );
338 );
339 // same with the intermediate entry object
340 assert_eq!(
341 changelog.entry_for_rev(NULL_REVISION)?.data()?,
342 ChangelogRevisionData::null()
343 );
339 Ok(())
344 Ok(())
340 }
345 }
341 }
346 }
@@ -1,823 +1,829 b''
1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Mercurial concepts for handling revision history
6 //! Mercurial concepts for handling revision history
7
7
8 pub mod node;
8 pub mod node;
9 pub mod nodemap;
9 pub mod nodemap;
10 mod nodemap_docket;
10 mod nodemap_docket;
11 pub mod path_encode;
11 pub mod path_encode;
12 pub use node::{FromHexError, Node, NodePrefix};
12 pub use node::{FromHexError, Node, NodePrefix};
13 pub mod changelog;
13 pub mod changelog;
14 pub mod filelog;
14 pub mod filelog;
15 pub mod index;
15 pub mod index;
16 pub mod manifest;
16 pub mod manifest;
17 pub mod patch;
17 pub mod patch;
18
18
19 use std::borrow::Cow;
19 use std::borrow::Cow;
20 use std::io::Read;
20 use std::io::Read;
21 use std::ops::Deref;
21 use std::ops::Deref;
22 use std::path::Path;
22 use std::path::Path;
23
23
24 use flate2::read::ZlibDecoder;
24 use flate2::read::ZlibDecoder;
25 use sha1::{Digest, Sha1};
25 use sha1::{Digest, Sha1};
26 use std::cell::RefCell;
26 use std::cell::RefCell;
27 use zstd;
27 use zstd;
28
28
29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 use self::nodemap_docket::NodeMapDocket;
30 use self::nodemap_docket::NodeMapDocket;
31 use super::index::Index;
31 use super::index::Index;
32 use super::nodemap::{NodeMap, NodeMapError};
32 use super::nodemap::{NodeMap, NodeMapError};
33 use crate::errors::HgError;
33 use crate::errors::HgError;
34 use crate::vfs::Vfs;
34 use crate::vfs::Vfs;
35
35
36 /// Mercurial revision numbers
36 /// Mercurial revision numbers
37 ///
37 ///
38 /// As noted in revlog.c, revision numbers are actually encoded in
38 /// As noted in revlog.c, revision numbers are actually encoded in
39 /// 4 bytes, and are liberally converted to ints, whence the i32
39 /// 4 bytes, and are liberally converted to ints, whence the i32
40 pub type Revision = i32;
40 pub type Revision = i32;
41
41
42 /// Marker expressing the absence of a parent
42 /// Marker expressing the absence of a parent
43 ///
43 ///
44 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
44 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
45 /// to be smaller than all existing revisions.
45 /// to be smaller than all existing revisions.
46 pub const NULL_REVISION: Revision = -1;
46 pub const NULL_REVISION: Revision = -1;
47
47
48 /// Same as `mercurial.node.wdirrev`
48 /// Same as `mercurial.node.wdirrev`
49 ///
49 ///
50 /// This is also equal to `i32::max_value()`, but it's better to spell
50 /// This is also equal to `i32::max_value()`, but it's better to spell
51 /// it out explicitly, same as in `mercurial.node`
51 /// it out explicitly, same as in `mercurial.node`
52 #[allow(clippy::unreadable_literal)]
52 #[allow(clippy::unreadable_literal)]
53 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
53 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
54
54
55 pub const WORKING_DIRECTORY_HEX: &str =
55 pub const WORKING_DIRECTORY_HEX: &str =
56 "ffffffffffffffffffffffffffffffffffffffff";
56 "ffffffffffffffffffffffffffffffffffffffff";
57
57
58 /// The simplest expression of what we need of Mercurial DAGs.
58 /// The simplest expression of what we need of Mercurial DAGs.
59 pub trait Graph {
59 pub trait Graph {
60 /// Return the two parents of the given `Revision`.
60 /// Return the two parents of the given `Revision`.
61 ///
61 ///
62 /// Each of the parents can be independently `NULL_REVISION`
62 /// Each of the parents can be independently `NULL_REVISION`
63 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
63 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
64 }
64 }
65
65
66 #[derive(Clone, Debug, PartialEq)]
66 #[derive(Clone, Debug, PartialEq)]
67 pub enum GraphError {
67 pub enum GraphError {
68 ParentOutOfRange(Revision),
68 ParentOutOfRange(Revision),
69 WorkingDirectoryUnsupported,
69 WorkingDirectoryUnsupported,
70 }
70 }
71
71
72 /// The Mercurial Revlog Index
72 /// The Mercurial Revlog Index
73 ///
73 ///
74 /// This is currently limited to the minimal interface that is needed for
74 /// This is currently limited to the minimal interface that is needed for
75 /// the [`nodemap`](nodemap/index.html) module
75 /// the [`nodemap`](nodemap/index.html) module
76 pub trait RevlogIndex {
76 pub trait RevlogIndex {
77 /// Total number of Revisions referenced in this index
77 /// Total number of Revisions referenced in this index
78 fn len(&self) -> usize;
78 fn len(&self) -> usize;
79
79
80 fn is_empty(&self) -> bool {
80 fn is_empty(&self) -> bool {
81 self.len() == 0
81 self.len() == 0
82 }
82 }
83
83
84 /// Return a reference to the Node or `None` if rev is out of bounds
84 /// Return a reference to the Node or `None` if rev is out of bounds
85 ///
85 ///
86 /// `NULL_REVISION` is not considered to be out of bounds.
86 /// `NULL_REVISION` is not considered to be out of bounds.
87 fn node(&self, rev: Revision) -> Option<&Node>;
87 fn node(&self, rev: Revision) -> Option<&Node>;
88 }
88 }
89
89
90 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
90 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
91 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
91 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
92 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
92 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
93 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
93 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
94
94
95 // Keep this in sync with REVIDX_KNOWN_FLAGS in
95 // Keep this in sync with REVIDX_KNOWN_FLAGS in
96 // mercurial/revlogutils/flagutil.py
96 // mercurial/revlogutils/flagutil.py
97 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
97 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
98 | REVISION_FLAG_ELLIPSIS
98 | REVISION_FLAG_ELLIPSIS
99 | REVISION_FLAG_EXTSTORED
99 | REVISION_FLAG_EXTSTORED
100 | REVISION_FLAG_HASCOPIESINFO;
100 | REVISION_FLAG_HASCOPIESINFO;
101
101
102 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
102 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
103
103
104 #[derive(Debug, derive_more::From)]
104 #[derive(Debug, derive_more::From)]
105 pub enum RevlogError {
105 pub enum RevlogError {
106 InvalidRevision,
106 InvalidRevision,
107 /// Working directory is not supported
107 /// Working directory is not supported
108 WDirUnsupported,
108 WDirUnsupported,
109 /// Found more than one entry whose ID match the requested prefix
109 /// Found more than one entry whose ID match the requested prefix
110 AmbiguousPrefix,
110 AmbiguousPrefix,
111 #[from]
111 #[from]
112 Other(HgError),
112 Other(HgError),
113 }
113 }
114
114
115 impl From<NodeMapError> for RevlogError {
115 impl From<NodeMapError> for RevlogError {
116 fn from(error: NodeMapError) -> Self {
116 fn from(error: NodeMapError) -> Self {
117 match error {
117 match error {
118 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
118 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
119 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
119 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
120 format!("nodemap point to revision {} not in index", rev),
120 format!("nodemap point to revision {} not in index", rev),
121 ),
121 ),
122 }
122 }
123 }
123 }
124 }
124 }
125
125
126 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
126 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
127 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
127 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
128 }
128 }
129
129
130 impl RevlogError {
130 impl RevlogError {
131 fn corrupted<S: AsRef<str>>(context: S) -> Self {
131 fn corrupted<S: AsRef<str>>(context: S) -> Self {
132 RevlogError::Other(corrupted(context))
132 RevlogError::Other(corrupted(context))
133 }
133 }
134 }
134 }
135
135
136 /// Read only implementation of revlog.
136 /// Read only implementation of revlog.
137 pub struct Revlog {
137 pub struct Revlog {
138 /// When index and data are not interleaved: bytes of the revlog index.
138 /// When index and data are not interleaved: bytes of the revlog index.
139 /// When index and data are interleaved: bytes of the revlog index and
139 /// When index and data are interleaved: bytes of the revlog index and
140 /// data.
140 /// data.
141 index: Index,
141 index: Index,
142 /// When index and data are not interleaved: bytes of the revlog data
142 /// When index and data are not interleaved: bytes of the revlog data
143 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
143 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
144 /// When present on disk: the persistent nodemap for this revlog
144 /// When present on disk: the persistent nodemap for this revlog
145 nodemap: Option<nodemap::NodeTree>,
145 nodemap: Option<nodemap::NodeTree>,
146 }
146 }
147
147
148 impl Revlog {
148 impl Revlog {
149 /// Open a revlog index file.
149 /// Open a revlog index file.
150 ///
150 ///
151 /// It will also open the associated data file if index and data are not
151 /// It will also open the associated data file if index and data are not
152 /// interleaved.
152 /// interleaved.
153 pub fn open(
153 pub fn open(
154 store_vfs: &Vfs,
154 store_vfs: &Vfs,
155 index_path: impl AsRef<Path>,
155 index_path: impl AsRef<Path>,
156 data_path: Option<&Path>,
156 data_path: Option<&Path>,
157 use_nodemap: bool,
157 use_nodemap: bool,
158 ) -> Result<Self, HgError> {
158 ) -> Result<Self, HgError> {
159 let index_path = index_path.as_ref();
159 let index_path = index_path.as_ref();
160 let index = {
160 let index = {
161 match store_vfs.mmap_open_opt(&index_path)? {
161 match store_vfs.mmap_open_opt(&index_path)? {
162 None => Index::new(Box::new(vec![])),
162 None => Index::new(Box::new(vec![])),
163 Some(index_mmap) => {
163 Some(index_mmap) => {
164 let index = Index::new(Box::new(index_mmap))?;
164 let index = Index::new(Box::new(index_mmap))?;
165 Ok(index)
165 Ok(index)
166 }
166 }
167 }
167 }
168 }?;
168 }?;
169
169
170 let default_data_path = index_path.with_extension("d");
170 let default_data_path = index_path.with_extension("d");
171
171
172 // type annotation required
172 // type annotation required
173 // won't recognize Mmap as Deref<Target = [u8]>
173 // won't recognize Mmap as Deref<Target = [u8]>
174 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
174 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
175 if index.is_inline() {
175 if index.is_inline() {
176 None
176 None
177 } else {
177 } else {
178 let data_path = data_path.unwrap_or(&default_data_path);
178 let data_path = data_path.unwrap_or(&default_data_path);
179 let data_mmap = store_vfs.mmap_open(data_path)?;
179 let data_mmap = store_vfs.mmap_open(data_path)?;
180 Some(Box::new(data_mmap))
180 Some(Box::new(data_mmap))
181 };
181 };
182
182
183 let nodemap = if index.is_inline() || !use_nodemap {
183 let nodemap = if index.is_inline() || !use_nodemap {
184 None
184 None
185 } else {
185 } else {
186 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
186 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
187 |(docket, data)| {
187 |(docket, data)| {
188 nodemap::NodeTree::load_bytes(
188 nodemap::NodeTree::load_bytes(
189 Box::new(data),
189 Box::new(data),
190 docket.data_length,
190 docket.data_length,
191 )
191 )
192 },
192 },
193 )
193 )
194 };
194 };
195
195
196 Ok(Revlog {
196 Ok(Revlog {
197 index,
197 index,
198 data_bytes,
198 data_bytes,
199 nodemap,
199 nodemap,
200 })
200 })
201 }
201 }
202
202
203 /// Return number of entries of the `Revlog`.
203 /// Return number of entries of the `Revlog`.
204 pub fn len(&self) -> usize {
204 pub fn len(&self) -> usize {
205 self.index.len()
205 self.index.len()
206 }
206 }
207
207
208 /// Returns `true` if the `Revlog` has zero `entries`.
208 /// Returns `true` if the `Revlog` has zero `entries`.
209 pub fn is_empty(&self) -> bool {
209 pub fn is_empty(&self) -> bool {
210 self.index.is_empty()
210 self.index.is_empty()
211 }
211 }
212
212
213 /// Returns the node ID for the given revision number, if it exists in this
213 /// Returns the node ID for the given revision number, if it exists in this
214 /// revlog
214 /// revlog
215 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
215 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
216 if rev == NULL_REVISION {
216 if rev == NULL_REVISION {
217 return Some(&NULL_NODE);
217 return Some(&NULL_NODE);
218 }
218 }
219 Some(self.index.get_entry(rev)?.hash())
219 Some(self.index.get_entry(rev)?.hash())
220 }
220 }
221
221
222 /// Return the revision number for the given node ID, if it exists in this
222 /// Return the revision number for the given node ID, if it exists in this
223 /// revlog
223 /// revlog
224 pub fn rev_from_node(
224 pub fn rev_from_node(
225 &self,
225 &self,
226 node: NodePrefix,
226 node: NodePrefix,
227 ) -> Result<Revision, RevlogError> {
227 ) -> Result<Revision, RevlogError> {
228 let looked_up = if let Some(nodemap) = &self.nodemap {
228 let looked_up = if let Some(nodemap) = &self.nodemap {
229 nodemap
229 nodemap
230 .find_bin(&self.index, node)?
230 .find_bin(&self.index, node)?
231 .ok_or(RevlogError::InvalidRevision)
231 .ok_or(RevlogError::InvalidRevision)
232 } else {
232 } else {
233 self.rev_from_node_no_persistent_nodemap(node)
233 self.rev_from_node_no_persistent_nodemap(node)
234 };
234 };
235
235
236 if node.is_prefix_of(&NULL_NODE) {
236 if node.is_prefix_of(&NULL_NODE) {
237 return match looked_up {
237 return match looked_up {
238 Ok(_) => Err(RevlogError::AmbiguousPrefix),
238 Ok(_) => Err(RevlogError::AmbiguousPrefix),
239 Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),
239 Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),
240 res => res,
240 res => res,
241 };
241 };
242 };
242 };
243
243
244 looked_up
244 looked_up
245 }
245 }
246
246
247 /// Same as `rev_from_node`, without using a persistent nodemap
247 /// Same as `rev_from_node`, without using a persistent nodemap
248 ///
248 ///
249 /// This is used as fallback when a persistent nodemap is not present.
249 /// This is used as fallback when a persistent nodemap is not present.
250 /// This happens when the persistent-nodemap experimental feature is not
250 /// This happens when the persistent-nodemap experimental feature is not
251 /// enabled, or for small revlogs.
251 /// enabled, or for small revlogs.
252 fn rev_from_node_no_persistent_nodemap(
252 fn rev_from_node_no_persistent_nodemap(
253 &self,
253 &self,
254 node: NodePrefix,
254 node: NodePrefix,
255 ) -> Result<Revision, RevlogError> {
255 ) -> Result<Revision, RevlogError> {
256 // Linear scan of the revlog
256 // Linear scan of the revlog
257 // TODO: consider building a non-persistent nodemap in memory to
257 // TODO: consider building a non-persistent nodemap in memory to
258 // optimize these cases.
258 // optimize these cases.
259 let mut found_by_prefix = None;
259 let mut found_by_prefix = None;
260 for rev in (0..self.len() as Revision).rev() {
260 for rev in (0..self.len() as Revision).rev() {
261 let index_entry = self.index.get_entry(rev).ok_or_else(|| {
261 let index_entry = self.index.get_entry(rev).ok_or_else(|| {
262 HgError::corrupted(
262 HgError::corrupted(
263 "revlog references a revision not in the index",
263 "revlog references a revision not in the index",
264 )
264 )
265 })?;
265 })?;
266 if node == *index_entry.hash() {
266 if node == *index_entry.hash() {
267 return Ok(rev);
267 return Ok(rev);
268 }
268 }
269 if node.is_prefix_of(index_entry.hash()) {
269 if node.is_prefix_of(index_entry.hash()) {
270 if found_by_prefix.is_some() {
270 if found_by_prefix.is_some() {
271 return Err(RevlogError::AmbiguousPrefix);
271 return Err(RevlogError::AmbiguousPrefix);
272 }
272 }
273 found_by_prefix = Some(rev)
273 found_by_prefix = Some(rev)
274 }
274 }
275 }
275 }
276 found_by_prefix.ok_or(RevlogError::InvalidRevision)
276 found_by_prefix.ok_or(RevlogError::InvalidRevision)
277 }
277 }
278
278
279 /// Returns whether the given revision exists in this revlog.
279 /// Returns whether the given revision exists in this revlog.
280 pub fn has_rev(&self, rev: Revision) -> bool {
280 pub fn has_rev(&self, rev: Revision) -> bool {
281 self.index.get_entry(rev).is_some()
281 self.index.get_entry(rev).is_some()
282 }
282 }
283
283
284 /// Return the full data associated to a revision.
284 /// Return the full data associated to a revision.
285 ///
285 ///
286 /// All entries required to build the final data out of deltas will be
286 /// All entries required to build the final data out of deltas will be
287 /// retrieved as needed, and the deltas will be applied to the inital
287 /// retrieved as needed, and the deltas will be applied to the inital
288 /// snapshot to rebuild the final data.
288 /// snapshot to rebuild the final data.
289 pub fn get_rev_data(
289 pub fn get_rev_data(
290 &self,
290 &self,
291 rev: Revision,
291 rev: Revision,
292 ) -> Result<Cow<[u8]>, RevlogError> {
292 ) -> Result<Cow<[u8]>, RevlogError> {
293 if rev == NULL_REVISION {
293 if rev == NULL_REVISION {
294 return Ok(Cow::Borrowed(&[]));
294 return Ok(Cow::Borrowed(&[]));
295 };
295 };
296 Ok(self.get_entry(rev)?.data()?)
296 Ok(self.get_entry(rev)?.data()?)
297 }
297 }
298
298
299 /// Check the hash of some given data against the recorded hash.
299 /// Check the hash of some given data against the recorded hash.
300 pub fn check_hash(
300 pub fn check_hash(
301 &self,
301 &self,
302 p1: Revision,
302 p1: Revision,
303 p2: Revision,
303 p2: Revision,
304 expected: &[u8],
304 expected: &[u8],
305 data: &[u8],
305 data: &[u8],
306 ) -> bool {
306 ) -> bool {
307 let e1 = self.index.get_entry(p1);
307 let e1 = self.index.get_entry(p1);
308 let h1 = match e1 {
308 let h1 = match e1 {
309 Some(ref entry) => entry.hash(),
309 Some(ref entry) => entry.hash(),
310 None => &NULL_NODE,
310 None => &NULL_NODE,
311 };
311 };
312 let e2 = self.index.get_entry(p2);
312 let e2 = self.index.get_entry(p2);
313 let h2 = match e2 {
313 let h2 = match e2 {
314 Some(ref entry) => entry.hash(),
314 Some(ref entry) => entry.hash(),
315 None => &NULL_NODE,
315 None => &NULL_NODE,
316 };
316 };
317
317
318 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
318 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
319 }
319 }
320
320
321 /// Build the full data of a revision out its snapshot
321 /// Build the full data of a revision out its snapshot
322 /// and its deltas.
322 /// and its deltas.
323 fn build_data_from_deltas(
323 fn build_data_from_deltas(
324 snapshot: RevlogEntry,
324 snapshot: RevlogEntry,
325 deltas: &[RevlogEntry],
325 deltas: &[RevlogEntry],
326 ) -> Result<Vec<u8>, HgError> {
326 ) -> Result<Vec<u8>, HgError> {
327 let snapshot = snapshot.data_chunk()?;
327 let snapshot = snapshot.data_chunk()?;
328 let deltas = deltas
328 let deltas = deltas
329 .iter()
329 .iter()
330 .rev()
330 .rev()
331 .map(RevlogEntry::data_chunk)
331 .map(RevlogEntry::data_chunk)
332 .collect::<Result<Vec<_>, _>>()?;
332 .collect::<Result<Vec<_>, _>>()?;
333 let patches: Vec<_> =
333 let patches: Vec<_> =
334 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
334 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
335 let patch = patch::fold_patch_lists(&patches);
335 let patch = patch::fold_patch_lists(&patches);
336 Ok(patch.apply(&snapshot))
336 Ok(patch.apply(&snapshot))
337 }
337 }
338
338
339 /// Return the revlog data.
339 /// Return the revlog data.
340 fn data(&self) -> &[u8] {
340 fn data(&self) -> &[u8] {
341 match &self.data_bytes {
341 match &self.data_bytes {
342 Some(data_bytes) => data_bytes,
342 Some(data_bytes) => data_bytes,
343 None => panic!(
343 None => panic!(
344 "forgot to load the data or trying to access inline data"
344 "forgot to load the data or trying to access inline data"
345 ),
345 ),
346 }
346 }
347 }
347 }
348
348
349 pub fn make_null_entry(&self) -> RevlogEntry {
349 pub fn make_null_entry(&self) -> RevlogEntry {
350 RevlogEntry {
350 RevlogEntry {
351 revlog: self,
351 revlog: self,
352 rev: NULL_REVISION,
352 rev: NULL_REVISION,
353 bytes: b"",
353 bytes: b"",
354 compressed_len: 0,
354 compressed_len: 0,
355 uncompressed_len: 0,
355 uncompressed_len: 0,
356 base_rev_or_base_of_delta_chain: None,
356 base_rev_or_base_of_delta_chain: None,
357 p1: NULL_REVISION,
357 p1: NULL_REVISION,
358 p2: NULL_REVISION,
358 p2: NULL_REVISION,
359 flags: NULL_REVLOG_ENTRY_FLAGS,
359 flags: NULL_REVLOG_ENTRY_FLAGS,
360 hash: NULL_NODE,
360 hash: NULL_NODE,
361 }
361 }
362 }
362 }
363
363
364 /// Get an entry of the revlog.
364 /// Get an entry of the revlog.
365 pub fn get_entry(
365 pub fn get_entry(
366 &self,
366 &self,
367 rev: Revision,
367 rev: Revision,
368 ) -> Result<RevlogEntry, RevlogError> {
368 ) -> Result<RevlogEntry, RevlogError> {
369 if rev == NULL_REVISION {
369 if rev == NULL_REVISION {
370 return Ok(self.make_null_entry());
370 return Ok(self.make_null_entry());
371 }
371 }
372 let index_entry = self
372 let index_entry = self
373 .index
373 .index
374 .get_entry(rev)
374 .get_entry(rev)
375 .ok_or(RevlogError::InvalidRevision)?;
375 .ok_or(RevlogError::InvalidRevision)?;
376 let start = index_entry.offset();
376 let start = index_entry.offset();
377 let end = start + index_entry.compressed_len() as usize;
377 let end = start + index_entry.compressed_len() as usize;
378 let data = if self.index.is_inline() {
378 let data = if self.index.is_inline() {
379 self.index.data(start, end)
379 self.index.data(start, end)
380 } else {
380 } else {
381 &self.data()[start..end]
381 &self.data()[start..end]
382 };
382 };
383 let entry = RevlogEntry {
383 let entry = RevlogEntry {
384 revlog: self,
384 revlog: self,
385 rev,
385 rev,
386 bytes: data,
386 bytes: data,
387 compressed_len: index_entry.compressed_len(),
387 compressed_len: index_entry.compressed_len(),
388 uncompressed_len: index_entry.uncompressed_len(),
388 uncompressed_len: index_entry.uncompressed_len(),
389 base_rev_or_base_of_delta_chain: if index_entry
389 base_rev_or_base_of_delta_chain: if index_entry
390 .base_revision_or_base_of_delta_chain()
390 .base_revision_or_base_of_delta_chain()
391 == rev
391 == rev
392 {
392 {
393 None
393 None
394 } else {
394 } else {
395 Some(index_entry.base_revision_or_base_of_delta_chain())
395 Some(index_entry.base_revision_or_base_of_delta_chain())
396 },
396 },
397 p1: index_entry.p1(),
397 p1: index_entry.p1(),
398 p2: index_entry.p2(),
398 p2: index_entry.p2(),
399 flags: index_entry.flags(),
399 flags: index_entry.flags(),
400 hash: *index_entry.hash(),
400 hash: *index_entry.hash(),
401 };
401 };
402 Ok(entry)
402 Ok(entry)
403 }
403 }
404
404
405 /// when resolving internal references within revlog, any errors
405 /// when resolving internal references within revlog, any errors
406 /// should be reported as corruption, instead of e.g. "invalid revision"
406 /// should be reported as corruption, instead of e.g. "invalid revision"
407 fn get_entry_internal(
407 fn get_entry_internal(
408 &self,
408 &self,
409 rev: Revision,
409 rev: Revision,
410 ) -> Result<RevlogEntry, HgError> {
410 ) -> Result<RevlogEntry, HgError> {
411 self.get_entry(rev)
411 self.get_entry(rev)
412 .map_err(|_| corrupted(format!("revision {} out of range", rev)))
412 .map_err(|_| corrupted(format!("revision {} out of range", rev)))
413 }
413 }
414 }
414 }
415
415
416 /// The revlog entry's bytes and the necessary informations to extract
416 /// The revlog entry's bytes and the necessary informations to extract
417 /// the entry's data.
417 /// the entry's data.
418 #[derive(Clone)]
418 #[derive(Clone)]
419 pub struct RevlogEntry<'revlog> {
419 pub struct RevlogEntry<'revlog> {
420 revlog: &'revlog Revlog,
420 revlog: &'revlog Revlog,
421 rev: Revision,
421 rev: Revision,
422 bytes: &'revlog [u8],
422 bytes: &'revlog [u8],
423 compressed_len: u32,
423 compressed_len: u32,
424 uncompressed_len: i32,
424 uncompressed_len: i32,
425 base_rev_or_base_of_delta_chain: Option<Revision>,
425 base_rev_or_base_of_delta_chain: Option<Revision>,
426 p1: Revision,
426 p1: Revision,
427 p2: Revision,
427 p2: Revision,
428 flags: u16,
428 flags: u16,
429 hash: Node,
429 hash: Node,
430 }
430 }
431
431
432 thread_local! {
432 thread_local! {
433 // seems fine to [unwrap] here: this can only fail due to memory allocation
433 // seems fine to [unwrap] here: this can only fail due to memory allocation
434 // failing, and it's normal for that to cause panic.
434 // failing, and it's normal for that to cause panic.
435 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
435 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
436 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
436 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
437 }
437 }
438
438
439 fn zstd_decompress_to_buffer(
439 fn zstd_decompress_to_buffer(
440 bytes: &[u8],
440 bytes: &[u8],
441 buf: &mut Vec<u8>,
441 buf: &mut Vec<u8>,
442 ) -> Result<usize, std::io::Error> {
442 ) -> Result<usize, std::io::Error> {
443 ZSTD_DECODER
443 ZSTD_DECODER
444 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
444 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
445 }
445 }
446
446
447 impl<'revlog> RevlogEntry<'revlog> {
447 impl<'revlog> RevlogEntry<'revlog> {
448 pub fn revision(&self) -> Revision {
448 pub fn revision(&self) -> Revision {
449 self.rev
449 self.rev
450 }
450 }
451
451
452 pub fn node(&self) -> &Node {
452 pub fn node(&self) -> &Node {
453 &self.hash
453 &self.hash
454 }
454 }
455
455
456 pub fn uncompressed_len(&self) -> Option<u32> {
456 pub fn uncompressed_len(&self) -> Option<u32> {
457 u32::try_from(self.uncompressed_len).ok()
457 u32::try_from(self.uncompressed_len).ok()
458 }
458 }
459
459
460 pub fn has_p1(&self) -> bool {
460 pub fn has_p1(&self) -> bool {
461 self.p1 != NULL_REVISION
461 self.p1 != NULL_REVISION
462 }
462 }
463
463
464 pub fn p1_entry(
464 pub fn p1_entry(
465 &self,
465 &self,
466 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
466 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
467 if self.p1 == NULL_REVISION {
467 if self.p1 == NULL_REVISION {
468 Ok(None)
468 Ok(None)
469 } else {
469 } else {
470 Ok(Some(self.revlog.get_entry(self.p1)?))
470 Ok(Some(self.revlog.get_entry(self.p1)?))
471 }
471 }
472 }
472 }
473
473
474 pub fn p2_entry(
474 pub fn p2_entry(
475 &self,
475 &self,
476 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
476 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
477 if self.p2 == NULL_REVISION {
477 if self.p2 == NULL_REVISION {
478 Ok(None)
478 Ok(None)
479 } else {
479 } else {
480 Ok(Some(self.revlog.get_entry(self.p2)?))
480 Ok(Some(self.revlog.get_entry(self.p2)?))
481 }
481 }
482 }
482 }
483
483
484 pub fn p1(&self) -> Option<Revision> {
484 pub fn p1(&self) -> Option<Revision> {
485 if self.p1 == NULL_REVISION {
485 if self.p1 == NULL_REVISION {
486 None
486 None
487 } else {
487 } else {
488 Some(self.p1)
488 Some(self.p1)
489 }
489 }
490 }
490 }
491
491
492 pub fn p2(&self) -> Option<Revision> {
492 pub fn p2(&self) -> Option<Revision> {
493 if self.p2 == NULL_REVISION {
493 if self.p2 == NULL_REVISION {
494 None
494 None
495 } else {
495 } else {
496 Some(self.p2)
496 Some(self.p2)
497 }
497 }
498 }
498 }
499
499
500 pub fn is_censored(&self) -> bool {
500 pub fn is_censored(&self) -> bool {
501 (self.flags & REVISION_FLAG_CENSORED) != 0
501 (self.flags & REVISION_FLAG_CENSORED) != 0
502 }
502 }
503
503
504 pub fn has_length_affecting_flag_processor(&self) -> bool {
504 pub fn has_length_affecting_flag_processor(&self) -> bool {
505 // Relevant Python code: revlog.size()
505 // Relevant Python code: revlog.size()
506 // note: ELLIPSIS is known to not change the content
506 // note: ELLIPSIS is known to not change the content
507 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
507 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
508 }
508 }
509
509
510 /// The data for this entry, after resolving deltas if any.
510 /// The data for this entry, after resolving deltas if any.
511 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
511 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
512 let mut entry = self.clone();
512 let mut entry = self.clone();
513 let mut delta_chain = vec![];
513 let mut delta_chain = vec![];
514
514
515 // The meaning of `base_rev_or_base_of_delta_chain` depends on
515 // The meaning of `base_rev_or_base_of_delta_chain` depends on
516 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
516 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
517 // `mercurial/revlogutils/constants.py` and the code in
517 // `mercurial/revlogutils/constants.py` and the code in
518 // [_chaininfo] and in [index_deltachain].
518 // [_chaininfo] and in [index_deltachain].
519 let uses_generaldelta = self.revlog.index.uses_generaldelta();
519 let uses_generaldelta = self.revlog.index.uses_generaldelta();
520 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
520 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
521 let base_rev = if uses_generaldelta {
521 let base_rev = if uses_generaldelta {
522 base_rev
522 base_rev
523 } else {
523 } else {
524 entry.rev - 1
524 entry.rev - 1
525 };
525 };
526 delta_chain.push(entry);
526 delta_chain.push(entry);
527 entry = self.revlog.get_entry_internal(base_rev)?;
527 entry = self.revlog.get_entry_internal(base_rev)?;
528 }
528 }
529
529
530 let data = if delta_chain.is_empty() {
530 let data = if delta_chain.is_empty() {
531 entry.data_chunk()?
531 entry.data_chunk()?
532 } else {
532 } else {
533 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
533 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
534 };
534 };
535
535
536 Ok(data)
536 Ok(data)
537 }
537 }
538
538
539 fn check_data(
539 fn check_data(
540 &self,
540 &self,
541 data: Cow<'revlog, [u8]>,
541 data: Cow<'revlog, [u8]>,
542 ) -> Result<Cow<'revlog, [u8]>, HgError> {
542 ) -> Result<Cow<'revlog, [u8]>, HgError> {
543 if self.revlog.check_hash(
543 if self.revlog.check_hash(
544 self.p1,
544 self.p1,
545 self.p2,
545 self.p2,
546 self.hash.as_bytes(),
546 self.hash.as_bytes(),
547 &data,
547 &data,
548 ) {
548 ) {
549 Ok(data)
549 Ok(data)
550 } else {
550 } else {
551 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
551 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
552 return Err(HgError::unsupported(
552 return Err(HgError::unsupported(
553 "ellipsis revisions are not supported by rhg",
553 "ellipsis revisions are not supported by rhg",
554 ));
554 ));
555 }
555 }
556 Err(corrupted(format!(
556 Err(corrupted(format!(
557 "hash check failed for revision {}",
557 "hash check failed for revision {}",
558 self.rev
558 self.rev
559 )))
559 )))
560 }
560 }
561 }
561 }
562
562
563 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
563 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
564 let data = self.rawdata()?;
564 let data = self.rawdata()?;
565 if self.rev == NULL_REVISION {
566 return Ok(data);
567 }
565 if self.is_censored() {
568 if self.is_censored() {
566 return Err(HgError::CensoredNodeError);
569 return Err(HgError::CensoredNodeError);
567 }
570 }
568 self.check_data(data)
571 self.check_data(data)
569 }
572 }
570
573
571 /// Extract the data contained in the entry.
574 /// Extract the data contained in the entry.
572 /// This may be a delta. (See `is_delta`.)
575 /// This may be a delta. (See `is_delta`.)
573 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
576 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
574 if self.bytes.is_empty() {
577 if self.bytes.is_empty() {
575 return Ok(Cow::Borrowed(&[]));
578 return Ok(Cow::Borrowed(&[]));
576 }
579 }
577 match self.bytes[0] {
580 match self.bytes[0] {
578 // Revision data is the entirety of the entry, including this
581 // Revision data is the entirety of the entry, including this
579 // header.
582 // header.
580 b'\0' => Ok(Cow::Borrowed(self.bytes)),
583 b'\0' => Ok(Cow::Borrowed(self.bytes)),
581 // Raw revision data follows.
584 // Raw revision data follows.
582 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
585 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
583 // zlib (RFC 1950) data.
586 // zlib (RFC 1950) data.
584 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
587 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
585 // zstd data.
588 // zstd data.
586 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
589 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
587 // A proper new format should have had a repo/store requirement.
590 // A proper new format should have had a repo/store requirement.
588 format_type => Err(corrupted(format!(
591 format_type => Err(corrupted(format!(
589 "unknown compression header '{}'",
592 "unknown compression header '{}'",
590 format_type
593 format_type
591 ))),
594 ))),
592 }
595 }
593 }
596 }
594
597
595 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
598 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
596 let mut decoder = ZlibDecoder::new(self.bytes);
599 let mut decoder = ZlibDecoder::new(self.bytes);
597 if self.is_delta() {
600 if self.is_delta() {
598 let mut buf = Vec::with_capacity(self.compressed_len as usize);
601 let mut buf = Vec::with_capacity(self.compressed_len as usize);
599 decoder
602 decoder
600 .read_to_end(&mut buf)
603 .read_to_end(&mut buf)
601 .map_err(|e| corrupted(e.to_string()))?;
604 .map_err(|e| corrupted(e.to_string()))?;
602 Ok(buf)
605 Ok(buf)
603 } else {
606 } else {
604 let cap = self.uncompressed_len.max(0) as usize;
607 let cap = self.uncompressed_len.max(0) as usize;
605 let mut buf = vec![0; cap];
608 let mut buf = vec![0; cap];
606 decoder
609 decoder
607 .read_exact(&mut buf)
610 .read_exact(&mut buf)
608 .map_err(|e| corrupted(e.to_string()))?;
611 .map_err(|e| corrupted(e.to_string()))?;
609 Ok(buf)
612 Ok(buf)
610 }
613 }
611 }
614 }
612
615
613 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
616 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
614 let cap = self.uncompressed_len.max(0) as usize;
617 let cap = self.uncompressed_len.max(0) as usize;
615 if self.is_delta() {
618 if self.is_delta() {
616 // [cap] is usually an over-estimate of the space needed because
619 // [cap] is usually an over-estimate of the space needed because
617 // it's the length of delta-decoded data, but we're interested
620 // it's the length of delta-decoded data, but we're interested
618 // in the size of the delta.
621 // in the size of the delta.
619 // This means we have to [shrink_to_fit] to avoid holding on
622 // This means we have to [shrink_to_fit] to avoid holding on
620 // to a large chunk of memory, but it also means we must have a
623 // to a large chunk of memory, but it also means we must have a
621 // fallback branch, for the case when the delta is longer than
624 // fallback branch, for the case when the delta is longer than
622 // the original data (surprisingly, this does happen in practice)
625 // the original data (surprisingly, this does happen in practice)
623 let mut buf = Vec::with_capacity(cap);
626 let mut buf = Vec::with_capacity(cap);
624 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
627 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
625 Ok(_) => buf.shrink_to_fit(),
628 Ok(_) => buf.shrink_to_fit(),
626 Err(_) => {
629 Err(_) => {
627 buf.clear();
630 buf.clear();
628 zstd::stream::copy_decode(self.bytes, &mut buf)
631 zstd::stream::copy_decode(self.bytes, &mut buf)
629 .map_err(|e| corrupted(e.to_string()))?;
632 .map_err(|e| corrupted(e.to_string()))?;
630 }
633 }
631 };
634 };
632 Ok(buf)
635 Ok(buf)
633 } else {
636 } else {
634 let mut buf = Vec::with_capacity(cap);
637 let mut buf = Vec::with_capacity(cap);
635 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
638 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
636 .map_err(|e| corrupted(e.to_string()))?;
639 .map_err(|e| corrupted(e.to_string()))?;
637 if len != self.uncompressed_len as usize {
640 if len != self.uncompressed_len as usize {
638 Err(corrupted("uncompressed length does not match"))
641 Err(corrupted("uncompressed length does not match"))
639 } else {
642 } else {
640 Ok(buf)
643 Ok(buf)
641 }
644 }
642 }
645 }
643 }
646 }
644
647
645 /// Tell if the entry is a snapshot or a delta
648 /// Tell if the entry is a snapshot or a delta
646 /// (influences on decompression).
649 /// (influences on decompression).
647 fn is_delta(&self) -> bool {
650 fn is_delta(&self) -> bool {
648 self.base_rev_or_base_of_delta_chain.is_some()
651 self.base_rev_or_base_of_delta_chain.is_some()
649 }
652 }
650 }
653 }
651
654
652 /// Calculate the hash of a revision given its data and its parents.
655 /// Calculate the hash of a revision given its data and its parents.
653 fn hash(
656 fn hash(
654 data: &[u8],
657 data: &[u8],
655 p1_hash: &[u8],
658 p1_hash: &[u8],
656 p2_hash: &[u8],
659 p2_hash: &[u8],
657 ) -> [u8; NODE_BYTES_LENGTH] {
660 ) -> [u8; NODE_BYTES_LENGTH] {
658 let mut hasher = Sha1::new();
661 let mut hasher = Sha1::new();
659 let (a, b) = (p1_hash, p2_hash);
662 let (a, b) = (p1_hash, p2_hash);
660 if a > b {
663 if a > b {
661 hasher.update(b);
664 hasher.update(b);
662 hasher.update(a);
665 hasher.update(a);
663 } else {
666 } else {
664 hasher.update(a);
667 hasher.update(a);
665 hasher.update(b);
668 hasher.update(b);
666 }
669 }
667 hasher.update(data);
670 hasher.update(data);
668 *hasher.finalize().as_ref()
671 *hasher.finalize().as_ref()
669 }
672 }
670
673
671 #[cfg(test)]
674 #[cfg(test)]
672 mod tests {
675 mod tests {
673 use super::*;
676 use super::*;
674 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
677 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
675 use itertools::Itertools;
678 use itertools::Itertools;
676
679
/// An empty index file gives an empty revlog in which only the null
/// revision can be resolved.
#[test]
fn test_empty() {
    let tmp_dir = tempfile::tempdir().unwrap();
    let vfs = Vfs {
        base: tmp_dir.path(),
    };
    let index_file = tmp_dir.path().join("foo.i");
    std::fs::write(index_file, b"").unwrap();

    let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

    // No stored revision at all.
    assert!(revlog.is_empty());
    assert_eq!(revlog.len(), 0);
    assert!(revlog.get_entry(0).is_err());
    assert!(!revlog.has_rev(0));

    // The null node still resolves to the null revision.
    let null_rev = revlog.rev_from_node(NULL_NODE.into()).unwrap();
    assert_eq!(null_rev, NULL_REVISION);

    // The null entry is available and its data is empty.
    let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap();
    assert_eq!(null_entry.revision(), NULL_REVISION);
    assert!(null_entry.data().unwrap().is_empty());
}
692
698
/// Open an inline revlog made of three index entries (two revisions
/// without parents, then one having both as parents) and check what the
/// entries report about revision, node and parents.
#[test]
fn test_inline() {
    let tmp_dir = tempfile::tempdir().unwrap();
    let vfs = Vfs {
        base: tmp_dir.path(),
    };

    let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
        .unwrap();
    let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
        .unwrap();
    let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
        .unwrap();

    // Only the first entry carries the version and the inline flag.
    let rev0_bytes = IndexEntryBuilder::new()
        .is_first(true)
        .with_version(1)
        .with_inline(true)
        .with_offset(INDEX_ENTRY_SIZE)
        .with_node(node0)
        .build();
    let rev1_bytes = IndexEntryBuilder::new()
        .with_offset(INDEX_ENTRY_SIZE)
        .with_node(node1)
        .build();
    let rev2_bytes = IndexEntryBuilder::new()
        .with_offset(INDEX_ENTRY_SIZE)
        .with_p1(0)
        .with_p2(1)
        .with_node(node2)
        .build();
    let contents = rev0_bytes
        .into_iter()
        .chain(rev1_bytes)
        .chain(rev2_bytes)
        .collect_vec();
    std::fs::write(tmp_dir.path().join("foo.i"), contents).unwrap();

    let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

    // Revisions 0 and 1 were built without parents.
    for &(rev, expected_node) in &[(0, node0), (1, node1)] {
        let entry = revlog.get_entry(rev).ok().unwrap();
        assert_eq!(entry.revision(), rev);
        assert_eq!(*entry.node(), expected_node);
        assert!(!entry.has_p1());
        assert_eq!(entry.p1(), None);
        assert_eq!(entry.p2(), None);
        let first_parent = entry.p1_entry().unwrap();
        assert!(first_parent.is_none());
        let second_parent = entry.p2_entry().unwrap();
        assert!(second_parent.is_none());
    }

    // Revision 2 has revisions 0 and 1 as parents.
    let merge = revlog.get_entry(2).ok().unwrap();
    assert_eq!(merge.revision(), 2);
    assert_eq!(*merge.node(), node2);
    assert!(merge.has_p1());
    assert_eq!(merge.p1(), Some(0));
    assert_eq!(merge.p2(), Some(1));
    let first_parent = merge.p1_entry().unwrap();
    assert!(first_parent.is_some());
    assert_eq!(first_parent.unwrap().revision(), 0);
    let second_parent = merge.p2_entry().unwrap();
    assert!(second_parent.is_some());
    assert_eq!(second_parent.unwrap().revision(), 1);
}
762
768
/// Node-to-revision lookups must keep working on a revlog whose first
/// node has been forged to start with zeros, as long as the (corrupted)
/// revision data itself is never read.
#[test]
fn test_nodemap() {
    let tmp_dir = tempfile::tempdir().unwrap();
    let vfs = Vfs {
        base: tmp_dir.path(),
    };

    // building a revlog with a forced Node starting with zeros
    // This is a corruption, but it does not preclude using the nodemap
    // if we don't try and access the data
    let node0 =
        Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd").unwrap();
    let node1 =
        Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12").unwrap();

    let first_entry = IndexEntryBuilder::new()
        .is_first(true)
        .with_version(1)
        .with_inline(true)
        .with_offset(INDEX_ENTRY_SIZE)
        .with_node(node0)
        .build();
    let second_entry = IndexEntryBuilder::new()
        .with_offset(INDEX_ENTRY_SIZE)
        .with_node(node1)
        .build();

    // Serialize both entries back to back into a single index file.
    let index_bytes = vec![first_entry, second_entry]
        .into_iter()
        .flatten()
        .collect_vec();
    let index_path = tmp_dir.path().join("foo.i");
    std::fs::write(index_path, index_bytes).unwrap();
    let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

    // accessing the data shows the corruption
    revlog.get_entry(0).unwrap().data().unwrap_err();

    // Lookups by full node.
    assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
    assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
    assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);

    // Lookups by hexadecimal prefix.
    let rev_of_hex =
        |hex: &str| revlog.rev_from_node(NodePrefix::from_hex(hex).unwrap());
    assert_eq!(rev_of_hex("000").unwrap(), -1);
    assert_eq!(rev_of_hex("b00").unwrap(), 1);

    // RevlogError does not implement PartialEq
    // (ultimately because io::Error does not)
    let ambiguous_err =
        rev_of_hex("00").expect_err("Expected to give AmbiguousPrefix error");
    if !matches!(ambiguous_err, RevlogError::AmbiguousPrefix) {
        panic!("Got another error than AmbiguousPrefix: {:?}", ambiguous_err);
    }
}
823 }
829 }
General Comments 0
You need to be logged in to leave comments. Login now