##// END OF EJS Templates
rust-revlog: fix RevlogEntry.data() for NULL_REVISION...
Georges Racinet -
r51639:124c44b5 stable
parent child Browse files
Show More
@@ -1,341 +1,346 b''
1 1 use crate::errors::HgError;
2 2 use crate::revlog::{Node, NodePrefix};
3 3 use crate::revlog::{Revision, NULL_REVISION};
4 4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
5 5 use crate::utils::hg_path::HgPath;
6 6 use crate::vfs::Vfs;
7 7 use itertools::Itertools;
8 8 use std::ascii::escape_default;
9 9 use std::borrow::Cow;
10 10 use std::fmt::{Debug, Formatter};
11 11
12 12 /// A specialized `Revlog` to work with changelog data format.
13 13 pub struct Changelog {
14 14 /// The generic `revlog` format.
15 15 pub(crate) revlog: Revlog,
16 16 }
17 17
18 18 impl Changelog {
19 19 /// Open the `changelog` of a repository given by its root.
20 20 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
21 21 let revlog =
22 22 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
23 23 Ok(Self { revlog })
24 24 }
25 25
26 26 /// Return the `ChangelogRevisionData` for the given node ID.
27 27 pub fn data_for_node(
28 28 &self,
29 29 node: NodePrefix,
30 30 ) -> Result<ChangelogRevisionData, RevlogError> {
31 31 let rev = self.revlog.rev_from_node(node)?;
32 32 self.data_for_rev(rev)
33 33 }
34 34
35 35 /// Return the [`ChangelogEntry`] for the given revision number.
36 36 pub fn entry_for_rev(
37 37 &self,
38 38 rev: Revision,
39 39 ) -> Result<ChangelogEntry, RevlogError> {
40 40 let revlog_entry = self.revlog.get_entry(rev)?;
41 41 Ok(ChangelogEntry { revlog_entry })
42 42 }
43 43
44 44 /// Return the [`ChangelogRevisionData`] for the given revision number.
45 45 ///
46 46 /// This is a useful shortcut in case the caller does not need the
47 47 /// generic revlog information (parents, hashes etc). Otherwise
48 48 /// consider taking a [`ChangelogEntry`] with
49 49 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
50 50 pub fn data_for_rev(
51 51 &self,
52 52 rev: Revision,
53 53 ) -> Result<ChangelogRevisionData, RevlogError> {
54 54 if rev == NULL_REVISION {
55 55 return Ok(ChangelogRevisionData::null());
56 56 }
57 57 self.entry_for_rev(rev)?.data()
58 58 }
59 59
60 60 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
61 61 self.revlog.node_from_rev(rev)
62 62 }
63 63
64 64 pub fn rev_from_node(
65 65 &self,
66 66 node: NodePrefix,
67 67 ) -> Result<Revision, RevlogError> {
68 68 self.revlog.rev_from_node(node)
69 69 }
70 70 }
71 71
72 72 /// A specialized `RevlogEntry` for `changelog` data format
73 73 ///
74 74 /// This is a `RevlogEntry` with the added semantics that the associated
75 75 /// data should meet the requirements for `changelog`, materialized by
76 76 /// the fact that `data()` constructs a `ChangelogRevisionData`.
77 77 /// In case that promise would be broken, the `data` method returns an error.
78 78 #[derive(Clone)]
79 79 pub struct ChangelogEntry<'changelog> {
80 80 /// Same data, as a generic `RevlogEntry`.
81 81 pub(crate) revlog_entry: RevlogEntry<'changelog>,
82 82 }
83 83
84 84 impl<'changelog> ChangelogEntry<'changelog> {
85 85 pub fn data<'a>(
86 86 &'a self,
87 87 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
88 88 let bytes = self.revlog_entry.data()?;
89 89 if bytes.is_empty() {
90 90 Ok(ChangelogRevisionData::null())
91 91 } else {
92 92 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
93 93 RevlogError::Other(HgError::CorruptedRepository(format!(
94 94 "Invalid changelog data for revision {}: {:?}",
95 95 self.revlog_entry.revision(),
96 96 err
97 97 )))
98 98 })?)
99 99 }
100 100 }
101 101
102 102 /// Obtain a reference to the underlying `RevlogEntry`.
103 103 ///
104 104 /// This allows the caller to access the information that is common
105 105 /// to all revlog entries: revision number, node id, parent revisions etc.
106 106 pub fn as_revlog_entry(&self) -> &RevlogEntry {
107 107 &self.revlog_entry
108 108 }
109 109
110 110 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
111 111 Ok(self
112 112 .revlog_entry
113 113 .p1_entry()?
114 114 .map(|revlog_entry| Self { revlog_entry }))
115 115 }
116 116
117 117 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
118 118 Ok(self
119 119 .revlog_entry
120 120 .p2_entry()?
121 121 .map(|revlog_entry| Self { revlog_entry }))
122 122 }
123 123 }
124 124
/// The bytes of a `changelog` entry, together with the offsets needed to
/// slice them into fields.
///
/// All offsets index into `bytes` and are exclusive of the newline that
/// terminates the field.
#[derive(PartialEq)]
pub struct ChangelogRevisionData<'changelog> {
    /// Raw data bytes of the `changelog` entry.
    bytes: Cow<'changelog, [u8]>,
    /// End of the hex manifest node.
    manifest_end: usize,
    /// End of the user+email line.
    user_end: usize,
    /// End of the timestamp+timezone+extras line.
    timestamp_end: usize,
    /// End of the file list.
    files_end: usize,
}
140 140
141 141 impl<'changelog> ChangelogRevisionData<'changelog> {
142 142 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
143 143 let mut line_iter = bytes.split(|b| b == &b'\n');
144 144 let manifest_end = line_iter
145 145 .next()
146 146 .expect("Empty iterator from split()?")
147 147 .len();
148 148 let user_slice = line_iter.next().ok_or_else(|| {
149 149 HgError::corrupted("Changeset data truncated after manifest line")
150 150 })?;
151 151 let user_end = manifest_end + 1 + user_slice.len();
152 152 let timestamp_slice = line_iter.next().ok_or_else(|| {
153 153 HgError::corrupted("Changeset data truncated after user line")
154 154 })?;
155 155 let timestamp_end = user_end + 1 + timestamp_slice.len();
156 156 let mut files_end = timestamp_end + 1;
157 157 loop {
158 158 let line = line_iter.next().ok_or_else(|| {
159 159 HgError::corrupted("Changeset data truncated in files list")
160 160 })?;
161 161 if line.is_empty() {
162 162 if files_end == bytes.len() {
163 163 // The list of files ended with a single newline (there
164 164 // should be two)
165 165 return Err(HgError::corrupted(
166 166 "Changeset data truncated after files list",
167 167 ));
168 168 }
169 169 files_end -= 1;
170 170 break;
171 171 }
172 172 files_end += line.len() + 1;
173 173 }
174 174
175 175 Ok(Self {
176 176 bytes,
177 177 manifest_end,
178 178 user_end,
179 179 timestamp_end,
180 180 files_end,
181 181 })
182 182 }
183 183
184 184 fn null() -> Self {
185 185 Self::new(Cow::Borrowed(
186 186 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
187 187 ))
188 188 .unwrap()
189 189 }
190 190
191 191 /// Return an iterator over the lines of the entry.
192 192 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
193 193 self.bytes.split(|b| b == &b'\n')
194 194 }
195 195
196 196 /// Return the node id of the `manifest` referenced by this `changelog`
197 197 /// entry.
198 198 pub fn manifest_node(&self) -> Result<Node, HgError> {
199 199 let manifest_node_hex = &self.bytes[..self.manifest_end];
200 200 Node::from_hex_for_repo(manifest_node_hex)
201 201 }
202 202
203 203 /// The full user string (usually a name followed by an email enclosed in
204 204 /// angle brackets)
205 205 pub fn user(&self) -> &[u8] {
206 206 &self.bytes[self.manifest_end + 1..self.user_end]
207 207 }
208 208
209 209 /// The full timestamp line (timestamp in seconds, offset in seconds, and
210 210 /// possibly extras)
211 211 // TODO: We should expose this in a more useful way
212 212 pub fn timestamp_line(&self) -> &[u8] {
213 213 &self.bytes[self.user_end + 1..self.timestamp_end]
214 214 }
215 215
216 216 /// The files changed in this revision.
217 217 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
218 218 self.bytes[self.timestamp_end + 1..self.files_end]
219 219 .split(|b| b == &b'\n')
220 220 .map(HgPath::new)
221 221 }
222 222
223 223 /// The change description.
224 224 pub fn description(&self) -> &[u8] {
225 225 &self.bytes[self.files_end + 2..]
226 226 }
227 227 }
228 228
229 229 impl Debug for ChangelogRevisionData<'_> {
230 230 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
231 231 f.debug_struct("ChangelogRevisionData")
232 232 .field("bytes", &debug_bytes(&self.bytes))
233 233 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
234 234 .field(
235 235 "user",
236 236 &debug_bytes(
237 237 &self.bytes[self.manifest_end + 1..self.user_end],
238 238 ),
239 239 )
240 240 .field(
241 241 "timestamp",
242 242 &debug_bytes(
243 243 &self.bytes[self.user_end + 1..self.timestamp_end],
244 244 ),
245 245 )
246 246 .field(
247 247 "files",
248 248 &debug_bytes(
249 249 &self.bytes[self.timestamp_end + 1..self.files_end],
250 250 ),
251 251 )
252 252 .field(
253 253 "description",
254 254 &debug_bytes(&self.bytes[self.files_end + 2..]),
255 255 )
256 256 .finish()
257 257 }
258 258 }
259 259
/// Render `bytes` as a printable string, escaping each byte with
/// `std::ascii::escape_default`.
fn debug_bytes(bytes: &[u8]) -> String {
    // Escaped output is pure ASCII, so each byte maps to exactly one char.
    bytes
        .iter()
        .copied()
        .flat_map(escape_default)
        .map(char::from)
        .collect()
}
266 266
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vfs::Vfs;
    use crate::NULL_REVISION;
    use pretty_assertions::assert_eq;

    /// Every truncated form of changelog data must be rejected.
    #[test]
    fn test_create_changelogrevisiondata_invalid() {
        // Completely empty
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
        // No newline after manifest
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
        // No newline after user
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
        // No newline after timestamp
        assert!(
            ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
        );
        // Missing newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2"
        ))
        .is_err());
        // Only one newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2\n"
        ))
        .is_err());
    }

    /// Well-formed data is split into the expected fields.
    #[test]
    fn test_create_changelogrevisiondata() {
        let data = ChangelogRevisionData::new(Cow::Borrowed(
            b"0123456789abcdef0123456789abcdef01234567
Some One <someone@example.com>
0 0
file1
file2

some
commit
message",
        ))
        .unwrap();
        assert_eq!(
            data.manifest_node().unwrap(),
            Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                .unwrap()
        );
        assert_eq!(data.user(), b"Some One <someone@example.com>");
        assert_eq!(data.timestamp_line(), b"0 0");
        assert_eq!(
            data.files().collect_vec(),
            vec![HgPath::new("file1"), HgPath::new("file2")]
        );
        assert_eq!(data.description(), b"some\ncommit\nmessage");
    }

    /// The null revision yields the null changelog data, whichever way it
    /// is requested.
    #[test]
    fn test_data_from_rev_null() -> Result<(), RevlogError> {
        // an empty revlog will be enough for this case
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        let changelog = Changelog { revlog };
        assert_eq!(
            changelog.data_for_rev(NULL_REVISION)?,
            ChangelogRevisionData::null()
        );
        // same with the intermediate entry object
        assert_eq!(
            changelog.entry_for_rev(NULL_REVISION)?.data()?,
            ChangelogRevisionData::null()
        );
        Ok(())
    }
}
@@ -1,823 +1,829 b''
1 1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 10 mod nodemap_docket;
11 11 pub mod path_encode;
12 12 pub use node::{FromHexError, Node, NodePrefix};
13 13 pub mod changelog;
14 14 pub mod filelog;
15 15 pub mod index;
16 16 pub mod manifest;
17 17 pub mod patch;
18 18
19 19 use std::borrow::Cow;
20 20 use std::io::Read;
21 21 use std::ops::Deref;
22 22 use std::path::Path;
23 23
24 24 use flate2::read::ZlibDecoder;
25 25 use sha1::{Digest, Sha1};
26 26 use std::cell::RefCell;
27 27 use zstd;
28 28
29 29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 30 use self::nodemap_docket::NodeMapDocket;
31 31 use super::index::Index;
32 32 use super::nodemap::{NodeMap, NodeMapError};
33 33 use crate::errors::HgError;
34 34 use crate::vfs::Vfs;
35 35
/// Mercurial revision numbers
///
/// As noted in revlog.c, revision numbers are actually encoded in 4 bytes
/// and are liberally converted to ints, hence the `i32`.
pub type Revision = i32;

/// Marker expressing the absence of a parent
///
/// Whatever the actual representation, `NULL_REVISION` is guaranteed to be
/// smaller than every existing revision.
pub const NULL_REVISION: Revision = -1;

/// Same as `mercurial.node.wdirrev`
///
/// This is also equal to `i32::MAX`, but it is better to spell it out
/// explicitly, same as in `mercurial.node`.
pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fff_ffff;

/// Forty `f` hex digits; presumably the hex node id standing for the
/// working directory (inferred from the name, to be confirmed against
/// `mercurial.node`).
pub const WORKING_DIRECTORY_HEX: &str =
    "ffffffffffffffffffffffffffffffffffffffff";
57 57
58 58 /// The simplest expression of what we need of Mercurial DAGs.
59 59 pub trait Graph {
60 60 /// Return the two parents of the given `Revision`.
61 61 ///
62 62 /// Each of the parents can be independently `NULL_REVISION`
63 63 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
64 64 }
65 65
66 66 #[derive(Clone, Debug, PartialEq)]
67 67 pub enum GraphError {
68 68 ParentOutOfRange(Revision),
69 69 WorkingDirectoryUnsupported,
70 70 }
71 71
72 72 /// The Mercurial Revlog Index
73 73 ///
74 74 /// This is currently limited to the minimal interface that is needed for
75 75 /// the [`nodemap`](nodemap/index.html) module
76 76 pub trait RevlogIndex {
77 77 /// Total number of Revisions referenced in this index
78 78 fn len(&self) -> usize;
79 79
80 80 fn is_empty(&self) -> bool {
81 81 self.len() == 0
82 82 }
83 83
84 84 /// Return a reference to the Node or `None` if rev is out of bounds
85 85 ///
86 86 /// `NULL_REVISION` is not considered to be out of bounds.
87 87 fn node(&self, rev: Revision) -> Option<&Node>;
88 88 }
89 89
// Per-revision flag bits, from bit 15 down to bit 12.
const REVISION_FLAG_CENSORED: u16 = 0x8000;
const REVISION_FLAG_ELLIPSIS: u16 = 0x4000;
const REVISION_FLAG_EXTSTORED: u16 = 0x2000;
const REVISION_FLAG_HASCOPIESINFO: u16 = 0x1000;

// Keep this in sync with REVIDX_KNOWN_FLAGS in
// mercurial/revlogutils/flagutil.py
const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_HASCOPIESINFO;

// Flags carried by the entry built for the null revision: none.
const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
103 103
104 104 #[derive(Debug, derive_more::From)]
105 105 pub enum RevlogError {
106 106 InvalidRevision,
107 107 /// Working directory is not supported
108 108 WDirUnsupported,
109 109 /// Found more than one entry whose ID match the requested prefix
110 110 AmbiguousPrefix,
111 111 #[from]
112 112 Other(HgError),
113 113 }
114 114
115 115 impl From<NodeMapError> for RevlogError {
116 116 fn from(error: NodeMapError) -> Self {
117 117 match error {
118 118 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
119 119 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
120 120 format!("nodemap point to revision {} not in index", rev),
121 121 ),
122 122 }
123 123 }
124 124 }
125 125
126 126 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
127 127 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
128 128 }
129 129
130 130 impl RevlogError {
131 131 fn corrupted<S: AsRef<str>>(context: S) -> Self {
132 132 RevlogError::Other(corrupted(context))
133 133 }
134 134 }
135 135
136 136 /// Read only implementation of revlog.
137 137 pub struct Revlog {
138 138 /// When index and data are not interleaved: bytes of the revlog index.
139 139 /// When index and data are interleaved: bytes of the revlog index and
140 140 /// data.
141 141 index: Index,
142 142 /// When index and data are not interleaved: bytes of the revlog data
143 143 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
144 144 /// When present on disk: the persistent nodemap for this revlog
145 145 nodemap: Option<nodemap::NodeTree>,
146 146 }
147 147
148 148 impl Revlog {
149 149 /// Open a revlog index file.
150 150 ///
151 151 /// It will also open the associated data file if index and data are not
152 152 /// interleaved.
153 153 pub fn open(
154 154 store_vfs: &Vfs,
155 155 index_path: impl AsRef<Path>,
156 156 data_path: Option<&Path>,
157 157 use_nodemap: bool,
158 158 ) -> Result<Self, HgError> {
159 159 let index_path = index_path.as_ref();
160 160 let index = {
161 161 match store_vfs.mmap_open_opt(&index_path)? {
162 162 None => Index::new(Box::new(vec![])),
163 163 Some(index_mmap) => {
164 164 let index = Index::new(Box::new(index_mmap))?;
165 165 Ok(index)
166 166 }
167 167 }
168 168 }?;
169 169
170 170 let default_data_path = index_path.with_extension("d");
171 171
172 172 // type annotation required
173 173 // won't recognize Mmap as Deref<Target = [u8]>
174 174 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
175 175 if index.is_inline() {
176 176 None
177 177 } else {
178 178 let data_path = data_path.unwrap_or(&default_data_path);
179 179 let data_mmap = store_vfs.mmap_open(data_path)?;
180 180 Some(Box::new(data_mmap))
181 181 };
182 182
183 183 let nodemap = if index.is_inline() || !use_nodemap {
184 184 None
185 185 } else {
186 186 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
187 187 |(docket, data)| {
188 188 nodemap::NodeTree::load_bytes(
189 189 Box::new(data),
190 190 docket.data_length,
191 191 )
192 192 },
193 193 )
194 194 };
195 195
196 196 Ok(Revlog {
197 197 index,
198 198 data_bytes,
199 199 nodemap,
200 200 })
201 201 }
202 202
203 203 /// Return number of entries of the `Revlog`.
204 204 pub fn len(&self) -> usize {
205 205 self.index.len()
206 206 }
207 207
208 208 /// Returns `true` if the `Revlog` has zero `entries`.
209 209 pub fn is_empty(&self) -> bool {
210 210 self.index.is_empty()
211 211 }
212 212
213 213 /// Returns the node ID for the given revision number, if it exists in this
214 214 /// revlog
215 215 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
216 216 if rev == NULL_REVISION {
217 217 return Some(&NULL_NODE);
218 218 }
219 219 Some(self.index.get_entry(rev)?.hash())
220 220 }
221 221
222 222 /// Return the revision number for the given node ID, if it exists in this
223 223 /// revlog
224 224 pub fn rev_from_node(
225 225 &self,
226 226 node: NodePrefix,
227 227 ) -> Result<Revision, RevlogError> {
228 228 let looked_up = if let Some(nodemap) = &self.nodemap {
229 229 nodemap
230 230 .find_bin(&self.index, node)?
231 231 .ok_or(RevlogError::InvalidRevision)
232 232 } else {
233 233 self.rev_from_node_no_persistent_nodemap(node)
234 234 };
235 235
236 236 if node.is_prefix_of(&NULL_NODE) {
237 237 return match looked_up {
238 238 Ok(_) => Err(RevlogError::AmbiguousPrefix),
239 239 Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),
240 240 res => res,
241 241 };
242 242 };
243 243
244 244 looked_up
245 245 }
246 246
247 247 /// Same as `rev_from_node`, without using a persistent nodemap
248 248 ///
249 249 /// This is used as fallback when a persistent nodemap is not present.
250 250 /// This happens when the persistent-nodemap experimental feature is not
251 251 /// enabled, or for small revlogs.
252 252 fn rev_from_node_no_persistent_nodemap(
253 253 &self,
254 254 node: NodePrefix,
255 255 ) -> Result<Revision, RevlogError> {
256 256 // Linear scan of the revlog
257 257 // TODO: consider building a non-persistent nodemap in memory to
258 258 // optimize these cases.
259 259 let mut found_by_prefix = None;
260 260 for rev in (0..self.len() as Revision).rev() {
261 261 let index_entry = self.index.get_entry(rev).ok_or_else(|| {
262 262 HgError::corrupted(
263 263 "revlog references a revision not in the index",
264 264 )
265 265 })?;
266 266 if node == *index_entry.hash() {
267 267 return Ok(rev);
268 268 }
269 269 if node.is_prefix_of(index_entry.hash()) {
270 270 if found_by_prefix.is_some() {
271 271 return Err(RevlogError::AmbiguousPrefix);
272 272 }
273 273 found_by_prefix = Some(rev)
274 274 }
275 275 }
276 276 found_by_prefix.ok_or(RevlogError::InvalidRevision)
277 277 }
278 278
279 279 /// Returns whether the given revision exists in this revlog.
280 280 pub fn has_rev(&self, rev: Revision) -> bool {
281 281 self.index.get_entry(rev).is_some()
282 282 }
283 283
284 284 /// Return the full data associated to a revision.
285 285 ///
286 286 /// All entries required to build the final data out of deltas will be
287 287 /// retrieved as needed, and the deltas will be applied to the inital
288 288 /// snapshot to rebuild the final data.
289 289 pub fn get_rev_data(
290 290 &self,
291 291 rev: Revision,
292 292 ) -> Result<Cow<[u8]>, RevlogError> {
293 293 if rev == NULL_REVISION {
294 294 return Ok(Cow::Borrowed(&[]));
295 295 };
296 296 Ok(self.get_entry(rev)?.data()?)
297 297 }
298 298
299 299 /// Check the hash of some given data against the recorded hash.
300 300 pub fn check_hash(
301 301 &self,
302 302 p1: Revision,
303 303 p2: Revision,
304 304 expected: &[u8],
305 305 data: &[u8],
306 306 ) -> bool {
307 307 let e1 = self.index.get_entry(p1);
308 308 let h1 = match e1 {
309 309 Some(ref entry) => entry.hash(),
310 310 None => &NULL_NODE,
311 311 };
312 312 let e2 = self.index.get_entry(p2);
313 313 let h2 = match e2 {
314 314 Some(ref entry) => entry.hash(),
315 315 None => &NULL_NODE,
316 316 };
317 317
318 318 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
319 319 }
320 320
321 321 /// Build the full data of a revision out its snapshot
322 322 /// and its deltas.
323 323 fn build_data_from_deltas(
324 324 snapshot: RevlogEntry,
325 325 deltas: &[RevlogEntry],
326 326 ) -> Result<Vec<u8>, HgError> {
327 327 let snapshot = snapshot.data_chunk()?;
328 328 let deltas = deltas
329 329 .iter()
330 330 .rev()
331 331 .map(RevlogEntry::data_chunk)
332 332 .collect::<Result<Vec<_>, _>>()?;
333 333 let patches: Vec<_> =
334 334 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
335 335 let patch = patch::fold_patch_lists(&patches);
336 336 Ok(patch.apply(&snapshot))
337 337 }
338 338
339 339 /// Return the revlog data.
340 340 fn data(&self) -> &[u8] {
341 341 match &self.data_bytes {
342 342 Some(data_bytes) => data_bytes,
343 343 None => panic!(
344 344 "forgot to load the data or trying to access inline data"
345 345 ),
346 346 }
347 347 }
348 348
349 349 pub fn make_null_entry(&self) -> RevlogEntry {
350 350 RevlogEntry {
351 351 revlog: self,
352 352 rev: NULL_REVISION,
353 353 bytes: b"",
354 354 compressed_len: 0,
355 355 uncompressed_len: 0,
356 356 base_rev_or_base_of_delta_chain: None,
357 357 p1: NULL_REVISION,
358 358 p2: NULL_REVISION,
359 359 flags: NULL_REVLOG_ENTRY_FLAGS,
360 360 hash: NULL_NODE,
361 361 }
362 362 }
363 363
364 364 /// Get an entry of the revlog.
365 365 pub fn get_entry(
366 366 &self,
367 367 rev: Revision,
368 368 ) -> Result<RevlogEntry, RevlogError> {
369 369 if rev == NULL_REVISION {
370 370 return Ok(self.make_null_entry());
371 371 }
372 372 let index_entry = self
373 373 .index
374 374 .get_entry(rev)
375 375 .ok_or(RevlogError::InvalidRevision)?;
376 376 let start = index_entry.offset();
377 377 let end = start + index_entry.compressed_len() as usize;
378 378 let data = if self.index.is_inline() {
379 379 self.index.data(start, end)
380 380 } else {
381 381 &self.data()[start..end]
382 382 };
383 383 let entry = RevlogEntry {
384 384 revlog: self,
385 385 rev,
386 386 bytes: data,
387 387 compressed_len: index_entry.compressed_len(),
388 388 uncompressed_len: index_entry.uncompressed_len(),
389 389 base_rev_or_base_of_delta_chain: if index_entry
390 390 .base_revision_or_base_of_delta_chain()
391 391 == rev
392 392 {
393 393 None
394 394 } else {
395 395 Some(index_entry.base_revision_or_base_of_delta_chain())
396 396 },
397 397 p1: index_entry.p1(),
398 398 p2: index_entry.p2(),
399 399 flags: index_entry.flags(),
400 400 hash: *index_entry.hash(),
401 401 };
402 402 Ok(entry)
403 403 }
404 404
405 405 /// when resolving internal references within revlog, any errors
406 406 /// should be reported as corruption, instead of e.g. "invalid revision"
407 407 fn get_entry_internal(
408 408 &self,
409 409 rev: Revision,
410 410 ) -> Result<RevlogEntry, HgError> {
411 411 self.get_entry(rev)
412 412 .map_err(|_| corrupted(format!("revision {} out of range", rev)))
413 413 }
414 414 }
415 415
416 416 /// The revlog entry's bytes and the necessary informations to extract
417 417 /// the entry's data.
418 418 #[derive(Clone)]
419 419 pub struct RevlogEntry<'revlog> {
420 420 revlog: &'revlog Revlog,
421 421 rev: Revision,
422 422 bytes: &'revlog [u8],
423 423 compressed_len: u32,
424 424 uncompressed_len: i32,
425 425 base_rev_or_base_of_delta_chain: Option<Revision>,
426 426 p1: Revision,
427 427 p2: Revision,
428 428 flags: u16,
429 429 hash: Node,
430 430 }
431 431
432 432 thread_local! {
433 433 // seems fine to [unwrap] here: this can only fail due to memory allocation
434 434 // failing, and it's normal for that to cause panic.
435 435 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
436 436 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
437 437 }
438 438
439 439 fn zstd_decompress_to_buffer(
440 440 bytes: &[u8],
441 441 buf: &mut Vec<u8>,
442 442 ) -> Result<usize, std::io::Error> {
443 443 ZSTD_DECODER
444 444 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
445 445 }
446 446
447 447 impl<'revlog> RevlogEntry<'revlog> {
448 448 pub fn revision(&self) -> Revision {
449 449 self.rev
450 450 }
451 451
452 452 pub fn node(&self) -> &Node {
453 453 &self.hash
454 454 }
455 455
456 456 pub fn uncompressed_len(&self) -> Option<u32> {
457 457 u32::try_from(self.uncompressed_len).ok()
458 458 }
459 459
460 460 pub fn has_p1(&self) -> bool {
461 461 self.p1 != NULL_REVISION
462 462 }
463 463
464 464 pub fn p1_entry(
465 465 &self,
466 466 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
467 467 if self.p1 == NULL_REVISION {
468 468 Ok(None)
469 469 } else {
470 470 Ok(Some(self.revlog.get_entry(self.p1)?))
471 471 }
472 472 }
473 473
474 474 pub fn p2_entry(
475 475 &self,
476 476 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
477 477 if self.p2 == NULL_REVISION {
478 478 Ok(None)
479 479 } else {
480 480 Ok(Some(self.revlog.get_entry(self.p2)?))
481 481 }
482 482 }
483 483
484 484 pub fn p1(&self) -> Option<Revision> {
485 485 if self.p1 == NULL_REVISION {
486 486 None
487 487 } else {
488 488 Some(self.p1)
489 489 }
490 490 }
491 491
492 492 pub fn p2(&self) -> Option<Revision> {
493 493 if self.p2 == NULL_REVISION {
494 494 None
495 495 } else {
496 496 Some(self.p2)
497 497 }
498 498 }
499 499
500 500 pub fn is_censored(&self) -> bool {
501 501 (self.flags & REVISION_FLAG_CENSORED) != 0
502 502 }
503 503
504 504 pub fn has_length_affecting_flag_processor(&self) -> bool {
505 505 // Relevant Python code: revlog.size()
506 506 // note: ELLIPSIS is known to not change the content
507 507 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
508 508 }
509 509
510 510 /// The data for this entry, after resolving deltas if any.
511 511 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
512 512 let mut entry = self.clone();
513 513 let mut delta_chain = vec![];
514 514
515 515 // The meaning of `base_rev_or_base_of_delta_chain` depends on
516 516 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
517 517 // `mercurial/revlogutils/constants.py` and the code in
518 518 // [_chaininfo] and in [index_deltachain].
519 519 let uses_generaldelta = self.revlog.index.uses_generaldelta();
520 520 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
521 521 let base_rev = if uses_generaldelta {
522 522 base_rev
523 523 } else {
524 524 entry.rev - 1
525 525 };
526 526 delta_chain.push(entry);
527 527 entry = self.revlog.get_entry_internal(base_rev)?;
528 528 }
529 529
530 530 let data = if delta_chain.is_empty() {
531 531 entry.data_chunk()?
532 532 } else {
533 533 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
534 534 };
535 535
536 536 Ok(data)
537 537 }
538 538
539 539 fn check_data(
540 540 &self,
541 541 data: Cow<'revlog, [u8]>,
542 542 ) -> Result<Cow<'revlog, [u8]>, HgError> {
543 543 if self.revlog.check_hash(
544 544 self.p1,
545 545 self.p2,
546 546 self.hash.as_bytes(),
547 547 &data,
548 548 ) {
549 549 Ok(data)
550 550 } else {
551 551 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
552 552 return Err(HgError::unsupported(
553 553 "ellipsis revisions are not supported by rhg",
554 554 ));
555 555 }
556 556 Err(corrupted(format!(
557 557 "hash check failed for revision {}",
558 558 self.rev
559 559 )))
560 560 }
561 561 }
562 562
563 563 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
564 564 let data = self.rawdata()?;
565 if self.rev == NULL_REVISION {
566 return Ok(data);
567 }
565 568 if self.is_censored() {
566 569 return Err(HgError::CensoredNodeError);
567 570 }
568 571 self.check_data(data)
569 572 }
570 573
571 574 /// Extract the data contained in the entry.
572 575 /// This may be a delta. (See `is_delta`.)
573 576 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
574 577 if self.bytes.is_empty() {
575 578 return Ok(Cow::Borrowed(&[]));
576 579 }
577 580 match self.bytes[0] {
578 581 // Revision data is the entirety of the entry, including this
579 582 // header.
580 583 b'\0' => Ok(Cow::Borrowed(self.bytes)),
581 584 // Raw revision data follows.
582 585 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
583 586 // zlib (RFC 1950) data.
584 587 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
585 588 // zstd data.
586 589 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
587 590 // A proper new format should have had a repo/store requirement.
588 591 format_type => Err(corrupted(format!(
589 592 "unknown compression header '{}'",
590 593 format_type
591 594 ))),
592 595 }
593 596 }
594 597
595 598 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
596 599 let mut decoder = ZlibDecoder::new(self.bytes);
597 600 if self.is_delta() {
598 601 let mut buf = Vec::with_capacity(self.compressed_len as usize);
599 602 decoder
600 603 .read_to_end(&mut buf)
601 604 .map_err(|e| corrupted(e.to_string()))?;
602 605 Ok(buf)
603 606 } else {
604 607 let cap = self.uncompressed_len.max(0) as usize;
605 608 let mut buf = vec![0; cap];
606 609 decoder
607 610 .read_exact(&mut buf)
608 611 .map_err(|e| corrupted(e.to_string()))?;
609 612 Ok(buf)
610 613 }
611 614 }
612 615
613 616 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
614 617 let cap = self.uncompressed_len.max(0) as usize;
615 618 if self.is_delta() {
616 619 // [cap] is usually an over-estimate of the space needed because
617 620 // it's the length of delta-decoded data, but we're interested
618 621 // in the size of the delta.
619 622 // This means we have to [shrink_to_fit] to avoid holding on
620 623 // to a large chunk of memory, but it also means we must have a
621 624 // fallback branch, for the case when the delta is longer than
622 625 // the original data (surprisingly, this does happen in practice)
623 626 let mut buf = Vec::with_capacity(cap);
624 627 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
625 628 Ok(_) => buf.shrink_to_fit(),
626 629 Err(_) => {
627 630 buf.clear();
628 631 zstd::stream::copy_decode(self.bytes, &mut buf)
629 632 .map_err(|e| corrupted(e.to_string()))?;
630 633 }
631 634 };
632 635 Ok(buf)
633 636 } else {
634 637 let mut buf = Vec::with_capacity(cap);
635 638 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
636 639 .map_err(|e| corrupted(e.to_string()))?;
637 640 if len != self.uncompressed_len as usize {
638 641 Err(corrupted("uncompressed length does not match"))
639 642 } else {
640 643 Ok(buf)
641 644 }
642 645 }
643 646 }
644 647
645 648 /// Tell if the entry is a snapshot or a delta
646 649 /// (influences on decompression).
647 650 fn is_delta(&self) -> bool {
648 651 self.base_rev_or_base_of_delta_chain.is_some()
649 652 }
650 653 }
651 654
652 655 /// Calculate the hash of a revision given its data and its parents.
653 656 fn hash(
654 657 data: &[u8],
655 658 p1_hash: &[u8],
656 659 p2_hash: &[u8],
657 660 ) -> [u8; NODE_BYTES_LENGTH] {
658 661 let mut hasher = Sha1::new();
659 662 let (a, b) = (p1_hash, p2_hash);
660 663 if a > b {
661 664 hasher.update(b);
662 665 hasher.update(a);
663 666 } else {
664 667 hasher.update(a);
665 668 hasher.update(b);
666 669 }
667 670 hasher.update(data);
668 671 *hasher.finalize().as_ref()
669 672 }
670 673
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
    use itertools::Itertools;

    /// An empty index file gives an empty revlog, in which only the null
    /// revision can be looked up.
    #[test]
    fn test_empty() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
        assert!(revlog.is_empty());
        assert_eq!(revlog.len(), 0);
        assert!(revlog.get_entry(0).is_err());
        assert!(!revlog.has_rev(0));
        assert_eq!(
            revlog.rev_from_node(NULL_NODE.into()).unwrap(),
            NULL_REVISION
        );
        // Unwrap the `Result` directly so that a failure reports the
        // underlying `RevlogError` instead of a bare `None`.
        let null_entry = revlog.get_entry(NULL_REVISION).unwrap();
        assert_eq!(null_entry.revision(), NULL_REVISION);
        assert!(null_entry.data().unwrap().is_empty());
    }

    /// Three inline index entries: two without parents, and a third one
    /// whose parents are the first two.
    #[test]
    fn test_inline() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
            .unwrap();
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        let entry2_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_p1(0)
            .with_p2(1)
            .with_node(node2)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        // Revision 0: no parents.
        let entry0 = revlog.get_entry(0).unwrap();
        assert_eq!(entry0.revision(), 0);
        assert_eq!(*entry0.node(), node0);
        assert!(!entry0.has_p1());
        assert_eq!(entry0.p1(), None);
        assert_eq!(entry0.p2(), None);
        let p1_entry = entry0.p1_entry().unwrap();
        assert!(p1_entry.is_none());
        let p2_entry = entry0.p2_entry().unwrap();
        assert!(p2_entry.is_none());

        // Revision 1: no parents either.
        let entry1 = revlog.get_entry(1).unwrap();
        assert_eq!(entry1.revision(), 1);
        assert_eq!(*entry1.node(), node1);
        assert!(!entry1.has_p1());
        assert_eq!(entry1.p1(), None);
        assert_eq!(entry1.p2(), None);
        let p1_entry = entry1.p1_entry().unwrap();
        assert!(p1_entry.is_none());
        let p2_entry = entry1.p2_entry().unwrap();
        assert!(p2_entry.is_none());

        // Revision 2: parents are revisions 0 and 1.
        let entry2 = revlog.get_entry(2).unwrap();
        assert_eq!(entry2.revision(), 2);
        assert_eq!(*entry2.node(), node2);
        assert!(entry2.has_p1());
        assert_eq!(entry2.p1(), Some(0));
        assert_eq!(entry2.p2(), Some(1));
        let p1_entry = entry2.p1_entry().unwrap();
        assert!(p1_entry.is_some());
        assert_eq!(p1_entry.unwrap().revision(), 0);
        let p2_entry = entry2.p2_entry().unwrap();
        assert!(p2_entry.is_some());
        assert_eq!(p2_entry.unwrap().revision(), 1);
    }

    /// Node and node-prefix lookups, including a prefix that is ambiguous
    /// between the null node and a stored node.
    #[test]
    fn test_nodemap() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };

        // building a revlog with a forced Node starting with zeros
        // This is a corruption, but it does not preclude using the nodemap
        // if we don't try and access the data
        let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        // accessing the data shows the corruption
        revlog.get_entry(0).unwrap().data().unwrap_err();

        assert_eq!(
            revlog.rev_from_node(NULL_NODE.into()).unwrap(),
            NULL_REVISION
        );
        assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
        assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
        assert_eq!(
            revlog
                .rev_from_node(NodePrefix::from_hex("000").unwrap())
                .unwrap(),
            NULL_REVISION
        );
        assert_eq!(
            revlog
                .rev_from_node(NodePrefix::from_hex("b00").unwrap())
                .unwrap(),
            1
        );
        // RevlogError does not implement PartialEq
        // (ultimately because io::Error does not)
        match revlog
            .rev_from_node(NodePrefix::from_hex("00").unwrap())
            .expect_err("Expected to give AmbiguousPrefix error")
        {
            RevlogError::AmbiguousPrefix => (),
            e => {
                panic!("Got another error than AmbiguousPrefix: {:?}", e);
            }
        };
    }
}
General Comments 0
You need to be logged in to leave comments. Login now