##// END OF EJS Templates
rhg: handle null changelog and manifest revisions...
Arseniy Alekseyev -
r49012:61ce70fd default
parent child Browse files
Show More
@@ -0,0 +1,23
1 Create a repo such that the changelog entry refers to a null manifest node:
2
3 $ hg init a
4 $ cd a
5 $ hg log
6 $ touch x
7 $ hg add x
8 $ hg commit -m "init"
9 $ hg rm x
10 $ hg commit -q --amend
11
12 $ wc -c < .hg/store/00manifest.i
13 0
14
15 Make sure that the manifest can be read (and is empty):
16
17 $ hg --config rhg.on-unsupported=abort files -r .
18 [1]
19
20 Test a null changelog rev, too:
21
22 $ hg --config rhg.on-unsupported=abort files -r 0000000000000000000000000000000000000000
23 [1]
@@ -1,67 +1,67
1 1 use crate::errors::HgError;
2 2 use crate::repo::Repo;
3 use crate::revlog::node::NULL_NODE;
3 4 use crate::revlog::revlog::{Revlog, RevlogError};
4 5 use crate::revlog::Revision;
5 6 use crate::revlog::{Node, NodePrefix};
6 7
7 8 /// A specialized `Revlog` to work with `changelog` data format.
8 9 pub struct Changelog {
9 10 /// The generic `revlog` format.
10 11 pub(crate) revlog: Revlog,
11 12 }
12 13
13 14 impl Changelog {
14 15 /// Open the `changelog` of a repository given by its root.
15 16 pub fn open(repo: &Repo) -> Result<Self, HgError> {
16 17 let revlog = Revlog::open(repo, "00changelog.i", None)?;
17 18 Ok(Self { revlog })
18 19 }
19 20
20 21 /// Return the `ChangelogEntry` for the given node ID.
21 22 pub fn data_for_node(
22 23 &self,
23 24 node: NodePrefix,
24 25 ) -> Result<ChangelogEntry, RevlogError> {
25 26 let rev = self.revlog.rev_from_node(node)?;
26 27 self.data_for_rev(rev)
27 28 }
28 29
29 30 /// Return the `ChangelogEntry` of the given revision number.
30 31 pub fn data_for_rev(
31 32 &self,
32 33 rev: Revision,
33 34 ) -> Result<ChangelogEntry, RevlogError> {
34 35 let bytes = self.revlog.get_rev_data(rev)?;
35 36 Ok(ChangelogEntry { bytes })
36 37 }
37 38
38 39 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
39 40 self.revlog.node_from_rev(rev)
40 41 }
41 42 }
42 43
43 44 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
44 45 #[derive(Debug)]
45 46 pub struct ChangelogEntry {
46 47 /// The data bytes of the `changelog` entry.
47 48 bytes: Vec<u8>,
48 49 }
49 50
50 51 impl ChangelogEntry {
51 52 /// Return an iterator over the lines of the entry.
52 53 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
53 54 self.bytes
54 55 .split(|b| b == &b'\n')
55 56 .filter(|line| !line.is_empty())
56 57 }
57 58
58 59 /// Return the node id of the `manifest` referenced by this `changelog`
59 60 /// entry.
60 61 pub fn manifest_node(&self) -> Result<Node, HgError> {
61 Node::from_hex_for_repo(
62 self.lines()
63 .next()
64 .ok_or_else(|| HgError::corrupted("empty changelog entry"))?,
65 )
62 match self.lines().next() {
63 None => Ok(NULL_NODE),
64 Some(x) => Node::from_hex_for_repo(x),
66 65 }
67 66 }
67 }
@@ -1,403 +1,406
1 1 use std::convert::TryInto;
2 2 use std::ops::Deref;
3 3
4 4 use byteorder::{BigEndian, ByteOrder};
5 5
6 6 use crate::errors::HgError;
7 7 use crate::revlog::node::Node;
8 8 use crate::revlog::{Revision, NULL_REVISION};
9 9
10 10 pub const INDEX_ENTRY_SIZE: usize = 64;
11 11
12 12 /// A Revlog index
13 13 pub struct Index {
14 14 bytes: Box<dyn Deref<Target = [u8]> + Send>,
15 15 /// Offsets of starts of index blocks.
16 16 /// Only needed when the index is interleaved with data.
17 17 offsets: Option<Vec<usize>>,
18 18 }
19 19
20 20 impl Index {
21 21 /// Create an index from bytes.
22 22 /// Calculate the start of each entry when is_inline is true.
23 23 pub fn new(
24 24 bytes: Box<dyn Deref<Target = [u8]> + Send>,
25 25 ) -> Result<Self, HgError> {
26 26 if is_inline(&bytes) {
27 27 let mut offset: usize = 0;
28 28 let mut offsets = Vec::new();
29 29
30 30 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
31 31 offsets.push(offset);
32 32 let end = offset + INDEX_ENTRY_SIZE;
33 33 let entry = IndexEntry {
34 34 bytes: &bytes[offset..end],
35 35 offset_override: None,
36 36 };
37 37
38 38 offset += INDEX_ENTRY_SIZE + entry.compressed_len();
39 39 }
40 40
41 41 if offset == bytes.len() {
42 42 Ok(Self {
43 43 bytes,
44 44 offsets: Some(offsets),
45 45 })
46 46 } else {
47 47 Err(HgError::corrupted("unexpected inline revlog length")
48 48 .into())
49 49 }
50 50 } else {
51 51 Ok(Self {
52 52 bytes,
53 53 offsets: None,
54 54 })
55 55 }
56 56 }
57 57
58 58 /// Value of the inline flag.
59 59 pub fn is_inline(&self) -> bool {
60 60 is_inline(&self.bytes)
61 61 }
62 62
63 63 /// Return a slice of bytes if `revlog` is inline. Panic if not.
64 64 pub fn data(&self, start: usize, end: usize) -> &[u8] {
65 65 if !self.is_inline() {
66 66 panic!("tried to access data in the index of a revlog that is not inline");
67 67 }
68 68 &self.bytes[start..end]
69 69 }
70 70
71 71 /// Return number of entries of the revlog index.
72 72 pub fn len(&self) -> usize {
73 73 if let Some(offsets) = &self.offsets {
74 74 offsets.len()
75 75 } else {
76 76 self.bytes.len() / INDEX_ENTRY_SIZE
77 77 }
78 78 }
79 79
80 80 /// Returns `true` if the `Index` has zero `entries`.
81 81 pub fn is_empty(&self) -> bool {
82 82 self.len() == 0
83 83 }
84 84
85 85 /// Return the index entry corresponding to the given revision if it
86 86 /// exists.
87 87 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
88 88 if rev == NULL_REVISION {
89 89 return None;
90 90 }
91 91 if let Some(offsets) = &self.offsets {
92 92 self.get_entry_inline(rev, offsets)
93 93 } else {
94 94 self.get_entry_separated(rev)
95 95 }
96 96 }
97 97
98 98 fn get_entry_inline(
99 99 &self,
100 100 rev: Revision,
101 101 offsets: &[usize],
102 102 ) -> Option<IndexEntry> {
103 103 let start = *offsets.get(rev as usize)?;
104 104 let end = start.checked_add(INDEX_ENTRY_SIZE)?;
105 105 let bytes = &self.bytes[start..end];
106 106
107 107 // See IndexEntry for an explanation of this override.
108 108 let offset_override = Some(end);
109 109
110 110 Some(IndexEntry {
111 111 bytes,
112 112 offset_override,
113 113 })
114 114 }
115 115
116 116 fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
117 117 let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE;
118 118 if rev as usize >= max_rev {
119 119 return None;
120 120 }
121 121 let start = rev as usize * INDEX_ENTRY_SIZE;
122 122 let end = start + INDEX_ENTRY_SIZE;
123 123 let bytes = &self.bytes[start..end];
124 124
125 125 // Override the offset of the first revision as its bytes are used
126 126 // for the index's metadata (saving space because it is always 0)
127 127 let offset_override = if rev == 0 { Some(0) } else { None };
128 128
129 129 Some(IndexEntry {
130 130 bytes,
131 131 offset_override,
132 132 })
133 133 }
134 134 }
135 135
136 136 impl super::RevlogIndex for Index {
137 137 fn len(&self) -> usize {
138 138 self.len()
139 139 }
140 140
141 141 fn node(&self, rev: Revision) -> Option<&Node> {
142 142 self.get_entry(rev).map(|entry| entry.hash())
143 143 }
144 144 }
145 145
146 146 #[derive(Debug)]
147 147 pub struct IndexEntry<'a> {
148 148 bytes: &'a [u8],
149 149 /// Allows overriding the offset value of the entry.
150 150 ///
151 151 /// For interleaved index and data, the offset stored in the index
152 152 /// corresponds to the separated data offset.
153 153 /// It has to be overridden with the actual offset in the interleaved
154 154 /// index which is just after the index block.
155 155 ///
156 156 /// For separated index and data, the offset stored in the first index
157 157 /// entry is mixed with the index headers.
158 158 /// It has to be overridden with 0.
159 159 offset_override: Option<usize>,
160 160 }
161 161
162 162 impl<'a> IndexEntry<'a> {
163 163 /// Return the offset of the data.
164 164 pub fn offset(&self) -> usize {
165 165 if let Some(offset_override) = self.offset_override {
166 166 offset_override
167 167 } else {
168 168 let mut bytes = [0; 8];
169 169 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
170 170 BigEndian::read_u64(&bytes[..]) as usize
171 171 }
172 172 }
173 173
174 174 /// Return the compressed length of the data.
175 175 pub fn compressed_len(&self) -> usize {
176 176 BigEndian::read_u32(&self.bytes[8..=11]) as usize
177 177 }
178 178
179 179 /// Return the uncompressed length of the data.
180 180 pub fn uncompressed_len(&self) -> usize {
181 181 BigEndian::read_u32(&self.bytes[12..=15]) as usize
182 182 }
183 183
184 184 /// Return the revision upon which the data has been derived.
185 185 pub fn base_revision(&self) -> Revision {
186 186 // TODO Maybe return an Option when base_revision == rev?
187 187 // Requires adding rev to IndexEntry
188 188
189 189 BigEndian::read_i32(&self.bytes[16..])
190 190 }
191 191
192 192 pub fn p1(&self) -> Revision {
193 193 BigEndian::read_i32(&self.bytes[24..])
194 194 }
195 195
196 196 pub fn p2(&self) -> Revision {
197 197 BigEndian::read_i32(&self.bytes[28..])
198 198 }
199 199
200 200 /// Return the hash of revision's full text.
201 201 ///
202 202 /// Currently, SHA-1 is used and only the first 20 bytes of this field
203 203 /// are used.
204 204 pub fn hash(&self) -> &'a Node {
205 205 (&self.bytes[32..52]).try_into().unwrap()
206 206 }
207 207 }
208 208
209 209 /// Value of the inline flag.
210 210 pub fn is_inline(index_bytes: &[u8]) -> bool {
211 if index_bytes.len() < 4 {
212 return true;
213 }
211 214 match &index_bytes[0..=1] {
212 215 [0, 0] | [0, 2] => false,
213 216 _ => true,
214 217 }
215 218 }
216 219
217 220 #[cfg(test)]
218 221 mod tests {
219 222 use super::*;
220 223
221 224 #[cfg(test)]
222 225 #[derive(Debug, Copy, Clone)]
223 226 pub struct IndexEntryBuilder {
224 227 is_first: bool,
225 228 is_inline: bool,
226 229 is_general_delta: bool,
227 230 version: u16,
228 231 offset: usize,
229 232 compressed_len: usize,
230 233 uncompressed_len: usize,
231 234 base_revision: Revision,
232 235 }
233 236
234 237 #[cfg(test)]
235 238 impl IndexEntryBuilder {
236 239 pub fn new() -> Self {
237 240 Self {
238 241 is_first: false,
239 242 is_inline: false,
240 243 is_general_delta: true,
241 244 version: 2,
242 245 offset: 0,
243 246 compressed_len: 0,
244 247 uncompressed_len: 0,
245 248 base_revision: 0,
246 249 }
247 250 }
248 251
249 252 pub fn is_first(&mut self, value: bool) -> &mut Self {
250 253 self.is_first = value;
251 254 self
252 255 }
253 256
254 257 pub fn with_inline(&mut self, value: bool) -> &mut Self {
255 258 self.is_inline = value;
256 259 self
257 260 }
258 261
259 262 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
260 263 self.is_general_delta = value;
261 264 self
262 265 }
263 266
264 267 pub fn with_version(&mut self, value: u16) -> &mut Self {
265 268 self.version = value;
266 269 self
267 270 }
268 271
269 272 pub fn with_offset(&mut self, value: usize) -> &mut Self {
270 273 self.offset = value;
271 274 self
272 275 }
273 276
274 277 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
275 278 self.compressed_len = value;
276 279 self
277 280 }
278 281
279 282 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
280 283 self.uncompressed_len = value;
281 284 self
282 285 }
283 286
284 287 pub fn with_base_revision(&mut self, value: Revision) -> &mut Self {
285 288 self.base_revision = value;
286 289 self
287 290 }
288 291
289 292 pub fn build(&self) -> Vec<u8> {
290 293 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
291 294 if self.is_first {
292 295 bytes.extend(&match (self.is_general_delta, self.is_inline) {
293 296 (false, false) => [0u8, 0],
294 297 (false, true) => [0u8, 1],
295 298 (true, false) => [0u8, 2],
296 299 (true, true) => [0u8, 3],
297 300 });
298 301 bytes.extend(&self.version.to_be_bytes());
299 302 // Remaining offset bytes.
300 303 bytes.extend(&[0u8; 2]);
301 304 } else {
302 305 // Offset stored on 48 bits (6 bytes)
303 306 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
304 307 }
305 308 bytes.extend(&[0u8; 2]); // Revision flags.
306 309 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
307 310 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
308 311 bytes.extend(&self.base_revision.to_be_bytes());
309 312 bytes
310 313 }
311 314 }
312 315
313 316 #[test]
314 317 fn is_not_inline_when_no_inline_flag_test() {
315 318 let bytes = IndexEntryBuilder::new()
316 319 .is_first(true)
317 320 .with_general_delta(false)
318 321 .with_inline(false)
319 322 .build();
320 323
321 324 assert_eq!(is_inline(&bytes), false)
322 325 }
323 326
324 327 #[test]
325 328 fn is_inline_when_inline_flag_test() {
326 329 let bytes = IndexEntryBuilder::new()
327 330 .is_first(true)
328 331 .with_general_delta(false)
329 332 .with_inline(true)
330 333 .build();
331 334
332 335 assert_eq!(is_inline(&bytes), true)
333 336 }
334 337
335 338 #[test]
336 339 fn is_inline_when_inline_and_generaldelta_flags_test() {
337 340 let bytes = IndexEntryBuilder::new()
338 341 .is_first(true)
339 342 .with_general_delta(true)
340 343 .with_inline(true)
341 344 .build();
342 345
343 346 assert_eq!(is_inline(&bytes), true)
344 347 }
345 348
346 349 #[test]
347 350 fn test_offset() {
348 351 let bytes = IndexEntryBuilder::new().with_offset(1).build();
349 352 let entry = IndexEntry {
350 353 bytes: &bytes,
351 354 offset_override: None,
352 355 };
353 356
354 357 assert_eq!(entry.offset(), 1)
355 358 }
356 359
357 360 #[test]
358 361 fn test_with_overridden_offset() {
359 362 let bytes = IndexEntryBuilder::new().with_offset(1).build();
360 363 let entry = IndexEntry {
361 364 bytes: &bytes,
362 365 offset_override: Some(2),
363 366 };
364 367
365 368 assert_eq!(entry.offset(), 2)
366 369 }
367 370
368 371 #[test]
369 372 fn test_compressed_len() {
370 373 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
371 374 let entry = IndexEntry {
372 375 bytes: &bytes,
373 376 offset_override: None,
374 377 };
375 378
376 379 assert_eq!(entry.compressed_len(), 1)
377 380 }
378 381
379 382 #[test]
380 383 fn test_uncompressed_len() {
381 384 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
382 385 let entry = IndexEntry {
383 386 bytes: &bytes,
384 387 offset_override: None,
385 388 };
386 389
387 390 assert_eq!(entry.uncompressed_len(), 1)
388 391 }
389 392
390 393 #[test]
391 394 fn test_base_revision() {
392 395 let bytes = IndexEntryBuilder::new().with_base_revision(1).build();
393 396 let entry = IndexEntry {
394 397 bytes: &bytes,
395 398 offset_override: None,
396 399 };
397 400
398 401 assert_eq!(entry.base_revision(), 1)
399 402 }
400 403 }
401 404
402 405 #[cfg(test)]
403 406 pub use tests::IndexEntryBuilder;
@@ -1,412 +1,423
1 1 use std::borrow::Cow;
2 2 use std::io::Read;
3 3 use std::ops::Deref;
4 4 use std::path::Path;
5 5
6 6 use byteorder::{BigEndian, ByteOrder};
7 7 use flate2::read::ZlibDecoder;
8 8 use micro_timer::timed;
9 9 use sha1::{Digest, Sha1};
10 10 use zstd;
11 11
12 12 use super::index::Index;
13 13 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
14 14 use super::nodemap;
15 15 use super::nodemap::{NodeMap, NodeMapError};
16 16 use super::nodemap_docket::NodeMapDocket;
17 17 use super::patch;
18 18 use crate::errors::HgError;
19 19 use crate::repo::Repo;
20 20 use crate::revlog::Revision;
21 21 use crate::{Node, NULL_REVISION};
22 22
23 23 #[derive(derive_more::From)]
24 24 pub enum RevlogError {
25 25 InvalidRevision,
26 26 /// Working directory is not supported
27 27 WDirUnsupported,
28 28 /// Found more than one entry whose ID matches the requested prefix
29 29 AmbiguousPrefix,
30 30 #[from]
31 31 Other(HgError),
32 32 }
33 33
34 34 impl From<NodeMapError> for RevlogError {
35 35 fn from(error: NodeMapError) -> Self {
36 36 match error {
37 37 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
38 38 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
39 39 }
40 40 }
41 41 }
42 42
43 43 impl RevlogError {
44 44 fn corrupted() -> Self {
45 45 RevlogError::Other(HgError::corrupted("corrupted revlog"))
46 46 }
47 47 }
48 48
49 49 /// Read only implementation of revlog.
50 50 pub struct Revlog {
51 51 /// When index and data are not interleaved: bytes of the revlog index.
52 52 /// When index and data are interleaved: bytes of the revlog index and
53 53 /// data.
54 54 index: Index,
55 55 /// When index and data are not interleaved: bytes of the revlog data
56 56 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
57 57 /// When present on disk: the persistent nodemap for this revlog
58 58 nodemap: Option<nodemap::NodeTree>,
59 59 }
60 60
61 61 impl Revlog {
62 62 /// Open a revlog index file.
63 63 ///
64 64 /// It will also open the associated data file if index and data are not
65 65 /// interleaved.
66 66 #[timed]
67 67 pub fn open(
68 68 repo: &Repo,
69 69 index_path: impl AsRef<Path>,
70 70 data_path: Option<&Path>,
71 71 ) -> Result<Self, HgError> {
72 72 let index_path = index_path.as_ref();
73 73 let index_mmap = repo.store_vfs().mmap_open(&index_path)?;
74 74
75 let version = get_version(&index_mmap);
75 let version = get_version(&index_mmap)?;
76 76 if version != 1 {
77 77 // A proper new version should have had a repo/store requirement.
78 78 return Err(HgError::corrupted("corrupted revlog"));
79 79 }
80 80
81 81 let index = Index::new(Box::new(index_mmap))?;
82 82
83 83 let default_data_path = index_path.with_extension("d");
84 84
85 85 // type annotation required
86 86 // won't recognize Mmap as Deref<Target = [u8]>
87 87 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
88 88 if index.is_inline() {
89 89 None
90 90 } else {
91 91 let data_path = data_path.unwrap_or(&default_data_path);
92 92 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
93 93 Some(Box::new(data_mmap))
94 94 };
95 95
96 96 let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map(
97 97 |(docket, data)| {
98 98 nodemap::NodeTree::load_bytes(
99 99 Box::new(data),
100 100 docket.data_length,
101 101 )
102 102 },
103 103 );
104 104
105 105 Ok(Revlog {
106 106 index,
107 107 data_bytes,
108 108 nodemap,
109 109 })
110 110 }
111 111
112 112 /// Return number of entries of the `Revlog`.
113 113 pub fn len(&self) -> usize {
114 114 self.index.len()
115 115 }
116 116
117 117 /// Returns `true` if the `Revlog` has zero `entries`.
118 118 pub fn is_empty(&self) -> bool {
119 119 self.index.is_empty()
120 120 }
121 121
122 122 /// Returns the node ID for the given revision number, if it exists in this
123 123 /// revlog
124 124 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
125 125 Some(self.index.get_entry(rev)?.hash())
126 126 }
127 127
128 128 /// Return the revision number for the given node ID, if it exists in this
129 129 /// revlog
130 130 #[timed]
131 131 pub fn rev_from_node(
132 132 &self,
133 133 node: NodePrefix,
134 134 ) -> Result<Revision, RevlogError> {
135 135 if node.is_prefix_of(&NULL_NODE) {
136 136 return Ok(NULL_REVISION);
137 137 }
138 138
139 139 if let Some(nodemap) = &self.nodemap {
140 140 return nodemap
141 141 .find_bin(&self.index, node)?
142 142 .ok_or(RevlogError::InvalidRevision);
143 143 }
144 144
145 145 // Fallback to linear scan when a persistent nodemap is not present.
146 146 // This happens when the persistent-nodemap experimental feature is not
147 147 // enabled, or for small revlogs.
148 148 //
149 149 // TODO: consider building a non-persistent nodemap in memory to
150 150 // optimize these cases.
151 151 let mut found_by_prefix = None;
152 152 for rev in (0..self.len() as Revision).rev() {
153 153 let index_entry =
154 154 self.index.get_entry(rev).ok_or(HgError::corrupted(
155 155 "revlog references a revision not in the index",
156 156 ))?;
157 157 if node == *index_entry.hash() {
158 158 return Ok(rev);
159 159 }
160 160 if node.is_prefix_of(index_entry.hash()) {
161 161 if found_by_prefix.is_some() {
162 162 return Err(RevlogError::AmbiguousPrefix);
163 163 }
164 164 found_by_prefix = Some(rev)
165 165 }
166 166 }
167 167 found_by_prefix.ok_or(RevlogError::InvalidRevision)
168 168 }
169 169
170 170 /// Returns whether the given revision exists in this revlog.
171 171 pub fn has_rev(&self, rev: Revision) -> bool {
172 172 self.index.get_entry(rev).is_some()
173 173 }
174 174
175 175 /// Return the full data associated to a revision.
176 176 ///
177 177 /// All entries required to build the final data out of deltas will be
178 178 /// retrieved as needed, and the deltas will be applied to the inital
179 179 /// snapshot to rebuild the final data.
180 180 #[timed]
181 181 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
182 if rev == NULL_REVISION {
183 return Ok(vec![]);
184 };
182 185 // Todo return -> Cow
183 186 let mut entry = self.get_entry(rev)?;
184 187 let mut delta_chain = vec![];
185 188 while let Some(base_rev) = entry.base_rev {
186 189 delta_chain.push(entry);
187 190 entry = self
188 191 .get_entry(base_rev)
189 192 .map_err(|_| RevlogError::corrupted())?;
190 193 }
191 194
192 195 // TODO do not look twice in the index
193 196 let index_entry = self
194 197 .index
195 198 .get_entry(rev)
196 199 .ok_or(RevlogError::InvalidRevision)?;
197 200
198 201 let data: Vec<u8> = if delta_chain.is_empty() {
199 202 entry.data()?.into()
200 203 } else {
201 204 Revlog::build_data_from_deltas(entry, &delta_chain)?
202 205 };
203 206
204 207 if self.check_hash(
205 208 index_entry.p1(),
206 209 index_entry.p2(),
207 210 index_entry.hash().as_bytes(),
208 211 &data,
209 212 ) {
210 213 Ok(data)
211 214 } else {
212 215 Err(RevlogError::corrupted())
213 216 }
214 217 }
215 218
216 219 /// Check the hash of some given data against the recorded hash.
217 220 pub fn check_hash(
218 221 &self,
219 222 p1: Revision,
220 223 p2: Revision,
221 224 expected: &[u8],
222 225 data: &[u8],
223 226 ) -> bool {
224 227 let e1 = self.index.get_entry(p1);
225 228 let h1 = match e1 {
226 229 Some(ref entry) => entry.hash(),
227 230 None => &NULL_NODE,
228 231 };
229 232 let e2 = self.index.get_entry(p2);
230 233 let h2 = match e2 {
231 234 Some(ref entry) => entry.hash(),
232 235 None => &NULL_NODE,
233 236 };
234 237
235 238 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
236 239 }
237 240
238 241 /// Build the full data of a revision out of its snapshot
239 242 /// and its deltas.
240 243 #[timed]
241 244 fn build_data_from_deltas(
242 245 snapshot: RevlogEntry,
243 246 deltas: &[RevlogEntry],
244 247 ) -> Result<Vec<u8>, RevlogError> {
245 248 let snapshot = snapshot.data()?;
246 249 let deltas = deltas
247 250 .iter()
248 251 .rev()
249 252 .map(RevlogEntry::data)
250 253 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
251 254 let patches: Vec<_> =
252 255 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
253 256 let patch = patch::fold_patch_lists(&patches);
254 257 Ok(patch.apply(&snapshot))
255 258 }
256 259
257 260 /// Return the revlog data.
258 261 fn data(&self) -> &[u8] {
259 262 match self.data_bytes {
260 263 Some(ref data_bytes) => &data_bytes,
261 264 None => panic!(
262 265 "forgot to load the data or trying to access inline data"
263 266 ),
264 267 }
265 268 }
266 269
267 270 /// Get an entry of the revlog.
268 271 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
269 272 let index_entry = self
270 273 .index
271 274 .get_entry(rev)
272 275 .ok_or(RevlogError::InvalidRevision)?;
273 276 let start = index_entry.offset();
274 277 let end = start + index_entry.compressed_len();
275 278 let data = if self.index.is_inline() {
276 279 self.index.data(start, end)
277 280 } else {
278 281 &self.data()[start..end]
279 282 };
280 283 let entry = RevlogEntry {
281 284 rev,
282 285 bytes: data,
283 286 compressed_len: index_entry.compressed_len(),
284 287 uncompressed_len: index_entry.uncompressed_len(),
285 288 base_rev: if index_entry.base_revision() == rev {
286 289 None
287 290 } else {
288 291 Some(index_entry.base_revision())
289 292 },
290 293 };
291 294 Ok(entry)
292 295 }
293 296 }
294 297
295 298 /// The revlog entry's bytes and the necessary information to extract
296 299 /// the entry's data.
297 300 #[derive(Debug)]
298 301 pub struct RevlogEntry<'a> {
299 302 rev: Revision,
300 303 bytes: &'a [u8],
301 304 compressed_len: usize,
302 305 uncompressed_len: usize,
303 306 base_rev: Option<Revision>,
304 307 }
305 308
306 309 impl<'a> RevlogEntry<'a> {
307 310 pub fn revision(&self) -> Revision {
308 311 self.rev
309 312 }
310 313
311 314 /// Extract the data contained in the entry.
312 315 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
313 316 if self.bytes.is_empty() {
314 317 return Ok(Cow::Borrowed(&[]));
315 318 }
316 319 match self.bytes[0] {
317 320 // Revision data is the entirety of the entry, including this
318 321 // header.
319 322 b'\0' => Ok(Cow::Borrowed(self.bytes)),
320 323 // Raw revision data follows.
321 324 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
322 325 // zlib (RFC 1950) data.
323 326 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
324 327 // zstd data.
325 328 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
326 329 // A proper new format should have had a repo/store requirement.
327 330 _format_type => Err(RevlogError::corrupted()),
328 331 }
329 332 }
330 333
331 334 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
332 335 let mut decoder = ZlibDecoder::new(self.bytes);
333 336 if self.is_delta() {
334 337 let mut buf = Vec::with_capacity(self.compressed_len);
335 338 decoder
336 339 .read_to_end(&mut buf)
337 340 .map_err(|_| RevlogError::corrupted())?;
338 341 Ok(buf)
339 342 } else {
340 343 let mut buf = vec![0; self.uncompressed_len];
341 344 decoder
342 345 .read_exact(&mut buf)
343 346 .map_err(|_| RevlogError::corrupted())?;
344 347 Ok(buf)
345 348 }
346 349 }
347 350
348 351 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
349 352 if self.is_delta() {
350 353 let mut buf = Vec::with_capacity(self.compressed_len);
351 354 zstd::stream::copy_decode(self.bytes, &mut buf)
352 355 .map_err(|_| RevlogError::corrupted())?;
353 356 Ok(buf)
354 357 } else {
355 358 let mut buf = vec![0; self.uncompressed_len];
356 359 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
357 360 .map_err(|_| RevlogError::corrupted())?;
358 361 if len != self.uncompressed_len {
359 362 Err(RevlogError::corrupted())
360 363 } else {
361 364 Ok(buf)
362 365 }
363 366 }
364 367 }
365 368
366 369 /// Tell if the entry is a snapshot or a delta
367 370 /// (influences decompression).
368 371 fn is_delta(&self) -> bool {
369 372 self.base_rev.is_some()
370 373 }
371 374 }
372 375
373 376 /// Format version of the revlog.
374 pub fn get_version(index_bytes: &[u8]) -> u16 {
375 BigEndian::read_u16(&index_bytes[2..=3])
377 pub fn get_version(index_bytes: &[u8]) -> Result<u16, HgError> {
378 if index_bytes.len() == 0 {
379 return Ok(1);
380 };
381 if index_bytes.len() < 4 {
382 return Err(HgError::corrupted(
383 "corrupted revlog: can't read the index format header",
384 ));
385 };
386 Ok(BigEndian::read_u16(&index_bytes[2..=3]))
376 387 }
377 388
378 389 /// Calculate the hash of a revision given its data and its parents.
379 390 fn hash(
380 391 data: &[u8],
381 392 p1_hash: &[u8],
382 393 p2_hash: &[u8],
383 394 ) -> [u8; NODE_BYTES_LENGTH] {
384 395 let mut hasher = Sha1::new();
385 396 let (a, b) = (p1_hash, p2_hash);
386 397 if a > b {
387 398 hasher.update(b);
388 399 hasher.update(a);
389 400 } else {
390 401 hasher.update(a);
391 402 hasher.update(b);
392 403 }
393 404 hasher.update(data);
394 405 *hasher.finalize().as_ref()
395 406 }
396 407
397 408 #[cfg(test)]
398 409 mod tests {
399 410 use super::*;
400 411
401 412 use super::super::index::IndexEntryBuilder;
402 413
403 414 #[test]
404 415 fn version_test() {
405 416 let bytes = IndexEntryBuilder::new()
406 417 .is_first(true)
407 418 .with_version(1)
408 419 .build();
409 420
410 421 assert_eq!(get_version(&bytes), 1)
411 422 }
412 423 }
General Comments 0
You need to be logged in to leave comments. Login now