##// END OF EJS Templates
rhg: fix a crash on non-generaldelta revlogs...
Arseniy Alekseyev -
r49289:96ea4db4 default
parent child Browse files
Show More
@@ -1,505 +1,524 b''
1 use std::convert::TryInto;
1 use std::convert::TryInto;
2 use std::ops::Deref;
2 use std::ops::Deref;
3
3
4 use byteorder::{BigEndian, ByteOrder};
4 use byteorder::{BigEndian, ByteOrder};
5
5
6 use crate::errors::HgError;
6 use crate::errors::HgError;
7 use crate::revlog::node::Node;
7 use crate::revlog::node::Node;
8 use crate::revlog::{Revision, NULL_REVISION};
8 use crate::revlog::{Revision, NULL_REVISION};
9
9
10 pub const INDEX_ENTRY_SIZE: usize = 64;
10 pub const INDEX_ENTRY_SIZE: usize = 64;
11
11
12 pub struct IndexHeader {
12 pub struct IndexHeader {
13 header_bytes: [u8; 4],
13 header_bytes: [u8; 4],
14 }
14 }
15
15
16 #[derive(Copy, Clone)]
16 #[derive(Copy, Clone)]
17 pub struct IndexHeaderFlags {
17 pub struct IndexHeaderFlags {
18 flags: u16,
18 flags: u16,
19 }
19 }
20
20
21 /// Corresponds to the high bits of `_format_flags` in python
21 /// Corresponds to the high bits of `_format_flags` in python
22 impl IndexHeaderFlags {
22 impl IndexHeaderFlags {
23 /// Corresponds to FLAG_INLINE_DATA in python
23 /// Corresponds to FLAG_INLINE_DATA in python
24 pub fn is_inline(self) -> bool {
24 pub fn is_inline(self) -> bool {
25 return self.flags & 1 != 0;
25 return self.flags & 1 != 0;
26 }
26 }
27 /// Corresponds to FLAG_GENERALDELTA in python
27 /// Corresponds to FLAG_GENERALDELTA in python
28 pub fn uses_generaldelta(self) -> bool {
28 pub fn uses_generaldelta(self) -> bool {
29 return self.flags & 2 != 0;
29 return self.flags & 2 != 0;
30 }
30 }
31 }
31 }
32
32
33 /// Corresponds to the INDEX_HEADER structure,
33 /// Corresponds to the INDEX_HEADER structure,
34 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
34 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
35 impl IndexHeader {
35 impl IndexHeader {
36 fn format_flags(&self) -> IndexHeaderFlags {
36 fn format_flags(&self) -> IndexHeaderFlags {
37 // No "unknown flags" check here, unlike in python. Maybe there should
37 // No "unknown flags" check here, unlike in python. Maybe there should
38 // be.
38 // be.
39 return IndexHeaderFlags {
39 return IndexHeaderFlags {
40 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
40 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
41 };
41 };
42 }
42 }
43
43
44 /// The only revlog version currently supported by rhg.
44 /// The only revlog version currently supported by rhg.
45 const REVLOGV1: u16 = 1;
45 const REVLOGV1: u16 = 1;
46
46
47 /// Corresponds to `_format_version` in Python.
47 /// Corresponds to `_format_version` in Python.
48 fn format_version(&self) -> u16 {
48 fn format_version(&self) -> u16 {
49 return BigEndian::read_u16(&self.header_bytes[2..4]);
49 return BigEndian::read_u16(&self.header_bytes[2..4]);
50 }
50 }
51
51
52 const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
52 const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
53 // We treat an empty file as a valid index with no entries.
53 // We treat an empty file as a valid index with no entries.
54 // Here we make an arbitrary choice of what we assume the format of the
54 // Here we make an arbitrary choice of what we assume the format of the
55 // index to be (V1, using generaldelta).
55 // index to be (V1, using generaldelta).
56 // This doesn't matter too much, since we're only doing read-only
56 // This doesn't matter too much, since we're only doing read-only
57 // access. but the value corresponds to the `new_header` variable in
57 // access. but the value corresponds to the `new_header` variable in
58 // `revlog.py`, `_loadindex`
58 // `revlog.py`, `_loadindex`
59 header_bytes: [0, 3, 0, 1],
59 header_bytes: [0, 3, 0, 1],
60 };
60 };
61
61
62 fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
62 fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
63 if index_bytes.len() == 0 {
63 if index_bytes.len() == 0 {
64 return Ok(IndexHeader::EMPTY_INDEX_HEADER);
64 return Ok(IndexHeader::EMPTY_INDEX_HEADER);
65 }
65 }
66 if index_bytes.len() < 4 {
66 if index_bytes.len() < 4 {
67 return Err(HgError::corrupted(
67 return Err(HgError::corrupted(
68 "corrupted revlog: can't read the index format header",
68 "corrupted revlog: can't read the index format header",
69 ));
69 ));
70 }
70 }
71 return Ok(IndexHeader {
71 return Ok(IndexHeader {
72 header_bytes: {
72 header_bytes: {
73 let bytes: [u8; 4] =
73 let bytes: [u8; 4] =
74 index_bytes[0..4].try_into().expect("impossible");
74 index_bytes[0..4].try_into().expect("impossible");
75 bytes
75 bytes
76 },
76 },
77 });
77 });
78 }
78 }
79 }
79 }
80
80
81 /// A Revlog index
81 /// A Revlog index
82 pub struct Index {
82 pub struct Index {
83 bytes: Box<dyn Deref<Target = [u8]> + Send>,
83 bytes: Box<dyn Deref<Target = [u8]> + Send>,
84 /// Offsets of starts of index blocks.
84 /// Offsets of starts of index blocks.
85 /// Only needed when the index is interleaved with data.
85 /// Only needed when the index is interleaved with data.
86 offsets: Option<Vec<usize>>,
86 offsets: Option<Vec<usize>>,
87 uses_generaldelta: bool,
87 }
88 }
88
89
89 impl Index {
90 impl Index {
90 /// Create an index from bytes.
91 /// Create an index from bytes.
91 /// Calculate the start of each entry when is_inline is true.
92 /// Calculate the start of each entry when is_inline is true.
92 pub fn new(
93 pub fn new(
93 bytes: Box<dyn Deref<Target = [u8]> + Send>,
94 bytes: Box<dyn Deref<Target = [u8]> + Send>,
94 ) -> Result<Self, HgError> {
95 ) -> Result<Self, HgError> {
95 let header = IndexHeader::parse(bytes.as_ref())?;
96 let header = IndexHeader::parse(bytes.as_ref())?;
96
97
97 if header.format_version() != IndexHeader::REVLOGV1 {
98 if header.format_version() != IndexHeader::REVLOGV1 {
98 // A proper new version should have had a repo/store
99 // A proper new version should have had a repo/store
99 // requirement.
100 // requirement.
100 return Err(HgError::corrupted("unsupported revlog version"));
101 return Err(HgError::corrupted("unsupported revlog version"));
101 }
102 }
102
103
104 // This is only correct because we know version is REVLOGV1.
105 // In v2 we always use generaldelta, while in v0 we never use
106 // generaldelta. Similar for [is_inline] (it's only used in v1).
107 let uses_generaldelta = header.format_flags().uses_generaldelta();
108
103 if header.format_flags().is_inline() {
109 if header.format_flags().is_inline() {
104 let mut offset: usize = 0;
110 let mut offset: usize = 0;
105 let mut offsets = Vec::new();
111 let mut offsets = Vec::new();
106
112
107 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
113 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
108 offsets.push(offset);
114 offsets.push(offset);
109 let end = offset + INDEX_ENTRY_SIZE;
115 let end = offset + INDEX_ENTRY_SIZE;
110 let entry = IndexEntry {
116 let entry = IndexEntry {
111 bytes: &bytes[offset..end],
117 bytes: &bytes[offset..end],
112 offset_override: None,
118 offset_override: None,
113 };
119 };
114
120
115 offset += INDEX_ENTRY_SIZE + entry.compressed_len();
121 offset += INDEX_ENTRY_SIZE + entry.compressed_len();
116 }
122 }
117
123
118 if offset == bytes.len() {
124 if offset == bytes.len() {
119 Ok(Self {
125 Ok(Self {
120 bytes,
126 bytes,
121 offsets: Some(offsets),
127 offsets: Some(offsets),
128 uses_generaldelta,
122 })
129 })
123 } else {
130 } else {
124 Err(HgError::corrupted("unexpected inline revlog length")
131 Err(HgError::corrupted("unexpected inline revlog length")
125 .into())
132 .into())
126 }
133 }
127 } else {
134 } else {
128 Ok(Self {
135 Ok(Self {
129 bytes,
136 bytes,
130 offsets: None,
137 offsets: None,
138 uses_generaldelta,
131 })
139 })
132 }
140 }
133 }
141 }
134
142
143 pub fn uses_generaldelta(&self) -> bool {
144 self.uses_generaldelta
145 }
146
135 /// Value of the inline flag.
147 /// Value of the inline flag.
136 pub fn is_inline(&self) -> bool {
148 pub fn is_inline(&self) -> bool {
137 self.offsets.is_some()
149 self.offsets.is_some()
138 }
150 }
139
151
140 /// Return a slice of bytes if `revlog` is inline. Panic if not.
152 /// Return a slice of bytes if `revlog` is inline. Panic if not.
141 pub fn data(&self, start: usize, end: usize) -> &[u8] {
153 pub fn data(&self, start: usize, end: usize) -> &[u8] {
142 if !self.is_inline() {
154 if !self.is_inline() {
143 panic!("tried to access data in the index of a revlog that is not inline");
155 panic!("tried to access data in the index of a revlog that is not inline");
144 }
156 }
145 &self.bytes[start..end]
157 &self.bytes[start..end]
146 }
158 }
147
159
148 /// Return number of entries of the revlog index.
160 /// Return number of entries of the revlog index.
149 pub fn len(&self) -> usize {
161 pub fn len(&self) -> usize {
150 if let Some(offsets) = &self.offsets {
162 if let Some(offsets) = &self.offsets {
151 offsets.len()
163 offsets.len()
152 } else {
164 } else {
153 self.bytes.len() / INDEX_ENTRY_SIZE
165 self.bytes.len() / INDEX_ENTRY_SIZE
154 }
166 }
155 }
167 }
156
168
157 /// Returns `true` if the `Index` has zero `entries`.
169 /// Returns `true` if the `Index` has zero `entries`.
158 pub fn is_empty(&self) -> bool {
170 pub fn is_empty(&self) -> bool {
159 self.len() == 0
171 self.len() == 0
160 }
172 }
161
173
162 /// Return the index entry corresponding to the given revision if it
174 /// Return the index entry corresponding to the given revision if it
163 /// exists.
175 /// exists.
164 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
176 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
165 if rev == NULL_REVISION {
177 if rev == NULL_REVISION {
166 return None;
178 return None;
167 }
179 }
168 if let Some(offsets) = &self.offsets {
180 if let Some(offsets) = &self.offsets {
169 self.get_entry_inline(rev, offsets)
181 self.get_entry_inline(rev, offsets)
170 } else {
182 } else {
171 self.get_entry_separated(rev)
183 self.get_entry_separated(rev)
172 }
184 }
173 }
185 }
174
186
175 fn get_entry_inline(
187 fn get_entry_inline(
176 &self,
188 &self,
177 rev: Revision,
189 rev: Revision,
178 offsets: &[usize],
190 offsets: &[usize],
179 ) -> Option<IndexEntry> {
191 ) -> Option<IndexEntry> {
180 let start = *offsets.get(rev as usize)?;
192 let start = *offsets.get(rev as usize)?;
181 let end = start.checked_add(INDEX_ENTRY_SIZE)?;
193 let end = start.checked_add(INDEX_ENTRY_SIZE)?;
182 let bytes = &self.bytes[start..end];
194 let bytes = &self.bytes[start..end];
183
195
184 // See IndexEntry for an explanation of this override.
196 // See IndexEntry for an explanation of this override.
185 let offset_override = Some(end);
197 let offset_override = Some(end);
186
198
187 Some(IndexEntry {
199 Some(IndexEntry {
188 bytes,
200 bytes,
189 offset_override,
201 offset_override,
190 })
202 })
191 }
203 }
192
204
193 fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
205 fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
194 let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE;
206 let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE;
195 if rev as usize >= max_rev {
207 if rev as usize >= max_rev {
196 return None;
208 return None;
197 }
209 }
198 let start = rev as usize * INDEX_ENTRY_SIZE;
210 let start = rev as usize * INDEX_ENTRY_SIZE;
199 let end = start + INDEX_ENTRY_SIZE;
211 let end = start + INDEX_ENTRY_SIZE;
200 let bytes = &self.bytes[start..end];
212 let bytes = &self.bytes[start..end];
201
213
202 // Override the offset of the first revision as its bytes are used
214 // Override the offset of the first revision as its bytes are used
203 // for the index's metadata (saving space because it is always 0)
215 // for the index's metadata (saving space because it is always 0)
204 let offset_override = if rev == 0 { Some(0) } else { None };
216 let offset_override = if rev == 0 { Some(0) } else { None };
205
217
206 Some(IndexEntry {
218 Some(IndexEntry {
207 bytes,
219 bytes,
208 offset_override,
220 offset_override,
209 })
221 })
210 }
222 }
211 }
223 }
212
224
213 impl super::RevlogIndex for Index {
225 impl super::RevlogIndex for Index {
214 fn len(&self) -> usize {
226 fn len(&self) -> usize {
215 self.len()
227 self.len()
216 }
228 }
217
229
218 fn node(&self, rev: Revision) -> Option<&Node> {
230 fn node(&self, rev: Revision) -> Option<&Node> {
219 self.get_entry(rev).map(|entry| entry.hash())
231 self.get_entry(rev).map(|entry| entry.hash())
220 }
232 }
221 }
233 }
222
234
223 #[derive(Debug)]
235 #[derive(Debug)]
224 pub struct IndexEntry<'a> {
236 pub struct IndexEntry<'a> {
225 bytes: &'a [u8],
237 bytes: &'a [u8],
226 /// Allows to override the offset value of the entry.
238 /// Allows to override the offset value of the entry.
227 ///
239 ///
228 /// For interleaved index and data, the offset stored in the index
240 /// For interleaved index and data, the offset stored in the index
229 /// corresponds to the separated data offset.
241 /// corresponds to the separated data offset.
230 /// It has to be overridden with the actual offset in the interleaved
242 /// It has to be overridden with the actual offset in the interleaved
231 /// index which is just after the index block.
243 /// index which is just after the index block.
232 ///
244 ///
233 /// For separated index and data, the offset stored in the first index
245 /// For separated index and data, the offset stored in the first index
234 /// entry is mixed with the index headers.
246 /// entry is mixed with the index headers.
235 /// It has to be overridden with 0.
247 /// It has to be overridden with 0.
236 offset_override: Option<usize>,
248 offset_override: Option<usize>,
237 }
249 }
238
250
239 impl<'a> IndexEntry<'a> {
251 impl<'a> IndexEntry<'a> {
240 /// Return the offset of the data.
252 /// Return the offset of the data.
241 pub fn offset(&self) -> usize {
253 pub fn offset(&self) -> usize {
242 if let Some(offset_override) = self.offset_override {
254 if let Some(offset_override) = self.offset_override {
243 offset_override
255 offset_override
244 } else {
256 } else {
245 let mut bytes = [0; 8];
257 let mut bytes = [0; 8];
246 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
258 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
247 BigEndian::read_u64(&bytes[..]) as usize
259 BigEndian::read_u64(&bytes[..]) as usize
248 }
260 }
249 }
261 }
250
262
251 /// Return the compressed length of the data.
263 /// Return the compressed length of the data.
252 pub fn compressed_len(&self) -> usize {
264 pub fn compressed_len(&self) -> usize {
253 BigEndian::read_u32(&self.bytes[8..=11]) as usize
265 BigEndian::read_u32(&self.bytes[8..=11]) as usize
254 }
266 }
255
267
256 /// Return the uncompressed length of the data.
268 /// Return the uncompressed length of the data.
257 pub fn uncompressed_len(&self) -> usize {
269 pub fn uncompressed_len(&self) -> usize {
258 BigEndian::read_u32(&self.bytes[12..=15]) as usize
270 BigEndian::read_u32(&self.bytes[12..=15]) as usize
259 }
271 }
260
272
261 /// Return the revision upon which the data has been derived.
273 /// Return the revision upon which the data has been derived.
262 pub fn base_revision(&self) -> Revision {
274 pub fn base_revision_or_base_of_delta_chain(&self) -> Revision {
263 // TODO Maybe return an Option when base_revision == rev?
275 // TODO Maybe return an Option when base_revision == rev?
264 // Requires to add rev to IndexEntry
276 // Requires to add rev to IndexEntry
265
277
266 BigEndian::read_i32(&self.bytes[16..])
278 BigEndian::read_i32(&self.bytes[16..])
267 }
279 }
268
280
269 pub fn p1(&self) -> Revision {
281 pub fn p1(&self) -> Revision {
270 BigEndian::read_i32(&self.bytes[24..])
282 BigEndian::read_i32(&self.bytes[24..])
271 }
283 }
272
284
273 pub fn p2(&self) -> Revision {
285 pub fn p2(&self) -> Revision {
274 BigEndian::read_i32(&self.bytes[28..])
286 BigEndian::read_i32(&self.bytes[28..])
275 }
287 }
276
288
277 /// Return the hash of revision's full text.
289 /// Return the hash of revision's full text.
278 ///
290 ///
279 /// Currently, SHA-1 is used and only the first 20 bytes of this field
291 /// Currently, SHA-1 is used and only the first 20 bytes of this field
280 /// are used.
292 /// are used.
281 pub fn hash(&self) -> &'a Node {
293 pub fn hash(&self) -> &'a Node {
282 (&self.bytes[32..52]).try_into().unwrap()
294 (&self.bytes[32..52]).try_into().unwrap()
283 }
295 }
284 }
296 }
285
297
286 #[cfg(test)]
298 #[cfg(test)]
287 mod tests {
299 mod tests {
288 use super::*;
300 use super::*;
289
301
290 #[cfg(test)]
302 #[cfg(test)]
291 #[derive(Debug, Copy, Clone)]
303 #[derive(Debug, Copy, Clone)]
292 pub struct IndexEntryBuilder {
304 pub struct IndexEntryBuilder {
293 is_first: bool,
305 is_first: bool,
294 is_inline: bool,
306 is_inline: bool,
295 is_general_delta: bool,
307 is_general_delta: bool,
296 version: u16,
308 version: u16,
297 offset: usize,
309 offset: usize,
298 compressed_len: usize,
310 compressed_len: usize,
299 uncompressed_len: usize,
311 uncompressed_len: usize,
300 base_revision: Revision,
312 base_revision_or_base_of_delta_chain: Revision,
301 }
313 }
302
314
303 #[cfg(test)]
315 #[cfg(test)]
304 impl IndexEntryBuilder {
316 impl IndexEntryBuilder {
305 pub fn new() -> Self {
317 pub fn new() -> Self {
306 Self {
318 Self {
307 is_first: false,
319 is_first: false,
308 is_inline: false,
320 is_inline: false,
309 is_general_delta: true,
321 is_general_delta: true,
310 version: 2,
322 version: 2,
311 offset: 0,
323 offset: 0,
312 compressed_len: 0,
324 compressed_len: 0,
313 uncompressed_len: 0,
325 uncompressed_len: 0,
314 base_revision: 0,
326 base_revision_or_base_of_delta_chain: 0,
315 }
327 }
316 }
328 }
317
329
318 pub fn is_first(&mut self, value: bool) -> &mut Self {
330 pub fn is_first(&mut self, value: bool) -> &mut Self {
319 self.is_first = value;
331 self.is_first = value;
320 self
332 self
321 }
333 }
322
334
323 pub fn with_inline(&mut self, value: bool) -> &mut Self {
335 pub fn with_inline(&mut self, value: bool) -> &mut Self {
324 self.is_inline = value;
336 self.is_inline = value;
325 self
337 self
326 }
338 }
327
339
328 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
340 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
329 self.is_general_delta = value;
341 self.is_general_delta = value;
330 self
342 self
331 }
343 }
332
344
333 pub fn with_version(&mut self, value: u16) -> &mut Self {
345 pub fn with_version(&mut self, value: u16) -> &mut Self {
334 self.version = value;
346 self.version = value;
335 self
347 self
336 }
348 }
337
349
338 pub fn with_offset(&mut self, value: usize) -> &mut Self {
350 pub fn with_offset(&mut self, value: usize) -> &mut Self {
339 self.offset = value;
351 self.offset = value;
340 self
352 self
341 }
353 }
342
354
343 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
355 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
344 self.compressed_len = value;
356 self.compressed_len = value;
345 self
357 self
346 }
358 }
347
359
348 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
360 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
349 self.uncompressed_len = value;
361 self.uncompressed_len = value;
350 self
362 self
351 }
363 }
352
364
353 pub fn with_base_revision(&mut self, value: Revision) -> &mut Self {
365 pub fn with_base_revision_or_base_of_delta_chain(
354 self.base_revision = value;
366 &mut self,
367 value: Revision,
368 ) -> &mut Self {
369 self.base_revision_or_base_of_delta_chain = value;
355 self
370 self
356 }
371 }
357
372
358 pub fn build(&self) -> Vec<u8> {
373 pub fn build(&self) -> Vec<u8> {
359 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
374 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
360 if self.is_first {
375 if self.is_first {
361 bytes.extend(&match (self.is_general_delta, self.is_inline) {
376 bytes.extend(&match (self.is_general_delta, self.is_inline) {
362 (false, false) => [0u8, 0],
377 (false, false) => [0u8, 0],
363 (false, true) => [0u8, 1],
378 (false, true) => [0u8, 1],
364 (true, false) => [0u8, 2],
379 (true, false) => [0u8, 2],
365 (true, true) => [0u8, 3],
380 (true, true) => [0u8, 3],
366 });
381 });
367 bytes.extend(&self.version.to_be_bytes());
382 bytes.extend(&self.version.to_be_bytes());
368 // Remaining offset bytes.
383 // Remaining offset bytes.
369 bytes.extend(&[0u8; 2]);
384 bytes.extend(&[0u8; 2]);
370 } else {
385 } else {
371 // Offset stored on 48 bits (6 bytes)
386 // Offset stored on 48 bits (6 bytes)
372 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
387 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
373 }
388 }
374 bytes.extend(&[0u8; 2]); // Revision flags.
389 bytes.extend(&[0u8; 2]); // Revision flags.
375 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
390 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
376 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
391 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
377 bytes.extend(&self.base_revision.to_be_bytes());
392 bytes.extend(
393 &self.base_revision_or_base_of_delta_chain.to_be_bytes(),
394 );
378 bytes
395 bytes
379 }
396 }
380 }
397 }
381
398
382 pub fn is_inline(index_bytes: &[u8]) -> bool {
399 pub fn is_inline(index_bytes: &[u8]) -> bool {
383 IndexHeader::parse(index_bytes)
400 IndexHeader::parse(index_bytes)
384 .expect("too short")
401 .expect("too short")
385 .format_flags()
402 .format_flags()
386 .is_inline()
403 .is_inline()
387 }
404 }
388
405
389 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
406 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
390 IndexHeader::parse(index_bytes)
407 IndexHeader::parse(index_bytes)
391 .expect("too short")
408 .expect("too short")
392 .format_flags()
409 .format_flags()
393 .uses_generaldelta()
410 .uses_generaldelta()
394 }
411 }
395
412
396 pub fn get_version(index_bytes: &[u8]) -> u16 {
413 pub fn get_version(index_bytes: &[u8]) -> u16 {
397 IndexHeader::parse(index_bytes)
414 IndexHeader::parse(index_bytes)
398 .expect("too short")
415 .expect("too short")
399 .format_version()
416 .format_version()
400 }
417 }
401
418
402 #[test]
419 #[test]
403 fn flags_when_no_inline_flag_test() {
420 fn flags_when_no_inline_flag_test() {
404 let bytes = IndexEntryBuilder::new()
421 let bytes = IndexEntryBuilder::new()
405 .is_first(true)
422 .is_first(true)
406 .with_general_delta(false)
423 .with_general_delta(false)
407 .with_inline(false)
424 .with_inline(false)
408 .build();
425 .build();
409
426
410 assert_eq!(is_inline(&bytes), false);
427 assert_eq!(is_inline(&bytes), false);
411 assert_eq!(uses_generaldelta(&bytes), false);
428 assert_eq!(uses_generaldelta(&bytes), false);
412 }
429 }
413
430
414 #[test]
431 #[test]
415 fn flags_when_inline_flag_test() {
432 fn flags_when_inline_flag_test() {
416 let bytes = IndexEntryBuilder::new()
433 let bytes = IndexEntryBuilder::new()
417 .is_first(true)
434 .is_first(true)
418 .with_general_delta(false)
435 .with_general_delta(false)
419 .with_inline(true)
436 .with_inline(true)
420 .build();
437 .build();
421
438
422 assert_eq!(is_inline(&bytes), true);
439 assert_eq!(is_inline(&bytes), true);
423 assert_eq!(uses_generaldelta(&bytes), false);
440 assert_eq!(uses_generaldelta(&bytes), false);
424 }
441 }
425
442
426 #[test]
443 #[test]
427 fn flags_when_inline_and_generaldelta_flags_test() {
444 fn flags_when_inline_and_generaldelta_flags_test() {
428 let bytes = IndexEntryBuilder::new()
445 let bytes = IndexEntryBuilder::new()
429 .is_first(true)
446 .is_first(true)
430 .with_general_delta(true)
447 .with_general_delta(true)
431 .with_inline(true)
448 .with_inline(true)
432 .build();
449 .build();
433
450
434 assert_eq!(is_inline(&bytes), true);
451 assert_eq!(is_inline(&bytes), true);
435 assert_eq!(uses_generaldelta(&bytes), true);
452 assert_eq!(uses_generaldelta(&bytes), true);
436 }
453 }
437
454
438 #[test]
455 #[test]
439 fn test_offset() {
456 fn test_offset() {
440 let bytes = IndexEntryBuilder::new().with_offset(1).build();
457 let bytes = IndexEntryBuilder::new().with_offset(1).build();
441 let entry = IndexEntry {
458 let entry = IndexEntry {
442 bytes: &bytes,
459 bytes: &bytes,
443 offset_override: None,
460 offset_override: None,
444 };
461 };
445
462
446 assert_eq!(entry.offset(), 1)
463 assert_eq!(entry.offset(), 1)
447 }
464 }
448
465
449 #[test]
466 #[test]
450 fn test_with_overridden_offset() {
467 fn test_with_overridden_offset() {
451 let bytes = IndexEntryBuilder::new().with_offset(1).build();
468 let bytes = IndexEntryBuilder::new().with_offset(1).build();
452 let entry = IndexEntry {
469 let entry = IndexEntry {
453 bytes: &bytes,
470 bytes: &bytes,
454 offset_override: Some(2),
471 offset_override: Some(2),
455 };
472 };
456
473
457 assert_eq!(entry.offset(), 2)
474 assert_eq!(entry.offset(), 2)
458 }
475 }
459
476
460 #[test]
477 #[test]
461 fn test_compressed_len() {
478 fn test_compressed_len() {
462 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
479 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
463 let entry = IndexEntry {
480 let entry = IndexEntry {
464 bytes: &bytes,
481 bytes: &bytes,
465 offset_override: None,
482 offset_override: None,
466 };
483 };
467
484
468 assert_eq!(entry.compressed_len(), 1)
485 assert_eq!(entry.compressed_len(), 1)
469 }
486 }
470
487
471 #[test]
488 #[test]
472 fn test_uncompressed_len() {
489 fn test_uncompressed_len() {
473 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
490 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
474 let entry = IndexEntry {
491 let entry = IndexEntry {
475 bytes: &bytes,
492 bytes: &bytes,
476 offset_override: None,
493 offset_override: None,
477 };
494 };
478
495
479 assert_eq!(entry.uncompressed_len(), 1)
496 assert_eq!(entry.uncompressed_len(), 1)
480 }
497 }
481
498
482 #[test]
499 #[test]
483 fn test_base_revision() {
500 fn test_base_revision_or_base_of_delta_chain() {
484 let bytes = IndexEntryBuilder::new().with_base_revision(1).build();
501 let bytes = IndexEntryBuilder::new()
502 .with_base_revision_or_base_of_delta_chain(1)
503 .build();
485 let entry = IndexEntry {
504 let entry = IndexEntry {
486 bytes: &bytes,
505 bytes: &bytes,
487 offset_override: None,
506 offset_override: None,
488 };
507 };
489
508
490 assert_eq!(entry.base_revision(), 1)
509 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1)
491 }
510 }
492
511
493 #[test]
512 #[test]
494 fn version_test() {
513 fn version_test() {
495 let bytes = IndexEntryBuilder::new()
514 let bytes = IndexEntryBuilder::new()
496 .is_first(true)
515 .is_first(true)
497 .with_version(1)
516 .with_version(1)
498 .build();
517 .build();
499
518
500 assert_eq!(get_version(&bytes), 1)
519 assert_eq!(get_version(&bytes), 1)
501 }
520 }
502 }
521 }
503
522
504 #[cfg(test)]
523 #[cfg(test)]
505 pub use tests::IndexEntryBuilder;
524 pub use tests::IndexEntryBuilder;
@@ -1,399 +1,420 b''
1 use std::borrow::Cow;
1 use std::borrow::Cow;
2 use std::io::Read;
2 use std::io::Read;
3 use std::ops::Deref;
3 use std::ops::Deref;
4 use std::path::Path;
4 use std::path::Path;
5
5
6 use flate2::read::ZlibDecoder;
6 use flate2::read::ZlibDecoder;
7 use micro_timer::timed;
7 use micro_timer::timed;
8 use sha1::{Digest, Sha1};
8 use sha1::{Digest, Sha1};
9 use zstd;
9 use zstd;
10
10
11 use super::index::Index;
11 use super::index::Index;
12 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
12 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
13 use super::nodemap;
13 use super::nodemap;
14 use super::nodemap::{NodeMap, NodeMapError};
14 use super::nodemap::{NodeMap, NodeMapError};
15 use super::nodemap_docket::NodeMapDocket;
15 use super::nodemap_docket::NodeMapDocket;
16 use super::patch;
16 use super::patch;
17 use crate::errors::HgError;
17 use crate::errors::HgError;
18 use crate::repo::Repo;
18 use crate::repo::Repo;
19 use crate::revlog::Revision;
19 use crate::revlog::Revision;
20 use crate::{Node, NULL_REVISION};
20 use crate::{Node, NULL_REVISION};
21
21
22 #[derive(derive_more::From)]
22 #[derive(derive_more::From)]
23 pub enum RevlogError {
23 pub enum RevlogError {
24 InvalidRevision,
24 InvalidRevision,
25 /// Working directory is not supported
25 /// Working directory is not supported
26 WDirUnsupported,
26 WDirUnsupported,
27 /// Found more than one entry whose ID match the requested prefix
27 /// Found more than one entry whose ID match the requested prefix
28 AmbiguousPrefix,
28 AmbiguousPrefix,
29 #[from]
29 #[from]
30 Other(HgError),
30 Other(HgError),
31 }
31 }
32
32
33 impl From<NodeMapError> for RevlogError {
33 impl From<NodeMapError> for RevlogError {
34 fn from(error: NodeMapError) -> Self {
34 fn from(error: NodeMapError) -> Self {
35 match error {
35 match error {
36 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
36 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
37 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
37 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
38 }
38 }
39 }
39 }
40 }
40 }
41
41
42 impl RevlogError {
42 impl RevlogError {
43 fn corrupted() -> Self {
43 fn corrupted() -> Self {
44 RevlogError::Other(HgError::corrupted("corrupted revlog"))
44 RevlogError::Other(HgError::corrupted("corrupted revlog"))
45 }
45 }
46 }
46 }
47
47
48 /// Read only implementation of revlog.
48 /// Read only implementation of revlog.
49 pub struct Revlog {
49 pub struct Revlog {
50 /// When index and data are not interleaved: bytes of the revlog index.
50 /// When index and data are not interleaved: bytes of the revlog index.
51 /// When index and data are interleaved: bytes of the revlog index and
51 /// When index and data are interleaved: bytes of the revlog index and
52 /// data.
52 /// data.
53 index: Index,
53 index: Index,
54 /// When index and data are not interleaved: bytes of the revlog data
54 /// When index and data are not interleaved: bytes of the revlog data
55 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
55 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
56 /// When present on disk: the persistent nodemap for this revlog
56 /// When present on disk: the persistent nodemap for this revlog
57 nodemap: Option<nodemap::NodeTree>,
57 nodemap: Option<nodemap::NodeTree>,
58 }
58 }
59
59
60 impl Revlog {
60 impl Revlog {
61 /// Open a revlog index file.
61 /// Open a revlog index file.
62 ///
62 ///
63 /// It will also open the associated data file if index and data are not
63 /// It will also open the associated data file if index and data are not
64 /// interleaved.
64 /// interleaved.
65 #[timed]
65 #[timed]
66 pub fn open(
66 pub fn open(
67 repo: &Repo,
67 repo: &Repo,
68 index_path: impl AsRef<Path>,
68 index_path: impl AsRef<Path>,
69 data_path: Option<&Path>,
69 data_path: Option<&Path>,
70 ) -> Result<Self, HgError> {
70 ) -> Result<Self, HgError> {
71 let index_path = index_path.as_ref();
71 let index_path = index_path.as_ref();
72 let index = {
72 let index = {
73 match repo.store_vfs().mmap_open_opt(&index_path)? {
73 match repo.store_vfs().mmap_open_opt(&index_path)? {
74 None => Index::new(Box::new(vec![])),
74 None => Index::new(Box::new(vec![])),
75 Some(index_mmap) => {
75 Some(index_mmap) => {
76 let index = Index::new(Box::new(index_mmap))?;
76 let index = Index::new(Box::new(index_mmap))?;
77 Ok(index)
77 Ok(index)
78 }
78 }
79 }
79 }
80 }?;
80 }?;
81
81
82 let default_data_path = index_path.with_extension("d");
82 let default_data_path = index_path.with_extension("d");
83
83
84 // type annotation required
84 // type annotation required
85 // won't recognize Mmap as Deref<Target = [u8]>
85 // won't recognize Mmap as Deref<Target = [u8]>
86 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
86 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
87 if index.is_inline() {
87 if index.is_inline() {
88 None
88 None
89 } else {
89 } else {
90 let data_path = data_path.unwrap_or(&default_data_path);
90 let data_path = data_path.unwrap_or(&default_data_path);
91 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
91 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
92 Some(Box::new(data_mmap))
92 Some(Box::new(data_mmap))
93 };
93 };
94
94
95 let nodemap = if index.is_inline() {
95 let nodemap = if index.is_inline() {
96 None
96 None
97 } else {
97 } else {
98 NodeMapDocket::read_from_file(repo, index_path)?.map(
98 NodeMapDocket::read_from_file(repo, index_path)?.map(
99 |(docket, data)| {
99 |(docket, data)| {
100 nodemap::NodeTree::load_bytes(
100 nodemap::NodeTree::load_bytes(
101 Box::new(data),
101 Box::new(data),
102 docket.data_length,
102 docket.data_length,
103 )
103 )
104 },
104 },
105 )
105 )
106 };
106 };
107
107
108 Ok(Revlog {
108 Ok(Revlog {
109 index,
109 index,
110 data_bytes,
110 data_bytes,
111 nodemap,
111 nodemap,
112 })
112 })
113 }
113 }
114
114
115 /// Return number of entries of the `Revlog`.
115 /// Return number of entries of the `Revlog`.
116 pub fn len(&self) -> usize {
116 pub fn len(&self) -> usize {
117 self.index.len()
117 self.index.len()
118 }
118 }
119
119
120 /// Returns `true` if the `Revlog` has zero `entries`.
120 /// Returns `true` if the `Revlog` has zero `entries`.
121 pub fn is_empty(&self) -> bool {
121 pub fn is_empty(&self) -> bool {
122 self.index.is_empty()
122 self.index.is_empty()
123 }
123 }
124
124
125 /// Returns the node ID for the given revision number, if it exists in this
125 /// Returns the node ID for the given revision number, if it exists in this
126 /// revlog
126 /// revlog
127 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
127 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
128 if rev == NULL_REVISION {
128 if rev == NULL_REVISION {
129 return Some(&NULL_NODE);
129 return Some(&NULL_NODE);
130 }
130 }
131 Some(self.index.get_entry(rev)?.hash())
131 Some(self.index.get_entry(rev)?.hash())
132 }
132 }
133
133
134 /// Return the revision number for the given node ID, if it exists in this
134 /// Return the revision number for the given node ID, if it exists in this
135 /// revlog
135 /// revlog
136 #[timed]
136 #[timed]
137 pub fn rev_from_node(
137 pub fn rev_from_node(
138 &self,
138 &self,
139 node: NodePrefix,
139 node: NodePrefix,
140 ) -> Result<Revision, RevlogError> {
140 ) -> Result<Revision, RevlogError> {
141 if node.is_prefix_of(&NULL_NODE) {
141 if node.is_prefix_of(&NULL_NODE) {
142 return Ok(NULL_REVISION);
142 return Ok(NULL_REVISION);
143 }
143 }
144
144
145 if let Some(nodemap) = &self.nodemap {
145 if let Some(nodemap) = &self.nodemap {
146 return nodemap
146 return nodemap
147 .find_bin(&self.index, node)?
147 .find_bin(&self.index, node)?
148 .ok_or(RevlogError::InvalidRevision);
148 .ok_or(RevlogError::InvalidRevision);
149 }
149 }
150
150
151 // Fallback to linear scan when a persistent nodemap is not present.
151 // Fallback to linear scan when a persistent nodemap is not present.
152 // This happens when the persistent-nodemap experimental feature is not
152 // This happens when the persistent-nodemap experimental feature is not
153 // enabled, or for small revlogs.
153 // enabled, or for small revlogs.
154 //
154 //
155 // TODO: consider building a non-persistent nodemap in memory to
155 // TODO: consider building a non-persistent nodemap in memory to
156 // optimize these cases.
156 // optimize these cases.
157 let mut found_by_prefix = None;
157 let mut found_by_prefix = None;
158 for rev in (0..self.len() as Revision).rev() {
158 for rev in (0..self.len() as Revision).rev() {
159 let index_entry =
159 let index_entry =
160 self.index.get_entry(rev).ok_or(HgError::corrupted(
160 self.index.get_entry(rev).ok_or(HgError::corrupted(
161 "revlog references a revision not in the index",
161 "revlog references a revision not in the index",
162 ))?;
162 ))?;
163 if node == *index_entry.hash() {
163 if node == *index_entry.hash() {
164 return Ok(rev);
164 return Ok(rev);
165 }
165 }
166 if node.is_prefix_of(index_entry.hash()) {
166 if node.is_prefix_of(index_entry.hash()) {
167 if found_by_prefix.is_some() {
167 if found_by_prefix.is_some() {
168 return Err(RevlogError::AmbiguousPrefix);
168 return Err(RevlogError::AmbiguousPrefix);
169 }
169 }
170 found_by_prefix = Some(rev)
170 found_by_prefix = Some(rev)
171 }
171 }
172 }
172 }
173 found_by_prefix.ok_or(RevlogError::InvalidRevision)
173 found_by_prefix.ok_or(RevlogError::InvalidRevision)
174 }
174 }
175
175
176 /// Returns whether the given revision exists in this revlog.
176 /// Returns whether the given revision exists in this revlog.
177 pub fn has_rev(&self, rev: Revision) -> bool {
177 pub fn has_rev(&self, rev: Revision) -> bool {
178 self.index.get_entry(rev).is_some()
178 self.index.get_entry(rev).is_some()
179 }
179 }
180
180
181 /// Return the full data associated to a revision.
181 /// Return the full data associated to a revision.
182 ///
182 ///
183 /// All entries required to build the final data out of deltas will be
183 /// All entries required to build the final data out of deltas will be
184 /// retrieved as needed, and the deltas will be applied to the initial
184 /// retrieved as needed, and the deltas will be applied to the initial
185 /// snapshot to rebuild the final data.
185 /// snapshot to rebuild the final data.
186 #[timed]
186 #[timed]
187 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
187 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
188 if rev == NULL_REVISION {
188 if rev == NULL_REVISION {
189 return Ok(vec![]);
189 return Ok(vec![]);
190 };
190 };
191 // Todo return -> Cow
191 // Todo return -> Cow
192 let mut entry = self.get_entry(rev)?;
192 let mut entry = self.get_entry(rev)?;
193 let mut delta_chain = vec![];
193 let mut delta_chain = vec![];
194 while let Some(base_rev) = entry.base_rev {
194
195 // The meaning of `base_rev_or_base_of_delta_chain` depends on
196 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
197 // `mercurial/revlogutils/constants.py` and the code in
198 // [_chaininfo] and in [index_deltachain].
199 let uses_generaldelta = self.index.uses_generaldelta();
200 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
201 let base_rev = if uses_generaldelta {
202 base_rev
203 } else {
204 entry.rev - 1
205 };
195 delta_chain.push(entry);
206 delta_chain.push(entry);
196 entry = self
207 entry = self.get_entry_internal(base_rev)?;
197 .get_entry(base_rev)
198 .map_err(|_| RevlogError::corrupted())?;
199 }
208 }
200
209
201 // TODO do not look twice in the index
210 // TODO do not look twice in the index
202 let index_entry = self
211 let index_entry = self
203 .index
212 .index
204 .get_entry(rev)
213 .get_entry(rev)
205 .ok_or(RevlogError::InvalidRevision)?;
214 .ok_or(RevlogError::InvalidRevision)?;
206
215
207 let data: Vec<u8> = if delta_chain.is_empty() {
216 let data: Vec<u8> = if delta_chain.is_empty() {
208 entry.data()?.into()
217 entry.data()?.into()
209 } else {
218 } else {
210 Revlog::build_data_from_deltas(entry, &delta_chain)?
219 Revlog::build_data_from_deltas(entry, &delta_chain)?
211 };
220 };
212
221
213 if self.check_hash(
222 if self.check_hash(
214 index_entry.p1(),
223 index_entry.p1(),
215 index_entry.p2(),
224 index_entry.p2(),
216 index_entry.hash().as_bytes(),
225 index_entry.hash().as_bytes(),
217 &data,
226 &data,
218 ) {
227 ) {
219 Ok(data)
228 Ok(data)
220 } else {
229 } else {
221 Err(RevlogError::corrupted())
230 Err(RevlogError::corrupted())
222 }
231 }
223 }
232 }
224
233
225 /// Check the hash of some given data against the recorded hash.
234 /// Check the hash of some given data against the recorded hash.
226 pub fn check_hash(
235 pub fn check_hash(
227 &self,
236 &self,
228 p1: Revision,
237 p1: Revision,
229 p2: Revision,
238 p2: Revision,
230 expected: &[u8],
239 expected: &[u8],
231 data: &[u8],
240 data: &[u8],
232 ) -> bool {
241 ) -> bool {
233 let e1 = self.index.get_entry(p1);
242 let e1 = self.index.get_entry(p1);
234 let h1 = match e1 {
243 let h1 = match e1 {
235 Some(ref entry) => entry.hash(),
244 Some(ref entry) => entry.hash(),
236 None => &NULL_NODE,
245 None => &NULL_NODE,
237 };
246 };
238 let e2 = self.index.get_entry(p2);
247 let e2 = self.index.get_entry(p2);
239 let h2 = match e2 {
248 let h2 = match e2 {
240 Some(ref entry) => entry.hash(),
249 Some(ref entry) => entry.hash(),
241 None => &NULL_NODE,
250 None => &NULL_NODE,
242 };
251 };
243
252
244 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
253 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
245 }
254 }
246
255
247 /// Build the full data of a revision out its snapshot
247 /// Build the full data of a revision out of its snapshot
256 /// Build the full data of a revision out of its snapshot
257 /// and its deltas.
249 #[timed]
258 #[timed]
250 fn build_data_from_deltas(
259 fn build_data_from_deltas(
251 snapshot: RevlogEntry,
260 snapshot: RevlogEntry,
252 deltas: &[RevlogEntry],
261 deltas: &[RevlogEntry],
253 ) -> Result<Vec<u8>, RevlogError> {
262 ) -> Result<Vec<u8>, RevlogError> {
254 let snapshot = snapshot.data()?;
263 let snapshot = snapshot.data()?;
255 let deltas = deltas
264 let deltas = deltas
256 .iter()
265 .iter()
257 .rev()
266 .rev()
258 .map(RevlogEntry::data)
267 .map(RevlogEntry::data)
259 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
268 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
260 let patches: Vec<_> =
269 let patches: Vec<_> =
261 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
270 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
262 let patch = patch::fold_patch_lists(&patches);
271 let patch = patch::fold_patch_lists(&patches);
263 Ok(patch.apply(&snapshot))
272 Ok(patch.apply(&snapshot))
264 }
273 }
265
274
266 /// Return the revlog data.
275 /// Return the revlog data.
267 fn data(&self) -> &[u8] {
276 fn data(&self) -> &[u8] {
268 match self.data_bytes {
277 match self.data_bytes {
269 Some(ref data_bytes) => &data_bytes,
278 Some(ref data_bytes) => &data_bytes,
270 None => panic!(
279 None => panic!(
271 "forgot to load the data or trying to access inline data"
280 "forgot to load the data or trying to access inline data"
272 ),
281 ),
273 }
282 }
274 }
283 }
275
284
276 /// Get an entry of the revlog.
285 /// Get an entry of the revlog.
277 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
286 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
278 let index_entry = self
287 let index_entry = self
279 .index
288 .index
280 .get_entry(rev)
289 .get_entry(rev)
281 .ok_or(RevlogError::InvalidRevision)?;
290 .ok_or(RevlogError::InvalidRevision)?;
282 let start = index_entry.offset();
291 let start = index_entry.offset();
283 let end = start + index_entry.compressed_len();
292 let end = start + index_entry.compressed_len();
284 let data = if self.index.is_inline() {
293 let data = if self.index.is_inline() {
285 self.index.data(start, end)
294 self.index.data(start, end)
286 } else {
295 } else {
287 &self.data()[start..end]
296 &self.data()[start..end]
288 };
297 };
289 let entry = RevlogEntry {
298 let entry = RevlogEntry {
290 rev,
299 rev,
291 bytes: data,
300 bytes: data,
292 compressed_len: index_entry.compressed_len(),
301 compressed_len: index_entry.compressed_len(),
293 uncompressed_len: index_entry.uncompressed_len(),
302 uncompressed_len: index_entry.uncompressed_len(),
294 base_rev: if index_entry.base_revision() == rev {
303 base_rev_or_base_of_delta_chain: if index_entry
304 .base_revision_or_base_of_delta_chain()
305 == rev
306 {
295 None
307 None
296 } else {
308 } else {
297 Some(index_entry.base_revision())
309 Some(index_entry.base_revision_or_base_of_delta_chain())
298 },
310 },
299 };
311 };
300 Ok(entry)
312 Ok(entry)
301 }
313 }
314
315 /// when resolving internal references within revlog, any errors
316 /// should be reported as corruption, instead of e.g. "invalid revision"
317 fn get_entry_internal(
318 &self,
319 rev: Revision,
320 ) -> Result<RevlogEntry, RevlogError> {
321 return self.get_entry(rev).map_err(|_| RevlogError::corrupted());
322 }
302 }
323 }
303
324
304 /// The revlog entry's bytes and the necessary informations to extract
304 /// The revlog entry's bytes and the necessary information to extract
325 /// The revlog entry's bytes and the necessary information to extract
326 /// the entry's data.
306 #[derive(Debug)]
327 #[derive(Debug)]
307 pub struct RevlogEntry<'a> {
328 pub struct RevlogEntry<'a> {
308 rev: Revision,
329 rev: Revision,
309 bytes: &'a [u8],
330 bytes: &'a [u8],
310 compressed_len: usize,
331 compressed_len: usize,
311 uncompressed_len: usize,
332 uncompressed_len: usize,
312 base_rev: Option<Revision>,
333 base_rev_or_base_of_delta_chain: Option<Revision>,
313 }
334 }
314
335
315 impl<'a> RevlogEntry<'a> {
336 impl<'a> RevlogEntry<'a> {
316 pub fn revision(&self) -> Revision {
337 pub fn revision(&self) -> Revision {
317 self.rev
338 self.rev
318 }
339 }
319
340
320 /// Extract the data contained in the entry.
341 /// Extract the data contained in the entry.
321 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
342 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
322 if self.bytes.is_empty() {
343 if self.bytes.is_empty() {
323 return Ok(Cow::Borrowed(&[]));
344 return Ok(Cow::Borrowed(&[]));
324 }
345 }
325 match self.bytes[0] {
346 match self.bytes[0] {
326 // Revision data is the entirety of the entry, including this
347 // Revision data is the entirety of the entry, including this
327 // header.
348 // header.
328 b'\0' => Ok(Cow::Borrowed(self.bytes)),
349 b'\0' => Ok(Cow::Borrowed(self.bytes)),
329 // Raw revision data follows.
350 // Raw revision data follows.
330 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
351 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
331 // zlib (RFC 1950) data.
352 // zlib (RFC 1950) data.
332 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
353 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
333 // zstd data.
354 // zstd data.
334 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
355 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
335 // A proper new format should have had a repo/store requirement.
356 // A proper new format should have had a repo/store requirement.
336 _format_type => Err(RevlogError::corrupted()),
357 _format_type => Err(RevlogError::corrupted()),
337 }
358 }
338 }
359 }
339
360
340 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
361 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
341 let mut decoder = ZlibDecoder::new(self.bytes);
362 let mut decoder = ZlibDecoder::new(self.bytes);
342 if self.is_delta() {
363 if self.is_delta() {
343 let mut buf = Vec::with_capacity(self.compressed_len);
364 let mut buf = Vec::with_capacity(self.compressed_len);
344 decoder
365 decoder
345 .read_to_end(&mut buf)
366 .read_to_end(&mut buf)
346 .map_err(|_| RevlogError::corrupted())?;
367 .map_err(|_| RevlogError::corrupted())?;
347 Ok(buf)
368 Ok(buf)
348 } else {
369 } else {
349 let mut buf = vec![0; self.uncompressed_len];
370 let mut buf = vec![0; self.uncompressed_len];
350 decoder
371 decoder
351 .read_exact(&mut buf)
372 .read_exact(&mut buf)
352 .map_err(|_| RevlogError::corrupted())?;
373 .map_err(|_| RevlogError::corrupted())?;
353 Ok(buf)
374 Ok(buf)
354 }
375 }
355 }
376 }
356
377
357 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
378 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
358 if self.is_delta() {
379 if self.is_delta() {
359 let mut buf = Vec::with_capacity(self.compressed_len);
380 let mut buf = Vec::with_capacity(self.compressed_len);
360 zstd::stream::copy_decode(self.bytes, &mut buf)
381 zstd::stream::copy_decode(self.bytes, &mut buf)
361 .map_err(|_| RevlogError::corrupted())?;
382 .map_err(|_| RevlogError::corrupted())?;
362 Ok(buf)
383 Ok(buf)
363 } else {
384 } else {
364 let mut buf = vec![0; self.uncompressed_len];
385 let mut buf = vec![0; self.uncompressed_len];
365 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
386 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
366 .map_err(|_| RevlogError::corrupted())?;
387 .map_err(|_| RevlogError::corrupted())?;
367 if len != self.uncompressed_len {
388 if len != self.uncompressed_len {
368 Err(RevlogError::corrupted())
389 Err(RevlogError::corrupted())
369 } else {
390 } else {
370 Ok(buf)
391 Ok(buf)
371 }
392 }
372 }
393 }
373 }
394 }
374
395
375 /// Tell if the entry is a snapshot or a delta
396 /// Tell if the entry is a snapshot or a delta
376 /// (influences on decompression).
397 /// (influences on decompression).
377 fn is_delta(&self) -> bool {
398 fn is_delta(&self) -> bool {
378 self.base_rev.is_some()
399 self.base_rev_or_base_of_delta_chain.is_some()
379 }
400 }
380 }
401 }
381
402
382 /// Calculate the hash of a revision given its data and its parents.
403 /// Calculate the hash of a revision given its data and its parents.
383 fn hash(
404 fn hash(
384 data: &[u8],
405 data: &[u8],
385 p1_hash: &[u8],
406 p1_hash: &[u8],
386 p2_hash: &[u8],
407 p2_hash: &[u8],
387 ) -> [u8; NODE_BYTES_LENGTH] {
408 ) -> [u8; NODE_BYTES_LENGTH] {
388 let mut hasher = Sha1::new();
409 let mut hasher = Sha1::new();
389 let (a, b) = (p1_hash, p2_hash);
410 let (a, b) = (p1_hash, p2_hash);
390 if a > b {
411 if a > b {
391 hasher.update(b);
412 hasher.update(b);
392 hasher.update(a);
413 hasher.update(a);
393 } else {
414 } else {
394 hasher.update(a);
415 hasher.update(a);
395 hasher.update(b);
416 hasher.update(b);
396 }
417 }
397 hasher.update(data);
418 hasher.update(data);
398 *hasher.finalize().as_ref()
419 *hasher.finalize().as_ref()
399 }
420 }
@@ -1,29 +1,46 b''
1
2 $ NO_FALLBACK="env RHG_ON_UNSUPPORTED=abort"
1 $ NO_FALLBACK="env RHG_ON_UNSUPPORTED=abort"
3
2
4 $ cat << EOF >> $HGRCPATH
3 $ cat << EOF >> $HGRCPATH
5 > [format]
4 > [format]
6 > sparse-revlog = no
5 > sparse-revlog = no
7 > EOF
6 > EOF
8
7
9 $ hg init repo --config format.generaldelta=no --config format.usegeneraldelta=no
8 $ hg init repo --config format.generaldelta=no --config format.usegeneraldelta=no
10 $ cd repo
9 $ cd repo
11 $ (echo header; seq.py 20) > f
10 $ (echo header; seq.py 20) > f
12 $ hg commit -q -Am initial
11 $ hg commit -q -Am initial
13 $ (echo header; seq.py 20; echo footer) > f
12 $ (echo header; seq.py 20; echo footer) > f
14 $ hg commit -q -Am x
13 $ hg commit -q -Am x
15 $ hg update ".^"
14 $ hg update ".^"
16 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
15 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
17 $ (seq.py 20; echo footer) > f
16 $ (seq.py 20; echo footer) > f
18 $ hg commit -q -Am y
17 $ hg commit -q -Am y
19 $ hg debugdeltachain f --template '{rev} {prevrev} {deltatype}\n'
18 $ hg debugdeltachain f --template '{rev} {prevrev} {deltatype}\n'
20 0 -1 base
19 0 -1 base
21 1 0 prev
20 1 0 prev
22 2 1 prev
21 2 1 prev
23
22
24 rhg breaks on non-generaldelta revlogs:
23 rhg works on non-generaldelta revlogs:
25
24
26 $ $NO_FALLBACK hg cat f -r . | f --sha256 --size
25 $ $NO_FALLBACK hg cat f -r .
27 abort: corrupted revlog (rhg !)
26 1
28 size=0, sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 (rhg !)
27 2
29 size=58, sha256=0cf0386dd4813cc3b957ea790146627dfc0ec42ad3fcf47221b9842e4d5764c1 (no-rhg !)
28 3
29 4
30 5
31 6
32 7
33 8
34 9
35 10
36 11
37 12
38 13
39 14
40 15
41 16
42 17
43 18
44 19
45 20
46 footer
General Comments 0
You need to be logged in to leave comments. Login now