##// END OF EJS Templates
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
Antoine cezar -
r46175:900b9b79 default
parent child Browse files
Show More
@@ -1,328 +1,383 b''
1 use std::ops::Deref;
2
1 use byteorder::{BigEndian, ByteOrder};
3 use byteorder::{BigEndian, ByteOrder};
2
4
3 use crate::revlog::{Revision, NULL_REVISION};
5 use crate::revlog::{Revision, NULL_REVISION};
4
6
5 pub const INDEX_ENTRY_SIZE: usize = 64;
7 pub const INDEX_ENTRY_SIZE: usize = 64;
6
8
7 /// A Revlog index
9 /// A Revlog index
8 #[derive(Debug)]
10 pub struct Index {
9 pub struct Index<'a> {
11 bytes: Box<dyn Deref<Target = [u8]> + Send>,
10 bytes: &'a [u8],
11 /// Offsets of starts of index blocks.
12 /// Offsets of starts of index blocks.
12 /// Only needed when the index is interleaved with data.
13 /// Only needed when the index is interleaved with data.
13 offsets: Option<Vec<usize>>,
14 offsets: Option<Vec<usize>>,
14 }
15 }
15
16
16 impl<'a> Index<'a> {
17 impl Index {
17 /// Create an index from bytes.
18 /// Create an index from bytes.
18 /// Calculate the start of each entry when is_inline is true.
19 /// Calculate the start of each entry when is_inline is true.
19 pub fn new(bytes: &'a [u8], is_inline: bool) -> Self {
20 pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send>) -> Self {
20 if is_inline {
21 if is_inline(&bytes) {
21 let mut offset: usize = 0;
22 let mut offset: usize = 0;
22 let mut offsets = Vec::new();
23 let mut offsets = Vec::new();
23
24
24 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
25 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
25 offsets.push(offset);
26 offsets.push(offset);
26 let end = offset + INDEX_ENTRY_SIZE;
27 let end = offset + INDEX_ENTRY_SIZE;
27 let entry = IndexEntry {
28 let entry = IndexEntry {
28 bytes: &bytes[offset..end],
29 bytes: &bytes[offset..end],
29 offset_override: None,
30 offset_override: None,
30 };
31 };
31
32
32 offset += INDEX_ENTRY_SIZE + entry.compressed_len();
33 offset += INDEX_ENTRY_SIZE + entry.compressed_len();
33 }
34 }
34
35
35 Self {
36 Self {
36 bytes,
37 bytes,
37 offsets: Some(offsets),
38 offsets: Some(offsets),
38 }
39 }
39 } else {
40 } else {
40 Self {
41 Self {
41 bytes,
42 bytes,
42 offsets: None,
43 offsets: None,
43 }
44 }
44 }
45 }
45 }
46 }
46
47
48 /// Value of the inline flag.
49 pub fn is_inline(&self) -> bool {
50 is_inline(&self.bytes)
51 }
52
53 /// Return a slice of bytes if `revlog` is inline. Panic if not.
54 pub fn data(&self, start: usize, end: usize) -> &[u8] {
55 if !self.is_inline() {
56 panic!("tried to access data in the index of a revlog that is not inline");
57 }
58 &self.bytes[start..end]
59 }
60
47 /// Return number of entries of the revlog index.
61 /// Return number of entries of the revlog index.
48 pub fn len(&self) -> usize {
62 pub fn len(&self) -> usize {
49 if let Some(offsets) = &self.offsets {
63 if let Some(offsets) = &self.offsets {
50 offsets.len()
64 offsets.len()
51 } else {
65 } else {
52 self.bytes.len() / INDEX_ENTRY_SIZE
66 self.bytes.len() / INDEX_ENTRY_SIZE
53 }
67 }
54 }
68 }
55
69
56 /// Returns `true` if the `Index` has zero `entries`.
70 /// Returns `true` if the `Index` has zero `entries`.
57 pub fn is_empty(&self) -> bool {
71 pub fn is_empty(&self) -> bool {
58 self.len() == 0
72 self.len() == 0
59 }
73 }
60
74
61 /// Return the index entry corresponding to the given revision if it
75 /// Return the index entry corresponding to the given revision if it
62 /// exists.
76 /// exists.
63 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
77 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
64 if rev == NULL_REVISION {
78 if rev == NULL_REVISION {
65 return None;
79 return None;
66 }
80 }
67 if let Some(offsets) = &self.offsets {
81 if let Some(offsets) = &self.offsets {
68 self.get_entry_inline(rev, offsets)
82 self.get_entry_inline(rev, offsets)
69 } else {
83 } else {
70 self.get_entry_separated(rev)
84 self.get_entry_separated(rev)
71 }
85 }
72 }
86 }
73
87
74 fn get_entry_inline(
88 fn get_entry_inline(
75 &self,
89 &self,
76 rev: Revision,
90 rev: Revision,
77 offsets: &[usize],
91 offsets: &[usize],
78 ) -> Option<IndexEntry> {
92 ) -> Option<IndexEntry> {
79 let start = *offsets.get(rev as usize)?;
93 let start = *offsets.get(rev as usize)?;
80 let end = start.checked_add(INDEX_ENTRY_SIZE)?;
94 let end = start.checked_add(INDEX_ENTRY_SIZE)?;
81 let bytes = &self.bytes[start..end];
95 let bytes = &self.bytes[start..end];
82
96
83 // See IndexEntry for an explanation of this override.
97 // See IndexEntry for an explanation of this override.
84 let offset_override = Some(end);
98 let offset_override = Some(end);
85
99
86 Some(IndexEntry {
100 Some(IndexEntry {
87 bytes,
101 bytes,
88 offset_override,
102 offset_override,
89 })
103 })
90 }
104 }
91
105
92 fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
106 fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
93 let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE;
107 let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE;
94 if rev as usize >= max_rev {
108 if rev as usize >= max_rev {
95 return None;
109 return None;
96 }
110 }
97 let start = rev as usize * INDEX_ENTRY_SIZE;
111 let start = rev as usize * INDEX_ENTRY_SIZE;
98 let end = start + INDEX_ENTRY_SIZE;
112 let end = start + INDEX_ENTRY_SIZE;
99 let bytes = &self.bytes[start..end];
113 let bytes = &self.bytes[start..end];
100
114
101 // Override the offset of the first revision as its bytes are used
115 // Override the offset of the first revision as its bytes are used
102 // for the index's metadata (saving space because it is always 0)
116 // for the index's metadata (saving space because it is always 0)
103 let offset_override = if rev == 0 { Some(0) } else { None };
117 let offset_override = if rev == 0 { Some(0) } else { None };
104
118
105 Some(IndexEntry {
119 Some(IndexEntry {
106 bytes,
120 bytes,
107 offset_override,
121 offset_override,
108 })
122 })
109 }
123 }
110 }
124 }
111
125
112 #[derive(Debug)]
126 #[derive(Debug)]
113 pub struct IndexEntry<'a> {
127 pub struct IndexEntry<'a> {
114 bytes: &'a [u8],
128 bytes: &'a [u8],
115 /// Allows overriding the offset value of the entry.
129 /// Allows overriding the offset value of the entry.
116 ///
130 ///
117 /// For interleaved index and data, the offset stored in the index
131 /// For interleaved index and data, the offset stored in the index
118 /// corresponds to the separated data offset.
132 /// corresponds to the separated data offset.
119 /// It has to be overridden with the actual offset in the interleaved
133 /// It has to be overridden with the actual offset in the interleaved
120 /// index which is just after the index block.
134 /// index which is just after the index block.
121 ///
135 ///
122 /// For separated index and data, the offset stored in the first index
136 /// For separated index and data, the offset stored in the first index
123 /// entry is mixed with the index headers.
137 /// entry is mixed with the index headers.
124 /// It has to be overridden with 0.
138 /// It has to be overridden with 0.
125 offset_override: Option<usize>,
139 offset_override: Option<usize>,
126 }
140 }
127
141
128 impl<'a> IndexEntry<'a> {
142 impl<'a> IndexEntry<'a> {
129 /// Return the offset of the data.
143 /// Return the offset of the data.
130 pub fn offset(&self) -> usize {
144 pub fn offset(&self) -> usize {
131 if let Some(offset_override) = self.offset_override {
145 if let Some(offset_override) = self.offset_override {
132 offset_override
146 offset_override
133 } else {
147 } else {
134 let mut bytes = [0; 8];
148 let mut bytes = [0; 8];
135 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
149 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
136 BigEndian::read_u64(&bytes[..]) as usize
150 BigEndian::read_u64(&bytes[..]) as usize
137 }
151 }
138 }
152 }
139
153
140 /// Return the compressed length of the data.
154 /// Return the compressed length of the data.
141 pub fn compressed_len(&self) -> usize {
155 pub fn compressed_len(&self) -> usize {
142 BigEndian::read_u32(&self.bytes[8..=11]) as usize
156 BigEndian::read_u32(&self.bytes[8..=11]) as usize
143 }
157 }
144
158
145 /// Return the uncompressed length of the data.
159 /// Return the uncompressed length of the data.
146 pub fn uncompressed_len(&self) -> usize {
160 pub fn uncompressed_len(&self) -> usize {
147 BigEndian::read_u32(&self.bytes[12..=15]) as usize
161 BigEndian::read_u32(&self.bytes[12..=15]) as usize
148 }
162 }
149
163
150 /// Return the revision upon which the data has been derived.
164 /// Return the revision upon which the data has been derived.
151 pub fn base_revision(&self) -> Revision {
165 pub fn base_revision(&self) -> Revision {
152 // TODO Maybe return an Option when base_revision == rev?
166 // TODO Maybe return an Option when base_revision == rev?
153 // Requires to add rev to IndexEntry
167 // Requires to add rev to IndexEntry
154
168
155 BigEndian::read_i32(&self.bytes[16..])
169 BigEndian::read_i32(&self.bytes[16..])
156 }
170 }
157
171
158 pub fn p1(&self) -> Revision {
172 pub fn p1(&self) -> Revision {
159 BigEndian::read_i32(&self.bytes[24..])
173 BigEndian::read_i32(&self.bytes[24..])
160 }
174 }
161
175
162 pub fn p2(&self) -> Revision {
176 pub fn p2(&self) -> Revision {
163 BigEndian::read_i32(&self.bytes[28..])
177 BigEndian::read_i32(&self.bytes[28..])
164 }
178 }
165
179
166 /// Return the hash of revision's full text.
180 /// Return the hash of revision's full text.
167 ///
181 ///
168 /// Currently, SHA-1 is used and only the first 20 bytes of this field
182 /// Currently, SHA-1 is used and only the first 20 bytes of this field
169 /// are used.
183 /// are used.
170 pub fn hash(&self) -> &[u8] {
184 pub fn hash(&self) -> &[u8] {
171 &self.bytes[32..52]
185 &self.bytes[32..52]
172 }
186 }
173 }
187 }
174
188
189 /// Value of the inline flag.
190 pub fn is_inline(index_bytes: &[u8]) -> bool {
191 match &index_bytes[0..=1] {
192 [0, 0] | [0, 2] => false,
193 _ => true,
194 }
195 }
196
175 #[cfg(test)]
197 #[cfg(test)]
176 mod tests {
198 mod tests {
177 use super::*;
199 use super::*;
178
200
179 #[cfg(test)]
201 #[cfg(test)]
180 #[derive(Debug, Copy, Clone)]
202 #[derive(Debug, Copy, Clone)]
181 pub struct IndexEntryBuilder {
203 pub struct IndexEntryBuilder {
182 is_first: bool,
204 is_first: bool,
183 is_inline: bool,
205 is_inline: bool,
184 is_general_delta: bool,
206 is_general_delta: bool,
185 version: u16,
207 version: u16,
186 offset: usize,
208 offset: usize,
187 compressed_len: usize,
209 compressed_len: usize,
188 uncompressed_len: usize,
210 uncompressed_len: usize,
189 base_revision: Revision,
211 base_revision: Revision,
190 }
212 }
191
213
192 #[cfg(test)]
214 #[cfg(test)]
193 impl IndexEntryBuilder {
215 impl IndexEntryBuilder {
194 pub fn new() -> Self {
216 pub fn new() -> Self {
195 Self {
217 Self {
196 is_first: false,
218 is_first: false,
197 is_inline: false,
219 is_inline: false,
198 is_general_delta: true,
220 is_general_delta: true,
199 version: 2,
221 version: 2,
200 offset: 0,
222 offset: 0,
201 compressed_len: 0,
223 compressed_len: 0,
202 uncompressed_len: 0,
224 uncompressed_len: 0,
203 base_revision: 0,
225 base_revision: 0,
204 }
226 }
205 }
227 }
206
228
207 pub fn is_first(&mut self, value: bool) -> &mut Self {
229 pub fn is_first(&mut self, value: bool) -> &mut Self {
208 self.is_first = value;
230 self.is_first = value;
209 self
231 self
210 }
232 }
211
233
212 pub fn with_inline(&mut self, value: bool) -> &mut Self {
234 pub fn with_inline(&mut self, value: bool) -> &mut Self {
213 self.is_inline = value;
235 self.is_inline = value;
214 self
236 self
215 }
237 }
216
238
217 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
239 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
218 self.is_general_delta = value;
240 self.is_general_delta = value;
219 self
241 self
220 }
242 }
221
243
222 pub fn with_version(&mut self, value: u16) -> &mut Self {
244 pub fn with_version(&mut self, value: u16) -> &mut Self {
223 self.version = value;
245 self.version = value;
224 self
246 self
225 }
247 }
226
248
227 pub fn with_offset(&mut self, value: usize) -> &mut Self {
249 pub fn with_offset(&mut self, value: usize) -> &mut Self {
228 self.offset = value;
250 self.offset = value;
229 self
251 self
230 }
252 }
231
253
232 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
254 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
233 self.compressed_len = value;
255 self.compressed_len = value;
234 self
256 self
235 }
257 }
236
258
237 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
259 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
238 self.uncompressed_len = value;
260 self.uncompressed_len = value;
239 self
261 self
240 }
262 }
241
263
242 pub fn with_base_revision(&mut self, value: Revision) -> &mut Self {
264 pub fn with_base_revision(&mut self, value: Revision) -> &mut Self {
243 self.base_revision = value;
265 self.base_revision = value;
244 self
266 self
245 }
267 }
246
268
247 pub fn build(&self) -> Vec<u8> {
269 pub fn build(&self) -> Vec<u8> {
248 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
270 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
249 if self.is_first {
271 if self.is_first {
250 bytes.extend(&match (self.is_general_delta, self.is_inline) {
272 bytes.extend(&match (self.is_general_delta, self.is_inline) {
251 (false, false) => [0u8, 0],
273 (false, false) => [0u8, 0],
252 (false, true) => [0u8, 1],
274 (false, true) => [0u8, 1],
253 (true, false) => [0u8, 2],
275 (true, false) => [0u8, 2],
254 (true, true) => [0u8, 3],
276 (true, true) => [0u8, 3],
255 });
277 });
256 bytes.extend(&self.version.to_be_bytes());
278 bytes.extend(&self.version.to_be_bytes());
257 // Remaining offset bytes.
279 // Remaining offset bytes.
258 bytes.extend(&[0u8; 2]);
280 bytes.extend(&[0u8; 2]);
259 } else {
281 } else {
260 // Offset is only 6 bytes while usize is 8.
282 // Offset is only 6 bytes while usize is 8.
261 bytes.extend(&self.offset.to_be_bytes()[2..]);
283 bytes.extend(&self.offset.to_be_bytes()[2..]);
262 }
284 }
263 bytes.extend(&[0u8; 2]); // Revision flags.
285 bytes.extend(&[0u8; 2]); // Revision flags.
264 bytes.extend(&self.compressed_len.to_be_bytes()[4..]);
286 bytes.extend(&self.compressed_len.to_be_bytes()[4..]);
265 bytes.extend(&self.uncompressed_len.to_be_bytes()[4..]);
287 bytes.extend(&self.uncompressed_len.to_be_bytes()[4..]);
266 bytes.extend(&self.base_revision.to_be_bytes());
288 bytes.extend(&self.base_revision.to_be_bytes());
267 bytes
289 bytes
268 }
290 }
269 }
291 }
270
292
271 #[test]
293 #[test]
294 fn is_not_inline_when_no_inline_flag_test() {
295 let bytes = IndexEntryBuilder::new()
296 .is_first(true)
297 .with_general_delta(false)
298 .with_inline(false)
299 .build();
300
301 assert_eq!(is_inline(&bytes), false)
302 }
303
304 #[test]
305 fn is_inline_when_inline_flag_test() {
306 let bytes = IndexEntryBuilder::new()
307 .is_first(true)
308 .with_general_delta(false)
309 .with_inline(true)
310 .build();
311
312 assert_eq!(is_inline(&bytes), true)
313 }
314
315 #[test]
316 fn is_inline_when_inline_and_generaldelta_flags_test() {
317 let bytes = IndexEntryBuilder::new()
318 .is_first(true)
319 .with_general_delta(true)
320 .with_inline(true)
321 .build();
322
323 assert_eq!(is_inline(&bytes), true)
324 }
325
326 #[test]
272 fn test_offset() {
327 fn test_offset() {
273 let bytes = IndexEntryBuilder::new().with_offset(1).build();
328 let bytes = IndexEntryBuilder::new().with_offset(1).build();
274 let entry = IndexEntry {
329 let entry = IndexEntry {
275 bytes: &bytes,
330 bytes: &bytes,
276 offset_override: None,
331 offset_override: None,
277 };
332 };
278
333
279 assert_eq!(entry.offset(), 1)
334 assert_eq!(entry.offset(), 1)
280 }
335 }
281
336
282 #[test]
337 #[test]
283 fn test_with_overridden_offset() {
338 fn test_with_overridden_offset() {
284 let bytes = IndexEntryBuilder::new().with_offset(1).build();
339 let bytes = IndexEntryBuilder::new().with_offset(1).build();
285 let entry = IndexEntry {
340 let entry = IndexEntry {
286 bytes: &bytes,
341 bytes: &bytes,
287 offset_override: Some(2),
342 offset_override: Some(2),
288 };
343 };
289
344
290 assert_eq!(entry.offset(), 2)
345 assert_eq!(entry.offset(), 2)
291 }
346 }
292
347
293 #[test]
348 #[test]
294 fn test_compressed_len() {
349 fn test_compressed_len() {
295 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
350 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
296 let entry = IndexEntry {
351 let entry = IndexEntry {
297 bytes: &bytes,
352 bytes: &bytes,
298 offset_override: None,
353 offset_override: None,
299 };
354 };
300
355
301 assert_eq!(entry.compressed_len(), 1)
356 assert_eq!(entry.compressed_len(), 1)
302 }
357 }
303
358
304 #[test]
359 #[test]
305 fn test_uncompressed_len() {
360 fn test_uncompressed_len() {
306 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
361 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
307 let entry = IndexEntry {
362 let entry = IndexEntry {
308 bytes: &bytes,
363 bytes: &bytes,
309 offset_override: None,
364 offset_override: None,
310 };
365 };
311
366
312 assert_eq!(entry.uncompressed_len(), 1)
367 assert_eq!(entry.uncompressed_len(), 1)
313 }
368 }
314
369
315 #[test]
370 #[test]
316 fn test_base_revision() {
371 fn test_base_revision() {
317 let bytes = IndexEntryBuilder::new().with_base_revision(1).build();
372 let bytes = IndexEntryBuilder::new().with_base_revision(1).build();
318 let entry = IndexEntry {
373 let entry = IndexEntry {
319 bytes: &bytes,
374 bytes: &bytes,
320 offset_override: None,
375 offset_override: None,
321 };
376 };
322
377
323 assert_eq!(entry.base_revision(), 1)
378 assert_eq!(entry.base_revision(), 1)
324 }
379 }
325 }
380 }
326
381
327 #[cfg(test)]
382 #[cfg(test)]
328 pub use tests::IndexEntryBuilder;
383 pub use tests::IndexEntryBuilder;
@@ -1,445 +1,332 b''
1 use std::borrow::Cow;
1 use std::borrow::Cow;
2 use std::fs::File;
2 use std::fs::File;
3 use std::io::Read;
3 use std::io::Read;
4 use std::ops::Deref;
4 use std::ops::Deref;
5 use std::path::Path;
5 use std::path::Path;
6
6
7 use byteorder::{BigEndian, ByteOrder};
7 use byteorder::{BigEndian, ByteOrder};
8 use crypto::digest::Digest;
8 use crypto::digest::Digest;
9 use crypto::sha1::Sha1;
9 use crypto::sha1::Sha1;
10 use flate2::read::ZlibDecoder;
10 use flate2::read::ZlibDecoder;
11 use memmap::{Mmap, MmapOptions};
11 use memmap::{Mmap, MmapOptions};
12 use micro_timer::timed;
12 use micro_timer::timed;
13 use zstd;
13 use zstd;
14
14
15 use super::index::Index;
15 use super::index::Index;
16 use super::node::{NODE_BYTES_LENGTH, NULL_NODE_ID};
16 use super::node::{NODE_BYTES_LENGTH, NULL_NODE_ID};
17 use super::patch;
17 use super::patch;
18 use crate::revlog::Revision;
18 use crate::revlog::Revision;
19
19
20 pub enum RevlogError {
20 pub enum RevlogError {
21 IoError(std::io::Error),
21 IoError(std::io::Error),
22 UnsuportedVersion(u16),
22 UnsuportedVersion(u16),
23 InvalidRevision,
23 InvalidRevision,
24 Corrupted,
24 Corrupted,
25 UnknowDataFormat(u8),
25 UnknowDataFormat(u8),
26 }
26 }
27
27
28 fn mmap_open(path: &Path) -> Result<Mmap, std::io::Error> {
28 fn mmap_open(path: &Path) -> Result<Mmap, std::io::Error> {
29 let file = File::open(path)?;
29 let file = File::open(path)?;
30 let mmap = unsafe { MmapOptions::new().map(&file) }?;
30 let mmap = unsafe { MmapOptions::new().map(&file) }?;
31 Ok(mmap)
31 Ok(mmap)
32 }
32 }
33
33
34 /// Read only implementation of revlog.
34 /// Read only implementation of revlog.
35 pub struct Revlog {
35 pub struct Revlog {
36 /// When index and data are not interleaved: bytes of the revlog index.
36 /// When index and data are not interleaved: bytes of the revlog index.
37 /// When index and data are interleaved: bytes of the revlog index and
37 /// When index and data are interleaved: bytes of the revlog index and
38 /// data.
38 /// data.
39 index_bytes: Box<dyn Deref<Target = [u8]> + Send>,
39 index: Index,
40 /// When index and data are not interleaved: bytes of the revlog data
40 /// When index and data are not interleaved: bytes of the revlog data
41 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
41 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
42 }
42 }
43
43
44 impl Revlog {
44 impl Revlog {
45 /// Open a revlog index file.
45 /// Open a revlog index file.
46 ///
46 ///
47 /// It will also open the associated data file if index and data are not
47 /// It will also open the associated data file if index and data are not
48 /// interleaved.
48 /// interleaved.
49 #[timed]
49 #[timed]
50 pub fn open(index_path: &Path) -> Result<Self, RevlogError> {
50 pub fn open(index_path: &Path) -> Result<Self, RevlogError> {
51 let index_mmap =
51 let index_mmap =
52 mmap_open(&index_path).map_err(RevlogError::IoError)?;
52 mmap_open(&index_path).map_err(RevlogError::IoError)?;
53
53
54 let version = get_version(&index_mmap);
54 let version = get_version(&index_mmap);
55 if version != 1 {
55 if version != 1 {
56 return Err(RevlogError::UnsuportedVersion(version));
56 return Err(RevlogError::UnsuportedVersion(version));
57 }
57 }
58
58
59 let is_inline = is_inline(&index_mmap);
59 let index = Index::new(Box::new(index_mmap));
60
61 let index_bytes = Box::new(index_mmap);
62
60
63 // TODO load data only when needed //
61 // TODO load data only when needed //
64 // type annotation required
62 // type annotation required
65 // won't recognize Mmap as Deref<Target = [u8]>
63 // won't recognize Mmap as Deref<Target = [u8]>
66 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
64 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
67 if is_inline {
65 if index.is_inline() {
68 None
66 None
69 } else {
67 } else {
70 let data_path = index_path.with_extension("d");
68 let data_path = index_path.with_extension("d");
71 let data_mmap =
69 let data_mmap =
72 mmap_open(&data_path).map_err(RevlogError::IoError)?;
70 mmap_open(&data_path).map_err(RevlogError::IoError)?;
73 Some(Box::new(data_mmap))
71 Some(Box::new(data_mmap))
74 };
72 };
75
73
76 Ok(Revlog {
74 Ok(Revlog { index, data_bytes })
77 index_bytes,
78 data_bytes,
79 })
80 }
75 }
81
76
82 /// Return number of entries of the `Revlog`.
77 /// Return number of entries of the `Revlog`.
83 pub fn len(&self) -> usize {
78 pub fn len(&self) -> usize {
84 self.index().len()
79 self.index.len()
85 }
80 }
86
81
87 /// Returns `true` if the `Revlog` has zero `entries`.
82 /// Returns `true` if the `Revlog` has zero `entries`.
88 pub fn is_empty(&self) -> bool {
83 pub fn is_empty(&self) -> bool {
89 self.index().is_empty()
84 self.index.is_empty()
90 }
85 }
91
86
92 /// Return the revision whose hash matches the given node.
87 /// Return the revision whose hash matches the given node.
93 #[timed]
88 #[timed]
94 pub fn get_node_rev(&self, node: &[u8]) -> Result<Revision, RevlogError> {
89 pub fn get_node_rev(&self, node: &[u8]) -> Result<Revision, RevlogError> {
95 let index = self.index();
96 // This is brute force. But it is fast enough for now.
90 // This is brute force. But it is fast enough for now.
97 // Optimization will come later.
91 // Optimization will come later.
98 for rev in (0..self.len() as Revision).rev() {
92 for rev in (0..self.len() as Revision).rev() {
99 let index_entry =
93 let index_entry =
100 index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
94 self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
101 if node == index_entry.hash() {
95 if node == index_entry.hash() {
102 return Ok(rev);
96 return Ok(rev);
103 }
97 }
104 }
98 }
105 Err(RevlogError::InvalidRevision)
99 Err(RevlogError::InvalidRevision)
106 }
100 }
107
101
108 /// Return the full data associated to a revision.
102 /// Return the full data associated to a revision.
109 ///
103 ///
110 /// All entries required to build the final data out of deltas will be
104 /// All entries required to build the final data out of deltas will be
111 /// retrieved as needed, and the deltas will be applied to the initial
105 /// retrieved as needed, and the deltas will be applied to the initial
112 /// snapshot to rebuild the final data.
106 /// snapshot to rebuild the final data.
113 #[timed]
107 #[timed]
114 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
108 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
115 // Todo return -> Cow
109 // Todo return -> Cow
116 let mut entry = self.get_entry(rev)?;
110 let mut entry = self.get_entry(rev)?;
117 let mut delta_chain = vec![];
111 let mut delta_chain = vec![];
118 while let Some(base_rev) = entry.base_rev {
112 while let Some(base_rev) = entry.base_rev {
119 delta_chain.push(entry);
113 delta_chain.push(entry);
120 entry = self
114 entry = self
121 .get_entry(base_rev)
115 .get_entry(base_rev)
122 .map_err(|_| RevlogError::Corrupted)?;
116 .map_err(|_| RevlogError::Corrupted)?;
123 }
117 }
124
118
125 // TODO do not look twice in the index
119 // TODO do not look twice in the index
126 let index = self.index();
120 let index_entry = self
127 let index_entry =
121 .index
128 index.get_entry(rev).ok_or(RevlogError::InvalidRevision)?;
122 .get_entry(rev)
123 .ok_or(RevlogError::InvalidRevision)?;
129
124
130 let data: Vec<u8> = if delta_chain.is_empty() {
125 let data: Vec<u8> = if delta_chain.is_empty() {
131 entry.data()?.into()
126 entry.data()?.into()
132 } else {
127 } else {
133 Revlog::build_data_from_deltas(entry, &delta_chain)?
128 Revlog::build_data_from_deltas(entry, &delta_chain)?
134 };
129 };
135
130
136 if self.check_hash(
131 if self.check_hash(
137 index_entry.p1(),
132 index_entry.p1(),
138 index_entry.p2(),
133 index_entry.p2(),
139 index_entry.hash(),
134 index_entry.hash(),
140 &data,
135 &data,
141 ) {
136 ) {
142 Ok(data)
137 Ok(data)
143 } else {
138 } else {
144 Err(RevlogError::Corrupted)
139 Err(RevlogError::Corrupted)
145 }
140 }
146 }
141 }
147
142
148 /// Check the hash of some given data against the recorded hash.
143 /// Check the hash of some given data against the recorded hash.
149 pub fn check_hash(
144 pub fn check_hash(
150 &self,
145 &self,
151 p1: Revision,
146 p1: Revision,
152 p2: Revision,
147 p2: Revision,
153 expected: &[u8],
148 expected: &[u8],
154 data: &[u8],
149 data: &[u8],
155 ) -> bool {
150 ) -> bool {
156 let index = self.index();
151 let e1 = self.index.get_entry(p1);
157 let e1 = index.get_entry(p1);
158 let h1 = match e1 {
152 let h1 = match e1 {
159 Some(ref entry) => entry.hash(),
153 Some(ref entry) => entry.hash(),
160 None => &NULL_NODE_ID,
154 None => &NULL_NODE_ID,
161 };
155 };
162 let e2 = index.get_entry(p2);
156 let e2 = self.index.get_entry(p2);
163 let h2 = match e2 {
157 let h2 = match e2 {
164 Some(ref entry) => entry.hash(),
158 Some(ref entry) => entry.hash(),
165 None => &NULL_NODE_ID,
159 None => &NULL_NODE_ID,
166 };
160 };
167
161
168 hash(data, &h1, &h2).as_slice() == expected
162 hash(data, &h1, &h2).as_slice() == expected
169 }
163 }
170
164
171 /// Build the full data of a revision out of its snapshot
165 /// Build the full data of a revision out of its snapshot
172 /// and its deltas.
166 /// and its deltas.
173 #[timed]
167 #[timed]
174 fn build_data_from_deltas(
168 fn build_data_from_deltas(
175 snapshot: RevlogEntry,
169 snapshot: RevlogEntry,
176 deltas: &[RevlogEntry],
170 deltas: &[RevlogEntry],
177 ) -> Result<Vec<u8>, RevlogError> {
171 ) -> Result<Vec<u8>, RevlogError> {
178 let snapshot = snapshot.data()?;
172 let snapshot = snapshot.data()?;
179 let deltas = deltas
173 let deltas = deltas
180 .iter()
174 .iter()
181 .rev()
175 .rev()
182 .map(RevlogEntry::data)
176 .map(RevlogEntry::data)
183 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
177 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
184 let patches: Vec<_> =
178 let patches: Vec<_> =
185 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
179 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
186 let patch = patch::fold_patch_lists(&patches);
180 let patch = patch::fold_patch_lists(&patches);
187 Ok(patch.apply(&snapshot))
181 Ok(patch.apply(&snapshot))
188 }
182 }
189
183
190 /// Return the revlog index.
191 pub fn index(&self) -> Index {
192 let is_inline = self.data_bytes.is_none();
193 Index::new(&self.index_bytes, is_inline)
194 }
195
196 /// Return the revlog data.
184 /// Return the revlog data.
197 fn data(&self) -> &[u8] {
185 fn data(&self) -> &[u8] {
198 match self.data_bytes {
186 match self.data_bytes {
199 Some(ref data_bytes) => &data_bytes,
187 Some(ref data_bytes) => &data_bytes,
200 None => &self.index_bytes,
188 None => panic!(
189 "forgot to load the data or trying to access inline data"
190 ),
201 }
191 }
202 }
192 }
203
193
204 /// Get an entry of the revlog.
194 /// Get an entry of the revlog.
205 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
195 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
206 let index = self.index();
196 let index_entry = self
207 let index_entry =
197 .index
208 index.get_entry(rev).ok_or(RevlogError::InvalidRevision)?;
198 .get_entry(rev)
199 .ok_or(RevlogError::InvalidRevision)?;
209 let start = index_entry.offset();
200 let start = index_entry.offset();
210 let end = start + index_entry.compressed_len();
201 let end = start + index_entry.compressed_len();
202 let data = if self.index.is_inline() {
203 self.index.data(start, end)
204 } else {
205 &self.data()[start..end]
206 };
211 let entry = RevlogEntry {
207 let entry = RevlogEntry {
212 rev,
208 rev,
213 bytes: &self.data()[start..end],
209 bytes: data,
214 compressed_len: index_entry.compressed_len(),
210 compressed_len: index_entry.compressed_len(),
215 uncompressed_len: index_entry.uncompressed_len(),
211 uncompressed_len: index_entry.uncompressed_len(),
216 base_rev: if index_entry.base_revision() == rev {
212 base_rev: if index_entry.base_revision() == rev {
217 None
213 None
218 } else {
214 } else {
219 Some(index_entry.base_revision())
215 Some(index_entry.base_revision())
220 },
216 },
221 };
217 };
222 Ok(entry)
218 Ok(entry)
223 }
219 }
224 }
220 }
225
221
226 /// The revlog entry's bytes and the necessary information to extract
222 /// The revlog entry's bytes and the necessary information to extract
227 /// the entry's data.
223 /// the entry's data.
228 #[derive(Debug)]
224 #[derive(Debug)]
229 pub struct RevlogEntry<'a> {
225 pub struct RevlogEntry<'a> {
230 rev: Revision,
226 rev: Revision,
231 bytes: &'a [u8],
227 bytes: &'a [u8],
232 compressed_len: usize,
228 compressed_len: usize,
233 uncompressed_len: usize,
229 uncompressed_len: usize,
234 base_rev: Option<Revision>,
230 base_rev: Option<Revision>,
235 }
231 }
236
232
237 impl<'a> RevlogEntry<'a> {
233 impl<'a> RevlogEntry<'a> {
238 /// Extract the data contained in the entry.
234 /// Extract the data contained in the entry.
239 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
235 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
240 if self.bytes.is_empty() {
236 if self.bytes.is_empty() {
241 return Ok(Cow::Borrowed(&[]));
237 return Ok(Cow::Borrowed(&[]));
242 }
238 }
243 match self.bytes[0] {
239 match self.bytes[0] {
244 // Revision data is the entirety of the entry, including this
240 // Revision data is the entirety of the entry, including this
245 // header.
241 // header.
246 b'\0' => Ok(Cow::Borrowed(self.bytes)),
242 b'\0' => Ok(Cow::Borrowed(self.bytes)),
247 // Raw revision data follows.
243 // Raw revision data follows.
248 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
244 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
249 // zlib (RFC 1950) data.
245 // zlib (RFC 1950) data.
250 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
246 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
251 // zstd data.
247 // zstd data.
252 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
248 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
253 format_type => Err(RevlogError::UnknowDataFormat(format_type)),
249 format_type => Err(RevlogError::UnknowDataFormat(format_type)),
254 }
250 }
255 }
251 }
256
252
257 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
253 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
258 let mut decoder = ZlibDecoder::new(self.bytes);
254 let mut decoder = ZlibDecoder::new(self.bytes);
259 if self.is_delta() {
255 if self.is_delta() {
260 let mut buf = Vec::with_capacity(self.compressed_len);
256 let mut buf = Vec::with_capacity(self.compressed_len);
261 decoder
257 decoder
262 .read_to_end(&mut buf)
258 .read_to_end(&mut buf)
263 .or(Err(RevlogError::Corrupted))?;
259 .or(Err(RevlogError::Corrupted))?;
264 Ok(buf)
260 Ok(buf)
265 } else {
261 } else {
266 let mut buf = vec![0; self.uncompressed_len];
262 let mut buf = vec![0; self.uncompressed_len];
267 decoder
263 decoder
268 .read_exact(&mut buf)
264 .read_exact(&mut buf)
269 .or(Err(RevlogError::Corrupted))?;
265 .or(Err(RevlogError::Corrupted))?;
270 Ok(buf)
266 Ok(buf)
271 }
267 }
272 }
268 }
273
269
274 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
270 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
275 if self.is_delta() {
271 if self.is_delta() {
276 let mut buf = Vec::with_capacity(self.compressed_len);
272 let mut buf = Vec::with_capacity(self.compressed_len);
277 zstd::stream::copy_decode(self.bytes, &mut buf)
273 zstd::stream::copy_decode(self.bytes, &mut buf)
278 .or(Err(RevlogError::Corrupted))?;
274 .or(Err(RevlogError::Corrupted))?;
279 Ok(buf)
275 Ok(buf)
280 } else {
276 } else {
281 let mut buf = vec![0; self.uncompressed_len];
277 let mut buf = vec![0; self.uncompressed_len];
282 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
278 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
283 .or(Err(RevlogError::Corrupted))?;
279 .or(Err(RevlogError::Corrupted))?;
284 if len != self.uncompressed_len {
280 if len != self.uncompressed_len {
285 Err(RevlogError::Corrupted)
281 Err(RevlogError::Corrupted)
286 } else {
282 } else {
287 Ok(buf)
283 Ok(buf)
288 }
284 }
289 }
285 }
290 }
286 }
291
287
292 /// Tell if the entry is a snapshot or a delta
288 /// Tell if the entry is a snapshot or a delta
293 /// (influences decompression).
289 /// (influences decompression).
294 fn is_delta(&self) -> bool {
290 fn is_delta(&self) -> bool {
295 self.base_rev.is_some()
291 self.base_rev.is_some()
296 }
292 }
297 }
293 }
298
294
299 /// Value of the inline flag.
300 pub fn is_inline(index_bytes: &[u8]) -> bool {
301 match &index_bytes[0..=1] {
302 [0, 0] | [0, 2] => false,
303 _ => true,
304 }
305 }
306
307 /// Format version of the revlog.
295 /// Format version of the revlog.
308 pub fn get_version(index_bytes: &[u8]) -> u16 {
296 pub fn get_version(index_bytes: &[u8]) -> u16 {
309 BigEndian::read_u16(&index_bytes[2..=3])
297 BigEndian::read_u16(&index_bytes[2..=3])
310 }
298 }
311
299
312 /// Calculate the hash of a revision given its data and its parents.
300 /// Calculate the hash of a revision given its data and its parents.
313 fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
301 fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
314 let mut hasher = Sha1::new();
302 let mut hasher = Sha1::new();
315 let (a, b) = (p1_hash, p2_hash);
303 let (a, b) = (p1_hash, p2_hash);
316 if a > b {
304 if a > b {
317 hasher.input(b);
305 hasher.input(b);
318 hasher.input(a);
306 hasher.input(a);
319 } else {
307 } else {
320 hasher.input(a);
308 hasher.input(a);
321 hasher.input(b);
309 hasher.input(b);
322 }
310 }
323 hasher.input(data);
311 hasher.input(data);
324 let mut hash = vec![0; NODE_BYTES_LENGTH];
312 let mut hash = vec![0; NODE_BYTES_LENGTH];
325 hasher.result(&mut hash);
313 hasher.result(&mut hash);
326 hash
314 hash
327 }
315 }
328
316
329 #[cfg(test)]
317 #[cfg(test)]
330 mod tests {
318 mod tests {
331 use super::*;
319 use super::*;
332
320
333 use super::super::index::IndexEntryBuilder;
321 use super::super::index::IndexEntryBuilder;
334
322
335 #[cfg(test)]
336 pub struct RevlogBuilder {
337 version: u16,
338 is_general_delta: bool,
339 is_inline: bool,
340 offset: usize,
341 index: Vec<Vec<u8>>,
342 data: Vec<Vec<u8>>,
343 }
344
345 #[cfg(test)]
346 impl RevlogBuilder {
347 pub fn new() -> Self {
348 Self {
349 version: 2,
350 is_inline: false,
351 is_general_delta: true,
352 offset: 0,
353 index: vec![],
354 data: vec![],
355 }
356 }
357
358 pub fn with_inline(&mut self, value: bool) -> &mut Self {
359 self.is_inline = value;
360 self
361 }
362
363 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
364 self.is_general_delta = value;
365 self
366 }
367
368 pub fn with_version(&mut self, value: u16) -> &mut Self {
369 self.version = value;
370 self
371 }
372
373 pub fn push(
374 &mut self,
375 mut index: IndexEntryBuilder,
376 data: Vec<u8>,
377 ) -> &mut Self {
378 if self.index.is_empty() {
379 index.is_first(true);
380 index.with_general_delta(self.is_general_delta);
381 index.with_inline(self.is_inline);
382 index.with_version(self.version);
383 } else {
384 index.with_offset(self.offset);
385 }
386 self.index.push(index.build());
387 self.offset += data.len();
388 self.data.push(data);
389 self
390 }
391
392 pub fn build_inline(&self) -> Vec<u8> {
393 let mut bytes =
394 Vec::with_capacity(self.index.len() + self.data.len());
395 for (index, data) in self.index.iter().zip(self.data.iter()) {
396 bytes.extend(index);
397 bytes.extend(data);
398 }
399 bytes
400 }
401 }
402
403 #[test]
404 fn is_not_inline_when_no_inline_flag_test() {
405 let bytes = RevlogBuilder::new()
406 .with_general_delta(false)
407 .with_inline(false)
408 .push(IndexEntryBuilder::new(), vec![])
409 .build_inline();
410
411 assert_eq!(is_inline(&bytes), false)
412 }
413
414 #[test]
415 fn is_inline_when_inline_flag_test() {
416 let bytes = RevlogBuilder::new()
417 .with_general_delta(false)
418 .with_inline(true)
419 .push(IndexEntryBuilder::new(), vec![])
420 .build_inline();
421
422 assert_eq!(is_inline(&bytes), true)
423 }
424
425 #[test]
426 fn is_inline_when_inline_and_generaldelta_flags_test() {
427 let bytes = RevlogBuilder::new()
428 .with_general_delta(true)
429 .with_inline(true)
430 .push(IndexEntryBuilder::new(), vec![])
431 .build_inline();
432
433 assert_eq!(is_inline(&bytes), true)
434 }
435
436 #[test]
323 #[test]
437 fn version_test() {
324 fn version_test() {
438 let bytes = RevlogBuilder::new()
325 let bytes = IndexEntryBuilder::new()
326 .is_first(true)
439 .with_version(1)
327 .with_version(1)
440 .push(IndexEntryBuilder::new(), vec![])
328 .build();
441 .build_inline();
442
329
443 assert_eq!(get_version(&bytes), 1)
330 assert_eq!(get_version(&bytes), 1)
444 }
331 }
445 }
332 }
General Comments 0
You need to be logged in to leave comments. Login now