##// END OF EJS Templates
hg-core: add `Revlog.get_node_rev`...
Antoine Cezar -
r46105:4f11a67a default
parent child Browse files
Show More
@@ -1,412 +1,438 b''
1 1 use std::borrow::Cow;
2 2 use std::fs::File;
3 3 use std::io::Read;
4 4 use std::ops::Deref;
5 5 use std::path::Path;
6 6
7 7 use byteorder::{BigEndian, ByteOrder};
8 8 use crypto::digest::Digest;
9 9 use crypto::sha1::Sha1;
10 10 use flate2::read::ZlibDecoder;
11 11 use memmap::{Mmap, MmapOptions};
12 12 use micro_timer::timed;
13 13 use zstd;
14 14
15 15 use super::index::Index;
16 16 use super::node::{NODE_BYTES_LENGTH, NULL_NODE_ID};
17 17 use super::patch;
18 18 use crate::revlog::Revision;
19 19
20 20 pub enum RevlogError {
21 21 IoError(std::io::Error),
22 22 UnsuportedVersion(u16),
23 23 InvalidRevision,
24 24 Corrupted,
25 25 UnknowDataFormat(u8),
26 26 }
27 27
28 28 fn mmap_open(path: &Path) -> Result<Mmap, std::io::Error> {
29 29 let file = File::open(path)?;
30 30 let mmap = unsafe { MmapOptions::new().map(&file) }?;
31 31 Ok(mmap)
32 32 }
33 33
34 34 /// Read only implementation of revlog.
35 35 pub struct Revlog {
36 36 /// When index and data are not interleaved: bytes of the revlog index.
37 37 /// When index and data are interleaved: bytes of the revlog index and
38 38 /// data.
39 39 index_bytes: Box<dyn Deref<Target = [u8]> + Send>,
40 40 /// When index and data are not interleaved: bytes of the revlog data
41 41 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
42 42 }
43 43
44 44 impl Revlog {
45 45 /// Open a revlog index file.
46 46 ///
47 47 /// It will also open the associated data file if index and data are not
48 48 /// interleaved.
49 49 #[timed]
50 50 pub fn open(index_path: &Path) -> Result<Self, RevlogError> {
51 51 let index_mmap =
52 52 mmap_open(&index_path).map_err(RevlogError::IoError)?;
53 53
54 54 let version = get_version(&index_mmap);
55 55 if version != 1 {
56 56 return Err(RevlogError::UnsuportedVersion(version));
57 57 }
58 58
59 59 let is_inline = is_inline(&index_mmap);
60 60
61 61 let index_bytes = Box::new(index_mmap);
62 62
63 63 // TODO load data only when needed //
64 64 // type annotation required
65 65 // won't recognize Mmap as Deref<Target = [u8]>
66 66 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
67 67 if is_inline {
68 68 None
69 69 } else {
70 70 let data_path = index_path.with_extension("d");
71 71 let data_mmap =
72 72 mmap_open(&data_path).map_err(RevlogError::IoError)?;
73 73 Some(Box::new(data_mmap))
74 74 };
75 75
76 76 Ok(Revlog {
77 77 index_bytes,
78 78 data_bytes,
79 79 })
80 80 }
81 81
82 /// Return number of entries of the `Revlog`.
83 pub fn len(&self) -> usize {
84 self.index().len()
85 }
86
87 /// Returns `true` if the `Revlog` has zero `entries`.
88 pub fn is_empty(&self) -> bool {
89 self.index().is_empty()
90 }
91
92 /// Return the full data associated to a node.
93 #[timed]
94 pub fn get_node_rev(&self, node: &[u8]) -> Result<Revision, RevlogError> {
95 let index = self.index();
96 // This is brute force. But it is fast enough for now.
97 // Optimization will come later.
98 for rev in (0..self.len() as Revision).rev() {
99 let index_entry =
100 index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
101 if node == index_entry.hash() {
102 return Ok(rev);
103 }
104 }
105 Err(RevlogError::InvalidRevision)
106 }
107
82 108 /// Return the full data associated to a revision.
83 109 ///
84 110 /// All entries required to build the final data out of deltas will be
85 111 /// retrieved as needed, and the deltas will be applied to the inital
86 112 /// snapshot to rebuild the final data.
87 113 #[timed]
88 114 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
89 115 // Todo return -> Cow
90 116 let mut entry = self.get_entry(rev)?;
91 117 let mut delta_chain = vec![];
92 118 while let Some(base_rev) = entry.base_rev {
93 119 delta_chain.push(entry);
94 120 entry = self
95 121 .get_entry(base_rev)
96 122 .map_err(|_| RevlogError::Corrupted)?;
97 123 }
98 124
99 125 // TODO do not look twice in the index
100 126 let index = self.index();
101 127 let index_entry =
102 128 index.get_entry(rev).ok_or(RevlogError::InvalidRevision)?;
103 129
104 130 let data: Vec<u8> = if delta_chain.is_empty() {
105 131 entry.data()?.into()
106 132 } else {
107 133 Revlog::build_data_from_deltas(entry, &delta_chain)?
108 134 };
109 135
110 136 if self.check_hash(
111 137 index_entry.p1(),
112 138 index_entry.p2(),
113 139 index_entry.hash(),
114 140 &data,
115 141 ) {
116 142 Ok(data)
117 143 } else {
118 144 Err(RevlogError::Corrupted)
119 145 }
120 146 }
121 147
122 148 /// Check the hash of some given data against the recorded hash.
123 149 pub fn check_hash(
124 150 &self,
125 151 p1: Revision,
126 152 p2: Revision,
127 153 expected: &[u8],
128 154 data: &[u8],
129 155 ) -> bool {
130 156 let index = self.index();
131 157 let e1 = index.get_entry(p1);
132 158 let h1 = match e1 {
133 159 Some(ref entry) => entry.hash(),
134 160 None => &NULL_NODE_ID,
135 161 };
136 162 let e2 = index.get_entry(p2);
137 163 let h2 = match e2 {
138 164 Some(ref entry) => entry.hash(),
139 165 None => &NULL_NODE_ID,
140 166 };
141 167
142 168 hash(data, &h1, &h2).as_slice() == expected
143 169 }
144 170
145 171 /// Build the full data of a revision out its snapshot
146 172 /// and its deltas.
147 173 #[timed]
148 174 fn build_data_from_deltas(
149 175 snapshot: RevlogEntry,
150 176 deltas: &[RevlogEntry],
151 177 ) -> Result<Vec<u8>, RevlogError> {
152 178 let snapshot = snapshot.data()?;
153 179 let deltas = deltas
154 180 .iter()
155 181 .rev()
156 182 .map(RevlogEntry::data)
157 183 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
158 184 let patches: Vec<_> =
159 185 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
160 186 let patch = patch::fold_patch_lists(&patches);
161 187 Ok(patch.apply(&snapshot))
162 188 }
163 189
164 190 /// Return the revlog index.
165 191 fn index(&self) -> Index {
166 192 let is_inline = self.data_bytes.is_none();
167 193 Index::new(&self.index_bytes, is_inline)
168 194 }
169 195
170 196 /// Return the revlog data.
171 197 fn data(&self) -> &[u8] {
172 198 match self.data_bytes {
173 199 Some(ref data_bytes) => &data_bytes,
174 200 None => &self.index_bytes,
175 201 }
176 202 }
177 203
178 204 /// Get an entry of the revlog.
179 205 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
180 206 let index = self.index();
181 207 let index_entry =
182 208 index.get_entry(rev).ok_or(RevlogError::InvalidRevision)?;
183 209 let start = index_entry.offset();
184 210 let end = start + index_entry.compressed_len();
185 211 let entry = RevlogEntry {
186 212 rev,
187 213 bytes: &self.data()[start..end],
188 214 compressed_len: index_entry.compressed_len(),
189 215 uncompressed_len: index_entry.uncompressed_len(),
190 216 base_rev: if index_entry.base_revision() == rev {
191 217 None
192 218 } else {
193 219 Some(index_entry.base_revision())
194 220 },
195 221 };
196 222 Ok(entry)
197 223 }
198 224 }
199 225
200 226 /// The revlog entry's bytes and the necessary informations to extract
201 227 /// the entry's data.
202 228 #[derive(Debug)]
203 229 pub struct RevlogEntry<'a> {
204 230 rev: Revision,
205 231 bytes: &'a [u8],
206 232 compressed_len: usize,
207 233 uncompressed_len: usize,
208 234 base_rev: Option<Revision>,
209 235 }
210 236
211 237 impl<'a> RevlogEntry<'a> {
212 238 /// Extract the data contained in the entry.
213 239 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
214 240 if self.bytes.is_empty() {
215 241 return Ok(Cow::Borrowed(&[]));
216 242 }
217 243 match self.bytes[0] {
218 244 // Revision data is the entirety of the entry, including this
219 245 // header.
220 246 b'\0' => Ok(Cow::Borrowed(self.bytes)),
221 247 // Raw revision data follows.
222 248 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
223 249 // zlib (RFC 1950) data.
224 250 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data())),
225 251 // zstd data.
226 252 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data())),
227 253 format_type => Err(RevlogError::UnknowDataFormat(format_type)),
228 254 }
229 255 }
230 256
231 257 fn uncompressed_zlib_data(&self) -> Vec<u8> {
232 258 let mut decoder = ZlibDecoder::new(self.bytes);
233 259 if self.is_delta() {
234 260 let mut buf = Vec::with_capacity(self.compressed_len);
235 261 decoder.read_to_end(&mut buf).expect("corrupted zlib data");
236 262 buf
237 263 } else {
238 264 let mut buf = vec![0; self.uncompressed_len];
239 265 decoder.read_exact(&mut buf).expect("corrupted zlib data");
240 266 buf
241 267 }
242 268 }
243 269
244 270 fn uncompressed_zstd_data(&self) -> Vec<u8> {
245 271 if self.is_delta() {
246 272 let mut buf = Vec::with_capacity(self.compressed_len);
247 273 zstd::stream::copy_decode(self.bytes, &mut buf)
248 274 .expect("corrupted zstd data");
249 275 buf
250 276 } else {
251 277 let mut buf = vec![0; self.uncompressed_len];
252 278 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
253 279 .expect("corrupted zstd data");
254 280 assert_eq!(len, self.uncompressed_len, "corrupted zstd data");
255 281 buf
256 282 }
257 283 }
258 284
259 285 /// Tell if the entry is a snapshot or a delta
260 286 /// (influences on decompression).
261 287 fn is_delta(&self) -> bool {
262 288 self.base_rev.is_some()
263 289 }
264 290 }
265 291
266 292 /// Value of the inline flag.
267 293 pub fn is_inline(index_bytes: &[u8]) -> bool {
268 294 match &index_bytes[0..=1] {
269 295 [0, 0] | [0, 2] => false,
270 296 _ => true,
271 297 }
272 298 }
273 299
274 300 /// Format version of the revlog.
275 301 pub fn get_version(index_bytes: &[u8]) -> u16 {
276 302 BigEndian::read_u16(&index_bytes[2..=3])
277 303 }
278 304
279 305 /// Calculate the hash of a revision given its data and its parents.
280 306 fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
281 307 let mut hasher = Sha1::new();
282 308 let (a, b) = (p1_hash, p2_hash);
283 309 if a > b {
284 310 hasher.input(b);
285 311 hasher.input(a);
286 312 } else {
287 313 hasher.input(a);
288 314 hasher.input(b);
289 315 }
290 316 hasher.input(data);
291 317 let mut hash = vec![0; NODE_BYTES_LENGTH];
292 318 hasher.result(&mut hash);
293 319 hash
294 320 }
295 321
296 322 #[cfg(test)]
297 323 mod tests {
298 324 use super::*;
299 325
300 326 use super::super::index::IndexEntryBuilder;
301 327
302 328 #[cfg(test)]
303 329 pub struct RevlogBuilder {
304 330 version: u16,
305 331 is_general_delta: bool,
306 332 is_inline: bool,
307 333 offset: usize,
308 334 index: Vec<Vec<u8>>,
309 335 data: Vec<Vec<u8>>,
310 336 }
311 337
312 338 #[cfg(test)]
313 339 impl RevlogBuilder {
314 340 pub fn new() -> Self {
315 341 Self {
316 342 version: 2,
317 343 is_inline: false,
318 344 is_general_delta: true,
319 345 offset: 0,
320 346 index: vec![],
321 347 data: vec![],
322 348 }
323 349 }
324 350
325 351 pub fn with_inline(&mut self, value: bool) -> &mut Self {
326 352 self.is_inline = value;
327 353 self
328 354 }
329 355
330 356 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
331 357 self.is_general_delta = value;
332 358 self
333 359 }
334 360
335 361 pub fn with_version(&mut self, value: u16) -> &mut Self {
336 362 self.version = value;
337 363 self
338 364 }
339 365
340 366 pub fn push(
341 367 &mut self,
342 368 mut index: IndexEntryBuilder,
343 369 data: Vec<u8>,
344 370 ) -> &mut Self {
345 371 if self.index.is_empty() {
346 372 index.is_first(true);
347 373 index.with_general_delta(self.is_general_delta);
348 374 index.with_inline(self.is_inline);
349 375 index.with_version(self.version);
350 376 } else {
351 377 index.with_offset(self.offset);
352 378 }
353 379 self.index.push(index.build());
354 380 self.offset += data.len();
355 381 self.data.push(data);
356 382 self
357 383 }
358 384
359 385 pub fn build_inline(&self) -> Vec<u8> {
360 386 let mut bytes =
361 387 Vec::with_capacity(self.index.len() + self.data.len());
362 388 for (index, data) in self.index.iter().zip(self.data.iter()) {
363 389 bytes.extend(index);
364 390 bytes.extend(data);
365 391 }
366 392 bytes
367 393 }
368 394 }
369 395
370 396 #[test]
371 397 fn is_not_inline_when_no_inline_flag_test() {
372 398 let bytes = RevlogBuilder::new()
373 399 .with_general_delta(false)
374 400 .with_inline(false)
375 401 .push(IndexEntryBuilder::new(), vec![])
376 402 .build_inline();
377 403
378 404 assert_eq!(is_inline(&bytes), false)
379 405 }
380 406
381 407 #[test]
382 408 fn is_inline_when_inline_flag_test() {
383 409 let bytes = RevlogBuilder::new()
384 410 .with_general_delta(false)
385 411 .with_inline(true)
386 412 .push(IndexEntryBuilder::new(), vec![])
387 413 .build_inline();
388 414
389 415 assert_eq!(is_inline(&bytes), true)
390 416 }
391 417
392 418 #[test]
393 419 fn is_inline_when_inline_and_generaldelta_flags_test() {
394 420 let bytes = RevlogBuilder::new()
395 421 .with_general_delta(true)
396 422 .with_inline(true)
397 423 .push(IndexEntryBuilder::new(), vec![])
398 424 .build_inline();
399 425
400 426 assert_eq!(is_inline(&bytes), true)
401 427 }
402 428
403 429 #[test]
404 430 fn version_test() {
405 431 let bytes = RevlogBuilder::new()
406 432 .with_version(1)
407 433 .push(IndexEntryBuilder::new(), vec![])
408 434 .build_inline();
409 435
410 436 assert_eq!(get_version(&bytes), 1)
411 437 }
412 438 }
General Comments 0
You need to be logged in to leave comments. Login now