##// END OF EJS Templates
rust-revlog: make `IndexEntryBuilder` build a whole entry...
Martin von Zweigbergk -
r49984:2a9a55ff default
parent child Browse files
Show More
@@ -1,528 +1,616 b''
1 1 use std::convert::TryInto;
2 2 use std::ops::Deref;
3 3
4 4 use byteorder::{BigEndian, ByteOrder};
5 5
6 6 use crate::errors::HgError;
7 7 use crate::revlog::node::Node;
8 8 use crate::revlog::{Revision, NULL_REVISION};
9 9
10 10 pub const INDEX_ENTRY_SIZE: usize = 64;
11 11
12 12 pub struct IndexHeader {
13 13 header_bytes: [u8; 4],
14 14 }
15 15
16 16 #[derive(Copy, Clone)]
17 17 pub struct IndexHeaderFlags {
18 18 flags: u16,
19 19 }
20 20
21 21 /// Corresponds to the high bits of `_format_flags` in python
22 22 impl IndexHeaderFlags {
23 23 /// Corresponds to FLAG_INLINE_DATA in python
24 24 pub fn is_inline(self) -> bool {
25 25 return self.flags & 1 != 0;
26 26 }
27 27 /// Corresponds to FLAG_GENERALDELTA in python
28 28 pub fn uses_generaldelta(self) -> bool {
29 29 return self.flags & 2 != 0;
30 30 }
31 31 }
32 32
33 33 /// Corresponds to the INDEX_HEADER structure,
34 34 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
35 35 impl IndexHeader {
36 36 fn format_flags(&self) -> IndexHeaderFlags {
37 37 // No "unknown flags" check here, unlike in python. Maybe there should
38 38 // be.
39 39 return IndexHeaderFlags {
40 40 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
41 41 };
42 42 }
43 43
44 44 /// The only revlog version currently supported by rhg.
45 45 const REVLOGV1: u16 = 1;
46 46
47 47 /// Corresponds to `_format_version` in Python.
48 48 fn format_version(&self) -> u16 {
49 49 return BigEndian::read_u16(&self.header_bytes[2..4]);
50 50 }
51 51
52 52 const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
53 53 // We treat an empty file as a valid index with no entries.
54 54 // Here we make an arbitrary choice of what we assume the format of the
55 55 // index to be (V1, using generaldelta).
56 56 // This doesn't matter too much, since we're only doing read-only
57 57 // access. but the value corresponds to the `new_header` variable in
58 58 // `revlog.py`, `_loadindex`
59 59 header_bytes: [0, 3, 0, 1],
60 60 };
61 61
62 62 fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
63 63 if index_bytes.len() == 0 {
64 64 return Ok(IndexHeader::EMPTY_INDEX_HEADER);
65 65 }
66 66 if index_bytes.len() < 4 {
67 67 return Err(HgError::corrupted(
68 68 "corrupted revlog: can't read the index format header",
69 69 ));
70 70 }
71 71 return Ok(IndexHeader {
72 72 header_bytes: {
73 73 let bytes: [u8; 4] =
74 74 index_bytes[0..4].try_into().expect("impossible");
75 75 bytes
76 76 },
77 77 });
78 78 }
79 79 }
80 80
81 81 /// A Revlog index
82 82 pub struct Index {
83 83 bytes: Box<dyn Deref<Target = [u8]> + Send>,
84 84 /// Offsets of starts of index blocks.
85 85 /// Only needed when the index is interleaved with data.
86 86 offsets: Option<Vec<usize>>,
87 87 uses_generaldelta: bool,
88 88 }
89 89
90 90 impl Index {
91 91 /// Create an index from bytes.
92 92 /// Calculate the start of each entry when is_inline is true.
93 93 pub fn new(
94 94 bytes: Box<dyn Deref<Target = [u8]> + Send>,
95 95 ) -> Result<Self, HgError> {
96 96 let header = IndexHeader::parse(bytes.as_ref())?;
97 97
98 98 if header.format_version() != IndexHeader::REVLOGV1 {
99 99 // A proper new version should have had a repo/store
100 100 // requirement.
101 101 return Err(HgError::corrupted("unsupported revlog version"));
102 102 }
103 103
104 104 // This is only correct because we know version is REVLOGV1.
105 105 // In v2 we always use generaldelta, while in v0 we never use
106 106 // generaldelta. Similar for [is_inline] (it's only used in v1).
107 107 let uses_generaldelta = header.format_flags().uses_generaldelta();
108 108
109 109 if header.format_flags().is_inline() {
110 110 let mut offset: usize = 0;
111 111 let mut offsets = Vec::new();
112 112
113 113 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
114 114 offsets.push(offset);
115 115 let end = offset + INDEX_ENTRY_SIZE;
116 116 let entry = IndexEntry {
117 117 bytes: &bytes[offset..end],
118 118 offset_override: None,
119 119 };
120 120
121 121 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
122 122 }
123 123
124 124 if offset == bytes.len() {
125 125 Ok(Self {
126 126 bytes,
127 127 offsets: Some(offsets),
128 128 uses_generaldelta,
129 129 })
130 130 } else {
131 131 Err(HgError::corrupted("unexpected inline revlog length")
132 132 .into())
133 133 }
134 134 } else {
135 135 Ok(Self {
136 136 bytes,
137 137 offsets: None,
138 138 uses_generaldelta,
139 139 })
140 140 }
141 141 }
142 142
143 143 pub fn uses_generaldelta(&self) -> bool {
144 144 self.uses_generaldelta
145 145 }
146 146
147 147 /// Value of the inline flag.
148 148 pub fn is_inline(&self) -> bool {
149 149 self.offsets.is_some()
150 150 }
151 151
152 152 /// Return a slice of bytes if `revlog` is inline. Panic if not.
153 153 pub fn data(&self, start: usize, end: usize) -> &[u8] {
154 154 if !self.is_inline() {
155 155 panic!("tried to access data in the index of a revlog that is not inline");
156 156 }
157 157 &self.bytes[start..end]
158 158 }
159 159
160 160 /// Return number of entries of the revlog index.
161 161 pub fn len(&self) -> usize {
162 162 if let Some(offsets) = &self.offsets {
163 163 offsets.len()
164 164 } else {
165 165 self.bytes.len() / INDEX_ENTRY_SIZE
166 166 }
167 167 }
168 168
169 169 /// Returns `true` if the `Index` has zero `entries`.
170 170 pub fn is_empty(&self) -> bool {
171 171 self.len() == 0
172 172 }
173 173
174 174 /// Return the index entry corresponding to the given revision if it
175 175 /// exists.
176 176 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
177 177 if rev == NULL_REVISION {
178 178 return None;
179 179 }
180 180 if let Some(offsets) = &self.offsets {
181 181 self.get_entry_inline(rev, offsets)
182 182 } else {
183 183 self.get_entry_separated(rev)
184 184 }
185 185 }
186 186
187 187 fn get_entry_inline(
188 188 &self,
189 189 rev: Revision,
190 190 offsets: &[usize],
191 191 ) -> Option<IndexEntry> {
192 192 let start = *offsets.get(rev as usize)?;
193 193 let end = start.checked_add(INDEX_ENTRY_SIZE)?;
194 194 let bytes = &self.bytes[start..end];
195 195
196 196 // See IndexEntry for an explanation of this override.
197 197 let offset_override = Some(end);
198 198
199 199 Some(IndexEntry {
200 200 bytes,
201 201 offset_override,
202 202 })
203 203 }
204 204
205 205 fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
206 206 let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE;
207 207 if rev as usize >= max_rev {
208 208 return None;
209 209 }
210 210 let start = rev as usize * INDEX_ENTRY_SIZE;
211 211 let end = start + INDEX_ENTRY_SIZE;
212 212 let bytes = &self.bytes[start..end];
213 213
214 214 // Override the offset of the first revision as its bytes are used
215 215 // for the index's metadata (saving space because it is always 0)
216 216 let offset_override = if rev == 0 { Some(0) } else { None };
217 217
218 218 Some(IndexEntry {
219 219 bytes,
220 220 offset_override,
221 221 })
222 222 }
223 223 }
224 224
225 225 impl super::RevlogIndex for Index {
226 226 fn len(&self) -> usize {
227 227 self.len()
228 228 }
229 229
230 230 fn node(&self, rev: Revision) -> Option<&Node> {
231 231 self.get_entry(rev).map(|entry| entry.hash())
232 232 }
233 233 }
234 234
235 235 #[derive(Debug)]
236 236 pub struct IndexEntry<'a> {
237 237 bytes: &'a [u8],
238 238 /// Allows to override the offset value of the entry.
239 239 ///
240 240 /// For interleaved index and data, the offset stored in the index
241 241 /// corresponds to the separated data offset.
242 242 /// It has to be overridden with the actual offset in the interleaved
243 243 /// index which is just after the index block.
244 244 ///
245 245 /// For separated index and data, the offset stored in the first index
246 246 /// entry is mixed with the index headers.
247 247 /// It has to be overridden with 0.
248 248 offset_override: Option<usize>,
249 249 }
250 250
251 251 impl<'a> IndexEntry<'a> {
252 252 /// Return the offset of the data.
253 253 pub fn offset(&self) -> usize {
254 254 if let Some(offset_override) = self.offset_override {
255 255 offset_override
256 256 } else {
257 257 let mut bytes = [0; 8];
258 258 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
259 259 BigEndian::read_u64(&bytes[..]) as usize
260 260 }
261 261 }
262 262
263 263 pub fn flags(&self) -> u16 {
264 264 BigEndian::read_u16(&self.bytes[6..=7])
265 265 }
266 266
267 267 /// Return the compressed length of the data.
268 268 pub fn compressed_len(&self) -> u32 {
269 269 BigEndian::read_u32(&self.bytes[8..=11])
270 270 }
271 271
272 272 /// Return the uncompressed length of the data.
273 273 pub fn uncompressed_len(&self) -> i32 {
274 274 BigEndian::read_i32(&self.bytes[12..=15])
275 275 }
276 276
277 277 /// Return the revision upon which the data has been derived.
278 278 pub fn base_revision_or_base_of_delta_chain(&self) -> Revision {
279 279 // TODO Maybe return an Option when base_revision == rev?
280 280 // Requires to add rev to IndexEntry
281 281
282 282 BigEndian::read_i32(&self.bytes[16..])
283 283 }
284 284
285 pub fn link_revision(&self) -> Revision {
286 BigEndian::read_i32(&self.bytes[20..])
287 }
288
285 289 pub fn p1(&self) -> Revision {
286 290 BigEndian::read_i32(&self.bytes[24..])
287 291 }
288 292
289 293 pub fn p2(&self) -> Revision {
290 294 BigEndian::read_i32(&self.bytes[28..])
291 295 }
292 296
293 297 /// Return the hash of revision's full text.
294 298 ///
295 299 /// Currently, SHA-1 is used and only the first 20 bytes of this field
296 300 /// are used.
297 301 pub fn hash(&self) -> &'a Node {
298 302 (&self.bytes[32..52]).try_into().unwrap()
299 303 }
300 304 }
301 305
302 306 #[cfg(test)]
303 307 mod tests {
304 308 use super::*;
309 use crate::node::NULL_NODE;
305 310
306 311 #[cfg(test)]
307 312 #[derive(Debug, Copy, Clone)]
308 313 pub struct IndexEntryBuilder {
309 314 is_first: bool,
310 315 is_inline: bool,
311 316 is_general_delta: bool,
312 317 version: u16,
313 318 offset: usize,
314 319 compressed_len: usize,
315 320 uncompressed_len: usize,
316 321 base_revision_or_base_of_delta_chain: Revision,
322 link_revision: Revision,
323 p1: Revision,
324 p2: Revision,
325 node: Node,
317 326 }
318 327
319 328 #[cfg(test)]
320 329 impl IndexEntryBuilder {
321 330 pub fn new() -> Self {
322 331 Self {
323 332 is_first: false,
324 333 is_inline: false,
325 334 is_general_delta: true,
326 335 version: 1,
327 336 offset: 0,
328 337 compressed_len: 0,
329 338 uncompressed_len: 0,
330 339 base_revision_or_base_of_delta_chain: 0,
340 link_revision: 0,
341 p1: NULL_REVISION,
342 p2: NULL_REVISION,
343 node: NULL_NODE,
331 344 }
332 345 }
333 346
334 347 pub fn is_first(&mut self, value: bool) -> &mut Self {
335 348 self.is_first = value;
336 349 self
337 350 }
338 351
339 352 pub fn with_inline(&mut self, value: bool) -> &mut Self {
340 353 self.is_inline = value;
341 354 self
342 355 }
343 356
344 357 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
345 358 self.is_general_delta = value;
346 359 self
347 360 }
348 361
349 362 pub fn with_version(&mut self, value: u16) -> &mut Self {
350 363 self.version = value;
351 364 self
352 365 }
353 366
354 367 pub fn with_offset(&mut self, value: usize) -> &mut Self {
355 368 self.offset = value;
356 369 self
357 370 }
358 371
359 372 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
360 373 self.compressed_len = value;
361 374 self
362 375 }
363 376
364 377 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
365 378 self.uncompressed_len = value;
366 379 self
367 380 }
368 381
369 382 pub fn with_base_revision_or_base_of_delta_chain(
370 383 &mut self,
371 384 value: Revision,
372 385 ) -> &mut Self {
373 386 self.base_revision_or_base_of_delta_chain = value;
374 387 self
375 388 }
376 389
390 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
391 self.link_revision = value;
392 self
393 }
394
395 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
396 self.p1 = value;
397 self
398 }
399
400 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
401 self.p2 = value;
402 self
403 }
404
405 pub fn with_node(&mut self, value: Node) -> &mut Self {
406 self.node = value;
407 self
408 }
409
377 410 pub fn build(&self) -> Vec<u8> {
378 411 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
379 412 if self.is_first {
380 413 bytes.extend(&match (self.is_general_delta, self.is_inline) {
381 414 (false, false) => [0u8, 0],
382 415 (false, true) => [0u8, 1],
383 416 (true, false) => [0u8, 2],
384 417 (true, true) => [0u8, 3],
385 418 });
386 419 bytes.extend(&self.version.to_be_bytes());
387 420 // Remaining offset bytes.
388 421 bytes.extend(&[0u8; 2]);
389 422 } else {
390 423 // Offset stored on 48 bits (6 bytes)
391 424 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
392 425 }
393 426 bytes.extend(&[0u8; 2]); // Revision flags.
394 427 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
395 428 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
396 429 bytes.extend(
397 430 &self.base_revision_or_base_of_delta_chain.to_be_bytes(),
398 431 );
432 bytes.extend(&self.link_revision.to_be_bytes());
433 bytes.extend(&self.p1.to_be_bytes());
434 bytes.extend(&self.p2.to_be_bytes());
435 bytes.extend(self.node.as_bytes());
436 bytes.extend(vec![0u8; 12]);
399 437 bytes
400 438 }
401 439 }
402 440
403 441 pub fn is_inline(index_bytes: &[u8]) -> bool {
404 442 IndexHeader::parse(index_bytes)
405 443 .expect("too short")
406 444 .format_flags()
407 445 .is_inline()
408 446 }
409 447
410 448 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
411 449 IndexHeader::parse(index_bytes)
412 450 .expect("too short")
413 451 .format_flags()
414 452 .uses_generaldelta()
415 453 }
416 454
417 455 pub fn get_version(index_bytes: &[u8]) -> u16 {
418 456 IndexHeader::parse(index_bytes)
419 457 .expect("too short")
420 458 .format_version()
421 459 }
422 460
423 461 #[test]
424 462 fn flags_when_no_inline_flag_test() {
425 463 let bytes = IndexEntryBuilder::new()
426 464 .is_first(true)
427 465 .with_general_delta(false)
428 466 .with_inline(false)
429 467 .build();
430 468
431 469 assert_eq!(is_inline(&bytes), false);
432 470 assert_eq!(uses_generaldelta(&bytes), false);
433 471 }
434 472
435 473 #[test]
436 474 fn flags_when_inline_flag_test() {
437 475 let bytes = IndexEntryBuilder::new()
438 476 .is_first(true)
439 477 .with_general_delta(false)
440 478 .with_inline(true)
441 479 .build();
442 480
443 481 assert_eq!(is_inline(&bytes), true);
444 482 assert_eq!(uses_generaldelta(&bytes), false);
445 483 }
446 484
447 485 #[test]
448 486 fn flags_when_inline_and_generaldelta_flags_test() {
449 487 let bytes = IndexEntryBuilder::new()
450 488 .is_first(true)
451 489 .with_general_delta(true)
452 490 .with_inline(true)
453 491 .build();
454 492
455 493 assert_eq!(is_inline(&bytes), true);
456 494 assert_eq!(uses_generaldelta(&bytes), true);
457 495 }
458 496
459 497 #[test]
460 498 fn test_offset() {
461 499 let bytes = IndexEntryBuilder::new().with_offset(1).build();
462 500 let entry = IndexEntry {
463 501 bytes: &bytes,
464 502 offset_override: None,
465 503 };
466 504
467 505 assert_eq!(entry.offset(), 1)
468 506 }
469 507
470 508 #[test]
471 509 fn test_with_overridden_offset() {
472 510 let bytes = IndexEntryBuilder::new().with_offset(1).build();
473 511 let entry = IndexEntry {
474 512 bytes: &bytes,
475 513 offset_override: Some(2),
476 514 };
477 515
478 516 assert_eq!(entry.offset(), 2)
479 517 }
480 518
481 519 #[test]
482 520 fn test_compressed_len() {
483 521 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
484 522 let entry = IndexEntry {
485 523 bytes: &bytes,
486 524 offset_override: None,
487 525 };
488 526
489 527 assert_eq!(entry.compressed_len(), 1)
490 528 }
491 529
492 530 #[test]
493 531 fn test_uncompressed_len() {
494 532 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
495 533 let entry = IndexEntry {
496 534 bytes: &bytes,
497 535 offset_override: None,
498 536 };
499 537
500 538 assert_eq!(entry.uncompressed_len(), 1)
501 539 }
502 540
503 541 #[test]
504 542 fn test_base_revision_or_base_of_delta_chain() {
505 543 let bytes = IndexEntryBuilder::new()
506 544 .with_base_revision_or_base_of_delta_chain(1)
507 545 .build();
508 546 let entry = IndexEntry {
509 547 bytes: &bytes,
510 548 offset_override: None,
511 549 };
512 550
513 551 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1)
514 552 }
515 553
516 554 #[test]
555 fn link_revision_test() {
556 let bytes = IndexEntryBuilder::new().with_link_revision(123).build();
557
558 let entry = IndexEntry {
559 bytes: &bytes,
560 offset_override: None,
561 };
562
563 assert_eq!(entry.link_revision(), 123);
564 }
565
566 #[test]
567 fn p1_test() {
568 let bytes = IndexEntryBuilder::new().with_p1(123).build();
569
570 let entry = IndexEntry {
571 bytes: &bytes,
572 offset_override: None,
573 };
574
575 assert_eq!(entry.p1(), 123);
576 }
577
578 #[test]
579 fn p2_test() {
580 let bytes = IndexEntryBuilder::new().with_p2(123).build();
581
582 let entry = IndexEntry {
583 bytes: &bytes,
584 offset_override: None,
585 };
586
587 assert_eq!(entry.p2(), 123);
588 }
589
590 #[test]
591 fn node_test() {
592 let node = Node::from_hex("0123456789012345678901234567890123456789")
593 .unwrap();
594 let bytes = IndexEntryBuilder::new().with_node(node).build();
595
596 let entry = IndexEntry {
597 bytes: &bytes,
598 offset_override: None,
599 };
600
601 assert_eq!(*entry.hash(), node);
602 }
603
604 #[test]
517 605 fn version_test() {
518 606 let bytes = IndexEntryBuilder::new()
519 607 .is_first(true)
520 608 .with_version(2)
521 609 .build();
522 610
523 611 assert_eq!(get_version(&bytes), 2)
524 612 }
525 613 }
526 614
527 615 #[cfg(test)]
528 616 pub use tests::IndexEntryBuilder;
General Comments 0
You need to be logged in to leave comments. Login now