##// END OF EJS Templates
rust-index: add an abstraction to support bytes added at runtimes...
Raphaël Gomès -
r52081:1ef4a36a default
parent child Browse files
Show More
@@ -1,641 +1,682 b''
1 1 use std::fmt::Debug;
2 2 use std::ops::Deref;
3 3
4 4 use byteorder::{BigEndian, ByteOrder};
5 5
6 6 use crate::errors::HgError;
7 7 use crate::revlog::node::Node;
8 8 use crate::revlog::{Revision, NULL_REVISION};
9 9 use crate::{Graph, GraphError, RevlogIndex, UncheckedRevision};
10 10
11 11 pub const INDEX_ENTRY_SIZE: usize = 64;
12 12
13 13 pub struct IndexHeader {
14 14 header_bytes: [u8; 4],
15 15 }
16 16
17 17 #[derive(Copy, Clone)]
18 18 pub struct IndexHeaderFlags {
19 19 flags: u16,
20 20 }
21 21
22 22 /// Corresponds to the high bits of `_format_flags` in python
23 23 impl IndexHeaderFlags {
24 24 /// Corresponds to FLAG_INLINE_DATA in python
25 25 pub fn is_inline(self) -> bool {
26 26 self.flags & 1 != 0
27 27 }
28 28 /// Corresponds to FLAG_GENERALDELTA in python
29 29 pub fn uses_generaldelta(self) -> bool {
30 30 self.flags & 2 != 0
31 31 }
32 32 }
33 33
34 34 /// Corresponds to the INDEX_HEADER structure,
35 35 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
36 36 impl IndexHeader {
37 37 fn format_flags(&self) -> IndexHeaderFlags {
38 38 // No "unknown flags" check here, unlike in python. Maybe there should
39 39 // be.
40 40 IndexHeaderFlags {
41 41 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
42 42 }
43 43 }
44 44
45 45 /// The only revlog version currently supported by rhg.
46 46 const REVLOGV1: u16 = 1;
47 47
48 48 /// Corresponds to `_format_version` in Python.
49 49 fn format_version(&self) -> u16 {
50 50 BigEndian::read_u16(&self.header_bytes[2..4])
51 51 }
52 52
53 53 const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
54 54 // We treat an empty file as a valid index with no entries.
55 55 // Here we make an arbitrary choice of what we assume the format of the
56 56 // index to be (V1, using generaldelta).
57 57 // This doesn't matter too much, since we're only doing read-only
58 58 // access. but the value corresponds to the `new_header` variable in
59 59 // `revlog.py`, `_loadindex`
60 60 header_bytes: [0, 3, 0, 1],
61 61 };
62 62
63 63 fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
64 64 if index_bytes.is_empty() {
65 65 return Ok(IndexHeader::EMPTY_INDEX_HEADER);
66 66 }
67 67 if index_bytes.len() < 4 {
68 68 return Err(HgError::corrupted(
69 69 "corrupted revlog: can't read the index format header",
70 70 ));
71 71 }
72 72 Ok(IndexHeader {
73 73 header_bytes: {
74 74 let bytes: [u8; 4] =
75 75 index_bytes[0..4].try_into().expect("impossible");
76 76 bytes
77 77 },
78 78 })
79 79 }
80 80 }
81 81
82 /// Abstracts the access to the index bytes since they can be spread between
83 /// the immutable (bytes) part and the mutable (added) part if any appends
84 /// happened. This makes it transparent for the callers.
85 struct IndexData {
86 /// Immutable bytes, most likely taken from disk
87 bytes: Box<dyn Deref<Target = [u8]> + Send>,
88 /// Bytes that were added after reading the index
89 added: Vec<u8>,
90 }
91
92 impl IndexData {
93 pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send>) -> Self {
94 Self {
95 bytes,
96 added: vec![],
97 }
98 }
99
100 pub fn len(&self) -> usize {
101 self.bytes.len() + self.added.len()
102 }
103 }
104
105 impl std::ops::Index<std::ops::Range<usize>> for IndexData {
106 type Output = [u8];
107
108 fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
109 let start = index.start;
110 let end = index.end;
111 let immutable_len = self.bytes.len();
112 if start < immutable_len {
113 if end > immutable_len {
114 panic!("index data cannot span existing and added ranges");
115 }
116 &self.bytes[index]
117 } else {
118 &self.added[start - immutable_len..end - immutable_len]
119 }
120 }
121 }
122
82 123 /// A Revlog index
83 124 pub struct Index {
84 bytes: Box<dyn Deref<Target = [u8]> + Send>,
125 bytes: IndexData,
85 126 /// Offsets of starts of index blocks.
86 127 /// Only needed when the index is interleaved with data.
87 128 offsets: Option<Vec<usize>>,
88 129 uses_generaldelta: bool,
89 130 }
90 131
91 132 impl Debug for Index {
92 133 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
93 134 f.debug_struct("Index")
94 135 .field("offsets", &self.offsets)
95 136 .field("uses_generaldelta", &self.uses_generaldelta)
96 137 .finish()
97 138 }
98 139 }
99 140
100 141 impl Graph for Index {
101 142 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
102 143 let err = || GraphError::ParentOutOfRange(rev);
103 144 match self.get_entry(rev) {
104 145 Some(entry) => {
105 146 // The C implementation checks that the parents are valid
106 147 // before returning
107 148 Ok([
108 149 self.check_revision(entry.p1()).ok_or_else(err)?,
109 150 self.check_revision(entry.p2()).ok_or_else(err)?,
110 151 ])
111 152 }
112 153 None => Ok([NULL_REVISION, NULL_REVISION]),
113 154 }
114 155 }
115 156 }
116 157
117 158 impl Index {
118 159 /// Create an index from bytes.
119 160 /// Calculate the start of each entry when is_inline is true.
120 161 pub fn new(
121 162 bytes: Box<dyn Deref<Target = [u8]> + Send>,
122 163 ) -> Result<Self, HgError> {
123 164 let header = IndexHeader::parse(bytes.as_ref())?;
124 165
125 166 if header.format_version() != IndexHeader::REVLOGV1 {
126 167 // A proper new version should have had a repo/store
127 168 // requirement.
128 169 return Err(HgError::corrupted("unsupported revlog version"));
129 170 }
130 171
131 172 // This is only correct because we know version is REVLOGV1.
132 173 // In v2 we always use generaldelta, while in v0 we never use
133 174 // generaldelta. Similar for [is_inline] (it's only used in v1).
134 175 let uses_generaldelta = header.format_flags().uses_generaldelta();
135 176
136 177 if header.format_flags().is_inline() {
137 178 let mut offset: usize = 0;
138 179 let mut offsets = Vec::new();
139 180
140 181 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
141 182 offsets.push(offset);
142 183 let end = offset + INDEX_ENTRY_SIZE;
143 184 let entry = IndexEntry {
144 185 bytes: &bytes[offset..end],
145 186 offset_override: None,
146 187 };
147 188
148 189 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
149 190 }
150 191
151 192 if offset == bytes.len() {
152 193 Ok(Self {
153 bytes,
194 bytes: IndexData::new(bytes),
154 195 offsets: Some(offsets),
155 196 uses_generaldelta,
156 197 })
157 198 } else {
158 199 Err(HgError::corrupted("unexpected inline revlog length"))
159 200 }
160 201 } else {
161 202 Ok(Self {
162 bytes,
203 bytes: IndexData::new(bytes),
163 204 offsets: None,
164 205 uses_generaldelta,
165 206 })
166 207 }
167 208 }
168 209
169 210 pub fn uses_generaldelta(&self) -> bool {
170 211 self.uses_generaldelta
171 212 }
172 213
173 214 /// Value of the inline flag.
174 215 pub fn is_inline(&self) -> bool {
175 216 self.offsets.is_some()
176 217 }
177 218
178 219 /// Return a slice of bytes if `revlog` is inline. Panic if not.
179 220 pub fn data(&self, start: usize, end: usize) -> &[u8] {
180 221 if !self.is_inline() {
181 222 panic!("tried to access data in the index of a revlog that is not inline");
182 223 }
183 224 &self.bytes[start..end]
184 225 }
185 226
186 227 /// Return number of entries of the revlog index.
187 228 pub fn len(&self) -> usize {
188 229 if let Some(offsets) = &self.offsets {
189 230 offsets.len()
190 231 } else {
191 232 self.bytes.len() / INDEX_ENTRY_SIZE
192 233 }
193 234 }
194 235
195 236 /// Returns `true` if the `Index` has zero `entries`.
196 237 pub fn is_empty(&self) -> bool {
197 238 self.len() == 0
198 239 }
199 240
200 241 /// Return the index entry corresponding to the given revision if it
201 242 /// exists.
202 243 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
203 244 if rev == NULL_REVISION {
204 245 return None;
205 246 }
206 247 Some(if let Some(offsets) = &self.offsets {
207 248 self.get_entry_inline(rev, offsets)
208 249 } else {
209 250 self.get_entry_separated(rev)
210 251 })
211 252 }
212 253
213 254 fn get_entry_inline(
214 255 &self,
215 256 rev: Revision,
216 257 offsets: &[usize],
217 258 ) -> IndexEntry {
218 259 let start = offsets[rev.0 as usize];
219 260 let end = start + INDEX_ENTRY_SIZE;
220 261 let bytes = &self.bytes[start..end];
221 262
222 263 // See IndexEntry for an explanation of this override.
223 264 let offset_override = Some(end);
224 265
225 266 IndexEntry {
226 267 bytes,
227 268 offset_override,
228 269 }
229 270 }
230 271
231 272 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
232 273 let start = rev.0 as usize * INDEX_ENTRY_SIZE;
233 274 let end = start + INDEX_ENTRY_SIZE;
234 275 let bytes = &self.bytes[start..end];
235 276
236 277 // Override the offset of the first revision as its bytes are used
237 278 // for the index's metadata (saving space because it is always 0)
238 279 let offset_override = if rev == Revision(0) { Some(0) } else { None };
239 280
240 281 IndexEntry {
241 282 bytes,
242 283 offset_override,
243 284 }
244 285 }
245 286 }
246 287
247 288 impl super::RevlogIndex for Index {
248 289 fn len(&self) -> usize {
249 290 self.len()
250 291 }
251 292
252 293 fn node(&self, rev: Revision) -> Option<&Node> {
253 294 self.get_entry(rev).map(|entry| entry.hash())
254 295 }
255 296 }
256 297
257 298 #[derive(Debug)]
258 299 pub struct IndexEntry<'a> {
259 300 bytes: &'a [u8],
260 301 /// Allows to override the offset value of the entry.
261 302 ///
262 303 /// For interleaved index and data, the offset stored in the index
263 304 /// corresponds to the separated data offset.
264 305 /// It has to be overridden with the actual offset in the interleaved
265 306 /// index which is just after the index block.
266 307 ///
267 308 /// For separated index and data, the offset stored in the first index
268 309 /// entry is mixed with the index headers.
269 310 /// It has to be overridden with 0.
270 311 offset_override: Option<usize>,
271 312 }
272 313
273 314 impl<'a> IndexEntry<'a> {
274 315 /// Return the offset of the data.
275 316 pub fn offset(&self) -> usize {
276 317 if let Some(offset_override) = self.offset_override {
277 318 offset_override
278 319 } else {
279 320 let mut bytes = [0; 8];
280 321 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
281 322 BigEndian::read_u64(&bytes[..]) as usize
282 323 }
283 324 }
284 325
285 326 pub fn flags(&self) -> u16 {
286 327 BigEndian::read_u16(&self.bytes[6..=7])
287 328 }
288 329
289 330 /// Return the compressed length of the data.
290 331 pub fn compressed_len(&self) -> u32 {
291 332 BigEndian::read_u32(&self.bytes[8..=11])
292 333 }
293 334
294 335 /// Return the uncompressed length of the data.
295 336 pub fn uncompressed_len(&self) -> i32 {
296 337 BigEndian::read_i32(&self.bytes[12..=15])
297 338 }
298 339
299 340 /// Return the revision upon which the data has been derived.
300 341 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
301 342 // TODO Maybe return an Option when base_revision == rev?
302 343 // Requires to add rev to IndexEntry
303 344
304 345 BigEndian::read_i32(&self.bytes[16..]).into()
305 346 }
306 347
307 348 pub fn link_revision(&self) -> UncheckedRevision {
308 349 BigEndian::read_i32(&self.bytes[20..]).into()
309 350 }
310 351
311 352 pub fn p1(&self) -> UncheckedRevision {
312 353 BigEndian::read_i32(&self.bytes[24..]).into()
313 354 }
314 355
315 356 pub fn p2(&self) -> UncheckedRevision {
316 357 BigEndian::read_i32(&self.bytes[28..]).into()
317 358 }
318 359
319 360 /// Return the hash of revision's full text.
320 361 ///
321 362 /// Currently, SHA-1 is used and only the first 20 bytes of this field
322 363 /// are used.
323 364 pub fn hash(&self) -> &'a Node {
324 365 (&self.bytes[32..52]).try_into().unwrap()
325 366 }
326 367 }
327 368
328 369 #[cfg(test)]
329 370 mod tests {
330 371 use super::*;
331 372 use crate::node::NULL_NODE;
332 373
333 374 #[cfg(test)]
334 375 #[derive(Debug, Copy, Clone)]
335 376 pub struct IndexEntryBuilder {
336 377 is_first: bool,
337 378 is_inline: bool,
338 379 is_general_delta: bool,
339 380 version: u16,
340 381 offset: usize,
341 382 compressed_len: usize,
342 383 uncompressed_len: usize,
343 384 base_revision_or_base_of_delta_chain: Revision,
344 385 link_revision: Revision,
345 386 p1: Revision,
346 387 p2: Revision,
347 388 node: Node,
348 389 }
349 390
350 391 #[cfg(test)]
351 392 impl IndexEntryBuilder {
352 393 #[allow(clippy::new_without_default)]
353 394 pub fn new() -> Self {
354 395 Self {
355 396 is_first: false,
356 397 is_inline: false,
357 398 is_general_delta: true,
358 399 version: 1,
359 400 offset: 0,
360 401 compressed_len: 0,
361 402 uncompressed_len: 0,
362 403 base_revision_or_base_of_delta_chain: Revision(0),
363 404 link_revision: Revision(0),
364 405 p1: NULL_REVISION,
365 406 p2: NULL_REVISION,
366 407 node: NULL_NODE,
367 408 }
368 409 }
369 410
370 411 pub fn is_first(&mut self, value: bool) -> &mut Self {
371 412 self.is_first = value;
372 413 self
373 414 }
374 415
375 416 pub fn with_inline(&mut self, value: bool) -> &mut Self {
376 417 self.is_inline = value;
377 418 self
378 419 }
379 420
380 421 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
381 422 self.is_general_delta = value;
382 423 self
383 424 }
384 425
385 426 pub fn with_version(&mut self, value: u16) -> &mut Self {
386 427 self.version = value;
387 428 self
388 429 }
389 430
390 431 pub fn with_offset(&mut self, value: usize) -> &mut Self {
391 432 self.offset = value;
392 433 self
393 434 }
394 435
395 436 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
396 437 self.compressed_len = value;
397 438 self
398 439 }
399 440
400 441 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
401 442 self.uncompressed_len = value;
402 443 self
403 444 }
404 445
405 446 pub fn with_base_revision_or_base_of_delta_chain(
406 447 &mut self,
407 448 value: Revision,
408 449 ) -> &mut Self {
409 450 self.base_revision_or_base_of_delta_chain = value;
410 451 self
411 452 }
412 453
413 454 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
414 455 self.link_revision = value;
415 456 self
416 457 }
417 458
418 459 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
419 460 self.p1 = value;
420 461 self
421 462 }
422 463
423 464 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
424 465 self.p2 = value;
425 466 self
426 467 }
427 468
428 469 pub fn with_node(&mut self, value: Node) -> &mut Self {
429 470 self.node = value;
430 471 self
431 472 }
432 473
433 474 pub fn build(&self) -> Vec<u8> {
434 475 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
435 476 if self.is_first {
436 477 bytes.extend(&match (self.is_general_delta, self.is_inline) {
437 478 (false, false) => [0u8, 0],
438 479 (false, true) => [0u8, 1],
439 480 (true, false) => [0u8, 2],
440 481 (true, true) => [0u8, 3],
441 482 });
442 483 bytes.extend(&self.version.to_be_bytes());
443 484 // Remaining offset bytes.
444 485 bytes.extend(&[0u8; 2]);
445 486 } else {
446 487 // Offset stored on 48 bits (6 bytes)
447 488 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
448 489 }
449 490 bytes.extend(&[0u8; 2]); // Revision flags.
450 491 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
451 492 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
452 493 bytes.extend(
453 494 &self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
454 495 );
455 496 bytes.extend(&self.link_revision.0.to_be_bytes());
456 497 bytes.extend(&self.p1.0.to_be_bytes());
457 498 bytes.extend(&self.p2.0.to_be_bytes());
458 499 bytes.extend(self.node.as_bytes());
459 500 bytes.extend(vec![0u8; 12]);
460 501 bytes
461 502 }
462 503 }
463 504
464 505 pub fn is_inline(index_bytes: &[u8]) -> bool {
465 506 IndexHeader::parse(index_bytes)
466 507 .expect("too short")
467 508 .format_flags()
468 509 .is_inline()
469 510 }
470 511
471 512 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
472 513 IndexHeader::parse(index_bytes)
473 514 .expect("too short")
474 515 .format_flags()
475 516 .uses_generaldelta()
476 517 }
477 518
478 519 pub fn get_version(index_bytes: &[u8]) -> u16 {
479 520 IndexHeader::parse(index_bytes)
480 521 .expect("too short")
481 522 .format_version()
482 523 }
483 524
484 525 #[test]
485 526 fn flags_when_no_inline_flag_test() {
486 527 let bytes = IndexEntryBuilder::new()
487 528 .is_first(true)
488 529 .with_general_delta(false)
489 530 .with_inline(false)
490 531 .build();
491 532
492 533 assert!(!is_inline(&bytes));
493 534 assert!(!uses_generaldelta(&bytes));
494 535 }
495 536
496 537 #[test]
497 538 fn flags_when_inline_flag_test() {
498 539 let bytes = IndexEntryBuilder::new()
499 540 .is_first(true)
500 541 .with_general_delta(false)
501 542 .with_inline(true)
502 543 .build();
503 544
504 545 assert!(is_inline(&bytes));
505 546 assert!(!uses_generaldelta(&bytes));
506 547 }
507 548
508 549 #[test]
509 550 fn flags_when_inline_and_generaldelta_flags_test() {
510 551 let bytes = IndexEntryBuilder::new()
511 552 .is_first(true)
512 553 .with_general_delta(true)
513 554 .with_inline(true)
514 555 .build();
515 556
516 557 assert!(is_inline(&bytes));
517 558 assert!(uses_generaldelta(&bytes));
518 559 }
519 560
520 561 #[test]
521 562 fn test_offset() {
522 563 let bytes = IndexEntryBuilder::new().with_offset(1).build();
523 564 let entry = IndexEntry {
524 565 bytes: &bytes,
525 566 offset_override: None,
526 567 };
527 568
528 569 assert_eq!(entry.offset(), 1)
529 570 }
530 571
531 572 #[test]
532 573 fn test_with_overridden_offset() {
533 574 let bytes = IndexEntryBuilder::new().with_offset(1).build();
534 575 let entry = IndexEntry {
535 576 bytes: &bytes,
536 577 offset_override: Some(2),
537 578 };
538 579
539 580 assert_eq!(entry.offset(), 2)
540 581 }
541 582
542 583 #[test]
543 584 fn test_compressed_len() {
544 585 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
545 586 let entry = IndexEntry {
546 587 bytes: &bytes,
547 588 offset_override: None,
548 589 };
549 590
550 591 assert_eq!(entry.compressed_len(), 1)
551 592 }
552 593
553 594 #[test]
554 595 fn test_uncompressed_len() {
555 596 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
556 597 let entry = IndexEntry {
557 598 bytes: &bytes,
558 599 offset_override: None,
559 600 };
560 601
561 602 assert_eq!(entry.uncompressed_len(), 1)
562 603 }
563 604
564 605 #[test]
565 606 fn test_base_revision_or_base_of_delta_chain() {
566 607 let bytes = IndexEntryBuilder::new()
567 608 .with_base_revision_or_base_of_delta_chain(Revision(1))
568 609 .build();
569 610 let entry = IndexEntry {
570 611 bytes: &bytes,
571 612 offset_override: None,
572 613 };
573 614
574 615 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
575 616 }
576 617
577 618 #[test]
578 619 fn link_revision_test() {
579 620 let bytes = IndexEntryBuilder::new()
580 621 .with_link_revision(Revision(123))
581 622 .build();
582 623
583 624 let entry = IndexEntry {
584 625 bytes: &bytes,
585 626 offset_override: None,
586 627 };
587 628
588 629 assert_eq!(entry.link_revision(), 123.into());
589 630 }
590 631
591 632 #[test]
592 633 fn p1_test() {
593 634 let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();
594 635
595 636 let entry = IndexEntry {
596 637 bytes: &bytes,
597 638 offset_override: None,
598 639 };
599 640
600 641 assert_eq!(entry.p1(), 123.into());
601 642 }
602 643
603 644 #[test]
604 645 fn p2_test() {
605 646 let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();
606 647
607 648 let entry = IndexEntry {
608 649 bytes: &bytes,
609 650 offset_override: None,
610 651 };
611 652
612 653 assert_eq!(entry.p2(), 123.into());
613 654 }
614 655
615 656 #[test]
616 657 fn node_test() {
617 658 let node = Node::from_hex("0123456789012345678901234567890123456789")
618 659 .unwrap();
619 660 let bytes = IndexEntryBuilder::new().with_node(node).build();
620 661
621 662 let entry = IndexEntry {
622 663 bytes: &bytes,
623 664 offset_override: None,
624 665 };
625 666
626 667 assert_eq!(*entry.hash(), node);
627 668 }
628 669
629 670 #[test]
630 671 fn version_test() {
631 672 let bytes = IndexEntryBuilder::new()
632 673 .is_first(true)
633 674 .with_version(2)
634 675 .build();
635 676
636 677 assert_eq!(get_version(&bytes), 2)
637 678 }
638 679 }
639 680
640 681 #[cfg(test)]
641 682 pub use tests::IndexEntryBuilder;
General Comments 0
You need to be logged in to leave comments. Login now