##// END OF EJS Templates
rust-index: add append method...
Raphaël Gomès -
r52082:b4d152a2 default
parent child Browse files
Show More
@@ -1,682 +1,775 b''
1 1 use std::fmt::Debug;
2 2 use std::ops::Deref;
3 3
4 4 use byteorder::{BigEndian, ByteOrder};
5 use bytes_cast::{unaligned, BytesCast};
5 6
7 use super::REVIDX_KNOWN_FLAGS;
6 8 use crate::errors::HgError;
9 use crate::node::{NODE_BYTES_LENGTH, STORED_NODE_ID_BYTES};
7 10 use crate::revlog::node::Node;
8 11 use crate::revlog::{Revision, NULL_REVISION};
9 use crate::{Graph, GraphError, RevlogIndex, UncheckedRevision};
12 use crate::{Graph, GraphError, RevlogError, RevlogIndex, UncheckedRevision};
10 13
11 14 pub const INDEX_ENTRY_SIZE: usize = 64;
15 pub const COMPRESSION_MODE_INLINE: u8 = 2;
12 16
13 17 pub struct IndexHeader {
14 18 header_bytes: [u8; 4],
15 19 }
16 20
17 21 #[derive(Copy, Clone)]
18 22 pub struct IndexHeaderFlags {
19 23 flags: u16,
20 24 }
21 25
22 26 /// Corresponds to the high bits of `_format_flags` in python
23 27 impl IndexHeaderFlags {
24 28 /// Corresponds to FLAG_INLINE_DATA in python
25 29 pub fn is_inline(self) -> bool {
26 30 self.flags & 1 != 0
27 31 }
28 32 /// Corresponds to FLAG_GENERALDELTA in python
29 33 pub fn uses_generaldelta(self) -> bool {
30 34 self.flags & 2 != 0
31 35 }
32 36 }
33 37
34 38 /// Corresponds to the INDEX_HEADER structure,
35 39 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
36 40 impl IndexHeader {
37 41 fn format_flags(&self) -> IndexHeaderFlags {
38 42 // No "unknown flags" check here, unlike in python. Maybe there should
39 43 // be.
40 44 IndexHeaderFlags {
41 45 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
42 46 }
43 47 }
44 48
45 49 /// The only revlog version currently supported by rhg.
46 50 const REVLOGV1: u16 = 1;
47 51
48 52 /// Corresponds to `_format_version` in Python.
49 53 fn format_version(&self) -> u16 {
50 54 BigEndian::read_u16(&self.header_bytes[2..4])
51 55 }
52 56
53 57 const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
54 58 // We treat an empty file as a valid index with no entries.
55 59 // Here we make an arbitrary choice of what we assume the format of the
56 60 // index to be (V1, using generaldelta).
57 61 // This doesn't matter too much, since we're only doing read-only
58 62 // access. but the value corresponds to the `new_header` variable in
59 63 // `revlog.py`, `_loadindex`
60 64 header_bytes: [0, 3, 0, 1],
61 65 };
62 66
63 67 fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
64 68 if index_bytes.is_empty() {
65 69 return Ok(IndexHeader::EMPTY_INDEX_HEADER);
66 70 }
67 71 if index_bytes.len() < 4 {
68 72 return Err(HgError::corrupted(
69 73 "corrupted revlog: can't read the index format header",
70 74 ));
71 75 }
72 76 Ok(IndexHeader {
73 77 header_bytes: {
74 78 let bytes: [u8; 4] =
75 79 index_bytes[0..4].try_into().expect("impossible");
76 80 bytes
77 81 },
78 82 })
79 83 }
80 84 }
81 85
82 86 /// Abstracts the access to the index bytes since they can be spread between
83 87 /// the immutable (bytes) part and the mutable (added) part if any appends
84 88 /// happened. This makes it transparent for the callers.
85 89 struct IndexData {
86 90 /// Immutable bytes, most likely taken from disk
87 91 bytes: Box<dyn Deref<Target = [u8]> + Send>,
88 92 /// Bytes that were added after reading the index
89 93 added: Vec<u8>,
90 94 }
91 95
92 96 impl IndexData {
93 97 pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send>) -> Self {
94 98 Self {
95 99 bytes,
96 100 added: vec![],
97 101 }
98 102 }
99 103
100 104 pub fn len(&self) -> usize {
101 105 self.bytes.len() + self.added.len()
102 106 }
103 107 }
104 108
105 109 impl std::ops::Index<std::ops::Range<usize>> for IndexData {
106 110 type Output = [u8];
107 111
108 112 fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
109 113 let start = index.start;
110 114 let end = index.end;
111 115 let immutable_len = self.bytes.len();
112 116 if start < immutable_len {
113 117 if end > immutable_len {
114 118 panic!("index data cannot span existing and added ranges");
115 119 }
116 120 &self.bytes[index]
117 121 } else {
118 122 &self.added[start - immutable_len..end - immutable_len]
119 123 }
120 124 }
121 125 }
122 126
127 pub struct RevisionDataParams {
128 flags: u16,
129 data_offset: u64,
130 data_compressed_length: i32,
131 data_uncompressed_length: i32,
132 data_delta_base: i32,
133 link_rev: i32,
134 parent_rev_1: i32,
135 parent_rev_2: i32,
136 node_id: [u8; NODE_BYTES_LENGTH],
137 _sidedata_offset: u64,
138 _sidedata_compressed_length: i32,
139 data_compression_mode: u8,
140 _sidedata_compression_mode: u8,
141 _rank: i32,
142 }
143
144 #[derive(BytesCast)]
145 #[repr(C)]
146 pub struct RevisionDataV1 {
147 data_offset_or_flags: unaligned::U64Be,
148 data_compressed_length: unaligned::I32Be,
149 data_uncompressed_length: unaligned::I32Be,
150 data_delta_base: unaligned::I32Be,
151 link_rev: unaligned::I32Be,
152 parent_rev_1: unaligned::I32Be,
153 parent_rev_2: unaligned::I32Be,
154 node_id: [u8; STORED_NODE_ID_BYTES],
155 }
156
157 fn _static_assert_size_of_revision_data_v1() {
158 let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>;
159 }
160
161 impl RevisionDataParams {
162 pub fn validate(&self) -> Result<(), RevlogError> {
163 if self.flags & !REVIDX_KNOWN_FLAGS != 0 {
164 return Err(RevlogError::corrupted(format!(
165 "unknown revlog index flags: {}",
166 self.flags
167 )));
168 }
169 if self.data_compression_mode != COMPRESSION_MODE_INLINE {
170 return Err(RevlogError::corrupted(format!(
171 "invalid data compression mode: {}",
172 self.data_compression_mode
173 )));
174 }
175 // FIXME isn't this only for v2 or changelog v2?
176 if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE {
177 return Err(RevlogError::corrupted(format!(
178 "invalid sidedata compression mode: {}",
179 self._sidedata_compression_mode
180 )));
181 }
182 Ok(())
183 }
184
185 pub fn into_v1(self) -> RevisionDataV1 {
186 let data_offset_or_flags = self.data_offset << 16 | self.flags as u64;
187 let mut node_id = [0; STORED_NODE_ID_BYTES];
188 node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id);
189 RevisionDataV1 {
190 data_offset_or_flags: data_offset_or_flags.into(),
191 data_compressed_length: self.data_compressed_length.into(),
192 data_uncompressed_length: self.data_uncompressed_length.into(),
193 data_delta_base: self.data_delta_base.into(),
194 link_rev: self.link_rev.into(),
195 parent_rev_1: self.parent_rev_1.into(),
196 parent_rev_2: self.parent_rev_2.into(),
197 node_id,
198 }
199 }
200 }
201
123 202 /// A Revlog index
124 203 pub struct Index {
125 204 bytes: IndexData,
126 205 /// Offsets of starts of index blocks.
127 206 /// Only needed when the index is interleaved with data.
128 207 offsets: Option<Vec<usize>>,
129 208 uses_generaldelta: bool,
130 209 }
131 210
132 211 impl Debug for Index {
133 212 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
134 213 f.debug_struct("Index")
135 214 .field("offsets", &self.offsets)
136 215 .field("uses_generaldelta", &self.uses_generaldelta)
137 216 .finish()
138 217 }
139 218 }
140 219
141 220 impl Graph for Index {
142 221 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
143 222 let err = || GraphError::ParentOutOfRange(rev);
144 223 match self.get_entry(rev) {
145 224 Some(entry) => {
146 225 // The C implementation checks that the parents are valid
147 226 // before returning
148 227 Ok([
149 228 self.check_revision(entry.p1()).ok_or_else(err)?,
150 229 self.check_revision(entry.p2()).ok_or_else(err)?,
151 230 ])
152 231 }
153 232 None => Ok([NULL_REVISION, NULL_REVISION]),
154 233 }
155 234 }
156 235 }
157 236
158 237 impl Index {
159 238 /// Create an index from bytes.
160 239 /// Calculate the start of each entry when is_inline is true.
161 240 pub fn new(
162 241 bytes: Box<dyn Deref<Target = [u8]> + Send>,
163 242 ) -> Result<Self, HgError> {
164 243 let header = IndexHeader::parse(bytes.as_ref())?;
165 244
166 245 if header.format_version() != IndexHeader::REVLOGV1 {
167 246 // A proper new version should have had a repo/store
168 247 // requirement.
169 248 return Err(HgError::corrupted("unsupported revlog version"));
170 249 }
171 250
172 251 // This is only correct because we know version is REVLOGV1.
173 252 // In v2 we always use generaldelta, while in v0 we never use
174 253 // generaldelta. Similar for [is_inline] (it's only used in v1).
175 254 let uses_generaldelta = header.format_flags().uses_generaldelta();
176 255
177 256 if header.format_flags().is_inline() {
178 257 let mut offset: usize = 0;
179 258 let mut offsets = Vec::new();
180 259
181 260 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
182 261 offsets.push(offset);
183 262 let end = offset + INDEX_ENTRY_SIZE;
184 263 let entry = IndexEntry {
185 264 bytes: &bytes[offset..end],
186 265 offset_override: None,
187 266 };
188 267
189 268 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
190 269 }
191 270
192 271 if offset == bytes.len() {
193 272 Ok(Self {
194 273 bytes: IndexData::new(bytes),
195 274 offsets: Some(offsets),
196 275 uses_generaldelta,
197 276 })
198 277 } else {
199 278 Err(HgError::corrupted("unexpected inline revlog length"))
200 279 }
201 280 } else {
202 281 Ok(Self {
203 282 bytes: IndexData::new(bytes),
204 283 offsets: None,
205 284 uses_generaldelta,
206 285 })
207 286 }
208 287 }
209 288
210 289 pub fn uses_generaldelta(&self) -> bool {
211 290 self.uses_generaldelta
212 291 }
213 292
214 293 /// Value of the inline flag.
215 294 pub fn is_inline(&self) -> bool {
216 295 self.offsets.is_some()
217 296 }
218 297
219 298 /// Return a slice of bytes if `revlog` is inline. Panic if not.
220 299 pub fn data(&self, start: usize, end: usize) -> &[u8] {
221 300 if !self.is_inline() {
222 301 panic!("tried to access data in the index of a revlog that is not inline");
223 302 }
224 303 &self.bytes[start..end]
225 304 }
226 305
227 306 /// Return number of entries of the revlog index.
228 307 pub fn len(&self) -> usize {
229 308 if let Some(offsets) = &self.offsets {
230 309 offsets.len()
231 310 } else {
232 311 self.bytes.len() / INDEX_ENTRY_SIZE
233 312 }
234 313 }
235 314
236 315 /// Returns `true` if the `Index` has zero `entries`.
237 316 pub fn is_empty(&self) -> bool {
238 317 self.len() == 0
239 318 }
240 319
241 320 /// Return the index entry corresponding to the given revision if it
242 321 /// exists.
243 322 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
244 323 if rev == NULL_REVISION {
245 324 return None;
246 325 }
247 326 Some(if let Some(offsets) = &self.offsets {
248 327 self.get_entry_inline(rev, offsets)
249 328 } else {
250 329 self.get_entry_separated(rev)
251 330 })
252 331 }
253 332
254 333 fn get_entry_inline(
255 334 &self,
256 335 rev: Revision,
257 336 offsets: &[usize],
258 337 ) -> IndexEntry {
259 338 let start = offsets[rev.0 as usize];
260 339 let end = start + INDEX_ENTRY_SIZE;
261 340 let bytes = &self.bytes[start..end];
262 341
263 342 // See IndexEntry for an explanation of this override.
264 343 let offset_override = Some(end);
265 344
266 345 IndexEntry {
267 346 bytes,
268 347 offset_override,
269 348 }
270 349 }
271 350
272 351 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
273 352 let start = rev.0 as usize * INDEX_ENTRY_SIZE;
274 353 let end = start + INDEX_ENTRY_SIZE;
275 354 let bytes = &self.bytes[start..end];
276 355
277 356 // Override the offset of the first revision as its bytes are used
278 357 // for the index's metadata (saving space because it is always 0)
279 358 let offset_override = if rev == Revision(0) { Some(0) } else { None };
280 359
281 360 IndexEntry {
282 361 bytes,
283 362 offset_override,
284 363 }
285 364 }
365
366 /// TODO move this to the trait probably, along with other things
367 pub fn append(
368 &mut self,
369 revision_data: RevisionDataParams,
370 ) -> Result<(), RevlogError> {
371 revision_data.validate()?;
372 let new_offset = self.bytes.len();
373 if let Some(offsets) = self.offsets.as_mut() {
374 offsets.push(new_offset)
375 }
376 self.bytes.added.extend(revision_data.into_v1().as_bytes());
377 Ok(())
378 }
286 379 }
287 380
288 381 impl super::RevlogIndex for Index {
289 382 fn len(&self) -> usize {
290 383 self.len()
291 384 }
292 385
293 386 fn node(&self, rev: Revision) -> Option<&Node> {
294 387 self.get_entry(rev).map(|entry| entry.hash())
295 388 }
296 389 }
297 390
298 391 #[derive(Debug)]
299 392 pub struct IndexEntry<'a> {
300 393 bytes: &'a [u8],
301 394 /// Allows to override the offset value of the entry.
302 395 ///
303 396 /// For interleaved index and data, the offset stored in the index
304 397 /// corresponds to the separated data offset.
305 398 /// It has to be overridden with the actual offset in the interleaved
306 399 /// index which is just after the index block.
307 400 ///
308 401 /// For separated index and data, the offset stored in the first index
309 402 /// entry is mixed with the index headers.
310 403 /// It has to be overridden with 0.
311 404 offset_override: Option<usize>,
312 405 }
313 406
314 407 impl<'a> IndexEntry<'a> {
315 408 /// Return the offset of the data.
316 409 pub fn offset(&self) -> usize {
317 410 if let Some(offset_override) = self.offset_override {
318 411 offset_override
319 412 } else {
320 413 let mut bytes = [0; 8];
321 414 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
322 415 BigEndian::read_u64(&bytes[..]) as usize
323 416 }
324 417 }
325 418
326 419 pub fn flags(&self) -> u16 {
327 420 BigEndian::read_u16(&self.bytes[6..=7])
328 421 }
329 422
330 423 /// Return the compressed length of the data.
331 424 pub fn compressed_len(&self) -> u32 {
332 425 BigEndian::read_u32(&self.bytes[8..=11])
333 426 }
334 427
335 428 /// Return the uncompressed length of the data.
336 429 pub fn uncompressed_len(&self) -> i32 {
337 430 BigEndian::read_i32(&self.bytes[12..=15])
338 431 }
339 432
340 433 /// Return the revision upon which the data has been derived.
341 434 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
342 435 // TODO Maybe return an Option when base_revision == rev?
343 436 // Requires to add rev to IndexEntry
344 437
345 438 BigEndian::read_i32(&self.bytes[16..]).into()
346 439 }
347 440
348 441 pub fn link_revision(&self) -> UncheckedRevision {
349 442 BigEndian::read_i32(&self.bytes[20..]).into()
350 443 }
351 444
352 445 pub fn p1(&self) -> UncheckedRevision {
353 446 BigEndian::read_i32(&self.bytes[24..]).into()
354 447 }
355 448
356 449 pub fn p2(&self) -> UncheckedRevision {
357 450 BigEndian::read_i32(&self.bytes[28..]).into()
358 451 }
359 452
360 453 /// Return the hash of revision's full text.
361 454 ///
362 455 /// Currently, SHA-1 is used and only the first 20 bytes of this field
363 456 /// are used.
364 457 pub fn hash(&self) -> &'a Node {
365 458 (&self.bytes[32..52]).try_into().unwrap()
366 459 }
367 460 }
368 461
369 462 #[cfg(test)]
370 463 mod tests {
371 464 use super::*;
372 465 use crate::node::NULL_NODE;
373 466
374 467 #[cfg(test)]
375 468 #[derive(Debug, Copy, Clone)]
376 469 pub struct IndexEntryBuilder {
377 470 is_first: bool,
378 471 is_inline: bool,
379 472 is_general_delta: bool,
380 473 version: u16,
381 474 offset: usize,
382 475 compressed_len: usize,
383 476 uncompressed_len: usize,
384 477 base_revision_or_base_of_delta_chain: Revision,
385 478 link_revision: Revision,
386 479 p1: Revision,
387 480 p2: Revision,
388 481 node: Node,
389 482 }
390 483
391 484 #[cfg(test)]
392 485 impl IndexEntryBuilder {
393 486 #[allow(clippy::new_without_default)]
394 487 pub fn new() -> Self {
395 488 Self {
396 489 is_first: false,
397 490 is_inline: false,
398 491 is_general_delta: true,
399 492 version: 1,
400 493 offset: 0,
401 494 compressed_len: 0,
402 495 uncompressed_len: 0,
403 496 base_revision_or_base_of_delta_chain: Revision(0),
404 497 link_revision: Revision(0),
405 498 p1: NULL_REVISION,
406 499 p2: NULL_REVISION,
407 500 node: NULL_NODE,
408 501 }
409 502 }
410 503
411 504 pub fn is_first(&mut self, value: bool) -> &mut Self {
412 505 self.is_first = value;
413 506 self
414 507 }
415 508
416 509 pub fn with_inline(&mut self, value: bool) -> &mut Self {
417 510 self.is_inline = value;
418 511 self
419 512 }
420 513
421 514 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
422 515 self.is_general_delta = value;
423 516 self
424 517 }
425 518
426 519 pub fn with_version(&mut self, value: u16) -> &mut Self {
427 520 self.version = value;
428 521 self
429 522 }
430 523
431 524 pub fn with_offset(&mut self, value: usize) -> &mut Self {
432 525 self.offset = value;
433 526 self
434 527 }
435 528
436 529 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
437 530 self.compressed_len = value;
438 531 self
439 532 }
440 533
441 534 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
442 535 self.uncompressed_len = value;
443 536 self
444 537 }
445 538
446 539 pub fn with_base_revision_or_base_of_delta_chain(
447 540 &mut self,
448 541 value: Revision,
449 542 ) -> &mut Self {
450 543 self.base_revision_or_base_of_delta_chain = value;
451 544 self
452 545 }
453 546
454 547 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
455 548 self.link_revision = value;
456 549 self
457 550 }
458 551
459 552 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
460 553 self.p1 = value;
461 554 self
462 555 }
463 556
464 557 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
465 558 self.p2 = value;
466 559 self
467 560 }
468 561
469 562 pub fn with_node(&mut self, value: Node) -> &mut Self {
470 563 self.node = value;
471 564 self
472 565 }
473 566
474 567 pub fn build(&self) -> Vec<u8> {
475 568 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
476 569 if self.is_first {
477 570 bytes.extend(&match (self.is_general_delta, self.is_inline) {
478 571 (false, false) => [0u8, 0],
479 572 (false, true) => [0u8, 1],
480 573 (true, false) => [0u8, 2],
481 574 (true, true) => [0u8, 3],
482 575 });
483 576 bytes.extend(&self.version.to_be_bytes());
484 577 // Remaining offset bytes.
485 578 bytes.extend(&[0u8; 2]);
486 579 } else {
487 580 // Offset stored on 48 bits (6 bytes)
488 581 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
489 582 }
490 583 bytes.extend(&[0u8; 2]); // Revision flags.
491 584 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
492 585 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
493 586 bytes.extend(
494 587 &self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
495 588 );
496 589 bytes.extend(&self.link_revision.0.to_be_bytes());
497 590 bytes.extend(&self.p1.0.to_be_bytes());
498 591 bytes.extend(&self.p2.0.to_be_bytes());
499 592 bytes.extend(self.node.as_bytes());
500 593 bytes.extend(vec![0u8; 12]);
501 594 bytes
502 595 }
503 596 }
504 597
505 598 pub fn is_inline(index_bytes: &[u8]) -> bool {
506 599 IndexHeader::parse(index_bytes)
507 600 .expect("too short")
508 601 .format_flags()
509 602 .is_inline()
510 603 }
511 604
512 605 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
513 606 IndexHeader::parse(index_bytes)
514 607 .expect("too short")
515 608 .format_flags()
516 609 .uses_generaldelta()
517 610 }
518 611
519 612 pub fn get_version(index_bytes: &[u8]) -> u16 {
520 613 IndexHeader::parse(index_bytes)
521 614 .expect("too short")
522 615 .format_version()
523 616 }
524 617
525 618 #[test]
526 619 fn flags_when_no_inline_flag_test() {
527 620 let bytes = IndexEntryBuilder::new()
528 621 .is_first(true)
529 622 .with_general_delta(false)
530 623 .with_inline(false)
531 624 .build();
532 625
533 626 assert!(!is_inline(&bytes));
534 627 assert!(!uses_generaldelta(&bytes));
535 628 }
536 629
537 630 #[test]
538 631 fn flags_when_inline_flag_test() {
539 632 let bytes = IndexEntryBuilder::new()
540 633 .is_first(true)
541 634 .with_general_delta(false)
542 635 .with_inline(true)
543 636 .build();
544 637
545 638 assert!(is_inline(&bytes));
546 639 assert!(!uses_generaldelta(&bytes));
547 640 }
548 641
549 642 #[test]
550 643 fn flags_when_inline_and_generaldelta_flags_test() {
551 644 let bytes = IndexEntryBuilder::new()
552 645 .is_first(true)
553 646 .with_general_delta(true)
554 647 .with_inline(true)
555 648 .build();
556 649
557 650 assert!(is_inline(&bytes));
558 651 assert!(uses_generaldelta(&bytes));
559 652 }
560 653
561 654 #[test]
562 655 fn test_offset() {
563 656 let bytes = IndexEntryBuilder::new().with_offset(1).build();
564 657 let entry = IndexEntry {
565 658 bytes: &bytes,
566 659 offset_override: None,
567 660 };
568 661
569 662 assert_eq!(entry.offset(), 1)
570 663 }
571 664
572 665 #[test]
573 666 fn test_with_overridden_offset() {
574 667 let bytes = IndexEntryBuilder::new().with_offset(1).build();
575 668 let entry = IndexEntry {
576 669 bytes: &bytes,
577 670 offset_override: Some(2),
578 671 };
579 672
580 673 assert_eq!(entry.offset(), 2)
581 674 }
582 675
583 676 #[test]
584 677 fn test_compressed_len() {
585 678 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
586 679 let entry = IndexEntry {
587 680 bytes: &bytes,
588 681 offset_override: None,
589 682 };
590 683
591 684 assert_eq!(entry.compressed_len(), 1)
592 685 }
593 686
594 687 #[test]
595 688 fn test_uncompressed_len() {
596 689 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
597 690 let entry = IndexEntry {
598 691 bytes: &bytes,
599 692 offset_override: None,
600 693 };
601 694
602 695 assert_eq!(entry.uncompressed_len(), 1)
603 696 }
604 697
605 698 #[test]
606 699 fn test_base_revision_or_base_of_delta_chain() {
607 700 let bytes = IndexEntryBuilder::new()
608 701 .with_base_revision_or_base_of_delta_chain(Revision(1))
609 702 .build();
610 703 let entry = IndexEntry {
611 704 bytes: &bytes,
612 705 offset_override: None,
613 706 };
614 707
615 708 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
616 709 }
617 710
618 711 #[test]
619 712 fn link_revision_test() {
620 713 let bytes = IndexEntryBuilder::new()
621 714 .with_link_revision(Revision(123))
622 715 .build();
623 716
624 717 let entry = IndexEntry {
625 718 bytes: &bytes,
626 719 offset_override: None,
627 720 };
628 721
629 722 assert_eq!(entry.link_revision(), 123.into());
630 723 }
631 724
632 725 #[test]
633 726 fn p1_test() {
634 727 let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();
635 728
636 729 let entry = IndexEntry {
637 730 bytes: &bytes,
638 731 offset_override: None,
639 732 };
640 733
641 734 assert_eq!(entry.p1(), 123.into());
642 735 }
643 736
644 737 #[test]
645 738 fn p2_test() {
646 739 let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();
647 740
648 741 let entry = IndexEntry {
649 742 bytes: &bytes,
650 743 offset_override: None,
651 744 };
652 745
653 746 assert_eq!(entry.p2(), 123.into());
654 747 }
655 748
656 749 #[test]
657 750 fn node_test() {
658 751 let node = Node::from_hex("0123456789012345678901234567890123456789")
659 752 .unwrap();
660 753 let bytes = IndexEntryBuilder::new().with_node(node).build();
661 754
662 755 let entry = IndexEntry {
663 756 bytes: &bytes,
664 757 offset_override: None,
665 758 };
666 759
667 760 assert_eq!(*entry.hash(), node);
668 761 }
669 762
670 763 #[test]
671 764 fn version_test() {
672 765 let bytes = IndexEntryBuilder::new()
673 766 .is_first(true)
674 767 .with_version(2)
675 768 .build();
676 769
677 770 assert_eq!(get_version(&bytes), 2)
678 771 }
679 772 }
680 773
681 774 #[cfg(test)]
682 775 pub use tests::IndexEntryBuilder;
@@ -1,429 +1,433 b''
1 1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5
6 6 //! Definitions and utilities for Revision nodes
7 7 //!
8 8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 9 //! of a revision.
10 10
11 11 use crate::errors::HgError;
12 12 use bytes_cast::BytesCast;
13 13 use std::fmt;
14 14
15 15 /// The length in bytes of a `Node`
16 16 ///
17 17 /// This constant is meant to ease refactors of this module, and
18 18 /// are private so that calling code does not expect all nodes have
19 19 /// the same size, should we support several formats concurrently in
20 20 /// the future.
21 21 pub const NODE_BYTES_LENGTH: usize = 20;
22 22
23 /// The length in bytes set aside on disk for a `Node`. Revlog up to v1 only
24 /// use 20 out of those 32.
25 pub const STORED_NODE_ID_BYTES: usize = 32;
26
23 27 /// Id of the null node.
24 28 ///
25 29 /// Used to indicate the absence of node.
26 30 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
27 31
28 32 /// The length in bytes of a `Node`
29 33 ///
30 34 /// see also `NODES_BYTES_LENGTH` about it being private.
31 35 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
32 36
33 37 /// Default for UI presentation
34 38 const SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH: u8 = 12;
35 39
36 40 /// Private alias for readability and to ease future change
37 41 type NodeData = [u8; NODE_BYTES_LENGTH];
38 42
39 43 /// Binary revision SHA
40 44 ///
41 45 /// ## Future changes of hash size
42 46 ///
43 47 /// To accomodate future changes of hash size, Rust callers
44 48 /// should use the conversion methods at the boundaries (FFI, actual
45 49 /// computation of hashes and I/O) only, and only if required.
46 50 ///
47 51 /// All other callers outside of unit tests should just handle `Node` values
48 52 /// and never make any assumption on the actual length, using [`nybbles_len`]
49 53 /// if they need a loop boundary.
50 54 ///
51 55 /// All methods that create a `Node` either take a type that enforces
52 56 /// the size or return an error at runtime.
53 57 ///
54 58 /// [`nybbles_len`]: #method.nybbles_len
55 59 #[derive(Copy, Clone, PartialEq, BytesCast, derive_more::From)]
56 60 #[repr(transparent)]
57 61 pub struct Node {
58 62 data: NodeData,
59 63 }
60 64
61 65 impl fmt::Debug for Node {
62 66 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
63 67 let n = format!("{:x?}", self.data);
64 68 // We're using debug_tuple because it makes the output a little
65 69 // more compact without losing data.
66 70 f.debug_tuple("Node").field(&n).finish()
67 71 }
68 72 }
69 73
70 74 /// The node value for NULL_REVISION
71 75 pub const NULL_NODE: Node = Node {
72 76 data: [0; NODE_BYTES_LENGTH],
73 77 };
74 78
75 79 /// Return an error if the slice has an unexpected length
76 80 impl<'a> TryFrom<&'a [u8]> for &'a Node {
77 81 type Error = ();
78 82
79 83 #[inline]
80 84 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
81 85 match Node::from_bytes(bytes) {
82 86 Ok((node, rest)) if rest.is_empty() => Ok(node),
83 87 _ => Err(()),
84 88 }
85 89 }
86 90 }
87 91
88 92 /// Return an error if the slice has an unexpected length
89 93 impl TryFrom<&'_ [u8]> for Node {
90 94 type Error = std::array::TryFromSliceError;
91 95
92 96 #[inline]
93 97 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
94 98 let data = bytes.try_into()?;
95 99 Ok(Self { data })
96 100 }
97 101 }
98 102
99 103 impl From<&'_ NodeData> for Node {
100 104 #[inline]
101 105 fn from(data: &'_ NodeData) -> Self {
102 106 Self { data: *data }
103 107 }
104 108 }
105 109
106 110 impl fmt::LowerHex for Node {
107 111 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
108 112 for &byte in &self.data {
109 113 write!(f, "{:02x}", byte)?
110 114 }
111 115 Ok(())
112 116 }
113 117 }
114 118
115 119 #[derive(Debug)]
116 120 pub struct FromHexError;
117 121
118 122 /// Low level utility function, also for prefixes
119 123 fn get_nybble(s: &[u8], i: usize) -> u8 {
120 124 if i % 2 == 0 {
121 125 s[i / 2] >> 4
122 126 } else {
123 127 s[i / 2] & 0x0f
124 128 }
125 129 }
126 130
127 131 impl Node {
128 132 /// Retrieve the `i`th half-byte of the binary data.
129 133 ///
130 134 /// This is also the `i`th hexadecimal digit in numeric form,
131 135 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
132 136 pub fn get_nybble(&self, i: usize) -> u8 {
133 137 get_nybble(&self.data, i)
134 138 }
135 139
136 140 /// Length of the data, in nybbles
137 141 pub fn nybbles_len(&self) -> usize {
138 142 // public exposure as an instance method only, so that we can
139 143 // easily support several sizes of hashes if needed in the future.
140 144 NODE_NYBBLES_LENGTH
141 145 }
142 146
143 147 /// Convert from hexadecimal string representation
144 148 ///
145 149 /// Exact length is required.
146 150 ///
147 151 /// To be used in FFI and I/O only, in order to facilitate future
148 152 /// changes of hash format.
149 153 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
150 154 let prefix = NodePrefix::from_hex(hex)?;
151 155 if prefix.nybbles_len() == NODE_NYBBLES_LENGTH {
152 156 Ok(Self { data: prefix.data })
153 157 } else {
154 158 Err(FromHexError)
155 159 }
156 160 }
157 161
158 162 /// `from_hex`, but for input from an internal file of the repository such
159 163 /// as a changelog or manifest entry.
160 164 ///
161 165 /// An error is treated as repository corruption.
162 166 pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
163 167 Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
164 168 HgError::CorruptedRepository(format!(
165 169 "Expected a full hexadecimal node ID, found {}",
166 170 String::from_utf8_lossy(hex.as_ref())
167 171 ))
168 172 })
169 173 }
170 174
171 175 /// Provide access to binary data
172 176 ///
173 177 /// This is needed by FFI layers, for instance to return expected
174 178 /// binary values to Python.
175 179 pub fn as_bytes(&self) -> &[u8] {
176 180 &self.data
177 181 }
178 182
179 183 pub fn short(&self) -> NodePrefix {
180 184 NodePrefix {
181 185 nybbles_len: SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH,
182 186 data: self.data,
183 187 }
184 188 }
185 189
186 190 pub fn pad_to_256_bits(&self) -> [u8; 32] {
187 191 let mut bits = [0; 32];
188 192 bits[..NODE_BYTES_LENGTH].copy_from_slice(&self.data);
189 193 bits
190 194 }
191 195 }
192 196
193 197 /// The beginning of a binary revision SHA.
194 198 ///
195 199 /// Since it can potentially come from an hexadecimal representation with
196 200 /// odd length, it needs to carry around whether the last 4 bits are relevant
197 201 /// or not.
198 202 #[derive(Debug, PartialEq, Copy, Clone)]
199 203 pub struct NodePrefix {
200 204 /// In `1..=NODE_NYBBLES_LENGTH`
201 205 nybbles_len: u8,
202 206 /// The first `4 * length_in_nybbles` bits are used (considering bits
203 207 /// within a bytes in big-endian: most significant first), the rest
204 208 /// are zero.
205 209 data: NodeData,
206 210 }
207 211
208 212 impl NodePrefix {
209 213 /// Convert from hexadecimal string representation
210 214 ///
211 215 /// Similarly to `hex::decode`, can be used with Unicode string types
212 216 /// (`String`, `&str`) as well as bytes.
213 217 ///
214 218 /// To be used in FFI and I/O only, in order to facilitate future
215 219 /// changes of hash format.
216 220 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
217 221 let hex = hex.as_ref();
218 222 let len = hex.len();
219 223 if len > NODE_NYBBLES_LENGTH || len == 0 {
220 224 return Err(FromHexError);
221 225 }
222 226
223 227 let mut data = [0; NODE_BYTES_LENGTH];
224 228 let mut nybbles_len = 0;
225 229 for &ascii_byte in hex {
226 230 let nybble = match char::from(ascii_byte).to_digit(16) {
227 231 Some(digit) => digit as u8,
228 232 None => return Err(FromHexError),
229 233 };
230 234 // Fill in the upper half of a byte first, then the lower half.
231 235 let shift = if nybbles_len % 2 == 0 { 4 } else { 0 };
232 236 data[nybbles_len as usize / 2] |= nybble << shift;
233 237 nybbles_len += 1;
234 238 }
235 239 Ok(Self { data, nybbles_len })
236 240 }
237 241
238 242 pub fn nybbles_len(&self) -> usize {
239 243 self.nybbles_len as _
240 244 }
241 245
242 246 pub fn is_prefix_of(&self, node: &Node) -> bool {
243 247 let full_bytes = self.nybbles_len() / 2;
244 248 if self.data[..full_bytes] != node.data[..full_bytes] {
245 249 return false;
246 250 }
247 251 if self.nybbles_len() % 2 == 0 {
248 252 return true;
249 253 }
250 254 let last = self.nybbles_len() - 1;
251 255 self.get_nybble(last) == node.get_nybble(last)
252 256 }
253 257
254 258 /// Retrieve the `i`th half-byte from the prefix.
255 259 ///
256 260 /// This is also the `i`th hexadecimal digit in numeric form,
257 261 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
258 262 pub fn get_nybble(&self, i: usize) -> u8 {
259 263 assert!(i < self.nybbles_len());
260 264 get_nybble(&self.data, i)
261 265 }
262 266
263 267 fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
264 268 (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i))
265 269 }
266 270
267 271 /// Return the index first nybble that's different from `node`
268 272 ///
269 273 /// If the return value is `None` that means that `self` is
270 274 /// a prefix of `node`, but the current method is a bit slower
271 275 /// than `is_prefix_of`.
272 276 ///
273 277 /// Returned index is as in `get_nybble`, i.e., starting at 0.
274 278 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
275 279 self.iter_nybbles()
276 280 .zip(NodePrefix::from(*node).iter_nybbles())
277 281 .position(|(a, b)| a != b)
278 282 }
279 283 }
280 284
281 285 impl fmt::LowerHex for NodePrefix {
282 286 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
283 287 let full_bytes = self.nybbles_len() / 2;
284 288 for &byte in &self.data[..full_bytes] {
285 289 write!(f, "{:02x}", byte)?
286 290 }
287 291 if self.nybbles_len() % 2 == 1 {
288 292 let last = self.nybbles_len() - 1;
289 293 write!(f, "{:x}", self.get_nybble(last))?
290 294 }
291 295 Ok(())
292 296 }
293 297 }
294 298
295 299 /// A shortcut for full `Node` references
296 300 impl From<&'_ Node> for NodePrefix {
297 301 fn from(node: &'_ Node) -> Self {
298 302 NodePrefix {
299 303 nybbles_len: node.nybbles_len() as _,
300 304 data: node.data,
301 305 }
302 306 }
303 307 }
304 308
305 309 /// A shortcut for full `Node` references
306 310 impl From<Node> for NodePrefix {
307 311 fn from(node: Node) -> Self {
308 312 NodePrefix {
309 313 nybbles_len: node.nybbles_len() as _,
310 314 data: node.data,
311 315 }
312 316 }
313 317 }
314 318
315 319 impl PartialEq<Node> for NodePrefix {
316 320 fn eq(&self, other: &Node) -> bool {
317 321 self.data == other.data && self.nybbles_len() == other.nybbles_len()
318 322 }
319 323 }
320 324
321 325 #[cfg(test)]
322 326 mod tests {
323 327 use super::*;
324 328
325 329 const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
326 330 const SAMPLE_NODE: Node = Node {
327 331 data: [
328 332 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
329 333 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
330 334 ],
331 335 };
332 336
333 337 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
334 338 /// The padding is made with zeros.
335 339 pub fn hex_pad_right(hex: &str) -> String {
336 340 let mut res = hex.to_string();
337 341 while res.len() < NODE_NYBBLES_LENGTH {
338 342 res.push('0');
339 343 }
340 344 res
341 345 }
342 346
343 347 #[test]
344 348 fn test_node_from_hex() {
345 349 let not_hex = "012... oops";
346 350 let too_short = "0123";
347 351 let too_long = format!("{}0", SAMPLE_NODE_HEX);
348 352 assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
349 353 assert!(Node::from_hex(not_hex).is_err());
350 354 assert!(Node::from_hex(too_short).is_err());
351 355 assert!(Node::from_hex(too_long).is_err());
352 356 }
353 357
354 358 #[test]
355 359 fn test_node_encode_hex() {
356 360 assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
357 361 }
358 362
359 363 #[test]
360 364 fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
361 365 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
362 366 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
363 367 assert_eq!(
364 368 format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
365 369 SAMPLE_NODE_HEX
366 370 );
367 371 Ok(())
368 372 }
369 373
370 374 #[test]
371 375 fn test_prefix_from_hex_errors() {
372 376 assert!(NodePrefix::from_hex("testgr").is_err());
373 377 let mut long = format!("{:x}", NULL_NODE);
374 378 long.push('c');
375 379 assert!(NodePrefix::from_hex(&long).is_err())
376 380 }
377 381
378 382 #[test]
379 383 fn test_is_prefix_of() -> Result<(), FromHexError> {
380 384 let mut node_data = [0; NODE_BYTES_LENGTH];
381 385 node_data[0] = 0x12;
382 386 node_data[1] = 0xca;
383 387 let node = Node::from(node_data);
384 388 assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
385 389 assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
386 390 assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
387 391 assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
388 392 Ok(())
389 393 }
390 394
391 395 #[test]
392 396 fn test_get_nybble() -> Result<(), FromHexError> {
393 397 let prefix = NodePrefix::from_hex("dead6789cafe")?;
394 398 assert_eq!(prefix.get_nybble(0), 13);
395 399 assert_eq!(prefix.get_nybble(7), 9);
396 400 Ok(())
397 401 }
398 402
399 403 #[test]
400 404 fn test_first_different_nybble_even_prefix() {
401 405 let prefix = NodePrefix::from_hex("12ca").unwrap();
402 406 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
403 407 assert_eq!(prefix.first_different_nybble(&node), Some(0));
404 408 node.data[0] = 0x13;
405 409 assert_eq!(prefix.first_different_nybble(&node), Some(1));
406 410 node.data[0] = 0x12;
407 411 assert_eq!(prefix.first_different_nybble(&node), Some(2));
408 412 node.data[1] = 0xca;
409 413 // now it is a prefix
410 414 assert_eq!(prefix.first_different_nybble(&node), None);
411 415 }
412 416
413 417 #[test]
414 418 fn test_first_different_nybble_odd_prefix() {
415 419 let prefix = NodePrefix::from_hex("12c").unwrap();
416 420 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
417 421 assert_eq!(prefix.first_different_nybble(&node), Some(0));
418 422 node.data[0] = 0x13;
419 423 assert_eq!(prefix.first_different_nybble(&node), Some(1));
420 424 node.data[0] = 0x12;
421 425 assert_eq!(prefix.first_different_nybble(&node), Some(2));
422 426 node.data[1] = 0xca;
423 427 // now it is a prefix
424 428 assert_eq!(prefix.first_different_nybble(&node), None);
425 429 }
426 430 }
427 431
428 432 #[cfg(test)]
429 433 pub use tests::hex_pad_right;
General Comments 0
You need to be logged in to leave comments. Login now