##// END OF EJS Templates
rust-index: add support for `_slicechunktodensity`
Raphaël Gomès -
r52111:0112803e default
parent child Browse files
Show More
@@ -1,1173 +1,1343 b''
1 1 use std::collections::hash_map::RandomState;
2 2 use std::collections::HashSet;
3 3 use std::fmt::Debug;
4 4 use std::ops::Deref;
5 5 use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
6 6
7 7 use byteorder::{BigEndian, ByteOrder};
8 8 use bytes_cast::{unaligned, BytesCast};
9 9
10 10 use super::REVIDX_KNOWN_FLAGS;
11 11 use crate::errors::HgError;
12 12 use crate::node::{NODE_BYTES_LENGTH, NULL_NODE, STORED_NODE_ID_BYTES};
13 13 use crate::revlog::node::Node;
14 14 use crate::revlog::{Revision, NULL_REVISION};
15 15 use crate::{
16 16 dagops, BaseRevision, FastHashMap, Graph, GraphError, RevlogError,
17 17 RevlogIndex, UncheckedRevision,
18 18 };
19 19
/// Size in bytes of one revlog v1 index entry (see [`RevisionDataV1`]).
pub const INDEX_ENTRY_SIZE: usize = 64;
/// The only data / sidedata compression mode accepted by
/// [`RevisionDataParams::validate`].
pub const COMPRESSION_MODE_INLINE: u8 = 2;
22 22
23 #[derive(Debug)]
23 24 pub struct IndexHeader {
24 25 pub(super) header_bytes: [u8; 4],
25 26 }
26 27
/// Flag bits decoded from the index header.
#[derive(Copy, Clone)]
pub struct IndexHeaderFlags {
    flags: u16,
}

/// Corresponds to the high bits of `_format_flags` in python
impl IndexHeaderFlags {
    /// Bit mask matching FLAG_INLINE_DATA in python.
    const INLINE_DATA: u16 = 1;
    /// Bit mask matching FLAG_GENERALDELTA in python.
    const GENERALDELTA: u16 = 2;

    /// Corresponds to FLAG_INLINE_DATA in python
    pub fn is_inline(self) -> bool {
        (self.flags & Self::INLINE_DATA) != 0
    }

    /// Corresponds to FLAG_GENERALDELTA in python
    pub fn uses_generaldelta(self) -> bool {
        (self.flags & Self::GENERALDELTA) != 0
    }
}
43 44
44 45 /// Corresponds to the INDEX_HEADER structure,
45 46 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
46 47 impl IndexHeader {
47 48 fn format_flags(&self) -> IndexHeaderFlags {
48 49 // No "unknown flags" check here, unlike in python. Maybe there should
49 50 // be.
50 51 IndexHeaderFlags {
51 52 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
52 53 }
53 54 }
54 55
55 56 /// The only revlog version currently supported by rhg.
56 57 const REVLOGV1: u16 = 1;
57 58
58 59 /// Corresponds to `_format_version` in Python.
59 60 fn format_version(&self) -> u16 {
60 61 BigEndian::read_u16(&self.header_bytes[2..4])
61 62 }
62 63
63 64 pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> {
64 65 if index_bytes.is_empty() {
65 66 return Ok(None);
66 67 }
67 68 if index_bytes.len() < 4 {
68 69 return Err(HgError::corrupted(
69 70 "corrupted revlog: can't read the index format header",
70 71 ));
71 72 }
72 73 Ok(Some(IndexHeader {
73 74 header_bytes: {
74 75 let bytes: [u8; 4] =
75 76 index_bytes[0..4].try_into().expect("impossible");
76 77 bytes
77 78 },
78 79 }))
79 80 }
80 81 }
81 82
/// Uniform view over the bytes of an index.
///
/// The bytes can be spread between the immutable part (`bytes`) and the
/// mutable part (`added`) if any appends happened; this type hides the
/// split from callers.
struct IndexData {
    /// Immutable bytes, most likely taken from disk
    bytes: Box<dyn Deref<Target = [u8]> + Send>,
    /// Set when index contents are stripped: the start of the first
    /// stripped revision. When present, only `bytes[..truncation]` is
    /// considered part of the index.
    truncation: Option<usize>,
    /// Bytes that were added after reading the index
    added: Vec<u8>,
}
95 96
96 97 impl IndexData {
97 98 pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send>) -> Self {
98 99 Self {
99 100 bytes,
100 101 truncation: None,
101 102 added: vec![],
102 103 }
103 104 }
104 105
105 106 pub fn len(&self) -> usize {
106 107 match self.truncation {
107 108 Some(truncation) => truncation + self.added.len(),
108 109 None => self.bytes.len() + self.added.len(),
109 110 }
110 111 }
111 112
112 113 fn remove(
113 114 &mut self,
114 115 rev: Revision,
115 116 offsets: Option<&[usize]>,
116 117 ) -> Result<(), RevlogError> {
117 118 let rev = rev.0 as usize;
118 119 let truncation = if let Some(offsets) = offsets {
119 120 offsets[rev]
120 121 } else {
121 122 rev * INDEX_ENTRY_SIZE
122 123 };
123 124 if truncation < self.bytes.len() {
124 125 self.truncation = Some(truncation);
125 126 self.added.clear();
126 127 } else {
127 128 self.added.truncate(truncation - self.bytes.len());
128 129 }
129 130 Ok(())
130 131 }
131 132
132 133 fn is_new(&self) -> bool {
133 134 self.bytes.is_empty()
134 135 }
135 136 }
136 137
137 138 impl std::ops::Index<std::ops::Range<usize>> for IndexData {
138 139 type Output = [u8];
139 140
140 141 fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
141 142 let start = index.start;
142 143 let end = index.end;
143 144 let immutable_len = match self.truncation {
144 145 Some(truncation) => truncation,
145 146 None => self.bytes.len(),
146 147 };
147 148 if start < immutable_len {
148 149 if end > immutable_len {
149 150 panic!("index data cannot span existing and added ranges");
150 151 }
151 152 &self.bytes[index]
152 153 } else {
153 154 &self.added[start - immutable_len..end - immutable_len]
154 155 }
155 156 }
156 157 }
157 158
158 159 #[derive(Debug, PartialEq, Eq)]
159 160 pub struct RevisionDataParams {
160 161 pub flags: u16,
161 162 pub data_offset: u64,
162 163 pub data_compressed_length: i32,
163 164 pub data_uncompressed_length: i32,
164 165 pub data_delta_base: i32,
165 166 pub link_rev: i32,
166 167 pub parent_rev_1: i32,
167 168 pub parent_rev_2: i32,
168 169 pub node_id: [u8; NODE_BYTES_LENGTH],
169 170 pub _sidedata_offset: u64,
170 171 pub _sidedata_compressed_length: i32,
171 172 pub data_compression_mode: u8,
172 173 pub _sidedata_compression_mode: u8,
173 174 pub _rank: i32,
174 175 }
175 176
176 177 impl Default for RevisionDataParams {
177 178 fn default() -> Self {
178 179 Self {
179 180 flags: 0,
180 181 data_offset: 0,
181 182 data_compressed_length: 0,
182 183 data_uncompressed_length: 0,
183 184 data_delta_base: -1,
184 185 link_rev: -1,
185 186 parent_rev_1: -1,
186 187 parent_rev_2: -1,
187 188 node_id: [0; NODE_BYTES_LENGTH],
188 189 _sidedata_offset: 0,
189 190 _sidedata_compressed_length: 0,
190 191 data_compression_mode: COMPRESSION_MODE_INLINE,
191 192 _sidedata_compression_mode: COMPRESSION_MODE_INLINE,
192 193 _rank: -1,
193 194 }
194 195 }
195 196 }
196 197
197 198 #[derive(BytesCast)]
198 199 #[repr(C)]
199 200 pub struct RevisionDataV1 {
200 201 data_offset_or_flags: unaligned::U64Be,
201 202 data_compressed_length: unaligned::I32Be,
202 203 data_uncompressed_length: unaligned::I32Be,
203 204 data_delta_base: unaligned::I32Be,
204 205 link_rev: unaligned::I32Be,
205 206 parent_rev_1: unaligned::I32Be,
206 207 parent_rev_2: unaligned::I32Be,
207 208 node_id: [u8; STORED_NODE_ID_BYTES],
208 209 }
209 210
210 211 fn _static_assert_size_of_revision_data_v1() {
211 212 let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>;
212 213 }
213 214
214 215 impl RevisionDataParams {
215 216 pub fn validate(&self) -> Result<(), RevlogError> {
216 217 if self.flags & !REVIDX_KNOWN_FLAGS != 0 {
217 218 return Err(RevlogError::corrupted(format!(
218 219 "unknown revlog index flags: {}",
219 220 self.flags
220 221 )));
221 222 }
222 223 if self.data_compression_mode != COMPRESSION_MODE_INLINE {
223 224 return Err(RevlogError::corrupted(format!(
224 225 "invalid data compression mode: {}",
225 226 self.data_compression_mode
226 227 )));
227 228 }
228 229 // FIXME isn't this only for v2 or changelog v2?
229 230 if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE {
230 231 return Err(RevlogError::corrupted(format!(
231 232 "invalid sidedata compression mode: {}",
232 233 self._sidedata_compression_mode
233 234 )));
234 235 }
235 236 Ok(())
236 237 }
237 238
238 239 pub fn into_v1(self) -> RevisionDataV1 {
239 240 let data_offset_or_flags = self.data_offset << 16 | self.flags as u64;
240 241 let mut node_id = [0; STORED_NODE_ID_BYTES];
241 242 node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id);
242 243 RevisionDataV1 {
243 244 data_offset_or_flags: data_offset_or_flags.into(),
244 245 data_compressed_length: self.data_compressed_length.into(),
245 246 data_uncompressed_length: self.data_uncompressed_length.into(),
246 247 data_delta_base: self.data_delta_base.into(),
247 248 link_rev: self.link_rev.into(),
248 249 parent_rev_1: self.parent_rev_1.into(),
249 250 parent_rev_2: self.parent_rev_2.into(),
250 251 node_id,
251 252 }
252 253 }
253 254 }
254 255
255 256 /// A Revlog index
256 257 pub struct Index {
257 258 bytes: IndexData,
258 259 /// Offsets of starts of index blocks.
259 260 /// Only needed when the index is interleaved with data.
260 261 offsets: RwLock<Option<Vec<usize>>>,
261 262 uses_generaldelta: bool,
262 263 is_inline: bool,
263 264 /// Cache of the head revisions in this index, kept in sync. Should
264 265 /// be accessed via the [`Self::head_revs`] method.
265 266 head_revs: Vec<Revision>,
266 267 /// Cache of the last filtered revisions in this index, used to make sure
267 268 /// we haven't changed filters when returning the cached `head_revs`.
268 269 filtered_revs: HashSet<Revision>,
269 270 }
270 271
271 272 impl Debug for Index {
272 273 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
273 274 f.debug_struct("Index")
274 275 .field("offsets", &self.offsets)
275 276 .field("uses_generaldelta", &self.uses_generaldelta)
276 277 .finish()
277 278 }
278 279 }
279 280
280 281 impl Graph for Index {
281 282 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
282 283 let err = || GraphError::ParentOutOfRange(rev);
283 284 match self.get_entry(rev) {
284 285 Some(entry) => {
285 286 // The C implementation checks that the parents are valid
286 287 // before returning
287 288 Ok([
288 289 self.check_revision(entry.p1()).ok_or_else(err)?,
289 290 self.check_revision(entry.p2()).ok_or_else(err)?,
290 291 ])
291 292 }
292 293 None => Ok([NULL_REVISION, NULL_REVISION]),
293 294 }
294 295 }
295 296 }
296 297
297 298 /// A cache suitable for find_snapshots
298 299 ///
299 300 /// Logically equivalent to a mapping whose keys are [`BaseRevision`] and
300 301 /// values sets of [`BaseRevision`]
301 302 ///
302 303 /// TODO the dubious part is insisting that errors must be RevlogError
303 304 /// we would probably need to sprinkle some magic here, such as an associated
304 305 /// type that would be Into<RevlogError> but even that would not be
305 306 /// satisfactory, as errors potentially have nothing to do with the revlog.
306 307 pub trait SnapshotsCache {
307 308 fn insert_for(
308 309 &mut self,
309 310 rev: BaseRevision,
310 311 value: BaseRevision,
311 312 ) -> Result<(), RevlogError>;
312 313 }
313 314
314 315 impl SnapshotsCache for FastHashMap<BaseRevision, HashSet<BaseRevision>> {
315 316 fn insert_for(
316 317 &mut self,
317 318 rev: BaseRevision,
318 319 value: BaseRevision,
319 320 ) -> Result<(), RevlogError> {
320 321 let all_values = self.entry(rev).or_insert_with(HashSet::new);
321 322 all_values.insert(value);
322 323 Ok(())
323 324 }
324 325 }
325 326
326 327 impl Index {
327 328 /// Create an index from bytes.
328 329 /// Calculate the start of each entry when is_inline is true.
329 330 pub fn new(
330 331 bytes: Box<dyn Deref<Target = [u8]> + Send>,
331 332 default_header: IndexHeader,
332 333 ) -> Result<Self, HgError> {
333 334 let header =
334 335 IndexHeader::parse(bytes.as_ref())?.unwrap_or(default_header);
335 336
336 337 if header.format_version() != IndexHeader::REVLOGV1 {
337 338 // A proper new version should have had a repo/store
338 339 // requirement.
339 340 return Err(HgError::corrupted("unsupported revlog version"));
340 341 }
341 342
342 343 // This is only correct because we know version is REVLOGV1.
343 344 // In v2 we always use generaldelta, while in v0 we never use
344 345 // generaldelta. Similar for [is_inline] (it's only used in v1).
345 346 let uses_generaldelta = header.format_flags().uses_generaldelta();
346 347
347 348 if header.format_flags().is_inline() {
348 349 let mut offset: usize = 0;
349 350 let mut offsets = Vec::new();
350 351
351 352 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
352 353 offsets.push(offset);
353 354 let end = offset + INDEX_ENTRY_SIZE;
354 355 let entry = IndexEntry {
355 356 bytes: &bytes[offset..end],
356 357 offset_override: None,
357 358 };
358 359
359 360 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
360 361 }
361 362
362 363 if offset == bytes.len() {
363 364 Ok(Self {
364 365 bytes: IndexData::new(bytes),
365 366 offsets: RwLock::new(Some(offsets)),
366 367 uses_generaldelta,
367 368 is_inline: true,
368 369 head_revs: vec![],
369 370 filtered_revs: HashSet::new(),
370 371 })
371 372 } else {
372 373 Err(HgError::corrupted("unexpected inline revlog length"))
373 374 }
374 375 } else {
375 376 Ok(Self {
376 377 bytes: IndexData::new(bytes),
377 378 offsets: RwLock::new(None),
378 379 uses_generaldelta,
379 380 is_inline: false,
380 381 head_revs: vec![],
381 382 filtered_revs: HashSet::new(),
382 383 })
383 384 }
384 385 }
385 386
386 387 pub fn uses_generaldelta(&self) -> bool {
387 388 self.uses_generaldelta
388 389 }
389 390
390 391 /// Value of the inline flag.
391 392 pub fn is_inline(&self) -> bool {
392 393 self.is_inline
393 394 }
394 395
395 396 /// Return a slice of bytes if `revlog` is inline. Panic if not.
396 397 pub fn data(&self, start: usize, end: usize) -> &[u8] {
397 398 if !self.is_inline() {
398 399 panic!("tried to access data in the index of a revlog that is not inline");
399 400 }
400 401 &self.bytes[start..end]
401 402 }
402 403
403 404 /// Return number of entries of the revlog index.
404 405 pub fn len(&self) -> usize {
405 406 if let Some(offsets) = &*self.get_offsets() {
406 407 offsets.len()
407 408 } else {
408 409 self.bytes.len() / INDEX_ENTRY_SIZE
409 410 }
410 411 }
411 412
412 413 pub fn get_offsets(&self) -> RwLockReadGuard<Option<Vec<usize>>> {
413 414 if self.is_inline() {
414 415 {
415 416 // Wrap in a block to drop the read guard
416 417 // TODO perf?
417 418 let mut offsets = self.offsets.write().unwrap();
418 419 if offsets.is_none() {
419 420 offsets.replace(inline_scan(&self.bytes.bytes).1);
420 421 }
421 422 }
422 423 }
423 424 self.offsets.read().unwrap()
424 425 }
425 426
426 427 pub fn get_offsets_mut(&mut self) -> RwLockWriteGuard<Option<Vec<usize>>> {
427 428 let mut offsets = self.offsets.write().unwrap();
428 429 if self.is_inline() && offsets.is_none() {
429 430 offsets.replace(inline_scan(&self.bytes.bytes).1);
430 431 }
431 432 offsets
432 433 }
433 434
434 435 /// Returns `true` if the `Index` has zero `entries`.
435 436 pub fn is_empty(&self) -> bool {
436 437 self.len() == 0
437 438 }
438 439
439 440 /// Return the index entry corresponding to the given revision or `None`
440 441 /// for [`NULL_REVISION`]
441 442 ///
442 443 /// The specified revision being of the checked type, it always exists
443 444 /// if it was validated by this index.
444 445 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
445 446 if rev == NULL_REVISION {
446 447 return None;
447 448 }
448 449 Some(if let Some(offsets) = &*self.get_offsets() {
449 450 self.get_entry_inline(rev, offsets.as_ref())
450 451 } else {
451 452 self.get_entry_separated(rev)
452 453 })
453 454 }
454 455
455 456 /// Return the binary content of the index entry for the given revision
456 457 ///
457 458 /// See [get_entry()](`Self::get_entry()`) for cases when `None` is
458 459 /// returned.
459 460 pub fn entry_binary(&self, rev: Revision) -> Option<&[u8]> {
460 461 self.get_entry(rev).map(|e| {
461 462 let bytes = e.as_bytes();
462 463 if rev.0 == 0 {
463 464 &bytes[4..]
464 465 } else {
465 466 bytes
466 467 }
467 468 })
468 469 }
469 470
470 471 pub fn entry_as_params(
471 472 &self,
472 473 rev: UncheckedRevision,
473 474 ) -> Option<RevisionDataParams> {
474 475 let rev = self.check_revision(rev)?;
475 476 self.get_entry(rev).map(|e| RevisionDataParams {
476 477 flags: e.flags(),
477 478 data_offset: if rev.0 == 0 && !self.bytes.is_new() {
478 479 e.flags() as u64
479 480 } else {
480 481 e.raw_offset()
481 482 },
482 483 data_compressed_length: e.compressed_len().try_into().unwrap(),
483 484 data_uncompressed_length: e.uncompressed_len(),
484 485 data_delta_base: e.base_revision_or_base_of_delta_chain().0,
485 486 link_rev: e.link_revision().0,
486 487 parent_rev_1: e.p1().0,
487 488 parent_rev_2: e.p2().0,
488 489 node_id: e.hash().as_bytes().try_into().unwrap(),
489 490 ..Default::default()
490 491 })
491 492 }
492 493
493 494 fn get_entry_inline(
494 495 &self,
495 496 rev: Revision,
496 497 offsets: &[usize],
497 498 ) -> IndexEntry {
498 499 let start = offsets[rev.0 as usize];
499 500 let end = start + INDEX_ENTRY_SIZE;
500 501 let bytes = &self.bytes[start..end];
501 502
502 503 // See IndexEntry for an explanation of this override.
503 504 let offset_override = Some(end);
504 505
505 506 IndexEntry {
506 507 bytes,
507 508 offset_override,
508 509 }
509 510 }
510 511
511 512 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
512 513 let start = rev.0 as usize * INDEX_ENTRY_SIZE;
513 514 let end = start + INDEX_ENTRY_SIZE;
514 515 let bytes = &self.bytes[start..end];
515 516
516 517 // Override the offset of the first revision as its bytes are used
517 518 // for the index's metadata (saving space because it is always 0)
518 519 let offset_override = if rev == Revision(0) { Some(0) } else { None };
519 520
520 521 IndexEntry {
521 522 bytes,
522 523 offset_override,
523 524 }
524 525 }
525 526
527 fn null_entry(&self) -> IndexEntry {
528 IndexEntry {
529 bytes: &[0; INDEX_ENTRY_SIZE],
530 offset_override: Some(0),
531 }
532 }
533
526 534 /// Return the head revisions of this index
527 535 pub fn head_revs(&mut self) -> Result<Vec<Revision>, GraphError> {
528 536 self.head_revs_filtered(&HashSet::new())
529 537 }
530 538
531 539 /// Return the head revisions of this index
532 540 pub fn head_revs_filtered(
533 541 &mut self,
534 542 filtered_revs: &HashSet<Revision>,
535 543 ) -> Result<Vec<Revision>, GraphError> {
536 544 if !self.head_revs.is_empty() && filtered_revs == &self.filtered_revs {
537 545 return Ok(self.head_revs.to_owned());
538 546 }
539 547 let mut revs: HashSet<Revision, RandomState> =
540 548 if filtered_revs.is_empty() {
541 549 (0..self.len())
542 550 .into_iter()
543 551 .map(|i| Revision(i as BaseRevision))
544 552 .collect()
545 553 } else {
546 554 (0..self.len())
547 555 .into_iter()
548 556 .filter_map(|i| {
549 557 let r = Revision(i as BaseRevision);
550 558 if filtered_revs.contains(&r) {
551 559 None
552 560 } else {
553 561 Some(r)
554 562 }
555 563 })
556 564 .collect()
557 565 };
558 566 dagops::retain_heads(self, &mut revs)?;
559 567 if self.is_empty() {
560 568 revs.insert(NULL_REVISION);
561 569 }
562 570 let mut as_vec: Vec<Revision> =
563 571 revs.into_iter().map(Into::into).collect();
564 572 as_vec.sort_unstable();
565 573 self.head_revs = as_vec.to_owned();
566 574 self.filtered_revs = filtered_revs.to_owned();
567 575 Ok(as_vec)
568 576 }
569 577
570 578 /// Obtain the delta chain for a revision.
571 579 ///
572 580 /// `stop_rev` specifies a revision to stop at. If not specified, we
573 581 /// stop at the base of the chain.
574 582 ///
575 583 /// Returns a 2-tuple of (chain, stopped) where `chain` is a vec of
576 584 /// revs in ascending order and `stopped` is a bool indicating whether
577 585 /// `stoprev` was hit.
578 586 pub fn delta_chain(
579 587 &self,
580 588 rev: Revision,
581 589 stop_rev: Option<Revision>,
582 590 ) -> Result<(Vec<Revision>, bool), HgError> {
583 591 let mut current_rev = rev;
584 592 let mut entry = self.get_entry(rev).unwrap();
585 593 let mut chain = vec![];
586 594 while current_rev.0 != entry.base_revision_or_base_of_delta_chain().0
587 595 && stop_rev.map(|r| r != current_rev).unwrap_or(true)
588 596 {
589 597 chain.push(current_rev);
590 598 let new_rev = if self.uses_generaldelta() {
591 599 entry.base_revision_or_base_of_delta_chain()
592 600 } else {
593 601 UncheckedRevision(current_rev.0 - 1)
594 602 };
595 if new_rev.0 == NULL_REVISION.0 {
596 break;
597 }
598 603 current_rev = self.check_revision(new_rev).ok_or_else(|| {
599 604 HgError::corrupted(format!("Revision {new_rev} out of range"))
600 605 })?;
606 if current_rev.0 == NULL_REVISION.0 {
607 break;
608 }
601 609 entry = self.get_entry(current_rev).unwrap()
602 610 }
603 611
604 612 let stopped = if stop_rev.map(|r| current_rev == r).unwrap_or(false) {
605 613 true
606 614 } else {
607 615 chain.push(current_rev);
608 616 false
609 617 };
610 618 chain.reverse();
611 619 Ok((chain, stopped))
612 620 }
613 621
614 622 pub fn find_snapshots(
615 623 &self,
616 624 start_rev: UncheckedRevision,
617 625 end_rev: UncheckedRevision,
618 626 cache: &mut impl SnapshotsCache,
619 627 ) -> Result<(), RevlogError> {
620 628 let mut start_rev = start_rev.0;
621 629 let mut end_rev = end_rev.0;
622 630 end_rev += 1;
623 631 let len = self.len().try_into().unwrap();
624 632 if end_rev > len {
625 633 end_rev = len;
626 634 }
627 635 if start_rev < 0 {
628 636 start_rev = 0;
629 637 }
630 638 for rev in start_rev..end_rev {
631 639 if !self.is_snapshot_unchecked(Revision(rev))? {
632 640 continue;
633 641 }
634 642 let mut base = self
635 643 .get_entry(Revision(rev))
636 644 .unwrap()
637 645 .base_revision_or_base_of_delta_chain();
638 646 if base.0 == rev {
639 647 base = NULL_REVISION.into();
640 648 }
641 649 cache.insert_for(base.0, rev)?;
642 650 }
643 651 Ok(())
644 652 }
645 653
646 654 /// TODO move this to the trait probably, along with other things
647 655 pub fn append(
648 656 &mut self,
649 657 revision_data: RevisionDataParams,
650 658 ) -> Result<(), RevlogError> {
651 659 revision_data.validate()?;
652 660 let new_offset = self.bytes.len();
653 661 if let Some(offsets) = &mut *self.get_offsets_mut() {
654 662 offsets.push(new_offset)
655 663 }
656 664 self.bytes.added.extend(revision_data.into_v1().as_bytes());
657 665 self.head_revs.clear();
658 666 Ok(())
659 667 }
660 668
661 669 pub fn pack_header(&self, header: i32) -> [u8; 4] {
662 670 header.to_be_bytes()
663 671 }
664 672
665 673 pub fn remove(&mut self, rev: Revision) -> Result<(), RevlogError> {
666 674 let offsets = self.get_offsets().clone();
667 675 self.bytes.remove(rev, offsets.as_deref())?;
668 676 if let Some(offsets) = &mut *self.get_offsets_mut() {
669 677 offsets.truncate(rev.0 as usize)
670 678 }
671 679 self.head_revs.clear();
672 680 Ok(())
673 681 }
674 682
675 683 pub fn clear_caches(&mut self) {
676 684 // We need to get the 'inline' value from Python at init and use this
677 685 // instead of offsets to determine whether we're inline since we might
678 686 // clear caches. This implies re-populating the offsets on-demand.
679 687 self.offsets = RwLock::new(None);
680 688 self.head_revs.clear();
681 689 }
682 690
683 691 /// Unchecked version of `is_snapshot`.
684 692 /// Assumes the caller checked that `rev` is within a valid revision range.
685 693 pub fn is_snapshot_unchecked(
686 694 &self,
687 695 mut rev: Revision,
688 696 ) -> Result<bool, RevlogError> {
689 697 while rev.0 >= 0 {
690 698 let entry = self.get_entry(rev).unwrap();
691 699 let mut base = entry.base_revision_or_base_of_delta_chain().0;
692 700 if base == rev.0 {
693 701 base = NULL_REVISION.0;
694 702 }
695 703 if base == NULL_REVISION.0 {
696 704 return Ok(true);
697 705 }
698 706 let [mut p1, mut p2] = self
699 707 .parents(rev)
700 708 .map_err(|_| RevlogError::InvalidRevision)?;
701 709 while let Some(p1_entry) = self.get_entry(p1) {
702 710 if p1_entry.compressed_len() != 0 || p1.0 == 0 {
703 711 break;
704 712 }
705 713 let parent_base =
706 714 p1_entry.base_revision_or_base_of_delta_chain();
707 715 if parent_base.0 == p1.0 {
708 716 break;
709 717 }
710 718 p1 = self
711 719 .check_revision(parent_base)
712 720 .ok_or(RevlogError::InvalidRevision)?;
713 721 }
714 722 while let Some(p2_entry) = self.get_entry(p2) {
715 723 if p2_entry.compressed_len() != 0 || p2.0 == 0 {
716 724 break;
717 725 }
718 726 let parent_base =
719 727 p2_entry.base_revision_or_base_of_delta_chain();
720 728 if parent_base.0 == p2.0 {
721 729 break;
722 730 }
723 731 p2 = self
724 732 .check_revision(parent_base)
725 733 .ok_or(RevlogError::InvalidRevision)?;
726 734 }
727 735 if base == p1.0 || base == p2.0 {
728 736 return Ok(false);
729 737 }
730 738 rev = self
731 739 .check_revision(base.into())
732 740 .ok_or(RevlogError::InvalidRevision)?;
733 741 }
734 742 Ok(rev == NULL_REVISION)
735 743 }
736 744
737 745 /// Return whether the given revision is a snapshot. Returns an error if
738 746 /// `rev` is not within a valid revision range.
739 747 pub fn is_snapshot(
740 748 &self,
741 749 rev: UncheckedRevision,
742 750 ) -> Result<bool, RevlogError> {
743 751 let rev = self
744 752 .check_revision(rev)
745 753 .ok_or_else(|| RevlogError::corrupted("test"))?;
746 754 self.is_snapshot_unchecked(rev)
747 755 }
756
757 /// Slice revs to reduce the amount of unrelated data to be read from disk.
758 ///
759 /// The index is sliced into groups that should be read in one time.
760 ///
761 /// The initial chunk is sliced until the overall density
762 /// (payload/chunks-span ratio) is above `target_density`.
763 /// No gap smaller than `min_gap_size` is skipped.
764 pub fn slice_chunk_to_density(
765 &self,
766 revs: &[Revision],
767 target_density: f64,
768 min_gap_size: usize,
769 ) -> Vec<Vec<Revision>> {
770 if revs.is_empty() {
771 return vec![];
772 }
773 if revs.len() == 1 {
774 return vec![revs.to_owned()];
775 }
776 let delta_chain_span = self.segment_span(revs);
777 if delta_chain_span < min_gap_size {
778 return vec![revs.to_owned()];
779 }
780 let entries: Vec<_> = revs
781 .iter()
782 .map(|r| {
783 (*r, self.get_entry(*r).unwrap_or_else(|| self.null_entry()))
784 })
785 .collect();
786
787 let mut read_data = delta_chain_span;
788 let chain_payload: u32 =
789 entries.iter().map(|(_r, e)| e.compressed_len()).sum();
790 let mut density = if delta_chain_span > 0 {
791 chain_payload as f64 / delta_chain_span as f64
792 } else {
793 1.0
794 };
795
796 if density >= target_density {
797 return vec![revs.to_owned()];
798 }
799
800 // Store the gaps in a heap to have them sorted by decreasing size
801 let mut gaps = Vec::new();
802 let mut previous_end = None;
803
804 for (i, (_rev, entry)) in entries.iter().enumerate() {
805 let start = entry.c_start() as usize;
806 let length = entry.compressed_len();
807
808 // Skip empty revisions to form larger holes
809 if length == 0 {
810 continue;
811 }
812
813 if let Some(end) = previous_end {
814 let gap_size = start - end;
815 // Only consider holes that are large enough
816 if gap_size > min_gap_size {
817 gaps.push((gap_size, i));
818 }
819 }
820 previous_end = Some(start + length as usize);
821 }
822 if gaps.is_empty() {
823 return vec![revs.to_owned()];
824 }
825 // sort the gaps to pop them from largest to small
826 gaps.sort_unstable();
827
828 // Collect the indices of the largest holes until
829 // the density is acceptable
830 let mut selected = vec![];
831 while let Some((gap_size, gap_id)) = gaps.pop() {
832 if density >= target_density {
833 break;
834 }
835 selected.push(gap_id);
836
837 // The gap sizes are stored as negatives to be sorted decreasingly
838 // by the heap
839 read_data -= gap_size;
840 density = if read_data > 0 {
841 chain_payload as f64 / read_data as f64
842 } else {
843 1.0
844 };
845 if density >= target_density {
846 break;
847 }
848 }
849 selected.sort_unstable();
850 selected.push(revs.len());
851
852 // Cut the revs at collected indices
853 let mut previous_idx = 0;
854 let mut chunks = vec![];
855 for idx in selected {
856 let chunk = self.trim_chunk(&entries, previous_idx, idx);
857 if !chunk.is_empty() {
858 chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
859 }
860 previous_idx = idx;
861 }
862 let chunk = self.trim_chunk(&entries, previous_idx, entries.len());
863 if !chunk.is_empty() {
864 chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
865 }
866
867 chunks
868 }
869
870 /// Get the byte span of a segment of sorted revisions.
871 ///
872 /// Occurrences of [`NULL_REVISION`] are ignored at the beginning of
873 /// the `revs` segment.
874 ///
875 /// panics:
876 /// - if `revs` is empty or only made of `NULL_REVISION`
877 /// - if cannot retrieve entry for the last or first not null element of
878 /// `revs`.
879 fn segment_span(&self, revs: &[Revision]) -> usize {
880 if revs.is_empty() {
881 return 0;
882 }
883 let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
884 let end = last_entry.c_start() + last_entry.compressed_len() as u64;
885 let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
886 let start = if (*first_rev).0 == 0 {
887 0
888 } else {
889 self.get_entry(*first_rev).unwrap().c_start()
890 };
891 (end - start) as usize
892 }
893
894 /// Returns `&revs[startidx..endidx]` without empty trailing revs
895 fn trim_chunk<'a>(
896 &'a self,
897 revs: &'a [(Revision, IndexEntry)],
898 start: usize,
899 mut end: usize,
900 ) -> &'a [(Revision, IndexEntry)] {
901 // Trim empty revs at the end, except the very first rev of a chain
902 let last_rev = revs[end - 1].0;
903 if last_rev.0 < self.len() as BaseRevision {
904 while end > 1
905 && end > start
906 && revs[end - 1].1.compressed_len() == 0
907 {
908 end -= 1
909 }
910 }
911 &revs[start..end]
912 }
748 913 }
749 914 fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
750 915 let mut offset: usize = 0;
751 916 let mut offsets = Vec::new();
752 917
753 918 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
754 919 offsets.push(offset);
755 920 let end = offset + INDEX_ENTRY_SIZE;
756 921 let entry = IndexEntry {
757 922 bytes: &bytes[offset..end],
758 923 offset_override: None,
759 924 };
760 925
761 926 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
762 927 }
763 928 (offset, offsets)
764 929 }
765 930
766 931 impl super::RevlogIndex for Index {
767 932 fn len(&self) -> usize {
768 933 self.len()
769 934 }
770 935
771 936 fn node(&self, rev: Revision) -> Option<&Node> {
772 937 if rev == NULL_REVISION {
773 938 return Some(&NULL_NODE);
774 939 }
775 940 self.get_entry(rev).map(|entry| entry.hash())
776 941 }
777 942 }
778 943
/// Borrowed view over the bytes of a single index entry.
#[derive(Debug)]
pub struct IndexEntry<'a> {
    /// Raw bytes of the entry.
    bytes: &'a [u8],
    /// Allows to override the offset value of the entry.
    ///
    /// For interleaved index and data, the offset stored in the index
    /// corresponds to the separated data offset.
    /// It has to be overridden with the actual offset in the interleaved
    /// index which is just after the index block.
    ///
    /// For separated index and data, the offset stored in the first index
    /// entry is mixed with the index headers.
    /// It has to be overridden with 0.
    offset_override: Option<usize>,
}
794 959
795 960 impl<'a> IndexEntry<'a> {
796 961 /// Return the offset of the data.
797 962 pub fn offset(&self) -> usize {
798 963 if let Some(offset_override) = self.offset_override {
799 964 offset_override
800 965 } else {
801 966 let mut bytes = [0; 8];
802 967 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
803 968 BigEndian::read_u64(&bytes[..]) as usize
804 969 }
805 970 }
806 971 pub fn raw_offset(&self) -> u64 {
807 972 BigEndian::read_u64(&self.bytes[0..8])
808 973 }
809 974
975 /// Same result (except potentially for rev 0) as C `index_get_start()`
976 fn c_start(&self) -> u64 {
977 self.raw_offset() >> 16
978 }
979
810 980 pub fn flags(&self) -> u16 {
811 981 BigEndian::read_u16(&self.bytes[6..=7])
812 982 }
813 983
814 984 /// Return the compressed length of the data.
815 985 pub fn compressed_len(&self) -> u32 {
816 986 BigEndian::read_u32(&self.bytes[8..=11])
817 987 }
818 988
819 989 /// Return the uncompressed length of the data.
820 990 pub fn uncompressed_len(&self) -> i32 {
821 991 BigEndian::read_i32(&self.bytes[12..=15])
822 992 }
823 993
824 994 /// Return the revision upon which the data has been derived.
825 995 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
826 996 // TODO Maybe return an Option when base_revision == rev?
827 997 // Requires to add rev to IndexEntry
828 998
829 999 BigEndian::read_i32(&self.bytes[16..]).into()
830 1000 }
831 1001
832 1002 pub fn link_revision(&self) -> UncheckedRevision {
833 1003 BigEndian::read_i32(&self.bytes[20..]).into()
834 1004 }
835 1005
836 1006 pub fn p1(&self) -> UncheckedRevision {
837 1007 BigEndian::read_i32(&self.bytes[24..]).into()
838 1008 }
839 1009
840 1010 pub fn p2(&self) -> UncheckedRevision {
841 1011 BigEndian::read_i32(&self.bytes[28..]).into()
842 1012 }
843 1013
844 1014 /// Return the hash of revision's full text.
845 1015 ///
846 1016 /// Currently, SHA-1 is used and only the first 20 bytes of this field
847 1017 /// are used.
848 1018 pub fn hash(&self) -> &'a Node {
849 1019 (&self.bytes[32..52]).try_into().unwrap()
850 1020 }
851 1021
852 1022 pub fn as_bytes(&self) -> &'a [u8] {
853 1023 self.bytes
854 1024 }
855 1025 }
856 1026
#[cfg(test)]
mod tests {
    use super::*;
    use crate::node::NULL_NODE;

    /// Test helper producing the raw bytes of one index entry.
    #[cfg(test)]
    #[derive(Debug, Copy, Clone)]
    pub struct IndexEntryBuilder {
        is_first: bool,
        is_inline: bool,
        is_general_delta: bool,
        version: u16,
        offset: usize,
        compressed_len: usize,
        uncompressed_len: usize,
        base_revision_or_base_of_delta_chain: Revision,
        link_revision: Revision,
        p1: Revision,
        p2: Revision,
        node: Node,
    }

    #[cfg(test)]
    impl IndexEntryBuilder {
        /// Start from a non-first, non-inline, general-delta, version 1
        /// entry with zeroed lengths and offset, null parents and the null
        /// node.
        #[allow(clippy::new_without_default)]
        pub fn new() -> Self {
            Self {
                is_first: false,
                is_inline: false,
                is_general_delta: true,
                version: 1,
                offset: 0,
                compressed_len: 0,
                uncompressed_len: 0,
                base_revision_or_base_of_delta_chain: Revision(0),
                link_revision: Revision(0),
                p1: NULL_REVISION,
                p2: NULL_REVISION,
                node: NULL_NODE,
            }
        }

        pub fn is_first(&mut self, value: bool) -> &mut Self {
            self.is_first = value;
            self
        }

        pub fn with_inline(&mut self, value: bool) -> &mut Self {
            self.is_inline = value;
            self
        }

        pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
            self.is_general_delta = value;
            self
        }

        pub fn with_version(&mut self, value: u16) -> &mut Self {
            self.version = value;
            self
        }

        pub fn with_offset(&mut self, value: usize) -> &mut Self {
            self.offset = value;
            self
        }

        pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
            self.compressed_len = value;
            self
        }

        pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
            self.uncompressed_len = value;
            self
        }

        pub fn with_base_revision_or_base_of_delta_chain(
            &mut self,
            value: Revision,
        ) -> &mut Self {
            self.base_revision_or_base_of_delta_chain = value;
            self
        }

        pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
            self.link_revision = value;
            self
        }

        pub fn with_p1(&mut self, value: Revision) -> &mut Self {
            self.p1 = value;
            self
        }

        pub fn with_p2(&mut self, value: Revision) -> &mut Self {
            self.p2 = value;
            self
        }

        pub fn with_node(&mut self, value: Node) -> &mut Self {
            self.node = value;
            self
        }

        /// Serialize the configured fields into entry bytes.
        pub fn build(&self) -> Vec<u8> {
            let mut out = Vec::with_capacity(INDEX_ENTRY_SIZE);
            if self.is_first {
                // The first entry holds header data where the offset would
                // be: bit 0 of the second byte is the inline flag, bit 1
                // the general-delta flag.
                let format_flags = u8::from(self.is_inline)
                    | (u8::from(self.is_general_delta) << 1);
                out.extend(&[0u8, format_flags]);
                out.extend(&self.version.to_be_bytes());
                // Remaining offset bytes.
                out.extend(&[0u8; 2]);
            } else {
                // Offset stored on 48 bits (6 bytes)
                let offset_bytes = (self.offset as u64).to_be_bytes();
                out.extend(&offset_bytes[2..]);
            }
            // Revision flags.
            out.extend(&[0u8; 2]);
            out.extend(&(self.compressed_len as u32).to_be_bytes());
            out.extend(&(self.uncompressed_len as u32).to_be_bytes());
            out.extend(
                &self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
            );
            out.extend(&self.link_revision.0.to_be_bytes());
            out.extend(&self.p1.0.to_be_bytes());
            out.extend(&self.p2.0.to_be_bytes());
            out.extend(self.node.as_bytes());
            // Trailing zero bytes after the node.
            out.extend(&[0u8; 12]);
            out
        }
    }

    /// Parse the header at the start of `index_bytes`, panicking when it is
    /// missing or cannot be parsed.
    fn header_of(index_bytes: &[u8]) -> IndexHeader {
        IndexHeader::parse(index_bytes)
            .expect("too short")
            .unwrap()
    }

    /// Wrap `bytes` into an entry without any offset override.
    fn plain_entry(bytes: &[u8]) -> IndexEntry<'_> {
        IndexEntry {
            bytes,
            offset_override: None,
        }
    }

    pub fn is_inline(index_bytes: &[u8]) -> bool {
        header_of(index_bytes).format_flags().is_inline()
    }

    pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
        header_of(index_bytes).format_flags().uses_generaldelta()
    }

    pub fn get_version(index_bytes: &[u8]) -> u16 {
        header_of(index_bytes).format_version()
    }

    #[test]
    fn flags_when_no_inline_flag_test() {
        let bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_general_delta(false)
            .with_inline(false)
            .build();

        assert!(!is_inline(&bytes));
        assert!(!uses_generaldelta(&bytes));
    }

    #[test]
    fn flags_when_inline_flag_test() {
        let bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_general_delta(false)
            .with_inline(true)
            .build();

        assert!(is_inline(&bytes));
        assert!(!uses_generaldelta(&bytes));
    }

    #[test]
    fn flags_when_inline_and_generaldelta_flags_test() {
        let bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_general_delta(true)
            .with_inline(true)
            .build();

        assert!(is_inline(&bytes));
        assert!(uses_generaldelta(&bytes));
    }

    #[test]
    fn test_offset() {
        let bytes = IndexEntryBuilder::new().with_offset(1).build();

        assert_eq!(plain_entry(&bytes).offset(), 1)
    }

    #[test]
    fn test_with_overridden_offset() {
        let bytes = IndexEntryBuilder::new().with_offset(1).build();
        let overridden = IndexEntry {
            bytes: &bytes,
            offset_override: Some(2),
        };

        assert_eq!(overridden.offset(), 2)
    }

    #[test]
    fn test_compressed_len() {
        let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();

        assert_eq!(plain_entry(&bytes).compressed_len(), 1)
    }

    #[test]
    fn test_uncompressed_len() {
        let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();

        assert_eq!(plain_entry(&bytes).uncompressed_len(), 1)
    }

    #[test]
    fn test_base_revision_or_base_of_delta_chain() {
        let bytes = IndexEntryBuilder::new()
            .with_base_revision_or_base_of_delta_chain(Revision(1))
            .build();
        let entry = plain_entry(&bytes);

        assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
    }

    #[test]
    fn link_revision_test() {
        let bytes = IndexEntryBuilder::new()
            .with_link_revision(Revision(123))
            .build();

        assert_eq!(plain_entry(&bytes).link_revision(), 123.into());
    }

    #[test]
    fn p1_test() {
        let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();

        assert_eq!(plain_entry(&bytes).p1(), 123.into());
    }

    #[test]
    fn p2_test() {
        let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();

        assert_eq!(plain_entry(&bytes).p2(), 123.into());
    }

    #[test]
    fn node_test() {
        let node = Node::from_hex("0123456789012345678901234567890123456789")
            .unwrap();
        let bytes = IndexEntryBuilder::new().with_node(node).build();

        assert_eq!(*plain_entry(&bytes).hash(), node);
    }

    #[test]
    fn version_test() {
        let bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(2)
            .build();

        assert_eq!(get_version(&bytes), 2)
    }
}
1171 1341
1172 1342 #[cfg(test)]
1173 1343 pub use tests::IndexEntryBuilder;
@@ -1,928 +1,970 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 conversion::rev_pyiter_collect,
11 11 utils::{node_from_py_bytes, node_from_py_object},
12 12 PyRevision,
13 13 };
14 14 use cpython::{
15 15 buffer::{Element, PyBuffer},
16 16 exc::{IndexError, ValueError},
17 17 ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyInt, PyList,
18 18 PyModule, PyObject, PyResult, PySet, PyString, PyTuple, Python,
19 19 PythonObject, ToPyObject,
20 20 };
21 21 use hg::{
22 22 errors::HgError,
23 23 index::{IndexHeader, RevisionDataParams, SnapshotsCache},
24 24 nodemap::{Block, NodeMapError, NodeTree},
25 25 revlog::{nodemap::NodeMap, NodePrefix, RevlogError, RevlogIndex},
26 26 BaseRevision, Revision, UncheckedRevision, NULL_REVISION,
27 27 };
28 28 use std::cell::RefCell;
29 29
30 30 /// Return a Struct implementing the Graph trait
31 31 pub(crate) fn pyindex_to_graph(
32 32 py: Python,
33 33 index: PyObject,
34 34 ) -> PyResult<cindex::Index> {
35 35 match index.extract::<MixedIndex>(py) {
36 36 Ok(midx) => Ok(midx.clone_cindex(py)),
37 37 Err(_) => cindex::Index::new(py, index),
38 38 }
39 39 }
40 40
41 41 py_class!(pub class MixedIndex |py| {
42 42 data cindex: RefCell<cindex::Index>;
43 43 data index: RefCell<hg::index::Index>;
44 44 data nt: RefCell<Option<NodeTree>>;
45 45 data docket: RefCell<Option<PyObject>>;
46 46 // Holds a reference to the mmap'ed persistent nodemap data
47 47 data nodemap_mmap: RefCell<Option<PyBuffer>>;
48 48 // Holds a reference to the mmap'ed persistent index data
49 49 data index_mmap: RefCell<Option<PyBuffer>>;
50 50
51 51 def __new__(
52 52 _cls,
53 53 cindex: PyObject,
54 54 data: PyObject,
55 55 default_header: u32,
56 56 ) -> PyResult<MixedIndex> {
57 57 Self::new(py, cindex, data, default_header)
58 58 }
59 59
60 60 /// Compatibility layer used for Python consumers needing access to the C index
61 61 ///
62 62 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
63 63 /// that may need to build a custom `nodetree`, based on a specified revset.
64 64 /// With a Rust implementation of the nodemap, we will be able to get rid of
65 65 /// this, by exposing our own standalone nodemap class,
66 66 /// ready to accept `MixedIndex`.
67 67 def get_cindex(&self) -> PyResult<PyObject> {
68 68 Ok(self.cindex(py).borrow().inner().clone_ref(py))
69 69 }
70 70
71 71 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
72 72
73 73 /// Return Revision if found, raises a bare `error.RevlogError`
74 74 /// in case of ambiguity, same as C version does
75 75 def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
76 76 let opt = self.get_nodetree(py)?.borrow();
77 77 let nt = opt.as_ref().unwrap();
78 78 let idx = &*self.cindex(py).borrow();
79 79 let ridx = &*self.index(py).borrow();
80 80 let node = node_from_py_bytes(py, &node)?;
81 81 let rust_rev =
82 82 nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
83 83 let c_rev =
84 84 nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))?;
85 85 assert_eq!(rust_rev, c_rev);
86 86 Ok(rust_rev.map(Into::into))
87 87
88 88 }
89 89
90 90 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
91 91 /// is not found.
92 92 ///
93 93 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
94 94 /// will catch and rewrap with it
95 95 def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
96 96 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
97 97 }
98 98
99 99 /// return True if the node exist in the index
100 100 def has_node(&self, node: PyBytes) -> PyResult<bool> {
101 101 // TODO OPTIM we could avoid a needless conversion here,
102 102 // to do when scaffolding for pure Rust switch is removed,
103 103 // as `get_rev()` currently does the necessary assertions
104 104 self.get_rev(py, node).map(|opt| opt.is_some())
105 105 }
106 106
107 107 /// find length of shortest hex nodeid of a binary ID
108 108 def shortest(&self, node: PyBytes) -> PyResult<usize> {
109 109 let opt = self.get_nodetree(py)?.borrow();
110 110 let nt = opt.as_ref().unwrap();
111 111 let idx = &*self.index(py).borrow();
112 112 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
113 113 {
114 114 Ok(Some(l)) => Ok(l),
115 115 Ok(None) => Err(revlog_error(py)),
116 116 Err(e) => Err(nodemap_error(py, e)),
117 117 }
118 118 }
119 119
120 120 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
121 121 let opt = self.get_nodetree(py)?.borrow();
122 122 let nt = opt.as_ref().unwrap();
123 123 let idx = &*self.index(py).borrow();
124 124
125 125 let node_as_string = if cfg!(feature = "python3-sys") {
126 126 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
127 127 }
128 128 else {
129 129 let node = node.extract::<PyBytes>(py)?;
130 130 String::from_utf8_lossy(node.data(py)).to_string()
131 131 };
132 132
133 133 let prefix = NodePrefix::from_hex(&node_as_string)
134 134 .map_err(|_| PyErr::new::<ValueError, _>(
135 135 py, format!("Invalid node or prefix '{}'", node_as_string))
136 136 )?;
137 137
138 138 nt.find_bin(idx, prefix)
139 139 // TODO make an inner API returning the node directly
140 140 .map(|opt| opt.map(
141 141 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
142 142 .map_err(|e| nodemap_error(py, e))
143 143
144 144 }
145 145
146 146 /// append an index entry
147 147 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
148 148 if tup.len(py) < 8 {
149 149 // this is better than the panic promised by tup.get_item()
150 150 return Err(
151 151 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
152 152 }
153 153 let node_bytes = tup.get_item(py, 7).extract(py)?;
154 154 let node = node_from_py_object(py, &node_bytes)?;
155 155
156 156 let rev = self.len(py)? as BaseRevision;
157 157 let mut idx = self.cindex(py).borrow_mut();
158 158
159 159 // This is ok since we will just add the revision to the index
160 160 let rev = Revision(rev);
161 161 idx.append(py, tup.clone_ref(py))?;
162 162 self.index(py)
163 163 .borrow_mut()
164 164 .append(py_tuple_to_revision_data_params(py, tup)?)
165 165 .unwrap();
166 166 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
167 167 .insert(&*idx, &node, rev)
168 168 .map_err(|e| nodemap_error(py, e))?;
169 169 Ok(py.None())
170 170 }
171 171
172 172 def __delitem__(&self, key: PyObject) -> PyResult<()> {
173 173 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
174 174 self.cindex(py).borrow().inner().del_item(py, &key)?;
175 175 let start = key.getattr(py, "start")?;
176 176 let start = UncheckedRevision(start.extract(py)?);
177 177 let start = self.index(py)
178 178 .borrow()
179 179 .check_revision(start)
180 180 .ok_or_else(|| {
181 181 nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
182 182 })?;
183 183 self.index(py).borrow_mut().remove(start).unwrap();
184 184 let mut opt = self.get_nodetree(py)?.borrow_mut();
185 185 let nt = opt.as_mut().unwrap();
186 186 nt.invalidate_all();
187 187 self.fill_nodemap(py, nt)?;
188 188 Ok(())
189 189 }
190 190
191 191 //
192 192 // Reforwarded C index API
193 193 //
194 194
195 195 // index_methods (tp_methods). Same ordering as in revlog.c
196 196
197 197 /// return the gca set of the given revs
198 198 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
199 199 self.call_cindex(py, "ancestors", args, kw)
200 200 }
201 201
202 202 /// return the heads of the common ancestors of the given revs
203 203 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
204 204 self.call_cindex(py, "commonancestorsheads", args, kw)
205 205 }
206 206
207 207 /// Clear the index caches and inner py_class data.
208 208 /// It is Python's responsibility to call `update_nodemap_data` again.
209 209 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
210 210 self.nt(py).borrow_mut().take();
211 211 self.docket(py).borrow_mut().take();
212 212 self.nodemap_mmap(py).borrow_mut().take();
213 213 self.index(py).borrow_mut().clear_caches();
214 214 self.call_cindex(py, "clearcaches", args, kw)
215 215 }
216 216
217 217 /// return the raw binary string representing a revision
218 218 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
219 219 let rindex = self.index(py).borrow();
220 220 let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
221 221 let rust_bytes = rindex.check_revision(rev).and_then(
222 222 |r| rindex.entry_binary(r))
223 223 .ok_or_else(|| rev_not_in_index(py, rev))?;
224 224 let rust_res = PyBytes::new(py, rust_bytes).into_object();
225 225
226 226 let c_res = self.call_cindex(py, "entry_binary", args, kw)?;
227 227 assert_py_eq(py, "entry_binary", &rust_res, &c_res)?;
228 228 Ok(rust_res)
229 229 }
230 230
231 231 /// return a binary packed version of the header
232 232 def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
233 233 let rindex = self.index(py).borrow();
234 234 let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
235 235 let rust_res = PyBytes::new(py, &packed).into_object();
236 236
237 237 let c_res = self.call_cindex(py, "pack_header", args, kw)?;
238 238 assert_py_eq(py, "pack_header", &rust_res, &c_res)?;
239 239 Ok(rust_res)
240 240 }
241 241
242 242 /// compute phases
243 243 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
244 244 self.call_cindex(py, "computephasesmapsets", args, kw)
245 245 }
246 246
247 247 /// reachableroots
248 248 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
249 249 self.call_cindex(py, "reachableroots2", args, kw)
250 250 }
251 251
252 252 /// get head revisions
253 253 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
254 254 let rust_res = self.inner_headrevs(py)?;
255 255
256 256 let c_res = self.call_cindex(py, "headrevs", args, kw)?;
257 257 assert_py_eq(py, "headrevs", &rust_res, &c_res)?;
258 258 Ok(rust_res)
259 259 }
260 260
261 261 /// get filtered head revisions
262 262 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
263 263 let rust_res = self.inner_headrevsfiltered(py, &args.get_item(py, 0))?;
264 264 let c_res = self.call_cindex(py, "headrevsfiltered", args, kw)?;
265 265
266 266 assert_py_eq(py, "headrevsfiltered", &rust_res, &c_res)?;
267 267 Ok(rust_res)
268 268 }
269 269
270 270 /// True if the object is a snapshot
271 271 def issnapshot(&self, *args, **kw) -> PyResult<bool> {
272 272 let index = self.index(py).borrow();
273 273 let result = index
274 274 .is_snapshot(UncheckedRevision(args.get_item(py, 0).extract(py)?))
275 275 .map_err(|e| {
276 276 PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
277 277 })?;
278 278 let cresult = self.call_cindex(py, "issnapshot", args, kw)?;
279 279 assert_eq!(result, cresult.extract(py)?);
280 280 Ok(result)
281 281 }
282 282
283 283 /// Gather snapshot data in a cache dict
284 284 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
285 285 let index = self.index(py).borrow();
286 286 let cache: PyDict = args.get_item(py, 0).extract(py)?;
287 287 // this methods operates by setting new values in the cache,
288 288 // hence we will compare results by letting the C implementation
289 289 // operate over a deepcopy of the cache, and finally compare both
290 290 // caches.
291 291 let c_cache = PyDict::new(py);
292 292 for (k, v) in cache.items(py) {
293 293 c_cache.set_item(py, k, PySet::new(py, v)?)?;
294 294 }
295 295
296 296 let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
297 297 let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
298 298 let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
299 299 index.find_snapshots(
300 300 start_rev,
301 301 end_rev,
302 302 &mut cache_wrapper,
303 303 ).map_err(|_| revlog_error(py))?;
304 304
305 305 let c_args = PyTuple::new(
306 306 py,
307 307 &[
308 308 c_cache.clone_ref(py).into_object(),
309 309 args.get_item(py, 1),
310 310 args.get_item(py, 2)
311 311 ]
312 312 );
313 313 self.call_cindex(py, "findsnapshots", &c_args, kw)?;
314 314 assert_py_eq(py, "findsnapshots cache",
315 315 &cache_wrapper.into_object(),
316 316 &c_cache.into_object())?;
317 317 Ok(py.None())
318 318 }
319 319
320 320 /// determine revisions with deltas to reconstruct fulltext
321 321 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
322 322 let index = self.index(py).borrow();
323 323 let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
324 324 let stop_rev =
325 325 args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
326 326 let rev = index.check_revision(rev).ok_or_else(|| {
327 327 nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
328 328 })?;
329 329 let stop_rev = if let Some(stop_rev) = stop_rev {
330 330 let stop_rev = UncheckedRevision(stop_rev);
331 331 Some(index.check_revision(stop_rev).ok_or_else(|| {
332 332 nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
333 333 })?)
334 334 } else {None};
335 335 let (chain, stopped) = index.delta_chain(rev, stop_rev).map_err(|e| {
336 336 PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
337 337 })?;
338 338
339 339 let cresult = self.call_cindex(py, "deltachain", args, kw)?;
340 340 let cchain: Vec<BaseRevision> =
341 341 cresult.get_item(py, 0)?.extract::<Vec<BaseRevision>>(py)?;
342 342 let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
343 343 assert_eq!(chain, cchain);
344 344 assert_eq!(stopped, cresult.get_item(py, 1)?.extract(py)?);
345 345
346 346 Ok(
347 347 PyTuple::new(
348 348 py,
349 349 &[
350 350 chain.into_py_object(py).into_object(),
351 351 stopped.into_py_object(py).into_object()
352 352 ]
353 353 ).into_object()
354 354 )
355 355
356 356 }
357 357
358 358 /// slice planned chunk read to reach a density threshold
359 359 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
360 self.call_cindex(py, "slicechunktodensity", args, kw)
360 let rust_res = self.inner_slicechunktodensity(
361 py,
362 args.get_item(py, 0),
363 args.get_item(py, 1).extract(py)?,
364 args.get_item(py, 2).extract(py)?
365 )?;
366
367 let c_res = self.call_cindex(py, "slicechunktodensity", args, kw)?;
368 assert_eq!(
369 rust_res.len(),
370 c_res.len(py)?,
371 "chunks differ {:?} {}",
372 rust_res, c_res
373 );
374 for (i, chunk) in rust_res.iter().enumerate() {
375 let c_chunk = c_res.get_item(py, i)?;
376 assert_eq!(
377 chunk.len(),
378 c_chunk.len(py)?,
379 "chunk {} length differ {:?} {}",
380 i,
381 chunk,
382 c_res
383 );
384 for (j, rev) in chunk.iter().enumerate() {
385 let c_chunk: BaseRevision
386 = c_chunk.get_item(py, j)?.extract(py)?;
387 assert_eq!(c_chunk, rev.0);
388 }
389 }
390 Ok(c_res)
361 391 }
362 392
363 393 /// stats for the index
364 394 def stats(&self, *args, **kw) -> PyResult<PyObject> {
365 395 self.call_cindex(py, "stats", args, kw)
366 396 }
367 397
368 398 // index_sequence_methods and index_mapping_methods.
369 399 //
370 400 // Since we call back through the high level Python API,
371 401 // there's no point making a distinction between index_get
372 402 // and index_getitem.
373 403 // gracinet 2023: this above is no longer true for the pure Rust impl
374 404
375 405 def __len__(&self) -> PyResult<usize> {
376 406 self.len(py)
377 407 }
378 408
379 409 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
380 410 let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
381 411
382 412 // this conversion seems needless, but that's actually because
383 413 // `index_getitem` does not handle conversion from PyLong,
384 414 // which expressions such as [e for e in index] internally use.
385 415 // Note that we don't seem to have a direct way to call
386 416 // PySequence_GetItem (does the job), which would possibly be better
387 417 // for performance
388 418 // gracinet 2023: the above comment can be removed when we use
389 419 // the pure Rust impl only. Note also that `key` can be a binary
390 420 // node id.
391 421 let c_key = match key.extract::<BaseRevision>(py) {
392 422 Ok(rev) => rev.to_py_object(py).into_object(),
393 423 Err(_) => key,
394 424 };
395 425 let c_res = self.cindex(py).borrow().inner().get_item(py, c_key)?;
396 426
397 427 assert_py_eq(py, "__getitem__", &rust_res, &c_res)?;
398 428 Ok(rust_res)
399 429 }
400 430
401 431 def __contains__(&self, item: PyObject) -> PyResult<bool> {
402 432 // ObjectProtocol does not seem to provide contains(), so
403 433 // this is an equivalent implementation of the index_contains()
404 434 // defined in revlog.c
405 435 let cindex = self.cindex(py).borrow();
406 436 match item.extract::<i32>(py) {
407 437 Ok(rev) => {
408 438 Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
409 439 }
410 440 Err(_) => {
411 441 let item_bytes: PyBytes = item.extract(py)?;
412 442 let rust_res = self.has_node(py, item_bytes)?;
413 443
414 444 let c_res = cindex.inner().call_method(
415 445 py,
416 446 "has_node",
417 447 PyTuple::new(py, &[item.clone_ref(py)]),
418 448 None)?
419 449 .extract(py)?;
420 450
421 451 assert_eq!(rust_res, c_res);
422 452 Ok(rust_res)
423 453 }
424 454 }
425 455 }
426 456
427 457 def nodemap_data_all(&self) -> PyResult<PyBytes> {
428 458 self.inner_nodemap_data_all(py)
429 459 }
430 460
431 461 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
432 462 self.inner_nodemap_data_incremental(py)
433 463 }
434 464 def update_nodemap_data(
435 465 &self,
436 466 docket: PyObject,
437 467 nm_data: PyObject
438 468 ) -> PyResult<PyObject> {
439 469 self.inner_update_nodemap_data(py, docket, nm_data)
440 470 }
441 471
442 472 @property
443 473 def entry_size(&self) -> PyResult<PyInt> {
444 474 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
445 475 }
446 476
447 477 @property
448 478 def rust_ext_compat(&self) -> PyResult<PyInt> {
449 479 self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
450 480 }
451 481
452 482 });
453 483
454 484 /// Take a (potentially) mmap'ed buffer, and return the underlying Python
455 485 /// buffer along with the Rust slice into said buffer. We need to keep the
456 486 /// Python buffer around, otherwise we'd get a dangling pointer once the buffer
457 487 /// is freed from Python's side.
458 488 ///
459 489 /// # Safety
460 490 ///
461 491 /// The caller must make sure that the buffer is kept around for at least as
462 492 /// long as the slice.
463 493 #[deny(unsafe_op_in_unsafe_fn)]
464 494 unsafe fn mmap_keeparound(
465 495 py: Python,
466 496 data: PyObject,
467 497 ) -> PyResult<(
468 498 PyBuffer,
469 499 Box<dyn std::ops::Deref<Target = [u8]> + Send + 'static>,
470 500 )> {
471 501 let buf = PyBuffer::get(py, &data)?;
472 502 let len = buf.item_count();
473 503
474 504 // Build a slice from the mmap'ed buffer data
475 505 let cbuf = buf.buf_ptr();
476 506 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
477 507 && buf.is_c_contiguous()
478 508 && u8::is_compatible_format(buf.format())
479 509 {
480 510 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
481 511 } else {
482 512 return Err(PyErr::new::<ValueError, _>(
483 513 py,
484 514 "Nodemap data buffer has an invalid memory representation"
485 515 .to_string(),
486 516 ));
487 517 };
488 518
489 519 Ok((buf, Box::new(bytes)))
490 520 }
491 521
492 522 fn py_tuple_to_revision_data_params(
493 523 py: Python,
494 524 tuple: PyTuple,
495 525 ) -> PyResult<RevisionDataParams> {
496 526 if tuple.len(py) < 8 {
497 527 // this is better than the panic promised by tup.get_item()
498 528 return Err(PyErr::new::<IndexError, _>(
499 529 py,
500 530 "tuple index out of range",
501 531 ));
502 532 }
503 533 let offset_or_flags: u64 = tuple.get_item(py, 0).extract(py)?;
504 534 let node_id = tuple
505 535 .get_item(py, 7)
506 536 .extract::<PyBytes>(py)?
507 537 .data(py)
508 538 .try_into()
509 539 .unwrap();
510 540 let flags = (offset_or_flags & 0xFFFF) as u16;
511 541 let data_offset = offset_or_flags >> 16;
512 542 Ok(RevisionDataParams {
513 543 flags,
514 544 data_offset,
515 545 data_compressed_length: tuple.get_item(py, 1).extract(py)?,
516 546 data_uncompressed_length: tuple.get_item(py, 2).extract(py)?,
517 547 data_delta_base: tuple.get_item(py, 3).extract(py)?,
518 548 link_rev: tuple.get_item(py, 4).extract(py)?,
519 549 parent_rev_1: tuple.get_item(py, 5).extract(py)?,
520 550 parent_rev_2: tuple.get_item(py, 6).extract(py)?,
521 551 node_id,
522 552 ..Default::default()
523 553 })
524 554 }
525 555 fn revision_data_params_to_py_tuple(
526 556 py: Python,
527 557 params: RevisionDataParams,
528 558 ) -> PyTuple {
529 559 PyTuple::new(
530 560 py,
531 561 &[
532 562 params.data_offset.into_py_object(py).into_object(),
533 563 params
534 564 .data_compressed_length
535 565 .into_py_object(py)
536 566 .into_object(),
537 567 params
538 568 .data_uncompressed_length
539 569 .into_py_object(py)
540 570 .into_object(),
541 571 params.data_delta_base.into_py_object(py).into_object(),
542 572 params.link_rev.into_py_object(py).into_object(),
543 573 params.parent_rev_1.into_py_object(py).into_object(),
544 574 params.parent_rev_2.into_py_object(py).into_object(),
545 575 PyBytes::new(py, &params.node_id)
546 576 .into_py_object(py)
547 577 .into_object(),
548 578 params._sidedata_offset.into_py_object(py).into_object(),
549 579 params
550 580 ._sidedata_compressed_length
551 581 .into_py_object(py)
552 582 .into_object(),
553 583 params
554 584 .data_compression_mode
555 585 .into_py_object(py)
556 586 .into_object(),
557 587 params
558 588 ._sidedata_compression_mode
559 589 .into_py_object(py)
560 590 .into_object(),
561 591 params._rank.into_py_object(py).into_object(),
562 592 ],
563 593 )
564 594 }
565 595
566 596 struct PySnapshotsCache<'p> {
567 597 py: Python<'p>,
568 598 dict: PyDict,
569 599 }
570 600
571 601 impl<'p> PySnapshotsCache<'p> {
572 602 fn into_object(self) -> PyObject {
573 603 self.dict.into_object()
574 604 }
575 605 }
576 606
577 607 impl<'p> SnapshotsCache for PySnapshotsCache<'p> {
578 608 fn insert_for(
579 609 &mut self,
580 610 rev: BaseRevision,
581 611 value: BaseRevision,
582 612 ) -> Result<(), RevlogError> {
583 613 let pyvalue = value.into_py_object(self.py).into_object();
584 614 match self.dict.get_item(self.py, rev) {
585 615 Some(obj) => obj
586 616 .extract::<PySet>(self.py)
587 617 .and_then(|set| set.add(self.py, pyvalue)),
588 618 None => PySet::new(self.py, vec![pyvalue])
589 619 .and_then(|set| self.dict.set_item(self.py, rev, set)),
590 620 }
591 621 .map_err(|_| {
592 622 RevlogError::Other(HgError::unsupported(
593 623 "Error in Python caches handling",
594 624 ))
595 625 })
596 626 }
597 627 }
598 628
599 629 impl MixedIndex {
600 630 fn new(
601 631 py: Python,
602 632 cindex: PyObject,
603 633 data: PyObject,
604 634 header: u32,
605 635 ) -> PyResult<MixedIndex> {
606 636 // Safety: we keep the buffer around inside the class as `index_mmap`
607 637 let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
608 638
609 639 Self::create_instance(
610 640 py,
611 641 RefCell::new(cindex::Index::new(py, cindex)?),
612 642 RefCell::new(
613 643 hg::index::Index::new(
614 644 bytes,
615 645 IndexHeader::parse(&header.to_be_bytes())
616 646 .expect("default header is broken")
617 647 .unwrap(),
618 648 )
619 649 .unwrap(),
620 650 ),
621 651 RefCell::new(None),
622 652 RefCell::new(None),
623 653 RefCell::new(None),
624 654 RefCell::new(Some(buf)),
625 655 )
626 656 }
627 657
628 658 fn len(&self, py: Python) -> PyResult<usize> {
629 659 let rust_index_len = self.index(py).borrow().len();
630 660 let cindex_len = self.cindex(py).borrow().inner().len(py)?;
631 661 assert_eq!(rust_index_len, cindex_len);
632 662 Ok(cindex_len)
633 663 }
634 664
635 665 /// This is scaffolding at this point, but it could also become
636 666 /// a way to start a persistent nodemap or perform a
637 667 /// vacuum / repack operation
638 668 fn fill_nodemap(
639 669 &self,
640 670 py: Python,
641 671 nt: &mut NodeTree,
642 672 ) -> PyResult<PyObject> {
643 673 let index = self.index(py).borrow();
644 674 for r in 0..self.len(py)? {
645 675 let rev = Revision(r as BaseRevision);
646 676 // in this case node() won't ever return None
647 677 nt.insert(&*index, index.node(rev).unwrap(), rev)
648 678 .map_err(|e| nodemap_error(py, e))?
649 679 }
650 680 Ok(py.None())
651 681 }
652 682
653 683 fn get_nodetree<'a>(
654 684 &'a self,
655 685 py: Python<'a>,
656 686 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
657 687 if self.nt(py).borrow().is_none() {
658 688 let readonly = Box::<Vec<_>>::default();
659 689 let mut nt = NodeTree::load_bytes(readonly, 0);
660 690 self.fill_nodemap(py, &mut nt)?;
661 691 self.nt(py).borrow_mut().replace(nt);
662 692 }
663 693 Ok(self.nt(py))
664 694 }
665 695
666 696 /// forward a method call to the underlying C index
667 697 fn call_cindex(
668 698 &self,
669 699 py: Python,
670 700 name: &str,
671 701 args: &PyTuple,
672 702 kwargs: Option<&PyDict>,
673 703 ) -> PyResult<PyObject> {
674 704 self.cindex(py)
675 705 .borrow()
676 706 .inner()
677 707 .call_method(py, name, args, kwargs)
678 708 }
679 709
680 710 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
681 711 self.cindex(py).borrow().clone_ref(py)
682 712 }
683 713
684 714 /// Returns the full nodemap bytes to be written as-is to disk
685 715 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
686 716 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
687 717 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
688 718
689 719 // If there's anything readonly, we need to build the data again from
690 720 // scratch
691 721 let bytes = if readonly.len() > 0 {
692 722 let mut nt = NodeTree::load_bytes(Box::<Vec<_>>::default(), 0);
693 723 self.fill_nodemap(py, &mut nt)?;
694 724
695 725 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
696 726 assert_eq!(readonly.len(), 0);
697 727
698 728 bytes
699 729 } else {
700 730 bytes
701 731 };
702 732
703 733 let bytes = PyBytes::new(py, &bytes);
704 734 Ok(bytes)
705 735 }
706 736
707 737 /// Returns the last saved docket along with the size of any changed data
708 738 /// (in number of blocks), and said data as bytes.
709 739 fn inner_nodemap_data_incremental(
710 740 &self,
711 741 py: Python,
712 742 ) -> PyResult<PyObject> {
713 743 let docket = self.docket(py).borrow();
714 744 let docket = match docket.as_ref() {
715 745 Some(d) => d,
716 746 None => return Ok(py.None()),
717 747 };
718 748
719 749 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
720 750 let masked_blocks = node_tree.masked_readonly_blocks();
721 751 let (_, data) = node_tree.into_readonly_and_added_bytes();
722 752 let changed = masked_blocks * std::mem::size_of::<Block>();
723 753
724 754 Ok((docket, changed, PyBytes::new(py, &data))
725 755 .to_py_object(py)
726 756 .into_object())
727 757 }
728 758
729 759 /// Update the nodemap from the new (mmaped) data.
730 760 /// The docket is kept as a reference for later incremental calls.
731 761 fn inner_update_nodemap_data(
732 762 &self,
733 763 py: Python,
734 764 docket: PyObject,
735 765 nm_data: PyObject,
736 766 ) -> PyResult<PyObject> {
737 767 // Safety: we keep the buffer around inside the class as `nodemap_mmap`
738 768 let (buf, bytes) = unsafe { mmap_keeparound(py, nm_data)? };
739 769 let len = buf.item_count();
740 770 self.nodemap_mmap(py).borrow_mut().replace(buf);
741 771
742 772 let mut nt = NodeTree::load_bytes(bytes, len);
743 773
744 774 let data_tip = docket
745 775 .getattr(py, "tip_rev")?
746 776 .extract::<BaseRevision>(py)?
747 777 .into();
748 778 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
749 779 let idx = self.index(py).borrow();
750 780 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
751 781 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
752 782 })?;
753 783 let current_tip = idx.len();
754 784
755 785 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
756 786 let rev = Revision(r);
757 787 // in this case node() won't ever return None
758 788 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
759 789 .map_err(|e| nodemap_error(py, e))?
760 790 }
761 791
762 792 *self.nt(py).borrow_mut() = Some(nt);
763 793
764 794 Ok(py.None())
765 795 }
766 796
767 797 fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> {
768 798 let idx = self.index(py).borrow();
769 799 Ok(match key.extract::<BaseRevision>(py) {
770 800 Ok(key_as_int) => {
771 801 let entry_params = if key_as_int == NULL_REVISION.0 {
772 802 RevisionDataParams::default()
773 803 } else {
774 804 let rev = UncheckedRevision(key_as_int);
775 805 match idx.entry_as_params(rev) {
776 806 Some(e) => e,
777 807 None => {
778 808 return Err(PyErr::new::<IndexError, _>(
779 809 py,
780 810 "revlog index out of range",
781 811 ));
782 812 }
783 813 }
784 814 };
785 815 revision_data_params_to_py_tuple(py, entry_params)
786 816 .into_object()
787 817 }
788 818 _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else(
789 819 || py.None(),
790 820 |py_rev| py_rev.into_py_object(py).into_object(),
791 821 ),
792 822 })
793 823 }
794 824
795 825 fn inner_headrevs(&self, py: Python) -> PyResult<PyObject> {
796 826 let index = &mut *self.index(py).borrow_mut();
797 827 let as_vec: Vec<PyObject> = index
798 828 .head_revs()
799 829 .map_err(|e| graph_error(py, e))?
800 830 .iter()
801 831 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
802 832 .collect();
803 833 Ok(PyList::new(py, &as_vec).into_object())
804 834 }
805 835
806 836 fn inner_headrevsfiltered(
807 837 &self,
808 838 py: Python,
809 839 filtered_revs: &PyObject,
810 840 ) -> PyResult<PyObject> {
811 841 let index = &mut *self.index(py).borrow_mut();
812 842 let filtered_revs = rev_pyiter_collect(py, filtered_revs, index)?;
813 843
814 844 let as_vec: Vec<PyObject> = index
815 845 .head_revs_filtered(&filtered_revs)
816 846 .map_err(|e| graph_error(py, e))?
817 847 .iter()
818 848 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
819 849 .collect();
820 850 Ok(PyList::new(py, &as_vec).into_object())
821 851 }
852
853 fn inner_slicechunktodensity(
854 &self,
855 py: Python,
856 revs: PyObject,
857 target_density: f64,
858 min_gap_size: usize,
859 ) -> PyResult<Vec<Vec<Revision>>> {
860 let index = &mut *self.index(py).borrow_mut();
861 let revs: Vec<_> = rev_pyiter_collect(py, &revs, index)?;
862 Ok(index.slice_chunk_to_density(&revs, target_density, min_gap_size))
863 }
822 864 }
823 865
824 866 fn revlog_error(py: Python) -> PyErr {
825 867 match py
826 868 .import("mercurial.error")
827 869 .and_then(|m| m.get(py, "RevlogError"))
828 870 {
829 871 Err(e) => e,
830 872 Ok(cls) => PyErr::from_instance(
831 873 py,
832 874 cls.call(py, (py.None(),), None).ok().into_py_object(py),
833 875 ),
834 876 }
835 877 }
836 878
837 879 fn graph_error(py: Python, _err: hg::GraphError) -> PyErr {
838 880 // ParentOutOfRange is currently the only alternative
839 881 // in `hg::GraphError`. The C index always raises this simple ValueError.
840 882 PyErr::new::<ValueError, _>(py, "parent out of range")
841 883 }
842 884
843 885 fn nodemap_rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
844 886 PyErr::new::<ValueError, _>(
845 887 py,
846 888 format!(
847 889 "Inconsistency: Revision {} found in nodemap \
848 890 is not in revlog index",
849 891 rev
850 892 ),
851 893 )
852 894 }
853 895
854 896 fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
855 897 PyErr::new::<ValueError, _>(
856 898 py,
857 899 format!("revlog index out of range: {}", rev),
858 900 )
859 901 }
860 902
861 903 /// Standard treatment of NodeMapError
862 904 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
863 905 match err {
864 906 NodeMapError::MultipleResults => revlog_error(py),
865 907 NodeMapError::RevisionNotInIndex(r) => nodemap_rev_not_in_index(py, r),
866 908 }
867 909 }
868 910
869 911 /// assert two Python objects to be equal from a Python point of view
870 912 ///
871 913 /// `method` is a label for the assertion error message, intended to be the
872 914 /// name of the caller.
873 915 /// `normalizer` is a function that takes a Python variable name and returns
874 916 /// an expression that the conparison will actually use.
875 917 /// Foe example: `|v| format!("sorted({})", v)`
876 918 fn assert_py_eq_normalized(
877 919 py: Python,
878 920 method: &str,
879 921 rust: &PyObject,
880 922 c: &PyObject,
881 923 normalizer: impl FnOnce(&str) -> String + Copy,
882 924 ) -> PyResult<()> {
883 925 let locals = PyDict::new(py);
884 926 locals.set_item(py, "rust".into_py_object(py).into_object(), rust)?;
885 927 locals.set_item(py, "c".into_py_object(py).into_object(), c)?;
886 928 // let lhs = format!(normalizer_fmt, "rust");
887 929 // let rhs = format!(normalizer_fmt, "c");
888 930 let is_eq: PyBool = py
889 931 .eval(
890 932 &format!("{} == {}", &normalizer("rust"), &normalizer("c")),
891 933 None,
892 934 Some(&locals),
893 935 )?
894 936 .extract(py)?;
895 937 assert!(
896 938 is_eq.is_true(),
897 939 "{} results differ. Rust: {:?} C: {:?} (before any normalization)",
898 940 method,
899 941 rust,
900 942 c
901 943 );
902 944 Ok(())
903 945 }
904 946
905 947 fn assert_py_eq(
906 948 py: Python,
907 949 method: &str,
908 950 rust: &PyObject,
909 951 c: &PyObject,
910 952 ) -> PyResult<()> {
911 953 assert_py_eq_normalized(py, method, rust, c, |v| v.to_owned())
912 954 }
913 955
914 956 /// Create the module, with __package__ given from parent
915 957 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
916 958 let dotted_name = &format!("{}.revlog", package);
917 959 let m = PyModule::new(py, dotted_name)?;
918 960 m.add(py, "__package__", package)?;
919 961 m.add(py, "__doc__", "RevLog - Rust implementations")?;
920 962
921 963 m.add_class::<MixedIndex>(py)?;
922 964
923 965 let sys = PyModule::import(py, "sys")?;
924 966 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
925 967 sys_modules.set_item(py, dotted_name, &m)?;
926 968
927 969 Ok(m)
928 970 }
General Comments 0
You need to be logged in to leave comments. Login now