rust-index-cpython: cache the heads' PyList representation...
Raphaël Gomès
r52154:9088c6d6 default
@@ -1,2009 +1,2024 b''
1 1 use std::collections::{HashMap, HashSet};
2 2 use std::fmt::Debug;
3 3 use std::ops::Deref;
4 4 use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
5 5
6 6 use bitvec::prelude::*;
7 7 use byteorder::{BigEndian, ByteOrder};
8 8 use bytes_cast::{unaligned, BytesCast};
9 9
10 10 use super::REVIDX_KNOWN_FLAGS;
11 11 use crate::errors::HgError;
12 12 use crate::node::{NODE_BYTES_LENGTH, NULL_NODE, STORED_NODE_ID_BYTES};
13 13 use crate::revlog::node::Node;
14 14 use crate::revlog::{Revision, NULL_REVISION};
15 15 use crate::{
16 16 dagops, BaseRevision, FastHashMap, Graph, GraphError, RevlogError,
17 17 RevlogIndex, UncheckedRevision,
18 18 };
19 19
20 20 pub const INDEX_ENTRY_SIZE: usize = 64;
21 21 pub const COMPRESSION_MODE_INLINE: u8 = 2;
22 22
23 23 #[derive(Debug)]
24 24 pub struct IndexHeader {
25 25 pub(super) header_bytes: [u8; 4],
26 26 }
27 27
28 28 #[derive(Copy, Clone)]
29 29 pub struct IndexHeaderFlags {
30 30 flags: u16,
31 31 }
32 32
33 33 /// Corresponds to the high bits of `_format_flags` in python
34 34 impl IndexHeaderFlags {
35 35 /// Corresponds to FLAG_INLINE_DATA in python
36 36 pub fn is_inline(self) -> bool {
37 37 self.flags & 1 != 0
38 38 }
39 39 /// Corresponds to FLAG_GENERALDELTA in python
40 40 pub fn uses_generaldelta(self) -> bool {
41 41 self.flags & 2 != 0
42 42 }
43 43 }
44 44
45 45 /// Corresponds to the INDEX_HEADER structure,
46 46 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
47 47 impl IndexHeader {
48 48 fn format_flags(&self) -> IndexHeaderFlags {
49 49 // No "unknown flags" check here, unlike in python. Maybe there should
50 50 // be.
51 51 IndexHeaderFlags {
52 52 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
53 53 }
54 54 }
55 55
56 56 /// The only revlog version currently supported by rhg.
57 57 const REVLOGV1: u16 = 1;
58 58
59 59 /// Corresponds to `_format_version` in Python.
60 60 fn format_version(&self) -> u16 {
61 61 BigEndian::read_u16(&self.header_bytes[2..4])
62 62 }
63 63
64 64 pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> {
65 65 if index_bytes.is_empty() {
66 66 return Ok(None);
67 67 }
68 68 if index_bytes.len() < 4 {
69 69 return Err(HgError::corrupted(
70 70 "corrupted revlog: can't read the index format header",
71 71 ));
72 72 }
73 73 Ok(Some(IndexHeader {
74 74 header_bytes: {
75 75 let bytes: [u8; 4] =
76 76 index_bytes[0..4].try_into().expect("impossible");
77 77 bytes
78 78 },
79 79 }))
80 80 }
81 81 }
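// Editor's sketch (not part of this changeset): how the four header bytes
// decompose, given the accessors above. Bytes 0..2 carry the format flags
// (bit 0 = inline, bit 1 = generaldelta), bytes 2..4 the format version.
fn _header_layout_sketch() {
    let header = IndexHeader {
        header_bytes: [0x00, 0x03, 0x00, 0x01],
    };
    assert!(header.format_flags().is_inline());
    assert!(header.format_flags().uses_generaldelta());
    assert_eq!(header.format_version(), IndexHeader::REVLOGV1);
}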
82 82
83 83 /// Abstracts the access to the index bytes since they can be spread between
84 84 /// the immutable (bytes) part and the mutable (added) part if any appends
85 85 /// happened. This makes it transparent for the callers.
86 86 struct IndexData {
87 87 /// Immutable bytes, most likely taken from disk
88 88 bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>,
89 89 /// Used when stripping index contents, keeps track of the start of the
90 90 /// first stripped revision, which is used to give a slice of the
91 91 /// `bytes` field.
92 92 truncation: Option<usize>,
93 93 /// Bytes that were added after reading the index
94 94 added: Vec<u8>,
95 95 }
96 96
97 97 impl IndexData {
98 98 pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>) -> Self {
99 99 Self {
100 100 bytes,
101 101 truncation: None,
102 102 added: vec![],
103 103 }
104 104 }
105 105
106 106 pub fn len(&self) -> usize {
107 107 match self.truncation {
108 108 Some(truncation) => truncation + self.added.len(),
109 109 None => self.bytes.len() + self.added.len(),
110 110 }
111 111 }
112 112
113 113 fn remove(
114 114 &mut self,
115 115 rev: Revision,
116 116 offsets: Option<&[usize]>,
117 117 ) -> Result<(), RevlogError> {
118 118 let rev = rev.0 as usize;
119 119 let truncation = if let Some(offsets) = offsets {
120 120 offsets[rev]
121 121 } else {
122 122 rev * INDEX_ENTRY_SIZE
123 123 };
124 124 if truncation < self.bytes.len() {
125 125 self.truncation = Some(truncation);
126 126 self.added.clear();
127 127 } else {
128 128 self.added.truncate(truncation - self.bytes.len());
129 129 }
130 130 Ok(())
131 131 }
132 132
133 133 fn is_new(&self) -> bool {
134 134 self.bytes.is_empty()
135 135 }
136 136 }
137 137
138 138 impl std::ops::Index<std::ops::Range<usize>> for IndexData {
139 139 type Output = [u8];
140 140
141 141 fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
142 142 let start = index.start;
143 143 let end = index.end;
144 144 let immutable_len = match self.truncation {
145 145 Some(truncation) => truncation,
146 146 None => self.bytes.len(),
147 147 };
148 148 if start < immutable_len {
149 149 if end > immutable_len {
150 150 panic!("index data cannot span existing and added ranges");
151 151 }
152 152 &self.bytes[index]
153 153 } else {
154 154 &self.added[start - immutable_len..end - immutable_len]
155 155 }
156 156 }
157 157 }
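// Editor's sketch (illustrative only, not part of this changeset): `IndexData`
// hides whether a byte range lives in the immutable part or in `added`; a
// single range must not straddle the boundary between the two.
fn _index_data_sketch() {
    let on_disk: Vec<u8> = vec![0; 2 * INDEX_ENTRY_SIZE];
    let mut data = IndexData::new(Box::new(on_disk));
    data.added.extend_from_slice(&[1u8; INDEX_ENTRY_SIZE]);
    // The first entry is served from the immutable bytes, the third from
    // `added`, through the same indexing operator.
    assert_eq!(&data[0..INDEX_ENTRY_SIZE], &[0u8; INDEX_ENTRY_SIZE][..]);
    assert_eq!(
        &data[2 * INDEX_ENTRY_SIZE..3 * INDEX_ENTRY_SIZE],
        &[1u8; INDEX_ENTRY_SIZE][..]
    );
}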
158 158
159 159 #[derive(Debug, PartialEq, Eq)]
160 160 pub struct RevisionDataParams {
161 161 pub flags: u16,
162 162 pub data_offset: u64,
163 163 pub data_compressed_length: i32,
164 164 pub data_uncompressed_length: i32,
165 165 pub data_delta_base: i32,
166 166 pub link_rev: i32,
167 167 pub parent_rev_1: i32,
168 168 pub parent_rev_2: i32,
169 169 pub node_id: [u8; NODE_BYTES_LENGTH],
170 170 pub _sidedata_offset: u64,
171 171 pub _sidedata_compressed_length: i32,
172 172 pub data_compression_mode: u8,
173 173 pub _sidedata_compression_mode: u8,
174 174 pub _rank: i32,
175 175 }
176 176
177 177 impl Default for RevisionDataParams {
178 178 fn default() -> Self {
179 179 Self {
180 180 flags: 0,
181 181 data_offset: 0,
182 182 data_compressed_length: 0,
183 183 data_uncompressed_length: 0,
184 184 data_delta_base: -1,
185 185 link_rev: -1,
186 186 parent_rev_1: -1,
187 187 parent_rev_2: -1,
188 188 node_id: [0; NODE_BYTES_LENGTH],
189 189 _sidedata_offset: 0,
190 190 _sidedata_compressed_length: 0,
191 191 data_compression_mode: COMPRESSION_MODE_INLINE,
192 192 _sidedata_compression_mode: COMPRESSION_MODE_INLINE,
193 193 _rank: -1,
194 194 }
195 195 }
196 196 }
197 197
198 198 #[derive(BytesCast)]
199 199 #[repr(C)]
200 200 pub struct RevisionDataV1 {
201 201 data_offset_or_flags: unaligned::U64Be,
202 202 data_compressed_length: unaligned::I32Be,
203 203 data_uncompressed_length: unaligned::I32Be,
204 204 data_delta_base: unaligned::I32Be,
205 205 link_rev: unaligned::I32Be,
206 206 parent_rev_1: unaligned::I32Be,
207 207 parent_rev_2: unaligned::I32Be,
208 208 node_id: [u8; STORED_NODE_ID_BYTES],
209 209 }
210 210
211 211 fn _static_assert_size_of_revision_data_v1() {
212 212 let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>;
213 213 }
214 214
215 215 impl RevisionDataParams {
216 216 pub fn validate(&self) -> Result<(), RevlogError> {
217 217 if self.flags & !REVIDX_KNOWN_FLAGS != 0 {
218 218 return Err(RevlogError::corrupted(format!(
219 219 "unknown revlog index flags: {}",
220 220 self.flags
221 221 )));
222 222 }
223 223 if self.data_compression_mode != COMPRESSION_MODE_INLINE {
224 224 return Err(RevlogError::corrupted(format!(
225 225 "invalid data compression mode: {}",
226 226 self.data_compression_mode
227 227 )));
228 228 }
229 229 // FIXME isn't this only for v2 or changelog v2?
230 230 if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE {
231 231 return Err(RevlogError::corrupted(format!(
232 232 "invalid sidedata compression mode: {}",
233 233 self._sidedata_compression_mode
234 234 )));
235 235 }
236 236 Ok(())
237 237 }
238 238
239 239 pub fn into_v1(self) -> RevisionDataV1 {
240 240 let data_offset_or_flags = self.data_offset << 16 | self.flags as u64;
241 241 let mut node_id = [0; STORED_NODE_ID_BYTES];
242 242 node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id);
243 243 RevisionDataV1 {
244 244 data_offset_or_flags: data_offset_or_flags.into(),
245 245 data_compressed_length: self.data_compressed_length.into(),
246 246 data_uncompressed_length: self.data_uncompressed_length.into(),
247 247 data_delta_base: self.data_delta_base.into(),
248 248 link_rev: self.link_rev.into(),
249 249 parent_rev_1: self.parent_rev_1.into(),
250 250 parent_rev_2: self.parent_rev_2.into(),
251 251 node_id,
252 252 }
253 253 }
254 254 }
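// Editor's sketch (illustrative only, not part of this changeset): `into_v1`
// packs the data offset and the 16-bit flags into a single big-endian u64,
// the same field that `IndexEntry::raw_offset()` reads back and that
// `c_start()` / `flags()` split apart again.
fn _offset_flags_packing_sketch() {
    let data_offset: u64 = 0xABCD;
    let flags: u16 = 0x0001;
    let packed = data_offset << 16 | flags as u64; // same expression as above
    assert_eq!(packed >> 16, data_offset); // what `c_start()` recovers
    assert_eq!((packed & 0xFFFF) as u16, flags); // what `flags()` reads
}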
255 255
256 256 /// A Revlog index
257 257 pub struct Index {
258 258 bytes: IndexData,
259 259 /// Offsets of starts of index blocks.
260 260 /// Only needed when the index is interleaved with data.
261 261 offsets: RwLock<Option<Vec<usize>>>,
262 262 uses_generaldelta: bool,
263 263 is_inline: bool,
264 264 /// Cache of (head_revisions, filtered_revisions)
265 265 ///
266 266 /// The head revisions in this index, kept in sync. Should
267 267 /// be accessed via the [`Self::head_revs`] method.
268 268 /// The last filtered revisions in this index, used to make sure
269 269 /// we haven't changed filters when returning the cached `head_revs`.
270 270 head_revs: RwLock<(Vec<Revision>, HashSet<Revision>)>,
271 271 }
272 272
273 273 impl Debug for Index {
274 274 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
275 275 f.debug_struct("Index")
276 276 .field("offsets", &self.offsets)
277 277 .field("uses_generaldelta", &self.uses_generaldelta)
278 278 .finish()
279 279 }
280 280 }
281 281
282 282 impl Graph for Index {
283 283 #[inline(always)]
284 284 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
285 285 let err = || GraphError::ParentOutOfRange(rev);
286 286 match self.get_entry(rev) {
287 287 Some(entry) => {
288 288 // The C implementation checks that the parents are valid
289 289 // before returning
290 290 Ok([
291 291 self.check_revision(entry.p1()).ok_or_else(err)?,
292 292 self.check_revision(entry.p2()).ok_or_else(err)?,
293 293 ])
294 294 }
295 295 None => Ok([NULL_REVISION, NULL_REVISION]),
296 296 }
297 297 }
298 298 }
299 299
300 300 /// A cache suitable for find_snapshots
301 301 ///
302 302 /// Logically equivalent to a mapping whose keys are [`BaseRevision`] and
303 303 /// values sets of [`BaseRevision`]
304 304 ///
305 305 /// TODO the dubious part is insisting that errors must be RevlogError.
306 306 /// We would probably need to sprinkle some magic here, such as an associated
307 307 /// type that would be Into<RevlogError>, but even that would not be
308 308 /// satisfactory, as errors potentially have nothing to do with the revlog.
309 309 pub trait SnapshotsCache {
310 310 fn insert_for(
311 311 &mut self,
312 312 rev: BaseRevision,
313 313 value: BaseRevision,
314 314 ) -> Result<(), RevlogError>;
315 315 }
316 316
317 317 impl SnapshotsCache for FastHashMap<BaseRevision, HashSet<BaseRevision>> {
318 318 fn insert_for(
319 319 &mut self,
320 320 rev: BaseRevision,
321 321 value: BaseRevision,
322 322 ) -> Result<(), RevlogError> {
323 323 let all_values = self.entry(rev).or_insert_with(HashSet::new);
324 324 all_values.insert(value);
325 325 Ok(())
326 326 }
327 327 }
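// Editor's sketch (illustrative only, not part of this changeset): with the
// `FastHashMap` implementation above, `find_snapshots` accumulates snapshot
// revisions under their delta base; any other mapping-like cache can be
// plugged in through the trait.
fn _snapshots_cache_sketch() {
    let mut cache: FastHashMap<BaseRevision, HashSet<BaseRevision>> =
        FastHashMap::default();
    cache.insert_for(NULL_REVISION.0, 0).unwrap();
    cache.insert_for(NULL_REVISION.0, 3).unwrap();
    assert_eq!(cache[&NULL_REVISION.0].len(), 2);
}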
328 328
329 329 impl Index {
330 330 /// Create an index from bytes.
331 331 /// Calculate the start of each entry when is_inline is true.
332 332 pub fn new(
333 333 bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>,
334 334 default_header: IndexHeader,
335 335 ) -> Result<Self, HgError> {
336 336 let header =
337 337 IndexHeader::parse(bytes.as_ref())?.unwrap_or(default_header);
338 338
339 339 if header.format_version() != IndexHeader::REVLOGV1 {
340 340 // A proper new version should have had a repo/store
341 341 // requirement.
342 342 return Err(HgError::corrupted("unsupported revlog version"));
343 343 }
344 344
345 345 // This is only correct because we know version is REVLOGV1.
346 346 // In v2 we always use generaldelta, while in v0 we never use
347 347 // generaldelta. Similar for [is_inline] (it's only used in v1).
348 348 let uses_generaldelta = header.format_flags().uses_generaldelta();
349 349
350 350 if header.format_flags().is_inline() {
351 351 let mut offset: usize = 0;
352 352 let mut offsets = Vec::new();
353 353
354 354 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
355 355 offsets.push(offset);
356 356 let end = offset + INDEX_ENTRY_SIZE;
357 357 let entry = IndexEntry {
358 358 bytes: &bytes[offset..end],
359 359 offset_override: None,
360 360 };
361 361
362 362 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
363 363 }
364 364
365 365 if offset == bytes.len() {
366 366 Ok(Self {
367 367 bytes: IndexData::new(bytes),
368 368 offsets: RwLock::new(Some(offsets)),
369 369 uses_generaldelta,
370 370 is_inline: true,
371 371 head_revs: RwLock::new((vec![], HashSet::new())),
372 372 })
373 373 } else {
374 374 Err(HgError::corrupted("unexpected inline revlog length"))
375 375 }
376 376 } else {
377 377 Ok(Self {
378 378 bytes: IndexData::new(bytes),
379 379 offsets: RwLock::new(None),
380 380 uses_generaldelta,
381 381 is_inline: false,
382 382 head_revs: RwLock::new((vec![], HashSet::new())),
383 383 })
384 384 }
385 385 }
386 386
387 387 pub fn uses_generaldelta(&self) -> bool {
388 388 self.uses_generaldelta
389 389 }
390 390
391 391 /// Value of the inline flag.
392 392 pub fn is_inline(&self) -> bool {
393 393 self.is_inline
394 394 }
395 395
396 396 /// Return a slice of bytes if `revlog` is inline. Panic if not.
397 397 pub fn data(&self, start: usize, end: usize) -> &[u8] {
398 398 if !self.is_inline() {
399 399 panic!("tried to access data in the index of a revlog that is not inline");
400 400 }
401 401 &self.bytes[start..end]
402 402 }
403 403
404 404 /// Return number of entries of the revlog index.
405 405 pub fn len(&self) -> usize {
406 406 if let Some(offsets) = &*self.get_offsets() {
407 407 offsets.len()
408 408 } else {
409 409 self.bytes.len() / INDEX_ENTRY_SIZE
410 410 }
411 411 }
412 412
413 413 pub fn get_offsets(&self) -> RwLockReadGuard<Option<Vec<usize>>> {
414 414 if self.is_inline() {
415 415 {
416 416 // Wrap in a block to drop the read guard
417 417 // TODO perf?
418 418 let mut offsets = self.offsets.write().unwrap();
419 419 if offsets.is_none() {
420 420 offsets.replace(inline_scan(&self.bytes.bytes).1);
421 421 }
422 422 }
423 423 }
424 424 self.offsets.read().unwrap()
425 425 }
426 426
427 427 pub fn get_offsets_mut(&mut self) -> RwLockWriteGuard<Option<Vec<usize>>> {
428 428 let mut offsets = self.offsets.write().unwrap();
429 429 if self.is_inline() && offsets.is_none() {
430 430 offsets.replace(inline_scan(&self.bytes.bytes).1);
431 431 }
432 432 offsets
433 433 }
434 434
435 435 /// Returns `true` if the `Index` has zero `entries`.
436 436 pub fn is_empty(&self) -> bool {
437 437 self.len() == 0
438 438 }
439 439
440 440 /// Return the index entry corresponding to the given revision or `None`
441 441 /// for [`NULL_REVISION`]
442 442 ///
443 443 /// Since the specified revision is of the checked type, it always exists
444 444 /// if it was validated by this index.
445 445 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
446 446 if rev == NULL_REVISION {
447 447 return None;
448 448 }
449 449 Some(if let Some(offsets) = &*self.get_offsets() {
450 450 self.get_entry_inline(rev, offsets.as_ref())
451 451 } else {
452 452 self.get_entry_separated(rev)
453 453 })
454 454 }
455 455
456 456 /// Return the binary content of the index entry for the given revision
457 457 ///
458 458 /// See [get_entry()](`Self::get_entry()`) for cases when `None` is
459 459 /// returned.
460 460 pub fn entry_binary(&self, rev: Revision) -> Option<&[u8]> {
461 461 self.get_entry(rev).map(|e| {
462 462 let bytes = e.as_bytes();
463 463 if rev.0 == 0 {
464 464 &bytes[4..]
465 465 } else {
466 466 bytes
467 467 }
468 468 })
469 469 }
470 470
471 471 pub fn entry_as_params(
472 472 &self,
473 473 rev: UncheckedRevision,
474 474 ) -> Option<RevisionDataParams> {
475 475 let rev = self.check_revision(rev)?;
476 476 self.get_entry(rev).map(|e| RevisionDataParams {
477 477 flags: e.flags(),
478 478 data_offset: if rev.0 == 0 && !self.bytes.is_new() {
479 479 e.flags() as u64
480 480 } else {
481 481 e.raw_offset()
482 482 },
483 483 data_compressed_length: e
484 484 .compressed_len()
485 485 .try_into()
486 486 .unwrap_or_else(|_| {
487 487 // Python's `unionrepo` sets the compressed length to be
488 488 // `-1` (or `u32::MAX` if transmuted to `u32`) because it
489 489 // cannot know the correct compressed length of a given
490 490 // revision. I'm not sure if this is true, but having this
491 491 // edge case won't hurt other use cases, let's handle it.
492 492 assert_eq!(e.compressed_len(), u32::MAX);
493 493 NULL_REVISION.0
494 494 }),
495 495 data_uncompressed_length: e.uncompressed_len(),
496 496 data_delta_base: e.base_revision_or_base_of_delta_chain().0,
497 497 link_rev: e.link_revision().0,
498 498 parent_rev_1: e.p1().0,
499 499 parent_rev_2: e.p2().0,
500 500 node_id: e.hash().as_bytes().try_into().unwrap(),
501 501 ..Default::default()
502 502 })
503 503 }
504 504
505 505 fn get_entry_inline(
506 506 &self,
507 507 rev: Revision,
508 508 offsets: &[usize],
509 509 ) -> IndexEntry {
510 510 let start = offsets[rev.0 as usize];
511 511 let end = start + INDEX_ENTRY_SIZE;
512 512 let bytes = &self.bytes[start..end];
513 513
514 514 // See IndexEntry for an explanation of this override.
515 515 let offset_override = Some(end);
516 516
517 517 IndexEntry {
518 518 bytes,
519 519 offset_override,
520 520 }
521 521 }
522 522
523 523 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
524 524 let start = rev.0 as usize * INDEX_ENTRY_SIZE;
525 525 let end = start + INDEX_ENTRY_SIZE;
526 526 let bytes = &self.bytes[start..end];
527 527
528 528 // Override the offset of the first revision as its bytes are used
529 529 // for the index's metadata (saving space because it is always 0)
530 530 let offset_override = if rev == Revision(0) { Some(0) } else { None };
531 531
532 532 IndexEntry {
533 533 bytes,
534 534 offset_override,
535 535 }
536 536 }
537 537
538 538 fn null_entry(&self) -> IndexEntry {
539 539 IndexEntry {
540 540 bytes: &[0; INDEX_ENTRY_SIZE],
541 541 offset_override: Some(0),
542 542 }
543 543 }
544 544
545 545 /// Return the head revisions of this index
546 546 pub fn head_revs(&self) -> Result<Vec<Revision>, GraphError> {
547 self.head_revs_filtered(&HashSet::new())
547 self.head_revs_filtered(&HashSet::new(), false)
548 .map(|h| h.unwrap())
549 }
550
551 /// Python-specific shortcut to save on PyList creation
552 pub fn head_revs_shortcut(
553 &self,
554 ) -> Result<Option<Vec<Revision>>, GraphError> {
555 self.head_revs_filtered(&HashSet::new(), true)
548 556 }
549 557
550 558 /// Return the head revisions of this index
551 559 pub fn head_revs_filtered(
552 560 &self,
553 561 filtered_revs: &HashSet<Revision>,
554 ) -> Result<Vec<Revision>, GraphError> {
562 py_shortcut: bool,
563 ) -> Result<Option<Vec<Revision>>, GraphError> {
555 564 {
556 565 let guard = self
557 566 .head_revs
558 567 .read()
559 568 .expect("RwLock on Index.head_revs should not be poisoned");
560 569 let self_head_revs = &guard.0;
561 570 let self_filtered_revs = &guard.1;
562 571 if !self_head_revs.is_empty()
563 572 && filtered_revs == self_filtered_revs
564 573 {
565 return Ok(self_head_revs.to_owned());
574 if py_shortcut {
575 // Don't copy the revs since we've already cached them
576 // on the Python side.
577 return Ok(None);
578 } else {
579 return Ok(Some(self_head_revs.to_owned()));
580 }
566 581 }
567 582 }
568 583
569 584 let as_vec = if self.is_empty() {
570 585 vec![NULL_REVISION]
571 586 } else {
572 587 let mut not_heads = bitvec![0; self.len()];
573 588 dagops::retain_heads_fast(
574 589 self,
575 590 not_heads.as_mut_bitslice(),
576 591 filtered_revs,
577 592 )?;
578 593 not_heads
579 594 .into_iter()
580 595 .enumerate()
581 596 .filter_map(|(idx, is_not_head)| {
582 597 if is_not_head {
583 598 None
584 599 } else {
585 600 Some(Revision(idx as BaseRevision))
586 601 }
587 602 })
588 603 .collect()
589 604 };
590 605 *self
591 606 .head_revs
592 607 .write()
593 608 .expect("RwLock on Index.head_revs should not be poisoned") =
594 609 (as_vec.to_owned(), filtered_revs.to_owned());
595 Ok(as_vec)
610 Ok(Some(as_vec))
596 611 }
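    // Editor's sketch (hypothetical caller, not part of this changeset): the
    // `py_shortcut` contract is what lets the cpython binding cache the heads'
    // PyList: `Ok(None)` means "the Rust cache is still valid, reuse your
    // cached list", `Ok(Some(revs))` means "rebuild and re-cache it".
    fn _head_revs_shortcut_contract_sketch(
        &self,
    ) -> Result<(), GraphError> {
        match self.head_revs_shortcut()? {
            Some(_heads) => {
                // A binding would rebuild its PyList from `_heads` here.
            }
            None => {
                // Cache hit: the previously built PyList can be returned as-is.
            }
        }
        Ok(())
    }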
597 612
598 613 /// Obtain the delta chain for a revision.
599 614 ///
600 615 /// `stop_rev` specifies a revision to stop at. If not specified, we
601 616 /// stop at the base of the chain.
602 617 ///
603 618 /// Returns a 2-tuple of (chain, stopped) where `chain` is a vec of
604 619 /// revs in ascending order and `stopped` is a bool indicating whether
605 620 /// `stoprev` was hit.
606 621 pub fn delta_chain(
607 622 &self,
608 623 rev: Revision,
609 624 stop_rev: Option<Revision>,
610 625 using_general_delta: Option<bool>,
611 626 ) -> Result<(Vec<Revision>, bool), HgError> {
612 627 let mut current_rev = rev;
613 628 let mut entry = self.get_entry(rev).unwrap();
614 629 let mut chain = vec![];
615 630 let using_general_delta =
616 631 using_general_delta.unwrap_or_else(|| self.uses_generaldelta());
617 632 while current_rev.0 != entry.base_revision_or_base_of_delta_chain().0
618 633 && stop_rev.map(|r| r != current_rev).unwrap_or(true)
619 634 {
620 635 chain.push(current_rev);
621 636 let new_rev = if using_general_delta {
622 637 entry.base_revision_or_base_of_delta_chain()
623 638 } else {
624 639 UncheckedRevision(current_rev.0 - 1)
625 640 };
626 641 current_rev = self.check_revision(new_rev).ok_or_else(|| {
627 642 HgError::corrupted(format!("Revision {new_rev} out of range"))
628 643 })?;
629 644 if current_rev.0 == NULL_REVISION.0 {
630 645 break;
631 646 }
632 647 entry = self.get_entry(current_rev).unwrap()
633 648 }
634 649
635 650 let stopped = if stop_rev.map(|r| current_rev == r).unwrap_or(false) {
636 651 true
637 652 } else {
638 653 chain.push(current_rev);
639 654 false
640 655 };
641 656 chain.reverse();
642 657 Ok((chain, stopped))
643 658 }
644 659
645 660 pub fn find_snapshots(
646 661 &self,
647 662 start_rev: UncheckedRevision,
648 663 end_rev: UncheckedRevision,
649 664 cache: &mut impl SnapshotsCache,
650 665 ) -> Result<(), RevlogError> {
651 666 let mut start_rev = start_rev.0;
652 667 let mut end_rev = end_rev.0;
653 668 end_rev += 1;
654 669 let len = self.len().try_into().unwrap();
655 670 if end_rev > len {
656 671 end_rev = len;
657 672 }
658 673 if start_rev < 0 {
659 674 start_rev = 0;
660 675 }
661 676 for rev in start_rev..end_rev {
662 677 if !self.is_snapshot_unchecked(Revision(rev))? {
663 678 continue;
664 679 }
665 680 let mut base = self
666 681 .get_entry(Revision(rev))
667 682 .unwrap()
668 683 .base_revision_or_base_of_delta_chain();
669 684 if base.0 == rev {
670 685 base = NULL_REVISION.into();
671 686 }
672 687 cache.insert_for(base.0, rev)?;
673 688 }
674 689 Ok(())
675 690 }
676 691
677 692 fn clear_head_revs(&self) {
678 693 self.head_revs
679 694 .write()
680 695 .expect("RwLock on Index.head_revs should not be poisoned")
681 696 .0
682 697 .clear()
683 698 }
684 699
685 700 /// TODO move this to the trait probably, along with other things
686 701 pub fn append(
687 702 &mut self,
688 703 revision_data: RevisionDataParams,
689 704 ) -> Result<(), RevlogError> {
690 705 revision_data.validate()?;
691 706 let new_offset = self.bytes.len();
692 707 if let Some(offsets) = &mut *self.get_offsets_mut() {
693 708 offsets.push(new_offset)
694 709 }
695 710 self.bytes.added.extend(revision_data.into_v1().as_bytes());
696 711 self.clear_head_revs();
697 712 Ok(())
698 713 }
699 714
700 715 pub fn pack_header(&self, header: i32) -> [u8; 4] {
701 716 header.to_be_bytes()
702 717 }
703 718
704 719 pub fn remove(&mut self, rev: Revision) -> Result<(), RevlogError> {
705 720 let offsets = self.get_offsets().clone();
706 721 self.bytes.remove(rev, offsets.as_deref())?;
707 722 if let Some(offsets) = &mut *self.get_offsets_mut() {
708 723 offsets.truncate(rev.0 as usize)
709 724 }
710 725 self.clear_head_revs();
711 726 Ok(())
712 727 }
713 728
714 729 pub fn clear_caches(&self) {
715 730 // We need to get the 'inline' value from Python at init and use this
716 731 // instead of offsets to determine whether we're inline since we might
717 732 // clear caches. This implies re-populating the offsets on-demand.
718 733 *self
719 734 .offsets
720 735 .write()
721 736 .expect("RwLock on Index.offsets should not be poisoned") = None;
722 737 self.clear_head_revs();
723 738 }
724 739
725 740 /// Unchecked version of `is_snapshot`.
726 741 /// Assumes the caller checked that `rev` is within a valid revision range.
727 742 pub fn is_snapshot_unchecked(
728 743 &self,
729 744 mut rev: Revision,
730 745 ) -> Result<bool, RevlogError> {
731 746 while rev.0 >= 0 {
732 747 let entry = self.get_entry(rev).unwrap();
733 748 let mut base = entry.base_revision_or_base_of_delta_chain().0;
734 749 if base == rev.0 {
735 750 base = NULL_REVISION.0;
736 751 }
737 752 if base == NULL_REVISION.0 {
738 753 return Ok(true);
739 754 }
740 755 let [mut p1, mut p2] = self
741 756 .parents(rev)
742 757 .map_err(|_| RevlogError::InvalidRevision)?;
743 758 while let Some(p1_entry) = self.get_entry(p1) {
744 759 if p1_entry.compressed_len() != 0 || p1.0 == 0 {
745 760 break;
746 761 }
747 762 let parent_base =
748 763 p1_entry.base_revision_or_base_of_delta_chain();
749 764 if parent_base.0 == p1.0 {
750 765 break;
751 766 }
752 767 p1 = self
753 768 .check_revision(parent_base)
754 769 .ok_or(RevlogError::InvalidRevision)?;
755 770 }
756 771 while let Some(p2_entry) = self.get_entry(p2) {
757 772 if p2_entry.compressed_len() != 0 || p2.0 == 0 {
758 773 break;
759 774 }
760 775 let parent_base =
761 776 p2_entry.base_revision_or_base_of_delta_chain();
762 777 if parent_base.0 == p2.0 {
763 778 break;
764 779 }
765 780 p2 = self
766 781 .check_revision(parent_base)
767 782 .ok_or(RevlogError::InvalidRevision)?;
768 783 }
769 784 if base == p1.0 || base == p2.0 {
770 785 return Ok(false);
771 786 }
772 787 rev = self
773 788 .check_revision(base.into())
774 789 .ok_or(RevlogError::InvalidRevision)?;
775 790 }
776 791 Ok(rev == NULL_REVISION)
777 792 }
778 793
779 794 /// Return whether the given revision is a snapshot. Returns an error if
780 795 /// `rev` is not within a valid revision range.
781 796 pub fn is_snapshot(
782 797 &self,
783 798 rev: UncheckedRevision,
784 799 ) -> Result<bool, RevlogError> {
785 800 let rev = self
786 801 .check_revision(rev)
787 802 .ok_or_else(|| RevlogError::corrupted("test"))?;
788 803 self.is_snapshot_unchecked(rev)
789 804 }
790 805
791 806 /// Slice revs to reduce the amount of unrelated data to be read from disk.
792 807 ///
793 808 /// The index is sliced into groups that should be read in one time.
794 809 ///
795 810 /// The initial chunk is sliced until the overall density
796 811 /// (payload/chunks-span ratio) is above `target_density`.
797 812 /// No gap smaller than `min_gap_size` is skipped.
798 813 pub fn slice_chunk_to_density(
799 814 &self,
800 815 revs: &[Revision],
801 816 target_density: f64,
802 817 min_gap_size: usize,
803 818 ) -> Vec<Vec<Revision>> {
804 819 if revs.is_empty() {
805 820 return vec![];
806 821 }
807 822 if revs.len() == 1 {
808 823 return vec![revs.to_owned()];
809 824 }
810 825 let delta_chain_span = self.segment_span(revs);
811 826 if delta_chain_span < min_gap_size {
812 827 return vec![revs.to_owned()];
813 828 }
814 829 let entries: Vec<_> = revs
815 830 .iter()
816 831 .map(|r| {
817 832 (*r, self.get_entry(*r).unwrap_or_else(|| self.null_entry()))
818 833 })
819 834 .collect();
820 835
821 836 let mut read_data = delta_chain_span;
822 837 let chain_payload: u32 =
823 838 entries.iter().map(|(_r, e)| e.compressed_len()).sum();
824 839 let mut density = if delta_chain_span > 0 {
825 840 chain_payload as f64 / delta_chain_span as f64
826 841 } else {
827 842 1.0
828 843 };
829 844
830 845 if density >= target_density {
831 846 return vec![revs.to_owned()];
832 847 }
833 848
834 849 // Collect the gaps; they are sorted below and popped from largest to smallest
835 850 let mut gaps = Vec::new();
836 851 let mut previous_end = None;
837 852
838 853 for (i, (_rev, entry)) in entries.iter().enumerate() {
839 854 let start = entry.c_start() as usize;
840 855 let length = entry.compressed_len();
841 856
842 857 // Skip empty revisions to form larger holes
843 858 if length == 0 {
844 859 continue;
845 860 }
846 861
847 862 if let Some(end) = previous_end {
848 863 let gap_size = start - end;
849 864 // Only consider holes that are large enough
850 865 if gap_size > min_gap_size {
851 866 gaps.push((gap_size, i));
852 867 }
853 868 }
854 869 previous_end = Some(start + length as usize);
855 870 }
856 871 if gaps.is_empty() {
857 872 return vec![revs.to_owned()];
858 873 }
859 874 // sort the gaps to pop them from largest to smallest
860 875 gaps.sort_unstable();
861 876
862 877 // Collect the indices of the largest holes until
863 878 // the density is acceptable
864 879 let mut selected = vec![];
865 880 while let Some((gap_size, gap_id)) = gaps.pop() {
866 881 if density >= target_density {
867 882 break;
868 883 }
869 884 selected.push(gap_id);
870 885
871 886 // The gaps were sorted in ascending order, so popping from the end
872 887 // removes the largest remaining gap from the amount of data read
873 888 read_data -= gap_size;
874 889 density = if read_data > 0 {
875 890 chain_payload as f64 / read_data as f64
876 891 } else {
877 892 1.0
878 893 };
879 894 if density >= target_density {
880 895 break;
881 896 }
882 897 }
883 898 selected.sort_unstable();
884 899 selected.push(revs.len());
885 900
886 901 // Cut the revs at collected indices
887 902 let mut previous_idx = 0;
888 903 let mut chunks = vec![];
889 904 for idx in selected {
890 905 let chunk = self.trim_chunk(&entries, previous_idx, idx);
891 906 if !chunk.is_empty() {
892 907 chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
893 908 }
894 909 previous_idx = idx;
895 910 }
896 911 let chunk = self.trim_chunk(&entries, previous_idx, entries.len());
897 912 if !chunk.is_empty() {
898 913 chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
899 914 }
900 915
901 916 chunks
902 917 }
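    // Editor's sketch (illustrative numbers, not from the source): the density
    // driving `slice_chunk_to_density` is the compressed payload divided by
    // the byte span actually read; skipping the largest gaps raises it until
    // `target_density` is reached.
    fn _density_arithmetic_sketch() {
        let chain_payload = 60.0_f64; // sum of compressed lengths
        let full_span = 200.0_f64; // from first start to last end, in bytes
        assert!(chain_payload / full_span < 0.5); // 0.3: keep splitting
        let span_minus_largest_gap = 80.0_f64; // a 120-byte gap got skipped
        assert!(chain_payload / span_minus_largest_gap >= 0.5); // 0.75: stop
    }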
903 918
904 919 /// Get the byte span of a segment of sorted revisions.
905 920 ///
906 921 /// Occurrences of [`NULL_REVISION`] are ignored at the beginning of
907 922 /// the `revs` segment.
908 923 ///
909 924 /// panics:
910 925 /// - if `revs` is empty or only made of `NULL_REVISION`
911 926 /// - if cannot retrieve entry for the last or first not null element of
912 927 /// `revs`.
913 928 fn segment_span(&self, revs: &[Revision]) -> usize {
914 929 if revs.is_empty() {
915 930 return 0;
916 931 }
917 932 let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
918 933 let end = last_entry.c_start() + last_entry.compressed_len() as u64;
919 934 let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
920 935 let start = if (*first_rev).0 == 0 {
921 936 0
922 937 } else {
923 938 self.get_entry(*first_rev).unwrap().c_start()
924 939 };
925 940 (end - start) as usize
926 941 }
927 942
928 943 /// Returns `&revs[startidx..endidx]` without empty trailing revs
929 944 fn trim_chunk<'a>(
930 945 &'a self,
931 946 revs: &'a [(Revision, IndexEntry)],
932 947 start: usize,
933 948 mut end: usize,
934 949 ) -> &'a [(Revision, IndexEntry)] {
935 950 // Trim empty revs at the end, except the very first rev of a chain
936 951 let last_rev = revs[end - 1].0;
937 952 if last_rev.0 < self.len() as BaseRevision {
938 953 while end > 1
939 954 && end > start
940 955 && revs[end - 1].1.compressed_len() == 0
941 956 {
942 957 end -= 1
943 958 }
944 959 }
945 960 &revs[start..end]
946 961 }
947 962
948 963 /// Computes the set of revisions for each non-public phase from `roots`,
949 964 /// which are the last known roots for each non-public phase.
950 965 pub fn compute_phases_map_sets(
951 966 &self,
952 967 roots: HashMap<Phase, Vec<Revision>>,
953 968 ) -> Result<(usize, RootsPerPhase), GraphError> {
954 969 let mut phases = HashMap::new();
955 970 let mut min_phase_rev = NULL_REVISION;
956 971
957 972 for phase in Phase::non_public_phases() {
958 973 if let Some(phase_roots) = roots.get(phase) {
959 974 let min_rev =
960 975 self.add_roots_get_min(phase_roots, &mut phases, *phase);
961 976 if min_rev != NULL_REVISION
962 977 && (min_phase_rev == NULL_REVISION
963 978 || min_rev < min_phase_rev)
964 979 {
965 980 min_phase_rev = min_rev;
966 981 }
967 982 } else {
968 983 continue;
969 984 };
970 985 }
971 986 let mut phase_sets: RootsPerPhase = Default::default();
972 987
973 988 if min_phase_rev == NULL_REVISION {
974 989 min_phase_rev = Revision(self.len() as BaseRevision);
975 990 }
976 991
977 992 for rev in min_phase_rev.0..self.len() as BaseRevision {
978 993 let rev = Revision(rev);
979 994 let [p1, p2] = self.parents(rev)?;
980 995
981 996 const DEFAULT_PHASE: &Phase = &Phase::Public;
982 997 if p1.0 >= 0
983 998 && phases.get(&p1).unwrap_or(DEFAULT_PHASE)
984 999 > phases.get(&rev).unwrap_or(DEFAULT_PHASE)
985 1000 {
986 1001 phases.insert(rev, phases[&p1]);
987 1002 }
988 1003 if p2.0 >= 0
989 1004 && phases.get(&p2).unwrap_or(DEFAULT_PHASE)
990 1005 > phases.get(&rev).unwrap_or(DEFAULT_PHASE)
991 1006 {
992 1007 phases.insert(rev, phases[&p2]);
993 1008 }
994 1009 let set = match phases.get(&rev).unwrap_or(DEFAULT_PHASE) {
995 1010 Phase::Public => continue,
996 1011 phase => &mut phase_sets[*phase as usize - 1],
997 1012 };
998 1013 set.insert(rev);
999 1014 }
1000 1015
1001 1016 Ok((self.len(), phase_sets))
1002 1017 }
1003 1018
1004 1019 fn add_roots_get_min(
1005 1020 &self,
1006 1021 phase_roots: &[Revision],
1007 1022 phases: &mut HashMap<Revision, Phase>,
1008 1023 phase: Phase,
1009 1024 ) -> Revision {
1010 1025 let mut min_rev = NULL_REVISION;
1011 1026
1012 1027 for root in phase_roots {
1013 1028 phases.insert(*root, phase);
1014 1029 if min_rev == NULL_REVISION || min_rev > *root {
1015 1030 min_rev = *root;
1016 1031 }
1017 1032 }
1018 1033 min_rev
1019 1034 }
1020 1035
1021 1036 /// Return `(heads(::(<roots> and <roots>::<heads>)))`
1022 1037 /// If `include_path` is `true`, return `(<roots>::<heads>)`.
1023 1038 ///
1024 1039 /// `min_root` and `roots` are unchecked since they are just used as
1025 1040 /// a bound or for comparison and don't need to represent a valid revision.
1026 1041 /// In practice, the only invalid revision passed is the working directory
1027 1042 /// revision ([`i32::MAX`]).
1028 1043 pub fn reachable_roots(
1029 1044 &self,
1030 1045 min_root: UncheckedRevision,
1031 1046 mut heads: Vec<Revision>,
1032 1047 roots: HashSet<UncheckedRevision>,
1033 1048 include_path: bool,
1034 1049 ) -> Result<HashSet<Revision>, GraphError> {
1035 1050 if roots.is_empty() {
1036 1051 return Ok(HashSet::new());
1037 1052 }
1038 1053 let mut reachable = HashSet::new();
1039 1054 let mut seen = HashMap::new();
1040 1055
1041 1056 while let Some(rev) = heads.pop() {
1042 1057 if roots.contains(&rev.into()) {
1043 1058 reachable.insert(rev);
1044 1059 if !include_path {
1045 1060 continue;
1046 1061 }
1047 1062 }
1048 1063 let parents = self.parents(rev)?;
1049 1064 seen.insert(rev, parents);
1050 1065 for parent in parents {
1051 1066 if parent.0 >= min_root.0 && !seen.contains_key(&parent) {
1052 1067 heads.push(parent);
1053 1068 }
1054 1069 }
1055 1070 }
1056 1071 if !include_path {
1057 1072 return Ok(reachable);
1058 1073 }
1059 1074 let mut revs: Vec<_> = seen.keys().collect();
1060 1075 revs.sort_unstable();
1061 1076 for rev in revs {
1062 1077 for parent in seen[rev] {
1063 1078 if reachable.contains(&parent) {
1064 1079 reachable.insert(*rev);
1065 1080 }
1066 1081 }
1067 1082 }
1068 1083 Ok(reachable)
1069 1084 }
1070 1085
1071 1086 /// Given a (possibly overlapping) set of revs, return all the
1072 1087 /// common ancestors heads: `heads(::args[0] and ::a[1] and ...)`
1073 1088 pub fn common_ancestor_heads(
1074 1089 &self,
1075 1090 revisions: &[Revision],
1076 1091 ) -> Result<Vec<Revision>, GraphError> {
1077 1092 // given that revisions is expected to be small, we find this shortcut
1078 1093 // potentially acceptable, especially given that `hg-cpython` could
1079 1094 // very much bypass this, constructing a vector of unique values from
1080 1095 // the outset.
1081 1096 let as_set: HashSet<Revision> = revisions.iter().copied().collect();
1082 1097 // Besides deduplicating, the C version also implements the shortcut
1083 1098 // for `NULL_REVISION`:
1084 1099 if as_set.contains(&NULL_REVISION) {
1085 1100 return Ok(vec![]);
1086 1101 }
1087 1102
1088 1103 let revisions: Vec<Revision> = as_set.into_iter().collect();
1089 1104
1090 1105 if revisions.len() < 8 {
1091 1106 self.find_gca_candidates::<u8>(&revisions)
1092 1107 } else if revisions.len() < 64 {
1093 1108 self.find_gca_candidates::<u64>(&revisions)
1094 1109 } else {
1095 1110 self.find_gca_candidates::<NonStaticPoisonableBitSet>(&revisions)
1096 1111 }
1097 1112 }
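    // Editor's sketch (illustrative only, not part of this changeset): the
    // candidate GCA heads are ancestors of every input revision, and
    // `ancestors()` below keeps only the deepest of them.
    fn _gca_relationship_sketch(
        &self,
        p1: Revision,
        p2: Revision,
    ) -> Result<(), GraphError> {
        let candidates = self.common_ancestor_heads(&[p1, p2])?;
        let deepest = self.ancestors(&[p1, p2])?;
        assert!(deepest.len() <= candidates.len());
        Ok(())
    }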
1098 1113
1099 1114 pub fn ancestors(
1100 1115 &self,
1101 1116 revisions: &[Revision],
1102 1117 ) -> Result<Vec<Revision>, GraphError> {
1103 1118 self.find_deepest_revs(&self.common_ancestor_heads(revisions)?)
1104 1119 }
1105 1120
1106 1121 /// Given a disjoint set of revs, return all candidates for the
1107 1122 /// greatest common ancestor. In revset notation, this is the set
1108 1123 /// `heads(::a and ::b and ...)`
1109 1124 fn find_gca_candidates<BS: PoisonableBitSet + Clone>(
1110 1125 &self,
1111 1126 revs: &[Revision],
1112 1127 ) -> Result<Vec<Revision>, GraphError> {
1113 1128 if revs.is_empty() {
1114 1129 return Ok(vec![]);
1115 1130 }
1116 1131 let revcount = revs.len();
1117 1132 let mut candidates = vec![];
1118 1133 let max_rev = revs.iter().max().unwrap();
1119 1134
1120 1135 let mut seen = BS::vec_of_empty(revs.len(), (max_rev.0 + 1) as usize);
1121 1136
1122 1137 for (idx, rev) in revs.iter().enumerate() {
1123 1138 seen[rev.0 as usize].add(idx);
1124 1139 }
1125 1140 let mut current_rev = *max_rev;
1126 1141 // Number of revisions whose inspection in the main loop
1127 1142 // will give a result or trigger inspection of other revisions
1128 1143 let mut interesting = revcount;
1129 1144
1130 1145 // The algorithm works on a vector of bit sets, indexed by revision
1131 1146 // numbers and iterated on reverse order.
1132 1147 // An entry in this vector is poisoned if and only if the corresponding
1133 1148 // revision is a common, yet not maximal ancestor.
1134 1149
1135 1150 // The principle of the algorithm is as follows:
1136 1151 // For a revision `r`, when entering the loop, `seen[r]` is either
1137 1152 // poisoned or the subset of `revs` of which `r` is an ancestor.
1138 1153 // If this subset is full, then `r` is a solution and its parents
1139 1154 // have to be poisoned.
1140 1155 //
1141 1156 // At each iteration, the bit sets of the parents are updated by
1142 1157 // union with `seen[r]`.
1143 1158 // As we walk the index from the end, we are sure we have encountered
1144 1159 // all children of `r` before `r`, hence we know that `seen[r]` is
1145 1160 // fully computed.
1146 1161 //
1147 1162 // On top of that there are several optimizations that make reading
1148 1163 // less obvious than the comment above:
1149 1164 // - The `interesting` counter allows breaking early
1150 1165 // - The loop starts from `max(revs)`
1151 1166 // - Early return in case it is detected that one of the incoming revs
1152 1167 // is a common ancestor of all of them.
1153 1168 while current_rev.0 >= 0 && interesting > 0 {
1154 1169 let current_seen = seen[current_rev.0 as usize].clone();
1155 1170
1156 1171 if current_seen.is_empty() {
1157 1172 current_rev = Revision(current_rev.0 - 1);
1158 1173 continue;
1159 1174 }
1160 1175 let mut poison = current_seen.is_poisoned();
1161 1176 if !poison {
1162 1177 interesting -= 1;
1163 1178 if current_seen.is_full_range(revcount) {
1164 1179 candidates.push(current_rev);
1165 1180 poison = true;
1166 1181
1167 1182 // Being a common ancestor, if `current_rev` is among
1168 1183 // the input revisions, it is *the* answer.
1169 1184 for rev in revs {
1170 1185 if *rev == current_rev {
1171 1186 return Ok(candidates);
1172 1187 }
1173 1188 }
1174 1189 }
1175 1190 }
1176 1191 for parent in self.parents(current_rev)? {
1177 1192 if parent == NULL_REVISION {
1178 1193 continue;
1179 1194 }
1180 1195 let parent_seen = &mut seen[parent.0 as usize];
1181 1196 if poison {
1182 1197 // this block is logically equivalent to poisoning parent
1183 1198 // and counting it as non interesting if it
1184 1199 // has been seen before (hence counted then as interesting)
1185 1200 if !parent_seen.is_empty() && !parent_seen.is_poisoned() {
1186 1201 interesting -= 1;
1187 1202 }
1188 1203 parent_seen.poison();
1189 1204 } else {
1190 1205 if parent_seen.is_empty() {
1191 1206 interesting += 1;
1192 1207 }
1193 1208 parent_seen.union(&current_seen);
1194 1209 }
1195 1210 }
1196 1211
1197 1212 current_rev = Revision(current_rev.0 - 1);
1198 1213 }
1199 1214
1200 1215 Ok(candidates)
1201 1216 }
1202 1217
1203 1218 /// Given a disjoint set of revs, return the subset with the longest path
1204 1219 /// to the root.
1205 1220 fn find_deepest_revs(
1206 1221 &self,
1207 1222 revs: &[Revision],
1208 1223 ) -> Result<Vec<Revision>, GraphError> {
1209 1224 // TODO replace this all with just comparing rank?
1210 1225 // Also, the original implementations in C/Python are cryptic, not
1211 1226 // even sure we actually need this?
1212 1227 if revs.len() <= 1 {
1213 1228 return Ok(revs.to_owned());
1214 1229 }
1215 1230 let max_rev = revs.iter().max().unwrap().0;
1216 1231 let mut interesting = HashMap::new();
1217 1232 let mut seen = vec![0; max_rev as usize + 1];
1218 1233 let mut depth = vec![0; max_rev as usize + 1];
1219 1234 let mut mapping = vec![];
1220 1235 let mut revs = revs.to_owned();
1221 1236 revs.sort_unstable();
1222 1237
1223 1238 for (idx, rev) in revs.iter().enumerate() {
1224 1239 depth[rev.0 as usize] = 1;
1225 1240 let shift = 1 << idx;
1226 1241 seen[rev.0 as usize] = shift;
1227 1242 interesting.insert(shift, 1);
1228 1243 mapping.push((shift, *rev));
1229 1244 }
1230 1245
1231 1246 let mut current_rev = Revision(max_rev);
1232 1247 while current_rev.0 >= 0 && interesting.len() > 1 {
1233 1248 let current_depth = depth[current_rev.0 as usize];
1234 1249 if current_depth == 0 {
1235 1250 current_rev = Revision(current_rev.0 - 1);
1236 1251 continue;
1237 1252 }
1238 1253
1239 1254 let current_seen = seen[current_rev.0 as usize];
1240 1255 for parent in self.parents(current_rev)? {
1241 1256 if parent == NULL_REVISION {
1242 1257 continue;
1243 1258 }
1244 1259 let parent_seen = seen[parent.0 as usize];
1245 1260 let parent_depth = depth[parent.0 as usize];
1246 1261 if parent_depth <= current_depth {
1247 1262 depth[parent.0 as usize] = current_depth + 1;
1248 1263 if parent_seen != current_seen {
1249 1264 *interesting.get_mut(&current_seen).unwrap() += 1;
1250 1265 seen[parent.0 as usize] = current_seen;
1251 1266 if parent_seen != 0 {
1252 1267 let parent_interesting =
1253 1268 interesting.get_mut(&parent_seen).unwrap();
1254 1269 *parent_interesting -= 1;
1255 1270 if *parent_interesting == 0 {
1256 1271 interesting.remove(&parent_seen);
1257 1272 }
1258 1273 }
1259 1274 }
1260 1275 } else if current_depth == parent_depth - 1 {
1261 1276 let either_seen = parent_seen | current_seen;
1262 1277 if either_seen == parent_seen {
1263 1278 continue;
1264 1279 }
1265 1280 seen[parent.0 as usize] = either_seen;
1266 1281 interesting
1267 1282 .entry(either_seen)
1268 1283 .and_modify(|v| *v += 1)
1269 1284 .or_insert(1);
1270 1285 *interesting.get_mut(&parent_seen).unwrap() -= 1;
1271 1286 if interesting[&parent_seen] == 0 {
1272 1287 interesting.remove(&parent_seen);
1273 1288 }
1274 1289 }
1275 1290 }
1276 1291 *interesting.get_mut(&current_seen).unwrap() -= 1;
1277 1292 if interesting[&current_seen] == 0 {
1278 1293 interesting.remove(&current_seen);
1279 1294 }
1280 1295
1281 1296 current_rev = Revision(current_rev.0 - 1);
1282 1297 }
1283 1298
1284 1299 if interesting.len() != 1 {
1285 1300 return Ok(vec![]);
1286 1301 }
1287 1302 let mask = interesting.keys().next().unwrap();
1288 1303
1289 1304 Ok(mapping
1290 1305 .into_iter()
1291 1306 .filter_map(|(shift, rev)| {
1292 1307 if (mask & shift) != 0 {
1293 1308 return Some(rev);
1294 1309 }
1295 1310 None
1296 1311 })
1297 1312 .collect())
1298 1313 }
1299 1314 }
1300 1315
1301 1316 /// The kind of functionality needed by find_gca_candidates
1302 1317 ///
1303 1318 /// This is a bit mask which can be declared to be "poisoned", which callers
1304 1319 /// interpret to break out of some loops.
1305 1320 ///
1306 1321 /// The maximum capacity of the bit mask is up to the actual implementation
1307 1322 trait PoisonableBitSet: Sized + PartialEq {
1308 1323 /// Return a vector of exactly n elements, initialized to be empty.
1309 1324 ///
1310 1325 /// Optimization can vastly depend on implementation. Those being `Copy`
1311 1326 /// and having constant capacity typically can have a very simple
1312 1327 /// implementation.
1313 1328 fn vec_of_empty(sets_size: usize, vec_len: usize) -> Vec<Self>;
1314 1329
1315 1330 /// The size of the bit mask in memory
1316 1331 fn size(&self) -> usize;
1317 1332
1318 1333 /// The number of elements that can be represented in the set.
1319 1334 ///
1320 1335 /// Another way to put it is that it is the highest integer `C` such that
1321 1336 /// the set is guaranteed to always be a subset of the integer range
1322 1337 /// `[0, C)`
1323 1338 fn capacity(&self) -> usize;
1324 1339
1325 1340 /// Declare `n` to belong to the set
1326 1341 fn add(&mut self, n: usize);
1327 1342
1328 1343 /// Declare `n` not to belong to the set
1329 1344 fn discard(&mut self, n: usize);
1330 1345
1331 1346 /// Replace this bit set by its union with other
1332 1347 fn union(&mut self, other: &Self);
1333 1348
1334 1349 /// Poison the bit set
1335 1350 ///
1336 1351 /// Interpretation up to the caller
1337 1352 fn poison(&mut self);
1338 1353
1339 1354 /// Is the bit set poisoned?
1340 1355 ///
1341 1356 /// Interpretation is up to the caller
1342 1357 fn is_poisoned(&self) -> bool;
1343 1358
1344 1359 /// Is the bit set empty?
1345 1360 fn is_empty(&self) -> bool;
1346 1361
1347 1362 /// Return `true` if and only if the bit set is the full range `[0, n)`
1348 1363 /// of integers
1349 1364 fn is_full_range(&self, n: usize) -> bool;
1350 1365 }
1351 1366
1352 1367 const U64_POISON: u64 = 1 << 63;
1353 1368 const U8_POISON: u8 = 1 << 7;
1354 1369
1355 1370 impl PoisonableBitSet for u64 {
1356 1371 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1357 1372 vec![0u64; vec_len]
1358 1373 }
1359 1374
1360 1375 fn size(&self) -> usize {
1361 1376 8
1362 1377 }
1363 1378
1364 1379 fn capacity(&self) -> usize {
1365 1380 63
1366 1381 }
1367 1382
1368 1383 fn add(&mut self, n: usize) {
1369 1384 (*self) |= 1u64 << n;
1370 1385 }
1371 1386
1372 1387 fn discard(&mut self, n: usize) {
1373 1388 (*self) &= u64::MAX - (1u64 << n);
1374 1389 }
1375 1390
1376 1391 fn union(&mut self, other: &Self) {
1377 1392 if *self != *other {
1378 1393 (*self) |= *other;
1379 1394 }
1380 1395 }
1381 1396
1382 1397 fn is_full_range(&self, n: usize) -> bool {
1383 1398 *self + 1 == (1u64 << n)
1384 1399 }
1385 1400
1386 1401 fn is_empty(&self) -> bool {
1387 1402 *self == 0
1388 1403 }
1389 1404
1390 1405 fn poison(&mut self) {
1391 1406 *self = U64_POISON;
1392 1407 }
1393 1408
1394 1409 fn is_poisoned(&self) -> bool {
1395 1410 // equality comparison would be tempting but would not resist
1396 1411 // operations after poisoning (even if these should be bogus).
1397 1412 *self >= U64_POISON
1398 1413 }
1399 1414 }
1400 1415
1401 1416 impl PoisonableBitSet for u8 {
1402 1417 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1403 1418 vec![0; vec_len]
1404 1419 }
1405 1420
1406 1421 fn size(&self) -> usize {
1407 1422 1
1408 1423 }
1409 1424
1410 1425 fn capacity(&self) -> usize {
1411 1426 7
1412 1427 }
1413 1428
1414 1429 fn add(&mut self, n: usize) {
1415 1430 (*self) |= 1 << n;
1416 1431 }
1417 1432
1418 1433 fn discard(&mut self, n: usize) {
1419 1434 (*self) &= u8::MAX - (1 << n);
1420 1435 }
1421 1436
1422 1437 fn union(&mut self, other: &Self) {
1423 1438 if *self != *other {
1424 1439 (*self) |= *other;
1425 1440 }
1426 1441 }
1427 1442
1428 1443 fn is_full_range(&self, n: usize) -> bool {
1429 1444 *self + 1 == (1 << n)
1430 1445 }
1431 1446
1432 1447 fn is_empty(&self) -> bool {
1433 1448 *self == 0
1434 1449 }
1435 1450
1436 1451 fn poison(&mut self) {
1437 1452 *self = U8_POISON;
1438 1453 }
1439 1454
1440 1455 fn is_poisoned(&self) -> bool {
1441 1456 // equality comparison would be tempting but would not resist
1442 1457 // operations after poisoning (even if these should be bogus).
1443 1458 *self >= U8_POISON
1444 1459 }
1445 1460 }
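// Editor's sketch (illustrative only, not part of this changeset): with at
// most 7 input revisions, the `u8` implementation above records, for each
// revision of the index, which of the inputs it is an ancestor of; bit 7 is
// reserved as the poison marker.
fn _u8_bitset_sketch() {
    let mut seen_child = 0u8;
    let mut seen_parent = 0u8;
    seen_child.add(0); // the child is input revision #0
    seen_parent.add(1); // the parent is input revision #1
    seen_parent.union(&seen_child); // walking the graph propagates bits
    assert!(seen_parent.is_full_range(2)); // ancestor of both inputs
    seen_parent.poison();
    assert!(seen_parent.is_poisoned());
}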
1446 1461
1447 1462 /// A poisonable bit set whose capacity is not known at compile time but
1448 1463 /// is constant after initial construction
1449 1464 ///
1450 1465 /// This can be way further optimized if performance assessments (speed
1451 1466 /// and/or RAM) require it.
1452 1467 /// As far as RAM is concerned, for large vectors of these, the main problem
1453 1468 /// would be the repetition of set_size in each item. We would need a trait
1454 1469 /// to abstract over the idea of a vector of such bit sets to do better.
1455 1470 #[derive(Clone, PartialEq)]
1456 1471 struct NonStaticPoisonableBitSet {
1457 1472 set_size: usize,
1458 1473 bit_set: Vec<u64>,
1459 1474 }
1460 1475
1461 1476 /// Number of `u64` needed for a [`NonStaticPoisonableBitSet`] of given size
1462 1477 fn non_static_poisonable_inner_len(set_size: usize) -> usize {
1463 1478 1 + (set_size + 1) / 64
1464 1479 }
1465 1480
1466 1481 impl NonStaticPoisonableBitSet {
1467 1482 /// The index of the sub-bit set for the given n, and the index inside
1468 1483 /// the latter
1469 1484 fn index(&self, n: usize) -> (usize, usize) {
1470 1485 (n / 64, n % 64)
1471 1486 }
1472 1487 }
1473 1488
1474 1489 /// Mock implementation to ensure that the trait makes sense
1475 1490 impl PoisonableBitSet for NonStaticPoisonableBitSet {
1476 1491 fn vec_of_empty(set_size: usize, vec_len: usize) -> Vec<Self> {
1477 1492 let tmpl = Self {
1478 1493 set_size,
1479 1494 bit_set: vec![0u64; non_static_poisonable_inner_len(set_size)],
1480 1495 };
1481 1496 vec![tmpl; vec_len]
1482 1497 }
1483 1498
1484 1499 fn size(&self) -> usize {
1485 1500 8 + self.bit_set.len() * 8
1486 1501 }
1487 1502
1488 1503 fn capacity(&self) -> usize {
1489 1504 self.set_size
1490 1505 }
1491 1506
1492 1507 fn add(&mut self, n: usize) {
1493 1508 let (sub_bs, bit_pos) = self.index(n);
1494 1509 self.bit_set[sub_bs] |= 1 << bit_pos
1495 1510 }
1496 1511
1497 1512 fn discard(&mut self, n: usize) {
1498 1513 let (sub_bs, bit_pos) = self.index(n);
1499 1514 self.bit_set[sub_bs] |= u64::MAX - (1 << bit_pos)
1500 1515 }
1501 1516
1502 1517 fn union(&mut self, other: &Self) {
1503 1518 assert!(
1504 1519 self.set_size == other.set_size,
1505 1520 "Binary operations on bit sets can only be done on same size"
1506 1521 );
1507 1522 for i in 0..self.bit_set.len() - 1 {
1508 1523 self.bit_set[i] |= other.bit_set[i]
1509 1524 }
1510 1525 }
1511 1526
1512 1527 fn is_full_range(&self, n: usize) -> bool {
1513 1528 let (sub_bs, bit_pos) = self.index(n);
1514 1529 self.bit_set[..sub_bs].iter().all(|bs| *bs == u64::MAX)
1515 1530 && self.bit_set[sub_bs] == (1 << (bit_pos + 1)) - 1
1516 1531 }
1517 1532
1518 1533 fn is_empty(&self) -> bool {
1519 1534 self.bit_set.iter().all(|bs| *bs == 0u64)
1520 1535 }
1521 1536
1522 1537 fn poison(&mut self) {
1523 1538 let (sub_bs, bit_pos) = self.index(self.set_size);
1524 1539 self.bit_set[sub_bs] = 1 << bit_pos;
1525 1540 }
1526 1541
1527 1542 fn is_poisoned(&self) -> bool {
1528 1543 let (sub_bs, bit_pos) = self.index(self.set_size);
1529 1544 self.bit_set[sub_bs] >= 1 << bit_pos
1530 1545 }
1531 1546 }
1532 1547
1533 1548 /// Set of roots of all non-public phases
1534 1549 pub type RootsPerPhase = [HashSet<Revision>; Phase::non_public_phases().len()];
1535 1550
1536 1551 #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
1537 1552 pub enum Phase {
1538 1553 Public = 0,
1539 1554 Draft = 1,
1540 1555 Secret = 2,
1541 1556 Archived = 3,
1542 1557 Internal = 4,
1543 1558 }
1544 1559
1545 1560 impl TryFrom<usize> for Phase {
1546 1561 type Error = RevlogError;
1547 1562
1548 1563 fn try_from(value: usize) -> Result<Self, Self::Error> {
1549 1564 Ok(match value {
1550 1565 0 => Self::Public,
1551 1566 1 => Self::Draft,
1552 1567 2 => Self::Secret,
1553 1568 32 => Self::Archived,
1554 1569 96 => Self::Internal,
1555 1570 v => {
1556 1571 return Err(RevlogError::corrupted(format!(
1557 1572 "invalid phase value {}",
1558 1573 v
1559 1574 )))
1560 1575 }
1561 1576 })
1562 1577 }
1563 1578 }
1564 1579
1565 1580 impl Phase {
1566 1581 pub const fn all_phases() -> &'static [Self] {
1567 1582 &[
1568 1583 Self::Public,
1569 1584 Self::Draft,
1570 1585 Self::Secret,
1571 1586 Self::Archived,
1572 1587 Self::Internal,
1573 1588 ]
1574 1589 }
1575 1590 pub const fn non_public_phases() -> &'static [Self] {
1576 1591 &[Self::Draft, Self::Secret, Self::Archived, Self::Internal]
1577 1592 }
1578 1593 }
1579 1594
1580 1595 fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
1581 1596 let mut offset: usize = 0;
1582 1597 let mut offsets = Vec::new();
1583 1598
1584 1599 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
1585 1600 offsets.push(offset);
1586 1601 let end = offset + INDEX_ENTRY_SIZE;
1587 1602 let entry = IndexEntry {
1588 1603 bytes: &bytes[offset..end],
1589 1604 offset_override: None,
1590 1605 };
1591 1606
1592 1607 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
1593 1608 }
1594 1609 (offset, offsets)
1595 1610 }
1596 1611
1597 1612 impl super::RevlogIndex for Index {
1598 1613 fn len(&self) -> usize {
1599 1614 self.len()
1600 1615 }
1601 1616
1602 1617 fn node(&self, rev: Revision) -> Option<&Node> {
1603 1618 if rev == NULL_REVISION {
1604 1619 return Some(&NULL_NODE);
1605 1620 }
1606 1621 self.get_entry(rev).map(|entry| entry.hash())
1607 1622 }
1608 1623 }
1609 1624
1610 1625 #[derive(Debug)]
1611 1626 pub struct IndexEntry<'a> {
1612 1627 bytes: &'a [u8],
1613 1628 /// Allows overriding the offset value of the entry.
1614 1629 ///
1615 1630 /// For interleaved index and data, the offset stored in the index
1616 1631 /// corresponds to the separated data offset.
1617 1632 /// It has to be overridden with the actual offset in the interleaved
1618 1633 /// index which is just after the index block.
1619 1634 ///
1620 1635 /// For separated index and data, the offset stored in the first index
1621 1636 /// entry is mixed with the index headers.
1622 1637 /// It has to be overridden with 0.
1623 1638 offset_override: Option<usize>,
1624 1639 }
1625 1640
1626 1641 impl<'a> IndexEntry<'a> {
1627 1642 /// Return the offset of the data.
1628 1643 pub fn offset(&self) -> usize {
1629 1644 if let Some(offset_override) = self.offset_override {
1630 1645 offset_override
1631 1646 } else {
1632 1647 let mut bytes = [0; 8];
1633 1648 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
1634 1649 BigEndian::read_u64(&bytes[..]) as usize
1635 1650 }
1636 1651 }
1637 1652 pub fn raw_offset(&self) -> u64 {
1638 1653 BigEndian::read_u64(&self.bytes[0..8])
1639 1654 }
1640 1655
1641 1656 /// Same result (except potentially for rev 0) as C `index_get_start()`
1642 1657 fn c_start(&self) -> u64 {
1643 1658 self.raw_offset() >> 16
1644 1659 }
1645 1660
1646 1661 pub fn flags(&self) -> u16 {
1647 1662 BigEndian::read_u16(&self.bytes[6..=7])
1648 1663 }
1649 1664
1650 1665 /// Return the compressed length of the data.
1651 1666 pub fn compressed_len(&self) -> u32 {
1652 1667 BigEndian::read_u32(&self.bytes[8..=11])
1653 1668 }
1654 1669
1655 1670 /// Return the uncompressed length of the data.
1656 1671 pub fn uncompressed_len(&self) -> i32 {
1657 1672 BigEndian::read_i32(&self.bytes[12..=15])
1658 1673 }
1659 1674
1660 1675 /// Return the revision upon which the data has been derived.
1661 1676 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
1662 1677 // TODO Maybe return an Option when base_revision == rev?
1663 1678 // Requires to add rev to IndexEntry
1664 1679
1665 1680 BigEndian::read_i32(&self.bytes[16..]).into()
1666 1681 }
1667 1682
1668 1683 pub fn link_revision(&self) -> UncheckedRevision {
1669 1684 BigEndian::read_i32(&self.bytes[20..]).into()
1670 1685 }
1671 1686
1672 1687 pub fn p1(&self) -> UncheckedRevision {
1673 1688 BigEndian::read_i32(&self.bytes[24..]).into()
1674 1689 }
1675 1690
1676 1691 pub fn p2(&self) -> UncheckedRevision {
1677 1692 BigEndian::read_i32(&self.bytes[28..]).into()
1678 1693 }
1679 1694
1680 1695 /// Return the hash of revision's full text.
1681 1696 ///
1682 1697 /// Currently, SHA-1 is used and only the first 20 bytes of this field
1683 1698 /// are used.
1684 1699 pub fn hash(&self) -> &'a Node {
1685 1700 (&self.bytes[32..52]).try_into().unwrap()
1686 1701 }
1687 1702
1688 1703 pub fn as_bytes(&self) -> &'a [u8] {
1689 1704 self.bytes
1690 1705 }
1691 1706 }
1692 1707
1693 1708 #[cfg(test)]
1694 1709 mod tests {
1695 1710 use super::*;
1696 1711 use crate::node::NULL_NODE;
1697 1712
1698 1713 #[cfg(test)]
1699 1714 #[derive(Debug, Copy, Clone)]
1700 1715 pub struct IndexEntryBuilder {
1701 1716 is_first: bool,
1702 1717 is_inline: bool,
1703 1718 is_general_delta: bool,
1704 1719 version: u16,
1705 1720 offset: usize,
1706 1721 compressed_len: usize,
1707 1722 uncompressed_len: usize,
1708 1723 base_revision_or_base_of_delta_chain: Revision,
1709 1724 link_revision: Revision,
1710 1725 p1: Revision,
1711 1726 p2: Revision,
1712 1727 node: Node,
1713 1728 }
1714 1729
1715 1730 #[cfg(test)]
1716 1731 impl IndexEntryBuilder {
1717 1732 #[allow(clippy::new_without_default)]
1718 1733 pub fn new() -> Self {
1719 1734 Self {
1720 1735 is_first: false,
1721 1736 is_inline: false,
1722 1737 is_general_delta: true,
1723 1738 version: 1,
1724 1739 offset: 0,
1725 1740 compressed_len: 0,
1726 1741 uncompressed_len: 0,
1727 1742 base_revision_or_base_of_delta_chain: Revision(0),
1728 1743 link_revision: Revision(0),
1729 1744 p1: NULL_REVISION,
1730 1745 p2: NULL_REVISION,
1731 1746 node: NULL_NODE,
1732 1747 }
1733 1748 }
1734 1749
1735 1750 pub fn is_first(&mut self, value: bool) -> &mut Self {
1736 1751 self.is_first = value;
1737 1752 self
1738 1753 }
1739 1754
1740 1755 pub fn with_inline(&mut self, value: bool) -> &mut Self {
1741 1756 self.is_inline = value;
1742 1757 self
1743 1758 }
1744 1759
1745 1760 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
1746 1761 self.is_general_delta = value;
1747 1762 self
1748 1763 }
1749 1764
1750 1765 pub fn with_version(&mut self, value: u16) -> &mut Self {
1751 1766 self.version = value;
1752 1767 self
1753 1768 }
1754 1769
1755 1770 pub fn with_offset(&mut self, value: usize) -> &mut Self {
1756 1771 self.offset = value;
1757 1772 self
1758 1773 }
1759 1774
1760 1775 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
1761 1776 self.compressed_len = value;
1762 1777 self
1763 1778 }
1764 1779
1765 1780 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
1766 1781 self.uncompressed_len = value;
1767 1782 self
1768 1783 }
1769 1784
1770 1785 pub fn with_base_revision_or_base_of_delta_chain(
1771 1786 &mut self,
1772 1787 value: Revision,
1773 1788 ) -> &mut Self {
1774 1789 self.base_revision_or_base_of_delta_chain = value;
1775 1790 self
1776 1791 }
1777 1792
1778 1793 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
1779 1794 self.link_revision = value;
1780 1795 self
1781 1796 }
1782 1797
1783 1798 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
1784 1799 self.p1 = value;
1785 1800 self
1786 1801 }
1787 1802
1788 1803 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
1789 1804 self.p2 = value;
1790 1805 self
1791 1806 }
1792 1807
1793 1808 pub fn with_node(&mut self, value: Node) -> &mut Self {
1794 1809 self.node = value;
1795 1810 self
1796 1811 }
1797 1812
1798 1813 pub fn build(&self) -> Vec<u8> {
1799 1814 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
1800 1815 if self.is_first {
1801 1816 bytes.extend(&match (self.is_general_delta, self.is_inline) {
1802 1817 (false, false) => [0u8, 0],
1803 1818 (false, true) => [0u8, 1],
1804 1819 (true, false) => [0u8, 2],
1805 1820 (true, true) => [0u8, 3],
1806 1821 });
1807 1822 bytes.extend(&self.version.to_be_bytes());
1808 1823 // Remaining offset bytes.
1809 1824 bytes.extend(&[0u8; 2]);
1810 1825 } else {
1811 1826 // Offset stored in 48 bits (6 bytes)
1812 1827 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
1813 1828 }
1814 1829 bytes.extend(&[0u8; 2]); // Revision flags.
1815 1830 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
1816 1831 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
1817 1832 bytes.extend(
1818 1833 &self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
1819 1834 );
1820 1835 bytes.extend(&self.link_revision.0.to_be_bytes());
1821 1836 bytes.extend(&self.p1.0.to_be_bytes());
1822 1837 bytes.extend(&self.p2.0.to_be_bytes());
1823 1838 bytes.extend(self.node.as_bytes());
1824 1839 bytes.extend(vec![0u8; 12]);
1825 1840 bytes
1826 1841 }
1827 1842 }
1828 1843
1829 1844 pub fn is_inline(index_bytes: &[u8]) -> bool {
1830 1845 IndexHeader::parse(index_bytes)
1831 1846 .expect("too short")
1832 1847 .unwrap()
1833 1848 .format_flags()
1834 1849 .is_inline()
1835 1850 }
1836 1851
1837 1852 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
1838 1853 IndexHeader::parse(index_bytes)
1839 1854 .expect("too short")
1840 1855 .unwrap()
1841 1856 .format_flags()
1842 1857 .uses_generaldelta()
1843 1858 }
1844 1859
1845 1860 pub fn get_version(index_bytes: &[u8]) -> u16 {
1846 1861 IndexHeader::parse(index_bytes)
1847 1862 .expect("too short")
1848 1863 .unwrap()
1849 1864 .format_version()
1850 1865 }
1851 1866
1852 1867 #[test]
1853 1868 fn flags_when_no_inline_flag_test() {
1854 1869 let bytes = IndexEntryBuilder::new()
1855 1870 .is_first(true)
1856 1871 .with_general_delta(false)
1857 1872 .with_inline(false)
1858 1873 .build();
1859 1874
1860 1875 assert!(!is_inline(&bytes));
1861 1876 assert!(!uses_generaldelta(&bytes));
1862 1877 }
1863 1878
1864 1879 #[test]
1865 1880 fn flags_when_inline_flag_test() {
1866 1881 let bytes = IndexEntryBuilder::new()
1867 1882 .is_first(true)
1868 1883 .with_general_delta(false)
1869 1884 .with_inline(true)
1870 1885 .build();
1871 1886
1872 1887 assert!(is_inline(&bytes));
1873 1888 assert!(!uses_generaldelta(&bytes));
1874 1889 }
1875 1890
1876 1891 #[test]
1877 1892 fn flags_when_inline_and_generaldelta_flags_test() {
1878 1893 let bytes = IndexEntryBuilder::new()
1879 1894 .is_first(true)
1880 1895 .with_general_delta(true)
1881 1896 .with_inline(true)
1882 1897 .build();
1883 1898
1884 1899 assert!(is_inline(&bytes));
1885 1900 assert!(uses_generaldelta(&bytes));
1886 1901 }
1887 1902
1888 1903 #[test]
1889 1904 fn test_offset() {
1890 1905 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1891 1906 let entry = IndexEntry {
1892 1907 bytes: &bytes,
1893 1908 offset_override: None,
1894 1909 };
1895 1910
1896 1911 assert_eq!(entry.offset(), 1)
1897 1912 }
1898 1913
1899 1914 #[test]
1900 1915 fn test_with_overridden_offset() {
1901 1916 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1902 1917 let entry = IndexEntry {
1903 1918 bytes: &bytes,
1904 1919 offset_override: Some(2),
1905 1920 };
1906 1921
1907 1922 assert_eq!(entry.offset(), 2)
1908 1923 }
1909 1924
1910 1925 #[test]
1911 1926 fn test_compressed_len() {
1912 1927 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
1913 1928 let entry = IndexEntry {
1914 1929 bytes: &bytes,
1915 1930 offset_override: None,
1916 1931 };
1917 1932
1918 1933 assert_eq!(entry.compressed_len(), 1)
1919 1934 }
1920 1935
1921 1936 #[test]
1922 1937 fn test_uncompressed_len() {
1923 1938 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
1924 1939 let entry = IndexEntry {
1925 1940 bytes: &bytes,
1926 1941 offset_override: None,
1927 1942 };
1928 1943
1929 1944 assert_eq!(entry.uncompressed_len(), 1)
1930 1945 }
1931 1946
1932 1947 #[test]
1933 1948 fn test_base_revision_or_base_of_delta_chain() {
1934 1949 let bytes = IndexEntryBuilder::new()
1935 1950 .with_base_revision_or_base_of_delta_chain(Revision(1))
1936 1951 .build();
1937 1952 let entry = IndexEntry {
1938 1953 bytes: &bytes,
1939 1954 offset_override: None,
1940 1955 };
1941 1956
1942 1957 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
1943 1958 }
1944 1959
1945 1960 #[test]
1946 1961 fn link_revision_test() {
1947 1962 let bytes = IndexEntryBuilder::new()
1948 1963 .with_link_revision(Revision(123))
1949 1964 .build();
1950 1965
1951 1966 let entry = IndexEntry {
1952 1967 bytes: &bytes,
1953 1968 offset_override: None,
1954 1969 };
1955 1970
1956 1971 assert_eq!(entry.link_revision(), 123.into());
1957 1972 }
1958 1973
1959 1974 #[test]
1960 1975 fn p1_test() {
1961 1976 let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();
1962 1977
1963 1978 let entry = IndexEntry {
1964 1979 bytes: &bytes,
1965 1980 offset_override: None,
1966 1981 };
1967 1982
1968 1983 assert_eq!(entry.p1(), 123.into());
1969 1984 }
1970 1985
1971 1986 #[test]
1972 1987 fn p2_test() {
1973 1988 let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();
1974 1989
1975 1990 let entry = IndexEntry {
1976 1991 bytes: &bytes,
1977 1992 offset_override: None,
1978 1993 };
1979 1994
1980 1995 assert_eq!(entry.p2(), 123.into());
1981 1996 }
1982 1997
1983 1998 #[test]
1984 1999 fn node_test() {
1985 2000 let node = Node::from_hex("0123456789012345678901234567890123456789")
1986 2001 .unwrap();
1987 2002 let bytes = IndexEntryBuilder::new().with_node(node).build();
1988 2003
1989 2004 let entry = IndexEntry {
1990 2005 bytes: &bytes,
1991 2006 offset_override: None,
1992 2007 };
1993 2008
1994 2009 assert_eq!(*entry.hash(), node);
1995 2010 }
1996 2011
1997 2012 #[test]
1998 2013 fn version_test() {
1999 2014 let bytes = IndexEntryBuilder::new()
2000 2015 .is_first(true)
2001 2016 .with_version(2)
2002 2017 .build();
2003 2018
2004 2019 assert_eq!(get_version(&bytes), 2)
2005 2020 }
2006 2021 }
2007 2022
2008 2023 #[cfg(test)]
2009 2024 pub use tests::IndexEntryBuilder;
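// Sketch only, not part of this changeset: how `IndexEntryBuilder` and
// `inline_scan` above fit together, assuming the builder defaults
// (compressed_len == 0) so consecutive inline entries sit back to back.
// The module and test names are hypothetical.
#[cfg(test)]
mod inline_scan_sketch {
    use super::*;

    #[test]
    fn scans_back_to_back_entries() {
        // The first entry of an inline revlog carries the header in place
        // of an offset; both entries have no trailing data payload.
        let first = IndexEntryBuilder::new()
            .is_first(true)
            .with_inline(true)
            .build();
        let second = IndexEntryBuilder::new().build();
        let mut bytes = Vec::new();
        bytes.extend_from_slice(&first);
        bytes.extend_from_slice(&second);
        let (end, offsets) = inline_scan(&bytes);
        // With zero-length data, each entry starts right after the previous
        // one and the scan consumes the whole buffer.
        assert_eq!(offsets, vec![0, INDEX_ENTRY_SIZE]);
        assert_eq!(end, 2 * INDEX_ENTRY_SIZE);
    }
}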
@@ -1,1126 +1,1157 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else},
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 PyRevision,
12 12 };
13 13 use cpython::{
14 14 buffer::{Element, PyBuffer},
15 15 exc::{IndexError, ValueError},
16 16 ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyInt, PyList,
17 17 PyModule, PyObject, PyResult, PySet, PyString, PyTuple, Python,
18 18 PythonObject, ToPyObject, UnsafePyLeaked,
19 19 };
20 20 use hg::{
21 21 errors::HgError,
22 22 index::{
23 23 IndexHeader, Phase, RevisionDataParams, SnapshotsCache,
24 24 INDEX_ENTRY_SIZE,
25 25 },
26 26 nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
27 27 revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex},
28 28 BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION,
29 29 };
30 30 use std::{cell::RefCell, collections::HashMap};
31 31 use vcsgraph::graph::Graph as VCSGraph;
32 32
33 33 pub struct PySharedIndex {
34 34 /// The underlying hg-core index
35 35 pub(crate) inner: &'static hg::index::Index,
36 36 }
37 37
38 38 /// Return a struct implementing the Graph trait
39 39 pub(crate) fn py_rust_index_to_graph(
40 40 py: Python,
41 41 index: PyObject,
42 42 ) -> PyResult<UnsafePyLeaked<PySharedIndex>> {
43 43 let midx = index.extract::<Index>(py)?;
44 44 let leaked = midx.index(py).leak_immutable();
45 45 // Safety: we don't leak the "faked" reference out of the `UnsafePyLeaked`
46 46 Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: idx }) })
47 47 }
48 48
49 49 impl Clone for PySharedIndex {
50 50 fn clone(&self) -> Self {
51 51 Self { inner: self.inner }
52 52 }
53 53 }
54 54
55 55 impl Graph for PySharedIndex {
56 56 #[inline(always)]
57 57 fn parents(&self, rev: Revision) -> Result<[Revision; 2], hg::GraphError> {
58 58 self.inner.parents(rev)
59 59 }
60 60 }
61 61
62 62 impl VCSGraph for PySharedIndex {
63 63 #[inline(always)]
64 64 fn parents(
65 65 &self,
66 66 rev: BaseRevision,
67 67 ) -> Result<vcsgraph::graph::Parents, vcsgraph::graph::GraphReadError>
68 68 {
69 69 // FIXME This trait should be reworked to decide between Revision
70 70 // and UncheckedRevision, get better error names, etc.
71 71 match Graph::parents(self, Revision(rev)) {
72 72 Ok(parents) => {
73 73 Ok(vcsgraph::graph::Parents([parents[0].0, parents[1].0]))
74 74 }
75 75 Err(hg::GraphError::ParentOutOfRange(rev)) => {
76 76 Err(vcsgraph::graph::GraphReadError::KeyedInvalidKey(rev.0))
77 77 }
78 78 }
79 79 }
80 80 }
81 81
82 82 impl RevlogIndex for PySharedIndex {
83 83 fn len(&self) -> usize {
84 84 self.inner.len()
85 85 }
86 86 fn node(&self, rev: Revision) -> Option<&Node> {
87 87 self.inner.node(rev)
88 88 }
89 89 }
90 90
91 91 py_class!(pub class Index |py| {
92 92 @shared data index: hg::index::Index;
93 93 data nt: RefCell<Option<CoreNodeTree>>;
94 94 data docket: RefCell<Option<PyObject>>;
95 95 // Holds a reference to the mmap'ed persistent nodemap data
96 96 data nodemap_mmap: RefCell<Option<PyBuffer>>;
97 97 // Holds a reference to the mmap'ed persistent index data
98 98 data index_mmap: RefCell<Option<PyBuffer>>;
99 data head_revs_py_list: RefCell<Option<PyList>>;
99 100
100 101 def __new__(
101 102 _cls,
102 103 data: PyObject,
103 104 default_header: u32,
104 105 ) -> PyResult<Self> {
105 106 Self::new(py, data, default_header)
106 107 }
107 108
108 109 /// Compatibility layer used for Python consumers needing access to the C index
109 110 ///
110 111 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
111 112 /// that may need to build a custom `nodetree`, based on a specified revset.
112 113 /// With a Rust implementation of the nodemap, we will be able to get rid of
113 114 /// this, by exposing our own standalone nodemap class,
114 115 /// ready to accept `Index`.
115 116 /* def get_cindex(&self) -> PyResult<PyObject> {
116 117 Ok(self.cindex(py).borrow().inner().clone_ref(py))
117 118 }
118 119 */
119 120 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
120 121
121 122 /// Return Revision if found, raises a bare `error.RevlogError`
122 123 /// in case of ambiguity, same as C version does
123 124 def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
124 125 let opt = self.get_nodetree(py)?.borrow();
125 126 let nt = opt.as_ref().unwrap();
126 127 let ridx = &*self.index(py).borrow();
127 128 let node = node_from_py_bytes(py, &node)?;
128 129 let rust_rev =
129 130 nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
130 131 Ok(rust_rev.map(Into::into))
131 132
132 133 }
133 134
134 135 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
135 136 /// is not found.
136 137 ///
137 138 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
138 139 /// will catch and rewrap with it
139 140 def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
140 141 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
141 142 }
142 143
143 144 /// return True if the node exists in the index
144 145 def has_node(&self, node: PyBytes) -> PyResult<bool> {
145 146 // TODO OPTIM we could avoid a needless conversion here,
146 147 // to do when scaffolding for pure Rust switch is removed,
147 148 // as `get_rev()` currently does the necessary assertions
148 149 self.get_rev(py, node).map(|opt| opt.is_some())
149 150 }
150 151
151 152 /// find the length of the shortest unambiguous hex nodeid prefix for a binary node ID
152 153 def shortest(&self, node: PyBytes) -> PyResult<usize> {
153 154 let opt = self.get_nodetree(py)?.borrow();
154 155 let nt = opt.as_ref().unwrap();
155 156 let idx = &*self.index(py).borrow();
156 157 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
157 158 {
158 159 Ok(Some(l)) => Ok(l),
159 160 Ok(None) => Err(revlog_error(py)),
160 161 Err(e) => Err(nodemap_error(py, e)),
161 162 }
162 163 }
163 164
164 165 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
165 166 let opt = self.get_nodetree(py)?.borrow();
166 167 let nt = opt.as_ref().unwrap();
167 168 let idx = &*self.index(py).borrow();
168 169
169 170 let node_as_string = if cfg!(feature = "python3-sys") {
170 171 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
171 172 }
172 173 else {
173 174 let node = node.extract::<PyBytes>(py)?;
174 175 String::from_utf8_lossy(node.data(py)).to_string()
175 176 };
176 177
177 178 let prefix = NodePrefix::from_hex(&node_as_string)
178 179 .map_err(|_| PyErr::new::<ValueError, _>(
179 180 py, format!("Invalid node or prefix '{}'", node_as_string))
180 181 )?;
181 182
182 183 nt.find_bin(idx, prefix)
183 184 // TODO make an inner API returning the node directly
184 185 .map(|opt| opt.map(
185 186 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
186 187 .map_err(|e| nodemap_error(py, e))
187 188
188 189 }
189 190
190 191 /// append an index entry
191 192 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
192 193 if tup.len(py) < 8 {
193 194 // this is better than the panic promised by tup.get_item()
194 195 return Err(
195 196 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
196 197 }
197 198 let node_bytes = tup.get_item(py, 7).extract(py)?;
198 199 let node = node_from_py_object(py, &node_bytes)?;
199 200
200 201 let rev = self.len(py)? as BaseRevision;
201 202
202 203 // This is ok since we will just add the revision to the index
203 204 let rev = Revision(rev);
204 205 self.index(py)
205 206 .borrow_mut()
206 207 .append(py_tuple_to_revision_data_params(py, tup)?)
207 208 .unwrap();
208 209 let idx = &*self.index(py).borrow();
209 210 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
210 211 .insert(idx, &node, rev)
211 212 .map_err(|e| nodemap_error(py, e))?;
212 213 Ok(py.None())
213 214 }
214 215
215 216 def __delitem__(&self, key: PyObject) -> PyResult<()> {
216 217 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
217 218 let start = if let Ok(rev) = key.extract(py) {
218 219 UncheckedRevision(rev)
219 220 } else {
220 221 let start = key.getattr(py, "start")?;
221 222 UncheckedRevision(start.extract(py)?)
222 223 };
223 224 let start = self.index(py)
224 225 .borrow()
225 226 .check_revision(start)
226 227 .ok_or_else(|| {
227 228 nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
228 229 })?;
229 230 self.index(py).borrow_mut().remove(start).unwrap();
230 231 let mut opt = self.get_nodetree(py)?.borrow_mut();
231 232 let nt = opt.as_mut().unwrap();
232 233 nt.invalidate_all();
233 234 self.fill_nodemap(py, nt)?;
234 235 Ok(())
235 236 }
236 237
237 238 //
238 239 // Index methods previously reforwarded to C index (tp_methods)
239 240 // Same ordering as in revlog.c
240 241 //
241 242
242 243 /// return the gca set of the given revs
243 244 def ancestors(&self, *args, **_kw) -> PyResult<PyObject> {
244 245 let rust_res = self.inner_ancestors(py, args)?;
245 246 Ok(rust_res)
246 247 }
247 248
248 249 /// return the heads of the common ancestors of the given revs
249 250 def commonancestorsheads(&self, *args, **_kw) -> PyResult<PyObject> {
250 251 let rust_res = self.inner_commonancestorsheads(py, args)?;
251 252 Ok(rust_res)
252 253 }
253 254
254 255 /// Clear the index caches and inner py_class data.
255 256 /// It is Python's responsibility to call `update_nodemap_data` again.
256 257 def clearcaches(&self) -> PyResult<PyObject> {
257 258 self.nt(py).borrow_mut().take();
258 259 self.docket(py).borrow_mut().take();
259 260 self.nodemap_mmap(py).borrow_mut().take();
261 self.head_revs_py_list(py).borrow_mut().take();
260 262 self.index(py).borrow().clear_caches();
261 263 Ok(py.None())
262 264 }
263 265
264 266 /// return the raw binary string representing a revision
265 267 def entry_binary(&self, *args, **_kw) -> PyResult<PyObject> {
266 268 let rindex = self.index(py).borrow();
267 269 let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
268 270 let rust_bytes = rindex.check_revision(rev).and_then(
269 271 |r| rindex.entry_binary(r))
270 272 .ok_or_else(|| rev_not_in_index(py, rev))?;
271 273 let rust_res = PyBytes::new(py, rust_bytes).into_object();
272 274 Ok(rust_res)
273 275 }
274 276
275 277 /// return a binary packed version of the header
276 278 def pack_header(&self, *args, **_kw) -> PyResult<PyObject> {
277 279 let rindex = self.index(py).borrow();
278 280 let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
279 281 let rust_res = PyBytes::new(py, &packed).into_object();
280 282 Ok(rust_res)
281 283 }
282 284
283 285 /// compute phases
284 286 def computephasesmapsets(&self, *args, **_kw) -> PyResult<PyObject> {
285 287 let py_roots = args.get_item(py, 0).extract::<PyDict>(py)?;
286 288 let rust_res = self.inner_computephasesmapsets(py, py_roots)?;
287 289 Ok(rust_res)
288 290 }
289 291
290 292 /// reachableroots
291 293 def reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> {
292 294 let rust_res = self.inner_reachableroots2(
293 295 py,
294 296 UncheckedRevision(args.get_item(py, 0).extract(py)?),
295 297 args.get_item(py, 1),
296 298 args.get_item(py, 2),
297 299 args.get_item(py, 3).extract(py)?,
298 300 )?;
299 301 Ok(rust_res)
300 302 }
301 303
302 304 /// get head revisions
303 305 def headrevs(&self) -> PyResult<PyObject> {
304 306 let rust_res = self.inner_headrevs(py)?;
305 307 Ok(rust_res)
306 308 }
307 309
308 310 /// get filtered head revisions
309 311 def headrevsfiltered(&self, *args, **_kw) -> PyResult<PyObject> {
310 312 let rust_res = self.inner_headrevsfiltered(py, &args.get_item(py, 0))?;
311 313 Ok(rust_res)
312 314 }
313 315
314 316 /// True if the object is a snapshot
315 317 def issnapshot(&self, *args, **_kw) -> PyResult<bool> {
316 318 let index = self.index(py).borrow();
317 319 let result = index
318 320 .is_snapshot(UncheckedRevision(args.get_item(py, 0).extract(py)?))
319 321 .map_err(|e| {
320 322 PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
321 323 })?;
322 324 Ok(result)
323 325 }
324 326
325 327 /// Gather snapshot data in a cache dict
326 328 def findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> {
327 329 let index = self.index(py).borrow();
328 330 let cache: PyDict = args.get_item(py, 0).extract(py)?;
329 331 // this methods operates by setting new values in the cache,
330 332 // hence we will compare results by letting the C implementation
331 333 // operate over a deepcopy of the cache, and finally compare both
332 334 // caches.
333 335 let c_cache = PyDict::new(py);
334 336 for (k, v) in cache.items(py) {
335 337 c_cache.set_item(py, k, PySet::new(py, v)?)?;
336 338 }
337 339
338 340 let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
339 341 let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
340 342 let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
341 343 index.find_snapshots(
342 344 start_rev,
343 345 end_rev,
344 346 &mut cache_wrapper,
345 347 ).map_err(|_| revlog_error(py))?;
346 348 Ok(py.None())
347 349 }
348 350
349 351 /// determine revisions with deltas to reconstruct fulltext
350 352 def deltachain(&self, *args, **_kw) -> PyResult<PyObject> {
351 353 let index = self.index(py).borrow();
352 354 let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
353 355 let stop_rev =
354 356 args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
355 357 let rev = index.check_revision(rev).ok_or_else(|| {
356 358 nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
357 359 })?;
358 360 let stop_rev = if let Some(stop_rev) = stop_rev {
359 361 let stop_rev = UncheckedRevision(stop_rev);
360 362 Some(index.check_revision(stop_rev).ok_or_else(|| {
361 363 nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
362 364 })?)
363 365 } else {None};
364 366 let using_general_delta = args.get_item(py, 2)
365 367 .extract::<Option<u32>>(py)?
366 368 .map(|i| i != 0);
367 369 let (chain, stopped) = index.delta_chain(
368 370 rev, stop_rev, using_general_delta
369 371 ).map_err(|e| {
370 372 PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
371 373 })?;
372 374
373 375 let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
374 376 Ok(
375 377 PyTuple::new(
376 378 py,
377 379 &[
378 380 chain.into_py_object(py).into_object(),
379 381 stopped.into_py_object(py).into_object()
380 382 ]
381 383 ).into_object()
382 384 )
383 385
384 386 }
385 387
386 388 /// slice planned chunk read to reach a density threshold
387 389 def slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> {
388 390 let rust_res = self.inner_slicechunktodensity(
389 391 py,
390 392 args.get_item(py, 0),
391 393 args.get_item(py, 1).extract(py)?,
392 394 args.get_item(py, 2).extract(py)?
393 395 )?;
394 396 Ok(rust_res)
395 397 }
396 398
397 399 // index_sequence_methods and index_mapping_methods.
398 400 //
399 401 // Since we call back through the high level Python API,
400 402 // there's no point making a distinction between index_get
401 403 // and index_getitem.
402 404 // gracinet 2023: this above is no longer true for the pure Rust impl
403 405
404 406 def __len__(&self) -> PyResult<usize> {
405 407 self.len(py)
406 408 }
407 409
408 410 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
409 411 let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
410 412 Ok(rust_res)
411 413 }
412 414
413 415 def __contains__(&self, item: PyObject) -> PyResult<bool> {
414 416 // ObjectProtocol does not seem to provide contains(), so
415 417 // this is an equivalent implementation of the index_contains()
416 418 // defined in revlog.c
417 419 match item.extract::<i32>(py) {
418 420 Ok(rev) => {
419 421 Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
420 422 }
421 423 Err(_) => {
422 424 let item_bytes: PyBytes = item.extract(py)?;
423 425 let rust_res = self.has_node(py, item_bytes)?;
424 426 Ok(rust_res)
425 427 }
426 428 }
427 429 }
428 430
429 431 def nodemap_data_all(&self) -> PyResult<PyBytes> {
430 432 self.inner_nodemap_data_all(py)
431 433 }
432 434
433 435 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
434 436 self.inner_nodemap_data_incremental(py)
435 437 }
436 438 def update_nodemap_data(
437 439 &self,
438 440 docket: PyObject,
439 441 nm_data: PyObject
440 442 ) -> PyResult<PyObject> {
441 443 self.inner_update_nodemap_data(py, docket, nm_data)
442 444 }
443 445
444 446 @property
445 447 def entry_size(&self) -> PyResult<PyInt> {
446 448 let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
447 449 Ok(rust_res)
448 450 }
449 451
450 452 @property
451 453 def rust_ext_compat(&self) -> PyResult<PyInt> {
452 454 // will be entirely removed once the Rust index is the only one in use; still
453 455 // useful to implement in Rust to detangle things when removing `self.cindex`
454 456 let rust_res: PyInt = 1.to_py_object(py);
455 457 Ok(rust_res)
456 458 }
457 459
458 460 @property
459 461 def is_rust(&self) -> PyResult<PyBool> {
460 462 Ok(false.to_py_object(py))
461 463 }
462 464
463 465 });
464 466
465 467 /// Take a (potentially) mmap'ed buffer, and return the underlying Python
466 468 /// buffer along with the Rust slice into said buffer. We need to keep the
467 469 /// Python buffer around, otherwise we'd get a dangling pointer once the buffer
468 470 /// is freed from Python's side.
469 471 ///
470 472 /// # Safety
471 473 ///
472 474 /// The caller must make sure that the buffer is kept around for at least as
473 475 /// long as the slice.
474 476 #[deny(unsafe_op_in_unsafe_fn)]
475 477 unsafe fn mmap_keeparound(
476 478 py: Python,
477 479 data: PyObject,
478 480 ) -> PyResult<(
479 481 PyBuffer,
480 482 Box<dyn std::ops::Deref<Target = [u8]> + Send + Sync + 'static>,
481 483 )> {
482 484 let buf = PyBuffer::get(py, &data)?;
483 485 let len = buf.item_count();
484 486
485 487 // Build a slice from the mmap'ed buffer data
486 488 let cbuf = buf.buf_ptr();
487 489 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
488 490 && buf.is_c_contiguous()
489 491 && u8::is_compatible_format(buf.format())
490 492 {
491 493 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
492 494 } else {
493 495 return Err(PyErr::new::<ValueError, _>(
494 496 py,
495 497 "Nodemap data buffer has an invalid memory representation"
496 498 .to_string(),
497 499 ));
498 500 };
499 501
500 502 Ok((buf, Box::new(bytes)))
501 503 }
502 504
503 505 fn py_tuple_to_revision_data_params(
504 506 py: Python,
505 507 tuple: PyTuple,
506 508 ) -> PyResult<RevisionDataParams> {
507 509 if tuple.len(py) < 8 {
508 510 // this is better than the panic promised by tup.get_item()
509 511 return Err(PyErr::new::<IndexError, _>(
510 512 py,
511 513 "tuple index out of range",
512 514 ));
513 515 }
514 516 let offset_or_flags: u64 = tuple.get_item(py, 0).extract(py)?;
515 517 let node_id = tuple
516 518 .get_item(py, 7)
517 519 .extract::<PyBytes>(py)?
518 520 .data(py)
519 521 .try_into()
520 522 .unwrap();
521 523 let flags = (offset_or_flags & 0xFFFF) as u16;
522 524 let data_offset = offset_or_flags >> 16;
523 525 Ok(RevisionDataParams {
524 526 flags,
525 527 data_offset,
526 528 data_compressed_length: tuple.get_item(py, 1).extract(py)?,
527 529 data_uncompressed_length: tuple.get_item(py, 2).extract(py)?,
528 530 data_delta_base: tuple.get_item(py, 3).extract(py)?,
529 531 link_rev: tuple.get_item(py, 4).extract(py)?,
530 532 parent_rev_1: tuple.get_item(py, 5).extract(py)?,
531 533 parent_rev_2: tuple.get_item(py, 6).extract(py)?,
532 534 node_id,
533 535 ..Default::default()
534 536 })
535 537 }
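// Illustrative aside, not part of this changeset: the first tuple item packs
// the 48-bit data offset and the 16-bit revision flags into one integer,
// mirroring the on-disk layout that `IndexEntry::raw_offset`/`c_start` read
// back (offset == raw >> 16). The test name below is hypothetical.
#[cfg(test)]
#[test]
fn offset_flags_packing_sketch() {
    let offset_or_flags: u64 = (42 << 16) | 0x0002;
    assert_eq!((offset_or_flags & 0xFFFF) as u16, 2); // revision flags
    assert_eq!(offset_or_flags >> 16, 42); // data offset
}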
536 538 fn revision_data_params_to_py_tuple(
537 539 py: Python,
538 540 params: RevisionDataParams,
539 541 ) -> PyTuple {
540 542 PyTuple::new(
541 543 py,
542 544 &[
543 545 params.data_offset.into_py_object(py).into_object(),
544 546 params
545 547 .data_compressed_length
546 548 .into_py_object(py)
547 549 .into_object(),
548 550 params
549 551 .data_uncompressed_length
550 552 .into_py_object(py)
551 553 .into_object(),
552 554 params.data_delta_base.into_py_object(py).into_object(),
553 555 params.link_rev.into_py_object(py).into_object(),
554 556 params.parent_rev_1.into_py_object(py).into_object(),
555 557 params.parent_rev_2.into_py_object(py).into_object(),
556 558 PyBytes::new(py, &params.node_id)
557 559 .into_py_object(py)
558 560 .into_object(),
559 561 params._sidedata_offset.into_py_object(py).into_object(),
560 562 params
561 563 ._sidedata_compressed_length
562 564 .into_py_object(py)
563 565 .into_object(),
564 566 params
565 567 .data_compression_mode
566 568 .into_py_object(py)
567 569 .into_object(),
568 570 params
569 571 ._sidedata_compression_mode
570 572 .into_py_object(py)
571 573 .into_object(),
572 574 params._rank.into_py_object(py).into_object(),
573 575 ],
574 576 )
575 577 }
576 578
577 579 struct PySnapshotsCache<'p> {
578 580 py: Python<'p>,
579 581 dict: PyDict,
580 582 }
581 583
582 584 impl<'p> SnapshotsCache for PySnapshotsCache<'p> {
583 585 fn insert_for(
584 586 &mut self,
585 587 rev: BaseRevision,
586 588 value: BaseRevision,
587 589 ) -> Result<(), RevlogError> {
588 590 let pyvalue = value.into_py_object(self.py).into_object();
589 591 match self.dict.get_item(self.py, rev) {
590 592 Some(obj) => obj
591 593 .extract::<PySet>(self.py)
592 594 .and_then(|set| set.add(self.py, pyvalue)),
593 595 None => PySet::new(self.py, vec![pyvalue])
594 596 .and_then(|set| self.dict.set_item(self.py, rev, set)),
595 597 }
596 598 .map_err(|_| {
597 599 RevlogError::Other(HgError::unsupported(
598 600 "Error in Python caches handling",
599 601 ))
600 602 })
601 603 }
602 604 }
603 605
604 606 impl Index {
605 607 fn new(py: Python, data: PyObject, header: u32) -> PyResult<Self> {
606 608 // Safety: we keep the buffer around inside the class as `index_mmap`
607 609 let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
608 610
609 611 Self::create_instance(
610 612 py,
611 613 hg::index::Index::new(
612 614 bytes,
613 615 IndexHeader::parse(&header.to_be_bytes())
614 616 .expect("default header is broken")
615 617 .unwrap(),
616 618 )
617 619 .map_err(|e| {
618 620 revlog_error_with_msg(py, e.to_string().as_bytes())
619 621 })?,
620 622 RefCell::new(None),
621 623 RefCell::new(None),
622 624 RefCell::new(None),
623 625 RefCell::new(Some(buf)),
626 RefCell::new(None),
624 627 )
625 628 }
626 629
627 630 fn len(&self, py: Python) -> PyResult<usize> {
628 631 let rust_index_len = self.index(py).borrow().len();
629 632 Ok(rust_index_len)
630 633 }
631 634
632 635 /// This is scaffolding at this point, but it could also become
633 636 /// a way to start a persistent nodemap or perform a
634 637 /// vacuum / repack operation
635 638 fn fill_nodemap(
636 639 &self,
637 640 py: Python,
638 641 nt: &mut CoreNodeTree,
639 642 ) -> PyResult<PyObject> {
640 643 let index = self.index(py).borrow();
641 644 for r in 0..self.len(py)? {
642 645 let rev = Revision(r as BaseRevision);
643 646 // in this case node() won't ever return None
644 647 nt.insert(&*index, index.node(rev).unwrap(), rev)
645 648 .map_err(|e| nodemap_error(py, e))?
646 649 }
647 650 Ok(py.None())
648 651 }
649 652
650 653 fn get_nodetree<'a>(
651 654 &'a self,
652 655 py: Python<'a>,
653 656 ) -> PyResult<&'a RefCell<Option<CoreNodeTree>>> {
654 657 if self.nt(py).borrow().is_none() {
655 658 let readonly = Box::<Vec<_>>::default();
656 659 let mut nt = CoreNodeTree::load_bytes(readonly, 0);
657 660 self.fill_nodemap(py, &mut nt)?;
658 661 self.nt(py).borrow_mut().replace(nt);
659 662 }
660 663 Ok(self.nt(py))
661 664 }
662 665
663 666 /// Returns the full nodemap bytes to be written as-is to disk
664 667 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
665 668 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
666 669 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
667 670
668 671 // If there's anything readonly, we need to build the data again from
669 672 // scratch
670 673 let bytes = if readonly.len() > 0 {
671 674 let mut nt = CoreNodeTree::load_bytes(Box::<Vec<_>>::default(), 0);
672 675 self.fill_nodemap(py, &mut nt)?;
673 676
674 677 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
675 678 assert_eq!(readonly.len(), 0);
676 679
677 680 bytes
678 681 } else {
679 682 bytes
680 683 };
681 684
682 685 let bytes = PyBytes::new(py, &bytes);
683 686 Ok(bytes)
684 687 }
685 688
686 689 /// Returns the last saved docket along with the size of any changed data
687 690 /// (in number of blocks), and said data as bytes.
688 691 fn inner_nodemap_data_incremental(
689 692 &self,
690 693 py: Python,
691 694 ) -> PyResult<PyObject> {
692 695 let docket = self.docket(py).borrow();
693 696 let docket = match docket.as_ref() {
694 697 Some(d) => d,
695 698 None => return Ok(py.None()),
696 699 };
697 700
698 701 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
699 702 let masked_blocks = node_tree.masked_readonly_blocks();
700 703 let (_, data) = node_tree.into_readonly_and_added_bytes();
701 704 let changed = masked_blocks * std::mem::size_of::<Block>();
702 705
703 706 Ok((docket, changed, PyBytes::new(py, &data))
704 707 .to_py_object(py)
705 708 .into_object())
706 709 }
707 710
708 711 /// Update the nodemap from the new (mmap'ed) data.
709 712 /// The docket is kept as a reference for later incremental calls.
710 713 fn inner_update_nodemap_data(
711 714 &self,
712 715 py: Python,
713 716 docket: PyObject,
714 717 nm_data: PyObject,
715 718 ) -> PyResult<PyObject> {
716 719 // Safety: we keep the buffer around inside the class as `nodemap_mmap`
717 720 let (buf, bytes) = unsafe { mmap_keeparound(py, nm_data)? };
718 721 let len = buf.item_count();
719 722 self.nodemap_mmap(py).borrow_mut().replace(buf);
720 723
721 724 let mut nt = CoreNodeTree::load_bytes(bytes, len);
722 725
723 726 let data_tip = docket
724 727 .getattr(py, "tip_rev")?
725 728 .extract::<BaseRevision>(py)?
726 729 .into();
727 730 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
728 731 let idx = self.index(py).borrow();
729 732 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
730 733 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
731 734 })?;
732 735 let current_tip = idx.len();
733 736
734 737 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
735 738 let rev = Revision(r);
736 739 // in this case node() won't ever return None
737 740 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
738 741 .map_err(|e| nodemap_error(py, e))?
739 742 }
740 743
741 744 *self.nt(py).borrow_mut() = Some(nt);
742 745
743 746 Ok(py.None())
744 747 }
745 748
746 749 fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> {
747 750 let idx = self.index(py).borrow();
748 751 Ok(match key.extract::<BaseRevision>(py) {
749 752 Ok(key_as_int) => {
750 753 let entry_params = if key_as_int == NULL_REVISION.0 {
751 754 RevisionDataParams::default()
752 755 } else {
753 756 let rev = UncheckedRevision(key_as_int);
754 757 match idx.entry_as_params(rev) {
755 758 Some(e) => e,
756 759 None => {
757 760 return Err(PyErr::new::<IndexError, _>(
758 761 py,
759 762 "revlog index out of range",
760 763 ));
761 764 }
762 765 }
763 766 };
764 767 revision_data_params_to_py_tuple(py, entry_params)
765 768 .into_object()
766 769 }
767 770 _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else(
768 771 || py.None(),
769 772 |py_rev| py_rev.into_py_object(py).into_object(),
770 773 ),
771 774 })
772 775 }
773 776
774 777 fn inner_headrevs(&self, py: Python) -> PyResult<PyObject> {
775 778 let index = &*self.index(py).borrow();
776 let as_vec: Vec<PyObject> = index
777 .head_revs()
778 .map_err(|e| graph_error(py, e))?
779 .iter()
780 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
781 .collect();
782 Ok(PyList::new(py, &as_vec).into_object())
779 if let Some(new_heads) =
780 index.head_revs_shortcut().map_err(|e| graph_error(py, e))?
781 {
782 self.cache_new_heads_py_list(new_heads, py);
783 }
784
785 Ok(self
786 .head_revs_py_list(py)
787 .borrow()
788 .as_ref()
789 .expect("head revs should be cached")
790 .clone_ref(py)
791 .into_object())
783 792 }
784 793
785 794 fn inner_headrevsfiltered(
786 795 &self,
787 796 py: Python,
788 797 filtered_revs: &PyObject,
789 798 ) -> PyResult<PyObject> {
790 799 let index = &mut *self.index(py).borrow_mut();
791 800 let filtered_revs = rev_pyiter_collect(py, filtered_revs, index)?;
792 801
793 let as_vec: Vec<PyObject> = index
794 .head_revs_filtered(&filtered_revs)
802 if let Some(new_heads) = index
803 .head_revs_filtered(&filtered_revs, true)
795 804 .map_err(|e| graph_error(py, e))?
805 {
806 self.cache_new_heads_py_list(new_heads, py);
807 }
808
809 Ok(self
810 .head_revs_py_list(py)
811 .borrow()
812 .as_ref()
813 .expect("head revs should be cached")
814 .clone_ref(py)
815 .into_object())
816 }
817
818 fn cache_new_heads_py_list(
819 &self,
820 new_heads: Vec<Revision>,
821 py: Python<'_>,
822 ) -> PyList {
823 let as_vec: Vec<PyObject> = new_heads
796 824 .iter()
797 825 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
798 826 .collect();
799 Ok(PyList::new(py, &as_vec).into_object())
827 let new_heads_py_list = PyList::new(py, &as_vec);
828 *self.head_revs_py_list(py).borrow_mut() =
829 Some(new_heads_py_list.clone_ref(py));
830 new_heads_py_list
800 831 }
801 832
802 833 fn inner_ancestors(
803 834 &self,
804 835 py: Python,
805 836 py_revs: &PyTuple,
806 837 ) -> PyResult<PyObject> {
807 838 let index = &*self.index(py).borrow();
808 839 let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
809 840 let as_vec: Vec<_> = index
810 841 .ancestors(&revs)
811 842 .map_err(|e| graph_error(py, e))?
812 843 .iter()
813 844 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
814 845 .collect();
815 846 Ok(PyList::new(py, &as_vec).into_object())
816 847 }
817 848
818 849 fn inner_commonancestorsheads(
819 850 &self,
820 851 py: Python,
821 852 py_revs: &PyTuple,
822 853 ) -> PyResult<PyObject> {
823 854 let index = &*self.index(py).borrow();
824 855 let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
825 856 let as_vec: Vec<_> = index
826 857 .common_ancestor_heads(&revs)
827 858 .map_err(|e| graph_error(py, e))?
828 859 .iter()
829 860 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
830 861 .collect();
831 862 Ok(PyList::new(py, &as_vec).into_object())
832 863 }
833 864
834 865 fn inner_computephasesmapsets(
835 866 &self,
836 867 py: Python,
837 868 py_roots: PyDict,
838 869 ) -> PyResult<PyObject> {
839 870 let index = &*self.index(py).borrow();
840 871 let opt = self.get_nodetree(py)?.borrow();
841 872 let nt = opt.as_ref().unwrap();
842 873 let roots: Result<HashMap<Phase, Vec<Revision>>, PyErr> = py_roots
843 874 .items_list(py)
844 875 .iter(py)
845 876 .map(|r| {
846 877 let phase = r.get_item(py, 0)?;
847 878 let nodes = r.get_item(py, 1)?;
848 879 // Transform the nodes from Python to revs here since we
849 880 // have access to the nodemap
850 881 let revs: Result<_, _> = nodes
851 882 .iter(py)?
852 883 .map(|node| match node?.extract::<PyBytes>(py) {
853 884 Ok(py_bytes) => {
854 885 let node = node_from_py_bytes(py, &py_bytes)?;
855 886 nt.find_bin(index, node.into())
856 887 .map_err(|e| nodemap_error(py, e))?
857 888 .ok_or_else(|| revlog_error(py))
858 889 }
859 890 Err(e) => Err(e),
860 891 })
861 892 .collect();
862 893 let phase = Phase::try_from(phase.extract::<usize>(py)?)
863 894 .map_err(|_| revlog_error(py));
864 895 Ok((phase?, revs?))
865 896 })
866 897 .collect();
867 898 let (len, phase_maps) = index
868 899 .compute_phases_map_sets(roots?)
869 900 .map_err(|e| graph_error(py, e))?;
870 901
871 902 // Ugly hack, but temporary
872 903 const IDX_TO_PHASE_NUM: [usize; 4] = [1, 2, 32, 96];
873 904 let py_phase_maps = PyDict::new(py);
874 905 for (idx, roots) in phase_maps.iter().enumerate() {
875 906 let phase_num = IDX_TO_PHASE_NUM[idx].into_py_object(py);
876 907 // OPTIM too bad we have to collect here. At least, we could
877 908 // reuse the same Vec and allocate it with capacity at
878 909 // max(len(phase_maps))
879 910 let roots_vec: Vec<PyInt> = roots
880 911 .iter()
881 912 .map(|r| PyRevision::from(*r).into_py_object(py))
882 913 .collect();
883 914 py_phase_maps.set_item(
884 915 py,
885 916 phase_num,
886 917 PySet::new(py, roots_vec)?,
887 918 )?;
888 919 }
889 920 Ok(PyTuple::new(
890 921 py,
891 922 &[
892 923 len.into_py_object(py).into_object(),
893 924 py_phase_maps.into_object(),
894 925 ],
895 926 )
896 927 .into_object())
897 928 }
898 929
899 930 fn inner_slicechunktodensity(
900 931 &self,
901 932 py: Python,
902 933 revs: PyObject,
903 934 target_density: f64,
904 935 min_gap_size: usize,
905 936 ) -> PyResult<PyObject> {
906 937 let index = &*self.index(py).borrow();
907 938 let revs: Vec<_> = rev_pyiter_collect(py, &revs, index)?;
908 939 let as_nested_vec =
909 940 index.slice_chunk_to_density(&revs, target_density, min_gap_size);
910 941 let mut res = Vec::with_capacity(as_nested_vec.len());
911 942 let mut py_chunk = Vec::new();
912 943 for chunk in as_nested_vec {
913 944 py_chunk.clear();
914 945 py_chunk.reserve_exact(chunk.len());
915 946 for rev in chunk {
916 947 py_chunk.push(
917 948 PyRevision::from(rev).into_py_object(py).into_object(),
918 949 );
919 950 }
920 951 res.push(PyList::new(py, &py_chunk).into_object());
921 952 }
922 953 // This is just to do the same as C, not sure why it does this
923 954 if res.len() == 1 {
924 955 Ok(PyTuple::new(py, &res).into_object())
925 956 } else {
926 957 Ok(PyList::new(py, &res).into_object())
927 958 }
928 959 }
929 960
930 961 fn inner_reachableroots2(
931 962 &self,
932 963 py: Python,
933 964 min_root: UncheckedRevision,
934 965 heads: PyObject,
935 966 roots: PyObject,
936 967 include_path: bool,
937 968 ) -> PyResult<PyObject> {
938 969 let index = &*self.index(py).borrow();
939 970 let heads = rev_pyiter_collect_or_else(py, &heads, index, |_rev| {
940 971 PyErr::new::<IndexError, _>(py, "head out of range")
941 972 })?;
942 973 let roots: Result<_, _> = roots
943 974 .iter(py)?
944 975 .map(|r| {
945 976 r.and_then(|o| match o.extract::<PyRevision>(py) {
946 977 Ok(r) => Ok(UncheckedRevision(r.0)),
947 978 Err(e) => Err(e),
948 979 })
949 980 })
950 981 .collect();
951 982 let as_set = index
952 983 .reachable_roots(min_root, heads, roots?, include_path)
953 984 .map_err(|e| graph_error(py, e))?;
954 985 let as_vec: Vec<PyObject> = as_set
955 986 .iter()
956 987 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
957 988 .collect();
958 989 Ok(PyList::new(py, &as_vec).into_object())
959 990 }
960 991 }
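// Sketch only, not part of this changeset: the cache-then-`clone_ref` tail
// shared by `inner_headrevs` and `inner_headrevsfiltered` above could be
// factored into one helper. `cached_heads_py_object` is a hypothetical name;
// the other identifiers are the ones introduced by the patch.
impl Index {
    fn cached_heads_py_object(
        &self,
        py: Python<'_>,
        new_heads: Option<Vec<Revision>>,
    ) -> PyObject {
        if let Some(new_heads) = new_heads {
            // The core index recomputed its heads: refresh the cached
            // PyList on the Python side as well.
            self.cache_new_heads_py_list(new_heads, py);
        }
        self.head_revs_py_list(py)
            .borrow()
            .as_ref()
            .expect("head revs should be cached")
            .clone_ref(py)
            .into_object()
    }
}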
961 992
962 993 py_class!(pub class NodeTree |py| {
963 994 data nt: RefCell<CoreNodeTree>;
964 995 data index: RefCell<UnsafePyLeaked<PySharedIndex>>;
965 996
966 997 def __new__(_cls, index: PyObject) -> PyResult<NodeTree> {
967 998 let index = py_rust_index_to_graph(py, index)?;
968 999 let nt = CoreNodeTree::default(); // in-RAM, fully mutable
969 1000 Self::create_instance(py, RefCell::new(nt), RefCell::new(index))
970 1001 }
971 1002
972 1003 /// Tell whether the NodeTree is still valid
973 1004 ///
974 1005 /// In case of mutation of the index, the given results are not
975 1006 /// guaranteed to be correct, and in fact, the methods borrowing
976 1007 /// the inner index would fail because of `PySharedRef` poisoning
977 1008 /// (generation-based guard), same as iterating on a `dict` that has
978 1009 /// been meanwhile mutated.
979 1010 def is_invalidated(&self) -> PyResult<bool> {
980 1011 let leaked = self.index(py).borrow();
981 1012 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
982 1013 let result = unsafe { leaked.try_borrow(py) };
983 1014 // two cases for result to be an error:
984 1015 // - the index has previously been mutably borrowed
985 1016 // - there is currently a mutable borrow
986 1017 // in both cases, previous results related to the index can no
987 1018 // longer be assumed to be valid.
988 1019 Ok(result.is_err())
989 1020 }
990 1021
991 1022 def insert(&self, rev: PyRevision) -> PyResult<PyObject> {
992 1023 let leaked = self.index(py).borrow();
993 1024 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
994 1025 let index = &*unsafe { leaked.try_borrow(py)? };
995 1026
996 1027 let rev = UncheckedRevision(rev.0);
997 1028 let rev = index
998 1029 .check_revision(rev)
999 1030 .ok_or_else(|| rev_not_in_index(py, rev))?;
1000 1031 if rev == NULL_REVISION {
1001 1032 return Err(rev_not_in_index(py, rev.into()))
1002 1033 }
1003 1034
1004 1035 let entry = index.inner.get_entry(rev).unwrap();
1005 1036 let mut nt = self.nt(py).borrow_mut();
1006 1037 nt.insert(index, entry.hash(), rev).map_err(|e| nodemap_error(py, e))?;
1007 1038
1008 1039 Ok(py.None())
1009 1040 }
1010 1041
1011 1042 /// Lookup by node hex prefix in the NodeTree, returning revision number.
1012 1043 ///
1013 1044 /// This is not part of the classical NodeTree API, but is good enough
1014 1045 /// for unit testing, as in `test-rust-revlog.py`.
1015 1046 def prefix_rev_lookup(
1016 1047 &self,
1017 1048 node_prefix: PyBytes
1018 1049 ) -> PyResult<Option<PyRevision>> {
1019 1050 let prefix = NodePrefix::from_hex(node_prefix.data(py))
1020 1051 .map_err(|_| PyErr::new::<ValueError, _>(
1021 1052 py,
1022 1053 format!("Invalid node or prefix {:?}",
1023 1054 node_prefix.as_object()))
1024 1055 )?;
1025 1056
1026 1057 let nt = self.nt(py).borrow();
1027 1058 let leaked = self.index(py).borrow();
1028 1059 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
1029 1060 let index = &*unsafe { leaked.try_borrow(py)? };
1030 1061
1031 1062 Ok(nt.find_bin(index, prefix)
1032 1063 .map_err(|e| nodemap_error(py, e))?
1033 1064 .map(|r| r.into())
1034 1065 )
1035 1066 }
1036 1067
1037 1068 def shortest(&self, node: PyBytes) -> PyResult<usize> {
1038 1069 let nt = self.nt(py).borrow();
1039 1070 let leaked = self.index(py).borrow();
1040 1071 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
1041 1072 let idx = &*unsafe { leaked.try_borrow(py)? };
1042 1073 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
1043 1074 {
1044 1075 Ok(Some(l)) => Ok(l),
1045 1076 Ok(None) => Err(revlog_error(py)),
1046 1077 Err(e) => Err(nodemap_error(py, e)),
1047 1078 }
1048 1079 }
1049 1080 });
1050 1081
1051 1082 fn revlog_error(py: Python) -> PyErr {
1052 1083 match py
1053 1084 .import("mercurial.error")
1054 1085 .and_then(|m| m.get(py, "RevlogError"))
1055 1086 {
1056 1087 Err(e) => e,
1057 1088 Ok(cls) => PyErr::from_instance(
1058 1089 py,
1059 1090 cls.call(py, (py.None(),), None).ok().into_py_object(py),
1060 1091 ),
1061 1092 }
1062 1093 }
1063 1094
1064 1095 fn revlog_error_with_msg(py: Python, msg: &[u8]) -> PyErr {
1065 1096 match py
1066 1097 .import("mercurial.error")
1067 1098 .and_then(|m| m.get(py, "RevlogError"))
1068 1099 {
1069 1100 Err(e) => e,
1070 1101 Ok(cls) => PyErr::from_instance(
1071 1102 py,
1072 1103 cls.call(py, (PyBytes::new(py, msg),), None)
1073 1104 .ok()
1074 1105 .into_py_object(py),
1075 1106 ),
1076 1107 }
1077 1108 }
1078 1109
1079 1110 fn graph_error(py: Python, _err: hg::GraphError) -> PyErr {
1080 1111 // ParentOutOfRange is currently the only alternative
1081 1112 // in `hg::GraphError`. The C index always raises this simple ValueError.
1082 1113 PyErr::new::<ValueError, _>(py, "parent out of range")
1083 1114 }
1084 1115
1085 1116 fn nodemap_rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
1086 1117 PyErr::new::<ValueError, _>(
1087 1118 py,
1088 1119 format!(
1089 1120 "Inconsistency: Revision {} found in nodemap \
1090 1121 is not in revlog index",
1091 1122 rev
1092 1123 ),
1093 1124 )
1094 1125 }
1095 1126
1096 1127 fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
1097 1128 PyErr::new::<ValueError, _>(
1098 1129 py,
1099 1130 format!("revlog index out of range: {}", rev),
1100 1131 )
1101 1132 }
1102 1133
1103 1134 /// Standard treatment of NodeMapError
1104 1135 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
1105 1136 match err {
1106 1137 NodeMapError::MultipleResults => revlog_error(py),
1107 1138 NodeMapError::RevisionNotInIndex(r) => nodemap_rev_not_in_index(py, r),
1108 1139 }
1109 1140 }
1110 1141
1111 1142 /// Create the module, with __package__ given from parent
1112 1143 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
1113 1144 let dotted_name = &format!("{}.revlog", package);
1114 1145 let m = PyModule::new(py, dotted_name)?;
1115 1146 m.add(py, "__package__", package)?;
1116 1147 m.add(py, "__doc__", "RevLog - Rust implementations")?;
1117 1148
1118 1149 m.add_class::<Index>(py)?;
1119 1150 m.add_class::<NodeTree>(py)?;
1120 1151
1121 1152 let sys = PyModule::import(py, "sys")?;
1122 1153 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
1123 1154 sys_modules.set_item(py, dotted_name, &m)?;
1124 1155
1125 1156 Ok(m)
1126 1157 }