rust-index: implementation of __getitem__...
Raphaël Gomès
r52098:002b4990 default
@@ -1,886 +1,938 b''
1 1 use std::fmt::Debug;
2 2 use std::ops::Deref;
3 3 use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
4 4
5 5 use byteorder::{BigEndian, ByteOrder};
6 6 use bytes_cast::{unaligned, BytesCast};
7 7
8 8 use super::REVIDX_KNOWN_FLAGS;
9 9 use crate::errors::HgError;
10 10 use crate::node::{NODE_BYTES_LENGTH, STORED_NODE_ID_BYTES};
11 11 use crate::revlog::node::Node;
12 12 use crate::revlog::{Revision, NULL_REVISION};
13 13 use crate::{Graph, GraphError, RevlogError, RevlogIndex, UncheckedRevision};
14 14
15 15 pub const INDEX_ENTRY_SIZE: usize = 64;
16 16 pub const COMPRESSION_MODE_INLINE: u8 = 2;
17 17
18 18 pub struct IndexHeader {
19 19 pub(super) header_bytes: [u8; 4],
20 20 }
21 21
22 22 #[derive(Copy, Clone)]
23 23 pub struct IndexHeaderFlags {
24 24 flags: u16,
25 25 }
26 26
27 27 /// Corresponds to the high bits of `_format_flags` in python
28 28 impl IndexHeaderFlags {
29 29 /// Corresponds to FLAG_INLINE_DATA in python
30 30 pub fn is_inline(self) -> bool {
31 31 self.flags & 1 != 0
32 32 }
33 33 /// Corresponds to FLAG_GENERALDELTA in python
34 34 pub fn uses_generaldelta(self) -> bool {
35 35 self.flags & 2 != 0
36 36 }
37 37 }
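For illustration, here is a minimal, self-contained sketch of the two flag bits these accessors test. The constant names follow FLAG_INLINE_DATA and FLAG_GENERALDELTA from revlog.py; everything else is hypothetical:

    const FLAG_INLINE_DATA: u16 = 1 << 0;
    const FLAG_GENERALDELTA: u16 = 1 << 1;

    /// Decode (is_inline, uses_generaldelta) from the 16 format-flag bits.
    fn describe_flags(flags: u16) -> (bool, bool) {
        (flags & FLAG_INLINE_DATA != 0, flags & FLAG_GENERALDELTA != 0)
    }

    fn main() {
        assert_eq!(describe_flags(0b11), (true, true)); // inline + generaldelta
        assert_eq!(describe_flags(0b10), (false, true)); // generaldelta only
    }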
38 38
39 39 /// Corresponds to the INDEX_HEADER structure,
40 40 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
41 41 impl IndexHeader {
42 42 fn format_flags(&self) -> IndexHeaderFlags {
43 43 // No "unknown flags" check here, unlike in python. Maybe there should
44 44 // be.
45 45 IndexHeaderFlags {
46 46 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
47 47 }
48 48 }
49 49
50 50 /// The only revlog version currently supported by rhg.
51 51 const REVLOGV1: u16 = 1;
52 52
53 53 /// Corresponds to `_format_version` in Python.
54 54 fn format_version(&self) -> u16 {
55 55 BigEndian::read_u16(&self.header_bytes[2..4])
56 56 }
57 57
58 58 pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> {
59 59 if index_bytes.is_empty() {
60 60 return Ok(None);
61 61 }
62 62 if index_bytes.len() < 4 {
63 63 return Err(HgError::corrupted(
64 64 "corrupted revlog: can't read the index format header",
65 65 ));
66 66 }
67 67 Ok(Some(IndexHeader {
68 68 header_bytes: {
69 69 let bytes: [u8; 4] =
70 70 index_bytes[0..4].try_into().expect("impossible");
71 71 bytes
72 72 },
73 73 }))
74 74 }
75 75 }
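As a sketch of what `parse` extracts, assuming only the layout visible above (big-endian flags in bytes 0..2, format version in bytes 2..4), the header can be decoded with the standard library alone:

    fn decode_header(header_bytes: [u8; 4]) -> (u16, u16) {
        let flags = u16::from_be_bytes([header_bytes[0], header_bytes[1]]);
        let version = u16::from_be_bytes([header_bytes[2], header_bytes[3]]);
        (flags, version)
    }

    fn main() {
        // 0x0003_0001: inline + generaldelta flags, format version 1 (REVLOGV1).
        assert_eq!(decode_header([0, 3, 0, 1]), (3, 1));
    }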
76 76
77 77 /// Abstracts the access to the index bytes since they can be spread between
78 78 /// the immutable (bytes) part and the mutable (added) part if any appends
79 79 /// happened. This makes it transparent for the callers.
80 80 struct IndexData {
81 81 /// Immutable bytes, most likely taken from disk
82 82 bytes: Box<dyn Deref<Target = [u8]> + Send>,
83 83 /// Used when stripping index contents, keeps track of the start of the
84 84 /// first stripped revision, which is used to give a slice of the
85 85 /// `bytes` field.
86 86 truncation: Option<usize>,
87 87 /// Bytes that were added after reading the index
88 88 added: Vec<u8>,
89 89 }
90 90
91 91 impl IndexData {
92 92 pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send>) -> Self {
93 93 Self {
94 94 bytes,
95 95 truncation: None,
96 96 added: vec![],
97 97 }
98 98 }
99 99
100 100 pub fn len(&self) -> usize {
101 101 match self.truncation {
102 102 Some(truncation) => truncation + self.added.len(),
103 103 None => self.bytes.len() + self.added.len(),
104 104 }
105 105 }
106 106
107 107 fn remove(
108 108 &mut self,
109 109 rev: Revision,
110 110 offsets: Option<&[usize]>,
111 111 ) -> Result<(), RevlogError> {
112 112 let rev = rev.0 as usize;
113 113 let truncation = if let Some(offsets) = offsets {
114 114 offsets[rev]
115 115 } else {
116 116 rev * INDEX_ENTRY_SIZE
117 117 };
118 118 if truncation < self.bytes.len() {
119 119 self.truncation = Some(truncation);
120 120 self.added.clear();
121 121 } else {
122 122 self.added.truncate(truncation - self.bytes.len());
123 123 }
124 124 Ok(())
125 125 }
126
127 fn is_new(&self) -> bool {
128 self.bytes.is_empty()
129 }
126 130 }
127 131
128 132 impl std::ops::Index<std::ops::Range<usize>> for IndexData {
129 133 type Output = [u8];
130 134
131 135 fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
132 136 let start = index.start;
133 137 let end = index.end;
134 138 let immutable_len = match self.truncation {
135 139 Some(truncation) => truncation,
136 140 None => self.bytes.len(),
137 141 };
138 142 if start < immutable_len {
139 143 if end > immutable_len {
140 144 panic!("index data cannot span existing and added ranges");
141 145 }
142 146 &self.bytes[index]
143 147 } else {
144 148 &self.added[start - immutable_len..end - immutable_len]
145 149 }
146 150 }
147 151 }
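The `std::ops::Index` implementation above refuses ranges that straddle the two regions. A self-contained sketch of that lookup rule, with plain slices standing in for `IndexData`:

    fn two_region_slice<'a>(
        immutable: &'a [u8],
        added: &'a [u8],
        range: std::ops::Range<usize>,
    ) -> &'a [u8] {
        if range.start < immutable.len() {
            // A range may not span the on-disk and appended parts.
            assert!(range.end <= immutable.len(), "range spans both regions");
            &immutable[range]
        } else {
            &added[range.start - immutable.len()..range.end - immutable.len()]
        }
    }

    fn main() {
        let on_disk = [1u8, 2, 3, 4];
        let appended = [5u8, 6];
        assert_eq!(two_region_slice(&on_disk, &appended, 0..2), &[1, 2]);
        assert_eq!(two_region_slice(&on_disk, &appended, 4..6), &[5, 6]);
    }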
148 152
153 #[derive(Debug, PartialEq, Eq)]
149 154 pub struct RevisionDataParams {
150 155 pub flags: u16,
151 156 pub data_offset: u64,
152 157 pub data_compressed_length: i32,
153 158 pub data_uncompressed_length: i32,
154 159 pub data_delta_base: i32,
155 160 pub link_rev: i32,
156 161 pub parent_rev_1: i32,
157 162 pub parent_rev_2: i32,
158 163 pub node_id: [u8; NODE_BYTES_LENGTH],
159 164 pub _sidedata_offset: u64,
160 165 pub _sidedata_compressed_length: i32,
161 166 pub data_compression_mode: u8,
162 167 pub _sidedata_compression_mode: u8,
163 168 pub _rank: i32,
164 169 }
165 170
171 impl Default for RevisionDataParams {
172 fn default() -> Self {
173 Self {
174 flags: 0,
175 data_offset: 0,
176 data_compressed_length: 0,
177 data_uncompressed_length: 0,
178 data_delta_base: -1,
179 link_rev: -1,
180 parent_rev_1: -1,
181 parent_rev_2: -1,
182 node_id: [0; NODE_BYTES_LENGTH],
183 _sidedata_offset: 0,
184 _sidedata_compressed_length: 0,
185 data_compression_mode: COMPRESSION_MODE_INLINE,
186 _sidedata_compression_mode: COMPRESSION_MODE_INLINE,
187 _rank: -1,
188 }
189 }
190 }
191
166 192 #[derive(BytesCast)]
167 193 #[repr(C)]
168 194 pub struct RevisionDataV1 {
169 195 data_offset_or_flags: unaligned::U64Be,
170 196 data_compressed_length: unaligned::I32Be,
171 197 data_uncompressed_length: unaligned::I32Be,
172 198 data_delta_base: unaligned::I32Be,
173 199 link_rev: unaligned::I32Be,
174 200 parent_rev_1: unaligned::I32Be,
175 201 parent_rev_2: unaligned::I32Be,
176 202 node_id: [u8; STORED_NODE_ID_BYTES],
177 203 }
178 204
179 205 fn _static_assert_size_of_revision_data_v1() {
180 206 let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>;
181 207 }
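`_static_assert_size_of_revision_data_v1` works because `transmute` refuses to compile between types of unequal size, so merely naming the function forces the check at build time. A minimal reproduction of the trick, using a hypothetical type:

    struct Header([u8; 4]);

    fn _assert_header_is_four_bytes() {
        // Fails to compile if Header is not exactly 4 bytes wide.
        let _ = std::mem::transmute::<Header, [u8; 4]>;
    }

    fn main() {}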
182 208
183 209 impl RevisionDataParams {
184 210 pub fn validate(&self) -> Result<(), RevlogError> {
185 211 if self.flags & !REVIDX_KNOWN_FLAGS != 0 {
186 212 return Err(RevlogError::corrupted(format!(
187 213 "unknown revlog index flags: {}",
188 214 self.flags
189 215 )));
190 216 }
191 217 if self.data_compression_mode != COMPRESSION_MODE_INLINE {
192 218 return Err(RevlogError::corrupted(format!(
193 219 "invalid data compression mode: {}",
194 220 self.data_compression_mode
195 221 )));
196 222 }
197 223 // FIXME isn't this only for v2 or changelog v2?
198 224 if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE {
199 225 return Err(RevlogError::corrupted(format!(
200 226 "invalid sidedata compression mode: {}",
201 227 self._sidedata_compression_mode
202 228 )));
203 229 }
204 230 Ok(())
205 231 }
206 232
207 233 pub fn into_v1(self) -> RevisionDataV1 {
208 234 let data_offset_or_flags = self.data_offset << 16 | self.flags as u64;
209 235 let mut node_id = [0; STORED_NODE_ID_BYTES];
210 236 node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id);
211 237 RevisionDataV1 {
212 238 data_offset_or_flags: data_offset_or_flags.into(),
213 239 data_compressed_length: self.data_compressed_length.into(),
214 240 data_uncompressed_length: self.data_uncompressed_length.into(),
215 241 data_delta_base: self.data_delta_base.into(),
216 242 link_rev: self.link_rev.into(),
217 243 parent_rev_1: self.parent_rev_1.into(),
218 244 parent_rev_2: self.parent_rev_2.into(),
219 245 node_id,
220 246 }
221 247 }
222 248 }
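`into_v1` packs the 48-bit data offset and the 16 flag bits into the single leading 64-bit field of a v1 entry. A sketch of that packing and its inverse (the Python-binding helper `py_tuple_to_revision_data_params` below performs the same unpacking):

    fn pack_offset_and_flags(data_offset: u64, flags: u16) -> u64 {
        data_offset << 16 | flags as u64
    }

    fn unpack_offset_and_flags(field: u64) -> (u64, u16) {
        (field >> 16, (field & 0xFFFF) as u16)
    }

    fn main() {
        let field = pack_offset_and_flags(0x1234, 0x8000);
        assert_eq!(unpack_offset_and_flags(field), (0x1234, 0x8000));
    }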
223 249
224 250 /// A Revlog index
225 251 pub struct Index {
226 252 bytes: IndexData,
227 253 /// Offsets of starts of index blocks.
228 254 /// Only needed when the index is interleaved with data.
229 255 offsets: RwLock<Option<Vec<usize>>>,
230 256 uses_generaldelta: bool,
231 257 is_inline: bool,
232 258 }
233 259
234 260 impl Debug for Index {
235 261 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
236 262 f.debug_struct("Index")
237 263 .field("offsets", &self.offsets)
238 264 .field("uses_generaldelta", &self.uses_generaldelta)
239 265 .finish()
240 266 }
241 267 }
242 268
243 269 impl Graph for Index {
244 270 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
245 271 let err = || GraphError::ParentOutOfRange(rev);
246 272 match self.get_entry(rev) {
247 273 Some(entry) => {
248 274 // The C implementation checks that the parents are valid
249 275 // before returning
250 276 Ok([
251 277 self.check_revision(entry.p1()).ok_or_else(err)?,
252 278 self.check_revision(entry.p2()).ok_or_else(err)?,
253 279 ])
254 280 }
255 281 None => Ok([NULL_REVISION, NULL_REVISION]),
256 282 }
257 283 }
258 284 }
259 285
260 286 impl Index {
261 287 /// Create an index from bytes.
262 288 /// Calculate the start of each entry when is_inline is true.
263 289 pub fn new(
264 290 bytes: Box<dyn Deref<Target = [u8]> + Send>,
265 291 default_header: IndexHeader,
266 292 ) -> Result<Self, HgError> {
267 293 let header =
268 294 IndexHeader::parse(bytes.as_ref())?.unwrap_or(default_header);
269 295
270 296 if header.format_version() != IndexHeader::REVLOGV1 {
271 297 // A proper new version should have had a repo/store
272 298 // requirement.
273 299 return Err(HgError::corrupted("unsupported revlog version"));
274 300 }
275 301
276 302 // This is only correct because we know version is REVLOGV1.
277 303 // In v2 we always use generaldelta, while in v0 we never use
278 304 // generaldelta. Similar for [is_inline] (it's only used in v1).
279 305 let uses_generaldelta = header.format_flags().uses_generaldelta();
280 306
281 307 if header.format_flags().is_inline() {
282 308 let mut offset: usize = 0;
283 309 let mut offsets = Vec::new();
284 310
285 311 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
286 312 offsets.push(offset);
287 313 let end = offset + INDEX_ENTRY_SIZE;
288 314 let entry = IndexEntry {
289 315 bytes: &bytes[offset..end],
290 316 offset_override: None,
291 317 };
292 318
293 319 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
294 320 }
295 321
296 322 if offset == bytes.len() {
297 323 Ok(Self {
298 324 bytes: IndexData::new(bytes),
299 325 offsets: RwLock::new(Some(offsets)),
300 326 uses_generaldelta,
301 327 is_inline: true,
302 328 })
303 329 } else {
304 330 Err(HgError::corrupted("unexpected inline revlog length"))
305 331 }
306 332 } else {
307 333 Ok(Self {
308 334 bytes: IndexData::new(bytes),
309 335 offsets: RwLock::new(None),
310 336 uses_generaldelta,
311 337 is_inline: false,
312 338 })
313 339 }
314 340 }
315 341
316 342 pub fn uses_generaldelta(&self) -> bool {
317 343 self.uses_generaldelta
318 344 }
319 345
320 346 /// Value of the inline flag.
321 347 pub fn is_inline(&self) -> bool {
322 348 self.is_inline
323 349 }
324 350
325 351 /// Return a slice of bytes if `revlog` is inline. Panic if not.
326 352 pub fn data(&self, start: usize, end: usize) -> &[u8] {
327 353 if !self.is_inline() {
328 354 panic!("tried to access data in the index of a revlog that is not inline");
329 355 }
330 356 &self.bytes[start..end]
331 357 }
332 358
333 359 /// Return the number of entries in the revlog index.
334 360 pub fn len(&self) -> usize {
335 361 if let Some(offsets) = &*self.get_offsets() {
336 362 offsets.len()
337 363 } else {
338 364 self.bytes.len() / INDEX_ENTRY_SIZE
339 365 }
340 366 }
341 367
342 368 pub fn get_offsets(&self) -> RwLockReadGuard<Option<Vec<usize>>> {
343 369 if self.is_inline() {
344 370 {
345 371 // Wrap in a block to drop the read guard
346 372 // TODO perf?
347 373 let mut offsets = self.offsets.write().unwrap();
348 374 if offsets.is_none() {
349 375 offsets.replace(inline_scan(&self.bytes.bytes).1);
350 376 }
351 377 }
352 378 }
353 379 self.offsets.read().unwrap()
354 380 }
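A self-contained sketch of the lazy-initialization pattern `get_offsets` uses: take the write lock only long enough to populate the cache, drop it, then hand out a read guard. A fixed vector stands in for `inline_scan` here:

    use std::sync::{RwLock, RwLockReadGuard};

    fn get_or_init(
        cache: &RwLock<Option<Vec<usize>>>,
    ) -> RwLockReadGuard<'_, Option<Vec<usize>>> {
        {
            // Write lock scoped to this block so it is released before reading.
            let mut guard = cache.write().unwrap();
            if guard.is_none() {
                guard.replace(vec![0, 64, 128]); // stand-in for inline_scan()
            }
        }
        cache.read().unwrap()
    }

    fn main() {
        let cache = RwLock::new(None);
        assert_eq!(get_or_init(&cache).as_deref(), Some(&[0, 64, 128][..]));
    }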
355 381
356 382 pub fn get_offsets_mut(&mut self) -> RwLockWriteGuard<Option<Vec<usize>>> {
357 383 let mut offsets = self.offsets.write().unwrap();
358 384 if self.is_inline() && offsets.is_none() {
359 385 offsets.replace(inline_scan(&self.bytes.bytes).1);
360 386 }
361 387 offsets
362 388 }
363 389
364 390 /// Returns `true` if the `Index` has zero entries.
365 391 pub fn is_empty(&self) -> bool {
366 392 self.len() == 0
367 393 }
368 394
369 395 /// Return the index entry corresponding to the given revision or `None`
370 396 /// for [`NULL_REVISION`]
371 397 ///
372 398 /// Since the specified revision is of the checked type, it is
373 399 /// guaranteed to exist if it was validated by this index.
374 400 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
375 401 if rev == NULL_REVISION {
376 402 return None;
377 403 }
378 404 Some(if let Some(offsets) = &*self.get_offsets() {
379 405 self.get_entry_inline(rev, offsets.as_ref())
380 406 } else {
381 407 self.get_entry_separated(rev)
382 408 })
383 409 }
384 410
385 411 /// Return the binary content of the index entry for the given revision
386 412 ///
387 413 /// See [get_entry()](`Self::get_entry()`) for cases when `None` is
388 414 /// returned.
389 415 pub fn entry_binary(&self, rev: Revision) -> Option<&[u8]> {
390 416 self.get_entry(rev).map(|e| {
391 417 let bytes = e.as_bytes();
392 418 if rev.0 == 0 {
393 419 &bytes[4..]
394 420 } else {
395 421 bytes
396 422 }
397 423 })
398 424 }
399 425
426 pub fn entry_as_params(
427 &self,
428 rev: UncheckedRevision,
429 ) -> Option<RevisionDataParams> {
430 let rev = self.check_revision(rev)?;
431 self.get_entry(rev).map(|e| RevisionDataParams {
432 flags: e.flags(),
433 data_offset: if rev.0 == 0 && !self.bytes.is_new() {
434 e.flags() as u64
435 } else {
436 e.raw_offset()
437 },
438 data_compressed_length: e.compressed_len().try_into().unwrap(),
439 data_uncompressed_length: e.uncompressed_len(),
440 data_delta_base: e.base_revision_or_base_of_delta_chain().0,
441 link_rev: e.link_revision().0,
442 parent_rev_1: e.p1().0,
443 parent_rev_2: e.p2().0,
444 node_id: e.hash().as_bytes().try_into().unwrap(),
445 ..Default::default()
446 })
447 }
448
400 449 fn get_entry_inline(
401 450 &self,
402 451 rev: Revision,
403 452 offsets: &[usize],
404 453 ) -> IndexEntry {
405 454 let start = offsets[rev.0 as usize];
406 455 let end = start + INDEX_ENTRY_SIZE;
407 456 let bytes = &self.bytes[start..end];
408 457
409 458 // See IndexEntry for an explanation of this override.
410 459 let offset_override = Some(end);
411 460
412 461 IndexEntry {
413 462 bytes,
414 463 offset_override,
415 464 }
416 465 }
417 466
418 467 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
419 468 let start = rev.0 as usize * INDEX_ENTRY_SIZE;
420 469 let end = start + INDEX_ENTRY_SIZE;
421 470 let bytes = &self.bytes[start..end];
422 471
423 472 // Override the offset of the first revision as its bytes are used
424 473 // for the index's metadata (saving space because it is always 0)
425 474 let offset_override = if rev == Revision(0) { Some(0) } else { None };
426 475
427 476 IndexEntry {
428 477 bytes,
429 478 offset_override,
430 479 }
431 480 }
432 481
433 482 /// TODO move this to the trait probably, along with other things
434 483 pub fn append(
435 484 &mut self,
436 485 revision_data: RevisionDataParams,
437 486 ) -> Result<(), RevlogError> {
438 487 revision_data.validate()?;
439 488 let new_offset = self.bytes.len();
440 489 if let Some(offsets) = &mut *self.get_offsets_mut() {
441 490 offsets.push(new_offset)
442 491 }
443 492 self.bytes.added.extend(revision_data.into_v1().as_bytes());
444 493 Ok(())
445 494 }
446 495
447 496 pub fn pack_header(&self, header: i32) -> [u8; 4] {
448 497 header.to_be_bytes()
449 498 }
450 499
451 500 pub fn remove(&mut self, rev: Revision) -> Result<(), RevlogError> {
452 501 let offsets = self.get_offsets().clone();
453 502 self.bytes.remove(rev, offsets.as_deref())?;
454 503 if let Some(offsets) = &mut *self.get_offsets_mut() {
455 504 offsets.truncate(rev.0 as usize)
456 505 }
457 506 Ok(())
458 507 }
459 508
460 509 pub fn clear_caches(&mut self) {
461 510 // We need to get the 'inline' value from Python at init and use this
462 511 // instead of offsets to determine whether we're inline since we might
463 512 // clear caches. This implies re-populating the offsets on-demand.
464 513 self.offsets = RwLock::new(None);
465 514 }
466 515 }
467 516
468 517 fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
469 518 let mut offset: usize = 0;
470 519 let mut offsets = Vec::new();
471 520
472 521 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
473 522 offsets.push(offset);
474 523 let end = offset + INDEX_ENTRY_SIZE;
475 524 let entry = IndexEntry {
476 525 bytes: &bytes[offset..end],
477 526 offset_override: None,
478 527 };
479 528
480 529 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
481 530 }
482 531 (offset, offsets)
483 532 }
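`inline_scan` walks the interleaved layout in which each 64-byte entry is immediately followed by its compressed data chunk, whose length is stored big-endian at bytes 8..12 of the entry. A standard-library-only sketch of the same walk:

    const ENTRY_SIZE: usize = 64;

    fn scan(bytes: &[u8]) -> Vec<usize> {
        let mut offsets = Vec::new();
        let mut offset = 0;
        while offset + ENTRY_SIZE <= bytes.len() {
            offsets.push(offset);
            let len: [u8; 4] = bytes[offset + 8..offset + 12].try_into().unwrap();
            // Skip the entry itself plus its trailing compressed data.
            offset += ENTRY_SIZE + u32::from_be_bytes(len) as usize;
        }
        offsets
    }

    fn main() {
        // One 64-byte entry advertising 10 bytes of inline data after it.
        let mut revlog = vec![0u8; ENTRY_SIZE + 10];
        revlog[8..12].copy_from_slice(&10u32.to_be_bytes());
        assert_eq!(scan(&revlog), vec![0]);
    }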
484 533
485 534 impl super::RevlogIndex for Index {
486 535 fn len(&self) -> usize {
487 536 self.len()
488 537 }
489 538
490 539 fn node(&self, rev: Revision) -> Option<&Node> {
491 540 self.get_entry(rev).map(|entry| entry.hash())
492 541 }
493 542 }
494 543
495 544 #[derive(Debug)]
496 545 pub struct IndexEntry<'a> {
497 546 bytes: &'a [u8],
498 547 /// Allows overriding the offset value of the entry.
499 548 ///
500 549 /// For interleaved index and data, the offset stored in the index
501 550 /// corresponds to the separated data offset.
502 551 /// It has to be overridden with the actual offset in the interleaved
503 552 /// index which is just after the index block.
504 553 ///
505 554 /// For separated index and data, the offset stored in the first index
506 555 /// entry is mixed with the index headers.
507 556 /// It has to be overridden with 0.
508 557 offset_override: Option<usize>,
509 558 }
510 559
511 560 impl<'a> IndexEntry<'a> {
512 561 /// Return the offset of the data.
513 562 pub fn offset(&self) -> usize {
514 563 if let Some(offset_override) = self.offset_override {
515 564 offset_override
516 565 } else {
517 566 let mut bytes = [0; 8];
518 567 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
519 568 BigEndian::read_u64(&bytes[..]) as usize
520 569 }
521 570 }
571 pub fn raw_offset(&self) -> u64 {
572 BigEndian::read_u64(&self.bytes[0..8])
573 }
522 574
523 575 pub fn flags(&self) -> u16 {
524 576 BigEndian::read_u16(&self.bytes[6..=7])
525 577 }
526 578
527 579 /// Return the compressed length of the data.
528 580 pub fn compressed_len(&self) -> u32 {
529 581 BigEndian::read_u32(&self.bytes[8..=11])
530 582 }
531 583
532 584 /// Return the uncompressed length of the data.
533 585 pub fn uncompressed_len(&self) -> i32 {
534 586 BigEndian::read_i32(&self.bytes[12..=15])
535 587 }
536 588
537 589 /// Return the revision upon which the data has been derived.
538 590 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
539 591 // TODO Maybe return an Option when base_revision == rev?
540 592 // Requires to add rev to IndexEntry
541 593
542 594 BigEndian::read_i32(&self.bytes[16..]).into()
543 595 }
544 596
545 597 pub fn link_revision(&self) -> UncheckedRevision {
546 598 BigEndian::read_i32(&self.bytes[20..]).into()
547 599 }
548 600
549 601 pub fn p1(&self) -> UncheckedRevision {
550 602 BigEndian::read_i32(&self.bytes[24..]).into()
551 603 }
552 604
553 605 pub fn p2(&self) -> UncheckedRevision {
554 606 BigEndian::read_i32(&self.bytes[28..]).into()
555 607 }
556 608
557 609 /// Return the hash of revision's full text.
558 610 ///
559 611 /// Currently, SHA-1 is used and only the first 20 bytes of this field
560 612 /// are used.
561 613 pub fn hash(&self) -> &'a Node {
562 614 (&self.bytes[32..52]).try_into().unwrap()
563 615 }
564 616
565 617 pub fn as_bytes(&self) -> &'a [u8] {
566 618 self.bytes
567 619 }
568 620 }
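For reference, a sketch of the fixed 64-byte v1 record that the accessors above read, with byte offsets within an entry; the helper is illustrative, not part of the module:

    // 0..6   data offset (48 bits)    6..8   flags
    // 8..12  compressed length        12..16 uncompressed length
    // 16..20 delta base               20..24 link revision
    // 24..28 p1                       28..32 p2
    // 32..52 node id (SHA-1)          52..64 unused tail of the 32-byte node field
    fn read_be_i32(entry: &[u8; 64], at: usize) -> i32 {
        i32::from_be_bytes(entry[at..at + 4].try_into().unwrap())
    }

    fn main() {
        let mut entry = [0u8; 64];
        entry[24..28].copy_from_slice(&42i32.to_be_bytes());
        assert_eq!(read_be_i32(&entry, 24), 42); // p1
    }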
569 621
570 622 #[cfg(test)]
571 623 mod tests {
572 624 use super::*;
573 625 use crate::node::NULL_NODE;
574 626
575 627 #[cfg(test)]
576 628 #[derive(Debug, Copy, Clone)]
577 629 pub struct IndexEntryBuilder {
578 630 is_first: bool,
579 631 is_inline: bool,
580 632 is_general_delta: bool,
581 633 version: u16,
582 634 offset: usize,
583 635 compressed_len: usize,
584 636 uncompressed_len: usize,
585 637 base_revision_or_base_of_delta_chain: Revision,
586 638 link_revision: Revision,
587 639 p1: Revision,
588 640 p2: Revision,
589 641 node: Node,
590 642 }
591 643
592 644 #[cfg(test)]
593 645 impl IndexEntryBuilder {
594 646 #[allow(clippy::new_without_default)]
595 647 pub fn new() -> Self {
596 648 Self {
597 649 is_first: false,
598 650 is_inline: false,
599 651 is_general_delta: true,
600 652 version: 1,
601 653 offset: 0,
602 654 compressed_len: 0,
603 655 uncompressed_len: 0,
604 656 base_revision_or_base_of_delta_chain: Revision(0),
605 657 link_revision: Revision(0),
606 658 p1: NULL_REVISION,
607 659 p2: NULL_REVISION,
608 660 node: NULL_NODE,
609 661 }
610 662 }
611 663
612 664 pub fn is_first(&mut self, value: bool) -> &mut Self {
613 665 self.is_first = value;
614 666 self
615 667 }
616 668
617 669 pub fn with_inline(&mut self, value: bool) -> &mut Self {
618 670 self.is_inline = value;
619 671 self
620 672 }
621 673
622 674 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
623 675 self.is_general_delta = value;
624 676 self
625 677 }
626 678
627 679 pub fn with_version(&mut self, value: u16) -> &mut Self {
628 680 self.version = value;
629 681 self
630 682 }
631 683
632 684 pub fn with_offset(&mut self, value: usize) -> &mut Self {
633 685 self.offset = value;
634 686 self
635 687 }
636 688
637 689 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
638 690 self.compressed_len = value;
639 691 self
640 692 }
641 693
642 694 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
643 695 self.uncompressed_len = value;
644 696 self
645 697 }
646 698
647 699 pub fn with_base_revision_or_base_of_delta_chain(
648 700 &mut self,
649 701 value: Revision,
650 702 ) -> &mut Self {
651 703 self.base_revision_or_base_of_delta_chain = value;
652 704 self
653 705 }
654 706
655 707 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
656 708 self.link_revision = value;
657 709 self
658 710 }
659 711
660 712 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
661 713 self.p1 = value;
662 714 self
663 715 }
664 716
665 717 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
666 718 self.p2 = value;
667 719 self
668 720 }
669 721
670 722 pub fn with_node(&mut self, value: Node) -> &mut Self {
671 723 self.node = value;
672 724 self
673 725 }
674 726
675 727 pub fn build(&self) -> Vec<u8> {
676 728 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
677 729 if self.is_first {
678 730 bytes.extend(&match (self.is_general_delta, self.is_inline) {
679 731 (false, false) => [0u8, 0],
680 732 (false, true) => [0u8, 1],
681 733 (true, false) => [0u8, 2],
682 734 (true, true) => [0u8, 3],
683 735 });
684 736 bytes.extend(&self.version.to_be_bytes());
685 737 // Remaining offset bytes.
686 738 bytes.extend(&[0u8; 2]);
687 739 } else {
688 740 // Offset stored on 48 bits (6 bytes)
689 741 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
690 742 }
691 743 bytes.extend(&[0u8; 2]); // Revision flags.
692 744 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
693 745 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
694 746 bytes.extend(
695 747 &self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
696 748 );
697 749 bytes.extend(&self.link_revision.0.to_be_bytes());
698 750 bytes.extend(&self.p1.0.to_be_bytes());
699 751 bytes.extend(&self.p2.0.to_be_bytes());
700 752 bytes.extend(self.node.as_bytes());
701 753 bytes.extend(vec![0u8; 12]);
702 754 bytes
703 755 }
704 756 }
705 757
706 758 pub fn is_inline(index_bytes: &[u8]) -> bool {
707 759 IndexHeader::parse(index_bytes)
708 760 .expect("too short")
709 761 .unwrap()
710 762 .format_flags()
711 763 .is_inline()
712 764 }
713 765
714 766 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
715 767 IndexHeader::parse(index_bytes)
716 768 .expect("too short")
717 769 .unwrap()
718 770 .format_flags()
719 771 .uses_generaldelta()
720 772 }
721 773
722 774 pub fn get_version(index_bytes: &[u8]) -> u16 {
723 775 IndexHeader::parse(index_bytes)
724 776 .expect("too short")
725 777 .unwrap()
726 778 .format_version()
727 779 }
728 780
729 781 #[test]
730 782 fn flags_when_no_inline_flag_test() {
731 783 let bytes = IndexEntryBuilder::new()
732 784 .is_first(true)
733 785 .with_general_delta(false)
734 786 .with_inline(false)
735 787 .build();
736 788
737 789 assert!(!is_inline(&bytes));
738 790 assert!(!uses_generaldelta(&bytes));
739 791 }
740 792
741 793 #[test]
742 794 fn flags_when_inline_flag_test() {
743 795 let bytes = IndexEntryBuilder::new()
744 796 .is_first(true)
745 797 .with_general_delta(false)
746 798 .with_inline(true)
747 799 .build();
748 800
749 801 assert!(is_inline(&bytes));
750 802 assert!(!uses_generaldelta(&bytes));
751 803 }
752 804
753 805 #[test]
754 806 fn flags_when_inline_and_generaldelta_flags_test() {
755 807 let bytes = IndexEntryBuilder::new()
756 808 .is_first(true)
757 809 .with_general_delta(true)
758 810 .with_inline(true)
759 811 .build();
760 812
761 813 assert!(is_inline(&bytes));
762 814 assert!(uses_generaldelta(&bytes));
763 815 }
764 816
765 817 #[test]
766 818 fn test_offset() {
767 819 let bytes = IndexEntryBuilder::new().with_offset(1).build();
768 820 let entry = IndexEntry {
769 821 bytes: &bytes,
770 822 offset_override: None,
771 823 };
772 824
773 825 assert_eq!(entry.offset(), 1)
774 826 }
775 827
776 828 #[test]
777 829 fn test_with_overridden_offset() {
778 830 let bytes = IndexEntryBuilder::new().with_offset(1).build();
779 831 let entry = IndexEntry {
780 832 bytes: &bytes,
781 833 offset_override: Some(2),
782 834 };
783 835
784 836 assert_eq!(entry.offset(), 2)
785 837 }
786 838
787 839 #[test]
788 840 fn test_compressed_len() {
789 841 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
790 842 let entry = IndexEntry {
791 843 bytes: &bytes,
792 844 offset_override: None,
793 845 };
794 846
795 847 assert_eq!(entry.compressed_len(), 1)
796 848 }
797 849
798 850 #[test]
799 851 fn test_uncompressed_len() {
800 852 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
801 853 let entry = IndexEntry {
802 854 bytes: &bytes,
803 855 offset_override: None,
804 856 };
805 857
806 858 assert_eq!(entry.uncompressed_len(), 1)
807 859 }
808 860
809 861 #[test]
810 862 fn test_base_revision_or_base_of_delta_chain() {
811 863 let bytes = IndexEntryBuilder::new()
812 864 .with_base_revision_or_base_of_delta_chain(Revision(1))
813 865 .build();
814 866 let entry = IndexEntry {
815 867 bytes: &bytes,
816 868 offset_override: None,
817 869 };
818 870
819 871 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
820 872 }
821 873
822 874 #[test]
823 875 fn link_revision_test() {
824 876 let bytes = IndexEntryBuilder::new()
825 877 .with_link_revision(Revision(123))
826 878 .build();
827 879
828 880 let entry = IndexEntry {
829 881 bytes: &bytes,
830 882 offset_override: None,
831 883 };
832 884
833 885 assert_eq!(entry.link_revision(), 123.into());
834 886 }
835 887
836 888 #[test]
837 889 fn p1_test() {
838 890 let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();
839 891
840 892 let entry = IndexEntry {
841 893 bytes: &bytes,
842 894 offset_override: None,
843 895 };
844 896
845 897 assert_eq!(entry.p1(), 123.into());
846 898 }
847 899
848 900 #[test]
849 901 fn p2_test() {
850 902 let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();
851 903
852 904 let entry = IndexEntry {
853 905 bytes: &bytes,
854 906 offset_override: None,
855 907 };
856 908
857 909 assert_eq!(entry.p2(), 123.into());
858 910 }
859 911
860 912 #[test]
861 913 fn node_test() {
862 914 let node = Node::from_hex("0123456789012345678901234567890123456789")
863 915 .unwrap();
864 916 let bytes = IndexEntryBuilder::new().with_node(node).build();
865 917
866 918 let entry = IndexEntry {
867 919 bytes: &bytes,
868 920 offset_override: None,
869 921 };
870 922
871 923 assert_eq!(*entry.hash(), node);
872 924 }
873 925
874 926 #[test]
875 927 fn version_test() {
876 928 let bytes = IndexEntryBuilder::new()
877 929 .is_first(true)
878 930 .with_version(2)
879 931 .build();
880 932
881 933 assert_eq!(get_version(&bytes), 2)
882 934 }
883 935 }
884 936
885 937 #[cfg(test)]
886 938 pub use tests::IndexEntryBuilder;
@@ -1,680 +1,747 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 PyRevision,
12 12 };
13 13 use cpython::{
14 14 buffer::{Element, PyBuffer},
15 15 exc::{IndexError, ValueError},
16 16 ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
17 17 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
18 18 };
19 19 use hg::{
20 index::IndexHeader,
21 index::{RevisionDataParams, COMPRESSION_MODE_INLINE},
20 index::{IndexHeader, RevisionDataParams},
22 21 nodemap::{Block, NodeMapError, NodeTree},
23 22 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
24 BaseRevision, Revision, UncheckedRevision,
23 BaseRevision, Revision, UncheckedRevision, NULL_REVISION,
25 24 };
26 25 use std::cell::RefCell;
27 26
28 27 /// Return a Struct implementing the Graph trait
29 28 pub(crate) fn pyindex_to_graph(
30 29 py: Python,
31 30 index: PyObject,
32 31 ) -> PyResult<cindex::Index> {
33 32 match index.extract::<MixedIndex>(py) {
34 33 Ok(midx) => Ok(midx.clone_cindex(py)),
35 34 Err(_) => cindex::Index::new(py, index),
36 35 }
37 36 }
38 37
39 38 py_class!(pub class MixedIndex |py| {
40 39 data cindex: RefCell<cindex::Index>;
41 40 data index: RefCell<hg::index::Index>;
42 41 data nt: RefCell<Option<NodeTree>>;
43 42 data docket: RefCell<Option<PyObject>>;
44 43 // Holds a reference to the mmap'ed persistent nodemap data
45 44 data nodemap_mmap: RefCell<Option<PyBuffer>>;
46 45 // Holds a reference to the mmap'ed persistent index data
47 46 data index_mmap: RefCell<Option<PyBuffer>>;
48 47
49 48 def __new__(
50 49 _cls,
51 50 cindex: PyObject,
52 51 data: PyObject,
53 52 default_header: u32,
54 53 ) -> PyResult<MixedIndex> {
55 54 Self::new(py, cindex, data, default_header)
56 55 }
57 56
58 57 /// Compatibility layer used for Python consumers needing access to the C index
59 58 ///
60 59 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
61 60 /// that may need to build a custom `nodetree`, based on a specified revset.
62 61 /// With a Rust implementation of the nodemap, we will be able to get rid of
63 62 /// this, by exposing our own standalone nodemap class,
64 63 /// ready to accept `MixedIndex`.
65 64 def get_cindex(&self) -> PyResult<PyObject> {
66 65 Ok(self.cindex(py).borrow().inner().clone_ref(py))
67 66 }
68 67
69 68 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
70 69
71 70 /// Return the Revision if found; raises a bare `error.RevlogError`
72 71 /// in case of ambiguity, as the C version does
73 72 def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
74 73 let opt = self.get_nodetree(py)?.borrow();
75 74 let nt = opt.as_ref().unwrap();
76 75 let idx = &*self.cindex(py).borrow();
77 76 let ridx = &*self.index(py).borrow();
78 77 let node = node_from_py_bytes(py, &node)?;
79 78 let rust_rev =
80 79 nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
81 80 let c_rev =
82 81 nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))?;
83 82 assert_eq!(rust_rev, c_rev);
84 83 Ok(rust_rev.map(Into::into))
85 84
86 85 }
87 86
88 87 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
89 88 /// is not found.
90 89 ///
91 90 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
92 91 /// will catch and rewrap with it
93 92 def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
94 93 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
95 94 }
96 95
97 96 /// return True if the node exists in the index
98 97 def has_node(&self, node: PyBytes) -> PyResult<bool> {
99 98 // TODO OPTIM we could avoid a needless conversion here,
100 99 // to do when scaffolding for pure Rust switch is removed,
101 100 // as `get_rev()` currently does the necessary assertions
102 101 self.get_rev(py, node).map(|opt| opt.is_some())
103 102 }
104 103
105 104 /// find length of shortest hex nodeid of a binary ID
106 105 def shortest(&self, node: PyBytes) -> PyResult<usize> {
107 106 let opt = self.get_nodetree(py)?.borrow();
108 107 let nt = opt.as_ref().unwrap();
109 108 let idx = &*self.cindex(py).borrow();
110 109 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
111 110 {
112 111 Ok(Some(l)) => Ok(l),
113 112 Ok(None) => Err(revlog_error(py)),
114 113 Err(e) => Err(nodemap_error(py, e)),
115 114 }
116 115 }
117 116
118 117 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
119 118 let opt = self.get_nodetree(py)?.borrow();
120 119 let nt = opt.as_ref().unwrap();
121 120 let idx = &*self.cindex(py).borrow();
122 121
123 122 let node_as_string = if cfg!(feature = "python3-sys") {
124 123 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
125 124 }
126 125 else {
127 126 let node = node.extract::<PyBytes>(py)?;
128 127 String::from_utf8_lossy(node.data(py)).to_string()
129 128 };
130 129
131 130 let prefix = NodePrefix::from_hex(&node_as_string)
132 131 .map_err(|_| PyErr::new::<ValueError, _>(
133 132 py, format!("Invalid node or prefix '{}'", node_as_string))
134 133 )?;
135 134
136 135 nt.find_bin(idx, prefix)
137 136 // TODO make an inner API returning the node directly
138 137 .map(|opt| opt.map(
139 138 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
140 139 .map_err(|e| nodemap_error(py, e))
141 140
142 141 }
143 142
144 143 /// append an index entry
145 144 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
146 145 if tup.len(py) < 8 {
147 146 // this is better than the panic promised by tup.get_item()
148 147 return Err(
149 148 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
150 149 }
151 150 let node_bytes = tup.get_item(py, 7).extract(py)?;
152 151 let node = node_from_py_object(py, &node_bytes)?;
153 152
154 153 let rev = self.len(py)? as BaseRevision;
155 154 let mut idx = self.cindex(py).borrow_mut();
156 155
157 156 // This is ok since we will just add the revision to the index
158 157 let rev = Revision(rev);
159 158 idx.append(py, tup.clone_ref(py))?;
160 159 self.index(py)
161 160 .borrow_mut()
162 161 .append(py_tuple_to_revision_data_params(py, tup)?)
163 162 .unwrap();
164 163 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
165 164 .insert(&*idx, &node, rev)
166 165 .map_err(|e| nodemap_error(py, e))?;
167 166 Ok(py.None())
168 167 }
169 168
170 169 def __delitem__(&self, key: PyObject) -> PyResult<()> {
171 170 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
172 171 self.cindex(py).borrow().inner().del_item(py, &key)?;
173 172 let start = key.getattr(py, "start")?;
174 173 let start = UncheckedRevision(start.extract(py)?);
175 174 let start = self.index(py)
176 175 .borrow()
177 176 .check_revision(start)
178 177 .ok_or_else(|| {
179 178 nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
180 179 })?;
181 180 self.index(py).borrow_mut().remove(start).unwrap();
182 181 let mut opt = self.get_nodetree(py)?.borrow_mut();
183 182 let nt = opt.as_mut().unwrap();
184 183 nt.invalidate_all();
185 184 self.fill_nodemap(py, nt)?;
186 185 Ok(())
187 186 }
188 187
189 188 //
190 189 // Reforwarded C index API
191 190 //
192 191
193 192 // index_methods (tp_methods). Same ordering as in revlog.c
194 193
195 194 /// return the gca set of the given revs
196 195 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
197 196 self.call_cindex(py, "ancestors", args, kw)
198 197 }
199 198
200 199 /// return the heads of the common ancestors of the given revs
201 200 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
202 201 self.call_cindex(py, "commonancestorsheads", args, kw)
203 202 }
204 203
205 204 /// Clear the index caches and inner py_class data.
206 205 /// It is Python's responsibility to call `update_nodemap_data` again.
207 206 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
208 207 self.nt(py).borrow_mut().take();
209 208 self.docket(py).borrow_mut().take();
210 209 self.nodemap_mmap(py).borrow_mut().take();
211 210 self.index(py).borrow_mut().clear_caches();
212 211 self.call_cindex(py, "clearcaches", args, kw)
213 212 }
214 213
215 214 /// return the raw binary string representing a revision
216 215 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
217 216 let rindex = self.index(py).borrow();
218 217 let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
219 218 let rust_bytes = rindex.check_revision(rev).and_then(
220 219 |r| rindex.entry_binary(r))
221 220 .ok_or_else(|| rev_not_in_index(py, rev))?;
222 221 let rust_res = PyBytes::new(py, rust_bytes).into_object();
223 222
224 223 let c_res = self.call_cindex(py, "entry_binary", args, kw)?;
225 224 assert_py_eq(py, "entry_binary", &rust_res, &c_res)?;
226 225 Ok(rust_res)
227 226 }
228 227
229 228 /// return a binary packed version of the header
230 229 def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
231 230 let rindex = self.index(py).borrow();
232 231 let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
233 232 let rust_res = PyBytes::new(py, &packed).into_object();
234 233
235 234 let c_res = self.call_cindex(py, "pack_header", args, kw)?;
236 235 assert_py_eq(py, "pack_header", &rust_res, &c_res)?;
237 236 Ok(rust_res)
238 237 }
239 238
240 /// get an index entry
241 def get(&self, *args, **kw) -> PyResult<PyObject> {
242 self.call_cindex(py, "get", args, kw)
243 }
244
245 239 /// compute phases
246 240 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
247 241 self.call_cindex(py, "computephasesmapsets", args, kw)
248 242 }
249 243
250 244 /// reachableroots
251 245 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
252 246 self.call_cindex(py, "reachableroots2", args, kw)
253 247 }
254 248
255 249 /// get head revisions
256 250 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
257 251 self.call_cindex(py, "headrevs", args, kw)
258 252 }
259 253
260 254 /// get filtered head revisions
261 255 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
262 256 self.call_cindex(py, "headrevsfiltered", args, kw)
263 257 }
264 258
265 259 /// True if the object is a snapshot
266 260 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
267 261 self.call_cindex(py, "issnapshot", args, kw)
268 262 }
269 263
270 264 /// Gather snapshot data in a cache dict
271 265 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
272 266 self.call_cindex(py, "findsnapshots", args, kw)
273 267 }
274 268
275 269 /// determine revisions with deltas to reconstruct fulltext
276 270 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
277 271 self.call_cindex(py, "deltachain", args, kw)
278 272 }
279 273
280 274 /// slice planned chunk read to reach a density threshold
281 275 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
282 276 self.call_cindex(py, "slicechunktodensity", args, kw)
283 277 }
284 278
285 279 /// stats for the index
286 280 def stats(&self, *args, **kw) -> PyResult<PyObject> {
287 281 self.call_cindex(py, "stats", args, kw)
288 282 }
289 283
290 284 // index_sequence_methods and index_mapping_methods.
291 285 //
292 286 // Since we call back through the high level Python API,
293 287 // there's no point making a distinction between index_get
294 288 // and index_getitem.
289 // gracinet 2023: this above is no longer true for the pure Rust impl
295 290
296 291 def __len__(&self) -> PyResult<usize> {
297 292 self.len(py)
298 293 }
299 294
300 295 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
296 let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
297
301 298 // this conversion seems needless, but that's actually because
302 299 // `index_getitem` does not handle conversion from PyLong,
303 300 // which expressions such as [e for e in index] internally use.
304 301 // Note that we don't seem to have a direct way to call
305 302 // PySequence_GetItem (does the job), which would possibly be better
306 303 // for performance
307 let key = match key.extract::<i32>(py) {
304 // gracinet 2023: the above comment can be removed when we use
305 // the pure Rust impl only. Note also that `key` can be a binary
306 // node id.
307 let c_key = match key.extract::<BaseRevision>(py) {
308 308 Ok(rev) => rev.to_py_object(py).into_object(),
309 309 Err(_) => key,
310 310 };
311 self.cindex(py).borrow().inner().get_item(py, key)
311 let c_res = self.cindex(py).borrow().inner().get_item(py, c_key)?;
312
313 assert_py_eq(py, "__getitem__", &rust_res, &c_res)?;
314 Ok(rust_res)
312 315 }
313 316
314 317 def __contains__(&self, item: PyObject) -> PyResult<bool> {
315 318 // ObjectProtocol does not seem to provide contains(), so
316 319 // this is an equivalent implementation of the index_contains()
317 320 // defined in revlog.c
318 321 let cindex = self.cindex(py).borrow();
319 322 match item.extract::<i32>(py) {
320 323 Ok(rev) => {
321 324 Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
322 325 }
323 326 Err(_) => {
324 327 cindex.inner().call_method(
325 328 py,
326 329 "has_node",
327 330 PyTuple::new(py, &[item]),
328 331 None)?
329 332 .extract(py)
330 333 }
331 334 }
332 335 }
333 336
334 337 def nodemap_data_all(&self) -> PyResult<PyBytes> {
335 338 self.inner_nodemap_data_all(py)
336 339 }
337 340
338 341 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
339 342 self.inner_nodemap_data_incremental(py)
340 343 }
341 344 def update_nodemap_data(
342 345 &self,
343 346 docket: PyObject,
344 347 nm_data: PyObject
345 348 ) -> PyResult<PyObject> {
346 349 self.inner_update_nodemap_data(py, docket, nm_data)
347 350 }
348 351
349 352 @property
350 353 def entry_size(&self) -> PyResult<PyInt> {
351 354 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
352 355 }
353 356
354 357 @property
355 358 def rust_ext_compat(&self) -> PyResult<PyInt> {
356 359 self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
357 360 }
358 361
359 362 });
360 363
361 364 /// Take a (potentially) mmap'ed buffer, and return the underlying Python
362 365 /// buffer along with the Rust slice into said buffer. We need to keep the
363 366 /// Python buffer around, otherwise we'd get a dangling pointer once the buffer
364 367 /// is freed from Python's side.
365 368 ///
366 369 /// # Safety
367 370 ///
368 371 /// The caller must make sure that the buffer is kept around for at least as
369 372 /// long as the slice.
370 373 #[deny(unsafe_op_in_unsafe_fn)]
371 374 unsafe fn mmap_keeparound(
372 375 py: Python,
373 376 data: PyObject,
374 377 ) -> PyResult<(
375 378 PyBuffer,
376 379 Box<dyn std::ops::Deref<Target = [u8]> + Send + 'static>,
377 380 )> {
378 381 let buf = PyBuffer::get(py, &data)?;
379 382 let len = buf.item_count();
380 383
381 384 // Build a slice from the mmap'ed buffer data
382 385 let cbuf = buf.buf_ptr();
383 386 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
384 387 && buf.is_c_contiguous()
385 388 && u8::is_compatible_format(buf.format())
386 389 {
387 390 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
388 391 } else {
389 392 return Err(PyErr::new::<ValueError, _>(
390 393 py,
391 394 "Nodemap data buffer has an invalid memory representation"
392 395 .to_string(),
393 396 ));
394 397 };
395 398
396 399 Ok((buf, Box::new(bytes)))
397 400 }
398 401
399 402 fn py_tuple_to_revision_data_params(
400 403 py: Python,
401 404 tuple: PyTuple,
402 405 ) -> PyResult<RevisionDataParams> {
403 406 if tuple.len(py) < 8 {
404 407 // this is better than the panic promised by tup.get_item()
405 408 return Err(PyErr::new::<IndexError, _>(
406 409 py,
407 410 "tuple index out of range",
408 411 ));
409 412 }
410 413 let offset_or_flags: u64 = tuple.get_item(py, 0).extract(py)?;
411 414 let node_id = tuple
412 415 .get_item(py, 7)
413 416 .extract::<PyBytes>(py)?
414 417 .data(py)
415 418 .try_into()
416 419 .unwrap();
417 420 let flags = (offset_or_flags & 0xFFFF) as u16;
418 421 let data_offset = offset_or_flags >> 16;
419 422 Ok(RevisionDataParams {
420 423 flags,
421 424 data_offset,
422 425 data_compressed_length: tuple.get_item(py, 1).extract(py)?,
423 426 data_uncompressed_length: tuple.get_item(py, 2).extract(py)?,
424 427 data_delta_base: tuple.get_item(py, 3).extract(py)?,
425 428 link_rev: tuple.get_item(py, 4).extract(py)?,
426 429 parent_rev_1: tuple.get_item(py, 5).extract(py)?,
427 430 parent_rev_2: tuple.get_item(py, 6).extract(py)?,
428 431 node_id,
429 _sidedata_offset: 0,
430 _sidedata_compressed_length: 0,
431 data_compression_mode: COMPRESSION_MODE_INLINE,
432 _sidedata_compression_mode: COMPRESSION_MODE_INLINE,
433 _rank: -1,
432 ..Default::default()
434 433 })
435 434 }
435 fn revision_data_params_to_py_tuple(
436 py: Python,
437 params: RevisionDataParams,
438 ) -> PyTuple {
439 PyTuple::new(
440 py,
441 &[
442 params.data_offset.into_py_object(py).into_object(),
443 params
444 .data_compressed_length
445 .into_py_object(py)
446 .into_object(),
447 params
448 .data_uncompressed_length
449 .into_py_object(py)
450 .into_object(),
451 params.data_delta_base.into_py_object(py).into_object(),
452 params.link_rev.into_py_object(py).into_object(),
453 params.parent_rev_1.into_py_object(py).into_object(),
454 params.parent_rev_2.into_py_object(py).into_object(),
455 PyBytes::new(py, &params.node_id)
456 .into_py_object(py)
457 .into_object(),
458 params._sidedata_offset.into_py_object(py).into_object(),
459 params
460 ._sidedata_compressed_length
461 .into_py_object(py)
462 .into_object(),
463 params
464 .data_compression_mode
465 .into_py_object(py)
466 .into_object(),
467 params
468 ._sidedata_compression_mode
469 .into_py_object(py)
470 .into_object(),
471 params._rank.into_py_object(py).into_object(),
472 ],
473 )
474 }
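For reference, a sketch of the 13-slot field order assembled above; the names are descriptive only, not API identifiers:

    const ENTRY_TUPLE_FIELDS: [&str; 13] = [
        "data_offset",
        "data_compressed_length",
        "data_uncompressed_length",
        "data_delta_base",
        "link_rev",
        "parent_rev_1",
        "parent_rev_2",
        "node_id",
        "sidedata_offset",
        "sidedata_compressed_length",
        "data_compression_mode",
        "sidedata_compression_mode",
        "rank",
    ];

    fn main() {
        assert_eq!(ENTRY_TUPLE_FIELDS.len(), 13);
    }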
436 475
437 476 impl MixedIndex {
438 477 fn new(
439 478 py: Python,
440 479 cindex: PyObject,
441 480 data: PyObject,
442 481 header: u32,
443 482 ) -> PyResult<MixedIndex> {
444 483 // Safety: we keep the buffer around inside the class as `index_mmap`
445 484 let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
446 485
447 486 Self::create_instance(
448 487 py,
449 488 RefCell::new(cindex::Index::new(py, cindex)?),
450 489 RefCell::new(
451 490 hg::index::Index::new(
452 491 bytes,
453 492 IndexHeader::parse(&header.to_be_bytes())
454 493 .expect("default header is broken")
455 494 .unwrap(),
456 495 )
457 496 .unwrap(),
458 497 ),
459 498 RefCell::new(None),
460 499 RefCell::new(None),
461 500 RefCell::new(None),
462 501 RefCell::new(Some(buf)),
463 502 )
464 503 }
465 504
466 505 fn len(&self, py: Python) -> PyResult<usize> {
467 506 let rust_index_len = self.index(py).borrow().len();
468 507 let cindex_len = self.cindex(py).borrow().inner().len(py)?;
469 508 assert_eq!(rust_index_len, cindex_len);
470 509 Ok(cindex_len)
471 510 }
472 511
473 512 /// This is scaffolding at this point, but it could also become
474 513 /// a way to start a persistent nodemap or perform a
475 514 /// vacuum / repack operation
476 515 fn fill_nodemap(
477 516 &self,
478 517 py: Python,
479 518 nt: &mut NodeTree,
480 519 ) -> PyResult<PyObject> {
481 520 let index = self.cindex(py).borrow();
482 521 for r in 0..self.len(py)? {
483 522 let rev = Revision(r as BaseRevision);
484 523 // in this case node() won't ever return None
485 524 nt.insert(&*index, index.node(rev).unwrap(), rev)
486 525 .map_err(|e| nodemap_error(py, e))?
487 526 }
488 527 Ok(py.None())
489 528 }
490 529
491 530 fn get_nodetree<'a>(
492 531 &'a self,
493 532 py: Python<'a>,
494 533 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
495 534 if self.nt(py).borrow().is_none() {
496 535 let readonly = Box::<Vec<_>>::default();
497 536 let mut nt = NodeTree::load_bytes(readonly, 0);
498 537 self.fill_nodemap(py, &mut nt)?;
499 538 self.nt(py).borrow_mut().replace(nt);
500 539 }
501 540 Ok(self.nt(py))
502 541 }
503 542
504 543 /// forward a method call to the underlying C index
505 544 fn call_cindex(
506 545 &self,
507 546 py: Python,
508 547 name: &str,
509 548 args: &PyTuple,
510 549 kwargs: Option<&PyDict>,
511 550 ) -> PyResult<PyObject> {
512 551 self.cindex(py)
513 552 .borrow()
514 553 .inner()
515 554 .call_method(py, name, args, kwargs)
516 555 }
517 556
518 557 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
519 558 self.cindex(py).borrow().clone_ref(py)
520 559 }
521 560
522 561 /// Returns the full nodemap bytes to be written as-is to disk
523 562 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
524 563 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
525 564 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
526 565
527 566 // If there's anything readonly, we need to build the data again from
528 567 // scratch
529 568 let bytes = if readonly.len() > 0 {
530 569 let mut nt = NodeTree::load_bytes(Box::<Vec<_>>::default(), 0);
531 570 self.fill_nodemap(py, &mut nt)?;
532 571
533 572 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
534 573 assert_eq!(readonly.len(), 0);
535 574
536 575 bytes
537 576 } else {
538 577 bytes
539 578 };
540 579
541 580 let bytes = PyBytes::new(py, &bytes);
542 581 Ok(bytes)
543 582 }
544 583
545 584 /// Returns the last saved docket along with the size of any changed data
546 585 /// (in number of blocks), and said data as bytes.
547 586 fn inner_nodemap_data_incremental(
548 587 &self,
549 588 py: Python,
550 589 ) -> PyResult<PyObject> {
551 590 let docket = self.docket(py).borrow();
552 591 let docket = match docket.as_ref() {
553 592 Some(d) => d,
554 593 None => return Ok(py.None()),
555 594 };
556 595
557 596 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
558 597 let masked_blocks = node_tree.masked_readonly_blocks();
559 598 let (_, data) = node_tree.into_readonly_and_added_bytes();
560 599 let changed = masked_blocks * std::mem::size_of::<Block>();
561 600
562 601 Ok((docket, changed, PyBytes::new(py, &data))
563 602 .to_py_object(py)
564 603 .into_object())
565 604 }
566 605
567 606 /// Update the nodemap from the new (mmaped) data.
568 607 /// The docket is kept as a reference for later incremental calls.
569 608 fn inner_update_nodemap_data(
570 609 &self,
571 610 py: Python,
572 611 docket: PyObject,
573 612 nm_data: PyObject,
574 613 ) -> PyResult<PyObject> {
575 614 // Safety: we keep the buffer around inside the class as `nodemap_mmap`
576 615 let (buf, bytes) = unsafe { mmap_keeparound(py, nm_data)? };
577 616 let len = buf.item_count();
578 617 self.nodemap_mmap(py).borrow_mut().replace(buf);
579 618
580 619 let mut nt = NodeTree::load_bytes(bytes, len);
581 620
582 621 let data_tip = docket
583 622 .getattr(py, "tip_rev")?
584 623 .extract::<BaseRevision>(py)?
585 624 .into();
586 625 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
587 626 let idx = self.cindex(py).borrow();
588 627 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
589 628 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
590 629 })?;
591 630 let current_tip = idx.len();
592 631
593 632 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
594 633 let rev = Revision(r);
595 634 // in this case node() won't ever return None
596 635 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
597 636 .map_err(|e| nodemap_error(py, e))?
598 637 }
599 638
600 639 *self.nt(py).borrow_mut() = Some(nt);
601 640
602 641 Ok(py.None())
603 642 }
643
644 fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> {
645 let idx = self.index(py).borrow();
646 Ok(match key.extract::<BaseRevision>(py) {
647 Ok(key_as_int) => {
648 let entry_params = if key_as_int == NULL_REVISION.0 {
649 RevisionDataParams::default()
650 } else {
651 let rev = UncheckedRevision(key_as_int);
652 match idx.entry_as_params(rev) {
653 Some(e) => e,
654 None => {
655 return Err(PyErr::new::<IndexError, _>(
656 py,
657 "revlog index out of range",
658 ));
659 }
660 }
661 };
662 revision_data_params_to_py_tuple(py, entry_params)
663 .into_object()
664 }
665 _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else(
666 || py.None(),
667 |py_rev| py_rev.into_py_object(py).into_object(),
668 ),
669 })
670 }
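A sketch of the dispatch `inner_getitem` performs, with a plain enum standing in for the dynamic Python key: an integer is a revision number (the null revision yielding the default entry), anything else is treated as a binary node id and resolved through `get_rev`:

    enum Key<'a> {
        Rev(i32),
        Node(&'a [u8]),
    }

    fn describe(key: Key) -> String {
        match key {
            // -1 is the null revision: hand back the default (all-null) entry.
            Key::Rev(-1) => "default entry".to_owned(),
            Key::Rev(r) => format!("index entry for revision {}", r),
            Key::Node(n) => format!("revision resolved from a {}-byte node id", n.len()),
        }
    }

    fn main() {
        assert_eq!(describe(Key::Rev(-1)), "default entry");
        assert_eq!(
            describe(Key::Node(&[0u8; 20])),
            "revision resolved from a 20-byte node id"
        );
    }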
604 671 }
605 672
606 673 fn revlog_error(py: Python) -> PyErr {
607 674 match py
608 675 .import("mercurial.error")
609 676 .and_then(|m| m.get(py, "RevlogError"))
610 677 {
611 678 Err(e) => e,
612 679 Ok(cls) => PyErr::from_instance(
613 680 py,
614 681 cls.call(py, (py.None(),), None).ok().into_py_object(py),
615 682 ),
616 683 }
617 684 }
618 685
619 686 fn nodemap_rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
620 687 PyErr::new::<ValueError, _>(
621 688 py,
622 689 format!(
623 690 "Inconsistency: Revision {} found in nodemap \
624 691 is not in revlog index",
625 692 rev
626 693 ),
627 694 )
628 695 }
629 696
630 697 fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
631 698 PyErr::new::<ValueError, _>(
632 699 py,
633 700 format!("revlog index out of range: {}", rev),
634 701 )
635 702 }
636 703
637 704 /// Standard treatment of NodeMapError
638 705 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
639 706 match err {
640 707 NodeMapError::MultipleResults => revlog_error(py),
641 708 NodeMapError::RevisionNotInIndex(r) => nodemap_rev_not_in_index(py, r),
642 709 }
643 710 }
644 711
645 712 fn assert_py_eq(
646 713 py: Python,
647 714 method: &str,
648 715 rust: &PyObject,
649 716 c: &PyObject,
650 717 ) -> PyResult<()> {
651 718 let locals = PyDict::new(py);
652 719 locals.set_item(py, "rust".into_py_object(py).into_object(), rust)?;
653 720 locals.set_item(py, "c".into_py_object(py).into_object(), c)?;
654 721 let is_eq: PyBool =
655 722 py.eval("rust == c", None, Some(&locals))?.extract(py)?;
656 723 assert!(
657 724 is_eq.is_true(),
658 725 "{} results differ. Rust: {:?} C: {:?}",
659 726 method,
660 727 rust,
661 728 c
662 729 );
663 730 Ok(())
664 731 }
665 732
666 733 /// Create the module, with __package__ given from parent
667 734 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
668 735 let dotted_name = &format!("{}.revlog", package);
669 736 let m = PyModule::new(py, dotted_name)?;
670 737 m.add(py, "__package__", package)?;
671 738 m.add(py, "__doc__", "RevLog - Rust implementations")?;
672 739
673 740 m.add_class::<MixedIndex>(py)?;
674 741
675 742 let sys = PyModule::import(py, "sys")?;
676 743 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
677 744 sys_modules.set_item(py, dotted_name, &m)?;
678 745
679 746 Ok(m)
680 747 }