rust-index-cpython: cache the heads' PyList representation...
Raphaël Gomès
r52154:9088c6d6 default
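
This changeset touches the revlog index in hg-core: `head_revs_filtered` gains a `py_shortcut` flag and now returns `Result<Option<Vec<Revision>>, GraphError>`, so the cpython layer can keep its cached `PyList` of heads and skip rebuilding it whenever the Rust-side cache is still valid (`Ok(None)` meaning "unchanged since the last call"). A minimal standalone sketch of that contract, with a simplified stand-in for the real `Index` (names and types here are illustrative, not the hg-core API):

use std::collections::HashSet;

/// Simplified stand-in for hg-core's heads cache (illustrative only).
struct HeadsCache {
    cached: Vec<i32>,       // last computed heads
    filtered: HashSet<i32>, // filter the cache was computed against
}

impl HeadsCache {
    /// Mirrors the new contract: `None` means "your previously cached
    /// representation (e.g. a PyList) is still valid, don't rebuild it".
    fn head_revs_shortcut(
        &mut self,
        filtered: &HashSet<i32>,
    ) -> Option<Vec<i32>> {
        if !self.cached.is_empty() && *filtered == self.filtered {
            return None; // cache hit: caller reuses its own cached list
        }
        let heads = vec![0]; // placeholder for the real DAG computation
        self.cached = heads.clone();
        self.filtered = filtered.clone();
        Some(heads) // cache miss: caller must rebuild its list
    }
}

fn main() {
    let mut cache = HeadsCache { cached: vec![], filtered: HashSet::new() };
    let filter = HashSet::new();
    assert!(cache.head_revs_shortcut(&filter).is_some()); // first call computes
    assert!(cache.head_revs_shortcut(&filter).is_none()); // second call: reuse
}

The `Option` doubles as a change signal: a caller that maintains its own materialized list only pays the conversion cost when the heads actually changed.
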
@@ -1,2009 +1,2024 @@
use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
use std::ops::Deref;
use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};

use bitvec::prelude::*;
use byteorder::{BigEndian, ByteOrder};
use bytes_cast::{unaligned, BytesCast};

use super::REVIDX_KNOWN_FLAGS;
use crate::errors::HgError;
use crate::node::{NODE_BYTES_LENGTH, NULL_NODE, STORED_NODE_ID_BYTES};
use crate::revlog::node::Node;
use crate::revlog::{Revision, NULL_REVISION};
use crate::{
    dagops, BaseRevision, FastHashMap, Graph, GraphError, RevlogError,
    RevlogIndex, UncheckedRevision,
};

pub const INDEX_ENTRY_SIZE: usize = 64;
pub const COMPRESSION_MODE_INLINE: u8 = 2;

#[derive(Debug)]
pub struct IndexHeader {
    pub(super) header_bytes: [u8; 4],
}

#[derive(Copy, Clone)]
pub struct IndexHeaderFlags {
    flags: u16,
}

/// Corresponds to the high bits of `_format_flags` in python
impl IndexHeaderFlags {
    /// Corresponds to FLAG_INLINE_DATA in python
    pub fn is_inline(self) -> bool {
        self.flags & 1 != 0
    }
    /// Corresponds to FLAG_GENERALDELTA in python
    pub fn uses_generaldelta(self) -> bool {
        self.flags & 2 != 0
    }
}

/// Corresponds to the INDEX_HEADER structure,
/// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
impl IndexHeader {
    fn format_flags(&self) -> IndexHeaderFlags {
        // No "unknown flags" check here, unlike in python. Maybe there should
        // be.
        IndexHeaderFlags {
            flags: BigEndian::read_u16(&self.header_bytes[0..2]),
        }
    }

    /// The only revlog version currently supported by rhg.
    const REVLOGV1: u16 = 1;

    /// Corresponds to `_format_version` in Python.
    fn format_version(&self) -> u16 {
        BigEndian::read_u16(&self.header_bytes[2..4])
    }

    pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> {
        if index_bytes.is_empty() {
            return Ok(None);
        }
        if index_bytes.len() < 4 {
            return Err(HgError::corrupted(
                "corrupted revlog: can't read the index format header",
            ));
        }
        Ok(Some(IndexHeader {
            header_bytes: {
                let bytes: [u8; 4] =
                    index_bytes[0..4].try_into().expect("impossible");
                bytes
            },
        }))
    }
}
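
// As a standalone illustration (not part of this file), the v1 header parsed
// above is two big-endian u16s: the format flags, then the format version:
fn main() {
    // flags = 0x0003 (inline + generaldelta), version = 0x0001 (REVLOGV1)
    let header_bytes = [0x00u8, 0x03, 0x00, 0x01];
    let flags = u16::from_be_bytes([header_bytes[0], header_bytes[1]]);
    let version = u16::from_be_bytes([header_bytes[2], header_bytes[3]]);
    assert!(flags & 1 != 0); // FLAG_INLINE_DATA, cf. `is_inline`
    assert!(flags & 2 != 0); // FLAG_GENERALDELTA, cf. `uses_generaldelta`
    assert_eq!(version, 1);
}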

/// Abstracts the access to the index bytes since they can be spread between
/// the immutable (bytes) part and the mutable (added) part if any appends
/// happened. This makes it transparent for the callers.
struct IndexData {
    /// Immutable bytes, most likely taken from disk
    bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>,
    /// Used when stripping index contents, keeps track of the start of the
    /// first stripped revision, which is used to give a slice of the
    /// `bytes` field.
    truncation: Option<usize>,
    /// Bytes that were added after reading the index
    added: Vec<u8>,
}

impl IndexData {
    pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>) -> Self {
        Self {
            bytes,
            truncation: None,
            added: vec![],
        }
    }

    pub fn len(&self) -> usize {
        match self.truncation {
            Some(truncation) => truncation + self.added.len(),
            None => self.bytes.len() + self.added.len(),
        }
    }

    fn remove(
        &mut self,
        rev: Revision,
        offsets: Option<&[usize]>,
    ) -> Result<(), RevlogError> {
        let rev = rev.0 as usize;
        let truncation = if let Some(offsets) = offsets {
            offsets[rev]
        } else {
            rev * INDEX_ENTRY_SIZE
        };
        if truncation < self.bytes.len() {
            self.truncation = Some(truncation);
            self.added.clear();
        } else {
            self.added.truncate(truncation - self.bytes.len());
        }
        Ok(())
    }

    fn is_new(&self) -> bool {
        self.bytes.is_empty()
    }
}

impl std::ops::Index<std::ops::Range<usize>> for IndexData {
    type Output = [u8];

    fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
        let start = index.start;
        let end = index.end;
        let immutable_len = match self.truncation {
            Some(truncation) => truncation,
            None => self.bytes.len(),
        };
        if start < immutable_len {
            if end > immutable_len {
                panic!("index data cannot span existing and added ranges");
            }
            &self.bytes[index]
        } else {
            &self.added[start - immutable_len..end - immutable_len]
        }
    }
}

#[derive(Debug, PartialEq, Eq)]
pub struct RevisionDataParams {
    pub flags: u16,
    pub data_offset: u64,
    pub data_compressed_length: i32,
    pub data_uncompressed_length: i32,
    pub data_delta_base: i32,
    pub link_rev: i32,
    pub parent_rev_1: i32,
    pub parent_rev_2: i32,
    pub node_id: [u8; NODE_BYTES_LENGTH],
    pub _sidedata_offset: u64,
    pub _sidedata_compressed_length: i32,
    pub data_compression_mode: u8,
    pub _sidedata_compression_mode: u8,
    pub _rank: i32,
}

impl Default for RevisionDataParams {
    fn default() -> Self {
        Self {
            flags: 0,
            data_offset: 0,
            data_compressed_length: 0,
            data_uncompressed_length: 0,
            data_delta_base: -1,
            link_rev: -1,
            parent_rev_1: -1,
            parent_rev_2: -1,
            node_id: [0; NODE_BYTES_LENGTH],
            _sidedata_offset: 0,
            _sidedata_compressed_length: 0,
            data_compression_mode: COMPRESSION_MODE_INLINE,
            _sidedata_compression_mode: COMPRESSION_MODE_INLINE,
            _rank: -1,
        }
    }
}

#[derive(BytesCast)]
#[repr(C)]
pub struct RevisionDataV1 {
    data_offset_or_flags: unaligned::U64Be,
    data_compressed_length: unaligned::I32Be,
    data_uncompressed_length: unaligned::I32Be,
    data_delta_base: unaligned::I32Be,
    link_rev: unaligned::I32Be,
    parent_rev_1: unaligned::I32Be,
    parent_rev_2: unaligned::I32Be,
    node_id: [u8; STORED_NODE_ID_BYTES],
}

fn _static_assert_size_of_revision_data_v1() {
    let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>;
}

impl RevisionDataParams {
    pub fn validate(&self) -> Result<(), RevlogError> {
        if self.flags & !REVIDX_KNOWN_FLAGS != 0 {
            return Err(RevlogError::corrupted(format!(
                "unknown revlog index flags: {}",
                self.flags
            )));
        }
        if self.data_compression_mode != COMPRESSION_MODE_INLINE {
            return Err(RevlogError::corrupted(format!(
                "invalid data compression mode: {}",
                self.data_compression_mode
            )));
        }
        // FIXME isn't this only for v2 or changelog v2?
        if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE {
            return Err(RevlogError::corrupted(format!(
                "invalid sidedata compression mode: {}",
                self._sidedata_compression_mode
            )));
        }
        Ok(())
    }

    pub fn into_v1(self) -> RevisionDataV1 {
        let data_offset_or_flags = self.data_offset << 16 | self.flags as u64;
        let mut node_id = [0; STORED_NODE_ID_BYTES];
        node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id);
        RevisionDataV1 {
            data_offset_or_flags: data_offset_or_flags.into(),
            data_compressed_length: self.data_compressed_length.into(),
            data_uncompressed_length: self.data_uncompressed_length.into(),
            data_delta_base: self.data_delta_base.into(),
            link_rev: self.link_rev.into(),
            parent_rev_1: self.parent_rev_1.into(),
            parent_rev_2: self.parent_rev_2.into(),
            node_id,
        }
    }
}

/// A Revlog index
pub struct Index {
    bytes: IndexData,
    /// Offsets of starts of index blocks.
    /// Only needed when the index is interleaved with data.
    offsets: RwLock<Option<Vec<usize>>>,
    uses_generaldelta: bool,
    is_inline: bool,
    /// Cache of (head_revisions, filtered_revisions)
    ///
    /// The head revisions in this index, kept in sync. Should
    /// be accessed via the [`Self::head_revs`] method.
    /// The last filtered revisions in this index, used to make sure
    /// we haven't changed filters when returning the cached `head_revs`.
    head_revs: RwLock<(Vec<Revision>, HashSet<Revision>)>,
}

impl Debug for Index {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Index")
            .field("offsets", &self.offsets)
            .field("uses_generaldelta", &self.uses_generaldelta)
            .finish()
    }
}

impl Graph for Index {
    #[inline(always)]
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
        let err = || GraphError::ParentOutOfRange(rev);
        match self.get_entry(rev) {
            Some(entry) => {
                // The C implementation checks that the parents are valid
                // before returning
                Ok([
                    self.check_revision(entry.p1()).ok_or_else(err)?,
                    self.check_revision(entry.p2()).ok_or_else(err)?,
                ])
            }
            None => Ok([NULL_REVISION, NULL_REVISION]),
        }
    }
}

/// A cache suitable for find_snapshots
///
/// Logically equivalent to a mapping whose keys are [`BaseRevision`] and
/// values sets of [`BaseRevision`]
///
/// TODO the dubious part is insisting that errors must be RevlogError
/// we would probably need to sprinkle some magic here, such as an associated
/// type that would be Into<RevlogError> but even that would not be
/// satisfactory, as errors potentially have nothing to do with the revlog.
pub trait SnapshotsCache {
    fn insert_for(
        &mut self,
        rev: BaseRevision,
        value: BaseRevision,
    ) -> Result<(), RevlogError>;
}

impl SnapshotsCache for FastHashMap<BaseRevision, HashSet<BaseRevision>> {
    fn insert_for(
        &mut self,
        rev: BaseRevision,
        value: BaseRevision,
    ) -> Result<(), RevlogError> {
        let all_values = self.entry(rev).or_insert_with(HashSet::new);
        all_values.insert(value);
        Ok(())
    }
}
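
// A standalone sketch of the `SnapshotsCache` contract, with a plain
// `HashMap` standing in for hg-core's `FastHashMap` alias (illustrative only):
fn main() {
    use std::collections::{HashMap, HashSet};
    // Map a delta base to the snapshot revisions built on top of it,
    // exactly the shape the impl above maintains.
    let mut cache: HashMap<i32, HashSet<i32>> = HashMap::new();
    cache.entry(-1).or_insert_with(HashSet::new).insert(0); // rev 0 on null
    cache.entry(0).or_insert_with(HashSet::new).insert(5); // rev 5 on rev 0
    assert!(cache[&0].contains(&5));
}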

impl Index {
    /// Create an index from bytes.
    /// Calculate the start of each entry when is_inline is true.
    pub fn new(
        bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>,
        default_header: IndexHeader,
    ) -> Result<Self, HgError> {
        let header =
            IndexHeader::parse(bytes.as_ref())?.unwrap_or(default_header);

        if header.format_version() != IndexHeader::REVLOGV1 {
            // A proper new version should have had a repo/store
            // requirement.
            return Err(HgError::corrupted("unsupported revlog version"));
        }

        // This is only correct because we know version is REVLOGV1.
        // In v2 we always use generaldelta, while in v0 we never use
        // generaldelta. Similar for [is_inline] (it's only used in v1).
        let uses_generaldelta = header.format_flags().uses_generaldelta();

        if header.format_flags().is_inline() {
            let mut offset: usize = 0;
            let mut offsets = Vec::new();

            while offset + INDEX_ENTRY_SIZE <= bytes.len() {
                offsets.push(offset);
                let end = offset + INDEX_ENTRY_SIZE;
                let entry = IndexEntry {
                    bytes: &bytes[offset..end],
                    offset_override: None,
                };

                offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
            }

            if offset == bytes.len() {
                Ok(Self {
                    bytes: IndexData::new(bytes),
                    offsets: RwLock::new(Some(offsets)),
                    uses_generaldelta,
                    is_inline: true,
                    head_revs: RwLock::new((vec![], HashSet::new())),
                })
            } else {
                Err(HgError::corrupted("unexpected inline revlog length"))
            }
        } else {
            Ok(Self {
                bytes: IndexData::new(bytes),
                offsets: RwLock::new(None),
                uses_generaldelta,
                is_inline: false,
                head_revs: RwLock::new((vec![], HashSet::new())),
            })
        }
    }
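
    // A toy, standalone version of the inline-offset scan performed in
    // `Index::new` above: fixed-size entries, each immediately followed by
    // that revision's compressed data (length faked here in the first byte):
    //
    // fn main() {
    //     const ENTRY_SIZE: usize = 4; // the real revlog uses 64
    //     let data: Vec<u8> = vec![
    //         2, 0, 0, 0, 0xaa, 0xbb, // entry 0, followed by 2 bytes of data
    //         0, 0, 0, 0, // entry 1, no data
    //     ];
    //     let mut offset = 0;
    //     let mut offsets = vec![];
    //     while offset + ENTRY_SIZE <= data.len() {
    //         offsets.push(offset);
    //         let compressed_len = data[offset] as usize;
    //         offset += ENTRY_SIZE + compressed_len;
    //     }
    //     assert_eq!(offsets, vec![0, 6]);
    //     assert_eq!(offset, data.len()); // same consistency check as above
    // }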

    pub fn uses_generaldelta(&self) -> bool {
        self.uses_generaldelta
    }

    /// Value of the inline flag.
    pub fn is_inline(&self) -> bool {
        self.is_inline
    }

    /// Return a slice of bytes if `revlog` is inline. Panic if not.
    pub fn data(&self, start: usize, end: usize) -> &[u8] {
        if !self.is_inline() {
            panic!("tried to access data in the index of a revlog that is not inline");
        }
        &self.bytes[start..end]
    }

    /// Return number of entries of the revlog index.
    pub fn len(&self) -> usize {
        if let Some(offsets) = &*self.get_offsets() {
            offsets.len()
        } else {
            self.bytes.len() / INDEX_ENTRY_SIZE
        }
    }

    pub fn get_offsets(&self) -> RwLockReadGuard<Option<Vec<usize>>> {
        if self.is_inline() {
            {
                // Wrap in a block to drop the read guard
                // TODO perf?
                let mut offsets = self.offsets.write().unwrap();
                if offsets.is_none() {
                    offsets.replace(inline_scan(&self.bytes.bytes).1);
                }
            }
        }
        self.offsets.read().unwrap()
    }

    pub fn get_offsets_mut(&mut self) -> RwLockWriteGuard<Option<Vec<usize>>> {
        let mut offsets = self.offsets.write().unwrap();
        if self.is_inline() && offsets.is_none() {
            offsets.replace(inline_scan(&self.bytes.bytes).1);
        }
        offsets
    }

    /// Returns `true` if the `Index` has zero `entries`.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Return the index entry corresponding to the given revision or `None`
    /// for [`NULL_REVISION`]
    ///
    /// The specified revision being of the checked type, it always exists
    /// if it was validated by this index.
    pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
        if rev == NULL_REVISION {
            return None;
        }
        Some(if let Some(offsets) = &*self.get_offsets() {
            self.get_entry_inline(rev, offsets.as_ref())
        } else {
            self.get_entry_separated(rev)
        })
    }

    /// Return the binary content of the index entry for the given revision
    ///
    /// See [get_entry()](`Self::get_entry()`) for cases when `None` is
    /// returned.
    pub fn entry_binary(&self, rev: Revision) -> Option<&[u8]> {
        self.get_entry(rev).map(|e| {
            let bytes = e.as_bytes();
            if rev.0 == 0 {
                &bytes[4..]
            } else {
                bytes
            }
        })
    }

    pub fn entry_as_params(
        &self,
        rev: UncheckedRevision,
    ) -> Option<RevisionDataParams> {
        let rev = self.check_revision(rev)?;
        self.get_entry(rev).map(|e| RevisionDataParams {
            flags: e.flags(),
            data_offset: if rev.0 == 0 && !self.bytes.is_new() {
                e.flags() as u64
            } else {
                e.raw_offset()
            },
            data_compressed_length: e
                .compressed_len()
                .try_into()
                .unwrap_or_else(|_| {
                    // Python's `unionrepo` sets the compressed length to be
                    // `-1` (or `u32::MAX` if transmuted to `u32`) because it
                    // cannot know the correct compressed length of a given
                    // revision. I'm not sure if this is true, but having this
                    // edge case won't hurt other use cases, let's handle it.
                    assert_eq!(e.compressed_len(), u32::MAX);
                    NULL_REVISION.0
                }),
            data_uncompressed_length: e.uncompressed_len(),
            data_delta_base: e.base_revision_or_base_of_delta_chain().0,
            link_rev: e.link_revision().0,
            parent_rev_1: e.p1().0,
            parent_rev_2: e.p2().0,
            node_id: e.hash().as_bytes().try_into().unwrap(),
            ..Default::default()
        })
    }

    fn get_entry_inline(
        &self,
        rev: Revision,
        offsets: &[usize],
    ) -> IndexEntry {
        let start = offsets[rev.0 as usize];
        let end = start + INDEX_ENTRY_SIZE;
        let bytes = &self.bytes[start..end];

        // See IndexEntry for an explanation of this override.
        let offset_override = Some(end);

        IndexEntry {
            bytes,
            offset_override,
        }
    }

    fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
        let start = rev.0 as usize * INDEX_ENTRY_SIZE;
        let end = start + INDEX_ENTRY_SIZE;
        let bytes = &self.bytes[start..end];

        // Override the offset of the first revision as its bytes are used
        // for the index's metadata (saving space because it is always 0)
        let offset_override = if rev == Revision(0) { Some(0) } else { None };

        IndexEntry {
            bytes,
            offset_override,
        }
    }

    fn null_entry(&self) -> IndexEntry {
        IndexEntry {
            bytes: &[0; INDEX_ENTRY_SIZE],
            offset_override: Some(0),
        }
    }

    /// Return the head revisions of this index
    pub fn head_revs(&self) -> Result<Vec<Revision>, GraphError> {
-        self.head_revs_filtered(&HashSet::new())
+        self.head_revs_filtered(&HashSet::new(), false)
+            .map(|h| h.unwrap())
+    }
+
+    /// Python-specific shortcut to save on PyList creation
+    pub fn head_revs_shortcut(
+        &self,
+    ) -> Result<Option<Vec<Revision>>, GraphError> {
+        self.head_revs_filtered(&HashSet::new(), true)
    }

    /// Return the head revisions of this index
    pub fn head_revs_filtered(
        &self,
        filtered_revs: &HashSet<Revision>,
-    ) -> Result<Vec<Revision>, GraphError> {
+        py_shortcut: bool,
+    ) -> Result<Option<Vec<Revision>>, GraphError> {
        {
            let guard = self
                .head_revs
                .read()
                .expect("RwLock on Index.head_revs should not be poisoned");
            let self_head_revs = &guard.0;
            let self_filtered_revs = &guard.1;
            if !self_head_revs.is_empty()
                && filtered_revs == self_filtered_revs
            {
-                return Ok(self_head_revs.to_owned());
+                if py_shortcut {
+                    // Don't copy the revs since we've already cached them
+                    // on the Python side.
+                    return Ok(None);
+                } else {
+                    return Ok(Some(self_head_revs.to_owned()));
+                }
            }
        }

        let as_vec = if self.is_empty() {
            vec![NULL_REVISION]
        } else {
            let mut not_heads = bitvec![0; self.len()];
            dagops::retain_heads_fast(
                self,
                not_heads.as_mut_bitslice(),
                filtered_revs,
            )?;
            not_heads
                .into_iter()
                .enumerate()
                .filter_map(|(idx, is_not_head)| {
                    if is_not_head {
                        None
                    } else {
                        Some(Revision(idx as BaseRevision))
                    }
                })
                .collect()
        };
        *self
            .head_revs
            .write()
            .expect("RwLock on Index.head_revs should not be poisoned") =
            (as_vec.to_owned(), filtered_revs.to_owned());
-        Ok(as_vec)
+        Ok(Some(as_vec))
    }
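
    // Note the invariant introduced above: with `py_shortcut == false` the
    // function always returns `Ok(Some(_))`, which is why `head_revs` can
    // safely `.map(|h| h.unwrap())`. Only `head_revs_shortcut` can observe
    // `Ok(None)`. A hypothetical binding-side pattern (sketch, not the actual
    // hg-cpython code):
    //
    //     match index.head_revs_shortcut()? {
    //         Some(heads) => cached_pylist = rebuild_list(heads), // changed
    //         None => { /* reuse `cached_pylist` as-is */ }
    //     }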

    /// Obtain the delta chain for a revision.
    ///
    /// `stop_rev` specifies a revision to stop at. If not specified, we
    /// stop at the base of the chain.
    ///
    /// Returns a 2-tuple of (chain, stopped) where `chain` is a vec of
    /// revs in ascending order and `stopped` is a bool indicating whether
    /// `stoprev` was hit.
    pub fn delta_chain(
        &self,
        rev: Revision,
        stop_rev: Option<Revision>,
        using_general_delta: Option<bool>,
    ) -> Result<(Vec<Revision>, bool), HgError> {
        let mut current_rev = rev;
        let mut entry = self.get_entry(rev).unwrap();
        let mut chain = vec![];
        let using_general_delta =
            using_general_delta.unwrap_or_else(|| self.uses_generaldelta());
        while current_rev.0 != entry.base_revision_or_base_of_delta_chain().0
            && stop_rev.map(|r| r != current_rev).unwrap_or(true)
        {
            chain.push(current_rev);
            let new_rev = if using_general_delta {
                entry.base_revision_or_base_of_delta_chain()
            } else {
                UncheckedRevision(current_rev.0 - 1)
            };
            current_rev = self.check_revision(new_rev).ok_or_else(|| {
                HgError::corrupted(format!("Revision {new_rev} out of range"))
            })?;
            if current_rev.0 == NULL_REVISION.0 {
                break;
            }
            entry = self.get_entry(current_rev).unwrap()
        }

        let stopped = if stop_rev.map(|r| current_rev == r).unwrap_or(false) {
            true
        } else {
            chain.push(current_rev);
            false
        };
        chain.reverse();
        Ok((chain, stopped))
    }

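    // A standalone toy of the chain walk in `delta_chain` above, where
    // `base[r] == r` marks a full snapshot (the chain base); the data is
    // invented for illustration:
    //
    // fn main() {
    //     let base = [0usize, 0, 1, 1, 3];
    //     let mut chain = vec![];
    //     let mut rev = 4usize;
    //     while base[rev] != rev {
    //         chain.push(rev);
    //         rev = base[rev];
    //     }
    //     chain.push(rev); // no stop_rev given, so the base is included
    //     chain.reverse();
    //     assert_eq!(chain, vec![0, 1, 3, 4]); // ascending, base first
    // }
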
    pub fn find_snapshots(
        &self,
        start_rev: UncheckedRevision,
        end_rev: UncheckedRevision,
        cache: &mut impl SnapshotsCache,
    ) -> Result<(), RevlogError> {
        let mut start_rev = start_rev.0;
        let mut end_rev = end_rev.0;
        end_rev += 1;
        let len = self.len().try_into().unwrap();
        if end_rev > len {
            end_rev = len;
        }
        if start_rev < 0 {
            start_rev = 0;
        }
        for rev in start_rev..end_rev {
            if !self.is_snapshot_unchecked(Revision(rev))? {
                continue;
            }
            let mut base = self
                .get_entry(Revision(rev))
                .unwrap()
                .base_revision_or_base_of_delta_chain();
            if base.0 == rev {
                base = NULL_REVISION.into();
            }
            cache.insert_for(base.0, rev)?;
        }
        Ok(())
    }

    fn clear_head_revs(&self) {
        self.head_revs
            .write()
            .expect("RwLock on Index.head_revs should not be poisoined")
            .0
            .clear()
    }

    /// TODO move this to the trait probably, along with other things
    pub fn append(
        &mut self,
        revision_data: RevisionDataParams,
    ) -> Result<(), RevlogError> {
        revision_data.validate()?;
        let new_offset = self.bytes.len();
        if let Some(offsets) = &mut *self.get_offsets_mut() {
            offsets.push(new_offset)
        }
        self.bytes.added.extend(revision_data.into_v1().as_bytes());
        self.clear_head_revs();
        Ok(())
    }

    pub fn pack_header(&self, header: i32) -> [u8; 4] {
        header.to_be_bytes()
    }

    pub fn remove(&mut self, rev: Revision) -> Result<(), RevlogError> {
        let offsets = self.get_offsets().clone();
        self.bytes.remove(rev, offsets.as_deref())?;
        if let Some(offsets) = &mut *self.get_offsets_mut() {
            offsets.truncate(rev.0 as usize)
        }
        self.clear_head_revs();
        Ok(())
    }

    pub fn clear_caches(&self) {
        // We need to get the 'inline' value from Python at init and use this
        // instead of offsets to determine whether we're inline since we might
        // clear caches. This implies re-populating the offsets on-demand.
        *self
            .offsets
            .write()
            .expect("RwLock on Index.offsets should not be poisoed") = None;
        self.clear_head_revs();
    }

    /// Unchecked version of `is_snapshot`.
    /// Assumes the caller checked that `rev` is within a valid revision range.
    pub fn is_snapshot_unchecked(
        &self,
        mut rev: Revision,
    ) -> Result<bool, RevlogError> {
        while rev.0 >= 0 {
            let entry = self.get_entry(rev).unwrap();
            let mut base = entry.base_revision_or_base_of_delta_chain().0;
            if base == rev.0 {
                base = NULL_REVISION.0;
            }
            if base == NULL_REVISION.0 {
                return Ok(true);
            }
            let [mut p1, mut p2] = self
                .parents(rev)
                .map_err(|_| RevlogError::InvalidRevision)?;
            while let Some(p1_entry) = self.get_entry(p1) {
                if p1_entry.compressed_len() != 0 || p1.0 == 0 {
                    break;
                }
                let parent_base =
                    p1_entry.base_revision_or_base_of_delta_chain();
                if parent_base.0 == p1.0 {
                    break;
                }
                p1 = self
                    .check_revision(parent_base)
                    .ok_or(RevlogError::InvalidRevision)?;
            }
            while let Some(p2_entry) = self.get_entry(p2) {
                if p2_entry.compressed_len() != 0 || p2.0 == 0 {
                    break;
                }
                let parent_base =
                    p2_entry.base_revision_or_base_of_delta_chain();
                if parent_base.0 == p2.0 {
                    break;
                }
                p2 = self
                    .check_revision(parent_base)
                    .ok_or(RevlogError::InvalidRevision)?;
            }
            if base == p1.0 || base == p2.0 {
                return Ok(false);
            }
            rev = self
                .check_revision(base.into())
                .ok_or(RevlogError::InvalidRevision)?;
        }
        Ok(rev == NULL_REVISION)
    }

    /// Return whether the given revision is a snapshot. Returns an error if
    /// `rev` is not within a valid revision range.
    pub fn is_snapshot(
        &self,
        rev: UncheckedRevision,
    ) -> Result<bool, RevlogError> {
        let rev = self
            .check_revision(rev)
            .ok_or_else(|| RevlogError::corrupted("test"))?;
        self.is_snapshot_unchecked(rev)
    }

    /// Slice revs to reduce the amount of unrelated data to be read from disk.
    ///
    /// The index is sliced into groups that should be read in one time.
    ///
    /// The initial chunk is sliced until the overall density
    /// (payload/chunks-span ratio) is above `target_density`.
    /// No gap smaller than `min_gap_size` is skipped.
    pub fn slice_chunk_to_density(
        &self,
        revs: &[Revision],
        target_density: f64,
        min_gap_size: usize,
    ) -> Vec<Vec<Revision>> {
        if revs.is_empty() {
            return vec![];
        }
        if revs.len() == 1 {
            return vec![revs.to_owned()];
        }
        let delta_chain_span = self.segment_span(revs);
        if delta_chain_span < min_gap_size {
            return vec![revs.to_owned()];
        }
        let entries: Vec<_> = revs
            .iter()
            .map(|r| {
                (*r, self.get_entry(*r).unwrap_or_else(|| self.null_entry()))
            })
            .collect();

        let mut read_data = delta_chain_span;
        let chain_payload: u32 =
            entries.iter().map(|(_r, e)| e.compressed_len()).sum();
        let mut density = if delta_chain_span > 0 {
            chain_payload as f64 / delta_chain_span as f64
        } else {
            1.0
        };

        if density >= target_density {
            return vec![revs.to_owned()];
        }

        // Store the gaps in a heap to have them sorted by decreasing size
        let mut gaps = Vec::new();
        let mut previous_end = None;

        for (i, (_rev, entry)) in entries.iter().enumerate() {
            let start = entry.c_start() as usize;
            let length = entry.compressed_len();

            // Skip empty revisions to form larger holes
            if length == 0 {
                continue;
            }

            if let Some(end) = previous_end {
                let gap_size = start - end;
                // Only consider holes that are large enough
                if gap_size > min_gap_size {
                    gaps.push((gap_size, i));
                }
            }
            previous_end = Some(start + length as usize);
        }
        if gaps.is_empty() {
            return vec![revs.to_owned()];
        }
        // sort the gaps to pop them from largest to small
        gaps.sort_unstable();

        // Collect the indices of the largest holes until
        // the density is acceptable
        let mut selected = vec![];
        while let Some((gap_size, gap_id)) = gaps.pop() {
            if density >= target_density {
                break;
            }
            selected.push(gap_id);

            // The gap sizes are stored as negatives to be sorted decreasingly
            // by the heap
            read_data -= gap_size;
            density = if read_data > 0 {
                chain_payload as f64 / read_data as f64
            } else {
                1.0
            };
            if density >= target_density {
                break;
            }
        }
        selected.sort_unstable();
        selected.push(revs.len());

        // Cut the revs at collected indices
        let mut previous_idx = 0;
        let mut chunks = vec![];
        for idx in selected {
            let chunk = self.trim_chunk(&entries, previous_idx, idx);
            if !chunk.is_empty() {
                chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
            }
            previous_idx = idx;
        }
        let chunk = self.trim_chunk(&entries, previous_idx, entries.len());
        if !chunk.is_empty() {
            chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
        }

        chunks
    }

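    // Worked example of the density criterion above (numbers invented):
    // three revisions with 10-byte payloads at offsets 0, 10 and 80 span
    // 90 bytes, so density = 30/90 = 0.33. With target_density = 0.5 and
    // min_gap_size = 16, the 60-byte gap before the third revision is the
    // only qualifying hole; skipping it brings read_data to 30 and density
    // to 30/30 = 1.0 >= 0.5, so the result is two read groups:
    // [[r0, r1], [r2]].
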
904 /// Get the byte span of a segment of sorted revisions.
919 /// Get the byte span of a segment of sorted revisions.
905 ///
920 ///
906 /// Occurrences of [`NULL_REVISION`] are ignored at the beginning of
921 /// Occurrences of [`NULL_REVISION`] are ignored at the beginning of
907 /// the `revs` segment.
922 /// the `revs` segment.
908 ///
923 ///
909 /// panics:
924 /// panics:
910 /// - if `revs` is empty or only made of `NULL_REVISION`
925 /// - if `revs` is empty or only made of `NULL_REVISION`
911 /// - if cannot retrieve entry for the last or first not null element of
926 /// - if cannot retrieve entry for the last or first not null element of
912 /// `revs`.
927 /// `revs`.
913 fn segment_span(&self, revs: &[Revision]) -> usize {
928 fn segment_span(&self, revs: &[Revision]) -> usize {
914 if revs.is_empty() {
929 if revs.is_empty() {
915 return 0;
930 return 0;
916 }
931 }
917 let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
932 let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
918 let end = last_entry.c_start() + last_entry.compressed_len() as u64;
933 let end = last_entry.c_start() + last_entry.compressed_len() as u64;
919 let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
934 let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
920 let start = if (*first_rev).0 == 0 {
935 let start = if (*first_rev).0 == 0 {
921 0
936 0
922 } else {
937 } else {
923 self.get_entry(*first_rev).unwrap().c_start()
938 self.get_entry(*first_rev).unwrap().c_start()
924 };
939 };
925 (end - start) as usize
940 (end - start) as usize
926 }
941 }
927
942
928 /// Returns `&revs[startidx..endidx]` without empty trailing revs
943 /// Returns `&revs[startidx..endidx]` without empty trailing revs
929 fn trim_chunk<'a>(
944 fn trim_chunk<'a>(
930 &'a self,
945 &'a self,
931 revs: &'a [(Revision, IndexEntry)],
946 revs: &'a [(Revision, IndexEntry)],
932 start: usize,
947 start: usize,
933 mut end: usize,
948 mut end: usize,
934 ) -> &'a [(Revision, IndexEntry)] {
949 ) -> &'a [(Revision, IndexEntry)] {
935 // Trim empty revs at the end, except the very first rev of a chain
950 // Trim empty revs at the end, except the very first rev of a chain
936 let last_rev = revs[end - 1].0;
951 let last_rev = revs[end - 1].0;
937 if last_rev.0 < self.len() as BaseRevision {
952 if last_rev.0 < self.len() as BaseRevision {
938 while end > 1
953 while end > 1
939 && end > start
954 && end > start
940 && revs[end - 1].1.compressed_len() == 0
955 && revs[end - 1].1.compressed_len() == 0
941 {
956 {
942 end -= 1
957 end -= 1
943 }
958 }
944 }
959 }
945 &revs[start..end]
960 &revs[start..end]
946 }
961 }

    /// Computes the set of revisions for each non-public phase from `roots`,
    /// which are the last known roots for each non-public phase.
    pub fn compute_phases_map_sets(
        &self,
        roots: HashMap<Phase, Vec<Revision>>,
    ) -> Result<(usize, RootsPerPhase), GraphError> {
        let mut phases = HashMap::new();
        let mut min_phase_rev = NULL_REVISION;

        for phase in Phase::non_public_phases() {
            if let Some(phase_roots) = roots.get(phase) {
                let min_rev =
                    self.add_roots_get_min(phase_roots, &mut phases, *phase);
                if min_rev != NULL_REVISION
                    && (min_phase_rev == NULL_REVISION
                        || min_rev < min_phase_rev)
                {
                    min_phase_rev = min_rev;
                }
            } else {
                continue;
            };
        }
        let mut phase_sets: RootsPerPhase = Default::default();

        if min_phase_rev == NULL_REVISION {
            min_phase_rev = Revision(self.len() as BaseRevision);
        }

        for rev in min_phase_rev.0..self.len() as BaseRevision {
            let rev = Revision(rev);
            let [p1, p2] = self.parents(rev)?;

            const DEFAULT_PHASE: &Phase = &Phase::Public;
            if p1.0 >= 0
                && phases.get(&p1).unwrap_or(DEFAULT_PHASE)
                    > phases.get(&rev).unwrap_or(DEFAULT_PHASE)
            {
                phases.insert(rev, phases[&p1]);
            }
            if p2.0 >= 0
                && phases.get(&p2).unwrap_or(DEFAULT_PHASE)
                    > phases.get(&rev).unwrap_or(DEFAULT_PHASE)
            {
                phases.insert(rev, phases[&p2]);
            }
            let set = match phases.get(&rev).unwrap_or(DEFAULT_PHASE) {
                Phase::Public => continue,
                phase => &mut phase_sets[*phase as usize - 1],
            };
            set.insert(rev);
        }

        Ok((self.len(), phase_sets))
    }
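
    // Illustrative sketch (hypothetical, not part of the upstream API) of
    // the propagation rule used above: a revision is bumped to the highest
    // (least public) phase among its own root phase and its parent's phase.
    // Toy revisions are plain `usize` with at most one parent, phases are
    // plain `u8` where bigger means less public, and parent indices are
    // assumed smaller than the child's, as in a revlog.
    #[allow(dead_code)]
    fn phase_propagation_sketch(
        parent: &[Option<usize>],
        root_phases: &HashMap<usize, u8>,
    ) -> Vec<u8> {
        let mut phases = vec![0u8; parent.len()];
        for rev in 0..parent.len() {
            let own = *root_phases.get(&rev).unwrap_or(&0);
            let inherited = parent[rev].map(|p| phases[p]).unwrap_or(0);
            phases[rev] = own.max(inherited);
        }
        phases
    }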

    fn add_roots_get_min(
        &self,
        phase_roots: &[Revision],
        phases: &mut HashMap<Revision, Phase>,
        phase: Phase,
    ) -> Revision {
        let mut min_rev = NULL_REVISION;

        for root in phase_roots {
            phases.insert(*root, phase);
            if min_rev == NULL_REVISION || min_rev > *root {
                min_rev = *root;
            }
        }
        min_rev
    }

    /// Return `(heads(::(<roots> and <roots>::<heads>)))`
    /// If `include_path` is `true`, return `(<roots>::<heads>)`.
    ///
    /// `min_root` and `roots` are unchecked since they are just used as
    /// a bound or for comparison and don't need to represent a valid revision.
    /// In practice, the only invalid revision passed is the working directory
    /// revision ([`i32::MAX`]).
    pub fn reachable_roots(
        &self,
        min_root: UncheckedRevision,
        mut heads: Vec<Revision>,
        roots: HashSet<UncheckedRevision>,
        include_path: bool,
    ) -> Result<HashSet<Revision>, GraphError> {
        if roots.is_empty() {
            return Ok(HashSet::new());
        }
        let mut reachable = HashSet::new();
        let mut seen = HashMap::new();

        while let Some(rev) = heads.pop() {
            if roots.contains(&rev.into()) {
                reachable.insert(rev);
                if !include_path {
                    continue;
                }
            }
            let parents = self.parents(rev)?;
            seen.insert(rev, parents);
            for parent in parents {
                if parent.0 >= min_root.0 && !seen.contains_key(&parent) {
                    heads.push(parent);
                }
            }
        }
        if !include_path {
            return Ok(reachable);
        }
        let mut revs: Vec<_> = seen.keys().collect();
        revs.sort_unstable();
        for rev in revs {
            for parent in seen[rev] {
                if reachable.contains(&parent) {
                    reachable.insert(*rev);
                }
            }
        }
        Ok(reachable)
    }
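
    // Sketch (hypothetical names, not the real API) of the two-pass shape of
    // `reachable_roots` above, in its `include_path` variant: first walk up
    // from the heads, recording each visited revision and its parents, then
    // sweep the visited revisions in increasing order so that reachability
    // flows from the roots down to their descendants. Parent indices are
    // assumed smaller than the child's, with -1 standing for "no parent".
    #[allow(dead_code)]
    fn reachable_sketch(
        parents: &[[i32; 2]],
        mut heads: Vec<usize>,
        roots: &HashSet<usize>,
    ) -> HashSet<usize> {
        let mut reachable = HashSet::new();
        let mut seen = HashMap::new();
        while let Some(rev) = heads.pop() {
            if roots.contains(&rev) {
                reachable.insert(rev);
            }
            seen.insert(rev, parents[rev]);
            for &p in &parents[rev] {
                if p >= 0 && !seen.contains_key(&(p as usize)) {
                    heads.push(p as usize);
                }
            }
        }
        // Second pass: a visited revision is reachable if a parent is.
        let mut revs: Vec<_> = seen.keys().copied().collect();
        revs.sort_unstable();
        for rev in revs {
            let ps = seen[&rev];
            if ps.iter().any(|&p| p >= 0 && reachable.contains(&(p as usize)))
            {
                reachable.insert(rev);
            }
        }
        reachable
    }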

    /// Given a (possibly overlapping) set of revs, return all the
    /// common ancestor heads: `heads(::args[0] and ::args[1] and ...)`
    pub fn common_ancestor_heads(
        &self,
        revisions: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        // Given that `revisions` is expected to be small, we find this
        // shortcut potentially acceptable, especially given that `hg-cpython`
        // could very much bypass this, constructing a vector of unique values
        // from the outset.
        let as_set: HashSet<Revision> = revisions.iter().copied().collect();
        // Besides deduplicating, the C version also implements the shortcut
        // for `NULL_REVISION`:
        if as_set.contains(&NULL_REVISION) {
            return Ok(vec![]);
        }

        let revisions: Vec<Revision> = as_set.into_iter().collect();

        if revisions.len() < 8 {
            self.find_gca_candidates::<u8>(&revisions)
        } else if revisions.len() < 64 {
            self.find_gca_candidates::<u64>(&revisions)
        } else {
            self.find_gca_candidates::<NonStaticPoisonableBitSet>(&revisions)
        }
    }
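
    // The size thresholds above leave one bit spare for the poison marker:
    // a `u8` set tracks at most 7 input revisions and a `u64` at most 63,
    // so 8 or more inputs move to the next representation and 64 or more
    // fall back to the heap-allocated bit set.
    #[allow(dead_code)]
    fn gca_bitset_capacity_demo() {
        assert_eq!(0u8.capacity(), 7);
        assert_eq!(0u64.capacity(), 63);
    }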

    pub fn ancestors(
        &self,
        revisions: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        self.find_deepest_revs(&self.common_ancestor_heads(revisions)?)
    }

    /// Given a disjoint set of revs, return all candidates for the
    /// greatest common ancestor. In revset notation, this is the set
    /// `heads(::a and ::b and ...)`
    fn find_gca_candidates<BS: PoisonableBitSet + Clone>(
        &self,
        revs: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        if revs.is_empty() {
            return Ok(vec![]);
        }
        let revcount = revs.len();
        let mut candidates = vec![];
        let max_rev = revs.iter().max().unwrap();

        let mut seen = BS::vec_of_empty(revs.len(), (max_rev.0 + 1) as usize);

        for (idx, rev) in revs.iter().enumerate() {
            seen[rev.0 as usize].add(idx);
        }
        let mut current_rev = *max_rev;
        // Number of revisions whose inspection in the main loop
        // will give a result or trigger inspection of other revisions
        let mut interesting = revcount;

        // The algorithm works on a vector of bit sets, indexed by revision
        // numbers and iterated in reverse order.
        // An entry in this vector is poisoned if and only if the corresponding
        // revision is a common, yet not maximal ancestor.

        // The principle of the algorithm is as follows:
        // For a revision `r`, when entering the loop, `seen[r]` is either
        // poisoned or the subset of `revs` of which `r` is an ancestor.
        // If this subset is full, then `r` is a solution and its parents
        // have to be poisoned.
        //
        // At each iteration, the bit sets of the parents are updated by
        // union with `seen[r]`.
        // As we walk the index from the end, we are sure we have encountered
        // all children of `r` before `r`, hence we know that `seen[r]` is
        // fully computed.
        //
        // On top of that there are several optimizations that make reading
        // less obvious than the comment above:
        // - The `interesting` counter allows breaking early
        // - The loop starts from `max(revs)`
        // - Early return in case it is detected that one of the incoming revs
        //   is a common ancestor of all of them.
        while current_rev.0 >= 0 && interesting > 0 {
            let current_seen = seen[current_rev.0 as usize].clone();

            if current_seen.is_empty() {
                current_rev = Revision(current_rev.0 - 1);
                continue;
            }
            let mut poison = current_seen.is_poisoned();
            if !poison {
                interesting -= 1;
                if current_seen.is_full_range(revcount) {
                    candidates.push(current_rev);
                    poison = true;

                    // Being a common ancestor, if `current_rev` is among
                    // the input revisions, it is *the* answer.
                    for rev in revs {
                        if *rev == current_rev {
                            return Ok(candidates);
                        }
                    }
                }
            }
            for parent in self.parents(current_rev)? {
                if parent == NULL_REVISION {
                    continue;
                }
                let parent_seen = &mut seen[parent.0 as usize];
                if poison {
                    // this block is logically equivalent to poisoning parent
                    // and counting it as non interesting if it
                    // has been seen before (hence counted then as interesting)
                    if !parent_seen.is_empty() && !parent_seen.is_poisoned() {
                        interesting -= 1;
                    }
                    parent_seen.poison();
                } else {
                    if parent_seen.is_empty() {
                        interesting += 1;
                    }
                    parent_seen.union(&current_seen);
                }
            }

            current_rev = Revision(current_rev.0 - 1);
        }

        Ok(candidates)
    }
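
    // Worked sketch (hypothetical helper, not the upstream algorithm) of the
    // bit-set walk above, on a toy parents table where parent indices are
    // smaller than the child's and -1 means "no parent". Each input rev owns
    // one bit; walking from the top and ORing each node's bits into its
    // parents, a node whose bits cover every input is a common ancestor.
    // Poisoning is elided for brevity, so this returns *all* common
    // ancestors, not just the maximal ones that `find_gca_candidates` keeps.
    #[allow(dead_code)]
    fn gca_bits_sketch(parents: &[[i32; 2]], revs: &[usize]) -> Vec<usize> {
        assert!(revs.len() < 8, "u8 masks track at most 7 revisions");
        let mut seen = vec![0u8; parents.len()];
        for (idx, &r) in revs.iter().enumerate() {
            seen[r] |= 1 << idx;
        }
        let full: u8 = (1 << revs.len()) - 1;
        let mut commons = Vec::new();
        for rev in (0..parents.len()).rev() {
            let bits = seen[rev];
            if bits == full {
                commons.push(rev);
            }
            for &p in &parents[rev] {
                if p >= 0 {
                    seen[p as usize] |= bits;
                }
            }
        }
        commons
    }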

    /// Given a disjoint set of revs, return the subset with the longest path
    /// to the root.
    fn find_deepest_revs(
        &self,
        revs: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        // TODO replace this all with just comparing rank?
        // Also, the original implementations in C/Python are cryptic; we're
        // not even sure we actually need this.
        if revs.len() <= 1 {
            return Ok(revs.to_owned());
        }
        let max_rev = revs.iter().max().unwrap().0;
        let mut interesting = HashMap::new();
        let mut seen = vec![0; max_rev as usize + 1];
        let mut depth = vec![0; max_rev as usize + 1];
        let mut mapping = vec![];
        let mut revs = revs.to_owned();
        revs.sort_unstable();

        for (idx, rev) in revs.iter().enumerate() {
            depth[rev.0 as usize] = 1;
            let shift = 1 << idx;
            seen[rev.0 as usize] = shift;
            interesting.insert(shift, 1);
            mapping.push((shift, *rev));
        }

        let mut current_rev = Revision(max_rev);
        while current_rev.0 >= 0 && interesting.len() > 1 {
            let current_depth = depth[current_rev.0 as usize];
            if current_depth == 0 {
                current_rev = Revision(current_rev.0 - 1);
                continue;
            }

            let current_seen = seen[current_rev.0 as usize];
            for parent in self.parents(current_rev)? {
                if parent == NULL_REVISION {
                    continue;
                }
                let parent_seen = seen[parent.0 as usize];
                let parent_depth = depth[parent.0 as usize];
                if parent_depth <= current_depth {
                    depth[parent.0 as usize] = current_depth + 1;
                    if parent_seen != current_seen {
                        *interesting.get_mut(&current_seen).unwrap() += 1;
                        seen[parent.0 as usize] = current_seen;
                        if parent_seen != 0 {
                            let parent_interesting =
                                interesting.get_mut(&parent_seen).unwrap();
                            *parent_interesting -= 1;
                            if *parent_interesting == 0 {
                                interesting.remove(&parent_seen);
                            }
                        }
                    }
                } else if current_depth == parent_depth - 1 {
                    let either_seen = parent_seen | current_seen;
                    if either_seen == parent_seen {
                        continue;
                    }
                    seen[parent.0 as usize] = either_seen;
                    interesting
                        .entry(either_seen)
                        .and_modify(|v| *v += 1)
                        .or_insert(1);
                    *interesting.get_mut(&parent_seen).unwrap() -= 1;
                    if interesting[&parent_seen] == 0 {
                        interesting.remove(&parent_seen);
                    }
                }
            }
            *interesting.get_mut(&current_seen).unwrap() -= 1;
            if interesting[&current_seen] == 0 {
                interesting.remove(&current_seen);
            }

            current_rev = Revision(current_rev.0 - 1);
        }

        if interesting.len() != 1 {
            return Ok(vec![]);
        }
        let mask = interesting.keys().next().unwrap();

        Ok(mapping
            .into_iter()
            .filter_map(|(shift, rev)| {
                if (mask & shift) != 0 {
                    return Some(rev);
                }
                None
            })
            .collect())
    }
}
1301 /// The kind of functionality needed by find_gca_candidates
1316 /// The kind of functionality needed by find_gca_candidates
1302 ///
1317 ///
1303 /// This is a bit mask which can be declared to be "poisoned", which callers
1318 /// This is a bit mask which can be declared to be "poisoned", which callers
1304 /// interpret to break out of some loops.
1319 /// interpret to break out of some loops.
1305 ///
1320 ///
1306 /// The maximum capacity of the bit mask is up to the actual implementation
1321 /// The maximum capacity of the bit mask is up to the actual implementation
1307 trait PoisonableBitSet: Sized + PartialEq {
1322 trait PoisonableBitSet: Sized + PartialEq {
1308 /// Return a vector of exactly n elements, initialized to be empty.
1323 /// Return a vector of exactly n elements, initialized to be empty.
1309 ///
1324 ///
1310 /// Optimization can vastly depend on implementation. Those being `Copy`
1325 /// Optimization can vastly depend on implementation. Those being `Copy`
1311 /// and having constant capacity typically can have a very simple
1326 /// and having constant capacity typically can have a very simple
1312 /// implementation.
1327 /// implementation.
1313 fn vec_of_empty(sets_size: usize, vec_len: usize) -> Vec<Self>;
1328 fn vec_of_empty(sets_size: usize, vec_len: usize) -> Vec<Self>;
1314
1329
1315 /// The size of the bit mask in memory
1330 /// The size of the bit mask in memory
1316 fn size(&self) -> usize;
1331 fn size(&self) -> usize;
1317
1332
1318 /// The number of elements that can be represented in the set.
1333 /// The number of elements that can be represented in the set.
1319 ///
1334 ///
1320 /// Another way to put it is that it is the highest integer `C` such that
1335 /// Another way to put it is that it is the highest integer `C` such that
1321 /// the set is guaranteed to always be a subset of the integer range
1336 /// the set is guaranteed to always be a subset of the integer range
1322 /// `[0, C)`
1337 /// `[0, C)`
1323 fn capacity(&self) -> usize;
1338 fn capacity(&self) -> usize;
1324
1339
1325 /// Declare `n` to belong to the set
1340 /// Declare `n` to belong to the set
1326 fn add(&mut self, n: usize);
1341 fn add(&mut self, n: usize);
1327
1342
1328 /// Declare `n` not to belong to the set
1343 /// Declare `n` not to belong to the set
1329 fn discard(&mut self, n: usize);
1344 fn discard(&mut self, n: usize);
1330
1345
1331 /// Replace this bit set by its union with other
1346 /// Replace this bit set by its union with other
1332 fn union(&mut self, other: &Self);
1347 fn union(&mut self, other: &Self);
1333
1348
1334 /// Poison the bit set
1349 /// Poison the bit set
1335 ///
1350 ///
1336 /// Interpretation up to the caller
1351 /// Interpretation up to the caller
1337 fn poison(&mut self);
1352 fn poison(&mut self);
1338
1353
1339 /// Is the bit set poisoned?
1354 /// Is the bit set poisoned?
1340 ///
1355 ///
1341 /// Interpretation is up to the caller
1356 /// Interpretation is up to the caller
1342 fn is_poisoned(&self) -> bool;
1357 fn is_poisoned(&self) -> bool;
1343
1358
1344 /// Is the bit set empty?
1359 /// Is the bit set empty?
1345 fn is_empty(&self) -> bool;
1360 fn is_empty(&self) -> bool;
1346
1361
1347 /// return `true` if and only if the bit is the full range `[0, n)`
1362 /// return `true` if and only if the bit is the full range `[0, n)`
1348 /// of integers
1363 /// of integers
1349 fn is_full_range(&self, n: usize) -> bool;
1364 fn is_full_range(&self, n: usize) -> bool;
1350 }
1365 }

const U64_POISON: u64 = 1 << 63;
const U8_POISON: u8 = 1 << 7;

impl PoisonableBitSet for u64 {
    fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
        vec![0u64; vec_len]
    }

    fn size(&self) -> usize {
        8
    }

    fn capacity(&self) -> usize {
        63
    }

    fn add(&mut self, n: usize) {
        (*self) |= 1u64 << n;
    }

    fn discard(&mut self, n: usize) {
        (*self) &= u64::MAX - (1u64 << n);
    }

    fn union(&mut self, other: &Self) {
        if *self != *other {
            (*self) |= *other;
        }
    }

    fn is_full_range(&self, n: usize) -> bool {
        *self + 1 == (1u64 << n)
    }

    fn is_empty(&self) -> bool {
        *self == 0
    }

    fn poison(&mut self) {
        *self = U64_POISON;
    }

    fn is_poisoned(&self) -> bool {
        // equality comparison would be tempting but would not resist
        // operations after poisoning (even if these should be bogus).
        *self >= U64_POISON
    }
}
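
// Sketch of the poison convention on `u64`: the poison marker is the top
// bit, so `is_poisoned` keeps detecting it even after later unions mix more
// low bits into the value. `add` is called through the trait to avoid any
// clash with `std::ops::Add::add` on a concrete integer.
#[allow(dead_code)]
fn u64_poison_demo() {
    let mut set: u64 = 0;
    PoisonableBitSet::add(&mut set, 0);
    PoisonableBitSet::add(&mut set, 1);
    // Bits {0, 1} cover exactly the full range [0, 2).
    assert!(set.is_full_range(2));
    set.poison();
    set.union(&1u64);
    assert!(set.is_poisoned());
}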

impl PoisonableBitSet for u8 {
    fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
        vec![0; vec_len]
    }

    fn size(&self) -> usize {
        1
    }

    fn capacity(&self) -> usize {
        7
    }

    fn add(&mut self, n: usize) {
        (*self) |= 1 << n;
    }

    fn discard(&mut self, n: usize) {
        (*self) &= u8::MAX - (1 << n);
    }

    fn union(&mut self, other: &Self) {
        if *self != *other {
            (*self) |= *other;
        }
    }

    fn is_full_range(&self, n: usize) -> bool {
        *self + 1 == (1 << n)
    }

    fn is_empty(&self) -> bool {
        *self == 0
    }

    fn poison(&mut self) {
        *self = U8_POISON;
    }

    fn is_poisoned(&self) -> bool {
        // equality comparison would be tempting but would not resist
        // operations after poisoning (even if these should be bogus).
        *self >= U8_POISON
    }
}

/// A poisonable bit set whose capacity is not known at compile time but
/// is constant after initial construction
///
/// This can be way further optimized if performance assessments (speed
/// and/or RAM) require it.
/// As far as RAM is concerned, for large vectors of these, the main problem
/// would be the repetition of set_size in each item. We would need a trait
/// to abstract over the idea of a vector of such bit sets to do better.
#[derive(Clone, PartialEq)]
struct NonStaticPoisonableBitSet {
    set_size: usize,
    bit_set: Vec<u64>,
}

/// Number of `u64` needed for a [`NonStaticPoisonableBitSet`] of given size
fn non_static_poisonable_inner_len(set_size: usize) -> usize {
    1 + (set_size + 1) / 64
}
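
// Quick check of the sizing rule: room is always reserved for `set_size`
// data bits plus one poison bit, at the price of occasionally allocating one
// spare word (e.g. `set_size == 63` would fit in a single `u64` but gets
// two).
#[allow(dead_code)]
fn inner_len_demo() {
    assert_eq!(non_static_poisonable_inner_len(0), 1);
    assert_eq!(non_static_poisonable_inner_len(63), 2);
    assert_eq!(non_static_poisonable_inner_len(64), 2);
    assert_eq!(non_static_poisonable_inner_len(128), 3);
}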

impl NonStaticPoisonableBitSet {
    /// The index of the sub-bit set for the given n, and the index inside
    /// the latter
    fn index(&self, n: usize) -> (usize, usize) {
        (n / 64, n % 64)
    }
}

/// Mock implementation to ensure that the trait makes sense
impl PoisonableBitSet for NonStaticPoisonableBitSet {
    fn vec_of_empty(set_size: usize, vec_len: usize) -> Vec<Self> {
        let tmpl = Self {
            set_size,
            bit_set: vec![0u64; non_static_poisonable_inner_len(set_size)],
        };
        vec![tmpl; vec_len]
    }

    fn size(&self) -> usize {
        8 + self.bit_set.len() * 8
    }

    fn capacity(&self) -> usize {
        self.set_size
    }

    fn add(&mut self, n: usize) {
        let (sub_bs, bit_pos) = self.index(n);
        self.bit_set[sub_bs] |= 1 << bit_pos
    }

    fn discard(&mut self, n: usize) {
        let (sub_bs, bit_pos) = self.index(n);
        // Clear (rather than set) the bit: `&=` with the complement,
        // as in the `u64` and `u8` implementations.
        self.bit_set[sub_bs] &= u64::MAX - (1 << bit_pos)
    }

    fn union(&mut self, other: &Self) {
        assert!(
            self.set_size == other.set_size,
            "Binary operations on bit sets can only be done on same size"
        );
        // Cover every word, including the last one, so that high data bits
        // are not dropped, matching the behavior of the fixed-size
        // implementations.
        for i in 0..self.bit_set.len() {
            self.bit_set[i] |= other.bit_set[i]
        }
    }

    fn is_full_range(&self, n: usize) -> bool {
        // All previous words full, plus the low `bit_pos` bits of the last
        // partial word, for `n` bits in total.
        let (sub_bs, bit_pos) = self.index(n);
        self.bit_set[..sub_bs].iter().all(|bs| *bs == u64::MAX)
            && self.bit_set[sub_bs] == (1 << bit_pos) - 1
    }

    fn is_empty(&self) -> bool {
        self.bit_set.iter().all(|bs| *bs == 0u64)
    }

    fn poison(&mut self) {
        let (sub_bs, bit_pos) = self.index(self.set_size);
        self.bit_set[sub_bs] = 1 << bit_pos;
    }

    fn is_poisoned(&self) -> bool {
        let (sub_bs, bit_pos) = self.index(self.set_size);
        self.bit_set[sub_bs] >= 1 << bit_pos
    }
}

/// Set of roots of all non-public phases
pub type RootsPerPhase = [HashSet<Revision>; Phase::non_public_phases().len()];

#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
pub enum Phase {
    Public = 0,
    Draft = 1,
    Secret = 2,
    Archived = 3,
    Internal = 4,
}

impl TryFrom<usize> for Phase {
    type Error = RevlogError;

    fn try_from(value: usize) -> Result<Self, Self::Error> {
        Ok(match value {
            0 => Self::Public,
            1 => Self::Draft,
            2 => Self::Secret,
            32 => Self::Archived,
            96 => Self::Internal,
            v => {
                return Err(RevlogError::corrupted(format!(
                    "invalid phase value {}",
                    v
                )))
            }
        })
    }
}
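
// The accepted raw values mirror Mercurial's on-disk phase numbers, which
// are not contiguous: `archived` and `internal` live at 32 and 96, while the
// enum itself stays densely numbered for array indexing.
#[allow(dead_code)]
fn phase_from_raw_demo() {
    assert_eq!(Phase::try_from(1usize).unwrap(), Phase::Draft);
    assert_eq!(Phase::try_from(32usize).unwrap(), Phase::Archived);
    assert_eq!(Phase::try_from(96usize).unwrap(), Phase::Internal);
    assert!(Phase::try_from(5usize).is_err());
}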

impl Phase {
    pub const fn all_phases() -> &'static [Self] {
        &[
            Self::Public,
            Self::Draft,
            Self::Secret,
            Self::Archived,
            Self::Internal,
        ]
    }
    pub const fn non_public_phases() -> &'static [Self] {
        &[Self::Draft, Self::Secret, Self::Archived, Self::Internal]
    }
}

fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
    let mut offset: usize = 0;
    let mut offsets = Vec::new();

    while offset + INDEX_ENTRY_SIZE <= bytes.len() {
        offsets.push(offset);
        let end = offset + INDEX_ENTRY_SIZE;
        let entry = IndexEntry {
            bytes: &bytes[offset..end],
            offset_override: None,
        };

        offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
    }
    (offset, offsets)
}
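
// The same scanning idea on a toy format (hypothetical, for illustration):
// records made of a fixed 4-byte big-endian length header immediately
// followed by that many payload bytes. Like `inline_scan`, it returns the
// end offset together with the start offset of every record.
#[allow(dead_code)]
fn toy_inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
    let mut offset = 0;
    let mut starts = Vec::new();
    while offset + 4 <= bytes.len() {
        starts.push(offset);
        let record_len =
            BigEndian::read_u32(&bytes[offset..offset + 4]) as usize;
        offset += 4 + record_len;
    }
    (offset, starts)
}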

impl super::RevlogIndex for Index {
    fn len(&self) -> usize {
        self.len()
    }

    fn node(&self, rev: Revision) -> Option<&Node> {
        if rev == NULL_REVISION {
            return Some(&NULL_NODE);
        }
        self.get_entry(rev).map(|entry| entry.hash())
    }
}

#[derive(Debug)]
pub struct IndexEntry<'a> {
    bytes: &'a [u8],
    /// Allows overriding the offset value of the entry.
    ///
    /// For interleaved index and data, the offset stored in the index
    /// corresponds to the separated data offset.
    /// It has to be overridden with the actual offset in the interleaved
    /// index which is just after the index block.
    ///
    /// For separated index and data, the offset stored in the first index
    /// entry is mixed with the index headers.
    /// It has to be overridden with 0.
    offset_override: Option<usize>,
}

impl<'a> IndexEntry<'a> {
    /// Return the offset of the data.
    pub fn offset(&self) -> usize {
        if let Some(offset_override) = self.offset_override {
            offset_override
        } else {
            let mut bytes = [0; 8];
            bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
            BigEndian::read_u64(&bytes[..]) as usize
        }
    }

    pub fn raw_offset(&self) -> u64 {
        BigEndian::read_u64(&self.bytes[0..8])
    }

    /// Same result (except potentially for rev 0) as C `index_get_start()`
    fn c_start(&self) -> u64 {
        self.raw_offset() >> 16
    }
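
    // Sketch of the first-8-bytes layout: the upper 48 bits hold the data
    // offset and the lower 16 bits hold the flags, which is why `c_start`
    // shifts the raw value right by 16. The constants below are arbitrary
    // illustration values.
    #[allow(dead_code)]
    fn offset_packing_demo() {
        let raw: u64 = (0xABCDEF << 16) | 0x0001;
        assert_eq!(raw >> 16, 0xABCDEF); // the 48-bit offset part
        assert_eq!(raw & 0xFFFF, 0x0001); // the 16-bit flags part
    }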

    pub fn flags(&self) -> u16 {
        BigEndian::read_u16(&self.bytes[6..=7])
    }

    /// Return the compressed length of the data.
    pub fn compressed_len(&self) -> u32 {
        BigEndian::read_u32(&self.bytes[8..=11])
    }

    /// Return the uncompressed length of the data.
    pub fn uncompressed_len(&self) -> i32 {
        BigEndian::read_i32(&self.bytes[12..=15])
    }

    /// Return the revision from which the data has been derived.
    pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
        // TODO Maybe return an Option when base_revision == rev?
        // Requires adding rev to IndexEntry

        BigEndian::read_i32(&self.bytes[16..]).into()
    }

    pub fn link_revision(&self) -> UncheckedRevision {
        BigEndian::read_i32(&self.bytes[20..]).into()
    }

    pub fn p1(&self) -> UncheckedRevision {
        BigEndian::read_i32(&self.bytes[24..]).into()
    }

    pub fn p2(&self) -> UncheckedRevision {
        BigEndian::read_i32(&self.bytes[28..]).into()
    }

    /// Return the hash of the revision's full text.
    ///
    /// Currently, SHA-1 is used and only the first 20 bytes of this field
    /// are used.
    pub fn hash(&self) -> &'a Node {
        (&self.bytes[32..52]).try_into().unwrap()
    }

    pub fn as_bytes(&self) -> &'a [u8] {
        self.bytes
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::node::NULL_NODE;

    #[cfg(test)]
    #[derive(Debug, Copy, Clone)]
    pub struct IndexEntryBuilder {
        is_first: bool,
        is_inline: bool,
        is_general_delta: bool,
        version: u16,
        offset: usize,
        compressed_len: usize,
        uncompressed_len: usize,
        base_revision_or_base_of_delta_chain: Revision,
        link_revision: Revision,
        p1: Revision,
        p2: Revision,
        node: Node,
    }

    #[cfg(test)]
    impl IndexEntryBuilder {
        #[allow(clippy::new_without_default)]
        pub fn new() -> Self {
            Self {
                is_first: false,
                is_inline: false,
                is_general_delta: true,
                version: 1,
                offset: 0,
                compressed_len: 0,
                uncompressed_len: 0,
                base_revision_or_base_of_delta_chain: Revision(0),
                link_revision: Revision(0),
                p1: NULL_REVISION,
                p2: NULL_REVISION,
                node: NULL_NODE,
            }
        }

        pub fn is_first(&mut self, value: bool) -> &mut Self {
            self.is_first = value;
            self
        }

        pub fn with_inline(&mut self, value: bool) -> &mut Self {
            self.is_inline = value;
            self
        }

        pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
            self.is_general_delta = value;
            self
        }

        pub fn with_version(&mut self, value: u16) -> &mut Self {
            self.version = value;
            self
        }

        pub fn with_offset(&mut self, value: usize) -> &mut Self {
            self.offset = value;
            self
        }

        pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
            self.compressed_len = value;
            self
        }

        pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
            self.uncompressed_len = value;
            self
        }

        pub fn with_base_revision_or_base_of_delta_chain(
            &mut self,
            value: Revision,
        ) -> &mut Self {
            self.base_revision_or_base_of_delta_chain = value;
            self
        }

        pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
            self.link_revision = value;
            self
        }

        pub fn with_p1(&mut self, value: Revision) -> &mut Self {
            self.p1 = value;
            self
        }

        pub fn with_p2(&mut self, value: Revision) -> &mut Self {
            self.p2 = value;
            self
        }

        pub fn with_node(&mut self, value: Node) -> &mut Self {
            self.node = value;
            self
        }

        pub fn build(&self) -> Vec<u8> {
            let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
            if self.is_first {
                bytes.extend(&match (self.is_general_delta, self.is_inline) {
                    (false, false) => [0u8, 0],
                    (false, true) => [0u8, 1],
                    (true, false) => [0u8, 2],
                    (true, true) => [0u8, 3],
                });
                bytes.extend(&self.version.to_be_bytes());
                // Remaining offset bytes.
                bytes.extend(&[0u8; 2]);
            } else {
                // Offset stored on 48 bits (6 bytes)
                bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
            }
            bytes.extend(&[0u8; 2]); // Revision flags.
            bytes.extend(&(self.compressed_len as u32).to_be_bytes());
            bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
            bytes.extend(
                &self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
            );
            bytes.extend(&self.link_revision.0.to_be_bytes());
            bytes.extend(&self.p1.0.to_be_bytes());
            bytes.extend(&self.p2.0.to_be_bytes());
            bytes.extend(self.node.as_bytes());
            bytes.extend(vec![0u8; 12]);
            bytes
        }
    }
1828
1843
1829 pub fn is_inline(index_bytes: &[u8]) -> bool {
1844 pub fn is_inline(index_bytes: &[u8]) -> bool {
1830 IndexHeader::parse(index_bytes)
1845 IndexHeader::parse(index_bytes)
1831 .expect("too short")
1846 .expect("too short")
1832 .unwrap()
1847 .unwrap()
1833 .format_flags()
1848 .format_flags()
1834 .is_inline()
1849 .is_inline()
1835 }
1850 }
1836
1851
1837 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
1852 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
1838 IndexHeader::parse(index_bytes)
1853 IndexHeader::parse(index_bytes)
1839 .expect("too short")
1854 .expect("too short")
1840 .unwrap()
1855 .unwrap()
1841 .format_flags()
1856 .format_flags()
1842 .uses_generaldelta()
1857 .uses_generaldelta()
1843 }
1858 }
1844
1859
1845 pub fn get_version(index_bytes: &[u8]) -> u16 {
1860 pub fn get_version(index_bytes: &[u8]) -> u16 {
1846 IndexHeader::parse(index_bytes)
1861 IndexHeader::parse(index_bytes)
1847 .expect("too short")
1862 .expect("too short")
1848 .unwrap()
1863 .unwrap()
1849 .format_version()
1864 .format_version()
1850 }
1865 }
1851
1866
1852 #[test]
1867 #[test]
1853 fn flags_when_no_inline_flag_test() {
1868 fn flags_when_no_inline_flag_test() {
1854 let bytes = IndexEntryBuilder::new()
1869 let bytes = IndexEntryBuilder::new()
1855 .is_first(true)
1870 .is_first(true)
1856 .with_general_delta(false)
1871 .with_general_delta(false)
1857 .with_inline(false)
1872 .with_inline(false)
1858 .build();
1873 .build();
1859
1874
1860 assert!(!is_inline(&bytes));
1875 assert!(!is_inline(&bytes));
1861 assert!(!uses_generaldelta(&bytes));
1876 assert!(!uses_generaldelta(&bytes));
1862 }
1877 }
1863
1878
1864 #[test]
1879 #[test]
1865 fn flags_when_inline_flag_test() {
1880 fn flags_when_inline_flag_test() {
1866 let bytes = IndexEntryBuilder::new()
1881 let bytes = IndexEntryBuilder::new()
1867 .is_first(true)
1882 .is_first(true)
1868 .with_general_delta(false)
1883 .with_general_delta(false)
1869 .with_inline(true)
1884 .with_inline(true)
1870 .build();
1885 .build();
1871
1886
1872 assert!(is_inline(&bytes));
1887 assert!(is_inline(&bytes));
1873 assert!(!uses_generaldelta(&bytes));
1888 assert!(!uses_generaldelta(&bytes));
1874 }
1889 }
1875
1890
1876 #[test]
1891 #[test]
1877 fn flags_when_inline_and_generaldelta_flags_test() {
1892 fn flags_when_inline_and_generaldelta_flags_test() {
1878 let bytes = IndexEntryBuilder::new()
1893 let bytes = IndexEntryBuilder::new()
1879 .is_first(true)
1894 .is_first(true)
1880 .with_general_delta(true)
1895 .with_general_delta(true)
1881 .with_inline(true)
1896 .with_inline(true)
1882 .build();
1897 .build();
1883
1898
1884 assert!(is_inline(&bytes));
1899 assert!(is_inline(&bytes));
1885 assert!(uses_generaldelta(&bytes));
1900 assert!(uses_generaldelta(&bytes));
1886 }
1901 }
1887
1902
1888 #[test]
1903 #[test]
1889 fn test_offset() {
1904 fn test_offset() {
1890 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1905 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1891 let entry = IndexEntry {
1906 let entry = IndexEntry {
1892 bytes: &bytes,
1907 bytes: &bytes,
1893 offset_override: None,
1908 offset_override: None,
1894 };
1909 };
1895
1910
1896 assert_eq!(entry.offset(), 1)
1911 assert_eq!(entry.offset(), 1)
1897 }
1912 }
1898
1913
1899 #[test]
1914 #[test]
1900 fn test_with_overridden_offset() {
1915 fn test_with_overridden_offset() {
1901 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1916 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1902 let entry = IndexEntry {
1917 let entry = IndexEntry {
1903 bytes: &bytes,
1918 bytes: &bytes,
1904 offset_override: Some(2),
1919 offset_override: Some(2),
1905 };
1920 };
1906
1921
1907 assert_eq!(entry.offset(), 2)
1922 assert_eq!(entry.offset(), 2)
1908 }
1923 }
1909
1924
1910 #[test]
1925 #[test]
1911 fn test_compressed_len() {
1926 fn test_compressed_len() {
1912 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
1927 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
1913 let entry = IndexEntry {
1928 let entry = IndexEntry {
1914 bytes: &bytes,
1929 bytes: &bytes,
1915 offset_override: None,
1930 offset_override: None,
1916 };
1931 };
1917
1932
1918 assert_eq!(entry.compressed_len(), 1)
1933 assert_eq!(entry.compressed_len(), 1)
1919 }
1934 }
1920
1935
1921 #[test]
1936 #[test]
1922 fn test_uncompressed_len() {
1937 fn test_uncompressed_len() {
1923 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
1938 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
1924 let entry = IndexEntry {
1939 let entry = IndexEntry {
1925 bytes: &bytes,
1940 bytes: &bytes,
1926 offset_override: None,
1941 offset_override: None,
1927 };
1942 };
1928
1943
1929 assert_eq!(entry.uncompressed_len(), 1)
1944 assert_eq!(entry.uncompressed_len(), 1)
1930 }
1945 }
1931
1946
1932 #[test]
1947 #[test]
1933 fn test_base_revision_or_base_of_delta_chain() {
1948 fn test_base_revision_or_base_of_delta_chain() {
1934 let bytes = IndexEntryBuilder::new()
1949 let bytes = IndexEntryBuilder::new()
1935 .with_base_revision_or_base_of_delta_chain(Revision(1))
1950 .with_base_revision_or_base_of_delta_chain(Revision(1))
1936 .build();
1951 .build();
1937 let entry = IndexEntry {
1952 let entry = IndexEntry {
1938 bytes: &bytes,
1953 bytes: &bytes,
1939 offset_override: None,
1954 offset_override: None,
1940 };
1955 };
1941
1956
1942 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
1957 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
1943 }
1958 }
1944
1959
1945 #[test]
1960 #[test]
1946 fn link_revision_test() {
1961 fn link_revision_test() {
1947 let bytes = IndexEntryBuilder::new()
1962 let bytes = IndexEntryBuilder::new()
1948 .with_link_revision(Revision(123))
1963 .with_link_revision(Revision(123))
1949 .build();
1964 .build();
1950
1965
1951 let entry = IndexEntry {
1966 let entry = IndexEntry {
1952 bytes: &bytes,
1967 bytes: &bytes,
1953 offset_override: None,
1968 offset_override: None,
1954 };
1969 };
1955
1970
1956 assert_eq!(entry.link_revision(), 123.into());
1971 assert_eq!(entry.link_revision(), 123.into());
1957 }
1972 }
1958
1973
1959 #[test]
1974 #[test]
1960 fn p1_test() {
1975 fn p1_test() {
1961 let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();
1976 let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();
1962
1977
1963 let entry = IndexEntry {
1978 let entry = IndexEntry {
1964 bytes: &bytes,
1979 bytes: &bytes,
1965 offset_override: None,
1980 offset_override: None,
1966 };
1981 };
1967
1982
1968 assert_eq!(entry.p1(), 123.into());
1983 assert_eq!(entry.p1(), 123.into());
1969 }
1984 }
1970
1985
1971 #[test]
1986 #[test]
1972 fn p2_test() {
1987 fn p2_test() {
1973 let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();
1988 let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();
1974
1989
1975 let entry = IndexEntry {
1990 let entry = IndexEntry {
1976 bytes: &bytes,
1991 bytes: &bytes,
1977 offset_override: None,
1992 offset_override: None,
1978 };
1993 };
1979
1994
1980 assert_eq!(entry.p2(), 123.into());
1995 assert_eq!(entry.p2(), 123.into());
1981 }
1996 }
1982
1997
1983 #[test]
1998 #[test]
1984 fn node_test() {
1999 fn node_test() {
1985 let node = Node::from_hex("0123456789012345678901234567890123456789")
2000 let node = Node::from_hex("0123456789012345678901234567890123456789")
1986 .unwrap();
2001 .unwrap();
1987 let bytes = IndexEntryBuilder::new().with_node(node).build();
2002 let bytes = IndexEntryBuilder::new().with_node(node).build();
1988
2003
1989 let entry = IndexEntry {
2004 let entry = IndexEntry {
1990 bytes: &bytes,
2005 bytes: &bytes,
1991 offset_override: None,
2006 offset_override: None,
1992 };
2007 };
1993
2008
1994 assert_eq!(*entry.hash(), node);
2009 assert_eq!(*entry.hash(), node);
1995 }
2010 }
1996
2011
1997 #[test]
2012 #[test]
1998 fn version_test() {
2013 fn version_test() {
1999 let bytes = IndexEntryBuilder::new()
2014 let bytes = IndexEntryBuilder::new()
2000 .is_first(true)
2015 .is_first(true)
2001 .with_version(2)
2016 .with_version(2)
2002 .build();
2017 .build();
2003
2018
2004 assert_eq!(get_version(&bytes), 2)
2019 assert_eq!(get_version(&bytes), 2)
2005 }
2020 }
2006 }
2021 }
2007
2022
2008 #[cfg(test)]
2023 #[cfg(test)]
2009 pub use tests::IndexEntryBuilder;
2024 pub use tests::IndexEntryBuilder;
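// Annotation (not part of the diff): the `#[cfg(test)]` re-export lets unit
// tests in other hg-core modules build synthetic 64-byte index entries
// without a real revlog. Illustrative use, based only on the builder
// methods exercised in the tests above:
//
//     let bytes = IndexEntryBuilder::new()
//         .with_link_revision(Revision(123))
//         .with_p1(Revision(1))
//         .build();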
@@ -1,1126 +1,1157 b''
1 // revlog.rs
1 // revlog.rs
2 //
2 //
3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::{
8 use crate::{
9 conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else},
9 conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else},
10 utils::{node_from_py_bytes, node_from_py_object},
10 utils::{node_from_py_bytes, node_from_py_object},
11 PyRevision,
11 PyRevision,
12 };
12 };
13 use cpython::{
13 use cpython::{
14 buffer::{Element, PyBuffer},
14 buffer::{Element, PyBuffer},
15 exc::{IndexError, ValueError},
15 exc::{IndexError, ValueError},
16 ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyInt, PyList,
16 ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyInt, PyList,
17 PyModule, PyObject, PyResult, PySet, PyString, PyTuple, Python,
17 PyModule, PyObject, PyResult, PySet, PyString, PyTuple, Python,
18 PythonObject, ToPyObject, UnsafePyLeaked,
18 PythonObject, ToPyObject, UnsafePyLeaked,
19 };
19 };
20 use hg::{
20 use hg::{
21 errors::HgError,
21 errors::HgError,
22 index::{
22 index::{
23 IndexHeader, Phase, RevisionDataParams, SnapshotsCache,
23 IndexHeader, Phase, RevisionDataParams, SnapshotsCache,
24 INDEX_ENTRY_SIZE,
24 INDEX_ENTRY_SIZE,
25 },
25 },
26 nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
26 nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
27 revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex},
27 revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex},
28 BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION,
28 BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION,
29 };
29 };
30 use std::{cell::RefCell, collections::HashMap};
30 use std::{cell::RefCell, collections::HashMap};
31 use vcsgraph::graph::Graph as VCSGraph;
31 use vcsgraph::graph::Graph as VCSGraph;
32
32
33 pub struct PySharedIndex {
33 pub struct PySharedIndex {
34 /// The underlying hg-core index
34 /// The underlying hg-core index
35 pub(crate) inner: &'static hg::index::Index,
35 pub(crate) inner: &'static hg::index::Index,
36 }
36 }
37
37
38 /// Return a Struct implementing the Graph trait
38 /// Return a Struct implementing the Graph trait
39 pub(crate) fn py_rust_index_to_graph(
39 pub(crate) fn py_rust_index_to_graph(
40 py: Python,
40 py: Python,
41 index: PyObject,
41 index: PyObject,
42 ) -> PyResult<UnsafePyLeaked<PySharedIndex>> {
42 ) -> PyResult<UnsafePyLeaked<PySharedIndex>> {
43 let midx = index.extract::<Index>(py)?;
43 let midx = index.extract::<Index>(py)?;
44 let leaked = midx.index(py).leak_immutable();
44 let leaked = midx.index(py).leak_immutable();
45 // Safety: we don't leak the "faked" reference out of the `UnsafePyLeaked`
45 // Safety: we don't leak the "faked" reference out of the `UnsafePyLeaked`
46 Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: idx }) })
46 Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: idx }) })
47 }
47 }
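// Annotation (not part of the diff): `leak_immutable()` hands out a
// reference detached from the GIL borrow, which is what makes the
// `&'static hg::index::Index` field in `PySharedIndex` possible. The
// `unsafe` block is sound only because the faked `'static` reference never
// escapes the returned `UnsafePyLeaked`, which revalidates its owner on
// each access. Hedged sketch of the shape involved, assuming
// rust-cpython's shared-ref API:
//
//     let leaked /* : UnsafePyLeaked<&'static hg::index::Index> */ =
//         midx.index(py).leak_immutable();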
48
48
49 impl Clone for PySharedIndex {
49 impl Clone for PySharedIndex {
50 fn clone(&self) -> Self {
50 fn clone(&self) -> Self {
51 Self { inner: self.inner }
51 Self { inner: self.inner }
52 }
52 }
53 }
53 }
54
54
55 impl Graph for PySharedIndex {
55 impl Graph for PySharedIndex {
56 #[inline(always)]
56 #[inline(always)]
57 fn parents(&self, rev: Revision) -> Result<[Revision; 2], hg::GraphError> {
57 fn parents(&self, rev: Revision) -> Result<[Revision; 2], hg::GraphError> {
58 self.inner.parents(rev)
58 self.inner.parents(rev)
59 }
59 }
60 }
60 }
61
61
62 impl VCSGraph for PySharedIndex {
62 impl VCSGraph for PySharedIndex {
63 #[inline(always)]
63 #[inline(always)]
64 fn parents(
64 fn parents(
65 &self,
65 &self,
66 rev: BaseRevision,
66 rev: BaseRevision,
67 ) -> Result<vcsgraph::graph::Parents, vcsgraph::graph::GraphReadError>
67 ) -> Result<vcsgraph::graph::Parents, vcsgraph::graph::GraphReadError>
68 {
68 {
69 // FIXME This trait should be reworked to decide between Revision
69 // FIXME This trait should be reworked to decide between Revision
70 // and UncheckedRevision, get better error names, etc.
70 // and UncheckedRevision, get better error names, etc.
71 match Graph::parents(self, Revision(rev)) {
71 match Graph::parents(self, Revision(rev)) {
72 Ok(parents) => {
72 Ok(parents) => {
73 Ok(vcsgraph::graph::Parents([parents[0].0, parents[1].0]))
73 Ok(vcsgraph::graph::Parents([parents[0].0, parents[1].0]))
74 }
74 }
75 Err(hg::GraphError::ParentOutOfRange(rev)) => {
75 Err(hg::GraphError::ParentOutOfRange(rev)) => {
76 Err(vcsgraph::graph::GraphReadError::KeyedInvalidKey(rev.0))
76 Err(vcsgraph::graph::GraphReadError::KeyedInvalidKey(rev.0))
77 }
77 }
78 }
78 }
79 }
79 }
80 }
80 }
81
81
82 impl RevlogIndex for PySharedIndex {
82 impl RevlogIndex for PySharedIndex {
83 fn len(&self) -> usize {
83 fn len(&self) -> usize {
84 self.inner.len()
84 self.inner.len()
85 }
85 }
86 fn node(&self, rev: Revision) -> Option<&Node> {
86 fn node(&self, rev: Revision) -> Option<&Node> {
87 self.inner.node(rev)
87 self.inner.node(rev)
88 }
88 }
89 }
89 }
90
90
91 py_class!(pub class Index |py| {
91 py_class!(pub class Index |py| {
92 @shared data index: hg::index::Index;
92 @shared data index: hg::index::Index;
93 data nt: RefCell<Option<CoreNodeTree>>;
93 data nt: RefCell<Option<CoreNodeTree>>;
94 data docket: RefCell<Option<PyObject>>;
94 data docket: RefCell<Option<PyObject>>;
95 // Holds a reference to the mmap'ed persistent nodemap data
95 // Holds a reference to the mmap'ed persistent nodemap data
96 data nodemap_mmap: RefCell<Option<PyBuffer>>;
96 data nodemap_mmap: RefCell<Option<PyBuffer>>;
97 // Holds a reference to the mmap'ed persistent index data
97 // Holds a reference to the mmap'ed persistent index data
98 data index_mmap: RefCell<Option<PyBuffer>>;
98 data index_mmap: RefCell<Option<PyBuffer>>;
99 data head_revs_py_list: RefCell<Option<PyList>>;
99
100
100 def __new__(
101 def __new__(
101 _cls,
102 _cls,
102 data: PyObject,
103 data: PyObject,
103 default_header: u32,
104 default_header: u32,
104 ) -> PyResult<Self> {
105 ) -> PyResult<Self> {
105 Self::new(py, data, default_header)
106 Self::new(py, data, default_header)
106 }
107 }
107
108
108 /// Compatibility layer used for Python consumers needing access to the C index
109 /// Compatibility layer used for Python consumers needing access to the C index
109 ///
110 ///
110 /// The only use case so far is `scmutil.shortesthexnodeidprefix`,
111 /// The only use case so far is `scmutil.shortesthexnodeidprefix`,
111 /// which may need to build a custom `nodetree` based on a specified revset.
112 /// which may need to build a custom `nodetree` based on a specified revset.
112 /// With a Rust implementation of the nodemap, we will be able to get rid of
113 /// With a Rust implementation of the nodemap, we will be able to get rid of
113 /// this, by exposing our own standalone nodemap class,
114 /// this, by exposing our own standalone nodemap class,
114 /// ready to accept `Index`.
115 /// ready to accept `Index`.
115 /* def get_cindex(&self) -> PyResult<PyObject> {
116 /* def get_cindex(&self) -> PyResult<PyObject> {
116 Ok(self.cindex(py).borrow().inner().clone_ref(py))
117 Ok(self.cindex(py).borrow().inner().clone_ref(py))
117 }
118 }
118 */
119 */
119 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
120 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
120
121
121 /// Return Revision if found, raises a bare `error.RevlogError`
122 /// Return Revision if found, raises a bare `error.RevlogError`
122 /// in case of ambiguity, same as C version does
123 /// in case of ambiguity, same as C version does
123 def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
124 def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
124 let opt = self.get_nodetree(py)?.borrow();
125 let opt = self.get_nodetree(py)?.borrow();
125 let nt = opt.as_ref().unwrap();
126 let nt = opt.as_ref().unwrap();
126 let ridx = &*self.index(py).borrow();
127 let ridx = &*self.index(py).borrow();
127 let node = node_from_py_bytes(py, &node)?;
128 let node = node_from_py_bytes(py, &node)?;
128 let rust_rev =
129 let rust_rev =
129 nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
130 nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
130 Ok(rust_rev.map(Into::into))
131 Ok(rust_rev.map(Into::into))
131
132
132 }
133 }
133
134
134 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
135 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
135 /// is not found.
136 /// is not found.
136 ///
137 ///
137 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
138 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
138 /// will catch and rewrap with it
139 /// will catch and rewrap with it
139 def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
140 def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
140 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
141 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
141 }
142 }
142
143
143 /// return True if the node exists in the index
144 /// return True if the node exists in the index
144 def has_node(&self, node: PyBytes) -> PyResult<bool> {
145 def has_node(&self, node: PyBytes) -> PyResult<bool> {
145 // TODO OPTIM we could avoid a needless conversion here,
146 // TODO OPTIM we could avoid a needless conversion here,
146 // to be done when the scaffolding for the pure Rust switch is removed,
147 // to be done when the scaffolding for the pure Rust switch is removed,
147 // as `get_rev()` currently does the necessary assertions
148 // as `get_rev()` currently does the necessary assertions
148 self.get_rev(py, node).map(|opt| opt.is_some())
149 self.get_rev(py, node).map(|opt| opt.is_some())
149 }
150 }
150
151
151 /// find the length of the shortest hex nodeid that uniquely identifies a binary ID
152 /// find the length of the shortest hex nodeid that uniquely identifies a binary ID
152 def shortest(&self, node: PyBytes) -> PyResult<usize> {
153 def shortest(&self, node: PyBytes) -> PyResult<usize> {
153 let opt = self.get_nodetree(py)?.borrow();
154 let opt = self.get_nodetree(py)?.borrow();
154 let nt = opt.as_ref().unwrap();
155 let nt = opt.as_ref().unwrap();
155 let idx = &*self.index(py).borrow();
156 let idx = &*self.index(py).borrow();
156 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
157 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
157 {
158 {
158 Ok(Some(l)) => Ok(l),
159 Ok(Some(l)) => Ok(l),
159 Ok(None) => Err(revlog_error(py)),
160 Ok(None) => Err(revlog_error(py)),
160 Err(e) => Err(nodemap_error(py, e)),
161 Err(e) => Err(nodemap_error(py, e)),
161 }
162 }
162 }
163 }
163
164
164 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
165 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
165 let opt = self.get_nodetree(py)?.borrow();
166 let opt = self.get_nodetree(py)?.borrow();
166 let nt = opt.as_ref().unwrap();
167 let nt = opt.as_ref().unwrap();
167 let idx = &*self.index(py).borrow();
168 let idx = &*self.index(py).borrow();
168
169
169 let node_as_string = if cfg!(feature = "python3-sys") {
170 let node_as_string = if cfg!(feature = "python3-sys") {
170 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
171 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
171 }
172 }
172 else {
173 else {
173 let node = node.extract::<PyBytes>(py)?;
174 let node = node.extract::<PyBytes>(py)?;
174 String::from_utf8_lossy(node.data(py)).to_string()
175 String::from_utf8_lossy(node.data(py)).to_string()
175 };
176 };
176
177
177 let prefix = NodePrefix::from_hex(&node_as_string)
178 let prefix = NodePrefix::from_hex(&node_as_string)
178 .map_err(|_| PyErr::new::<ValueError, _>(
179 .map_err(|_| PyErr::new::<ValueError, _>(
179 py, format!("Invalid node or prefix '{}'", node_as_string))
180 py, format!("Invalid node or prefix '{}'", node_as_string))
180 )?;
181 )?;
181
182
182 nt.find_bin(idx, prefix)
183 nt.find_bin(idx, prefix)
183 // TODO make an inner API returning the node directly
184 // TODO make an inner API returning the node directly
184 .map(|opt| opt.map(
185 .map(|opt| opt.map(
185 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
186 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
186 .map_err(|e| nodemap_error(py, e))
187 .map_err(|e| nodemap_error(py, e))
187
188
188 }
189 }
189
190
190 /// append an index entry
191 /// append an index entry
191 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
192 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
192 if tup.len(py) < 8 {
193 if tup.len(py) < 8 {
193 // this is better than the panic promised by tup.get_item()
194 // this is better than the panic promised by tup.get_item()
194 return Err(
195 return Err(
195 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
196 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
196 }
197 }
197 let node_bytes = tup.get_item(py, 7).extract(py)?;
198 let node_bytes = tup.get_item(py, 7).extract(py)?;
198 let node = node_from_py_object(py, &node_bytes)?;
199 let node = node_from_py_object(py, &node_bytes)?;
199
200
200 let rev = self.len(py)? as BaseRevision;
201 let rev = self.len(py)? as BaseRevision;
201
202
202 // This is ok since we will just add the revision to the index
203 // This is ok since we will just add the revision to the index
203 let rev = Revision(rev);
204 let rev = Revision(rev);
204 self.index(py)
205 self.index(py)
205 .borrow_mut()
206 .borrow_mut()
206 .append(py_tuple_to_revision_data_params(py, tup)?)
207 .append(py_tuple_to_revision_data_params(py, tup)?)
207 .unwrap();
208 .unwrap();
208 let idx = &*self.index(py).borrow();
209 let idx = &*self.index(py).borrow();
209 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
210 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
210 .insert(idx, &node, rev)
211 .insert(idx, &node, rev)
211 .map_err(|e| nodemap_error(py, e))?;
212 .map_err(|e| nodemap_error(py, e))?;
212 Ok(py.None())
213 Ok(py.None())
213 }
214 }
214
215
215 def __delitem__(&self, key: PyObject) -> PyResult<()> {
216 def __delitem__(&self, key: PyObject) -> PyResult<()> {
216 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
217 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
217 let start = if let Ok(rev) = key.extract(py) {
218 let start = if let Ok(rev) = key.extract(py) {
218 UncheckedRevision(rev)
219 UncheckedRevision(rev)
219 } else {
220 } else {
220 let start = key.getattr(py, "start")?;
221 let start = key.getattr(py, "start")?;
221 UncheckedRevision(start.extract(py)?)
222 UncheckedRevision(start.extract(py)?)
222 };
223 };
223 let start = self.index(py)
224 let start = self.index(py)
224 .borrow()
225 .borrow()
225 .check_revision(start)
226 .check_revision(start)
226 .ok_or_else(|| {
227 .ok_or_else(|| {
227 nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
228 nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
228 })?;
229 })?;
229 self.index(py).borrow_mut().remove(start).unwrap();
230 self.index(py).borrow_mut().remove(start).unwrap();
230 let mut opt = self.get_nodetree(py)?.borrow_mut();
231 let mut opt = self.get_nodetree(py)?.borrow_mut();
231 let nt = opt.as_mut().unwrap();
232 let nt = opt.as_mut().unwrap();
232 nt.invalidate_all();
233 nt.invalidate_all();
233 self.fill_nodemap(py, nt)?;
234 self.fill_nodemap(py, nt)?;
234 Ok(())
235 Ok(())
235 }
236 }
236
237
237 //
238 //
238 // Index methods previously reforwarded to C index (tp_methods)
239 // Index methods previously reforwarded to C index (tp_methods)
239 // Same ordering as in revlog.c
240 // Same ordering as in revlog.c
240 //
241 //
241
242
242 /// return the gca set of the given revs
243 /// return the gca set of the given revs
243 def ancestors(&self, *args, **_kw) -> PyResult<PyObject> {
244 def ancestors(&self, *args, **_kw) -> PyResult<PyObject> {
244 let rust_res = self.inner_ancestors(py, args)?;
245 let rust_res = self.inner_ancestors(py, args)?;
245 Ok(rust_res)
246 Ok(rust_res)
246 }
247 }
247
248
248 /// return the heads of the common ancestors of the given revs
249 /// return the heads of the common ancestors of the given revs
249 def commonancestorsheads(&self, *args, **_kw) -> PyResult<PyObject> {
250 def commonancestorsheads(&self, *args, **_kw) -> PyResult<PyObject> {
250 let rust_res = self.inner_commonancestorsheads(py, args)?;
251 let rust_res = self.inner_commonancestorsheads(py, args)?;
251 Ok(rust_res)
252 Ok(rust_res)
252 }
253 }
253
254
254 /// Clear the index caches and inner py_class data.
255 /// Clear the index caches and inner py_class data.
255 /// It is Python's responsibility to call `update_nodemap_data` again.
256 /// It is Python's responsibility to call `update_nodemap_data` again.
256 def clearcaches(&self) -> PyResult<PyObject> {
257 def clearcaches(&self) -> PyResult<PyObject> {
257 self.nt(py).borrow_mut().take();
258 self.nt(py).borrow_mut().take();
258 self.docket(py).borrow_mut().take();
259 self.docket(py).borrow_mut().take();
259 self.nodemap_mmap(py).borrow_mut().take();
260 self.nodemap_mmap(py).borrow_mut().take();
261 self.head_revs_py_list(py).borrow_mut().take();
260 self.index(py).borrow().clear_caches();
262 self.index(py).borrow().clear_caches();
261 Ok(py.None())
263 Ok(py.None())
262 }
264 }
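// Annotation for this change (not part of the diff): the new
// `head_revs_py_list` cache must be dropped here as well, otherwise a
// stale `PyList` of heads could outlive a cache clear. Hypothetical
// regression sketch, with made-up call sites for illustration only:
//
//     let heads_before = index.headrevs(py)?; // computed, then cached
//     index.clearcaches(py)?;                 // drops head_revs_py_list
//     let heads_after = index.headrevs(py)?;  // recomputed from scratch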
263
265
264 /// return the raw binary string representing a revision
266 /// return the raw binary string representing a revision
265 def entry_binary(&self, *args, **_kw) -> PyResult<PyObject> {
267 def entry_binary(&self, *args, **_kw) -> PyResult<PyObject> {
266 let rindex = self.index(py).borrow();
268 let rindex = self.index(py).borrow();
267 let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
269 let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
268 let rust_bytes = rindex.check_revision(rev).and_then(
270 let rust_bytes = rindex.check_revision(rev).and_then(
269 |r| rindex.entry_binary(r))
271 |r| rindex.entry_binary(r))
270 .ok_or_else(|| rev_not_in_index(py, rev))?;
272 .ok_or_else(|| rev_not_in_index(py, rev))?;
271 let rust_res = PyBytes::new(py, rust_bytes).into_object();
273 let rust_res = PyBytes::new(py, rust_bytes).into_object();
272 Ok(rust_res)
274 Ok(rust_res)
273 }
275 }
274
276
275 /// return a binary packed version of the header
277 /// return a binary packed version of the header
276 def pack_header(&self, *args, **_kw) -> PyResult<PyObject> {
278 def pack_header(&self, *args, **_kw) -> PyResult<PyObject> {
277 let rindex = self.index(py).borrow();
279 let rindex = self.index(py).borrow();
278 let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
280 let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
279 let rust_res = PyBytes::new(py, &packed).into_object();
281 let rust_res = PyBytes::new(py, &packed).into_object();
280 Ok(rust_res)
282 Ok(rust_res)
281 }
283 }
282
284
283 /// compute phases
285 /// compute phases
284 def computephasesmapsets(&self, *args, **_kw) -> PyResult<PyObject> {
286 def computephasesmapsets(&self, *args, **_kw) -> PyResult<PyObject> {
285 let py_roots = args.get_item(py, 0).extract::<PyDict>(py)?;
287 let py_roots = args.get_item(py, 0).extract::<PyDict>(py)?;
286 let rust_res = self.inner_computephasesmapsets(py, py_roots)?;
288 let rust_res = self.inner_computephasesmapsets(py, py_roots)?;
287 Ok(rust_res)
289 Ok(rust_res)
288 }
290 }
289
291
290 /// reachableroots
292 /// reachableroots
291 def reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> {
293 def reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> {
292 let rust_res = self.inner_reachableroots2(
294 let rust_res = self.inner_reachableroots2(
293 py,
295 py,
294 UncheckedRevision(args.get_item(py, 0).extract(py)?),
296 UncheckedRevision(args.get_item(py, 0).extract(py)?),
295 args.get_item(py, 1),
297 args.get_item(py, 1),
296 args.get_item(py, 2),
298 args.get_item(py, 2),
297 args.get_item(py, 3).extract(py)?,
299 args.get_item(py, 3).extract(py)?,
298 )?;
300 )?;
299 Ok(rust_res)
301 Ok(rust_res)
300 }
302 }
301
303
302 /// get head revisions
304 /// get head revisions
303 def headrevs(&self) -> PyResult<PyObject> {
305 def headrevs(&self) -> PyResult<PyObject> {
304 let rust_res = self.inner_headrevs(py)?;
306 let rust_res = self.inner_headrevs(py)?;
305 Ok(rust_res)
307 Ok(rust_res)
306 }
308 }
307
309
308 /// get filtered head revisions
310 /// get filtered head revisions
309 def headrevsfiltered(&self, *args, **_kw) -> PyResult<PyObject> {
311 def headrevsfiltered(&self, *args, **_kw) -> PyResult<PyObject> {
310 let rust_res = self.inner_headrevsfiltered(py, &args.get_item(py, 0))?;
312 let rust_res = self.inner_headrevsfiltered(py, &args.get_item(py, 0))?;
311 Ok(rust_res)
313 Ok(rust_res)
312 }
314 }
313
315
314 /// True if the object is a snapshot
316 /// True if the object is a snapshot
315 def issnapshot(&self, *args, **_kw) -> PyResult<bool> {
317 def issnapshot(&self, *args, **_kw) -> PyResult<bool> {
316 let index = self.index(py).borrow();
318 let index = self.index(py).borrow();
317 let result = index
319 let result = index
318 .is_snapshot(UncheckedRevision(args.get_item(py, 0).extract(py)?))
320 .is_snapshot(UncheckedRevision(args.get_item(py, 0).extract(py)?))
319 .map_err(|e| {
321 .map_err(|e| {
320 PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
322 PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
321 })?;
323 })?;
322 Ok(result)
324 Ok(result)
323 }
325 }
324
326
325 /// Gather snapshot data in a cache dict
327 /// Gather snapshot data in a cache dict
326 def findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> {
328 def findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> {
327 let index = self.index(py).borrow();
329 let index = self.index(py).borrow();
328 let cache: PyDict = args.get_item(py, 0).extract(py)?;
330 let cache: PyDict = args.get_item(py, 0).extract(py)?;
329 // this method operates by setting new values in the cache,
331 // this method operates by setting new values in the cache,
330 // hence we will compare results by letting the C implementation
332 // hence we will compare results by letting the C implementation
331 // operate over a deepcopy of the cache, and finally compare both
333 // operate over a deepcopy of the cache, and finally compare both
332 // caches.
334 // caches.
333 let c_cache = PyDict::new(py);
335 let c_cache = PyDict::new(py);
334 for (k, v) in cache.items(py) {
336 for (k, v) in cache.items(py) {
335 c_cache.set_item(py, k, PySet::new(py, v)?)?;
337 c_cache.set_item(py, k, PySet::new(py, v)?)?;
336 }
338 }
337
339
338 let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
340 let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
339 let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
341 let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
340 let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
342 let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
341 index.find_snapshots(
343 index.find_snapshots(
342 start_rev,
344 start_rev,
343 end_rev,
345 end_rev,
344 &mut cache_wrapper,
346 &mut cache_wrapper,
345 ).map_err(|_| revlog_error(py))?;
347 ).map_err(|_| revlog_error(py))?;
346 Ok(py.None())
348 Ok(py.None())
347 }
349 }
348
350
349 /// determine revisions with deltas to reconstruct fulltext
351 /// determine revisions with deltas to reconstruct fulltext
350 def deltachain(&self, *args, **_kw) -> PyResult<PyObject> {
352 def deltachain(&self, *args, **_kw) -> PyResult<PyObject> {
351 let index = self.index(py).borrow();
353 let index = self.index(py).borrow();
352 let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
354 let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
353 let stop_rev =
355 let stop_rev =
354 args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
356 args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
355 let rev = index.check_revision(rev).ok_or_else(|| {
357 let rev = index.check_revision(rev).ok_or_else(|| {
356 nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
358 nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
357 })?;
359 })?;
358 let stop_rev = if let Some(stop_rev) = stop_rev {
360 let stop_rev = if let Some(stop_rev) = stop_rev {
359 let stop_rev = UncheckedRevision(stop_rev);
361 let stop_rev = UncheckedRevision(stop_rev);
360 Some(index.check_revision(stop_rev).ok_or_else(|| {
362 Some(index.check_revision(stop_rev).ok_or_else(|| {
361 nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
363 nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
362 })?)
364 })?)
363 } else {None};
365 } else {None};
364 let using_general_delta = args.get_item(py, 2)
366 let using_general_delta = args.get_item(py, 2)
365 .extract::<Option<u32>>(py)?
367 .extract::<Option<u32>>(py)?
366 .map(|i| i != 0);
368 .map(|i| i != 0);
367 let (chain, stopped) = index.delta_chain(
369 let (chain, stopped) = index.delta_chain(
368 rev, stop_rev, using_general_delta
370 rev, stop_rev, using_general_delta
369 ).map_err(|e| {
371 ).map_err(|e| {
370 PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
372 PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
371 })?;
373 })?;
372
374
373 let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
375 let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
374 Ok(
376 Ok(
375 PyTuple::new(
377 PyTuple::new(
376 py,
378 py,
377 &[
379 &[
378 chain.into_py_object(py).into_object(),
380 chain.into_py_object(py).into_object(),
379 stopped.into_py_object(py).into_object()
381 stopped.into_py_object(py).into_object()
380 ]
382 ]
381 ).into_object()
383 ).into_object()
382 )
384 )
383
385
384 }
386 }
385
387
386 /// slice planned chunk read to reach a density threshold
388 /// slice planned chunk read to reach a density threshold
387 def slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> {
389 def slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> {
388 let rust_res = self.inner_slicechunktodensity(
390 let rust_res = self.inner_slicechunktodensity(
389 py,
391 py,
390 args.get_item(py, 0),
392 args.get_item(py, 0),
391 args.get_item(py, 1).extract(py)?,
393 args.get_item(py, 1).extract(py)?,
392 args.get_item(py, 2).extract(py)?
394 args.get_item(py, 2).extract(py)?
393 )?;
395 )?;
394 Ok(rust_res)
396 Ok(rust_res)
395 }
397 }
396
398
397 // index_sequence_methods and index_mapping_methods.
399 // index_sequence_methods and index_mapping_methods.
398 //
400 //
399 // Since we call back through the high level Python API,
401 // Since we call back through the high level Python API,
400 // there's no point making a distinction between index_get
402 // there's no point making a distinction between index_get
401 // and index_getitem.
403 // and index_getitem.
402 // gracinet 2023: the above is no longer true for the pure Rust impl
404 // gracinet 2023: the above is no longer true for the pure Rust impl
403
405
404 def __len__(&self) -> PyResult<usize> {
406 def __len__(&self) -> PyResult<usize> {
405 self.len(py)
407 self.len(py)
406 }
408 }
407
409
408 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
410 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
409 let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
411 let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
410 Ok(rust_res)
412 Ok(rust_res)
411 }
413 }
412
414
413 def __contains__(&self, item: PyObject) -> PyResult<bool> {
415 def __contains__(&self, item: PyObject) -> PyResult<bool> {
414 // ObjectProtocol does not seem to provide contains(), so
416 // ObjectProtocol does not seem to provide contains(), so
415 // this is an equivalent implementation of the index_contains()
417 // this is an equivalent implementation of the index_contains()
416 // defined in revlog.c
418 // defined in revlog.c
417 match item.extract::<i32>(py) {
419 match item.extract::<i32>(py) {
418 Ok(rev) => {
420 Ok(rev) => {
419 Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
421 Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
420 }
422 }
421 Err(_) => {
423 Err(_) => {
422 let item_bytes: PyBytes = item.extract(py)?;
424 let item_bytes: PyBytes = item.extract(py)?;
423 let rust_res = self.has_node(py, item_bytes)?;
425 let rust_res = self.has_node(py, item_bytes)?;
424 Ok(rust_res)
426 Ok(rust_res)
425 }
427 }
426 }
428 }
427 }
429 }
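// Annotation (not part of the diff): like revlog.c's index_contains(), an
// integer is contained iff -1 <= rev < len (with -1 standing for the null
// revision); any other object is treated as a node id and resolved through
// `has_node`. Plain-Rust equivalent of the integer branch, for
// illustration:
//
//     fn int_contained(rev: i32, len: usize) -> bool {
//         rev >= -1 && (rev as i64) < (len as i64)
//     }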
428
430
429 def nodemap_data_all(&self) -> PyResult<PyBytes> {
431 def nodemap_data_all(&self) -> PyResult<PyBytes> {
430 self.inner_nodemap_data_all(py)
432 self.inner_nodemap_data_all(py)
431 }
433 }
432
434
433 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
435 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
434 self.inner_nodemap_data_incremental(py)
436 self.inner_nodemap_data_incremental(py)
435 }
437 }
436 def update_nodemap_data(
438 def update_nodemap_data(
437 &self,
439 &self,
438 docket: PyObject,
440 docket: PyObject,
439 nm_data: PyObject
441 nm_data: PyObject
440 ) -> PyResult<PyObject> {
442 ) -> PyResult<PyObject> {
441 self.inner_update_nodemap_data(py, docket, nm_data)
443 self.inner_update_nodemap_data(py, docket, nm_data)
442 }
444 }
443
445
444 @property
446 @property
445 def entry_size(&self) -> PyResult<PyInt> {
447 def entry_size(&self) -> PyResult<PyInt> {
446 let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
448 let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
447 Ok(rust_res)
449 Ok(rust_res)
448 }
450 }
449
451
450 @property
452 @property
451 def rust_ext_compat(&self) -> PyResult<PyInt> {
453 def rust_ext_compat(&self) -> PyResult<PyInt> {
452 // will be entirely removed once the Rust index is the only one, but is
454 // will be entirely removed once the Rust index is the only one, but is
453 // still useful to implement in Rust to detangle things when removing `self.cindex`
455 // still useful to implement in Rust to detangle things when removing `self.cindex`
454 let rust_res: PyInt = 1.to_py_object(py);
456 let rust_res: PyInt = 1.to_py_object(py);
455 Ok(rust_res)
457 Ok(rust_res)
456 }
458 }
457
459
458 @property
460 @property
459 def is_rust(&self) -> PyResult<PyBool> {
461 def is_rust(&self) -> PyResult<PyBool> {
460 Ok(false.to_py_object(py))
462 Ok(false.to_py_object(py))
461 }
463 }
462
464
463 });
465 });
464
466
465 /// Take a (potentially) mmap'ed buffer, and return the underlying Python
467 /// Take a (potentially) mmap'ed buffer, and return the underlying Python
466 /// buffer along with the Rust slice into said buffer. We need to keep the
468 /// buffer along with the Rust slice into said buffer. We need to keep the
467 /// Python buffer around, otherwise we'd get a dangling pointer once the buffer
469 /// Python buffer around, otherwise we'd get a dangling pointer once the buffer
468 /// is freed from Python's side.
470 /// is freed from Python's side.
469 ///
471 ///
470 /// # Safety
472 /// # Safety
471 ///
473 ///
472 /// The caller must make sure that the buffer is kept around for at least as
474 /// The caller must make sure that the buffer is kept around for at least as
473 /// long as the slice.
475 /// long as the slice.
474 #[deny(unsafe_op_in_unsafe_fn)]
476 #[deny(unsafe_op_in_unsafe_fn)]
475 unsafe fn mmap_keeparound(
477 unsafe fn mmap_keeparound(
476 py: Python,
478 py: Python,
477 data: PyObject,
479 data: PyObject,
478 ) -> PyResult<(
480 ) -> PyResult<(
479 PyBuffer,
481 PyBuffer,
480 Box<dyn std::ops::Deref<Target = [u8]> + Send + Sync + 'static>,
482 Box<dyn std::ops::Deref<Target = [u8]> + Send + Sync + 'static>,
481 )> {
483 )> {
482 let buf = PyBuffer::get(py, &data)?;
484 let buf = PyBuffer::get(py, &data)?;
483 let len = buf.item_count();
485 let len = buf.item_count();
484
486
485 // Build a slice from the mmap'ed buffer data
487 // Build a slice from the mmap'ed buffer data
486 let cbuf = buf.buf_ptr();
488 let cbuf = buf.buf_ptr();
487 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
489 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
488 && buf.is_c_contiguous()
490 && buf.is_c_contiguous()
489 && u8::is_compatible_format(buf.format())
491 && u8::is_compatible_format(buf.format())
490 {
492 {
491 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
493 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
492 } else {
494 } else {
493 return Err(PyErr::new::<ValueError, _>(
495 return Err(PyErr::new::<ValueError, _>(
494 py,
496 py,
495 "Nodemap data buffer has an invalid memory representation"
497 "Nodemap data buffer has an invalid memory representation"
496 .to_string(),
498 .to_string(),
497 ));
499 ));
498 };
500 };
499
501
500 Ok((buf, Box::new(bytes)))
502 Ok((buf, Box::new(bytes)))
501 }
503 }
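// Usage annotation (not part of the diff): the returned `PyBuffer` must be
// kept alive at least as long as the boxed slice, which is why callers
// store it in a `py_class!` data field; `inner_update_nodemap_data` below
// does exactly this with `nodemap_mmap`:
//
//     // Safety: `buf` is stored in the class, keeping the mapping alive
//     let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
//     self.nodemap_mmap(py).borrow_mut().replace(buf);
//     // `bytes` can now safely back the hg-core data structures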
502
504
503 fn py_tuple_to_revision_data_params(
505 fn py_tuple_to_revision_data_params(
504 py: Python,
506 py: Python,
505 tuple: PyTuple,
507 tuple: PyTuple,
506 ) -> PyResult<RevisionDataParams> {
508 ) -> PyResult<RevisionDataParams> {
507 if tuple.len(py) < 8 {
509 if tuple.len(py) < 8 {
508 // this is better than the panic promised by tup.get_item()
510 // this is better than the panic promised by tup.get_item()
509 return Err(PyErr::new::<IndexError, _>(
511 return Err(PyErr::new::<IndexError, _>(
510 py,
512 py,
511 "tuple index out of range",
513 "tuple index out of range",
512 ));
514 ));
513 }
515 }
514 let offset_or_flags: u64 = tuple.get_item(py, 0).extract(py)?;
516 let offset_or_flags: u64 = tuple.get_item(py, 0).extract(py)?;
515 let node_id = tuple
517 let node_id = tuple
516 .get_item(py, 7)
518 .get_item(py, 7)
517 .extract::<PyBytes>(py)?
519 .extract::<PyBytes>(py)?
518 .data(py)
520 .data(py)
519 .try_into()
521 .try_into()
520 .unwrap();
522 .unwrap();
521 let flags = (offset_or_flags & 0xFFFF) as u16;
523 let flags = (offset_or_flags & 0xFFFF) as u16;
522 let data_offset = offset_or_flags >> 16;
524 let data_offset = offset_or_flags >> 16;
523 Ok(RevisionDataParams {
525 Ok(RevisionDataParams {
524 flags,
526 flags,
525 data_offset,
527 data_offset,
526 data_compressed_length: tuple.get_item(py, 1).extract(py)?,
528 data_compressed_length: tuple.get_item(py, 1).extract(py)?,
527 data_uncompressed_length: tuple.get_item(py, 2).extract(py)?,
529 data_uncompressed_length: tuple.get_item(py, 2).extract(py)?,
528 data_delta_base: tuple.get_item(py, 3).extract(py)?,
530 data_delta_base: tuple.get_item(py, 3).extract(py)?,
529 link_rev: tuple.get_item(py, 4).extract(py)?,
531 link_rev: tuple.get_item(py, 4).extract(py)?,
530 parent_rev_1: tuple.get_item(py, 5).extract(py)?,
532 parent_rev_1: tuple.get_item(py, 5).extract(py)?,
531 parent_rev_2: tuple.get_item(py, 6).extract(py)?,
533 parent_rev_2: tuple.get_item(py, 6).extract(py)?,
532 node_id,
534 node_id,
533 ..Default::default()
535 ..Default::default()
534 })
536 })
535 }
537 }
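// Annotation (not part of the diff): the first tuple item packs the data
// offset and the revision flags into one integer, flags in the low 16
// bits. Self-contained round-trip sketch of that encoding:
//
//     fn pack(data_offset: u64, flags: u16) -> u64 {
//         (data_offset << 16) | u64::from(flags)
//     }
//     fn unpack(offset_or_flags: u64) -> (u64, u16) {
//         (offset_or_flags >> 16, (offset_or_flags & 0xFFFF) as u16)
//     }
//     // assert_eq!(unpack(pack(42, 0x0003)), (42, 0x0003));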
536 fn revision_data_params_to_py_tuple(
538 fn revision_data_params_to_py_tuple(
537 py: Python,
539 py: Python,
538 params: RevisionDataParams,
540 params: RevisionDataParams,
539 ) -> PyTuple {
541 ) -> PyTuple {
540 PyTuple::new(
542 PyTuple::new(
541 py,
543 py,
542 &[
544 &[
543 params.data_offset.into_py_object(py).into_object(),
545 params.data_offset.into_py_object(py).into_object(),
544 params
546 params
545 .data_compressed_length
547 .data_compressed_length
546 .into_py_object(py)
548 .into_py_object(py)
547 .into_object(),
549 .into_object(),
548 params
550 params
549 .data_uncompressed_length
551 .data_uncompressed_length
550 .into_py_object(py)
552 .into_py_object(py)
551 .into_object(),
553 .into_object(),
552 params.data_delta_base.into_py_object(py).into_object(),
554 params.data_delta_base.into_py_object(py).into_object(),
553 params.link_rev.into_py_object(py).into_object(),
555 params.link_rev.into_py_object(py).into_object(),
554 params.parent_rev_1.into_py_object(py).into_object(),
556 params.parent_rev_1.into_py_object(py).into_object(),
555 params.parent_rev_2.into_py_object(py).into_object(),
557 params.parent_rev_2.into_py_object(py).into_object(),
556 PyBytes::new(py, &params.node_id)
558 PyBytes::new(py, &params.node_id)
557 .into_py_object(py)
559 .into_py_object(py)
558 .into_object(),
560 .into_object(),
559 params._sidedata_offset.into_py_object(py).into_object(),
561 params._sidedata_offset.into_py_object(py).into_object(),
560 params
562 params
561 ._sidedata_compressed_length
563 ._sidedata_compressed_length
562 .into_py_object(py)
564 .into_py_object(py)
563 .into_object(),
565 .into_object(),
564 params
566 params
565 .data_compression_mode
567 .data_compression_mode
566 .into_py_object(py)
568 .into_py_object(py)
567 .into_object(),
569 .into_object(),
568 params
570 params
569 ._sidedata_compression_mode
571 ._sidedata_compression_mode
570 .into_py_object(py)
572 .into_py_object(py)
571 .into_object(),
573 .into_object(),
572 params._rank.into_py_object(py).into_object(),
574 params._rank.into_py_object(py).into_object(),
573 ],
575 ],
574 )
576 )
575 }
577 }
576
578
577 struct PySnapshotsCache<'p> {
579 struct PySnapshotsCache<'p> {
578 py: Python<'p>,
580 py: Python<'p>,
579 dict: PyDict,
581 dict: PyDict,
580 }
582 }
581
583
582 impl<'p> SnapshotsCache for PySnapshotsCache<'p> {
584 impl<'p> SnapshotsCache for PySnapshotsCache<'p> {
583 fn insert_for(
585 fn insert_for(
584 &mut self,
586 &mut self,
585 rev: BaseRevision,
587 rev: BaseRevision,
586 value: BaseRevision,
588 value: BaseRevision,
587 ) -> Result<(), RevlogError> {
589 ) -> Result<(), RevlogError> {
588 let pyvalue = value.into_py_object(self.py).into_object();
590 let pyvalue = value.into_py_object(self.py).into_object();
589 match self.dict.get_item(self.py, rev) {
591 match self.dict.get_item(self.py, rev) {
590 Some(obj) => obj
592 Some(obj) => obj
591 .extract::<PySet>(self.py)
593 .extract::<PySet>(self.py)
592 .and_then(|set| set.add(self.py, pyvalue)),
594 .and_then(|set| set.add(self.py, pyvalue)),
593 None => PySet::new(self.py, vec![pyvalue])
595 None => PySet::new(self.py, vec![pyvalue])
594 .and_then(|set| self.dict.set_item(self.py, rev, set)),
596 .and_then(|set| self.dict.set_item(self.py, rev, set)),
595 }
597 }
596 .map_err(|_| {
598 .map_err(|_| {
597 RevlogError::Other(HgError::unsupported(
599 RevlogError::Other(HgError::unsupported(
598 "Error in Python caches handling",
600 "Error in Python caches handling",
599 ))
601 ))
600 })
602 })
601 }
603 }
602 }
604 }
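// Annotation (not part of the diff): the wrapped Python dict maps a base
// revision to the set of snapshot revisions built on top of it, and
// `insert_for` is the bindings equivalent of `setdefault(rev, set()).add(value)`.
// The same shape in plain Rust, for illustration:
//
//     use std::collections::{HashMap, HashSet};
//     let mut cache: HashMap<i32, HashSet<i32>> = HashMap::new();
//     cache.entry(10).or_default().insert(42); // snapshot 42 based on 10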
603
605
604 impl Index {
606 impl Index {
605 fn new(py: Python, data: PyObject, header: u32) -> PyResult<Self> {
607 fn new(py: Python, data: PyObject, header: u32) -> PyResult<Self> {
606 // Safety: we keep the buffer around inside the class as `index_mmap`
608 // Safety: we keep the buffer around inside the class as `index_mmap`
607 let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
609 let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
608
610
609 Self::create_instance(
611 Self::create_instance(
610 py,
612 py,
611 hg::index::Index::new(
613 hg::index::Index::new(
612 bytes,
614 bytes,
613 IndexHeader::parse(&header.to_be_bytes())
615 IndexHeader::parse(&header.to_be_bytes())
614 .expect("default header is broken")
616 .expect("default header is broken")
615 .unwrap(),
617 .unwrap(),
616 )
618 )
617 .map_err(|e| {
619 .map_err(|e| {
618 revlog_error_with_msg(py, e.to_string().as_bytes())
620 revlog_error_with_msg(py, e.to_string().as_bytes())
619 })?,
621 })?,
620 RefCell::new(None),
622 RefCell::new(None),
621 RefCell::new(None),
623 RefCell::new(None),
622 RefCell::new(None),
624 RefCell::new(None),
623 RefCell::new(Some(buf)),
625 RefCell::new(Some(buf)),
626 RefCell::new(None),
624 )
627 )
625 }
628 }
626
629
627 fn len(&self, py: Python) -> PyResult<usize> {
630 fn len(&self, py: Python) -> PyResult<usize> {
628 let rust_index_len = self.index(py).borrow().len();
631 let rust_index_len = self.index(py).borrow().len();
629 Ok(rust_index_len)
632 Ok(rust_index_len)
630 }
633 }
631
634
632 /// This is scaffolding at this point, but it could also become
635 /// This is scaffolding at this point, but it could also become
633 /// a way to start a persistent nodemap or perform a
636 /// a way to start a persistent nodemap or perform a
634 /// vacuum / repack operation
637 /// vacuum / repack operation
635 fn fill_nodemap(
638 fn fill_nodemap(
636 &self,
639 &self,
637 py: Python,
640 py: Python,
638 nt: &mut CoreNodeTree,
641 nt: &mut CoreNodeTree,
639 ) -> PyResult<PyObject> {
642 ) -> PyResult<PyObject> {
640 let index = self.index(py).borrow();
643 let index = self.index(py).borrow();
641 for r in 0..self.len(py)? {
644 for r in 0..self.len(py)? {
642 let rev = Revision(r as BaseRevision);
645 let rev = Revision(r as BaseRevision);
643 // in this case node() won't ever return None
646 // in this case node() won't ever return None
644 nt.insert(&*index, index.node(rev).unwrap(), rev)
647 nt.insert(&*index, index.node(rev).unwrap(), rev)
645 .map_err(|e| nodemap_error(py, e))?
648 .map_err(|e| nodemap_error(py, e))?
646 }
649 }
647 Ok(py.None())
650 Ok(py.None())
648 }
651 }
649
652
650 fn get_nodetree<'a>(
653 fn get_nodetree<'a>(
651 &'a self,
654 &'a self,
652 py: Python<'a>,
655 py: Python<'a>,
653 ) -> PyResult<&'a RefCell<Option<CoreNodeTree>>> {
656 ) -> PyResult<&'a RefCell<Option<CoreNodeTree>>> {
654 if self.nt(py).borrow().is_none() {
657 if self.nt(py).borrow().is_none() {
655 let readonly = Box::<Vec<_>>::default();
658 let readonly = Box::<Vec<_>>::default();
656 let mut nt = CoreNodeTree::load_bytes(readonly, 0);
659 let mut nt = CoreNodeTree::load_bytes(readonly, 0);
657 self.fill_nodemap(py, &mut nt)?;
660 self.fill_nodemap(py, &mut nt)?;
658 self.nt(py).borrow_mut().replace(nt);
661 self.nt(py).borrow_mut().replace(nt);
659 }
662 }
660 Ok(self.nt(py))
663 Ok(self.nt(py))
661 }
664 }
662
665
663 /// Returns the full nodemap bytes to be written as-is to disk
666 /// Returns the full nodemap bytes to be written as-is to disk
664 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
667 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
665 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
668 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
666 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
669 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
667
670
668 // If there's anything readonly, we need to build the data again from
671 // If there's anything readonly, we need to build the data again from
669 // scratch
672 // scratch
670 let bytes = if readonly.len() > 0 {
673 let bytes = if readonly.len() > 0 {
671 let mut nt = CoreNodeTree::load_bytes(Box::<Vec<_>>::default(), 0);
674 let mut nt = CoreNodeTree::load_bytes(Box::<Vec<_>>::default(), 0);
672 self.fill_nodemap(py, &mut nt)?;
675 self.fill_nodemap(py, &mut nt)?;
673
676
674 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
677 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
675 assert_eq!(readonly.len(), 0);
678 assert_eq!(readonly.len(), 0);
676
679
677 bytes
680 bytes
678 } else {
681 } else {
679 bytes
682 bytes
680 };
683 };
681
684
682 let bytes = PyBytes::new(py, &bytes);
685 let bytes = PyBytes::new(py, &bytes);
683 Ok(bytes)
686 Ok(bytes)
684 }
687 }
685
688
686 /// Returns the last saved docket along with the size of any changed data
689 /// Returns the last saved docket along with the size of any changed data
687 /// (in number of blocks), and said data as bytes.
690 /// (in number of blocks), and said data as bytes.
688 fn inner_nodemap_data_incremental(
691 fn inner_nodemap_data_incremental(
689 &self,
692 &self,
690 py: Python,
693 py: Python,
691 ) -> PyResult<PyObject> {
694 ) -> PyResult<PyObject> {
692 let docket = self.docket(py).borrow();
695 let docket = self.docket(py).borrow();
693 let docket = match docket.as_ref() {
696 let docket = match docket.as_ref() {
694 Some(d) => d,
697 Some(d) => d,
695 None => return Ok(py.None()),
698 None => return Ok(py.None()),
696 };
699 };
697
700
698 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
701 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
699 let masked_blocks = node_tree.masked_readonly_blocks();
702 let masked_blocks = node_tree.masked_readonly_blocks();
700 let (_, data) = node_tree.into_readonly_and_added_bytes();
703 let (_, data) = node_tree.into_readonly_and_added_bytes();
701 let changed = masked_blocks * std::mem::size_of::<Block>();
704 let changed = masked_blocks * std::mem::size_of::<Block>();
702
705
703 Ok((docket, changed, PyBytes::new(py, &data))
706 Ok((docket, changed, PyBytes::new(py, &data))
704 .to_py_object(py)
707 .to_py_object(py)
705 .into_object())
708 .into_object())
706 }
709 }
707
710
708 /// Update the nodemap from the new (mmap'ed) data.
711 /// Update the nodemap from the new (mmap'ed) data.
709 /// The docket is kept as a reference for later incremental calls.
712 /// The docket is kept as a reference for later incremental calls.
710 fn inner_update_nodemap_data(
713 fn inner_update_nodemap_data(
711 &self,
714 &self,
712 py: Python,
715 py: Python,
713 docket: PyObject,
716 docket: PyObject,
714 nm_data: PyObject,
717 nm_data: PyObject,
715 ) -> PyResult<PyObject> {
718 ) -> PyResult<PyObject> {
716 // Safety: we keep the buffer around inside the class as `nodemap_mmap`
719 // Safety: we keep the buffer around inside the class as `nodemap_mmap`
717 let (buf, bytes) = unsafe { mmap_keeparound(py, nm_data)? };
720 let (buf, bytes) = unsafe { mmap_keeparound(py, nm_data)? };
718 let len = buf.item_count();
721 let len = buf.item_count();
719 self.nodemap_mmap(py).borrow_mut().replace(buf);
722 self.nodemap_mmap(py).borrow_mut().replace(buf);
720
723
721 let mut nt = CoreNodeTree::load_bytes(bytes, len);
724 let mut nt = CoreNodeTree::load_bytes(bytes, len);
722
725
723 let data_tip = docket
726 let data_tip = docket
724 .getattr(py, "tip_rev")?
727 .getattr(py, "tip_rev")?
725 .extract::<BaseRevision>(py)?
728 .extract::<BaseRevision>(py)?
726 .into();
729 .into();
727 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
730 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
728 let idx = self.index(py).borrow();
731 let idx = self.index(py).borrow();
729 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
732 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
730 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
733 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
731 })?;
734 })?;
732 let current_tip = idx.len();
735 let current_tip = idx.len();
733
736
734 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
737 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
735 let rev = Revision(r);
738 let rev = Revision(r);
736 // in this case node() won't ever return None
739 // in this case node() won't ever return None
737 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
740 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
738 .map_err(|e| nodemap_error(py, e))?
741 .map_err(|e| nodemap_error(py, e))?
739 }
742 }
740
743
741 *self.nt(py).borrow_mut() = Some(nt);
744 *self.nt(py).borrow_mut() = Some(nt);
742
745
743 Ok(py.None())
746 Ok(py.None())
744 }
747 }
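// Annotation (not part of the diff): only revisions newer than the
// persisted nodemap's tip are re-inserted by the loop above, so reloading
// after `update_nodemap_data` costs time proportional to the number of new
// revisions rather than to repository size; a full rebuild only happens
// through `fill_nodemap`, e.g. from `get_nodetree` when no nodemap exists.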
745
748
746 fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> {
749 fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> {
747 let idx = self.index(py).borrow();
750 let idx = self.index(py).borrow();
748 Ok(match key.extract::<BaseRevision>(py) {
751 Ok(match key.extract::<BaseRevision>(py) {
749 Ok(key_as_int) => {
752 Ok(key_as_int) => {
750 let entry_params = if key_as_int == NULL_REVISION.0 {
753 let entry_params = if key_as_int == NULL_REVISION.0 {
751 RevisionDataParams::default()
754 RevisionDataParams::default()
752 } else {
755 } else {
753 let rev = UncheckedRevision(key_as_int);
756 let rev = UncheckedRevision(key_as_int);
754 match idx.entry_as_params(rev) {
757 match idx.entry_as_params(rev) {
755 Some(e) => e,
758 Some(e) => e,
756 None => {
759 None => {
757 return Err(PyErr::new::<IndexError, _>(
760 return Err(PyErr::new::<IndexError, _>(
758 py,
761 py,
759 "revlog index out of range",
762 "revlog index out of range",
760 ));
763 ));
761 }
764 }
762 }
765 }
763 };
766 };
764 revision_data_params_to_py_tuple(py, entry_params)
767 revision_data_params_to_py_tuple(py, entry_params)
765 .into_object()
768 .into_object()
766 }
769 }
767 _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else(
770 _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else(
768 || py.None(),
771 || py.None(),
769 |py_rev| py_rev.into_py_object(py).into_object(),
772 |py_rev| py_rev.into_py_object(py).into_object(),
770 ),
773 ),
771 })
774 })
772 }
775 }
773
776
774 fn inner_headrevs(&self, py: Python) -> PyResult<PyObject> {
777 fn inner_headrevs(&self, py: Python) -> PyResult<PyObject> {
775 let index = &*self.index(py).borrow();
778 let index = &*self.index(py).borrow();
776 let as_vec: Vec<PyObject> = index
779 if let Some(new_heads) =
777 .head_revs()
780 index.head_revs_shortcut().map_err(|e| graph_error(py, e))?
778 .map_err(|e| graph_error(py, e))?
781 {
779 .iter()
782 self.cache_new_heads_py_list(new_heads, py);
780 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
783 }
781 .collect();
784
782 Ok(PyList::new(py, &as_vec).into_object())
785 Ok(self
786 .head_revs_py_list(py)
787 .borrow()
788 .as_ref()
789 .expect("head revs should be cached")
790 .clone_ref(py)
791 .into_object())
783 }
792 }
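// Annotation on the caching protocol introduced here (not part of the
// diff): `head_revs_shortcut()` is understood to return `Some(new_heads)`
// only when the heads had to be (re)computed, and `None` when the core
// index cache is still valid, so the previously built `PyList` is returned
// without converting each revision to a Python int again. Shape of the
// protocol, with illustrative comments in place of the real bodies:
//
//     match index.head_revs_shortcut().map_err(|e| graph_error(py, e))? {
//         Some(new_heads) => { /* rebuild and cache the PyList */ }
//         None => { /* reuse self.head_revs_py_list as-is */ }
//     }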
784
793
785 fn inner_headrevsfiltered(
794 fn inner_headrevsfiltered(
786 &self,
795 &self,
787 py: Python,
796 py: Python,
788 filtered_revs: &PyObject,
797 filtered_revs: &PyObject,
789 ) -> PyResult<PyObject> {
798 ) -> PyResult<PyObject> {
790 let index = &mut *self.index(py).borrow_mut();
799 let index = &mut *self.index(py).borrow_mut();
791 let filtered_revs = rev_pyiter_collect(py, filtered_revs, index)?;
800 let filtered_revs = rev_pyiter_collect(py, filtered_revs, index)?;
792
801
793 let as_vec: Vec<PyObject> = index
802 if let Some(new_heads) = index
794 .head_revs_filtered(&filtered_revs)
803 .head_revs_filtered(&filtered_revs, true)
795 .map_err(|e| graph_error(py, e))?
804 .map_err(|e| graph_error(py, e))?
805 {
806 self.cache_new_heads_py_list(new_heads, py);
807 }
808
809 Ok(self
810 .head_revs_py_list(py)
811 .borrow()
812 .as_ref()
813 .expect("head revs should be cached")
814 .clone_ref(py)
815 .into_object())
816 }
817
818 fn cache_new_heads_py_list(
819 &self,
820 new_heads: Vec<Revision>,
821 py: Python<'_>,
822 ) -> PyList {
823 let as_vec: Vec<PyObject> = new_heads
796 .iter()
824 .iter()
797 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
825 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
798 .collect();
826 .collect();
799 Ok(PyList::new(py, &as_vec).into_object())
827 let new_heads_py_list = PyList::new(py, &as_vec);
828 *self.head_revs_py_list(py).borrow_mut() =
829 Some(new_heads_py_list.clone_ref(py));
830 new_heads_py_list
800 }
831 }
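// Annotation (not part of the diff): storing a `clone_ref` while returning
// the freshly built list means `headrevs` and `headrevsfiltered` share a
// single cached object; `clone_ref` only bumps the Python refcount, so the
// cache costs one reference rather than a copy of the list on every call.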
801
832
802 fn inner_ancestors(
833 fn inner_ancestors(
803 &self,
834 &self,
804 py: Python,
835 py: Python,
805 py_revs: &PyTuple,
836 py_revs: &PyTuple,
806 ) -> PyResult<PyObject> {
837 ) -> PyResult<PyObject> {
807 let index = &*self.index(py).borrow();
838 let index = &*self.index(py).borrow();
808 let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
839 let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
809 let as_vec: Vec<_> = index
840 let as_vec: Vec<_> = index
810 .ancestors(&revs)
841 .ancestors(&revs)
811 .map_err(|e| graph_error(py, e))?
842 .map_err(|e| graph_error(py, e))?
812 .iter()
843 .iter()
813 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
844 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
814 .collect();
845 .collect();
815 Ok(PyList::new(py, &as_vec).into_object())
846 Ok(PyList::new(py, &as_vec).into_object())
816 }
847 }
817
848
818 fn inner_commonancestorsheads(
849 fn inner_commonancestorsheads(
819 &self,
850 &self,
820 py: Python,
851 py: Python,
821 py_revs: &PyTuple,
852 py_revs: &PyTuple,
822 ) -> PyResult<PyObject> {
853 ) -> PyResult<PyObject> {
823 let index = &*self.index(py).borrow();
854 let index = &*self.index(py).borrow();
824 let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
855 let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
825 let as_vec: Vec<_> = index
856 let as_vec: Vec<_> = index
826 .common_ancestor_heads(&revs)
857 .common_ancestor_heads(&revs)
827 .map_err(|e| graph_error(py, e))?
858 .map_err(|e| graph_error(py, e))?
828 .iter()
859 .iter()
829 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
860 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
830 .collect();
861 .collect();
831 Ok(PyList::new(py, &as_vec).into_object())
862 Ok(PyList::new(py, &as_vec).into_object())
832 }
863 }

    fn inner_computephasesmapsets(
        &self,
        py: Python,
        py_roots: PyDict,
    ) -> PyResult<PyObject> {
        let index = &*self.index(py).borrow();
        let opt = self.get_nodetree(py)?.borrow();
        let nt = opt.as_ref().unwrap();
        let roots: Result<HashMap<Phase, Vec<Revision>>, PyErr> = py_roots
            .items_list(py)
            .iter(py)
            .map(|r| {
                let phase = r.get_item(py, 0)?;
                let nodes = r.get_item(py, 1)?;
                // Transform the nodes from Python to revs here since we
                // have access to the nodemap
                let revs: Result<_, _> = nodes
                    .iter(py)?
                    .map(|node| match node?.extract::<PyBytes>(py) {
                        Ok(py_bytes) => {
                            let node = node_from_py_bytes(py, &py_bytes)?;
                            nt.find_bin(index, node.into())
                                .map_err(|e| nodemap_error(py, e))?
                                .ok_or_else(|| revlog_error(py))
                        }
                        Err(e) => Err(e),
                    })
                    .collect();
                let phase = Phase::try_from(phase.extract::<usize>(py)?)
                    .map_err(|_| revlog_error(py));
                Ok((phase?, revs?))
            })
            .collect();
        let (len, phase_maps) = index
            .compute_phases_map_sets(roots?)
            .map_err(|e| graph_error(py, e))?;

        // Ugly hack, but temporary
        const IDX_TO_PHASE_NUM: [usize; 4] = [1, 2, 32, 96];
        let py_phase_maps = PyDict::new(py);
        for (idx, roots) in phase_maps.iter().enumerate() {
            let phase_num = IDX_TO_PHASE_NUM[idx].into_py_object(py);
            // OPTIM too bad we have to collect here. At least, we could
            // reuse the same Vec and allocate it with capacity at
            // max(len(phase_maps))
            let roots_vec: Vec<PyInt> = roots
                .iter()
                .map(|r| PyRevision::from(*r).into_py_object(py))
                .collect();
            py_phase_maps.set_item(
                py,
                phase_num,
                PySet::new(py, roots_vec)?,
            )?;
        }
        Ok(PyTuple::new(
            py,
            &[
                len.into_py_object(py).into_object(),
                py_phase_maps.into_object(),
            ],
        )
        .into_object())
    }
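    // Note on `IDX_TO_PHASE_NUM` above: `compute_phases_map_sets` returns
    // one root set per tracked non-public phase, in positional order, and
    // the constant maps each position back to Mercurial's phase numbers
    // (1 = draft, 2 = secret, 32 = archived, 96 = internal); public (0)
    // needs no root set. The Python caller thus receives, roughly,
    // `(len, {1: set(...), 2: set(...), 32: set(...), 96: set(...)})`.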

    fn inner_slicechunktodensity(
        &self,
        py: Python,
        revs: PyObject,
        target_density: f64,
        min_gap_size: usize,
    ) -> PyResult<PyObject> {
        let index = &*self.index(py).borrow();
        let revs: Vec<_> = rev_pyiter_collect(py, &revs, index)?;
        let as_nested_vec =
            index.slice_chunk_to_density(&revs, target_density, min_gap_size);
        let mut res = Vec::with_capacity(as_nested_vec.len());
        let mut py_chunk = Vec::new();
        for chunk in as_nested_vec {
            py_chunk.clear();
            py_chunk.reserve_exact(chunk.len());
            for rev in chunk {
                py_chunk.push(
                    PyRevision::from(rev).into_py_object(py).into_object(),
                );
            }
            res.push(PyList::new(py, &py_chunk).into_object());
        }
        // This mirrors the C implementation; why a single chunk is
        // returned as a tuple rather than a list is unclear.
        if res.len() == 1 {
            Ok(PyTuple::new(py, &res).into_object())
        } else {
            Ok(PyList::new(py, &res).into_object())
        }
    }
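    // A minimal Rust-level sketch of the core call wrapped above (the
    // density and gap-size values are illustrative):
    //
    //     let chunks = index.slice_chunk_to_density(&revs, 0.5, 4);
    //     // chunks: Vec<Vec<Revision>>, each inner Vec one dense slice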

    fn inner_reachableroots2(
        &self,
        py: Python,
        min_root: UncheckedRevision,
        heads: PyObject,
        roots: PyObject,
        include_path: bool,
    ) -> PyResult<PyObject> {
        let index = &*self.index(py).borrow();
        let heads = rev_pyiter_collect_or_else(py, &heads, index, |_rev| {
            PyErr::new::<IndexError, _>(py, "head out of range")
        })?;
        let roots: Result<_, _> = roots
            .iter(py)?
            .map(|r| {
                r.and_then(|o| match o.extract::<PyRevision>(py) {
                    Ok(r) => Ok(UncheckedRevision(r.0)),
                    Err(e) => Err(e),
                })
            })
            .collect();
        let as_set = index
            .reachable_roots(min_root, heads, roots?, include_path)
            .map_err(|e| graph_error(py, e))?;
        let as_vec: Vec<PyObject> = as_set
            .iter()
            .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
            .collect();
        Ok(PyList::new(py, &as_vec).into_object())
    }
}
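// Note the asymmetry in `inner_reachableroots2` above: `heads` are checked
// against the index up front (an out-of-range head raises `IndexError`),
// while `roots` are collected as `UncheckedRevision` and handed to the core
// `reachable_roots` as-is.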

py_class!(pub class NodeTree |py| {
    data nt: RefCell<CoreNodeTree>;
    data index: RefCell<UnsafePyLeaked<PySharedIndex>>;

    def __new__(_cls, index: PyObject) -> PyResult<NodeTree> {
        let index = py_rust_index_to_graph(py, index)?;
        let nt = CoreNodeTree::default(); // in-RAM, fully mutable
        Self::create_instance(py, RefCell::new(nt), RefCell::new(index))
    }

    /// Tell whether the NodeTree is still valid
    ///
    /// In case of mutation of the index, the given results are not
    /// guaranteed to be correct, and in fact, the methods borrowing
    /// the inner index would fail because of `PySharedRef` poisoning
    /// (generation-based guard), same as iterating on a `dict` that has
    /// been mutated in the meantime.
    def is_invalidated(&self) -> PyResult<bool> {
        let leaked = self.index(py).borrow();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let result = unsafe { leaked.try_borrow(py) };
        // Two cases make `result` an error:
        // - the index has previously been mutably borrowed
        // - there is currently a mutable borrow
        // In both cases, previous results related to the index can no
        // longer be considered valid.
        Ok(result.is_err())
    }
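    // Rough intended use: build a `NodeTree` on a rust-backed index; once
    // the index has been mutably borrowed (that is, mutated), this method
    // keeps answering `True`, telling callers to rebuild the tree.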

    def insert(&self, rev: PyRevision) -> PyResult<PyObject> {
        let leaked = self.index(py).borrow();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let index = &*unsafe { leaked.try_borrow(py)? };

        let rev = UncheckedRevision(rev.0);
        let rev = index
            .check_revision(rev)
            .ok_or_else(|| rev_not_in_index(py, rev))?;
        if rev == NULL_REVISION {
            return Err(rev_not_in_index(py, rev.into()))
        }

        let entry = index.inner.get_entry(rev).unwrap();
        let mut nt = self.nt(py).borrow_mut();
        nt.insert(index, entry.hash(), rev).map_err(|e| nodemap_error(py, e))?;

        Ok(py.None())
    }

    /// Lookup by node hex prefix in the NodeTree, returning revision number.
    ///
    /// This is not part of the classical NodeTree API, but is good enough
    /// for unit testing, as in `test-rust-revlog.py`.
    def prefix_rev_lookup(
        &self,
        node_prefix: PyBytes
    ) -> PyResult<Option<PyRevision>> {
        let prefix = NodePrefix::from_hex(node_prefix.data(py))
            .map_err(|_| PyErr::new::<ValueError, _>(
                py,
                format!("Invalid node or prefix {:?}",
                        node_prefix.as_object()))
            )?;

        let nt = self.nt(py).borrow();
        let leaked = self.index(py).borrow();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let index = &*unsafe { leaked.try_borrow(py)? };

        Ok(nt.find_bin(index, prefix)
            .map_err(|e| nodemap_error(py, e))?
            .map(|r| r.into())
        )
    }

    def shortest(&self, node: PyBytes) -> PyResult<usize> {
        let nt = self.nt(py).borrow();
        let leaked = self.index(py).borrow();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let idx = &*unsafe { leaked.try_borrow(py)? };
        match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
        {
            Ok(Some(l)) => Ok(l),
            Ok(None) => Err(revlog_error(py)),
            Err(e) => Err(nodemap_error(py, e)),
        }
    }
});
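// A minimal Rust-level sketch of the `CoreNodeTree` calls wrapped by the
// class above, assuming a checked `rev`, its index `entry`, a parsed
// `prefix`, and a `node` as in the method bodies:
//
//     let mut nt = CoreNodeTree::default();
//     nt.insert(index, entry.hash(), rev)?;               // index one revision
//     let rev = nt.find_bin(index, prefix)?;              // Option<Revision>
//     let len = nt.unique_prefix_len_node(index, &node)?; // Option<usize>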

fn revlog_error(py: Python) -> PyErr {
    match py
        .import("mercurial.error")
        .and_then(|m| m.get(py, "RevlogError"))
    {
        Err(e) => e,
        Ok(cls) => PyErr::from_instance(
            py,
            cls.call(py, (py.None(),), None).ok().into_py_object(py),
        ),
    }
}

fn revlog_error_with_msg(py: Python, msg: &[u8]) -> PyErr {
    match py
        .import("mercurial.error")
        .and_then(|m| m.get(py, "RevlogError"))
    {
        Err(e) => e,
        Ok(cls) => PyErr::from_instance(
            py,
            cls.call(py, (PyBytes::new(py, msg),), None)
                .ok()
                .into_py_object(py),
        ),
    }
}

fn graph_error(py: Python, _err: hg::GraphError) -> PyErr {
    // ParentOutOfRange is currently the only variant of `hg::GraphError`.
    // The C index always raises this simple ValueError.
    PyErr::new::<ValueError, _>(py, "parent out of range")
}

fn nodemap_rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
    PyErr::new::<ValueError, _>(
        py,
        format!(
            "Inconsistency: Revision {} found in nodemap \
             is not in revlog index",
            rev
        ),
    )
}

fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
    PyErr::new::<ValueError, _>(
        py,
        format!("revlog index out of range: {}", rev),
    )
}

/// Standard treatment of NodeMapError
fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
    match err {
        NodeMapError::MultipleResults => revlog_error(py),
        NodeMapError::RevisionNotInIndex(r) => nodemap_rev_not_in_index(py, r),
    }
}
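// Typical call-site pattern for the helpers above, as seen throughout this
// file:
//
//     nt.find_bin(index, prefix).map_err(|e| nodemap_error(py, e))?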

/// Create the module, with `__package__` given from parent
pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
    let dotted_name = &format!("{}.revlog", package);
    let m = PyModule::new(py, dotted_name)?;
    m.add(py, "__package__", package)?;
    m.add(py, "__doc__", "RevLog - Rust implementations")?;

    m.add_class::<Index>(py)?;
    m.add_class::<NodeTree>(py)?;

    let sys = PyModule::import(py, "sys")?;
    let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
    sys_modules.set_item(py, dotted_name, &m)?;

    Ok(m)
}
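// A hedged sketch of how a parent module might wire this submodule in; the
// parent-side names below are illustrative, not the actual registration
// code:
//
//     let revlog_mod = revlog::init_module(py, "mercurial.rustext")?;
//     parent.add(py, "revlog", revlog_mod)?;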