rust-index: improve phase computation speed...
Raphaël Gomès
r52318:7c6d0b9d default
@@ -1,2084 +1,2077 @@
use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
use std::ops::Deref;
use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};

use bitvec::prelude::*;
use byteorder::{BigEndian, ByteOrder};
use bytes_cast::{unaligned, BytesCast};

use super::REVIDX_KNOWN_FLAGS;
use crate::errors::HgError;
use crate::node::{NODE_BYTES_LENGTH, NULL_NODE, STORED_NODE_ID_BYTES};
use crate::revlog::node::Node;
use crate::revlog::{Revision, NULL_REVISION};
use crate::{
    dagops, BaseRevision, FastHashMap, Graph, GraphError, RevlogError,
    RevlogIndex, UncheckedRevision,
};

pub const INDEX_ENTRY_SIZE: usize = 64;
pub const COMPRESSION_MODE_INLINE: u8 = 2;

#[derive(Debug)]
pub struct IndexHeader {
    pub(super) header_bytes: [u8; 4],
}

#[derive(Copy, Clone)]
pub struct IndexHeaderFlags {
    flags: u16,
}

/// Corresponds to the high bits of `_format_flags` in python
impl IndexHeaderFlags {
    /// Corresponds to FLAG_INLINE_DATA in python
    pub fn is_inline(self) -> bool {
        self.flags & 1 != 0
    }
    /// Corresponds to FLAG_GENERALDELTA in python
    pub fn uses_generaldelta(self) -> bool {
        self.flags & 2 != 0
    }
}

/// Corresponds to the INDEX_HEADER structure,
/// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
impl IndexHeader {
    fn format_flags(&self) -> IndexHeaderFlags {
        // No "unknown flags" check here, unlike in python. Maybe there should
        // be.
        IndexHeaderFlags {
            flags: BigEndian::read_u16(&self.header_bytes[0..2]),
        }
    }

    /// The only revlog version currently supported by rhg.
    const REVLOGV1: u16 = 1;

    /// Corresponds to `_format_version` in Python.
    fn format_version(&self) -> u16 {
        BigEndian::read_u16(&self.header_bytes[2..4])
    }

    pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> {
        if index_bytes.is_empty() {
            return Ok(None);
        }
        if index_bytes.len() < 4 {
            return Err(HgError::corrupted(
                "corrupted revlog: can't read the index format header",
            ));
        }
        Ok(Some(IndexHeader {
            header_bytes: {
                let bytes: [u8; 4] =
                    index_bytes[0..4].try_into().expect("impossible");
                bytes
            },
        }))
    }
}

/// Abstracts the access to the index bytes since they can be spread between
/// the immutable (bytes) part and the mutable (added) part if any appends
/// happened. This makes it transparent for the callers.
struct IndexData {
    /// Immutable bytes, most likely taken from disk
    bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>,
    /// Used when stripping index contents, keeps track of the start of the
    /// first stripped revision, which is used to give a slice of the
    /// `bytes` field.
    truncation: Option<usize>,
    /// Bytes that were added after reading the index
    added: Vec<u8>,
}

impl IndexData {
    pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>) -> Self {
        Self {
            bytes,
            truncation: None,
            added: vec![],
        }
    }

    pub fn len(&self) -> usize {
        match self.truncation {
            Some(truncation) => truncation + self.added.len(),
            None => self.bytes.len() + self.added.len(),
        }
    }

    fn remove(
        &mut self,
        rev: Revision,
        offsets: Option<&[usize]>,
    ) -> Result<(), RevlogError> {
        let rev = rev.0 as usize;
        let truncation = if let Some(offsets) = offsets {
            offsets[rev]
        } else {
            rev * INDEX_ENTRY_SIZE
        };
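        // If the truncation point falls inside the immutable bytes, record
        // it and drop everything that was appended since; otherwise only
        // shrink the appended part.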
        if truncation < self.bytes.len() {
            self.truncation = Some(truncation);
            self.added.clear();
        } else {
            self.added.truncate(truncation - self.bytes.len());
        }
        Ok(())
    }

    fn is_new(&self) -> bool {
        self.bytes.is_empty()
    }
}

impl std::ops::Index<std::ops::Range<usize>> for IndexData {
    type Output = [u8];

    fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
        let start = index.start;
        let end = index.end;
        let immutable_len = match self.truncation {
            Some(truncation) => truncation,
            None => self.bytes.len(),
        };
        if start < immutable_len {
            if end > immutable_len {
                panic!("index data cannot span existing and added ranges");
            }
            &self.bytes[index]
        } else {
            &self.added[start - immutable_len..end - immutable_len]
        }
    }
}

#[derive(Debug, PartialEq, Eq)]
pub struct RevisionDataParams {
    pub flags: u16,
    pub data_offset: u64,
    pub data_compressed_length: i32,
    pub data_uncompressed_length: i32,
    pub data_delta_base: i32,
    pub link_rev: i32,
    pub parent_rev_1: i32,
    pub parent_rev_2: i32,
    pub node_id: [u8; NODE_BYTES_LENGTH],
    pub _sidedata_offset: u64,
    pub _sidedata_compressed_length: i32,
    pub data_compression_mode: u8,
    pub _sidedata_compression_mode: u8,
    pub _rank: i32,
}

impl Default for RevisionDataParams {
    fn default() -> Self {
        Self {
            flags: 0,
            data_offset: 0,
            data_compressed_length: 0,
            data_uncompressed_length: 0,
            data_delta_base: -1,
            link_rev: -1,
            parent_rev_1: -1,
            parent_rev_2: -1,
            node_id: [0; NODE_BYTES_LENGTH],
            _sidedata_offset: 0,
            _sidedata_compressed_length: 0,
            data_compression_mode: COMPRESSION_MODE_INLINE,
            _sidedata_compression_mode: COMPRESSION_MODE_INLINE,
            _rank: -1,
        }
    }
}

#[derive(BytesCast)]
#[repr(C)]
pub struct RevisionDataV1 {
    data_offset_or_flags: unaligned::U64Be,
    data_compressed_length: unaligned::I32Be,
    data_uncompressed_length: unaligned::I32Be,
    data_delta_base: unaligned::I32Be,
    link_rev: unaligned::I32Be,
    parent_rev_1: unaligned::I32Be,
    parent_rev_2: unaligned::I32Be,
    node_id: [u8; STORED_NODE_ID_BYTES],
}

fn _static_assert_size_of_revision_data_v1() {
    let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>;
}

impl RevisionDataParams {
    pub fn validate(&self) -> Result<(), RevlogError> {
        if self.flags & !REVIDX_KNOWN_FLAGS != 0 {
            return Err(RevlogError::corrupted(format!(
                "unknown revlog index flags: {}",
                self.flags
            )));
        }
        if self.data_compression_mode != COMPRESSION_MODE_INLINE {
            return Err(RevlogError::corrupted(format!(
                "invalid data compression mode: {}",
                self.data_compression_mode
            )));
        }
        // FIXME isn't this only for v2 or changelog v2?
        if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE {
            return Err(RevlogError::corrupted(format!(
                "invalid sidedata compression mode: {}",
                self._sidedata_compression_mode
            )));
        }
        Ok(())
    }

    pub fn into_v1(self) -> RevisionDataV1 {
        let data_offset_or_flags = self.data_offset << 16 | self.flags as u64;
        let mut node_id = [0; STORED_NODE_ID_BYTES];
        node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id);
        RevisionDataV1 {
            data_offset_or_flags: data_offset_or_flags.into(),
            data_compressed_length: self.data_compressed_length.into(),
            data_uncompressed_length: self.data_uncompressed_length.into(),
            data_delta_base: self.data_delta_base.into(),
            link_rev: self.link_rev.into(),
            parent_rev_1: self.parent_rev_1.into(),
            parent_rev_2: self.parent_rev_2.into(),
            node_id,
        }
    }
}

/// A Revlog index
pub struct Index {
    bytes: IndexData,
    /// Offsets of starts of index blocks.
    /// Only needed when the index is interleaved with data.
    offsets: RwLock<Option<Vec<usize>>>,
    uses_generaldelta: bool,
    is_inline: bool,
    /// Cache of (head_revisions, filtered_revisions)
    ///
    /// The head revisions in this index, kept in sync. Should
    /// be accessed via the [`Self::head_revs`] method.
    /// The last filtered revisions in this index, used to make sure
    /// we haven't changed filters when returning the cached `head_revs`.
    head_revs: RwLock<(Vec<Revision>, HashSet<Revision>)>,
}

impl Debug for Index {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Index")
            .field("offsets", &self.offsets)
            .field("uses_generaldelta", &self.uses_generaldelta)
            .finish()
    }
}

impl Graph for Index {
    #[inline(always)]
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
        let err = || GraphError::ParentOutOfRange(rev);
        match self.get_entry(rev) {
            Some(entry) => {
                // The C implementation checks that the parents are valid
                // before returning
                Ok([
                    self.check_revision(entry.p1()).ok_or_else(err)?,
                    self.check_revision(entry.p2()).ok_or_else(err)?,
                ])
            }
            None => Ok([NULL_REVISION, NULL_REVISION]),
        }
    }
}

/// A cache suitable for find_snapshots
///
/// Logically equivalent to a mapping whose keys are [`BaseRevision`] and
/// whose values are sets of [`BaseRevision`]
///
/// TODO the dubious part is insisting that errors must be RevlogError.
/// We would probably need to sprinkle some magic here, such as an associated
/// type that would be Into<RevlogError>, but even that would not be
/// satisfactory, as errors potentially have nothing to do with the revlog.
pub trait SnapshotsCache {
    fn insert_for(
        &mut self,
        rev: BaseRevision,
        value: BaseRevision,
    ) -> Result<(), RevlogError>;
}

impl SnapshotsCache for FastHashMap<BaseRevision, HashSet<BaseRevision>> {
    fn insert_for(
        &mut self,
        rev: BaseRevision,
        value: BaseRevision,
    ) -> Result<(), RevlogError> {
        let all_values = self.entry(rev).or_default();
        all_values.insert(value);
        Ok(())
    }
}

impl Index {
    /// Create an index from bytes.
    /// Calculate the start of each entry when is_inline is true.
    pub fn new(
        bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>,
        default_header: IndexHeader,
    ) -> Result<Self, HgError> {
        let header =
            IndexHeader::parse(bytes.as_ref())?.unwrap_or(default_header);

        if header.format_version() != IndexHeader::REVLOGV1 {
            // A proper new version should have had a repo/store
            // requirement.
            return Err(HgError::corrupted("unsupported revlog version"));
        }

        // This is only correct because we know version is REVLOGV1.
        // In v2 we always use generaldelta, while in v0 we never use
        // generaldelta. Similar for [is_inline] (it's only used in v1).
        let uses_generaldelta = header.format_flags().uses_generaldelta();

        if header.format_flags().is_inline() {
            let mut offset: usize = 0;
            let mut offsets = Vec::new();

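            // For inline revlogs, index entries are interleaved with the
            // revision data: each 64-byte entry is immediately followed by
            // its compressed data, so we walk the file to record where each
            // entry starts.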
            while offset + INDEX_ENTRY_SIZE <= bytes.len() {
                offsets.push(offset);
                let end = offset + INDEX_ENTRY_SIZE;
                let entry = IndexEntry {
                    bytes: &bytes[offset..end],
                    offset_override: None,
                };

                offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
            }

            if offset == bytes.len() {
                Ok(Self {
                    bytes: IndexData::new(bytes),
                    offsets: RwLock::new(Some(offsets)),
                    uses_generaldelta,
                    is_inline: true,
                    head_revs: RwLock::new((vec![], HashSet::new())),
                })
            } else {
                Err(HgError::corrupted("unexpected inline revlog length"))
            }
        } else {
            Ok(Self {
                bytes: IndexData::new(bytes),
                offsets: RwLock::new(None),
                uses_generaldelta,
                is_inline: false,
                head_revs: RwLock::new((vec![], HashSet::new())),
            })
        }
    }

    pub fn uses_generaldelta(&self) -> bool {
        self.uses_generaldelta
    }

    /// Value of the inline flag.
    pub fn is_inline(&self) -> bool {
        self.is_inline
    }

    /// Return a slice of bytes if `revlog` is inline. Panic if not.
    pub fn data(&self, start: usize, end: usize) -> &[u8] {
        if !self.is_inline() {
            panic!("tried to access data in the index of a revlog that is not inline");
        }
        &self.bytes[start..end]
    }

    /// Return number of entries of the revlog index.
    pub fn len(&self) -> usize {
        if self.is_inline() {
            (*self.get_offsets())
                .as_ref()
                .expect("inline should have offsets")
                .len()
        } else {
            self.bytes.len() / INDEX_ENTRY_SIZE
        }
    }

    pub fn get_offsets(&self) -> RwLockReadGuard<Option<Vec<usize>>> {
        assert!(self.is_inline());
        {
            // Wrap in a block to drop the write guard before taking the
            // read lock below.
            // TODO perf?
            let mut offsets = self.offsets.write().unwrap();
            if offsets.is_none() {
                offsets.replace(inline_scan(&self.bytes.bytes).1);
            }
        }
        self.offsets.read().unwrap()
    }

    pub fn get_offsets_mut(&mut self) -> RwLockWriteGuard<Option<Vec<usize>>> {
        assert!(self.is_inline());
        let mut offsets = self.offsets.write().unwrap();
        if offsets.is_none() {
            offsets.replace(inline_scan(&self.bytes.bytes).1);
        }
        offsets
    }

    /// Returns `true` if the `Index` has zero `entries`.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Return the index entry corresponding to the given revision or `None`
    /// for [`NULL_REVISION`]
    ///
    /// Since the specified revision is of the checked type, it always exists
    /// if it was validated by this index.
    pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
        if rev == NULL_REVISION {
            return None;
        }
        Some(if self.is_inline() {
            self.get_entry_inline(rev)
        } else {
            self.get_entry_separated(rev)
        })
    }

    /// Return the binary content of the index entry for the given revision
    ///
    /// See [get_entry()](`Self::get_entry()`) for cases when `None` is
    /// returned.
    pub fn entry_binary(&self, rev: Revision) -> Option<&[u8]> {
        self.get_entry(rev).map(|e| {
            let bytes = e.as_bytes();
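            // The first 4 bytes of the first entry overlap with the index
            // header, so they are not part of entry 0's binary content.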
            if rev.0 == 0 {
                &bytes[4..]
            } else {
                bytes
            }
        })
    }

    pub fn entry_as_params(
        &self,
        rev: UncheckedRevision,
    ) -> Option<RevisionDataParams> {
        let rev = self.check_revision(rev)?;
        self.get_entry(rev).map(|e| RevisionDataParams {
            flags: e.flags(),
            data_offset: if rev.0 == 0 && !self.bytes.is_new() {
                e.flags() as u64
            } else {
                e.raw_offset()
            },
            data_compressed_length: e
                .compressed_len()
                .try_into()
                .unwrap_or_else(|_| {
                    // Python's `unionrepo` sets the compressed length to be
                    // `-1` (or `u32::MAX` if transmuted to `u32`) because it
                    // cannot know the correct compressed length of a given
                    // revision. I'm not sure if this is true, but having this
                    // edge case won't hurt other use cases, let's handle it.
                    assert_eq!(e.compressed_len(), u32::MAX);
                    NULL_REVISION.0
                }),
            data_uncompressed_length: e.uncompressed_len(),
            data_delta_base: e.base_revision_or_base_of_delta_chain().0,
            link_rev: e.link_revision().0,
            parent_rev_1: e.p1().0,
            parent_rev_2: e.p2().0,
            node_id: e.hash().as_bytes().try_into().unwrap(),
            ..Default::default()
        })
    }

    fn get_entry_inline(&self, rev: Revision) -> IndexEntry {
        let offsets = &self.get_offsets();
        let offsets = offsets.as_ref().expect("inline should have offsets");
        let start = offsets[rev.0 as usize];
        let end = start + INDEX_ENTRY_SIZE;
        let bytes = &self.bytes[start..end];

        // See IndexEntry for an explanation of this override.
        let offset_override = Some(end);

        IndexEntry {
            bytes,
            offset_override,
        }
    }

    fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
        let start = rev.0 as usize * INDEX_ENTRY_SIZE;
        let end = start + INDEX_ENTRY_SIZE;
        let bytes = &self.bytes[start..end];

        // Override the offset of the first revision as its bytes are used
        // for the index's metadata (saving space because it is always 0)
        let offset_override = if rev == Revision(0) { Some(0) } else { None };

        IndexEntry {
            bytes,
            offset_override,
        }
    }

    fn null_entry(&self) -> IndexEntry {
        IndexEntry {
            bytes: &[0; INDEX_ENTRY_SIZE],
            offset_override: Some(0),
        }
    }

    /// Return the head revisions of this index
    pub fn head_revs(&self) -> Result<Vec<Revision>, GraphError> {
        self.head_revs_filtered(&HashSet::new(), false)
            .map(|h| h.unwrap())
    }

    /// Python-specific shortcut to save on PyList creation
    pub fn head_revs_shortcut(
        &self,
    ) -> Result<Option<Vec<Revision>>, GraphError> {
        self.head_revs_filtered(&HashSet::new(), true)
    }

    /// Return the heads removed and added by advancing from `begin` to `end`.
    /// In revset language, we compute:
    /// - `heads(:begin)-heads(:end)`
    /// - `heads(:end)-heads(:begin)`
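    ///
    /// For instance, advancing over a purely linear stretch of history
    /// removes the previous head and adds the new tip as the only changes.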
    pub fn head_revs_diff(
        &self,
        begin: Revision,
        end: Revision,
    ) -> Result<(Vec<Revision>, Vec<Revision>), GraphError> {
        let mut heads_added = vec![];
        let mut heads_removed = vec![];

        let mut acc = HashSet::new();
        let Revision(begin) = begin;
        let Revision(end) = end;
        let mut i = end;

        while i > begin {
            // acc invariant:
            // `j` is in the set iff `j <= i` and it has children
            // among `i+1..end` (inclusive)
            if !acc.remove(&i) {
                heads_added.push(Revision(i));
            }
            for Revision(parent) in self.parents(Revision(i))? {
                acc.insert(parent);
            }
            i -= 1;
        }

        // At this point `acc` contains old revisions that gained new children.
        // We need to check if they had any children before. If not, those
        // revisions are the removed heads.
        while !acc.is_empty() {
            // acc invariant:
            // `j` is in the set iff `j <= i` and it has children
            // among `begin+1..end`, but not among `i+1..begin` (inclusive)

            assert!(i >= -1); // yes, `-1` can also be a head if the repo is empty
            if acc.remove(&i) {
                heads_removed.push(Revision(i));
            }
            for Revision(parent) in self.parents(Revision(i))? {
                acc.remove(&parent);
            }
            i -= 1;
        }

        Ok((heads_removed, heads_added))
    }

    /// Return the head revisions of this index
    pub fn head_revs_filtered(
        &self,
        filtered_revs: &HashSet<Revision>,
        py_shortcut: bool,
    ) -> Result<Option<Vec<Revision>>, GraphError> {
        {
            let guard = self
                .head_revs
                .read()
                .expect("RwLock on Index.head_revs should not be poisoned");
            let self_head_revs = &guard.0;
            let self_filtered_revs = &guard.1;
            if !self_head_revs.is_empty()
                && filtered_revs == self_filtered_revs
            {
                if py_shortcut {
                    // Don't copy the revs since we've already cached them
                    // on the Python side.
                    return Ok(None);
                } else {
                    return Ok(Some(self_head_revs.to_owned()));
                }
            }
        }

        let as_vec = if self.is_empty() {
            vec![NULL_REVISION]
        } else {
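            // Mark every revision that has a child (and hence is not a head)
            // in a bit vector, then keep only the unmarked revisions.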
            let mut not_heads = bitvec![0; self.len()];
            dagops::retain_heads_fast(
                self,
                not_heads.as_mut_bitslice(),
                filtered_revs,
            )?;
            not_heads
                .into_iter()
                .enumerate()
                .filter_map(|(idx, is_not_head)| {
                    if is_not_head {
                        None
                    } else {
                        Some(Revision(idx as BaseRevision))
                    }
                })
                .collect()
        };
        *self
            .head_revs
            .write()
            .expect("RwLock on Index.head_revs should not be poisoned") =
            (as_vec.to_owned(), filtered_revs.to_owned());
        Ok(Some(as_vec))
    }

    /// Obtain the delta chain for a revision.
    ///
    /// `stop_rev` specifies a revision to stop at. If not specified, we
    /// stop at the base of the chain.
    ///
    /// Returns a 2-tuple of (chain, stopped) where `chain` is a vec of
    /// revs in ascending order and `stopped` is a bool indicating whether
    /// `stop_rev` was hit.
    pub fn delta_chain(
        &self,
        rev: Revision,
        stop_rev: Option<Revision>,
        using_general_delta: Option<bool>,
    ) -> Result<(Vec<Revision>, bool), HgError> {
        let mut current_rev = rev;
        let mut entry = self.get_entry(rev).unwrap();
        let mut chain = vec![];
        let using_general_delta =
            using_general_delta.unwrap_or_else(|| self.uses_generaldelta());
        while current_rev.0 != entry.base_revision_or_base_of_delta_chain().0
            && stop_rev.map(|r| r != current_rev).unwrap_or(true)
        {
            chain.push(current_rev);
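            // With generaldelta, each entry records its own delta parent;
            // without it, a delta always applies against the previous
            // revision in the index.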
            let new_rev = if using_general_delta {
                entry.base_revision_or_base_of_delta_chain()
            } else {
                UncheckedRevision(current_rev.0 - 1)
            };
            current_rev = self.check_revision(new_rev).ok_or_else(|| {
                HgError::corrupted(format!("Revision {new_rev} out of range"))
            })?;
            if current_rev.0 == NULL_REVISION.0 {
                break;
            }
            entry = self.get_entry(current_rev).unwrap()
        }

        let stopped = if stop_rev.map(|r| current_rev == r).unwrap_or(false) {
            true
        } else {
            chain.push(current_rev);
            false
        };
        chain.reverse();
        Ok((chain, stopped))
    }

    pub fn find_snapshots(
        &self,
        start_rev: UncheckedRevision,
        end_rev: UncheckedRevision,
        cache: &mut impl SnapshotsCache,
    ) -> Result<(), RevlogError> {
        let mut start_rev = start_rev.0;
        let mut end_rev = end_rev.0;
        end_rev += 1;
        let len = self.len().try_into().unwrap();
        if end_rev > len {
            end_rev = len;
        }
        if start_rev < 0 {
            start_rev = 0;
        }
        for rev in start_rev..end_rev {
            if !self.is_snapshot_unchecked(Revision(rev))? {
                continue;
            }
            let mut base = self
                .get_entry(Revision(rev))
                .unwrap()
                .base_revision_or_base_of_delta_chain();
            if base.0 == rev {
                base = NULL_REVISION.into();
            }
            cache.insert_for(base.0, rev)?;
        }
        Ok(())
    }

    fn clear_head_revs(&self) {
        self.head_revs
            .write()
            .expect("RwLock on Index.head_revs should not be poisoned")
            .0
            .clear()
    }

    /// TODO move this to the trait probably, along with other things
    pub fn append(
        &mut self,
        revision_data: RevisionDataParams,
    ) -> Result<(), RevlogError> {
        revision_data.validate()?;
        if self.is_inline() {
            let new_offset = self.bytes.len();
            if let Some(offsets) = &mut *self.get_offsets_mut() {
                offsets.push(new_offset)
            }
        }
        self.bytes.added.extend(revision_data.into_v1().as_bytes());
        self.clear_head_revs();
        Ok(())
    }

    pub fn pack_header(&self, header: i32) -> [u8; 4] {
        header.to_be_bytes()
    }

    pub fn remove(&mut self, rev: Revision) -> Result<(), RevlogError> {
        let offsets = if self.is_inline() {
            self.get_offsets().clone()
        } else {
            None
        };
        self.bytes.remove(rev, offsets.as_deref())?;
        if self.is_inline() {
            if let Some(offsets) = &mut *self.get_offsets_mut() {
                offsets.truncate(rev.0 as usize)
            }
        }
        self.clear_head_revs();
        Ok(())
    }

    pub fn clear_caches(&self) {
        // We need to get the 'inline' value from Python at init and use this
        // instead of offsets to determine whether we're inline since we might
        // clear caches. This implies re-populating the offsets on-demand.
        *self
            .offsets
            .write()
            .expect("RwLock on Index.offsets should not be poisoned") = None;
        self.clear_head_revs();
    }

    /// Unchecked version of `is_snapshot`.
    /// Assumes the caller checked that `rev` is within a valid revision range.
    pub fn is_snapshot_unchecked(
        &self,
        mut rev: Revision,
    ) -> Result<bool, RevlogError> {
        while rev.0 >= 0 {
            let entry = self.get_entry(rev).unwrap();
            let mut base = entry.base_revision_or_base_of_delta_chain().0;
            if base == rev.0 {
                base = NULL_REVISION.0;
            }
            if base == NULL_REVISION.0 {
                return Ok(true);
            }
            let [mut p1, mut p2] = self
                .parents(rev)
                .map_err(|_| RevlogError::InvalidRevision)?;
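            // Walk each parent past empty revisions (compressed length 0),
            // which carry no delta of their own, to find the delta base that
            // `base` should be compared against.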
            while let Some(p1_entry) = self.get_entry(p1) {
                if p1_entry.compressed_len() != 0 || p1.0 == 0 {
                    break;
                }
                let parent_base =
                    p1_entry.base_revision_or_base_of_delta_chain();
                if parent_base.0 == p1.0 {
                    break;
                }
                p1 = self
                    .check_revision(parent_base)
                    .ok_or(RevlogError::InvalidRevision)?;
            }
            while let Some(p2_entry) = self.get_entry(p2) {
                if p2_entry.compressed_len() != 0 || p2.0 == 0 {
                    break;
                }
                let parent_base =
                    p2_entry.base_revision_or_base_of_delta_chain();
                if parent_base.0 == p2.0 {
                    break;
                }
                p2 = self
                    .check_revision(parent_base)
                    .ok_or(RevlogError::InvalidRevision)?;
            }
            if base == p1.0 || base == p2.0 {
                return Ok(false);
            }
            rev = self
                .check_revision(base.into())
                .ok_or(RevlogError::InvalidRevision)?;
        }
        Ok(rev == NULL_REVISION)
    }

    /// Return whether the given revision is a snapshot. Returns an error if
    /// `rev` is not within a valid revision range.
    pub fn is_snapshot(
        &self,
        rev: UncheckedRevision,
    ) -> Result<bool, RevlogError> {
        let rev = self
            .check_revision(rev)
            .ok_or_else(|| RevlogError::corrupted("test"))?;
        self.is_snapshot_unchecked(rev)
    }

    /// Slice revs to reduce the amount of unrelated data to be read from disk.
    ///
    /// The index is sliced into groups that should be read in one go.
    ///
    /// The initial chunk is sliced until the overall density
    /// (payload/chunks-span ratio) is above `target_density`.
    /// No gap smaller than `min_gap_size` is skipped.
    pub fn slice_chunk_to_density(
        &self,
        revs: &[Revision],
        target_density: f64,
        min_gap_size: usize,
    ) -> Vec<Vec<Revision>> {
        if revs.is_empty() {
            return vec![];
        }
        if revs.len() == 1 {
            return vec![revs.to_owned()];
        }
        let delta_chain_span = self.segment_span(revs);
        if delta_chain_span < min_gap_size {
            return vec![revs.to_owned()];
        }
        let entries: Vec<_> = revs
            .iter()
            .map(|r| {
                (*r, self.get_entry(*r).unwrap_or_else(|| self.null_entry()))
            })
            .collect();

        let mut read_data = delta_chain_span;
        let chain_payload: u32 =
            entries.iter().map(|(_r, e)| e.compressed_len()).sum();
        let mut density = if delta_chain_span > 0 {
            chain_payload as f64 / delta_chain_span as f64
        } else {
            1.0
        };

        if density >= target_density {
            return vec![revs.to_owned()];
        }

        // Collect the gaps so that they can later be sorted and popped
        // from largest to smallest
        let mut gaps = Vec::new();
        let mut previous_end = None;

        for (i, (_rev, entry)) in entries.iter().enumerate() {
            let start = entry.c_start() as usize;
            let length = entry.compressed_len();

            // Skip empty revisions to form larger holes
            if length == 0 {
                continue;
            }

            if let Some(end) = previous_end {
                let gap_size = start - end;
                // Only consider holes that are large enough
                if gap_size > min_gap_size {
                    gaps.push((gap_size, i));
                }
            }
            previous_end = Some(start + length as usize);
        }
        if gaps.is_empty() {
            return vec![revs.to_owned()];
        }
        // Sort the gaps ascending so that popping from the end yields them
        // from largest to smallest
        gaps.sort_unstable();

        // Collect the indices of the largest holes until
        // the density is acceptable
        let mut selected = vec![];
        while let Some((gap_size, gap_id)) = gaps.pop() {
            if density >= target_density {
                break;
            }
            selected.push(gap_id);

            // Skipping this gap removes that many bytes from the read,
            // which raises the density of what remains
            read_data -= gap_size;
            density = if read_data > 0 {
                chain_payload as f64 / read_data as f64
            } else {
                1.0
            };
            if density >= target_density {
                break;
            }
        }
        selected.sort_unstable();
        selected.push(revs.len());

        // Cut the revs at collected indices
        let mut previous_idx = 0;
        let mut chunks = vec![];
        for idx in selected {
            let chunk = self.trim_chunk(&entries, previous_idx, idx);
            if !chunk.is_empty() {
                chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
            }
            previous_idx = idx;
        }
        let chunk = self.trim_chunk(&entries, previous_idx, entries.len());
        if !chunk.is_empty() {
            chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
        }

        chunks
    }

    /// Get the byte span of a segment of sorted revisions.
    ///
    /// Occurrences of [`NULL_REVISION`] are ignored at the beginning of
    /// the `revs` segment.
    ///
    /// panics:
    /// - if `revs` is empty or only made of `NULL_REVISION`
    /// - if the entry for the last or first non-null element of `revs`
    ///   cannot be retrieved.
    fn segment_span(&self, revs: &[Revision]) -> usize {
        if revs.is_empty() {
            return 0;
        }
        let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
        let end = last_entry.c_start() + last_entry.compressed_len() as u64;
        let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
995 let start = if first_rev.0 == 0 {
995 let start = if first_rev.0 == 0 {
996 0
996 0
997 } else {
997 } else {
998 self.get_entry(*first_rev).unwrap().c_start()
998 self.get_entry(*first_rev).unwrap().c_start()
999 };
999 };
1000 (end - start) as usize
1000 (end - start) as usize
1001 }
1001 }
1002
1002
1003 /// Returns `&revs[start..end]` without empty trailing revs
1003 /// Returns `&revs[start..end]` without empty trailing revs
1004 fn trim_chunk<'a>(
1004 fn trim_chunk<'a>(
1005 &'a self,
1005 &'a self,
1006 revs: &'a [(Revision, IndexEntry)],
1006 revs: &'a [(Revision, IndexEntry)],
1007 start: usize,
1007 start: usize,
1008 mut end: usize,
1008 mut end: usize,
1009 ) -> &'a [(Revision, IndexEntry)] {
1009 ) -> &'a [(Revision, IndexEntry)] {
1010 // Trim empty revs at the end, except the very first rev of a chain
1010 // Trim empty revs at the end, except the very first rev of a chain
1011 let last_rev = revs[end - 1].0;
1011 let last_rev = revs[end - 1].0;
1012 if last_rev.0 < self.len() as BaseRevision {
1012 if last_rev.0 < self.len() as BaseRevision {
1013 while end > 1
1013 while end > 1
1014 && end > start
1014 && end > start
1015 && revs[end - 1].1.compressed_len() == 0
1015 && revs[end - 1].1.compressed_len() == 0
1016 {
1016 {
1017 end -= 1
1017 end -= 1
1018 }
1018 }
1019 }
1019 }
1020 &revs[start..end]
1020 &revs[start..end]
1021 }
1021 }
1022
1022
1023 /// Computes the set of revisions for each non-public phase from `roots`,
1023 /// Computes the set of revisions for each non-public phase from `roots`,
1024 /// which are the last known roots for each non-public phase.
1024 /// which are the last known roots for each non-public phase.
1025 pub fn compute_phases_map_sets(
1025 pub fn compute_phases_map_sets(
1026 &self,
1026 &self,
1027 roots: HashMap<Phase, Vec<Revision>>,
1027 roots: HashMap<Phase, Vec<Revision>>,
1028 ) -> Result<(usize, RootsPerPhase), GraphError> {
1028 ) -> Result<(usize, RootsPerPhase), GraphError> {
1029 let mut phases = HashMap::new();
1029 let mut phases = vec![Phase::Public; self.len()];
1030 let mut min_phase_rev = NULL_REVISION;
1030 let mut min_phase_rev = NULL_REVISION;
1031
1031
1032 for phase in Phase::non_public_phases() {
1032 for phase in Phase::non_public_phases() {
1033 if let Some(phase_roots) = roots.get(phase) {
1033 if let Some(phase_roots) = roots.get(phase) {
1034 let min_rev =
1034 let min_rev =
1035 self.add_roots_get_min(phase_roots, &mut phases, *phase);
1035 self.add_roots_get_min(phase_roots, &mut phases, *phase);
1036 if min_rev != NULL_REVISION
1036 if min_rev != NULL_REVISION
1037 && (min_phase_rev == NULL_REVISION
1037 && (min_phase_rev == NULL_REVISION
1038 || min_rev < min_phase_rev)
1038 || min_rev < min_phase_rev)
1039 {
1039 {
1040 min_phase_rev = min_rev;
1040 min_phase_rev = min_rev;
1041 }
1041 }
1042 } else {
1042 } else {
1043 continue;
1043 continue;
1044 };
1044 };
1045 }
1045 }
1046 let mut phase_sets: RootsPerPhase = Default::default();
1046 let mut phase_sets: RootsPerPhase = Default::default();
1047
1047
1048 if min_phase_rev == NULL_REVISION {
1048 if min_phase_rev == NULL_REVISION {
1049 min_phase_rev = Revision(self.len() as BaseRevision);
1049 min_phase_rev = Revision(self.len() as BaseRevision);
1050 }
1050 }
1051
1051
1052 for rev in min_phase_rev.0..self.len() as BaseRevision {
1052 for rev in min_phase_rev.0..self.len() as BaseRevision {
1053 let rev = Revision(rev);
1053 let rev = Revision(rev);
1054 let [p1, p2] = self.parents(rev)?;
1054 let [p1, p2] = self.parents(rev)?;
1055
1055
1056 const DEFAULT_PHASE: &Phase = &Phase::Public;
1057 if p1.0 >= 0
1058 && phases.get(&p1).unwrap_or(DEFAULT_PHASE)
1059 > phases.get(&rev).unwrap_or(DEFAULT_PHASE)
1060 {
1061 phases.insert(rev, phases[&p1]);
1062 }
1063 if p2.0 >= 0
1064 && phases.get(&p2).unwrap_or(DEFAULT_PHASE)
1065 > phases.get(&rev).unwrap_or(DEFAULT_PHASE)
1066 {
1067 phases.insert(rev, phases[&p2]);
1068 }
1069 let set = match phases.get(&rev).unwrap_or(DEFAULT_PHASE) {
1070 Phase::Public => continue,
1071 phase => &mut phase_sets[*phase as usize - 1],
1072 };
1073 set.insert(rev);
1056 if p1.0 >= 0 && phases[p1.0 as usize] > phases[rev.0 as usize] {
1057 phases[rev.0 as usize] = phases[p1.0 as usize];
1058 }
1059 if p2.0 >= 0 && phases[p2.0 as usize] > phases[rev.0 as usize] {
1060 phases[rev.0 as usize] = phases[p2.0 as usize];
1061 }
1062 let set = match phases[rev.0 as usize] {
1063 Phase::Public => continue,
1064 phase => &mut phase_sets[phase as usize - 1],
1065 };
1066 set.push(rev);
1074 }
1067 }
1075
1068
1076 Ok((self.len(), phase_sets))
1069 Ok((self.len(), phase_sets))
1077 }
1070 }
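This hunk is the heart of the change: the `HashMap<Revision, Phase>` keyed by revision is replaced with a flat `Vec<Phase>` indexed by revision number, so every lookup in the hot loop becomes a direct array access instead of a hash. A minimal sketch of the same propagation idea, using plain `usize` revisions and `u8` phases (all names are illustrative, not the crate's API):

```rust
/// Propagate non-public phases from their roots to all descendants.
/// `parents[rev]` lists the parents of `rev`, which are all `< rev`.
fn propagate_phases(
    parents: &[[Option<usize>; 2]],
    roots: &[(usize, u8)], // (revision, non-public phase), 0 = public
) -> Vec<u8> {
    let mut phases = vec![0u8; parents.len()];
    let mut min_root = parents.len();
    for &(rev, phase) in roots {
        phases[rev] = phase;
        min_root = min_root.min(rev);
    }
    // Revision numbers are a topological order, so a single forward
    // pass from the smallest root suffices: each revision inherits the
    // highest phase among its parents.
    for rev in min_root..parents.len() {
        for &parent in parents[rev].iter().flatten() {
            if phases[parent] > phases[rev] {
                phases[rev] = phases[parent];
            }
        }
    }
    phases
}

// With history 0 -> 1 -> 2 and revision 1 a draft (phase 1) root:
// propagate_phases(&[[None; 2], [Some(0), None], [Some(1), None]],
//                  &[(1, 1)]) == vec![0, 1, 1]
```

The `Vec` costs one `Phase` per revision up front, but it turns the two parent lookups and the update per revision into plain indexing.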
1078
1071
1079 fn add_roots_get_min(
1072 fn add_roots_get_min(
1080 &self,
1073 &self,
1081 phase_roots: &[Revision],
1074 phase_roots: &[Revision],
1082 phases: &mut HashMap<Revision, Phase>,
1075 phases: &mut [Phase],
1083 phase: Phase,
1076 phase: Phase,
1084 ) -> Revision {
1077 ) -> Revision {
1085 let mut min_rev = NULL_REVISION;
1078 let mut min_rev = NULL_REVISION;
1086
1079
1087 for root in phase_roots {
1080 for root in phase_roots {
1088 phases.insert(*root, phase);
1081 phases[root.0 as usize] = phase;
1089 if min_rev == NULL_REVISION || min_rev > *root {
1082 if min_rev == NULL_REVISION || min_rev > *root {
1090 min_rev = *root;
1083 min_rev = *root;
1091 }
1084 }
1092 }
1085 }
1093 min_rev
1086 min_rev
1094 }
1087 }
1095
1088
1096 /// Return `(heads(::(<roots> and <roots>::<heads>)))`
1089 /// Return `(heads(::(<roots> and <roots>::<heads>)))`
1097 /// If `include_path` is `true`, return `(<roots>::<heads>)`.
1090 /// If `include_path` is `true`, return `(<roots>::<heads>)`.
1098 ///
1091 ///
1099 /// `min_root` and `roots` are unchecked since they are just used as
1092 /// `min_root` and `roots` are unchecked since they are just used as
1100 /// a bound or for comparison and don't need to represent a valid revision.
1093 /// a bound or for comparison and don't need to represent a valid revision.
1101 /// In practice, the only invalid revision passed is the working directory
1094 /// In practice, the only invalid revision passed is the working directory
1102 /// revision ([`i32::MAX`]).
1095 /// revision ([`i32::MAX`]).
1103 pub fn reachable_roots(
1096 pub fn reachable_roots(
1104 &self,
1097 &self,
1105 min_root: UncheckedRevision,
1098 min_root: UncheckedRevision,
1106 mut heads: Vec<Revision>,
1099 mut heads: Vec<Revision>,
1107 roots: HashSet<UncheckedRevision>,
1100 roots: HashSet<UncheckedRevision>,
1108 include_path: bool,
1101 include_path: bool,
1109 ) -> Result<HashSet<Revision>, GraphError> {
1102 ) -> Result<HashSet<Revision>, GraphError> {
1110 if roots.is_empty() {
1103 if roots.is_empty() {
1111 return Ok(HashSet::new());
1104 return Ok(HashSet::new());
1112 }
1105 }
1113 let mut reachable = HashSet::new();
1106 let mut reachable = HashSet::new();
1114 let mut seen = HashMap::new();
1107 let mut seen = HashMap::new();
1115
1108
1116 while let Some(rev) = heads.pop() {
1109 while let Some(rev) = heads.pop() {
1117 if roots.contains(&rev.into()) {
1110 if roots.contains(&rev.into()) {
1118 reachable.insert(rev);
1111 reachable.insert(rev);
1119 if !include_path {
1112 if !include_path {
1120 continue;
1113 continue;
1121 }
1114 }
1122 }
1115 }
1123 let parents = self.parents(rev)?;
1116 let parents = self.parents(rev)?;
1124 seen.insert(rev, parents);
1117 seen.insert(rev, parents);
1125 for parent in parents {
1118 for parent in parents {
1126 if parent.0 >= min_root.0 && !seen.contains_key(&parent) {
1119 if parent.0 >= min_root.0 && !seen.contains_key(&parent) {
1127 heads.push(parent);
1120 heads.push(parent);
1128 }
1121 }
1129 }
1122 }
1130 }
1123 }
1131 if !include_path {
1124 if !include_path {
1132 return Ok(reachable);
1125 return Ok(reachable);
1133 }
1126 }
1134 let mut revs: Vec<_> = seen.keys().collect();
1127 let mut revs: Vec<_> = seen.keys().collect();
1135 revs.sort_unstable();
1128 revs.sort_unstable();
1136 for rev in revs {
1129 for rev in revs {
1137 for parent in seen[rev] {
1130 for parent in seen[rev] {
1138 if reachable.contains(&parent) {
1131 if reachable.contains(&parent) {
1139 reachable.insert(*rev);
1132 reachable.insert(*rev);
1140 }
1133 }
1141 }
1134 }
1142 }
1135 }
1143 Ok(reachable)
1136 Ok(reachable)
1144 }
1137 }
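The function works in two passes: a descent from the heads that records each visited revision with its parents, then, only when `include_path` is set, an ascending sweep that also keeps every revision with an already-reachable parent. A simplified, self-contained sketch on plain `usize` revisions (the `min_root` cutoff and error handling are omitted, and every walked revision is assumed to have an entry in `parents`):

```rust
use std::collections::{HashMap, HashSet};

fn reachable(
    parents: &HashMap<usize, [Option<usize>; 2]>,
    mut heads: Vec<usize>,
    roots: &HashSet<usize>,
    include_path: bool,
) -> HashSet<usize> {
    let mut reachable = HashSet::new();
    let mut seen: HashMap<usize, [Option<usize>; 2]> = HashMap::new();
    // Pass 1: walk down from the heads; without `include_path`, the
    // walk stops at each root.
    while let Some(rev) = heads.pop() {
        if roots.contains(&rev) {
            reachable.insert(rev);
            if !include_path {
                continue;
            }
        }
        let ps = parents[&rev];
        seen.insert(rev, ps);
        for p in ps.iter().flatten() {
            if !seen.contains_key(p) {
                heads.push(*p);
            }
        }
    }
    if !include_path {
        return reachable;
    }
    // Pass 2: sweep seen revisions in increasing order; a revision is
    // on a root-to-head path iff it is a root or one of its parents is
    // already reachable.
    let mut revs: Vec<usize> = seen.keys().copied().collect();
    revs.sort_unstable();
    for rev in revs {
        if seen[&rev].iter().flatten().any(|p| reachable.contains(p)) {
            reachable.insert(rev);
        }
    }
    reachable
}
```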
1145
1138
1146 /// Given a (possibly overlapping) set of revs, return all the
1139 /// Given a (possibly overlapping) set of revs, return all the
1147 /// common ancestors heads: `heads(::args[0] and ::args[1] and ...)`
1140 /// common ancestors heads: `heads(::args[0] and ::args[1] and ...)`
1148 pub fn common_ancestor_heads(
1141 pub fn common_ancestor_heads(
1149 &self,
1142 &self,
1150 revisions: &[Revision],
1143 revisions: &[Revision],
1151 ) -> Result<Vec<Revision>, GraphError> {
1144 ) -> Result<Vec<Revision>, GraphError> {
1152 // given that revisions is expected to be small, we find this shortcut
1145 // given that revisions is expected to be small, we find this shortcut
1153 // potentially acceptable, especially given that `hg-cpython` could
1146 // potentially acceptable, especially given that `hg-cpython` could
1154 // very much bypass this, constructing a vector of unique values from
1147 // very much bypass this, constructing a vector of unique values from
1155 // the outset.
1148 // the outset.
1156 let as_set: HashSet<Revision> = revisions.iter().copied().collect();
1149 let as_set: HashSet<Revision> = revisions.iter().copied().collect();
1157 // Besides deduplicating, the C version also implements the shortcut
1150 // Besides deduplicating, the C version also implements the shortcut
1158 // for `NULL_REVISION`:
1151 // for `NULL_REVISION`:
1159 if as_set.contains(&NULL_REVISION) {
1152 if as_set.contains(&NULL_REVISION) {
1160 return Ok(vec![]);
1153 return Ok(vec![]);
1161 }
1154 }
1162
1155
1163 let revisions: Vec<Revision> = as_set.into_iter().collect();
1156 let revisions: Vec<Revision> = as_set.into_iter().collect();
1164
1157
1165 if revisions.len() < 8 {
1158 if revisions.len() < 8 {
1166 self.find_gca_candidates::<u8>(&revisions)
1159 self.find_gca_candidates::<u8>(&revisions)
1167 } else if revisions.len() < 64 {
1160 } else if revisions.len() < 64 {
1168 self.find_gca_candidates::<u64>(&revisions)
1161 self.find_gca_candidates::<u64>(&revisions)
1169 } else {
1162 } else {
1170 self.find_gca_candidates::<NonStaticPoisonableBitSet>(&revisions)
1163 self.find_gca_candidates::<NonStaticPoisonableBitSet>(&revisions)
1171 }
1164 }
1172 }
1165 }
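The three-way dispatch just above sizes the bit set to the number of input revisions: each input owns one bit and one extra bit is reserved for poisoning, so a `u8` serves up to 7 inputs, a `u64` up to 63, and anything larger falls back to the heap-allocated set. The same rule, spelled out (illustrative only):

```rust
fn bitset_kind(revcount: usize) -> &'static str {
    match revcount {
        0..=7 => "u8",   // 7 data bits + 1 poison bit
        8..=63 => "u64", // 63 data bits + 1 poison bit
        _ => "NonStaticPoisonableBitSet",
    }
}
```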
1173
1166
1174 pub fn ancestors(
1167 pub fn ancestors(
1175 &self,
1168 &self,
1176 revisions: &[Revision],
1169 revisions: &[Revision],
1177 ) -> Result<Vec<Revision>, GraphError> {
1170 ) -> Result<Vec<Revision>, GraphError> {
1178 self.find_deepest_revs(&self.common_ancestor_heads(revisions)?)
1171 self.find_deepest_revs(&self.common_ancestor_heads(revisions)?)
1179 }
1172 }
1180
1173
1181 /// Given a disjoint set of revs, return all candidates for the
1174 /// Given a disjoint set of revs, return all candidates for the
1182 /// greatest common ancestor. In revset notation, this is the set
1175 /// greatest common ancestor. In revset notation, this is the set
1183 /// `heads(::a and ::b and ...)`
1176 /// `heads(::a and ::b and ...)`
1184 fn find_gca_candidates<BS: PoisonableBitSet + Clone>(
1177 fn find_gca_candidates<BS: PoisonableBitSet + Clone>(
1185 &self,
1178 &self,
1186 revs: &[Revision],
1179 revs: &[Revision],
1187 ) -> Result<Vec<Revision>, GraphError> {
1180 ) -> Result<Vec<Revision>, GraphError> {
1188 if revs.is_empty() {
1181 if revs.is_empty() {
1189 return Ok(vec![]);
1182 return Ok(vec![]);
1190 }
1183 }
1191 let revcount = revs.len();
1184 let revcount = revs.len();
1192 let mut candidates = vec![];
1185 let mut candidates = vec![];
1193 let max_rev = revs.iter().max().unwrap();
1186 let max_rev = revs.iter().max().unwrap();
1194
1187
1195 let mut seen = BS::vec_of_empty(revs.len(), (max_rev.0 + 1) as usize);
1188 let mut seen = BS::vec_of_empty(revs.len(), (max_rev.0 + 1) as usize);
1196
1189
1197 for (idx, rev) in revs.iter().enumerate() {
1190 for (idx, rev) in revs.iter().enumerate() {
1198 seen[rev.0 as usize].add(idx);
1191 seen[rev.0 as usize].add(idx);
1199 }
1192 }
1200 let mut current_rev = *max_rev;
1193 let mut current_rev = *max_rev;
1201 // Number of revisions whose inspection in the main loop
1194 // Number of revisions whose inspection in the main loop
1202 // will give a result or trigger inspection of other revisions
1195 // will give a result or trigger inspection of other revisions
1203 let mut interesting = revcount;
1196 let mut interesting = revcount;
1204
1197
1205 // The algorithm works on a vector of bit sets, indexed by revision
1198 // The algorithm works on a vector of bit sets, indexed by revision
1206 // numbers and iterated in reverse order.
1199 // numbers and iterated in reverse order.
1207 // An entry in this vector is poisoned if and only if the corresponding
1200 // An entry in this vector is poisoned if and only if the corresponding
1208 // revision is a common, yet not maximal ancestor.
1201 // revision is a common, yet not maximal ancestor.
1209
1202
1210 // The principle of the algorithm is as follows:
1203 // The principle of the algorithm is as follows:
1211 // For a revision `r`, when entering the loop, `seen[r]` is either
1204 // For a revision `r`, when entering the loop, `seen[r]` is either
1212 // poisoned or the sub set of `revs` of which `r` is an ancestor.
1205 // poisoned or the sub set of `revs` of which `r` is an ancestor.
1213 // If this sub set is full, then `r` is a solution and its parents
1206 // If this sub set is full, then `r` is a solution and its parents
1214 // have to be poisoned.
1207 // have to be poisoned.
1215 //
1208 //
1216 // At each iteration, the bit sets of the parents are updated by
1209 // At each iteration, the bit sets of the parents are updated by
1217 // union with `seen[r]`.
1210 // union with `seen[r]`.
1218 // As we walk the index from the end, we are sure we have encountered
1211 // As we walk the index from the end, we are sure we have encountered
1219 // all children of `r` before `r`, hence we know that `seen[r]` is
1212 // all children of `r` before `r`, hence we know that `seen[r]` is
1220 // fully computed.
1213 // fully computed.
1221 //
1214 //
1222 // On top of that there are several optimizations that make the code
1215 // On top of that there are several optimizations that make the code
1223 // less obvious than the description above:
1216 // less obvious than the description above:
1224 // - The `interesting` counter allows breaking out early
1217 // - The `interesting` counter allows breaking out early
1225 // - The loop starts from `max(revs)`
1218 // - The loop starts from `max(revs)`
1226 // - Early return in case it is detected that one of the incoming revs
1219 // - Early return in case it is detected that one of the incoming revs
1227 // is a common ancestor of all of them.
1220 // is a common ancestor of all of them.
1228 while current_rev.0 >= 0 && interesting > 0 {
1221 while current_rev.0 >= 0 && interesting > 0 {
1229 let current_seen = seen[current_rev.0 as usize].clone();
1222 let current_seen = seen[current_rev.0 as usize].clone();
1230
1223
1231 if current_seen.is_empty() {
1224 if current_seen.is_empty() {
1232 current_rev = Revision(current_rev.0 - 1);
1225 current_rev = Revision(current_rev.0 - 1);
1233 continue;
1226 continue;
1234 }
1227 }
1235 let mut poison = current_seen.is_poisoned();
1228 let mut poison = current_seen.is_poisoned();
1236 if !poison {
1229 if !poison {
1237 interesting -= 1;
1230 interesting -= 1;
1238 if current_seen.is_full_range(revcount) {
1231 if current_seen.is_full_range(revcount) {
1239 candidates.push(current_rev);
1232 candidates.push(current_rev);
1240 poison = true;
1233 poison = true;
1241
1234
1242 // Being a common ancestor, if `current_rev` is among
1235 // Being a common ancestor, if `current_rev` is among
1243 // the input revisions, it is *the* answer.
1236 // the input revisions, it is *the* answer.
1244 for rev in revs {
1237 for rev in revs {
1245 if *rev == current_rev {
1238 if *rev == current_rev {
1246 return Ok(candidates);
1239 return Ok(candidates);
1247 }
1240 }
1248 }
1241 }
1249 }
1242 }
1250 }
1243 }
1251 for parent in self.parents(current_rev)? {
1244 for parent in self.parents(current_rev)? {
1252 if parent == NULL_REVISION {
1245 if parent == NULL_REVISION {
1253 continue;
1246 continue;
1254 }
1247 }
1255 let parent_seen = &mut seen[parent.0 as usize];
1248 let parent_seen = &mut seen[parent.0 as usize];
1256 if poison {
1249 if poison {
1257 // this block is logically equivalent to poisoning the parent
1250 // this block is logically equivalent to poisoning the parent
1258 // and counting it as non interesting if it
1251 // and counting it as non interesting if it
1259 // had been seen before (and hence was counted as interesting)
1252 // had been seen before (and hence was counted as interesting)
1260 if !parent_seen.is_empty() && !parent_seen.is_poisoned() {
1253 if !parent_seen.is_empty() && !parent_seen.is_poisoned() {
1261 interesting -= 1;
1254 interesting -= 1;
1262 }
1255 }
1263 parent_seen.poison();
1256 parent_seen.poison();
1264 } else {
1257 } else {
1265 if parent_seen.is_empty() {
1258 if parent_seen.is_empty() {
1266 interesting += 1;
1259 interesting += 1;
1267 }
1260 }
1268 parent_seen.union(&current_seen);
1261 parent_seen.union(&current_seen);
1269 }
1262 }
1270 }
1263 }
1271
1264
1272 current_rev = Revision(current_rev.0 - 1);
1265 current_rev = Revision(current_rev.0 - 1);
1273 }
1266 }
1274
1267
1275 Ok(candidates)
1268 Ok(candidates)
1276 }
1269 }
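To make the walk concrete, here is a compressed, self-contained sketch of the same algorithm on a toy graph, using a bare `u8` per revision (bit `i` set means "ancestor of input `i`", the high bit means poisoned, so at most 7 inputs). It omits the `interesting` early break and the "an input is itself the answer" shortcut described above; the graph and revision numbers are invented:

```rust
fn gca_candidates(parents: &[[i32; 2]], revs: &[usize]) -> Vec<usize> {
    const POISON: u8 = 1 << 7;
    let max = *revs.iter().max().unwrap();
    let mut seen = vec![0u8; max + 1];
    for (i, &r) in revs.iter().enumerate() {
        seen[r] |= 1 << i;
    }
    let full: u8 = (1 << revs.len()) - 1;
    let mut candidates = vec![];
    for rev in (0..=max).rev() {
        let cur = seen[rev];
        if cur == 0 {
            continue; // not an ancestor of any input
        }
        let mut poison = cur >= POISON;
        if !poison && cur == full {
            candidates.push(rev); // ancestor of every input
            poison = true; // its own ancestors cannot be maximal
        }
        for &p in &parents[rev] {
            if p < 0 {
                continue; // null parent
            }
            if poison {
                seen[p as usize] = POISON;
            } else {
                seen[p as usize] |= cur;
            }
        }
    }
    candidates
}

// Example: 0 -> 1 -> 2, then 3 and 4 both have parent 2:
// gca_candidates(&[[-1, -1], [0, -1], [1, -1], [2, -1], [2, -1]], &[3, 4])
// returns vec![2].
```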
1277
1270
1278 /// Given a disjoint set of revs, return the subset with the longest path
1271 /// Given a disjoint set of revs, return the subset with the longest path
1279 /// to the root.
1272 /// to the root.
1280 fn find_deepest_revs(
1273 fn find_deepest_revs(
1281 &self,
1274 &self,
1282 revs: &[Revision],
1275 revs: &[Revision],
1283 ) -> Result<Vec<Revision>, GraphError> {
1276 ) -> Result<Vec<Revision>, GraphError> {
1284 // TODO replace this all with just comparing rank?
1277 // TODO replace this all with just comparing rank?
1285 // Also, the original implementations in C/Python are cryptic, not
1278 // Also, the original implementations in C/Python are cryptic, not
1286 // even sure we actually need this?
1279 // even sure we actually need this?
1287 if revs.len() <= 1 {
1280 if revs.len() <= 1 {
1288 return Ok(revs.to_owned());
1281 return Ok(revs.to_owned());
1289 }
1282 }
1290 let max_rev = revs.iter().max().unwrap().0;
1283 let max_rev = revs.iter().max().unwrap().0;
1291 let mut interesting = HashMap::new();
1284 let mut interesting = HashMap::new();
1292 let mut seen = vec![0; max_rev as usize + 1];
1285 let mut seen = vec![0; max_rev as usize + 1];
1293 let mut depth = vec![0; max_rev as usize + 1];
1286 let mut depth = vec![0; max_rev as usize + 1];
1294 let mut mapping = vec![];
1287 let mut mapping = vec![];
1295 let mut revs = revs.to_owned();
1288 let mut revs = revs.to_owned();
1296 revs.sort_unstable();
1289 revs.sort_unstable();
1297
1290
1298 for (idx, rev) in revs.iter().enumerate() {
1291 for (idx, rev) in revs.iter().enumerate() {
1299 depth[rev.0 as usize] = 1;
1292 depth[rev.0 as usize] = 1;
1300 let shift = 1 << idx;
1293 let shift = 1 << idx;
1301 seen[rev.0 as usize] = shift;
1294 seen[rev.0 as usize] = shift;
1302 interesting.insert(shift, 1);
1295 interesting.insert(shift, 1);
1303 mapping.push((shift, *rev));
1296 mapping.push((shift, *rev));
1304 }
1297 }
1305
1298
1306 let mut current_rev = Revision(max_rev);
1299 let mut current_rev = Revision(max_rev);
1307 while current_rev.0 >= 0 && interesting.len() > 1 {
1300 while current_rev.0 >= 0 && interesting.len() > 1 {
1308 let current_depth = depth[current_rev.0 as usize];
1301 let current_depth = depth[current_rev.0 as usize];
1309 if current_depth == 0 {
1302 if current_depth == 0 {
1310 current_rev = Revision(current_rev.0 - 1);
1303 current_rev = Revision(current_rev.0 - 1);
1311 continue;
1304 continue;
1312 }
1305 }
1313
1306
1314 let current_seen = seen[current_rev.0 as usize];
1307 let current_seen = seen[current_rev.0 as usize];
1315 for parent in self.parents(current_rev)? {
1308 for parent in self.parents(current_rev)? {
1316 if parent == NULL_REVISION {
1309 if parent == NULL_REVISION {
1317 continue;
1310 continue;
1318 }
1311 }
1319 let parent_seen = seen[parent.0 as usize];
1312 let parent_seen = seen[parent.0 as usize];
1320 let parent_depth = depth[parent.0 as usize];
1313 let parent_depth = depth[parent.0 as usize];
1321 if parent_depth <= current_depth {
1314 if parent_depth <= current_depth {
1322 depth[parent.0 as usize] = current_depth + 1;
1315 depth[parent.0 as usize] = current_depth + 1;
1323 if parent_seen != current_seen {
1316 if parent_seen != current_seen {
1324 *interesting.get_mut(&current_seen).unwrap() += 1;
1317 *interesting.get_mut(&current_seen).unwrap() += 1;
1325 seen[parent.0 as usize] = current_seen;
1318 seen[parent.0 as usize] = current_seen;
1326 if parent_seen != 0 {
1319 if parent_seen != 0 {
1327 let parent_interesting =
1320 let parent_interesting =
1328 interesting.get_mut(&parent_seen).unwrap();
1321 interesting.get_mut(&parent_seen).unwrap();
1329 *parent_interesting -= 1;
1322 *parent_interesting -= 1;
1330 if *parent_interesting == 0 {
1323 if *parent_interesting == 0 {
1331 interesting.remove(&parent_seen);
1324 interesting.remove(&parent_seen);
1332 }
1325 }
1333 }
1326 }
1334 }
1327 }
1335 } else if current_depth == parent_depth - 1 {
1328 } else if current_depth == parent_depth - 1 {
1336 let either_seen = parent_seen | current_seen;
1329 let either_seen = parent_seen | current_seen;
1337 if either_seen == parent_seen {
1330 if either_seen == parent_seen {
1338 continue;
1331 continue;
1339 }
1332 }
1340 seen[parent.0 as usize] = either_seen;
1333 seen[parent.0 as usize] = either_seen;
1341 interesting
1334 interesting
1342 .entry(either_seen)
1335 .entry(either_seen)
1343 .and_modify(|v| *v += 1)
1336 .and_modify(|v| *v += 1)
1344 .or_insert(1);
1337 .or_insert(1);
1345 *interesting.get_mut(&parent_seen).unwrap() -= 1;
1338 *interesting.get_mut(&parent_seen).unwrap() -= 1;
1346 if interesting[&parent_seen] == 0 {
1339 if interesting[&parent_seen] == 0 {
1347 interesting.remove(&parent_seen);
1340 interesting.remove(&parent_seen);
1348 }
1341 }
1349 }
1342 }
1350 }
1343 }
1351 *interesting.get_mut(&current_seen).unwrap() -= 1;
1344 *interesting.get_mut(&current_seen).unwrap() -= 1;
1352 if interesting[&current_seen] == 0 {
1345 if interesting[&current_seen] == 0 {
1353 interesting.remove(&current_seen);
1346 interesting.remove(&current_seen);
1354 }
1347 }
1355
1348
1356 current_rev = Revision(current_rev.0 - 1);
1349 current_rev = Revision(current_rev.0 - 1);
1357 }
1350 }
1358
1351
1359 if interesting.len() != 1 {
1352 if interesting.len() != 1 {
1360 return Ok(vec![]);
1353 return Ok(vec![]);
1361 }
1354 }
1362 let mask = interesting.keys().next().unwrap();
1355 let mask = interesting.keys().next().unwrap();
1363
1356
1364 Ok(mapping
1357 Ok(mapping
1365 .into_iter()
1358 .into_iter()
1366 .filter_map(|(shift, rev)| {
1359 .filter_map(|(shift, rev)| {
1367 if (mask & shift) != 0 {
1360 if (mask & shift) != 0 {
1368 return Some(rev);
1361 return Some(rev);
1369 }
1362 }
1370 None
1363 None
1371 })
1364 })
1372 .collect())
1365 .collect())
1373 }
1366 }
1374 }
1367 }
1375
1368
1376 /// The kind of functionality needed by find_gca_candidates
1369 /// The kind of functionality needed by find_gca_candidates
1377 ///
1370 ///
1378 /// This is a bit mask which can be declared to be "poisoned"; callers
1371 /// This is a bit mask which can be declared to be "poisoned"; callers
1379 /// interpret that as a signal to break out of some loops.
1372 /// interpret that as a signal to break out of some loops.
1380 ///
1373 ///
1381 /// The maximum capacity of the bit mask depends on the actual implementation
1374 /// The maximum capacity of the bit mask depends on the actual implementation
1382 trait PoisonableBitSet: Sized + PartialEq {
1375 trait PoisonableBitSet: Sized + PartialEq {
1383 /// Return a vector of exactly `vec_len` elements, initialized to be empty.
1376 /// Return a vector of exactly `vec_len` elements, initialized to be empty.
1384 ///
1377 ///
1385 /// Optimization can vastly depend on implementation. Those being `Copy`
1378 /// Optimization can vastly depend on implementation. Those being `Copy`
1386 /// and having constant capacity typically can have a very simple
1379 /// and having constant capacity typically can have a very simple
1387 /// implementation.
1380 /// implementation.
1388 fn vec_of_empty(sets_size: usize, vec_len: usize) -> Vec<Self>;
1381 fn vec_of_empty(sets_size: usize, vec_len: usize) -> Vec<Self>;
1389
1382
1390 /// The size of the bit mask in memory
1383 /// The size of the bit mask in memory
1391 fn size(&self) -> usize;
1384 fn size(&self) -> usize;
1392
1385
1393 /// The number of elements that can be represented in the set.
1386 /// The number of elements that can be represented in the set.
1394 ///
1387 ///
1395 /// Another way to put it is that it is the highest integer `C` such that
1388 /// Another way to put it is that it is the highest integer `C` such that
1396 /// the set is guaranteed to always be a subset of the integer range
1389 /// the set is guaranteed to always be a subset of the integer range
1397 /// `[0, C)`
1390 /// `[0, C)`
1398 fn capacity(&self) -> usize;
1391 fn capacity(&self) -> usize;
1399
1392
1400 /// Declare `n` to belong to the set
1393 /// Declare `n` to belong to the set
1401 fn add(&mut self, n: usize);
1394 fn add(&mut self, n: usize);
1402
1395
1403 /// Declare `n` not to belong to the set
1396 /// Declare `n` not to belong to the set
1404 fn discard(&mut self, n: usize);
1397 fn discard(&mut self, n: usize);
1405
1398
1406 /// Replace this bit set by its union with other
1399 /// Replace this bit set by its union with other
1407 fn union(&mut self, other: &Self);
1400 fn union(&mut self, other: &Self);
1408
1401
1409 /// Poison the bit set
1402 /// Poison the bit set
1410 ///
1403 ///
1411 /// Interpretation up to the caller
1404 /// Interpretation up to the caller
1412 fn poison(&mut self);
1405 fn poison(&mut self);
1413
1406
1414 /// Is the bit set poisoned?
1407 /// Is the bit set poisoned?
1415 ///
1408 ///
1416 /// Interpretation is up to the caller
1409 /// Interpretation is up to the caller
1417 fn is_poisoned(&self) -> bool;
1410 fn is_poisoned(&self) -> bool;
1418
1411
1419 /// Is the bit set empty?
1412 /// Is the bit set empty?
1420 fn is_empty(&self) -> bool;
1413 fn is_empty(&self) -> bool;
1421
1414
1422 /// return `true` if and only if the bit set is the full range `[0, n)`
1415 /// return `true` if and only if the bit set is the full range `[0, n)`
1423 /// of integers
1416 /// of integers
1424 fn is_full_range(&self, n: usize) -> bool;
1417 fn is_full_range(&self, n: usize) -> bool;
1425 }
1418 }
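A quick demonstration of the trait contract, using the `u8` implementation defined below. This is a sketch that assumes it lives in this module (the trait is private); the fully qualified calls avoid clashing with `u8`'s built-in arithmetic methods such as `Add::add`:

```rust
fn poisonable_bitset_demo() {
    // Three empty sets, each able to hold the elements 0..7 (bit 7 is
    // reserved as the poison bit).
    let mut sets = <u8 as PoisonableBitSet>::vec_of_empty(7, 3);
    PoisonableBitSet::add(&mut sets[0], 0);
    PoisonableBitSet::add(&mut sets[0], 1);
    assert!(sets[0].is_full_range(2)); // holds exactly {0, 1}
    let first = sets[0];
    PoisonableBitSet::union(&mut sets[1], &first);
    assert!(!PoisonableBitSet::is_empty(&sets[1]));
    sets[2].poison();
    assert!(sets[2].is_poisoned());
}
```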
1426
1419
1427 const U64_POISON: u64 = 1 << 63;
1420 const U64_POISON: u64 = 1 << 63;
1428 const U8_POISON: u8 = 1 << 7;
1421 const U8_POISON: u8 = 1 << 7;
1429
1422
1430 impl PoisonableBitSet for u64 {
1423 impl PoisonableBitSet for u64 {
1431 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1424 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1432 vec![0u64; vec_len]
1425 vec![0u64; vec_len]
1433 }
1426 }
1434
1427
1435 fn size(&self) -> usize {
1428 fn size(&self) -> usize {
1436 8
1429 8
1437 }
1430 }
1438
1431
1439 fn capacity(&self) -> usize {
1432 fn capacity(&self) -> usize {
1440 63
1433 63
1441 }
1434 }
1442
1435
1443 fn add(&mut self, n: usize) {
1436 fn add(&mut self, n: usize) {
1444 (*self) |= 1u64 << n;
1437 (*self) |= 1u64 << n;
1445 }
1438 }
1446
1439
1447 fn discard(&mut self, n: usize) {
1440 fn discard(&mut self, n: usize) {
1448 (*self) &= u64::MAX - (1u64 << n);
1441 (*self) &= u64::MAX - (1u64 << n);
1449 }
1442 }
1450
1443
1451 fn union(&mut self, other: &Self) {
1444 fn union(&mut self, other: &Self) {
1452 if *self != *other {
1445 if *self != *other {
1453 (*self) |= *other;
1446 (*self) |= *other;
1454 }
1447 }
1455 }
1448 }
1456
1449
1457 fn is_full_range(&self, n: usize) -> bool {
1450 fn is_full_range(&self, n: usize) -> bool {
1458 *self + 1 == (1u64 << n)
1451 *self + 1 == (1u64 << n)
1459 }
1452 }
1460
1453
1461 fn is_empty(&self) -> bool {
1454 fn is_empty(&self) -> bool {
1462 *self == 0
1455 *self == 0
1463 }
1456 }
1464
1457
1465 fn poison(&mut self) {
1458 fn poison(&mut self) {
1466 *self = U64_POISON;
1459 *self = U64_POISON;
1467 }
1460 }
1468
1461
1469 fn is_poisoned(&self) -> bool {
1462 fn is_poisoned(&self) -> bool {
1470 // equality comparison would be tempting but would not resist
1463 // equality comparison would be tempting but would not resist
1471 // operations after poisoning (even if these should be bogus).
1464 // operations after poisoning (even if these should be bogus).
1472 *self >= U64_POISON
1465 *self >= U64_POISON
1473 }
1466 }
1474 }
1467 }
1475
1468
1476 impl PoisonableBitSet for u8 {
1469 impl PoisonableBitSet for u8 {
1477 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1470 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1478 vec![0; vec_len]
1471 vec![0; vec_len]
1479 }
1472 }
1480
1473
1481 fn size(&self) -> usize {
1474 fn size(&self) -> usize {
1482 1
1475 1
1483 }
1476 }
1484
1477
1485 fn capacity(&self) -> usize {
1478 fn capacity(&self) -> usize {
1486 7
1479 7
1487 }
1480 }
1488
1481
1489 fn add(&mut self, n: usize) {
1482 fn add(&mut self, n: usize) {
1490 (*self) |= 1 << n;
1483 (*self) |= 1 << n;
1491 }
1484 }
1492
1485
1493 fn discard(&mut self, n: usize) {
1486 fn discard(&mut self, n: usize) {
1494 (*self) &= u8::MAX - (1 << n);
1487 (*self) &= u8::MAX - (1 << n);
1495 }
1488 }
1496
1489
1497 fn union(&mut self, other: &Self) {
1490 fn union(&mut self, other: &Self) {
1498 if *self != *other {
1491 if *self != *other {
1499 (*self) |= *other;
1492 (*self) |= *other;
1500 }
1493 }
1501 }
1494 }
1502
1495
1503 fn is_full_range(&self, n: usize) -> bool {
1496 fn is_full_range(&self, n: usize) -> bool {
1504 *self + 1 == (1 << n)
1497 *self + 1 == (1 << n)
1505 }
1498 }
1506
1499
1507 fn is_empty(&self) -> bool {
1500 fn is_empty(&self) -> bool {
1508 *self == 0
1501 *self == 0
1509 }
1502 }
1510
1503
1511 fn poison(&mut self) {
1504 fn poison(&mut self) {
1512 *self = U8_POISON;
1505 *self = U8_POISON;
1513 }
1506 }
1514
1507
1515 fn is_poisoned(&self) -> bool {
1508 fn is_poisoned(&self) -> bool {
1516 // equality comparison would be tempting but would not resist
1509 // equality comparison would be tempting but would not resist
1517 // operations after poisoning (even if these should be bogus).
1510 // operations after poisoning (even if these should be bogus).
1518 *self >= U8_POISON
1511 *self >= U8_POISON
1519 }
1512 }
1520 }
1513 }
1521
1514
1522 /// A poisonable bit set whose capacity is not known at compile time but
1515 /// A poisonable bit set whose capacity is not known at compile time but
1523 /// is constant after initial construction
1516 /// is constant after initial construction
1524 ///
1517 ///
1525 /// This can be way further optimized if performance assessments (speed
1518 /// This can be way further optimized if performance assessments (speed
1526 /// and/or RAM) require it.
1519 /// and/or RAM) require it.
1527 /// As far as RAM is concerned, for large vectors of these, the main problem
1520 /// As far as RAM is concerned, for large vectors of these, the main problem
1528 /// would be the repetition of set_size in each item. We would need a trait
1521 /// would be the repetition of set_size in each item. We would need a trait
1529 /// to abstract over the idea of a vector of such bit sets to do better.
1522 /// to abstract over the idea of a vector of such bit sets to do better.
1530 #[derive(Clone, PartialEq)]
1523 #[derive(Clone, PartialEq)]
1531 struct NonStaticPoisonableBitSet {
1524 struct NonStaticPoisonableBitSet {
1532 set_size: usize,
1525 set_size: usize,
1533 bit_set: Vec<u64>,
1526 bit_set: Vec<u64>,
1534 }
1527 }
1535
1528
1536 /// Number of `u64` needed for a [`NonStaticPoisonableBitSet`] of given size
1529 /// Number of `u64` needed for a [`NonStaticPoisonableBitSet`] of given size
1537 fn non_static_poisonable_inner_len(set_size: usize) -> usize {
1530 fn non_static_poisonable_inner_len(set_size: usize) -> usize {
1538 1 + (set_size + 1) / 64
1531 1 + (set_size + 1) / 64
1539 }
1532 }
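The `+ 1` in the numerator leaves room for the poison bit at index `set_size`; the leading `1 +` makes the formula over-allocate by one word in some edge cases, which is harmless. A test-style check of the arithmetic (hypothetical, mirroring the function above):

```rust
#[test]
fn non_static_inner_len_examples() {
    assert_eq!(non_static_poisonable_inner_len(62), 1); // fits one u64
    assert_eq!(non_static_poisonable_inner_len(64), 2); // poison bit spills
    assert_eq!(non_static_poisonable_inner_len(200), 4);
}
```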
1540
1533
1541 impl NonStaticPoisonableBitSet {
1534 impl NonStaticPoisonableBitSet {
1542 /// The index of the sub-bit set for the given n, and the index inside
1535 /// The index of the sub-bit set for the given n, and the index inside
1543 /// the latter
1536 /// the latter
1544 fn index(&self, n: usize) -> (usize, usize) {
1537 fn index(&self, n: usize) -> (usize, usize) {
1545 (n / 64, n % 64)
1538 (n / 64, n % 64)
1546 }
1539 }
1547 }
1540 }
1548
1541
1549 /// Mock implementation to ensure that the trait makes sense
1542 /// Mock implementation to ensure that the trait makes sense
1550 impl PoisonableBitSet for NonStaticPoisonableBitSet {
1543 impl PoisonableBitSet for NonStaticPoisonableBitSet {
1551 fn vec_of_empty(set_size: usize, vec_len: usize) -> Vec<Self> {
1544 fn vec_of_empty(set_size: usize, vec_len: usize) -> Vec<Self> {
1552 let tmpl = Self {
1545 let tmpl = Self {
1553 set_size,
1546 set_size,
1554 bit_set: vec![0u64; non_static_poisonable_inner_len(set_size)],
1547 bit_set: vec![0u64; non_static_poisonable_inner_len(set_size)],
1555 };
1548 };
1556 vec![tmpl; vec_len]
1549 vec![tmpl; vec_len]
1557 }
1550 }
1558
1551
1559 fn size(&self) -> usize {
1552 fn size(&self) -> usize {
1560 8 + self.bit_set.len() * 8
1553 8 + self.bit_set.len() * 8
1561 }
1554 }
1562
1555
1563 fn capacity(&self) -> usize {
1556 fn capacity(&self) -> usize {
1564 self.set_size
1557 self.set_size
1565 }
1558 }
1566
1559
1567 fn add(&mut self, n: usize) {
1560 fn add(&mut self, n: usize) {
1568 let (sub_bs, bit_pos) = self.index(n);
1561 let (sub_bs, bit_pos) = self.index(n);
1569 self.bit_set[sub_bs] |= 1 << bit_pos
1562 self.bit_set[sub_bs] |= 1 << bit_pos
1570 }
1563 }
1571
1564
1572 fn discard(&mut self, n: usize) {
1565 fn discard(&mut self, n: usize) {
1573 let (sub_bs, bit_pos) = self.index(n);
1566 let (sub_bs, bit_pos) = self.index(n);
1574 self.bit_set[sub_bs] &= u64::MAX - (1 << bit_pos)
1567 self.bit_set[sub_bs] &= u64::MAX - (1 << bit_pos)
1575 }
1568 }
1576
1569
1577 fn union(&mut self, other: &Self) {
1570 fn union(&mut self, other: &Self) {
1578 assert!(
1571 assert!(
1579 self.set_size == other.set_size,
1572 self.set_size == other.set_size,
1580 "Binary operations on bit sets can only be done on same size"
1573 "Binary operations on bit sets can only be done on same size"
1581 );
1574 );
1582 for i in 0..self.bit_set.len() {
1575 for i in 0..self.bit_set.len() {
1583 self.bit_set[i] |= other.bit_set[i]
1576 self.bit_set[i] |= other.bit_set[i]
1584 }
1577 }
1585 }
1578 }
1586
1579
1587 fn is_full_range(&self, n: usize) -> bool {
1580 fn is_full_range(&self, n: usize) -> bool {
1588 let (sub_bs, bit_pos) = self.index(n);
1581 let (sub_bs, bit_pos) = self.index(n);
1589 self.bit_set[..sub_bs].iter().all(|bs| *bs == u64::MAX)
1582 self.bit_set[..sub_bs].iter().all(|bs| *bs == u64::MAX)
1590 && self.bit_set[sub_bs] == (1 << (bit_pos + 1)) - 1
1583 && self.bit_set[sub_bs] == (1 << (bit_pos + 1)) - 1
1591 }
1584 }
1592
1585
1593 fn is_empty(&self) -> bool {
1586 fn is_empty(&self) -> bool {
1594 self.bit_set.iter().all(|bs| *bs == 0u64)
1587 self.bit_set.iter().all(|bs| *bs == 0u64)
1595 }
1588 }
1596
1589
1597 fn poison(&mut self) {
1590 fn poison(&mut self) {
1598 let (sub_bs, bit_pos) = self.index(self.set_size);
1591 let (sub_bs, bit_pos) = self.index(self.set_size);
1599 self.bit_set[sub_bs] = 1 << bit_pos;
1592 self.bit_set[sub_bs] = 1 << bit_pos;
1600 }
1593 }
1601
1594
1602 fn is_poisoned(&self) -> bool {
1595 fn is_poisoned(&self) -> bool {
1603 let (sub_bs, bit_pos) = self.index(self.set_size);
1596 let (sub_bs, bit_pos) = self.index(self.set_size);
1604 self.bit_set[sub_bs] >= 1 << bit_pos
1597 self.bit_set[sub_bs] >= 1 << bit_pos
1605 }
1598 }
1606 }
1599 }
1607
1600
1608 /// Set of roots of all non-public phases
1601 /// Set of roots of all non-public phases
1609 pub type RootsPerPhase = [HashSet<Revision>; Phase::non_public_phases().len()];
1602 pub type RootsPerPhase = [Vec<Revision>; Phase::non_public_phases().len()];
1610
1603
1611 #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
1604 #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
1612 pub enum Phase {
1605 pub enum Phase {
1613 Public = 0,
1606 Public = 0,
1614 Draft = 1,
1607 Draft = 1,
1615 Secret = 2,
1608 Secret = 2,
1616 Archived = 3,
1609 Archived = 3,
1617 Internal = 4,
1610 Internal = 4,
1618 }
1611 }
1619
1612
1620 impl TryFrom<usize> for Phase {
1613 impl TryFrom<usize> for Phase {
1621 type Error = RevlogError;
1614 type Error = RevlogError;
1622
1615
1623 fn try_from(value: usize) -> Result<Self, Self::Error> {
1616 fn try_from(value: usize) -> Result<Self, Self::Error> {
1624 Ok(match value {
1617 Ok(match value {
1625 0 => Self::Public,
1618 0 => Self::Public,
1626 1 => Self::Draft,
1619 1 => Self::Draft,
1627 2 => Self::Secret,
1620 2 => Self::Secret,
1628 32 => Self::Archived,
1621 32 => Self::Archived,
1629 96 => Self::Internal,
1622 96 => Self::Internal,
1630 v => {
1623 v => {
1631 return Err(RevlogError::corrupted(format!(
1624 return Err(RevlogError::corrupted(format!(
1632 "invalid phase value {}",
1625 "invalid phase value {}",
1633 v
1626 v
1634 )))
1627 )))
1635 }
1628 }
1636 })
1629 })
1637 }
1630 }
1638 }
1631 }
1639
1632
1640 impl Phase {
1633 impl Phase {
1641 pub const fn all_phases() -> &'static [Self] {
1634 pub const fn all_phases() -> &'static [Self] {
1642 &[
1635 &[
1643 Self::Public,
1636 Self::Public,
1644 Self::Draft,
1637 Self::Draft,
1645 Self::Secret,
1638 Self::Secret,
1646 Self::Archived,
1639 Self::Archived,
1647 Self::Internal,
1640 Self::Internal,
1648 ]
1641 ]
1649 }
1642 }
1650 pub const fn non_public_phases() -> &'static [Self] {
1643 pub const fn non_public_phases() -> &'static [Self] {
1651 &[Self::Draft, Self::Secret, Self::Archived, Self::Internal]
1644 &[Self::Draft, Self::Secret, Self::Archived, Self::Internal]
1652 }
1645 }
1653 }
1646 }
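Note that the integers accepted by `TryFrom` mirror Mercurial's own phase numbering, which is not dense: `Archived` and `Internal` sit at the reserved values 32 and 96. A usage sketch:

```rust
fn phase_parsing_examples() -> Result<(), RevlogError> {
    assert_eq!(Phase::try_from(0)?, Phase::Public);
    assert_eq!(Phase::try_from(2)?, Phase::Secret);
    assert_eq!(Phase::try_from(96)?, Phase::Internal);
    assert!(Phase::try_from(5).is_err()); // reported as corruption
    Ok(())
}
```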
1654
1647
1655 fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
1648 fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
1656 let mut offset: usize = 0;
1649 let mut offset: usize = 0;
1657 let mut offsets = Vec::new();
1650 let mut offsets = Vec::new();
1658
1651
1659 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
1652 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
1660 offsets.push(offset);
1653 offsets.push(offset);
1661 let end = offset + INDEX_ENTRY_SIZE;
1654 let end = offset + INDEX_ENTRY_SIZE;
1662 let entry = IndexEntry {
1655 let entry = IndexEntry {
1663 bytes: &bytes[offset..end],
1656 bytes: &bytes[offset..end],
1664 offset_override: None,
1657 offset_override: None,
1665 };
1658 };
1666
1659
1667 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
1660 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
1668 }
1661 }
1669 (offset, offsets)
1662 (offset, offsets)
1670 }
1663 }
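In an inline revlog, each 64-byte index entry is immediately followed by its compressed data, so the scan above alternates between entries and data blocks, collecting the entry offsets as it goes. A test-style sketch using the `IndexEntryBuilder` helper from the tests module below (the data bytes are fake):

```rust
#[test]
fn inline_scan_example() {
    // Two entries; the first is followed by 10 bytes of inline data.
    let mut buf = IndexEntryBuilder::new()
        .is_first(true)
        .with_compressed_len(10)
        .build();
    buf.extend(vec![0u8; 10]); // stand-in for the compressed data
    buf.extend(IndexEntryBuilder::new().with_compressed_len(0).build());
    let (end, offsets) = inline_scan(&buf);
    assert_eq!(offsets, vec![0, 74]); // 74 = 64-byte entry + 10 data bytes
    assert_eq!(end, 138); // 74 + 64 + 0
}
```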
1671
1664
1672 impl super::RevlogIndex for Index {
1665 impl super::RevlogIndex for Index {
1673 fn len(&self) -> usize {
1666 fn len(&self) -> usize {
1674 self.len()
1667 self.len()
1675 }
1668 }
1676
1669
1677 fn node(&self, rev: Revision) -> Option<&Node> {
1670 fn node(&self, rev: Revision) -> Option<&Node> {
1678 if rev == NULL_REVISION {
1671 if rev == NULL_REVISION {
1679 return Some(&NULL_NODE);
1672 return Some(&NULL_NODE);
1680 }
1673 }
1681 self.get_entry(rev).map(|entry| entry.hash())
1674 self.get_entry(rev).map(|entry| entry.hash())
1682 }
1675 }
1683 }
1676 }
1684
1677
1685 #[derive(Debug)]
1678 #[derive(Debug)]
1686 pub struct IndexEntry<'a> {
1679 pub struct IndexEntry<'a> {
1687 bytes: &'a [u8],
1680 bytes: &'a [u8],
1688 /// Allows overriding the offset value of the entry.
1681 /// Allows overriding the offset value of the entry.
1689 ///
1682 ///
1690 /// For interleaved index and data, the offset stored in the index
1683 /// For interleaved index and data, the offset stored in the index
1691 /// corresponds to the separated data offset.
1684 /// corresponds to the separated data offset.
1692 /// It has to be overridden with the actual offset in the interleaved
1685 /// It has to be overridden with the actual offset in the interleaved
1693 /// index which is just after the index block.
1686 /// index which is just after the index block.
1694 ///
1687 ///
1695 /// For separated index and data, the offset stored in the first index
1688 /// For separated index and data, the offset stored in the first index
1696 /// entry is mixed with the index headers.
1689 /// entry is mixed with the index headers.
1697 /// It has to be overridden with 0.
1690 /// It has to be overridden with 0.
1698 offset_override: Option<usize>,
1691 offset_override: Option<usize>,
1699 }
1692 }
1700
1693
1701 impl<'a> IndexEntry<'a> {
1694 impl<'a> IndexEntry<'a> {
1702 /// Return the offset of the data.
1695 /// Return the offset of the data.
1703 pub fn offset(&self) -> usize {
1696 pub fn offset(&self) -> usize {
1704 if let Some(offset_override) = self.offset_override {
1697 if let Some(offset_override) = self.offset_override {
1705 offset_override
1698 offset_override
1706 } else {
1699 } else {
1707 let mut bytes = [0; 8];
1700 let mut bytes = [0; 8];
1708 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
1701 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
1709 BigEndian::read_u64(&bytes[..]) as usize
1702 BigEndian::read_u64(&bytes[..]) as usize
1710 }
1703 }
1711 }
1704 }
1712 pub fn raw_offset(&self) -> u64 {
1705 pub fn raw_offset(&self) -> u64 {
1713 BigEndian::read_u64(&self.bytes[0..8])
1706 BigEndian::read_u64(&self.bytes[0..8])
1714 }
1707 }
1715
1708
1716 /// Same result (except potentially for rev 0) as C `index_get_start()`
1709 /// Same result (except potentially for rev 0) as C `index_get_start()`
1717 fn c_start(&self) -> u64 {
1710 fn c_start(&self) -> u64 {
1718 self.raw_offset() >> 16
1711 self.raw_offset() >> 16
1719 }
1712 }
1720
1713
1721 pub fn flags(&self) -> u16 {
1714 pub fn flags(&self) -> u16 {
1722 BigEndian::read_u16(&self.bytes[6..=7])
1715 BigEndian::read_u16(&self.bytes[6..=7])
1723 }
1716 }
1724
1717
1725 /// Return the compressed length of the data.
1718 /// Return the compressed length of the data.
1726 pub fn compressed_len(&self) -> u32 {
1719 pub fn compressed_len(&self) -> u32 {
1727 BigEndian::read_u32(&self.bytes[8..=11])
1720 BigEndian::read_u32(&self.bytes[8..=11])
1728 }
1721 }
1729
1722
1730 /// Return the uncompressed length of the data.
1723 /// Return the uncompressed length of the data.
1731 pub fn uncompressed_len(&self) -> i32 {
1724 pub fn uncompressed_len(&self) -> i32 {
1732 BigEndian::read_i32(&self.bytes[12..=15])
1725 BigEndian::read_i32(&self.bytes[12..=15])
1733 }
1726 }
1734
1727
1735 /// Return the revision upon which the data has been derived.
1728 /// Return the revision upon which the data has been derived.
1736 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
1729 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
1737 // TODO Maybe return an Option when base_revision == rev?
1730 // TODO Maybe return an Option when base_revision == rev?
1738 // Requires to add rev to IndexEntry
1731 // Requires to add rev to IndexEntry
1739
1732
1740 BigEndian::read_i32(&self.bytes[16..]).into()
1733 BigEndian::read_i32(&self.bytes[16..]).into()
1741 }
1734 }
1742
1735
1743 pub fn link_revision(&self) -> UncheckedRevision {
1736 pub fn link_revision(&self) -> UncheckedRevision {
1744 BigEndian::read_i32(&self.bytes[20..]).into()
1737 BigEndian::read_i32(&self.bytes[20..]).into()
1745 }
1738 }
1746
1739
1747 pub fn p1(&self) -> UncheckedRevision {
1740 pub fn p1(&self) -> UncheckedRevision {
1748 BigEndian::read_i32(&self.bytes[24..]).into()
1741 BigEndian::read_i32(&self.bytes[24..]).into()
1749 }
1742 }
1750
1743
1751 pub fn p2(&self) -> UncheckedRevision {
1744 pub fn p2(&self) -> UncheckedRevision {
1752 BigEndian::read_i32(&self.bytes[28..]).into()
1745 BigEndian::read_i32(&self.bytes[28..]).into()
1753 }
1746 }
1754
1747
1755 /// Return the hash of the revision's full text.
1748 /// Return the hash of the revision's full text.
1756 ///
1749 ///
1757 /// Currently, SHA-1 is used and only the first 20 bytes of this field
1750 /// Currently, SHA-1 is used and only the first 20 bytes of this field
1758 /// are used.
1751 /// are used.
1759 pub fn hash(&self) -> &'a Node {
1752 pub fn hash(&self) -> &'a Node {
1760 (&self.bytes[32..52]).try_into().unwrap()
1753 (&self.bytes[32..52]).try_into().unwrap()
1761 }
1754 }
1762
1755
1763 pub fn as_bytes(&self) -> &'a [u8] {
1756 pub fn as_bytes(&self) -> &'a [u8] {
1764 self.bytes
1757 self.bytes
1765 }
1758 }
1766 }
1759 }
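The first eight bytes of an entry pack a 48-bit byte offset together with 16 bits of flags, which is why `offset()` reads only the first six bytes, `raw_offset()` reads all eight, and `c_start()` shifts the flags away. A test-style sketch of that arithmetic (builder usage as in the tests below):

```rust
#[test]
fn offset_and_flags_share_eight_bytes() {
    let bytes = IndexEntryBuilder::new().with_offset(0x1234).build();
    let entry = IndexEntry {
        bytes: &bytes,
        offset_override: None,
    };
    assert_eq!(entry.offset(), 0x1234);
    // The two flag bytes are zero here, so the raw value is just the
    // offset shifted past them.
    assert_eq!(entry.raw_offset(), 0x1234 << 16);
    assert_eq!(entry.c_start(), 0x1234);
}
```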
1767
1760
1768 #[cfg(test)]
1761 #[cfg(test)]
1769 mod tests {
1762 mod tests {
1770 use super::*;
1763 use super::*;
1771 use crate::node::NULL_NODE;
1764 use crate::node::NULL_NODE;
1772
1765
1773 #[cfg(test)]
1766 #[cfg(test)]
1774 #[derive(Debug, Copy, Clone)]
1767 #[derive(Debug, Copy, Clone)]
1775 pub struct IndexEntryBuilder {
1768 pub struct IndexEntryBuilder {
1776 is_first: bool,
1769 is_first: bool,
1777 is_inline: bool,
1770 is_inline: bool,
1778 is_general_delta: bool,
1771 is_general_delta: bool,
1779 version: u16,
1772 version: u16,
1780 offset: usize,
1773 offset: usize,
1781 compressed_len: usize,
1774 compressed_len: usize,
1782 uncompressed_len: usize,
1775 uncompressed_len: usize,
1783 base_revision_or_base_of_delta_chain: Revision,
1776 base_revision_or_base_of_delta_chain: Revision,
1784 link_revision: Revision,
1777 link_revision: Revision,
1785 p1: Revision,
1778 p1: Revision,
1786 p2: Revision,
1779 p2: Revision,
1787 node: Node,
1780 node: Node,
1788 }
1781 }
1789
1782
1790 #[cfg(test)]
1783 #[cfg(test)]
1791 impl IndexEntryBuilder {
1784 impl IndexEntryBuilder {
1792 #[allow(clippy::new_without_default)]
1785 #[allow(clippy::new_without_default)]
1793 pub fn new() -> Self {
1786 pub fn new() -> Self {
1794 Self {
1787 Self {
1795 is_first: false,
1788 is_first: false,
1796 is_inline: false,
1789 is_inline: false,
1797 is_general_delta: true,
1790 is_general_delta: true,
1798 version: 1,
1791 version: 1,
1799 offset: 0,
1792 offset: 0,
1800 compressed_len: 0,
1793 compressed_len: 0,
1801 uncompressed_len: 0,
1794 uncompressed_len: 0,
1802 base_revision_or_base_of_delta_chain: Revision(0),
1795 base_revision_or_base_of_delta_chain: Revision(0),
1803 link_revision: Revision(0),
1796 link_revision: Revision(0),
1804 p1: NULL_REVISION,
1797 p1: NULL_REVISION,
1805 p2: NULL_REVISION,
1798 p2: NULL_REVISION,
1806 node: NULL_NODE,
1799 node: NULL_NODE,
1807 }
1800 }
1808 }
1801 }
1809
1802
1810 pub fn is_first(&mut self, value: bool) -> &mut Self {
1803 pub fn is_first(&mut self, value: bool) -> &mut Self {
1811 self.is_first = value;
1804 self.is_first = value;
1812 self
1805 self
1813 }
1806 }
1814
1807
1815 pub fn with_inline(&mut self, value: bool) -> &mut Self {
1808 pub fn with_inline(&mut self, value: bool) -> &mut Self {
1816 self.is_inline = value;
1809 self.is_inline = value;
1817 self
1810 self
1818 }
1811 }
1819
1812
1820 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
1813 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
1821 self.is_general_delta = value;
1814 self.is_general_delta = value;
1822 self
1815 self
1823 }
1816 }
1824
1817
1825 pub fn with_version(&mut self, value: u16) -> &mut Self {
1818 pub fn with_version(&mut self, value: u16) -> &mut Self {
1826 self.version = value;
1819 self.version = value;
1827 self
1820 self
1828 }
1821 }
1829
1822
1830 pub fn with_offset(&mut self, value: usize) -> &mut Self {
1823 pub fn with_offset(&mut self, value: usize) -> &mut Self {
1831 self.offset = value;
1824 self.offset = value;
1832 self
1825 self
1833 }
1826 }
1834
1827
1835 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
1828 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
1836 self.compressed_len = value;
1829 self.compressed_len = value;
1837 self
1830 self
1838 }
1831 }
1839
1832
1840 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
1833 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
1841 self.uncompressed_len = value;
1834 self.uncompressed_len = value;
1842 self
1835 self
1843 }
1836 }
1844
1837
1845 pub fn with_base_revision_or_base_of_delta_chain(
1838 pub fn with_base_revision_or_base_of_delta_chain(
1846 &mut self,
1839 &mut self,
1847 value: Revision,
1840 value: Revision,
1848 ) -> &mut Self {
1841 ) -> &mut Self {
1849 self.base_revision_or_base_of_delta_chain = value;
1842 self.base_revision_or_base_of_delta_chain = value;
1850 self
1843 self
1851 }
1844 }
1852
1845
1853 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
1846 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
1854 self.link_revision = value;
1847 self.link_revision = value;
1855 self
1848 self
1856 }
1849 }
1857
1850
1858 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
1851 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
1859 self.p1 = value;
1852 self.p1 = value;
1860 self
1853 self
1861 }
1854 }
1862
1855
1863 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
1856 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
1864 self.p2 = value;
1857 self.p2 = value;
1865 self
1858 self
1866 }
1859 }
1867
1860
1868 pub fn with_node(&mut self, value: Node) -> &mut Self {
1861 pub fn with_node(&mut self, value: Node) -> &mut Self {
1869 self.node = value;
1862 self.node = value;
1870 self
1863 self
1871 }
1864 }
1872
1865
1873 pub fn build(&self) -> Vec<u8> {
1866 pub fn build(&self) -> Vec<u8> {
1874 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
1867 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
1875 if self.is_first {
1868 if self.is_first {
1876 bytes.extend(match (self.is_general_delta, self.is_inline) {
1869 bytes.extend(match (self.is_general_delta, self.is_inline) {
1877 (false, false) => [0u8, 0],
1870 (false, false) => [0u8, 0],
1878 (false, true) => [0u8, 1],
1871 (false, true) => [0u8, 1],
1879 (true, false) => [0u8, 2],
1872 (true, false) => [0u8, 2],
1880 (true, true) => [0u8, 3],
1873 (true, true) => [0u8, 3],
1881 });
1874 });
1882 bytes.extend(self.version.to_be_bytes());
1875 bytes.extend(self.version.to_be_bytes());
1883 // Remaining offset bytes.
1876 // Remaining offset bytes.
1884 bytes.extend([0u8; 2]);
1877 bytes.extend([0u8; 2]);
1885 } else {
1878 } else {
1886 // Offset stored on 48 bits (6 bytes)
1879 // Offset stored on 48 bits (6 bytes)
1887 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
1880 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
1888 }
1881 }
1889 bytes.extend([0u8; 2]); // Revision flags.
1882 bytes.extend([0u8; 2]); // Revision flags.
1890 bytes.extend((self.compressed_len as u32).to_be_bytes());
1883 bytes.extend((self.compressed_len as u32).to_be_bytes());
1891 bytes.extend((self.uncompressed_len as u32).to_be_bytes());
1884 bytes.extend((self.uncompressed_len as u32).to_be_bytes());
1892 bytes.extend(
1885 bytes.extend(
1893 self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
1886 self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
1894 );
1887 );
1895 bytes.extend(self.link_revision.0.to_be_bytes());
1888 bytes.extend(self.link_revision.0.to_be_bytes());
1896 bytes.extend(self.p1.0.to_be_bytes());
1889 bytes.extend(self.p1.0.to_be_bytes());
1897 bytes.extend(self.p2.0.to_be_bytes());
1890 bytes.extend(self.p2.0.to_be_bytes());
1898 bytes.extend(self.node.as_bytes());
1891 bytes.extend(self.node.as_bytes());
1899 bytes.extend(vec![0u8; 12]);
1892 bytes.extend(vec![0u8; 12]);
1900 bytes
1893 bytes
1901 }
1894 }
1902 }
1895 }
1903
1896
1904 pub fn is_inline(index_bytes: &[u8]) -> bool {
1897 pub fn is_inline(index_bytes: &[u8]) -> bool {
1905 IndexHeader::parse(index_bytes)
1898 IndexHeader::parse(index_bytes)
1906 .expect("too short")
1899 .expect("too short")
1907 .unwrap()
1900 .unwrap()
1908 .format_flags()
1901 .format_flags()
1909 .is_inline()
1902 .is_inline()
1910 }
1903 }
1911
1904
1912 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
1905 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
1913 IndexHeader::parse(index_bytes)
1906 IndexHeader::parse(index_bytes)
1914 .expect("too short")
1907 .expect("too short")
1915 .unwrap()
1908 .unwrap()
1916 .format_flags()
1909 .format_flags()
1917 .uses_generaldelta()
1910 .uses_generaldelta()
1918 }
1911 }
1919
1912
1920 pub fn get_version(index_bytes: &[u8]) -> u16 {
1913 pub fn get_version(index_bytes: &[u8]) -> u16 {
1921 IndexHeader::parse(index_bytes)
1914 IndexHeader::parse(index_bytes)
1922 .expect("too short")
1915 .expect("too short")
1923 .unwrap()
1916 .unwrap()
1924 .format_version()
1917 .format_version()
1925 }
1918 }
1926
1919
1927 #[test]
1920 #[test]
1928 fn flags_when_no_inline_flag_test() {
1921 fn flags_when_no_inline_flag_test() {
1929 let bytes = IndexEntryBuilder::new()
1922 let bytes = IndexEntryBuilder::new()
1930 .is_first(true)
1923 .is_first(true)
1931 .with_general_delta(false)
1924 .with_general_delta(false)
1932 .with_inline(false)
1925 .with_inline(false)
1933 .build();
1926 .build();
1934
1927
1935 assert!(!is_inline(&bytes));
1928 assert!(!is_inline(&bytes));
1936 assert!(!uses_generaldelta(&bytes));
1929 assert!(!uses_generaldelta(&bytes));
1937 }
1930 }
1938
1931
1939 #[test]
1932 #[test]
1940 fn flags_when_inline_flag_test() {
1933 fn flags_when_inline_flag_test() {
1941 let bytes = IndexEntryBuilder::new()
1934 let bytes = IndexEntryBuilder::new()
1942 .is_first(true)
1935 .is_first(true)
1943 .with_general_delta(false)
1936 .with_general_delta(false)
1944 .with_inline(true)
1937 .with_inline(true)
1945 .build();
1938 .build();
1946
1939
1947 assert!(is_inline(&bytes));
1940 assert!(is_inline(&bytes));
1948 assert!(!uses_generaldelta(&bytes));
1941 assert!(!uses_generaldelta(&bytes));
1949 }
1942 }
1950
1943
1951 #[test]
1944 #[test]
1952 fn flags_when_inline_and_generaldelta_flags_test() {
1945 fn flags_when_inline_and_generaldelta_flags_test() {
1953 let bytes = IndexEntryBuilder::new()
1946 let bytes = IndexEntryBuilder::new()
1954 .is_first(true)
1947 .is_first(true)
1955 .with_general_delta(true)
1948 .with_general_delta(true)
1956 .with_inline(true)
1949 .with_inline(true)
1957 .build();
1950 .build();
1958
1951
1959 assert!(is_inline(&bytes));
1952 assert!(is_inline(&bytes));
1960 assert!(uses_generaldelta(&bytes));
1953 assert!(uses_generaldelta(&bytes));
1961 }
1954 }
1962
1955
1963 #[test]
1956 #[test]
1964 fn test_offset() {
1957 fn test_offset() {
1965 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1958 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1966 let entry = IndexEntry {
1959 let entry = IndexEntry {
1967 bytes: &bytes,
1960 bytes: &bytes,
1968 offset_override: None,
1961 offset_override: None,
1969 };
1962 };
1970
1963
1971 assert_eq!(entry.offset(), 1)
1964 assert_eq!(entry.offset(), 1)
1972 }
1965 }
1973
1966
1974 #[test]
1967 #[test]
1975 fn test_with_overridden_offset() {
1968 fn test_with_overridden_offset() {
1976 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1969 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1977 let entry = IndexEntry {
1970 let entry = IndexEntry {
1978 bytes: &bytes,
1971 bytes: &bytes,
1979 offset_override: Some(2),
1972 offset_override: Some(2),
1980 };
1973 };
1981
1974
1982 assert_eq!(entry.offset(), 2)
1975 assert_eq!(entry.offset(), 2)
1983 }
1976 }
1984
1977
    #[test]
    fn test_compressed_len() {
        let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.compressed_len(), 1)
    }

    #[test]
    fn test_uncompressed_len() {
        let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.uncompressed_len(), 1)
    }

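    // As the getter's name suggests, this field is overloaded: depending on
    // whether the revlog uses generaldelta, it holds either the delta parent
    // or the base of the delta chain. The test only checks that the stored
    // revision round-trips.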
    #[test]
    fn test_base_revision_or_base_of_delta_chain() {
        let bytes = IndexEntryBuilder::new()
            .with_base_revision_or_base_of_delta_chain(Revision(1))
            .build();
        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
    }

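    // The link revision and both parents are stored per entry; the
    // assertions below compare through `.into()` so they stay agnostic of
    // the getters' exact revision wrapper type.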
    #[test]
    fn link_revision_test() {
        let bytes = IndexEntryBuilder::new()
            .with_link_revision(Revision(123))
            .build();

        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.link_revision(), 123.into());
    }

    #[test]
    fn p1_test() {
        let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();

        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.p1(), 123.into());
    }

    #[test]
    fn p2_test() {
        let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();

        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.p2(), 123.into());
    }

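    // Nodes are 20-byte hashes (40 hex digits); `hash()` returns a
    // reference to the node stored in the entry.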
    #[test]
    fn node_test() {
        let node = Node::from_hex("0123456789012345678901234567890123456789")
            .unwrap();
        let bytes = IndexEntryBuilder::new().with_node(node).build();

        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(*entry.hash(), node);
    }

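    // `get_version` reads the index header at the start of the byte stream,
    // which is why the builder is asked for a first entry here.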
    #[test]
    fn version_test() {
        let bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(2)
            .build();

        assert_eq!(get_version(&bytes), 2)
    }
}

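// Re-export the entry builder so other tests in the crate can construct
// index entries without reaching into this module.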
#[cfg(test)]
pub use tests::IndexEntryBuilder;