##// END OF EJS Templates
rust-index: find_gca_candidates bit sets genericization...
Georges Racinet on incendie.racinet.fr -
r52117:43241f31 default
parent child Browse files
Show More
@@ -1,1721 +1,1906 b''
1 use std::collections::hash_map::RandomState;
1 use std::collections::hash_map::RandomState;
2 use std::collections::{HashMap, HashSet};
2 use std::collections::{HashMap, HashSet};
3 use std::fmt::Debug;
3 use std::fmt::Debug;
4 use std::ops::Deref;
4 use std::ops::Deref;
5 use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
5 use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
6
6
7 use byteorder::{BigEndian, ByteOrder};
7 use byteorder::{BigEndian, ByteOrder};
8 use bytes_cast::{unaligned, BytesCast};
8 use bytes_cast::{unaligned, BytesCast};
9
9
10 use super::REVIDX_KNOWN_FLAGS;
10 use super::REVIDX_KNOWN_FLAGS;
11 use crate::errors::HgError;
11 use crate::errors::HgError;
12 use crate::node::{NODE_BYTES_LENGTH, NULL_NODE, STORED_NODE_ID_BYTES};
12 use crate::node::{NODE_BYTES_LENGTH, NULL_NODE, STORED_NODE_ID_BYTES};
13 use crate::revlog::node::Node;
13 use crate::revlog::node::Node;
14 use crate::revlog::{Revision, NULL_REVISION};
14 use crate::revlog::{Revision, NULL_REVISION};
15 use crate::{
15 use crate::{
16 dagops, BaseRevision, FastHashMap, Graph, GraphError, RevlogError,
16 dagops, BaseRevision, FastHashMap, Graph, GraphError, RevlogError,
17 RevlogIndex, UncheckedRevision,
17 RevlogIndex, UncheckedRevision,
18 };
18 };
19
19
20 pub const INDEX_ENTRY_SIZE: usize = 64;
20 pub const INDEX_ENTRY_SIZE: usize = 64;
21 pub const COMPRESSION_MODE_INLINE: u8 = 2;
21 pub const COMPRESSION_MODE_INLINE: u8 = 2;
22
22
23 #[derive(Debug)]
23 #[derive(Debug)]
24 pub struct IndexHeader {
24 pub struct IndexHeader {
25 pub(super) header_bytes: [u8; 4],
25 pub(super) header_bytes: [u8; 4],
26 }
26 }
27
27
28 #[derive(Copy, Clone)]
28 #[derive(Copy, Clone)]
29 pub struct IndexHeaderFlags {
29 pub struct IndexHeaderFlags {
30 flags: u16,
30 flags: u16,
31 }
31 }
32
32
33 /// Corresponds to the high bits of `_format_flags` in python
33 /// Corresponds to the high bits of `_format_flags` in python
34 impl IndexHeaderFlags {
34 impl IndexHeaderFlags {
35 /// Corresponds to FLAG_INLINE_DATA in python
35 /// Corresponds to FLAG_INLINE_DATA in python
36 pub fn is_inline(self) -> bool {
36 pub fn is_inline(self) -> bool {
37 self.flags & 1 != 0
37 self.flags & 1 != 0
38 }
38 }
39 /// Corresponds to FLAG_GENERALDELTA in python
39 /// Corresponds to FLAG_GENERALDELTA in python
40 pub fn uses_generaldelta(self) -> bool {
40 pub fn uses_generaldelta(self) -> bool {
41 self.flags & 2 != 0
41 self.flags & 2 != 0
42 }
42 }
43 }
43 }
44
44
45 /// Corresponds to the INDEX_HEADER structure,
45 /// Corresponds to the INDEX_HEADER structure,
46 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
46 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
47 impl IndexHeader {
47 impl IndexHeader {
48 fn format_flags(&self) -> IndexHeaderFlags {
48 fn format_flags(&self) -> IndexHeaderFlags {
49 // No "unknown flags" check here, unlike in python. Maybe there should
49 // No "unknown flags" check here, unlike in python. Maybe there should
50 // be.
50 // be.
51 IndexHeaderFlags {
51 IndexHeaderFlags {
52 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
52 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
53 }
53 }
54 }
54 }
55
55
56 /// The only revlog version currently supported by rhg.
56 /// The only revlog version currently supported by rhg.
57 const REVLOGV1: u16 = 1;
57 const REVLOGV1: u16 = 1;
58
58
59 /// Corresponds to `_format_version` in Python.
59 /// Corresponds to `_format_version` in Python.
60 fn format_version(&self) -> u16 {
60 fn format_version(&self) -> u16 {
61 BigEndian::read_u16(&self.header_bytes[2..4])
61 BigEndian::read_u16(&self.header_bytes[2..4])
62 }
62 }
63
63
64 pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> {
64 pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> {
65 if index_bytes.is_empty() {
65 if index_bytes.is_empty() {
66 return Ok(None);
66 return Ok(None);
67 }
67 }
68 if index_bytes.len() < 4 {
68 if index_bytes.len() < 4 {
69 return Err(HgError::corrupted(
69 return Err(HgError::corrupted(
70 "corrupted revlog: can't read the index format header",
70 "corrupted revlog: can't read the index format header",
71 ));
71 ));
72 }
72 }
73 Ok(Some(IndexHeader {
73 Ok(Some(IndexHeader {
74 header_bytes: {
74 header_bytes: {
75 let bytes: [u8; 4] =
75 let bytes: [u8; 4] =
76 index_bytes[0..4].try_into().expect("impossible");
76 index_bytes[0..4].try_into().expect("impossible");
77 bytes
77 bytes
78 },
78 },
79 }))
79 }))
80 }
80 }
81 }
81 }
82
82
83 /// Abstracts the access to the index bytes since they can be spread between
83 /// Abstracts the access to the index bytes since they can be spread between
84 /// the immutable (bytes) part and the mutable (added) part if any appends
84 /// the immutable (bytes) part and the mutable (added) part if any appends
85 /// happened. This makes it transparent for the callers.
85 /// happened. This makes it transparent for the callers.
86 struct IndexData {
86 struct IndexData {
87 /// Immutable bytes, most likely taken from disk
87 /// Immutable bytes, most likely taken from disk
88 bytes: Box<dyn Deref<Target = [u8]> + Send>,
88 bytes: Box<dyn Deref<Target = [u8]> + Send>,
89 /// Used when stripping index contents, keeps track of the start of the
89 /// Used when stripping index contents, keeps track of the start of the
90 /// first stripped revision, which is used to give a slice of the
90 /// first stripped revision, which is used to give a slice of the
91 /// `bytes` field.
91 /// `bytes` field.
92 truncation: Option<usize>,
92 truncation: Option<usize>,
93 /// Bytes that were added after reading the index
93 /// Bytes that were added after reading the index
94 added: Vec<u8>,
94 added: Vec<u8>,
95 }
95 }
96
96
97 impl IndexData {
97 impl IndexData {
98 pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send>) -> Self {
98 pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send>) -> Self {
99 Self {
99 Self {
100 bytes,
100 bytes,
101 truncation: None,
101 truncation: None,
102 added: vec![],
102 added: vec![],
103 }
103 }
104 }
104 }
105
105
106 pub fn len(&self) -> usize {
106 pub fn len(&self) -> usize {
107 match self.truncation {
107 match self.truncation {
108 Some(truncation) => truncation + self.added.len(),
108 Some(truncation) => truncation + self.added.len(),
109 None => self.bytes.len() + self.added.len(),
109 None => self.bytes.len() + self.added.len(),
110 }
110 }
111 }
111 }
112
112
113 fn remove(
113 fn remove(
114 &mut self,
114 &mut self,
115 rev: Revision,
115 rev: Revision,
116 offsets: Option<&[usize]>,
116 offsets: Option<&[usize]>,
117 ) -> Result<(), RevlogError> {
117 ) -> Result<(), RevlogError> {
118 let rev = rev.0 as usize;
118 let rev = rev.0 as usize;
119 let truncation = if let Some(offsets) = offsets {
119 let truncation = if let Some(offsets) = offsets {
120 offsets[rev]
120 offsets[rev]
121 } else {
121 } else {
122 rev * INDEX_ENTRY_SIZE
122 rev * INDEX_ENTRY_SIZE
123 };
123 };
124 if truncation < self.bytes.len() {
124 if truncation < self.bytes.len() {
125 self.truncation = Some(truncation);
125 self.truncation = Some(truncation);
126 self.added.clear();
126 self.added.clear();
127 } else {
127 } else {
128 self.added.truncate(truncation - self.bytes.len());
128 self.added.truncate(truncation - self.bytes.len());
129 }
129 }
130 Ok(())
130 Ok(())
131 }
131 }
132
132
133 fn is_new(&self) -> bool {
133 fn is_new(&self) -> bool {
134 self.bytes.is_empty()
134 self.bytes.is_empty()
135 }
135 }
136 }
136 }
137
137
138 impl std::ops::Index<std::ops::Range<usize>> for IndexData {
138 impl std::ops::Index<std::ops::Range<usize>> for IndexData {
139 type Output = [u8];
139 type Output = [u8];
140
140
141 fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
141 fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
142 let start = index.start;
142 let start = index.start;
143 let end = index.end;
143 let end = index.end;
144 let immutable_len = match self.truncation {
144 let immutable_len = match self.truncation {
145 Some(truncation) => truncation,
145 Some(truncation) => truncation,
146 None => self.bytes.len(),
146 None => self.bytes.len(),
147 };
147 };
148 if start < immutable_len {
148 if start < immutable_len {
149 if end > immutable_len {
149 if end > immutable_len {
150 panic!("index data cannot span existing and added ranges");
150 panic!("index data cannot span existing and added ranges");
151 }
151 }
152 &self.bytes[index]
152 &self.bytes[index]
153 } else {
153 } else {
154 &self.added[start - immutable_len..end - immutable_len]
154 &self.added[start - immutable_len..end - immutable_len]
155 }
155 }
156 }
156 }
157 }
157 }
158
158
159 #[derive(Debug, PartialEq, Eq)]
159 #[derive(Debug, PartialEq, Eq)]
160 pub struct RevisionDataParams {
160 pub struct RevisionDataParams {
161 pub flags: u16,
161 pub flags: u16,
162 pub data_offset: u64,
162 pub data_offset: u64,
163 pub data_compressed_length: i32,
163 pub data_compressed_length: i32,
164 pub data_uncompressed_length: i32,
164 pub data_uncompressed_length: i32,
165 pub data_delta_base: i32,
165 pub data_delta_base: i32,
166 pub link_rev: i32,
166 pub link_rev: i32,
167 pub parent_rev_1: i32,
167 pub parent_rev_1: i32,
168 pub parent_rev_2: i32,
168 pub parent_rev_2: i32,
169 pub node_id: [u8; NODE_BYTES_LENGTH],
169 pub node_id: [u8; NODE_BYTES_LENGTH],
170 pub _sidedata_offset: u64,
170 pub _sidedata_offset: u64,
171 pub _sidedata_compressed_length: i32,
171 pub _sidedata_compressed_length: i32,
172 pub data_compression_mode: u8,
172 pub data_compression_mode: u8,
173 pub _sidedata_compression_mode: u8,
173 pub _sidedata_compression_mode: u8,
174 pub _rank: i32,
174 pub _rank: i32,
175 }
175 }
176
176
177 impl Default for RevisionDataParams {
177 impl Default for RevisionDataParams {
178 fn default() -> Self {
178 fn default() -> Self {
179 Self {
179 Self {
180 flags: 0,
180 flags: 0,
181 data_offset: 0,
181 data_offset: 0,
182 data_compressed_length: 0,
182 data_compressed_length: 0,
183 data_uncompressed_length: 0,
183 data_uncompressed_length: 0,
184 data_delta_base: -1,
184 data_delta_base: -1,
185 link_rev: -1,
185 link_rev: -1,
186 parent_rev_1: -1,
186 parent_rev_1: -1,
187 parent_rev_2: -1,
187 parent_rev_2: -1,
188 node_id: [0; NODE_BYTES_LENGTH],
188 node_id: [0; NODE_BYTES_LENGTH],
189 _sidedata_offset: 0,
189 _sidedata_offset: 0,
190 _sidedata_compressed_length: 0,
190 _sidedata_compressed_length: 0,
191 data_compression_mode: COMPRESSION_MODE_INLINE,
191 data_compression_mode: COMPRESSION_MODE_INLINE,
192 _sidedata_compression_mode: COMPRESSION_MODE_INLINE,
192 _sidedata_compression_mode: COMPRESSION_MODE_INLINE,
193 _rank: -1,
193 _rank: -1,
194 }
194 }
195 }
195 }
196 }
196 }
197
197
198 #[derive(BytesCast)]
198 #[derive(BytesCast)]
199 #[repr(C)]
199 #[repr(C)]
200 pub struct RevisionDataV1 {
200 pub struct RevisionDataV1 {
201 data_offset_or_flags: unaligned::U64Be,
201 data_offset_or_flags: unaligned::U64Be,
202 data_compressed_length: unaligned::I32Be,
202 data_compressed_length: unaligned::I32Be,
203 data_uncompressed_length: unaligned::I32Be,
203 data_uncompressed_length: unaligned::I32Be,
204 data_delta_base: unaligned::I32Be,
204 data_delta_base: unaligned::I32Be,
205 link_rev: unaligned::I32Be,
205 link_rev: unaligned::I32Be,
206 parent_rev_1: unaligned::I32Be,
206 parent_rev_1: unaligned::I32Be,
207 parent_rev_2: unaligned::I32Be,
207 parent_rev_2: unaligned::I32Be,
208 node_id: [u8; STORED_NODE_ID_BYTES],
208 node_id: [u8; STORED_NODE_ID_BYTES],
209 }
209 }
210
210
211 fn _static_assert_size_of_revision_data_v1() {
211 fn _static_assert_size_of_revision_data_v1() {
212 let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>;
212 let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>;
213 }
213 }
214
214
215 impl RevisionDataParams {
215 impl RevisionDataParams {
216 pub fn validate(&self) -> Result<(), RevlogError> {
216 pub fn validate(&self) -> Result<(), RevlogError> {
217 if self.flags & !REVIDX_KNOWN_FLAGS != 0 {
217 if self.flags & !REVIDX_KNOWN_FLAGS != 0 {
218 return Err(RevlogError::corrupted(format!(
218 return Err(RevlogError::corrupted(format!(
219 "unknown revlog index flags: {}",
219 "unknown revlog index flags: {}",
220 self.flags
220 self.flags
221 )));
221 )));
222 }
222 }
223 if self.data_compression_mode != COMPRESSION_MODE_INLINE {
223 if self.data_compression_mode != COMPRESSION_MODE_INLINE {
224 return Err(RevlogError::corrupted(format!(
224 return Err(RevlogError::corrupted(format!(
225 "invalid data compression mode: {}",
225 "invalid data compression mode: {}",
226 self.data_compression_mode
226 self.data_compression_mode
227 )));
227 )));
228 }
228 }
229 // FIXME isn't this only for v2 or changelog v2?
229 // FIXME isn't this only for v2 or changelog v2?
230 if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE {
230 if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE {
231 return Err(RevlogError::corrupted(format!(
231 return Err(RevlogError::corrupted(format!(
232 "invalid sidedata compression mode: {}",
232 "invalid sidedata compression mode: {}",
233 self._sidedata_compression_mode
233 self._sidedata_compression_mode
234 )));
234 )));
235 }
235 }
236 Ok(())
236 Ok(())
237 }
237 }
238
238
239 pub fn into_v1(self) -> RevisionDataV1 {
239 pub fn into_v1(self) -> RevisionDataV1 {
240 let data_offset_or_flags = self.data_offset << 16 | self.flags as u64;
240 let data_offset_or_flags = self.data_offset << 16 | self.flags as u64;
241 let mut node_id = [0; STORED_NODE_ID_BYTES];
241 let mut node_id = [0; STORED_NODE_ID_BYTES];
242 node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id);
242 node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id);
243 RevisionDataV1 {
243 RevisionDataV1 {
244 data_offset_or_flags: data_offset_or_flags.into(),
244 data_offset_or_flags: data_offset_or_flags.into(),
245 data_compressed_length: self.data_compressed_length.into(),
245 data_compressed_length: self.data_compressed_length.into(),
246 data_uncompressed_length: self.data_uncompressed_length.into(),
246 data_uncompressed_length: self.data_uncompressed_length.into(),
247 data_delta_base: self.data_delta_base.into(),
247 data_delta_base: self.data_delta_base.into(),
248 link_rev: self.link_rev.into(),
248 link_rev: self.link_rev.into(),
249 parent_rev_1: self.parent_rev_1.into(),
249 parent_rev_1: self.parent_rev_1.into(),
250 parent_rev_2: self.parent_rev_2.into(),
250 parent_rev_2: self.parent_rev_2.into(),
251 node_id,
251 node_id,
252 }
252 }
253 }
253 }
254 }
254 }
255
255
256 /// A Revlog index
256 /// A Revlog index
257 pub struct Index {
257 pub struct Index {
258 bytes: IndexData,
258 bytes: IndexData,
259 /// Offsets of starts of index blocks.
259 /// Offsets of starts of index blocks.
260 /// Only needed when the index is interleaved with data.
260 /// Only needed when the index is interleaved with data.
261 offsets: RwLock<Option<Vec<usize>>>,
261 offsets: RwLock<Option<Vec<usize>>>,
262 uses_generaldelta: bool,
262 uses_generaldelta: bool,
263 is_inline: bool,
263 is_inline: bool,
264 /// Cache of the head revisions in this index, kept in sync. Should
264 /// Cache of the head revisions in this index, kept in sync. Should
265 /// be accessed via the [`Self::head_revs`] method.
265 /// be accessed via the [`Self::head_revs`] method.
266 head_revs: Vec<Revision>,
266 head_revs: Vec<Revision>,
267 /// Cache of the last filtered revisions in this index, used to make sure
267 /// Cache of the last filtered revisions in this index, used to make sure
268 /// we haven't changed filters when returning the cached `head_revs`.
268 /// we haven't changed filters when returning the cached `head_revs`.
269 filtered_revs: HashSet<Revision>,
269 filtered_revs: HashSet<Revision>,
270 }
270 }
271
271
272 impl Debug for Index {
272 impl Debug for Index {
273 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
273 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
274 f.debug_struct("Index")
274 f.debug_struct("Index")
275 .field("offsets", &self.offsets)
275 .field("offsets", &self.offsets)
276 .field("uses_generaldelta", &self.uses_generaldelta)
276 .field("uses_generaldelta", &self.uses_generaldelta)
277 .finish()
277 .finish()
278 }
278 }
279 }
279 }
280
280
281 impl Graph for Index {
281 impl Graph for Index {
282 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
282 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
283 let err = || GraphError::ParentOutOfRange(rev);
283 let err = || GraphError::ParentOutOfRange(rev);
284 match self.get_entry(rev) {
284 match self.get_entry(rev) {
285 Some(entry) => {
285 Some(entry) => {
286 // The C implementation checks that the parents are valid
286 // The C implementation checks that the parents are valid
287 // before returning
287 // before returning
288 Ok([
288 Ok([
289 self.check_revision(entry.p1()).ok_or_else(err)?,
289 self.check_revision(entry.p1()).ok_or_else(err)?,
290 self.check_revision(entry.p2()).ok_or_else(err)?,
290 self.check_revision(entry.p2()).ok_or_else(err)?,
291 ])
291 ])
292 }
292 }
293 None => Ok([NULL_REVISION, NULL_REVISION]),
293 None => Ok([NULL_REVISION, NULL_REVISION]),
294 }
294 }
295 }
295 }
296 }
296 }
297
297
298 /// A cache suitable for find_snapshots
298 /// A cache suitable for find_snapshots
299 ///
299 ///
300 /// Logically equivalent to a mapping whose keys are [`BaseRevision`] and
300 /// Logically equivalent to a mapping whose keys are [`BaseRevision`] and
301 /// values sets of [`BaseRevision`]
301 /// values sets of [`BaseRevision`]
302 ///
302 ///
303 /// TODO the dubious part is insisting that errors must be RevlogError
303 /// TODO the dubious part is insisting that errors must be RevlogError
304 /// we would probably need to sprinkle some magic here, such as an associated
304 /// we would probably need to sprinkle some magic here, such as an associated
305 /// type that would be Into<RevlogError> but even that would not be
305 /// type that would be Into<RevlogError> but even that would not be
306 /// satisfactory, as errors potentially have nothing to do with the revlog.
306 /// satisfactory, as errors potentially have nothing to do with the revlog.
307 pub trait SnapshotsCache {
307 pub trait SnapshotsCache {
308 fn insert_for(
308 fn insert_for(
309 &mut self,
309 &mut self,
310 rev: BaseRevision,
310 rev: BaseRevision,
311 value: BaseRevision,
311 value: BaseRevision,
312 ) -> Result<(), RevlogError>;
312 ) -> Result<(), RevlogError>;
313 }
313 }
314
314
315 impl SnapshotsCache for FastHashMap<BaseRevision, HashSet<BaseRevision>> {
315 impl SnapshotsCache for FastHashMap<BaseRevision, HashSet<BaseRevision>> {
316 fn insert_for(
316 fn insert_for(
317 &mut self,
317 &mut self,
318 rev: BaseRevision,
318 rev: BaseRevision,
319 value: BaseRevision,
319 value: BaseRevision,
320 ) -> Result<(), RevlogError> {
320 ) -> Result<(), RevlogError> {
321 let all_values = self.entry(rev).or_insert_with(HashSet::new);
321 let all_values = self.entry(rev).or_insert_with(HashSet::new);
322 all_values.insert(value);
322 all_values.insert(value);
323 Ok(())
323 Ok(())
324 }
324 }
325 }
325 }
326
326
327 impl Index {
327 impl Index {
328 /// Create an index from bytes.
328 /// Create an index from bytes.
329 /// Calculate the start of each entry when is_inline is true.
329 /// Calculate the start of each entry when is_inline is true.
330 pub fn new(
330 pub fn new(
331 bytes: Box<dyn Deref<Target = [u8]> + Send>,
331 bytes: Box<dyn Deref<Target = [u8]> + Send>,
332 default_header: IndexHeader,
332 default_header: IndexHeader,
333 ) -> Result<Self, HgError> {
333 ) -> Result<Self, HgError> {
334 let header =
334 let header =
335 IndexHeader::parse(bytes.as_ref())?.unwrap_or(default_header);
335 IndexHeader::parse(bytes.as_ref())?.unwrap_or(default_header);
336
336
337 if header.format_version() != IndexHeader::REVLOGV1 {
337 if header.format_version() != IndexHeader::REVLOGV1 {
338 // A proper new version should have had a repo/store
338 // A proper new version should have had a repo/store
339 // requirement.
339 // requirement.
340 return Err(HgError::corrupted("unsupported revlog version"));
340 return Err(HgError::corrupted("unsupported revlog version"));
341 }
341 }
342
342
343 // This is only correct because we know version is REVLOGV1.
343 // This is only correct because we know version is REVLOGV1.
344 // In v2 we always use generaldelta, while in v0 we never use
344 // In v2 we always use generaldelta, while in v0 we never use
345 // generaldelta. Similar for [is_inline] (it's only used in v1).
345 // generaldelta. Similar for [is_inline] (it's only used in v1).
346 let uses_generaldelta = header.format_flags().uses_generaldelta();
346 let uses_generaldelta = header.format_flags().uses_generaldelta();
347
347
348 if header.format_flags().is_inline() {
348 if header.format_flags().is_inline() {
349 let mut offset: usize = 0;
349 let mut offset: usize = 0;
350 let mut offsets = Vec::new();
350 let mut offsets = Vec::new();
351
351
352 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
352 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
353 offsets.push(offset);
353 offsets.push(offset);
354 let end = offset + INDEX_ENTRY_SIZE;
354 let end = offset + INDEX_ENTRY_SIZE;
355 let entry = IndexEntry {
355 let entry = IndexEntry {
356 bytes: &bytes[offset..end],
356 bytes: &bytes[offset..end],
357 offset_override: None,
357 offset_override: None,
358 };
358 };
359
359
360 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
360 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
361 }
361 }
362
362
363 if offset == bytes.len() {
363 if offset == bytes.len() {
364 Ok(Self {
364 Ok(Self {
365 bytes: IndexData::new(bytes),
365 bytes: IndexData::new(bytes),
366 offsets: RwLock::new(Some(offsets)),
366 offsets: RwLock::new(Some(offsets)),
367 uses_generaldelta,
367 uses_generaldelta,
368 is_inline: true,
368 is_inline: true,
369 head_revs: vec![],
369 head_revs: vec![],
370 filtered_revs: HashSet::new(),
370 filtered_revs: HashSet::new(),
371 })
371 })
372 } else {
372 } else {
373 Err(HgError::corrupted("unexpected inline revlog length"))
373 Err(HgError::corrupted("unexpected inline revlog length"))
374 }
374 }
375 } else {
375 } else {
376 Ok(Self {
376 Ok(Self {
377 bytes: IndexData::new(bytes),
377 bytes: IndexData::new(bytes),
378 offsets: RwLock::new(None),
378 offsets: RwLock::new(None),
379 uses_generaldelta,
379 uses_generaldelta,
380 is_inline: false,
380 is_inline: false,
381 head_revs: vec![],
381 head_revs: vec![],
382 filtered_revs: HashSet::new(),
382 filtered_revs: HashSet::new(),
383 })
383 })
384 }
384 }
385 }
385 }
386
386
387 pub fn uses_generaldelta(&self) -> bool {
387 pub fn uses_generaldelta(&self) -> bool {
388 self.uses_generaldelta
388 self.uses_generaldelta
389 }
389 }
390
390
391 /// Value of the inline flag.
391 /// Value of the inline flag.
392 pub fn is_inline(&self) -> bool {
392 pub fn is_inline(&self) -> bool {
393 self.is_inline
393 self.is_inline
394 }
394 }
395
395
396 /// Return a slice of bytes if `revlog` is inline. Panic if not.
396 /// Return a slice of bytes if `revlog` is inline. Panic if not.
397 pub fn data(&self, start: usize, end: usize) -> &[u8] {
397 pub fn data(&self, start: usize, end: usize) -> &[u8] {
398 if !self.is_inline() {
398 if !self.is_inline() {
399 panic!("tried to access data in the index of a revlog that is not inline");
399 panic!("tried to access data in the index of a revlog that is not inline");
400 }
400 }
401 &self.bytes[start..end]
401 &self.bytes[start..end]
402 }
402 }
403
403
404 /// Return number of entries of the revlog index.
404 /// Return number of entries of the revlog index.
405 pub fn len(&self) -> usize {
405 pub fn len(&self) -> usize {
406 if let Some(offsets) = &*self.get_offsets() {
406 if let Some(offsets) = &*self.get_offsets() {
407 offsets.len()
407 offsets.len()
408 } else {
408 } else {
409 self.bytes.len() / INDEX_ENTRY_SIZE
409 self.bytes.len() / INDEX_ENTRY_SIZE
410 }
410 }
411 }
411 }
412
412
413 pub fn get_offsets(&self) -> RwLockReadGuard<Option<Vec<usize>>> {
413 pub fn get_offsets(&self) -> RwLockReadGuard<Option<Vec<usize>>> {
414 if self.is_inline() {
414 if self.is_inline() {
415 {
415 {
416 // Wrap in a block to drop the read guard
416 // Wrap in a block to drop the read guard
417 // TODO perf?
417 // TODO perf?
418 let mut offsets = self.offsets.write().unwrap();
418 let mut offsets = self.offsets.write().unwrap();
419 if offsets.is_none() {
419 if offsets.is_none() {
420 offsets.replace(inline_scan(&self.bytes.bytes).1);
420 offsets.replace(inline_scan(&self.bytes.bytes).1);
421 }
421 }
422 }
422 }
423 }
423 }
424 self.offsets.read().unwrap()
424 self.offsets.read().unwrap()
425 }
425 }
426
426
427 pub fn get_offsets_mut(&mut self) -> RwLockWriteGuard<Option<Vec<usize>>> {
427 pub fn get_offsets_mut(&mut self) -> RwLockWriteGuard<Option<Vec<usize>>> {
428 let mut offsets = self.offsets.write().unwrap();
428 let mut offsets = self.offsets.write().unwrap();
429 if self.is_inline() && offsets.is_none() {
429 if self.is_inline() && offsets.is_none() {
430 offsets.replace(inline_scan(&self.bytes.bytes).1);
430 offsets.replace(inline_scan(&self.bytes.bytes).1);
431 }
431 }
432 offsets
432 offsets
433 }
433 }
434
434
435 /// Returns `true` if the `Index` has zero `entries`.
435 /// Returns `true` if the `Index` has zero `entries`.
436 pub fn is_empty(&self) -> bool {
436 pub fn is_empty(&self) -> bool {
437 self.len() == 0
437 self.len() == 0
438 }
438 }
439
439
440 /// Return the index entry corresponding to the given revision or `None`
440 /// Return the index entry corresponding to the given revision or `None`
441 /// for [`NULL_REVISION`]
441 /// for [`NULL_REVISION`]
442 ///
442 ///
443 /// The specified revision being of the checked type, it always exists
443 /// The specified revision being of the checked type, it always exists
444 /// if it was validated by this index.
444 /// if it was validated by this index.
445 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
445 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
446 if rev == NULL_REVISION {
446 if rev == NULL_REVISION {
447 return None;
447 return None;
448 }
448 }
449 Some(if let Some(offsets) = &*self.get_offsets() {
449 Some(if let Some(offsets) = &*self.get_offsets() {
450 self.get_entry_inline(rev, offsets.as_ref())
450 self.get_entry_inline(rev, offsets.as_ref())
451 } else {
451 } else {
452 self.get_entry_separated(rev)
452 self.get_entry_separated(rev)
453 })
453 })
454 }
454 }
455
455
456 /// Return the binary content of the index entry for the given revision
456 /// Return the binary content of the index entry for the given revision
457 ///
457 ///
458 /// See [get_entry()](`Self::get_entry()`) for cases when `None` is
458 /// See [get_entry()](`Self::get_entry()`) for cases when `None` is
459 /// returned.
459 /// returned.
460 pub fn entry_binary(&self, rev: Revision) -> Option<&[u8]> {
460 pub fn entry_binary(&self, rev: Revision) -> Option<&[u8]> {
461 self.get_entry(rev).map(|e| {
461 self.get_entry(rev).map(|e| {
462 let bytes = e.as_bytes();
462 let bytes = e.as_bytes();
463 if rev.0 == 0 {
463 if rev.0 == 0 {
464 &bytes[4..]
464 &bytes[4..]
465 } else {
465 } else {
466 bytes
466 bytes
467 }
467 }
468 })
468 })
469 }
469 }
470
470
471 pub fn entry_as_params(
471 pub fn entry_as_params(
472 &self,
472 &self,
473 rev: UncheckedRevision,
473 rev: UncheckedRevision,
474 ) -> Option<RevisionDataParams> {
474 ) -> Option<RevisionDataParams> {
475 let rev = self.check_revision(rev)?;
475 let rev = self.check_revision(rev)?;
476 self.get_entry(rev).map(|e| RevisionDataParams {
476 self.get_entry(rev).map(|e| RevisionDataParams {
477 flags: e.flags(),
477 flags: e.flags(),
478 data_offset: if rev.0 == 0 && !self.bytes.is_new() {
478 data_offset: if rev.0 == 0 && !self.bytes.is_new() {
479 e.flags() as u64
479 e.flags() as u64
480 } else {
480 } else {
481 e.raw_offset()
481 e.raw_offset()
482 },
482 },
483 data_compressed_length: e.compressed_len().try_into().unwrap(),
483 data_compressed_length: e.compressed_len().try_into().unwrap(),
484 data_uncompressed_length: e.uncompressed_len(),
484 data_uncompressed_length: e.uncompressed_len(),
485 data_delta_base: e.base_revision_or_base_of_delta_chain().0,
485 data_delta_base: e.base_revision_or_base_of_delta_chain().0,
486 link_rev: e.link_revision().0,
486 link_rev: e.link_revision().0,
487 parent_rev_1: e.p1().0,
487 parent_rev_1: e.p1().0,
488 parent_rev_2: e.p2().0,
488 parent_rev_2: e.p2().0,
489 node_id: e.hash().as_bytes().try_into().unwrap(),
489 node_id: e.hash().as_bytes().try_into().unwrap(),
490 ..Default::default()
490 ..Default::default()
491 })
491 })
492 }
492 }
493
493
494 fn get_entry_inline(
494 fn get_entry_inline(
495 &self,
495 &self,
496 rev: Revision,
496 rev: Revision,
497 offsets: &[usize],
497 offsets: &[usize],
498 ) -> IndexEntry {
498 ) -> IndexEntry {
499 let start = offsets[rev.0 as usize];
499 let start = offsets[rev.0 as usize];
500 let end = start + INDEX_ENTRY_SIZE;
500 let end = start + INDEX_ENTRY_SIZE;
501 let bytes = &self.bytes[start..end];
501 let bytes = &self.bytes[start..end];
502
502
503 // See IndexEntry for an explanation of this override.
503 // See IndexEntry for an explanation of this override.
504 let offset_override = Some(end);
504 let offset_override = Some(end);
505
505
506 IndexEntry {
506 IndexEntry {
507 bytes,
507 bytes,
508 offset_override,
508 offset_override,
509 }
509 }
510 }
510 }
511
511
512 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
512 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
513 let start = rev.0 as usize * INDEX_ENTRY_SIZE;
513 let start = rev.0 as usize * INDEX_ENTRY_SIZE;
514 let end = start + INDEX_ENTRY_SIZE;
514 let end = start + INDEX_ENTRY_SIZE;
515 let bytes = &self.bytes[start..end];
515 let bytes = &self.bytes[start..end];
516
516
517 // Override the offset of the first revision as its bytes are used
517 // Override the offset of the first revision as its bytes are used
518 // for the index's metadata (saving space because it is always 0)
518 // for the index's metadata (saving space because it is always 0)
519 let offset_override = if rev == Revision(0) { Some(0) } else { None };
519 let offset_override = if rev == Revision(0) { Some(0) } else { None };
520
520
521 IndexEntry {
521 IndexEntry {
522 bytes,
522 bytes,
523 offset_override,
523 offset_override,
524 }
524 }
525 }
525 }
526
526
527 fn null_entry(&self) -> IndexEntry {
527 fn null_entry(&self) -> IndexEntry {
528 IndexEntry {
528 IndexEntry {
529 bytes: &[0; INDEX_ENTRY_SIZE],
529 bytes: &[0; INDEX_ENTRY_SIZE],
530 offset_override: Some(0),
530 offset_override: Some(0),
531 }
531 }
532 }
532 }
533
533
534 /// Return the head revisions of this index
534 /// Return the head revisions of this index
535 pub fn head_revs(&mut self) -> Result<Vec<Revision>, GraphError> {
535 pub fn head_revs(&mut self) -> Result<Vec<Revision>, GraphError> {
536 self.head_revs_filtered(&HashSet::new())
536 self.head_revs_filtered(&HashSet::new())
537 }
537 }
538
538
539 /// Return the head revisions of this index
539 /// Return the head revisions of this index
540 pub fn head_revs_filtered(
540 pub fn head_revs_filtered(
541 &mut self,
541 &mut self,
542 filtered_revs: &HashSet<Revision>,
542 filtered_revs: &HashSet<Revision>,
543 ) -> Result<Vec<Revision>, GraphError> {
543 ) -> Result<Vec<Revision>, GraphError> {
544 if !self.head_revs.is_empty() && filtered_revs == &self.filtered_revs {
544 if !self.head_revs.is_empty() && filtered_revs == &self.filtered_revs {
545 return Ok(self.head_revs.to_owned());
545 return Ok(self.head_revs.to_owned());
546 }
546 }
547 let mut revs: HashSet<Revision, RandomState> =
547 let mut revs: HashSet<Revision, RandomState> =
548 if filtered_revs.is_empty() {
548 if filtered_revs.is_empty() {
549 (0..self.len())
549 (0..self.len())
550 .into_iter()
550 .into_iter()
551 .map(|i| Revision(i as BaseRevision))
551 .map(|i| Revision(i as BaseRevision))
552 .collect()
552 .collect()
553 } else {
553 } else {
554 (0..self.len())
554 (0..self.len())
555 .into_iter()
555 .into_iter()
556 .filter_map(|i| {
556 .filter_map(|i| {
557 let r = Revision(i as BaseRevision);
557 let r = Revision(i as BaseRevision);
558 if filtered_revs.contains(&r) {
558 if filtered_revs.contains(&r) {
559 None
559 None
560 } else {
560 } else {
561 Some(r)
561 Some(r)
562 }
562 }
563 })
563 })
564 .collect()
564 .collect()
565 };
565 };
566 dagops::retain_heads(self, &mut revs)?;
566 dagops::retain_heads(self, &mut revs)?;
567 if self.is_empty() {
567 if self.is_empty() {
568 revs.insert(NULL_REVISION);
568 revs.insert(NULL_REVISION);
569 }
569 }
570 let mut as_vec: Vec<Revision> =
570 let mut as_vec: Vec<Revision> =
571 revs.into_iter().map(Into::into).collect();
571 revs.into_iter().map(Into::into).collect();
572 as_vec.sort_unstable();
572 as_vec.sort_unstable();
573 self.head_revs = as_vec.to_owned();
573 self.head_revs = as_vec.to_owned();
574 self.filtered_revs = filtered_revs.to_owned();
574 self.filtered_revs = filtered_revs.to_owned();
575 Ok(as_vec)
575 Ok(as_vec)
576 }
576 }
577
577
578 /// Obtain the delta chain for a revision.
578 /// Obtain the delta chain for a revision.
579 ///
579 ///
580 /// `stop_rev` specifies a revision to stop at. If not specified, we
580 /// `stop_rev` specifies a revision to stop at. If not specified, we
581 /// stop at the base of the chain.
581 /// stop at the base of the chain.
582 ///
582 ///
583 /// Returns a 2-tuple of (chain, stopped) where `chain` is a vec of
583 /// Returns a 2-tuple of (chain, stopped) where `chain` is a vec of
584 /// revs in ascending order and `stopped` is a bool indicating whether
584 /// revs in ascending order and `stopped` is a bool indicating whether
585 /// `stoprev` was hit.
585 /// `stoprev` was hit.
586 pub fn delta_chain(
586 pub fn delta_chain(
587 &self,
587 &self,
588 rev: Revision,
588 rev: Revision,
589 stop_rev: Option<Revision>,
589 stop_rev: Option<Revision>,
590 ) -> Result<(Vec<Revision>, bool), HgError> {
590 ) -> Result<(Vec<Revision>, bool), HgError> {
591 let mut current_rev = rev;
591 let mut current_rev = rev;
592 let mut entry = self.get_entry(rev).unwrap();
592 let mut entry = self.get_entry(rev).unwrap();
593 let mut chain = vec![];
593 let mut chain = vec![];
594 while current_rev.0 != entry.base_revision_or_base_of_delta_chain().0
594 while current_rev.0 != entry.base_revision_or_base_of_delta_chain().0
595 && stop_rev.map(|r| r != current_rev).unwrap_or(true)
595 && stop_rev.map(|r| r != current_rev).unwrap_or(true)
596 {
596 {
597 chain.push(current_rev);
597 chain.push(current_rev);
598 let new_rev = if self.uses_generaldelta() {
598 let new_rev = if self.uses_generaldelta() {
599 entry.base_revision_or_base_of_delta_chain()
599 entry.base_revision_or_base_of_delta_chain()
600 } else {
600 } else {
601 UncheckedRevision(current_rev.0 - 1)
601 UncheckedRevision(current_rev.0 - 1)
602 };
602 };
603 current_rev = self.check_revision(new_rev).ok_or_else(|| {
603 current_rev = self.check_revision(new_rev).ok_or_else(|| {
604 HgError::corrupted(format!("Revision {new_rev} out of range"))
604 HgError::corrupted(format!("Revision {new_rev} out of range"))
605 })?;
605 })?;
606 if current_rev.0 == NULL_REVISION.0 {
606 if current_rev.0 == NULL_REVISION.0 {
607 break;
607 break;
608 }
608 }
609 entry = self.get_entry(current_rev).unwrap()
609 entry = self.get_entry(current_rev).unwrap()
610 }
610 }
611
611
612 let stopped = if stop_rev.map(|r| current_rev == r).unwrap_or(false) {
612 let stopped = if stop_rev.map(|r| current_rev == r).unwrap_or(false) {
613 true
613 true
614 } else {
614 } else {
615 chain.push(current_rev);
615 chain.push(current_rev);
616 false
616 false
617 };
617 };
618 chain.reverse();
618 chain.reverse();
619 Ok((chain, stopped))
619 Ok((chain, stopped))
620 }
620 }
621
621
622 pub fn find_snapshots(
622 pub fn find_snapshots(
623 &self,
623 &self,
624 start_rev: UncheckedRevision,
624 start_rev: UncheckedRevision,
625 end_rev: UncheckedRevision,
625 end_rev: UncheckedRevision,
626 cache: &mut impl SnapshotsCache,
626 cache: &mut impl SnapshotsCache,
627 ) -> Result<(), RevlogError> {
627 ) -> Result<(), RevlogError> {
628 let mut start_rev = start_rev.0;
628 let mut start_rev = start_rev.0;
629 let mut end_rev = end_rev.0;
629 let mut end_rev = end_rev.0;
630 end_rev += 1;
630 end_rev += 1;
631 let len = self.len().try_into().unwrap();
631 let len = self.len().try_into().unwrap();
632 if end_rev > len {
632 if end_rev > len {
633 end_rev = len;
633 end_rev = len;
634 }
634 }
635 if start_rev < 0 {
635 if start_rev < 0 {
636 start_rev = 0;
636 start_rev = 0;
637 }
637 }
638 for rev in start_rev..end_rev {
638 for rev in start_rev..end_rev {
639 if !self.is_snapshot_unchecked(Revision(rev))? {
639 if !self.is_snapshot_unchecked(Revision(rev))? {
640 continue;
640 continue;
641 }
641 }
642 let mut base = self
642 let mut base = self
643 .get_entry(Revision(rev))
643 .get_entry(Revision(rev))
644 .unwrap()
644 .unwrap()
645 .base_revision_or_base_of_delta_chain();
645 .base_revision_or_base_of_delta_chain();
646 if base.0 == rev {
646 if base.0 == rev {
647 base = NULL_REVISION.into();
647 base = NULL_REVISION.into();
648 }
648 }
649 cache.insert_for(base.0, rev)?;
649 cache.insert_for(base.0, rev)?;
650 }
650 }
651 Ok(())
651 Ok(())
652 }
652 }
653
653
654 /// TODO move this to the trait probably, along with other things
654 /// TODO move this to the trait probably, along with other things
655 pub fn append(
655 pub fn append(
656 &mut self,
656 &mut self,
657 revision_data: RevisionDataParams,
657 revision_data: RevisionDataParams,
658 ) -> Result<(), RevlogError> {
658 ) -> Result<(), RevlogError> {
659 revision_data.validate()?;
659 revision_data.validate()?;
660 let new_offset = self.bytes.len();
660 let new_offset = self.bytes.len();
661 if let Some(offsets) = &mut *self.get_offsets_mut() {
661 if let Some(offsets) = &mut *self.get_offsets_mut() {
662 offsets.push(new_offset)
662 offsets.push(new_offset)
663 }
663 }
664 self.bytes.added.extend(revision_data.into_v1().as_bytes());
664 self.bytes.added.extend(revision_data.into_v1().as_bytes());
665 self.head_revs.clear();
665 self.head_revs.clear();
666 Ok(())
666 Ok(())
667 }
667 }
668
668
669 pub fn pack_header(&self, header: i32) -> [u8; 4] {
669 pub fn pack_header(&self, header: i32) -> [u8; 4] {
670 header.to_be_bytes()
670 header.to_be_bytes()
671 }
671 }
672
672
673 pub fn remove(&mut self, rev: Revision) -> Result<(), RevlogError> {
673 pub fn remove(&mut self, rev: Revision) -> Result<(), RevlogError> {
674 let offsets = self.get_offsets().clone();
674 let offsets = self.get_offsets().clone();
675 self.bytes.remove(rev, offsets.as_deref())?;
675 self.bytes.remove(rev, offsets.as_deref())?;
676 if let Some(offsets) = &mut *self.get_offsets_mut() {
676 if let Some(offsets) = &mut *self.get_offsets_mut() {
677 offsets.truncate(rev.0 as usize)
677 offsets.truncate(rev.0 as usize)
678 }
678 }
679 self.head_revs.clear();
679 self.head_revs.clear();
680 Ok(())
680 Ok(())
681 }
681 }
682
682
683 pub fn clear_caches(&mut self) {
683 pub fn clear_caches(&mut self) {
684 // We need to get the 'inline' value from Python at init and use this
684 // We need to get the 'inline' value from Python at init and use this
685 // instead of offsets to determine whether we're inline since we might
685 // instead of offsets to determine whether we're inline since we might
686 // clear caches. This implies re-populating the offsets on-demand.
686 // clear caches. This implies re-populating the offsets on-demand.
687 self.offsets = RwLock::new(None);
687 self.offsets = RwLock::new(None);
688 self.head_revs.clear();
688 self.head_revs.clear();
689 }
689 }
690
690
691 /// Unchecked version of `is_snapshot`.
691 /// Unchecked version of `is_snapshot`.
692 /// Assumes the caller checked that `rev` is within a valid revision range.
692 /// Assumes the caller checked that `rev` is within a valid revision range.
693 pub fn is_snapshot_unchecked(
693 pub fn is_snapshot_unchecked(
694 &self,
694 &self,
695 mut rev: Revision,
695 mut rev: Revision,
696 ) -> Result<bool, RevlogError> {
696 ) -> Result<bool, RevlogError> {
697 while rev.0 >= 0 {
697 while rev.0 >= 0 {
698 let entry = self.get_entry(rev).unwrap();
698 let entry = self.get_entry(rev).unwrap();
699 let mut base = entry.base_revision_or_base_of_delta_chain().0;
699 let mut base = entry.base_revision_or_base_of_delta_chain().0;
700 if base == rev.0 {
700 if base == rev.0 {
701 base = NULL_REVISION.0;
701 base = NULL_REVISION.0;
702 }
702 }
703 if base == NULL_REVISION.0 {
703 if base == NULL_REVISION.0 {
704 return Ok(true);
704 return Ok(true);
705 }
705 }
706 let [mut p1, mut p2] = self
706 let [mut p1, mut p2] = self
707 .parents(rev)
707 .parents(rev)
708 .map_err(|_| RevlogError::InvalidRevision)?;
708 .map_err(|_| RevlogError::InvalidRevision)?;
709 while let Some(p1_entry) = self.get_entry(p1) {
709 while let Some(p1_entry) = self.get_entry(p1) {
710 if p1_entry.compressed_len() != 0 || p1.0 == 0 {
710 if p1_entry.compressed_len() != 0 || p1.0 == 0 {
711 break;
711 break;
712 }
712 }
713 let parent_base =
713 let parent_base =
714 p1_entry.base_revision_or_base_of_delta_chain();
714 p1_entry.base_revision_or_base_of_delta_chain();
715 if parent_base.0 == p1.0 {
715 if parent_base.0 == p1.0 {
716 break;
716 break;
717 }
717 }
718 p1 = self
718 p1 = self
719 .check_revision(parent_base)
719 .check_revision(parent_base)
720 .ok_or(RevlogError::InvalidRevision)?;
720 .ok_or(RevlogError::InvalidRevision)?;
721 }
721 }
722 while let Some(p2_entry) = self.get_entry(p2) {
722 while let Some(p2_entry) = self.get_entry(p2) {
723 if p2_entry.compressed_len() != 0 || p2.0 == 0 {
723 if p2_entry.compressed_len() != 0 || p2.0 == 0 {
724 break;
724 break;
725 }
725 }
726 let parent_base =
726 let parent_base =
727 p2_entry.base_revision_or_base_of_delta_chain();
727 p2_entry.base_revision_or_base_of_delta_chain();
728 if parent_base.0 == p2.0 {
728 if parent_base.0 == p2.0 {
729 break;
729 break;
730 }
730 }
731 p2 = self
731 p2 = self
732 .check_revision(parent_base)
732 .check_revision(parent_base)
733 .ok_or(RevlogError::InvalidRevision)?;
733 .ok_or(RevlogError::InvalidRevision)?;
734 }
734 }
735 if base == p1.0 || base == p2.0 {
735 if base == p1.0 || base == p2.0 {
736 return Ok(false);
736 return Ok(false);
737 }
737 }
738 rev = self
738 rev = self
739 .check_revision(base.into())
739 .check_revision(base.into())
740 .ok_or(RevlogError::InvalidRevision)?;
740 .ok_or(RevlogError::InvalidRevision)?;
741 }
741 }
742 Ok(rev == NULL_REVISION)
742 Ok(rev == NULL_REVISION)
743 }
743 }
744
744
745 /// Return whether the given revision is a snapshot. Returns an error if
745 /// Return whether the given revision is a snapshot. Returns an error if
746 /// `rev` is not within a valid revision range.
746 /// `rev` is not within a valid revision range.
747 pub fn is_snapshot(
747 pub fn is_snapshot(
748 &self,
748 &self,
749 rev: UncheckedRevision,
749 rev: UncheckedRevision,
750 ) -> Result<bool, RevlogError> {
750 ) -> Result<bool, RevlogError> {
751 let rev = self
751 let rev = self
752 .check_revision(rev)
752 .check_revision(rev)
753 .ok_or_else(|| RevlogError::corrupted("test"))?;
753 .ok_or_else(|| RevlogError::corrupted("test"))?;
754 self.is_snapshot_unchecked(rev)
754 self.is_snapshot_unchecked(rev)
755 }
755 }
756
756
757 /// Slice revs to reduce the amount of unrelated data to be read from disk.
757 /// Slice revs to reduce the amount of unrelated data to be read from disk.
758 ///
758 ///
759 /// The index is sliced into groups that should be read in one time.
759 /// The index is sliced into groups that should be read in one time.
760 ///
760 ///
761 /// The initial chunk is sliced until the overall density
761 /// The initial chunk is sliced until the overall density
762 /// (payload/chunks-span ratio) is above `target_density`.
762 /// (payload/chunks-span ratio) is above `target_density`.
763 /// No gap smaller than `min_gap_size` is skipped.
763 /// No gap smaller than `min_gap_size` is skipped.
764 pub fn slice_chunk_to_density(
764 pub fn slice_chunk_to_density(
765 &self,
765 &self,
766 revs: &[Revision],
766 revs: &[Revision],
767 target_density: f64,
767 target_density: f64,
768 min_gap_size: usize,
768 min_gap_size: usize,
769 ) -> Vec<Vec<Revision>> {
769 ) -> Vec<Vec<Revision>> {
770 if revs.is_empty() {
770 if revs.is_empty() {
771 return vec![];
771 return vec![];
772 }
772 }
773 if revs.len() == 1 {
773 if revs.len() == 1 {
774 return vec![revs.to_owned()];
774 return vec![revs.to_owned()];
775 }
775 }
776 let delta_chain_span = self.segment_span(revs);
776 let delta_chain_span = self.segment_span(revs);
777 if delta_chain_span < min_gap_size {
777 if delta_chain_span < min_gap_size {
778 return vec![revs.to_owned()];
778 return vec![revs.to_owned()];
779 }
779 }
780 let entries: Vec<_> = revs
780 let entries: Vec<_> = revs
781 .iter()
781 .iter()
782 .map(|r| {
782 .map(|r| {
783 (*r, self.get_entry(*r).unwrap_or_else(|| self.null_entry()))
783 (*r, self.get_entry(*r).unwrap_or_else(|| self.null_entry()))
784 })
784 })
785 .collect();
785 .collect();
786
786
787 let mut read_data = delta_chain_span;
787 let mut read_data = delta_chain_span;
788 let chain_payload: u32 =
788 let chain_payload: u32 =
789 entries.iter().map(|(_r, e)| e.compressed_len()).sum();
789 entries.iter().map(|(_r, e)| e.compressed_len()).sum();
790 let mut density = if delta_chain_span > 0 {
790 let mut density = if delta_chain_span > 0 {
791 chain_payload as f64 / delta_chain_span as f64
791 chain_payload as f64 / delta_chain_span as f64
792 } else {
792 } else {
793 1.0
793 1.0
794 };
794 };
795
795
796 if density >= target_density {
796 if density >= target_density {
797 return vec![revs.to_owned()];
797 return vec![revs.to_owned()];
798 }
798 }
799
799
800 // Store the gaps in a heap to have them sorted by decreasing size
800 // Store the gaps in a heap to have them sorted by decreasing size
801 let mut gaps = Vec::new();
801 let mut gaps = Vec::new();
802 let mut previous_end = None;
802 let mut previous_end = None;
803
803
804 for (i, (_rev, entry)) in entries.iter().enumerate() {
804 for (i, (_rev, entry)) in entries.iter().enumerate() {
805 let start = entry.c_start() as usize;
805 let start = entry.c_start() as usize;
806 let length = entry.compressed_len();
806 let length = entry.compressed_len();
807
807
808 // Skip empty revisions to form larger holes
808 // Skip empty revisions to form larger holes
809 if length == 0 {
809 if length == 0 {
810 continue;
810 continue;
811 }
811 }
812
812
813 if let Some(end) = previous_end {
813 if let Some(end) = previous_end {
814 let gap_size = start - end;
814 let gap_size = start - end;
815 // Only consider holes that are large enough
815 // Only consider holes that are large enough
816 if gap_size > min_gap_size {
816 if gap_size > min_gap_size {
817 gaps.push((gap_size, i));
817 gaps.push((gap_size, i));
818 }
818 }
819 }
819 }
820 previous_end = Some(start + length as usize);
820 previous_end = Some(start + length as usize);
821 }
821 }
822 if gaps.is_empty() {
822 if gaps.is_empty() {
823 return vec![revs.to_owned()];
823 return vec![revs.to_owned()];
824 }
824 }
825 // sort the gaps to pop them from largest to small
825 // sort the gaps to pop them from largest to small
826 gaps.sort_unstable();
826 gaps.sort_unstable();
827
827
828 // Collect the indices of the largest holes until
828 // Collect the indices of the largest holes until
829 // the density is acceptable
829 // the density is acceptable
830 let mut selected = vec![];
830 let mut selected = vec![];
831 while let Some((gap_size, gap_id)) = gaps.pop() {
831 while let Some((gap_size, gap_id)) = gaps.pop() {
832 if density >= target_density {
832 if density >= target_density {
833 break;
833 break;
834 }
834 }
835 selected.push(gap_id);
835 selected.push(gap_id);
836
836
837 // The gap sizes are stored as negatives to be sorted decreasingly
837 // The gap sizes are stored as negatives to be sorted decreasingly
838 // by the heap
838 // by the heap
839 read_data -= gap_size;
839 read_data -= gap_size;
840 density = if read_data > 0 {
840 density = if read_data > 0 {
841 chain_payload as f64 / read_data as f64
841 chain_payload as f64 / read_data as f64
842 } else {
842 } else {
843 1.0
843 1.0
844 };
844 };
845 if density >= target_density {
845 if density >= target_density {
846 break;
846 break;
847 }
847 }
848 }
848 }
849 selected.sort_unstable();
849 selected.sort_unstable();
850 selected.push(revs.len());
850 selected.push(revs.len());
851
851
852 // Cut the revs at collected indices
852 // Cut the revs at collected indices
853 let mut previous_idx = 0;
853 let mut previous_idx = 0;
854 let mut chunks = vec![];
854 let mut chunks = vec![];
855 for idx in selected {
855 for idx in selected {
856 let chunk = self.trim_chunk(&entries, previous_idx, idx);
856 let chunk = self.trim_chunk(&entries, previous_idx, idx);
857 if !chunk.is_empty() {
857 if !chunk.is_empty() {
858 chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
858 chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
859 }
859 }
860 previous_idx = idx;
860 previous_idx = idx;
861 }
861 }
862 let chunk = self.trim_chunk(&entries, previous_idx, entries.len());
862 let chunk = self.trim_chunk(&entries, previous_idx, entries.len());
863 if !chunk.is_empty() {
863 if !chunk.is_empty() {
864 chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
864 chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
865 }
865 }
866
866
867 chunks
867 chunks
868 }
868 }
869
869
870 /// Get the byte span of a segment of sorted revisions.
870 /// Get the byte span of a segment of sorted revisions.
871 ///
871 ///
872 /// Occurrences of [`NULL_REVISION`] are ignored at the beginning of
872 /// Occurrences of [`NULL_REVISION`] are ignored at the beginning of
873 /// the `revs` segment.
873 /// the `revs` segment.
874 ///
874 ///
875 /// panics:
875 /// panics:
876 /// - if `revs` is empty or only made of `NULL_REVISION`
876 /// - if `revs` is empty or only made of `NULL_REVISION`
877 /// - if cannot retrieve entry for the last or first not null element of
877 /// - if cannot retrieve entry for the last or first not null element of
878 /// `revs`.
878 /// `revs`.
879 fn segment_span(&self, revs: &[Revision]) -> usize {
879 fn segment_span(&self, revs: &[Revision]) -> usize {
880 if revs.is_empty() {
880 if revs.is_empty() {
881 return 0;
881 return 0;
882 }
882 }
883 let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
883 let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
884 let end = last_entry.c_start() + last_entry.compressed_len() as u64;
884 let end = last_entry.c_start() + last_entry.compressed_len() as u64;
885 let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
885 let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
886 let start = if (*first_rev).0 == 0 {
886 let start = if (*first_rev).0 == 0 {
887 0
887 0
888 } else {
888 } else {
889 self.get_entry(*first_rev).unwrap().c_start()
889 self.get_entry(*first_rev).unwrap().c_start()
890 };
890 };
891 (end - start) as usize
891 (end - start) as usize
892 }
892 }
893
893
894 /// Returns `&revs[startidx..endidx]` without empty trailing revs
894 /// Returns `&revs[startidx..endidx]` without empty trailing revs
895 fn trim_chunk<'a>(
895 fn trim_chunk<'a>(
896 &'a self,
896 &'a self,
897 revs: &'a [(Revision, IndexEntry)],
897 revs: &'a [(Revision, IndexEntry)],
898 start: usize,
898 start: usize,
899 mut end: usize,
899 mut end: usize,
900 ) -> &'a [(Revision, IndexEntry)] {
900 ) -> &'a [(Revision, IndexEntry)] {
901 // Trim empty revs at the end, except the very first rev of a chain
901 // Trim empty revs at the end, except the very first rev of a chain
902 let last_rev = revs[end - 1].0;
902 let last_rev = revs[end - 1].0;
903 if last_rev.0 < self.len() as BaseRevision {
903 if last_rev.0 < self.len() as BaseRevision {
904 while end > 1
904 while end > 1
905 && end > start
905 && end > start
906 && revs[end - 1].1.compressed_len() == 0
906 && revs[end - 1].1.compressed_len() == 0
907 {
907 {
908 end -= 1
908 end -= 1
909 }
909 }
910 }
910 }
911 &revs[start..end]
911 &revs[start..end]
912 }
912 }
913
913
914 /// Computes the set of revisions for each non-public phase from `roots`,
914 /// Computes the set of revisions for each non-public phase from `roots`,
915 /// which are the last known roots for each non-public phase.
915 /// which are the last known roots for each non-public phase.
916 pub fn compute_phases_map_sets(
916 pub fn compute_phases_map_sets(
917 &self,
917 &self,
918 roots: HashMap<Phase, Vec<Revision>>,
918 roots: HashMap<Phase, Vec<Revision>>,
919 ) -> Result<(usize, RootsPerPhase), GraphError> {
919 ) -> Result<(usize, RootsPerPhase), GraphError> {
920 let mut phases = HashMap::new();
920 let mut phases = HashMap::new();
921 let mut min_phase_rev = NULL_REVISION;
921 let mut min_phase_rev = NULL_REVISION;
922
922
923 for phase in Phase::non_public_phases() {
923 for phase in Phase::non_public_phases() {
924 if let Some(phase_roots) = roots.get(phase) {
924 if let Some(phase_roots) = roots.get(phase) {
925 let min_rev =
925 let min_rev =
926 self.add_roots_get_min(phase_roots, &mut phases, *phase);
926 self.add_roots_get_min(phase_roots, &mut phases, *phase);
927 if min_rev != NULL_REVISION
927 if min_rev != NULL_REVISION
928 && (min_phase_rev == NULL_REVISION
928 && (min_phase_rev == NULL_REVISION
929 || min_rev < min_phase_rev)
929 || min_rev < min_phase_rev)
930 {
930 {
931 min_phase_rev = min_rev;
931 min_phase_rev = min_rev;
932 }
932 }
933 } else {
933 } else {
934 continue;
934 continue;
935 };
935 };
936 }
936 }
937 let mut phase_sets: RootsPerPhase = Default::default();
937 let mut phase_sets: RootsPerPhase = Default::default();
938
938
939 if min_phase_rev == NULL_REVISION {
939 if min_phase_rev == NULL_REVISION {
940 min_phase_rev = Revision(self.len() as BaseRevision);
940 min_phase_rev = Revision(self.len() as BaseRevision);
941 }
941 }
942
942
943 for rev in min_phase_rev.0..self.len() as BaseRevision {
943 for rev in min_phase_rev.0..self.len() as BaseRevision {
944 let rev = Revision(rev);
944 let rev = Revision(rev);
945 let [p1, p2] = self.parents(rev)?;
945 let [p1, p2] = self.parents(rev)?;
946
946
947 const DEFAULT_PHASE: &Phase = &Phase::Public;
947 const DEFAULT_PHASE: &Phase = &Phase::Public;
948 if p1.0 >= 0
948 if p1.0 >= 0
949 && phases.get(&p1).unwrap_or(DEFAULT_PHASE)
949 && phases.get(&p1).unwrap_or(DEFAULT_PHASE)
950 > phases.get(&rev).unwrap_or(DEFAULT_PHASE)
950 > phases.get(&rev).unwrap_or(DEFAULT_PHASE)
951 {
951 {
952 phases.insert(rev, phases[&p1]);
952 phases.insert(rev, phases[&p1]);
953 }
953 }
954 if p2.0 >= 0
954 if p2.0 >= 0
955 && phases.get(&p2).unwrap_or(DEFAULT_PHASE)
955 && phases.get(&p2).unwrap_or(DEFAULT_PHASE)
956 > phases.get(&rev).unwrap_or(DEFAULT_PHASE)
956 > phases.get(&rev).unwrap_or(DEFAULT_PHASE)
957 {
957 {
958 phases.insert(rev, phases[&p2]);
958 phases.insert(rev, phases[&p2]);
959 }
959 }
960 let set = match phases.get(&rev).unwrap_or(DEFAULT_PHASE) {
960 let set = match phases.get(&rev).unwrap_or(DEFAULT_PHASE) {
961 Phase::Public => continue,
961 Phase::Public => continue,
962 phase => &mut phase_sets[*phase as usize - 1],
962 phase => &mut phase_sets[*phase as usize - 1],
963 };
963 };
964 set.insert(rev);
964 set.insert(rev);
965 }
965 }
966
966
967 Ok((self.len(), phase_sets))
967 Ok((self.len(), phase_sets))
968 }
968 }
969
969
970 fn add_roots_get_min(
970 fn add_roots_get_min(
971 &self,
971 &self,
972 phase_roots: &[Revision],
972 phase_roots: &[Revision],
973 phases: &mut HashMap<Revision, Phase>,
973 phases: &mut HashMap<Revision, Phase>,
974 phase: Phase,
974 phase: Phase,
975 ) -> Revision {
975 ) -> Revision {
976 let mut min_rev = NULL_REVISION;
976 let mut min_rev = NULL_REVISION;
977
977
978 for root in phase_roots {
978 for root in phase_roots {
979 phases.insert(*root, phase);
979 phases.insert(*root, phase);
980 if min_rev == NULL_REVISION || min_rev > *root {
980 if min_rev == NULL_REVISION || min_rev > *root {
981 min_rev = *root;
981 min_rev = *root;
982 }
982 }
983 }
983 }
984 min_rev
984 min_rev
985 }
985 }
986
986
987 /// Return `(heads(::(<roots> and <roots>::<heads>)))`
987 /// Return `(heads(::(<roots> and <roots>::<heads>)))`
988 /// If `include_path` is `true`, return `(<roots>::<heads>)`."""
988 /// If `include_path` is `true`, return `(<roots>::<heads>)`."""
989 ///
989 ///
990 /// `min_root` and `roots` are unchecked since they are just used as
990 /// `min_root` and `roots` are unchecked since they are just used as
991 /// a bound or for comparison and don't need to represent a valid revision.
991 /// a bound or for comparison and don't need to represent a valid revision.
992 /// In practice, the only invalid revision passed is the working directory
992 /// In practice, the only invalid revision passed is the working directory
993 /// revision ([`i32::MAX`]).
993 /// revision ([`i32::MAX`]).
994 pub fn reachable_roots(
994 pub fn reachable_roots(
995 &self,
995 &self,
996 min_root: UncheckedRevision,
996 min_root: UncheckedRevision,
997 mut heads: Vec<Revision>,
997 mut heads: Vec<Revision>,
998 roots: HashSet<UncheckedRevision>,
998 roots: HashSet<UncheckedRevision>,
999 include_path: bool,
999 include_path: bool,
1000 ) -> Result<HashSet<Revision>, GraphError> {
1000 ) -> Result<HashSet<Revision>, GraphError> {
1001 if roots.is_empty() {
1001 if roots.is_empty() {
1002 return Ok(HashSet::new());
1002 return Ok(HashSet::new());
1003 }
1003 }
1004 let mut reachable = HashSet::new();
1004 let mut reachable = HashSet::new();
1005 let mut seen = HashMap::new();
1005 let mut seen = HashMap::new();
1006
1006
1007 while let Some(rev) = heads.pop() {
1007 while let Some(rev) = heads.pop() {
1008 if roots.contains(&rev.into()) {
1008 if roots.contains(&rev.into()) {
1009 reachable.insert(rev);
1009 reachable.insert(rev);
1010 if !include_path {
1010 if !include_path {
1011 continue;
1011 continue;
1012 }
1012 }
1013 }
1013 }
1014 let parents = self.parents(rev)?;
1014 let parents = self.parents(rev)?;
1015 seen.insert(rev, parents);
1015 seen.insert(rev, parents);
1016 for parent in parents {
1016 for parent in parents {
1017 if parent.0 >= min_root.0 && !seen.contains_key(&parent) {
1017 if parent.0 >= min_root.0 && !seen.contains_key(&parent) {
1018 heads.push(parent);
1018 heads.push(parent);
1019 }
1019 }
1020 }
1020 }
1021 }
1021 }
1022 if !include_path {
1022 if !include_path {
1023 return Ok(reachable);
1023 return Ok(reachable);
1024 }
1024 }
1025 let mut revs: Vec<_> = seen.keys().collect();
1025 let mut revs: Vec<_> = seen.keys().collect();
1026 revs.sort_unstable();
1026 revs.sort_unstable();
1027 for rev in revs {
1027 for rev in revs {
1028 for parent in seen[rev] {
1028 for parent in seen[rev] {
1029 if reachable.contains(&parent) {
1029 if reachable.contains(&parent) {
1030 reachable.insert(*rev);
1030 reachable.insert(*rev);
1031 }
1031 }
1032 }
1032 }
1033 }
1033 }
1034 Ok(reachable)
1034 Ok(reachable)
1035 }
1035 }
1036
1036
1037 /// Given a (possibly overlapping) set of revs, return all the
1037 /// Given a (possibly overlapping) set of revs, return all the
1038 /// common ancestors heads: `heads(::args[0] and ::a[1] and ...)`
1038 /// common ancestors heads: `heads(::args[0] and ::a[1] and ...)`
1039 pub fn common_ancestor_heads(&self, _revisions: &[Revision]) {
1039 pub fn common_ancestor_heads(&self, _revisions: &[Revision]) {
1040 todo!()
1040 todo!()
1041 }
1041 }
1042
1042
1043 /// Given a disjoint set of revs, return all candidates for the
1043 /// Given a disjoint set of revs, return all candidates for the
1044 /// greatest common ancestor. In revset notation, this is the set
1044 /// greatest common ancestor. In revset notation, this is the set
1045 /// `heads(::a and ::b and ...)`
1045 /// `heads(::a and ::b and ...)`
1046 #[allow(dead_code)]
1046 #[allow(dead_code)]
1047 fn find_gca_candidates(
1047 fn find_gca_candidates<BS: PoisonableBitSet + Clone>(
1048 &self,
1048 &self,
1049 revs: &[Revision],
1049 revs: &[Revision],
1050 ) -> Result<Vec<Revision>, GraphError> {
1050 ) -> Result<Vec<Revision>, GraphError> {
1051 if revs.is_empty() {
1051 if revs.is_empty() {
1052 return Ok(vec![]);
1052 return Ok(vec![]);
1053 }
1053 }
1054 let revcount = revs.len();
1054 let revcount = revs.len();
1055 let mut candidates = vec![];
1055 let mut candidates = vec![];
1056 let all_seen = 1u64 << (revcount - 1);
1057 let poison = 1u64 << revs.len();
1058 let max_rev = revs.iter().max().unwrap();
1056 let max_rev = revs.iter().max().unwrap();
1059 let mut seen = vec![0u64; (max_rev.0 + 1) as usize];
1057
1058 let mut seen = BS::vec_of_empty(revs.len(), (max_rev.0 + 1) as usize);
1060
1059
1061 for (idx, rev) in revs.iter().enumerate() {
1060 for (idx, rev) in revs.iter().enumerate() {
1062 seen[rev.0 as usize] = 1 << idx;
1061 seen[rev.0 as usize].add(idx);
1063 }
1062 }
1064 let mut current_rev = *max_rev;
1063 let mut current_rev = *max_rev;
1065 // Number of revisions whose inspection in the main loop
1064 // Number of revisions whose inspection in the main loop
1066 // will give a result or trigger inspection of other revisions
1065 // will give a result or trigger inspection of other revisions
1067 let mut interesting = revcount;
1066 let mut interesting = revcount;
1068
1067
1069 // poisoned means that the rev is already known to be a common
1068 // poisoned means that the rev is already known to be a common
1070 // ancestor, BUT when first encountered in the loop, not a maximal
1069 // ancestor, BUT when first encountered in the loop, not a maximal
1071 // common ancestor.
1070 // common ancestor.
1072
1071
1073 // The principle of the algorithm is as follows:
1072 // The principle of the algorithm is as follows:
1074 // For a revision `r`, when entering the loop, `seen[r]` is either
1073 // For a revision `r`, when entering the loop, `seen[r]` is either
1075 // poisoned or the sub set of `revs` of which `r` is an ancestor.
1074 // poisoned or the sub set of `revs` of which `r` is an ancestor.
1076 // In the latter case if it "is" `revs`, we have a maximal common
1075 // In the latter case if it "is" `revs`, we have a maximal common
1077 // ancestor.
1076 // ancestor.
1078 //
1077 //
1079 // At each iteration, the bit sets of the parents are updated by
1078 // At each iteration, the bit sets of the parents are updated by
1080 // union with `seen[r]`.
1079 // union with `seen[r]`.
1081 // As we walk the index from the end, we are sure we have encountered
1080 // As we walk the index from the end, we are sure we have encountered
1082 // all children of `r` before `r`, hence we know that `seen[r]` is
1081 // all children of `r` before `r`, hence we know that `seen[r]` is
1083 // fully computed.
1082 // fully computed.
1084 //
1083 //
1085 // On top of that there are several optimizations that make reading
1084 // On top of that there are several optimizations that make reading
1086 // less obvious than the comment above:
1085 // less obvious than the comment above:
1087 // - The `interesting` counter allows to break early
1086 // - The `interesting` counter allows to break early
1088 // - The loop starts from `max(revs)`
1087 // - The loop starts from `max(revs)`
1089 // - Early return in case it is detected that one of the incoming revs
1088 // - Early return in case it is detected that one of the incoming revs
1090 // is a common ancestor of all of them.
1089 // is a common ancestor of all of them.
1091 while current_rev.0 >= 0 && interesting > 0 {
1090 while current_rev.0 >= 0 && interesting > 0 {
1092 let mut current_seen = seen[current_rev.0 as usize];
1091 let mut current_seen = seen[current_rev.0 as usize].clone();
1093
1092
1094 if current_seen == 0 {
1093 if current_seen.is_empty() {
1095 current_rev = Revision(current_rev.0 - 1);
1094 current_rev = Revision(current_rev.0 - 1);
1096 continue;
1095 continue;
1097 }
1096 }
1098 if current_seen < poison {
1097 if !current_seen.is_poisoned() {
1099 interesting -= 1;
1098 interesting -= 1;
1100 if current_seen == all_seen {
1099 if current_seen.is_full_range(revcount) {
1101 candidates.push(current_rev);
1100 candidates.push(current_rev);
1102 current_seen |= poison;
1101 seen[current_rev.0 as usize].poison();
1102 current_seen.poison(); // avoid recloning
1103
1103
1104 // Being a common ancestor, if `current_rev` is among
1104 // Being a common ancestor, if `current_rev` is among
1105 // the input revisions, it is *the* answer.
1105 // the input revisions, it is *the* answer.
1106 for rev in revs {
1106 for rev in revs {
1107 if *rev == current_rev {
1107 if *rev == current_rev {
1108 return Ok(candidates);
1108 return Ok(candidates);
1109 }
1109 }
1110 }
1110 }
1111 }
1111 }
1112 }
1112 }
1113 for parent in self.parents(current_rev)? {
1113 for parent in self.parents(current_rev)? {
1114 if parent == NULL_REVISION {
1114 if parent == NULL_REVISION {
1115 continue;
1115 continue;
1116 }
1116 }
1117 let parent_seen = seen[parent.0 as usize];
1117 let parent_seen = &seen[parent.0 as usize];
1118 if current_seen < poison {
1118 if !current_seen.is_poisoned() {
1119 // Without the `interesting` accounting, this block would
1119 // Without the `interesting` accounting, this block would
1120 // be logically equivalent to: parent_seen |= current_seen
1120 // be logically equivalent to: parent_seen |= current_seen
1121 // The parent counts as interesting if it was not already
1121 // The parent counts as interesting if it was not already
1122 // known to be an ancestor (would already have counted)
1122 // known to be an ancestor (would already have counted)
1123 if parent_seen == 0 {
1123 if parent_seen.is_empty() {
1124 seen[parent.0 as usize] = current_seen;
1124 seen[parent.0 as usize] = current_seen.clone();
1125 interesting += 1;
1125 interesting += 1;
1126 } else if parent_seen != current_seen {
1126 } else if *parent_seen != current_seen {
1127 seen[parent.0 as usize] |= current_seen;
1127 seen[parent.0 as usize].union(&current_seen);
1128 }
1128 }
1129 } else {
1129 } else {
1130 // this block is logically equivalent to poisoning parent
1130 // this block is logically equivalent to poisoning parent
1131 // and counting it as non interesting if it
1131 // and counting it as non interesting if it
1132 // has been seen before (hence counted then as interesting)
1132 // has been seen before (hence counted then as interesting)
1133 if parent_seen != 0 && parent_seen < poison {
1133 if !parent_seen.is_empty() && !parent_seen.is_poisoned() {
1134 interesting -= 1;
1134 interesting -= 1;
1135 }
1135 }
1136 seen[parent.0 as usize] = current_seen;
1136 // equivalent to poisoning parent, which we should do to
1137 // avoid the cloning.
1138 seen[parent.0 as usize] = current_seen.clone();
1137 }
1139 }
1138 }
1140 }
1139
1141
1140 current_rev = Revision(current_rev.0 - 1);
1142 current_rev = Revision(current_rev.0 - 1);
1141 }
1143 }
1142
1144
1143 Ok(candidates)
1145 Ok(candidates)
1144 }
1146 }
1145
1147
1146 /// Given a disjoint set of revs, return the subset with the longest path
1148 /// Given a disjoint set of revs, return the subset with the longest path
1147 /// to the root.
1149 /// to the root.
1148 #[allow(dead_code)]
1150 #[allow(dead_code)]
1149 fn find_deepest_revs(
1151 fn find_deepest_revs(
1150 &self,
1152 &self,
1151 revs: &[Revision],
1153 revs: &[Revision],
1152 ) -> Result<Vec<Revision>, GraphError> {
1154 ) -> Result<Vec<Revision>, GraphError> {
1153 // TODO replace this all with just comparing rank?
1155 // TODO replace this all with just comparing rank?
1154 // Also, the original implementations in C/Python are cryptic, not
1156 // Also, the original implementations in C/Python are cryptic, not
1155 // even sure we actually need this?
1157 // even sure we actually need this?
1156 if revs.len() <= 1 {
1158 if revs.len() <= 1 {
1157 return Ok(revs.to_owned());
1159 return Ok(revs.to_owned());
1158 }
1160 }
1159 let max_rev = revs.iter().max().unwrap().0;
1161 let max_rev = revs.iter().max().unwrap().0;
1160 let mut interesting = HashMap::new();
1162 let mut interesting = HashMap::new();
1161 let mut seen = vec![0; max_rev as usize + 1];
1163 let mut seen = vec![0; max_rev as usize + 1];
1162 let mut depth = vec![0; max_rev as usize + 1];
1164 let mut depth = vec![0; max_rev as usize + 1];
1163 let mut mapping = vec![];
1165 let mut mapping = vec![];
1164 let mut revs = revs.to_owned();
1166 let mut revs = revs.to_owned();
1165 revs.sort_unstable();
1167 revs.sort_unstable();
1166
1168
1167 for (idx, rev) in revs.iter().enumerate() {
1169 for (idx, rev) in revs.iter().enumerate() {
1168 depth[rev.0 as usize] = 1;
1170 depth[rev.0 as usize] = 1;
1169 let shift = 1 << idx;
1171 let shift = 1 << idx;
1170 seen[rev.0 as usize] = shift;
1172 seen[rev.0 as usize] = shift;
1171 interesting.insert(shift, 1);
1173 interesting.insert(shift, 1);
1172 mapping.push((shift, *rev));
1174 mapping.push((shift, *rev));
1173 }
1175 }
1174
1176
1175 let mut current_rev = Revision(max_rev);
1177 let mut current_rev = Revision(max_rev);
1176 while current_rev.0 >= 0 && interesting.len() > 1 {
1178 while current_rev.0 >= 0 && interesting.len() > 1 {
1177 let current_depth = depth[current_rev.0 as usize];
1179 let current_depth = depth[current_rev.0 as usize];
1178 if current_depth == 0 {
1180 if current_depth == 0 {
1179 current_rev = Revision(current_rev.0 - 1);
1181 current_rev = Revision(current_rev.0 - 1);
1180 continue;
1182 continue;
1181 }
1183 }
1182
1184
1183 let current_seen = seen[current_rev.0 as usize];
1185 let current_seen = seen[current_rev.0 as usize];
1184 for parent in self.parents(current_rev)? {
1186 for parent in self.parents(current_rev)? {
1185 if parent == NULL_REVISION {
1187 if parent == NULL_REVISION {
1186 continue;
1188 continue;
1187 }
1189 }
1188 let parent_seen = seen[parent.0 as usize];
1190 let parent_seen = seen[parent.0 as usize];
1189 let parent_depth = depth[parent.0 as usize];
1191 let parent_depth = depth[parent.0 as usize];
1190 if parent_depth <= current_depth {
1192 if parent_depth <= current_depth {
1191 depth[parent.0 as usize] = current_depth + 1;
1193 depth[parent.0 as usize] = current_depth + 1;
1192 if parent_seen != current_seen {
1194 if parent_seen != current_seen {
1193 *interesting.get_mut(&current_seen).unwrap() += 1;
1195 *interesting.get_mut(&current_seen).unwrap() += 1;
1194 seen[parent.0 as usize] = current_seen;
1196 seen[parent.0 as usize] = current_seen;
1195 if parent_seen != 0 {
1197 if parent_seen != 0 {
1196 let parent_interesting =
1198 let parent_interesting =
1197 interesting.get_mut(&parent_seen).unwrap();
1199 interesting.get_mut(&parent_seen).unwrap();
1198 *parent_interesting -= 1;
1200 *parent_interesting -= 1;
1199 if *parent_interesting == 0 {
1201 if *parent_interesting == 0 {
1200 interesting.remove(&parent_seen);
1202 interesting.remove(&parent_seen);
1201 }
1203 }
1202 }
1204 }
1203 }
1205 }
1204 } else if current_depth == parent_depth - 1 {
1206 } else if current_depth == parent_depth - 1 {
1205 let either_seen = parent_seen | current_seen;
1207 let either_seen = parent_seen | current_seen;
1206 if either_seen == parent_seen {
1208 if either_seen == parent_seen {
1207 continue;
1209 continue;
1208 }
1210 }
1209 seen[parent.0 as usize] = either_seen;
1211 seen[parent.0 as usize] = either_seen;
1210 interesting
1212 interesting
1211 .entry(either_seen)
1213 .entry(either_seen)
1212 .and_modify(|v| *v += 1)
1214 .and_modify(|v| *v += 1)
1213 .or_insert(1);
1215 .or_insert(1);
1214 *interesting.get_mut(&parent_seen).unwrap() -= 1;
1216 *interesting.get_mut(&parent_seen).unwrap() -= 1;
1215 if interesting[&parent_seen] == 0 {
1217 if interesting[&parent_seen] == 0 {
1216 interesting.remove(&parent_seen);
1218 interesting.remove(&parent_seen);
1217 }
1219 }
1218 }
1220 }
1219 }
1221 }
1220 *interesting.get_mut(&current_seen).unwrap() -= 1;
1222 *interesting.get_mut(&current_seen).unwrap() -= 1;
1221 if interesting[&current_seen] == 0 {
1223 if interesting[&current_seen] == 0 {
1222 interesting.remove(&current_seen);
1224 interesting.remove(&current_seen);
1223 }
1225 }
1224
1226
1225 current_rev = Revision(current_rev.0 - 1);
1227 current_rev = Revision(current_rev.0 - 1);
1226 }
1228 }
1227
1229
1228 if interesting.len() != 1 {
1230 if interesting.len() != 1 {
1229 return Ok(vec![]);
1231 return Ok(vec![]);
1230 }
1232 }
1231 let mask = interesting.keys().next().unwrap();
1233 let mask = interesting.keys().next().unwrap();
1232
1234
1233 Ok(mapping
1235 Ok(mapping
1234 .into_iter()
1236 .into_iter()
1235 .filter_map(|(shift, rev)| {
1237 .filter_map(|(shift, rev)| {
1236 if (mask & shift) != 0 {
1238 if (mask & shift) != 0 {
1237 return Some(rev);
1239 return Some(rev);
1238 }
1240 }
1239 None
1241 None
1240 })
1242 })
1241 .collect())
1243 .collect())
1242 }
1244 }
1243 }
1245 }
1244
1246
1247 /// The kind of functionality needed by find_gca_candidates
1248 ///
1249 /// This is a bit mask which can be declared to be "poisoned", which callers
1250 /// interpret to break out of some loops.
1251 ///
1252 /// The maximum capacity of the bit mask is up to the actual implementation
1253 trait PoisonableBitSet: Sized + PartialEq {
1254 /// Return a vector of exactly n elements, initialized to be empty.
1255 ///
1256 /// Optimization can vastly depend on implementation. Those being `Copy`
1257 /// and having constant capacity typically can have a very simple
1258 /// implementation.
1259 fn vec_of_empty(sets_size: usize, vec_len: usize) -> Vec<Self>;
1260
1261 /// The size of the bit mask in memory
1262 fn size(&self) -> usize;
1263
1264 /// The number of elements that can be represented in the set.
1265 ///
1266 /// Another way to put it is that it is the highest integer `C` such that
1267 /// the set is guaranteed to always be a subset of the integer range
1268 /// `[0, C)`
1269 fn capacity(&self) -> usize;
1270
1271 /// Declare `n` to belong to the set
1272 fn add(&mut self, n: usize);
1273
1274 /// Declare `n` not to belong to the set
1275 fn discard(&mut self, n: usize);
1276
1277 /// Replace this bit set by its union with other
1278 fn union(&mut self, other: &Self);
1279
1280 /// Poison the bit set
1281 ///
1282 /// Interpretation up to the caller
1283 fn poison(&mut self);
1284
1285 /// Is the bit set poisoned?
1286 ///
1287 /// Interpretation is up to the caller
1288 fn is_poisoned(&self) -> bool;
1289
1290 /// Is the bit set empty?
1291 fn is_empty(&self) -> bool;
1292
1293 /// return `true` if and only if the bit is the full range `[0, n)`
1294 /// of integers
1295 fn is_full_range(&self, n: usize) -> bool;
1296 }
1297
1298 const U64_POISON: u64 = 1 << 63;
1299
1300 impl PoisonableBitSet for u64 {
1301 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1302 vec![0u64; vec_len]
1303 }
1304
1305 fn size(&self) -> usize {
1306 8
1307 }
1308
1309 fn capacity(&self) -> usize {
1310 63
1311 }
1312
1313 fn add(&mut self, n: usize) {
1314 (*self) |= 1u64 << n;
1315 }
1316
1317 fn discard(&mut self, n: usize) {
1318 (*self) &= u64::MAX - (1u64 << n);
1319 }
1320
1321 fn union(&mut self, other: &Self) {
1322 (*self) |= *other;
1323 }
1324
1325 fn is_full_range(&self, n: usize) -> bool {
1326 *self + 1 == (1u64 << n)
1327 }
1328
1329 fn is_empty(&self) -> bool {
1330 *self == 0
1331 }
1332
1333 fn poison(&mut self) {
1334 *self = U64_POISON;
1335 }
1336
1337 fn is_poisoned(&self) -> bool {
1338 // equality comparison would be tempting but would not resist
1339 // operations after poisoning (even if these should be bogus).
1340 *self >= U64_POISON
1341 }
1342 }
1343
1344 /// A poisonable bit set whose capacity is not known at compile time but
1345 /// is constant after initial construction
1346 ///
1347 /// This can be way further optimized if performance assessments (speed
1348 /// and/or RAM) require it.
1349 /// As far as RAM is concerned, for large vectors of these, the main problem
1350 /// would be the repetition of set_size in each item. We would need a trait
1351 /// to abstract over the idea of a vector of such bit sets to do better.
1352 #[derive(Clone, PartialEq)]
1353 struct NonStaticPoisonableBitSet {
1354 set_size: usize,
1355 bit_set: Vec<u64>,
1356 }
1357
1358 /// Number of `u64` needed for a [`NonStaticPoisonableBitSet`] of given size
1359 fn non_static_poisonable_inner_len(set_size: usize) -> usize {
1360 1 + (set_size + 1) / 64
1361 }
1362
1363 impl NonStaticPoisonableBitSet {
1364 /// The index of the sub-bit set for the given n, and the index inside
1365 /// the latter
1366 fn index(&self, n: usize) -> (usize, usize) {
1367 (n / 64, n % 64)
1368 }
1369 }
1370
1371 /// Mock implementation to ensure that the trait makes sense
1372 impl PoisonableBitSet for NonStaticPoisonableBitSet {
1373 fn vec_of_empty(set_size: usize, vec_len: usize) -> Vec<Self> {
1374 let tmpl = Self {
1375 set_size,
1376 bit_set: vec![0u64; non_static_poisonable_inner_len(set_size)],
1377 };
1378 vec![tmpl; vec_len]
1379 }
1380
1381 fn size(&self) -> usize {
1382 8 + self.bit_set.len() * 8
1383 }
1384
1385 fn capacity(&self) -> usize {
1386 self.set_size
1387 }
1388
1389 fn add(&mut self, n: usize) {
1390 let (sub_bs, bit_pos) = self.index(n);
1391 self.bit_set[sub_bs] |= 1 << bit_pos
1392 }
1393
1394 fn discard(&mut self, n: usize) {
1395 let (sub_bs, bit_pos) = self.index(n);
1396 self.bit_set[sub_bs] |= u64::MAX - (1 << bit_pos)
1397 }
1398
1399 fn union(&mut self, other: &Self) {
1400 assert!(
1401 self.set_size == other.set_size,
1402 "Binary operations on bit sets can only be done on same size"
1403 );
1404 for i in 0..self.bit_set.len() - 1 {
1405 self.bit_set[i] |= other.bit_set[i]
1406 }
1407 }
1408
1409 fn is_full_range(&self, n: usize) -> bool {
1410 let (sub_bs, bit_pos) = self.index(n);
1411 self.bit_set[..sub_bs].iter().all(|bs| *bs == u64::MAX)
1412 && self.bit_set[sub_bs] == (1 << (bit_pos + 1)) - 1
1413 }
1414
1415 fn is_empty(&self) -> bool {
1416 self.bit_set.iter().all(|bs| *bs == 0u64)
1417 }
1418
1419 fn poison(&mut self) {
1420 let (sub_bs, bit_pos) = self.index(self.set_size);
1421 self.bit_set[sub_bs] = 1 << bit_pos;
1422 }
1423
1424 fn is_poisoned(&self) -> bool {
1425 let (sub_bs, bit_pos) = self.index(self.set_size);
1426 self.bit_set[sub_bs] >= 1 << bit_pos
1427 }
1428 }
1429
1245 /// Set of roots of all non-public phases
1430 /// Set of roots of all non-public phases
1246 pub type RootsPerPhase = [HashSet<Revision>; Phase::non_public_phases().len()];
1431 pub type RootsPerPhase = [HashSet<Revision>; Phase::non_public_phases().len()];
1247
1432
1248 #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
1433 #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
1249 pub enum Phase {
1434 pub enum Phase {
1250 Public = 0,
1435 Public = 0,
1251 Draft = 1,
1436 Draft = 1,
1252 Secret = 2,
1437 Secret = 2,
1253 Archived = 3,
1438 Archived = 3,
1254 Internal = 4,
1439 Internal = 4,
1255 }
1440 }
1256
1441
1257 impl TryFrom<usize> for Phase {
1442 impl TryFrom<usize> for Phase {
1258 type Error = RevlogError;
1443 type Error = RevlogError;
1259
1444
1260 fn try_from(value: usize) -> Result<Self, Self::Error> {
1445 fn try_from(value: usize) -> Result<Self, Self::Error> {
1261 Ok(match value {
1446 Ok(match value {
1262 0 => Self::Public,
1447 0 => Self::Public,
1263 1 => Self::Draft,
1448 1 => Self::Draft,
1264 2 => Self::Secret,
1449 2 => Self::Secret,
1265 32 => Self::Archived,
1450 32 => Self::Archived,
1266 96 => Self::Internal,
1451 96 => Self::Internal,
1267 v => {
1452 v => {
1268 return Err(RevlogError::corrupted(format!(
1453 return Err(RevlogError::corrupted(format!(
1269 "invalid phase value {}",
1454 "invalid phase value {}",
1270 v
1455 v
1271 )))
1456 )))
1272 }
1457 }
1273 })
1458 })
1274 }
1459 }
1275 }
1460 }
1276
1461
1277 impl Phase {
1462 impl Phase {
1278 pub const fn all_phases() -> &'static [Self] {
1463 pub const fn all_phases() -> &'static [Self] {
1279 &[
1464 &[
1280 Self::Public,
1465 Self::Public,
1281 Self::Draft,
1466 Self::Draft,
1282 Self::Secret,
1467 Self::Secret,
1283 Self::Archived,
1468 Self::Archived,
1284 Self::Internal,
1469 Self::Internal,
1285 ]
1470 ]
1286 }
1471 }
1287 pub const fn non_public_phases() -> &'static [Self] {
1472 pub const fn non_public_phases() -> &'static [Self] {
1288 &[Self::Draft, Self::Secret, Self::Archived, Self::Internal]
1473 &[Self::Draft, Self::Secret, Self::Archived, Self::Internal]
1289 }
1474 }
1290 }
1475 }
1291
1476
1292 fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
1477 fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
1293 let mut offset: usize = 0;
1478 let mut offset: usize = 0;
1294 let mut offsets = Vec::new();
1479 let mut offsets = Vec::new();
1295
1480
1296 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
1481 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
1297 offsets.push(offset);
1482 offsets.push(offset);
1298 let end = offset + INDEX_ENTRY_SIZE;
1483 let end = offset + INDEX_ENTRY_SIZE;
1299 let entry = IndexEntry {
1484 let entry = IndexEntry {
1300 bytes: &bytes[offset..end],
1485 bytes: &bytes[offset..end],
1301 offset_override: None,
1486 offset_override: None,
1302 };
1487 };
1303
1488
1304 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
1489 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
1305 }
1490 }
1306 (offset, offsets)
1491 (offset, offsets)
1307 }
1492 }
1308
1493
1309 impl super::RevlogIndex for Index {
1494 impl super::RevlogIndex for Index {
1310 fn len(&self) -> usize {
1495 fn len(&self) -> usize {
1311 self.len()
1496 self.len()
1312 }
1497 }
1313
1498
1314 fn node(&self, rev: Revision) -> Option<&Node> {
1499 fn node(&self, rev: Revision) -> Option<&Node> {
1315 if rev == NULL_REVISION {
1500 if rev == NULL_REVISION {
1316 return Some(&NULL_NODE);
1501 return Some(&NULL_NODE);
1317 }
1502 }
1318 self.get_entry(rev).map(|entry| entry.hash())
1503 self.get_entry(rev).map(|entry| entry.hash())
1319 }
1504 }
1320 }
1505 }
1321
1506
1322 #[derive(Debug)]
1507 #[derive(Debug)]
1323 pub struct IndexEntry<'a> {
1508 pub struct IndexEntry<'a> {
1324 bytes: &'a [u8],
1509 bytes: &'a [u8],
1325 /// Allows to override the offset value of the entry.
1510 /// Allows to override the offset value of the entry.
1326 ///
1511 ///
1327 /// For interleaved index and data, the offset stored in the index
1512 /// For interleaved index and data, the offset stored in the index
1328 /// corresponds to the separated data offset.
1513 /// corresponds to the separated data offset.
1329 /// It has to be overridden with the actual offset in the interleaved
1514 /// It has to be overridden with the actual offset in the interleaved
1330 /// index which is just after the index block.
1515 /// index which is just after the index block.
1331 ///
1516 ///
1332 /// For separated index and data, the offset stored in the first index
1517 /// For separated index and data, the offset stored in the first index
1333 /// entry is mixed with the index headers.
1518 /// entry is mixed with the index headers.
1334 /// It has to be overridden with 0.
1519 /// It has to be overridden with 0.
1335 offset_override: Option<usize>,
1520 offset_override: Option<usize>,
1336 }
1521 }
1337
1522
1338 impl<'a> IndexEntry<'a> {
1523 impl<'a> IndexEntry<'a> {
1339 /// Return the offset of the data.
1524 /// Return the offset of the data.
1340 pub fn offset(&self) -> usize {
1525 pub fn offset(&self) -> usize {
1341 if let Some(offset_override) = self.offset_override {
1526 if let Some(offset_override) = self.offset_override {
1342 offset_override
1527 offset_override
1343 } else {
1528 } else {
1344 let mut bytes = [0; 8];
1529 let mut bytes = [0; 8];
1345 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
1530 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
1346 BigEndian::read_u64(&bytes[..]) as usize
1531 BigEndian::read_u64(&bytes[..]) as usize
1347 }
1532 }
1348 }
1533 }
1349 pub fn raw_offset(&self) -> u64 {
1534 pub fn raw_offset(&self) -> u64 {
1350 BigEndian::read_u64(&self.bytes[0..8])
1535 BigEndian::read_u64(&self.bytes[0..8])
1351 }
1536 }
1352
1537
1353 /// Same result (except potentially for rev 0) as C `index_get_start()`
1538 /// Same result (except potentially for rev 0) as C `index_get_start()`
1354 fn c_start(&self) -> u64 {
1539 fn c_start(&self) -> u64 {
1355 self.raw_offset() >> 16
1540 self.raw_offset() >> 16
1356 }
1541 }
1357
1542
1358 pub fn flags(&self) -> u16 {
1543 pub fn flags(&self) -> u16 {
1359 BigEndian::read_u16(&self.bytes[6..=7])
1544 BigEndian::read_u16(&self.bytes[6..=7])
1360 }
1545 }
1361
1546
1362 /// Return the compressed length of the data.
1547 /// Return the compressed length of the data.
1363 pub fn compressed_len(&self) -> u32 {
1548 pub fn compressed_len(&self) -> u32 {
1364 BigEndian::read_u32(&self.bytes[8..=11])
1549 BigEndian::read_u32(&self.bytes[8..=11])
1365 }
1550 }
1366
1551
1367 /// Return the uncompressed length of the data.
1552 /// Return the uncompressed length of the data.
1368 pub fn uncompressed_len(&self) -> i32 {
1553 pub fn uncompressed_len(&self) -> i32 {
1369 BigEndian::read_i32(&self.bytes[12..=15])
1554 BigEndian::read_i32(&self.bytes[12..=15])
1370 }
1555 }
1371
1556
1372 /// Return the revision upon which the data has been derived.
1557 /// Return the revision upon which the data has been derived.
1373 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
1558 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
1374 // TODO Maybe return an Option when base_revision == rev?
1559 // TODO Maybe return an Option when base_revision == rev?
1375 // Requires to add rev to IndexEntry
1560 // Requires to add rev to IndexEntry
1376
1561
1377 BigEndian::read_i32(&self.bytes[16..]).into()
1562 BigEndian::read_i32(&self.bytes[16..]).into()
1378 }
1563 }
1379
1564
1380 pub fn link_revision(&self) -> UncheckedRevision {
1565 pub fn link_revision(&self) -> UncheckedRevision {
1381 BigEndian::read_i32(&self.bytes[20..]).into()
1566 BigEndian::read_i32(&self.bytes[20..]).into()
1382 }
1567 }
1383
1568
1384 pub fn p1(&self) -> UncheckedRevision {
1569 pub fn p1(&self) -> UncheckedRevision {
1385 BigEndian::read_i32(&self.bytes[24..]).into()
1570 BigEndian::read_i32(&self.bytes[24..]).into()
1386 }
1571 }
1387
1572
1388 pub fn p2(&self) -> UncheckedRevision {
1573 pub fn p2(&self) -> UncheckedRevision {
1389 BigEndian::read_i32(&self.bytes[28..]).into()
1574 BigEndian::read_i32(&self.bytes[28..]).into()
1390 }
1575 }
1391
1576
1392 /// Return the hash of revision's full text.
1577 /// Return the hash of revision's full text.
1393 ///
1578 ///
1394 /// Currently, SHA-1 is used and only the first 20 bytes of this field
1579 /// Currently, SHA-1 is used and only the first 20 bytes of this field
1395 /// are used.
1580 /// are used.
1396 pub fn hash(&self) -> &'a Node {
1581 pub fn hash(&self) -> &'a Node {
1397 (&self.bytes[32..52]).try_into().unwrap()
1582 (&self.bytes[32..52]).try_into().unwrap()
1398 }
1583 }
1399
1584
1400 pub fn as_bytes(&self) -> &'a [u8] {
1585 pub fn as_bytes(&self) -> &'a [u8] {
1401 self.bytes
1586 self.bytes
1402 }
1587 }
1403 }
1588 }
1404
1589
1405 #[cfg(test)]
1590 #[cfg(test)]
1406 mod tests {
1591 mod tests {
1407 use super::*;
1592 use super::*;
1408 use crate::node::NULL_NODE;
1593 use crate::node::NULL_NODE;
1409
1594
1410 #[cfg(test)]
1595 #[cfg(test)]
1411 #[derive(Debug, Copy, Clone)]
1596 #[derive(Debug, Copy, Clone)]
1412 pub struct IndexEntryBuilder {
1597 pub struct IndexEntryBuilder {
1413 is_first: bool,
1598 is_first: bool,
1414 is_inline: bool,
1599 is_inline: bool,
1415 is_general_delta: bool,
1600 is_general_delta: bool,
1416 version: u16,
1601 version: u16,
1417 offset: usize,
1602 offset: usize,
1418 compressed_len: usize,
1603 compressed_len: usize,
1419 uncompressed_len: usize,
1604 uncompressed_len: usize,
1420 base_revision_or_base_of_delta_chain: Revision,
1605 base_revision_or_base_of_delta_chain: Revision,
1421 link_revision: Revision,
1606 link_revision: Revision,
1422 p1: Revision,
1607 p1: Revision,
1423 p2: Revision,
1608 p2: Revision,
1424 node: Node,
1609 node: Node,
1425 }
1610 }
1426
1611
1427 #[cfg(test)]
1612 #[cfg(test)]
1428 impl IndexEntryBuilder {
1613 impl IndexEntryBuilder {
1429 #[allow(clippy::new_without_default)]
1614 #[allow(clippy::new_without_default)]
1430 pub fn new() -> Self {
1615 pub fn new() -> Self {
1431 Self {
1616 Self {
1432 is_first: false,
1617 is_first: false,
1433 is_inline: false,
1618 is_inline: false,
1434 is_general_delta: true,
1619 is_general_delta: true,
1435 version: 1,
1620 version: 1,
1436 offset: 0,
1621 offset: 0,
1437 compressed_len: 0,
1622 compressed_len: 0,
1438 uncompressed_len: 0,
1623 uncompressed_len: 0,
1439 base_revision_or_base_of_delta_chain: Revision(0),
1624 base_revision_or_base_of_delta_chain: Revision(0),
1440 link_revision: Revision(0),
1625 link_revision: Revision(0),
1441 p1: NULL_REVISION,
1626 p1: NULL_REVISION,
1442 p2: NULL_REVISION,
1627 p2: NULL_REVISION,
1443 node: NULL_NODE,
1628 node: NULL_NODE,
1444 }
1629 }
1445 }
1630 }
1446
1631
1447 pub fn is_first(&mut self, value: bool) -> &mut Self {
1632 pub fn is_first(&mut self, value: bool) -> &mut Self {
1448 self.is_first = value;
1633 self.is_first = value;
1449 self
1634 self
1450 }
1635 }
1451
1636
1452 pub fn with_inline(&mut self, value: bool) -> &mut Self {
1637 pub fn with_inline(&mut self, value: bool) -> &mut Self {
1453 self.is_inline = value;
1638 self.is_inline = value;
1454 self
1639 self
1455 }
1640 }
1456
1641
1457 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
1642 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
1458 self.is_general_delta = value;
1643 self.is_general_delta = value;
1459 self
1644 self
1460 }
1645 }
1461
1646
1462 pub fn with_version(&mut self, value: u16) -> &mut Self {
1647 pub fn with_version(&mut self, value: u16) -> &mut Self {
1463 self.version = value;
1648 self.version = value;
1464 self
1649 self
1465 }
1650 }
1466
1651
1467 pub fn with_offset(&mut self, value: usize) -> &mut Self {
1652 pub fn with_offset(&mut self, value: usize) -> &mut Self {
1468 self.offset = value;
1653 self.offset = value;
1469 self
1654 self
1470 }
1655 }
1471
1656
1472 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
1657 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
1473 self.compressed_len = value;
1658 self.compressed_len = value;
1474 self
1659 self
1475 }
1660 }
1476
1661
1477 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
1662 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
1478 self.uncompressed_len = value;
1663 self.uncompressed_len = value;
1479 self
1664 self
1480 }
1665 }
1481
1666
1482 pub fn with_base_revision_or_base_of_delta_chain(
1667 pub fn with_base_revision_or_base_of_delta_chain(
1483 &mut self,
1668 &mut self,
1484 value: Revision,
1669 value: Revision,
1485 ) -> &mut Self {
1670 ) -> &mut Self {
1486 self.base_revision_or_base_of_delta_chain = value;
1671 self.base_revision_or_base_of_delta_chain = value;
1487 self
1672 self
1488 }
1673 }
1489
1674
1490 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
1675 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
1491 self.link_revision = value;
1676 self.link_revision = value;
1492 self
1677 self
1493 }
1678 }
1494
1679
1495 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
1680 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
1496 self.p1 = value;
1681 self.p1 = value;
1497 self
1682 self
1498 }
1683 }
1499
1684
1500 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
1685 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
1501 self.p2 = value;
1686 self.p2 = value;
1502 self
1687 self
1503 }
1688 }
1504
1689
1505 pub fn with_node(&mut self, value: Node) -> &mut Self {
1690 pub fn with_node(&mut self, value: Node) -> &mut Self {
1506 self.node = value;
1691 self.node = value;
1507 self
1692 self
1508 }
1693 }
1509
1694
1510 pub fn build(&self) -> Vec<u8> {
1695 pub fn build(&self) -> Vec<u8> {
1511 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
1696 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
1512 if self.is_first {
1697 if self.is_first {
1513 bytes.extend(&match (self.is_general_delta, self.is_inline) {
1698 bytes.extend(&match (self.is_general_delta, self.is_inline) {
1514 (false, false) => [0u8, 0],
1699 (false, false) => [0u8, 0],
1515 (false, true) => [0u8, 1],
1700 (false, true) => [0u8, 1],
1516 (true, false) => [0u8, 2],
1701 (true, false) => [0u8, 2],
1517 (true, true) => [0u8, 3],
1702 (true, true) => [0u8, 3],
1518 });
1703 });
1519 bytes.extend(&self.version.to_be_bytes());
1704 bytes.extend(&self.version.to_be_bytes());
1520 // Remaining offset bytes.
1705 // Remaining offset bytes.
1521 bytes.extend(&[0u8; 2]);
1706 bytes.extend(&[0u8; 2]);
1522 } else {
1707 } else {
1523 // Offset stored on 48 bits (6 bytes)
1708 // Offset stored on 48 bits (6 bytes)
1524 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
1709 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
1525 }
1710 }
1526 bytes.extend(&[0u8; 2]); // Revision flags.
1711 bytes.extend(&[0u8; 2]); // Revision flags.
1527 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
1712 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
1528 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
1713 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
1529 bytes.extend(
1714 bytes.extend(
1530 &self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
1715 &self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
1531 );
1716 );
1532 bytes.extend(&self.link_revision.0.to_be_bytes());
1717 bytes.extend(&self.link_revision.0.to_be_bytes());
1533 bytes.extend(&self.p1.0.to_be_bytes());
1718 bytes.extend(&self.p1.0.to_be_bytes());
1534 bytes.extend(&self.p2.0.to_be_bytes());
1719 bytes.extend(&self.p2.0.to_be_bytes());
1535 bytes.extend(self.node.as_bytes());
1720 bytes.extend(self.node.as_bytes());
1536 bytes.extend(vec![0u8; 12]);
1721 bytes.extend(vec![0u8; 12]);
1537 bytes
1722 bytes
1538 }
1723 }
1539 }
1724 }
1540
1725
1541 pub fn is_inline(index_bytes: &[u8]) -> bool {
1726 pub fn is_inline(index_bytes: &[u8]) -> bool {
1542 IndexHeader::parse(index_bytes)
1727 IndexHeader::parse(index_bytes)
1543 .expect("too short")
1728 .expect("too short")
1544 .unwrap()
1729 .unwrap()
1545 .format_flags()
1730 .format_flags()
1546 .is_inline()
1731 .is_inline()
1547 }
1732 }
1548
1733
1549 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
1734 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
1550 IndexHeader::parse(index_bytes)
1735 IndexHeader::parse(index_bytes)
1551 .expect("too short")
1736 .expect("too short")
1552 .unwrap()
1737 .unwrap()
1553 .format_flags()
1738 .format_flags()
1554 .uses_generaldelta()
1739 .uses_generaldelta()
1555 }
1740 }
1556
1741
1557 pub fn get_version(index_bytes: &[u8]) -> u16 {
1742 pub fn get_version(index_bytes: &[u8]) -> u16 {
1558 IndexHeader::parse(index_bytes)
1743 IndexHeader::parse(index_bytes)
1559 .expect("too short")
1744 .expect("too short")
1560 .unwrap()
1745 .unwrap()
1561 .format_version()
1746 .format_version()
1562 }
1747 }
1563
1748
1564 #[test]
1749 #[test]
1565 fn flags_when_no_inline_flag_test() {
1750 fn flags_when_no_inline_flag_test() {
1566 let bytes = IndexEntryBuilder::new()
1751 let bytes = IndexEntryBuilder::new()
1567 .is_first(true)
1752 .is_first(true)
1568 .with_general_delta(false)
1753 .with_general_delta(false)
1569 .with_inline(false)
1754 .with_inline(false)
1570 .build();
1755 .build();
1571
1756
1572 assert!(!is_inline(&bytes));
1757 assert!(!is_inline(&bytes));
1573 assert!(!uses_generaldelta(&bytes));
1758 assert!(!uses_generaldelta(&bytes));
1574 }
1759 }
1575
1760
1576 #[test]
1761 #[test]
1577 fn flags_when_inline_flag_test() {
1762 fn flags_when_inline_flag_test() {
1578 let bytes = IndexEntryBuilder::new()
1763 let bytes = IndexEntryBuilder::new()
1579 .is_first(true)
1764 .is_first(true)
1580 .with_general_delta(false)
1765 .with_general_delta(false)
1581 .with_inline(true)
1766 .with_inline(true)
1582 .build();
1767 .build();
1583
1768
1584 assert!(is_inline(&bytes));
1769 assert!(is_inline(&bytes));
1585 assert!(!uses_generaldelta(&bytes));
1770 assert!(!uses_generaldelta(&bytes));
1586 }
1771 }
1587
1772
1588 #[test]
1773 #[test]
1589 fn flags_when_inline_and_generaldelta_flags_test() {
1774 fn flags_when_inline_and_generaldelta_flags_test() {
1590 let bytes = IndexEntryBuilder::new()
1775 let bytes = IndexEntryBuilder::new()
1591 .is_first(true)
1776 .is_first(true)
1592 .with_general_delta(true)
1777 .with_general_delta(true)
1593 .with_inline(true)
1778 .with_inline(true)
1594 .build();
1779 .build();
1595
1780
1596 assert!(is_inline(&bytes));
1781 assert!(is_inline(&bytes));
1597 assert!(uses_generaldelta(&bytes));
1782 assert!(uses_generaldelta(&bytes));
1598 }
1783 }
1599
1784
1600 #[test]
1785 #[test]
1601 fn test_offset() {
1786 fn test_offset() {
1602 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1787 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1603 let entry = IndexEntry {
1788 let entry = IndexEntry {
1604 bytes: &bytes,
1789 bytes: &bytes,
1605 offset_override: None,
1790 offset_override: None,
1606 };
1791 };
1607
1792
1608 assert_eq!(entry.offset(), 1)
1793 assert_eq!(entry.offset(), 1)
1609 }
1794 }
1610
1795
1611 #[test]
1796 #[test]
1612 fn test_with_overridden_offset() {
1797 fn test_with_overridden_offset() {
1613 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1798 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1614 let entry = IndexEntry {
1799 let entry = IndexEntry {
1615 bytes: &bytes,
1800 bytes: &bytes,
1616 offset_override: Some(2),
1801 offset_override: Some(2),
1617 };
1802 };
1618
1803
1619 assert_eq!(entry.offset(), 2)
1804 assert_eq!(entry.offset(), 2)
1620 }
1805 }
1621
1806
1622 #[test]
1807 #[test]
1623 fn test_compressed_len() {
1808 fn test_compressed_len() {
1624 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
1809 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
1625 let entry = IndexEntry {
1810 let entry = IndexEntry {
1626 bytes: &bytes,
1811 bytes: &bytes,
1627 offset_override: None,
1812 offset_override: None,
1628 };
1813 };
1629
1814
1630 assert_eq!(entry.compressed_len(), 1)
1815 assert_eq!(entry.compressed_len(), 1)
1631 }
1816 }
1632
1817
1633 #[test]
1818 #[test]
1634 fn test_uncompressed_len() {
1819 fn test_uncompressed_len() {
1635 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
1820 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
1636 let entry = IndexEntry {
1821 let entry = IndexEntry {
1637 bytes: &bytes,
1822 bytes: &bytes,
1638 offset_override: None,
1823 offset_override: None,
1639 };
1824 };
1640
1825
1641 assert_eq!(entry.uncompressed_len(), 1)
1826 assert_eq!(entry.uncompressed_len(), 1)
1642 }
1827 }
1643
1828
1644 #[test]
1829 #[test]
1645 fn test_base_revision_or_base_of_delta_chain() {
1830 fn test_base_revision_or_base_of_delta_chain() {
1646 let bytes = IndexEntryBuilder::new()
1831 let bytes = IndexEntryBuilder::new()
1647 .with_base_revision_or_base_of_delta_chain(Revision(1))
1832 .with_base_revision_or_base_of_delta_chain(Revision(1))
1648 .build();
1833 .build();
1649 let entry = IndexEntry {
1834 let entry = IndexEntry {
1650 bytes: &bytes,
1835 bytes: &bytes,
1651 offset_override: None,
1836 offset_override: None,
1652 };
1837 };
1653
1838
1654 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
1839 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
1655 }
1840 }
1656
1841
1657 #[test]
1842 #[test]
1658 fn link_revision_test() {
1843 fn link_revision_test() {
1659 let bytes = IndexEntryBuilder::new()
1844 let bytes = IndexEntryBuilder::new()
1660 .with_link_revision(Revision(123))
1845 .with_link_revision(Revision(123))
1661 .build();
1846 .build();
1662
1847
1663 let entry = IndexEntry {
1848 let entry = IndexEntry {
1664 bytes: &bytes,
1849 bytes: &bytes,
1665 offset_override: None,
1850 offset_override: None,
1666 };
1851 };
1667
1852
1668 assert_eq!(entry.link_revision(), 123.into());
1853 assert_eq!(entry.link_revision(), 123.into());
1669 }
1854 }
1670
1855
1671 #[test]
1856 #[test]
1672 fn p1_test() {
1857 fn p1_test() {
1673 let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();
1858 let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();
1674
1859
1675 let entry = IndexEntry {
1860 let entry = IndexEntry {
1676 bytes: &bytes,
1861 bytes: &bytes,
1677 offset_override: None,
1862 offset_override: None,
1678 };
1863 };
1679
1864
1680 assert_eq!(entry.p1(), 123.into());
1865 assert_eq!(entry.p1(), 123.into());
1681 }
1866 }
1682
1867
1683 #[test]
1868 #[test]
1684 fn p2_test() {
1869 fn p2_test() {
1685 let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();
1870 let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();
1686
1871
1687 let entry = IndexEntry {
1872 let entry = IndexEntry {
1688 bytes: &bytes,
1873 bytes: &bytes,
1689 offset_override: None,
1874 offset_override: None,
1690 };
1875 };
1691
1876
1692 assert_eq!(entry.p2(), 123.into());
1877 assert_eq!(entry.p2(), 123.into());
1693 }
1878 }
1694
1879
1695 #[test]
1880 #[test]
1696 fn node_test() {
1881 fn node_test() {
1697 let node = Node::from_hex("0123456789012345678901234567890123456789")
1882 let node = Node::from_hex("0123456789012345678901234567890123456789")
1698 .unwrap();
1883 .unwrap();
1699 let bytes = IndexEntryBuilder::new().with_node(node).build();
1884 let bytes = IndexEntryBuilder::new().with_node(node).build();
1700
1885
1701 let entry = IndexEntry {
1886 let entry = IndexEntry {
1702 bytes: &bytes,
1887 bytes: &bytes,
1703 offset_override: None,
1888 offset_override: None,
1704 };
1889 };
1705
1890
1706 assert_eq!(*entry.hash(), node);
1891 assert_eq!(*entry.hash(), node);
1707 }
1892 }
1708
1893
1709 #[test]
1894 #[test]
1710 fn version_test() {
1895 fn version_test() {
1711 let bytes = IndexEntryBuilder::new()
1896 let bytes = IndexEntryBuilder::new()
1712 .is_first(true)
1897 .is_first(true)
1713 .with_version(2)
1898 .with_version(2)
1714 .build();
1899 .build();
1715
1900
1716 assert_eq!(get_version(&bytes), 2)
1901 assert_eq!(get_version(&bytes), 2)
1717 }
1902 }
1718 }
1903 }
1719
1904
1720 #[cfg(test)]
1905 #[cfg(test)]
1721 pub use tests::IndexEntryBuilder;
1906 pub use tests::IndexEntryBuilder;
General Comments 0
You need to be logged in to leave comments. Login now