##// END OF EJS Templates
rust: fix clippy lints...
Raphaël Gomès -
r53188:f90796d3 default
parent child Browse files
Show More
@@ -1,847 +1,846
1 // ancestors.rs
1 // ancestors.rs
2 //
2 //
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Rust versions of generic DAG ancestors algorithms for Mercurial
8 //! Rust versions of generic DAG ancestors algorithms for Mercurial
9
9
10 use super::{Graph, GraphError, Revision, NULL_REVISION};
10 use super::{Graph, GraphError, Revision, NULL_REVISION};
11 use crate::dagops;
11 use crate::dagops;
12 use std::cmp::max;
12 use std::cmp::max;
13 use std::collections::{BinaryHeap, HashSet};
13 use std::collections::{BinaryHeap, HashSet};
14
14
15 /// Iterator over the ancestors of a given list of revisions
15 /// Iterator over the ancestors of a given list of revisions
16 /// This is a generic type, defined and implemented for any Graph, so that
16 /// This is a generic type, defined and implemented for any Graph, so that
17 /// it's easy to
17 /// it's easy to
18 ///
18 ///
19 /// - unit test in pure Rust
19 /// - unit test in pure Rust
20 /// - bind to main Mercurial code, potentially in several ways and have these
20 /// - bind to main Mercurial code, potentially in several ways and have these
21 /// bindings evolve over time
21 /// bindings evolve over time
22 pub struct AncestorsIterator<G: Graph> {
22 pub struct AncestorsIterator<G: Graph> {
23 graph: G,
23 graph: G,
24 visit: BinaryHeap<Revision>,
24 visit: BinaryHeap<Revision>,
25 seen: HashSet<Revision>,
25 seen: HashSet<Revision>,
26 stoprev: Revision,
26 stoprev: Revision,
27 }
27 }
28
28
29 pub struct MissingAncestors<G: Graph> {
29 pub struct MissingAncestors<G: Graph> {
30 graph: G,
30 graph: G,
31 bases: HashSet<Revision>,
31 bases: HashSet<Revision>,
32 max_base: Revision,
32 max_base: Revision,
33 }
33 }
34
34
35 impl<G: Graph> AncestorsIterator<G> {
35 impl<G: Graph> AncestorsIterator<G> {
36 /// Constructor.
36 /// Constructor.
37 ///
37 ///
38 /// if `inclusive` is true, then the init revisions are emitted in
38 /// if `inclusive` is true, then the init revisions are emitted in
39 /// particular, otherwise iteration starts from their parents.
39 /// particular, otherwise iteration starts from their parents.
40 pub fn new(
40 pub fn new(
41 graph: G,
41 graph: G,
42 initrevs: impl IntoIterator<Item = Revision>,
42 initrevs: impl IntoIterator<Item = Revision>,
43 stoprev: Revision,
43 stoprev: Revision,
44 inclusive: bool,
44 inclusive: bool,
45 ) -> Result<Self, GraphError> {
45 ) -> Result<Self, GraphError> {
46 let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
46 let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
47 if inclusive {
47 if inclusive {
48 let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
48 let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
49 let seen = visit.iter().cloned().collect();
49 let seen = visit.iter().cloned().collect();
50 return Ok(AncestorsIterator {
50 return Ok(AncestorsIterator {
51 visit,
51 visit,
52 seen,
52 seen,
53 stoprev,
53 stoprev,
54 graph,
54 graph,
55 });
55 });
56 }
56 }
57 let mut this = AncestorsIterator {
57 let mut this = AncestorsIterator {
58 visit: BinaryHeap::new(),
58 visit: BinaryHeap::new(),
59 seen: HashSet::new(),
59 seen: HashSet::new(),
60 stoprev,
60 stoprev,
61 graph,
61 graph,
62 };
62 };
63 this.seen.insert(NULL_REVISION);
63 this.seen.insert(NULL_REVISION);
64 for rev in filtered_initrevs {
64 for rev in filtered_initrevs {
65 for parent in this.graph.parents(rev)?.iter().cloned() {
65 for parent in this.graph.parents(rev)?.iter().cloned() {
66 this.conditionally_push_rev(parent);
66 this.conditionally_push_rev(parent);
67 }
67 }
68 }
68 }
69 Ok(this)
69 Ok(this)
70 }
70 }
71
71
72 #[inline]
72 #[inline]
73 fn conditionally_push_rev(&mut self, rev: Revision) {
73 fn conditionally_push_rev(&mut self, rev: Revision) {
74 if self.stoprev <= rev && self.seen.insert(rev) {
74 if self.stoprev <= rev && self.seen.insert(rev) {
75 self.visit.push(rev);
75 self.visit.push(rev);
76 }
76 }
77 }
77 }
78
78
79 /// Consumes partially the iterator to tell if the given target
79 /// Consumes partially the iterator to tell if the given target
80 /// revision
80 /// revision
81 /// is in the ancestors it emits.
81 /// is in the ancestors it emits.
82 /// This is meant for iterators actually dedicated to that kind of
82 /// This is meant for iterators actually dedicated to that kind of
83 /// purpose
83 /// purpose
84 pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
84 pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
85 if self.seen.contains(&target) && target != NULL_REVISION {
85 if self.seen.contains(&target) && target != NULL_REVISION {
86 return Ok(true);
86 return Ok(true);
87 }
87 }
88 for item in self {
88 for item in self {
89 let rev = item?;
89 let rev = item?;
90 if rev == target {
90 if rev == target {
91 return Ok(true);
91 return Ok(true);
92 }
92 }
93 if rev < target {
93 if rev < target {
94 return Ok(false);
94 return Ok(false);
95 }
95 }
96 }
96 }
97 Ok(false)
97 Ok(false)
98 }
98 }
99
99
100 pub fn peek(&self) -> Option<Revision> {
100 pub fn peek(&self) -> Option<Revision> {
101 self.visit.peek().cloned()
101 self.visit.peek().cloned()
102 }
102 }
103
103
104 /// Tell if the iterator is about an empty set
104 /// Tell if the iterator is about an empty set
105 ///
105 ///
106 /// The result does not depend whether the iterator has been consumed
106 /// The result does not depend whether the iterator has been consumed
107 /// or not.
107 /// or not.
108 /// This is mostly meant for iterators backing a lazy ancestors set
108 /// This is mostly meant for iterators backing a lazy ancestors set
109 pub fn is_empty(&self) -> bool {
109 pub fn is_empty(&self) -> bool {
110 if self.visit.len() > 0 {
110 if self.visit.len() > 0 {
111 return false;
111 return false;
112 }
112 }
113 if self.seen.len() > 1 {
113 if self.seen.len() > 1 {
114 return false;
114 return false;
115 }
115 }
116 // at this point, the seen set is at most a singleton.
116 // at this point, the seen set is at most a singleton.
117 // If not `self.inclusive`, it's still possible that it has only
117 // If not `self.inclusive`, it's still possible that it has only
118 // the null revision
118 // the null revision
119 self.seen.is_empty() || self.seen.contains(&NULL_REVISION)
119 self.seen.is_empty() || self.seen.contains(&NULL_REVISION)
120 }
120 }
121 }
121 }
122
122
123 /// Main implementation for the iterator
123 /// Main implementation for the iterator
124 ///
124 ///
125 /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py`
125 /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py`
126 /// with a few non crucial differences:
126 /// with a few non crucial differences:
127 ///
127 ///
128 /// - there's no filtering of invalid parent revisions. Actually, it should be
128 /// - there's no filtering of invalid parent revisions. Actually, it should be
129 /// consistent and more efficient to filter them from the end caller.
129 /// consistent and more efficient to filter them from the end caller.
130 /// - we don't have the optimization for adjacent revisions (i.e., the case
130 /// - we don't have the optimization for adjacent revisions (i.e., the case
131 /// where `p1 == rev - 1`), because it amounts to update the first element of
131 /// where `p1 == rev - 1`), because it amounts to update the first element of
132 /// the heap without sifting, which Rust's BinaryHeap doesn't let us do.
132 /// the heap without sifting, which Rust's BinaryHeap doesn't let us do.
133 /// - we save a few pushes by comparing with `stoprev` before pushing
133 /// - we save a few pushes by comparing with `stoprev` before pushing
134 impl<G: Graph> Iterator for AncestorsIterator<G> {
134 impl<G: Graph> Iterator for AncestorsIterator<G> {
135 type Item = Result<Revision, GraphError>;
135 type Item = Result<Revision, GraphError>;
136
136
137 fn next(&mut self) -> Option<Self::Item> {
137 fn next(&mut self) -> Option<Self::Item> {
138 let current = match self.visit.peek() {
138 let current = match self.visit.peek() {
139 None => {
139 None => {
140 return None;
140 return None;
141 }
141 }
142 Some(c) => *c,
142 Some(c) => *c,
143 };
143 };
144 let [p1, p2] = match self.graph.parents(current) {
144 let [p1, p2] = match self.graph.parents(current) {
145 Ok(ps) => ps,
145 Ok(ps) => ps,
146 Err(e) => return Some(Err(e)),
146 Err(e) => return Some(Err(e)),
147 };
147 };
148 if p1 < self.stoprev || !self.seen.insert(p1) {
148 if p1 < self.stoprev || !self.seen.insert(p1) {
149 self.visit.pop();
149 self.visit.pop();
150 } else {
150 } else {
151 *(self.visit.peek_mut().unwrap()) = p1;
151 *(self.visit.peek_mut().unwrap()) = p1;
152 };
152 };
153
153
154 self.conditionally_push_rev(p2);
154 self.conditionally_push_rev(p2);
155 Some(Ok(current))
155 Some(Ok(current))
156 }
156 }
157 }
157 }
158
158
159 impl<G: Graph> MissingAncestors<G> {
159 impl<G: Graph> MissingAncestors<G> {
160 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
160 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
161 let mut created = MissingAncestors {
161 let mut created = MissingAncestors {
162 graph,
162 graph,
163 bases: HashSet::new(),
163 bases: HashSet::new(),
164 max_base: NULL_REVISION,
164 max_base: NULL_REVISION,
165 };
165 };
166 created.add_bases(bases);
166 created.add_bases(bases);
167 created
167 created
168 }
168 }
169
169
170 pub fn has_bases(&self) -> bool {
170 pub fn has_bases(&self) -> bool {
171 !self.bases.is_empty()
171 !self.bases.is_empty()
172 }
172 }
173
173
174 /// Return a reference to current bases.
174 /// Return a reference to current bases.
175 ///
175 ///
176 /// This is useful in unit tests, but also setdiscovery.py does
176 /// This is useful in unit tests, but also setdiscovery.py does
177 /// read the bases attribute of a ancestor.missingancestors instance.
177 /// read the bases attribute of a ancestor.missingancestors instance.
178 pub fn get_bases(&self) -> &HashSet<Revision> {
178 pub fn get_bases(&self) -> &HashSet<Revision> {
179 &self.bases
179 &self.bases
180 }
180 }
181
181
182 /// Computes the relative heads of current bases.
182 /// Computes the relative heads of current bases.
183 ///
183 ///
184 /// The object is still usable after this.
184 /// The object is still usable after this.
185 pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
185 pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
186 dagops::heads(&self.graph, self.bases.iter())
186 dagops::heads(&self.graph, self.bases.iter())
187 }
187 }
188
188
189 /// Consumes the object and returns the relative heads of its bases.
189 /// Consumes the object and returns the relative heads of its bases.
190 pub fn into_bases_heads(
190 pub fn into_bases_heads(
191 mut self,
191 mut self,
192 ) -> Result<HashSet<Revision>, GraphError> {
192 ) -> Result<HashSet<Revision>, GraphError> {
193 dagops::retain_heads(&self.graph, &mut self.bases)?;
193 dagops::retain_heads(&self.graph, &mut self.bases)?;
194 Ok(self.bases)
194 Ok(self.bases)
195 }
195 }
196
196
197 /// Add some revisions to `self.bases`
197 /// Add some revisions to `self.bases`
198 ///
198 ///
199 /// Takes care of keeping `self.max_base` up to date.
199 /// Takes care of keeping `self.max_base` up to date.
200 pub fn add_bases(
200 pub fn add_bases(
201 &mut self,
201 &mut self,
202 new_bases: impl IntoIterator<Item = Revision>,
202 new_bases: impl IntoIterator<Item = Revision>,
203 ) {
203 ) {
204 let mut max_base = self.max_base;
204 let mut max_base = self.max_base;
205 self.bases.extend(
205 self.bases.extend(
206 new_bases
206 new_bases
207 .into_iter()
207 .into_iter()
208 .filter(|&rev| rev != NULL_REVISION)
208 .filter(|&rev| rev != NULL_REVISION)
209 .map(|r| {
209 .inspect(|&r| {
210 if r > max_base {
210 if r > max_base {
211 max_base = r;
211 max_base = r;
212 }
212 }
213 r
214 }),
213 }),
215 );
214 );
216 self.max_base = max_base;
215 self.max_base = max_base;
217 }
216 }
218
217
219 /// Remove all ancestors of self.bases from the revs set (in place)
218 /// Remove all ancestors of self.bases from the revs set (in place)
220 pub fn remove_ancestors_from(
219 pub fn remove_ancestors_from(
221 &mut self,
220 &mut self,
222 revs: &mut HashSet<Revision>,
221 revs: &mut HashSet<Revision>,
223 ) -> Result<(), GraphError> {
222 ) -> Result<(), GraphError> {
224 revs.retain(|r| !self.bases.contains(r));
223 revs.retain(|r| !self.bases.contains(r));
225 // the null revision is always an ancestor. Logically speaking
224 // the null revision is always an ancestor. Logically speaking
226 // it's debatable in case bases is empty, but the Python
225 // it's debatable in case bases is empty, but the Python
227 // implementation always adds NULL_REVISION to bases, making it
226 // implementation always adds NULL_REVISION to bases, making it
228 // unconditionnally true.
227 // unconditionnally true.
229 revs.remove(&NULL_REVISION);
228 revs.remove(&NULL_REVISION);
230 if revs.is_empty() {
229 if revs.is_empty() {
231 return Ok(());
230 return Ok(());
232 }
231 }
233 // anything in revs > start is definitely not an ancestor of bases
232 // anything in revs > start is definitely not an ancestor of bases
234 // revs <= start need to be investigated
233 // revs <= start need to be investigated
235 if self.max_base == NULL_REVISION {
234 if self.max_base == NULL_REVISION {
236 return Ok(());
235 return Ok(());
237 }
236 }
238
237
239 // whatever happens, we'll keep at least keepcount of them
238 // whatever happens, we'll keep at least keepcount of them
240 // knowing this gives us a earlier stop condition than
239 // knowing this gives us a earlier stop condition than
241 // going all the way to the root
240 // going all the way to the root
242 let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
241 let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
243
242
244 let mut curr = self.max_base;
243 let mut curr = self.max_base;
245 while curr != NULL_REVISION && revs.len() > keepcount {
244 while curr != NULL_REVISION && revs.len() > keepcount {
246 if self.bases.contains(&curr) {
245 if self.bases.contains(&curr) {
247 revs.remove(&curr);
246 revs.remove(&curr);
248 self.add_parents(curr)?;
247 self.add_parents(curr)?;
249 }
248 }
250 // We know this revision is safe because we've checked the bounds
249 // We know this revision is safe because we've checked the bounds
251 // before.
250 // before.
252 curr = Revision(curr.0 - 1);
251 curr = Revision(curr.0 - 1);
253 }
252 }
254 Ok(())
253 Ok(())
255 }
254 }
256
255
257 /// Add the parents of `rev` to `self.bases`
256 /// Add the parents of `rev` to `self.bases`
258 ///
257 ///
259 /// This has no effect on `self.max_base`
258 /// This has no effect on `self.max_base`
260 #[inline]
259 #[inline]
261 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
260 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
262 if rev == NULL_REVISION {
261 if rev == NULL_REVISION {
263 return Ok(());
262 return Ok(());
264 }
263 }
265 for p in self.graph.parents(rev)?.iter().cloned() {
264 for p in self.graph.parents(rev)?.iter().cloned() {
266 // No need to bother the set with inserting NULL_REVISION over and
265 // No need to bother the set with inserting NULL_REVISION over and
267 // over
266 // over
268 if p != NULL_REVISION {
267 if p != NULL_REVISION {
269 self.bases.insert(p);
268 self.bases.insert(p);
270 }
269 }
271 }
270 }
272 Ok(())
271 Ok(())
273 }
272 }
274
273
275 /// Return all the ancestors of revs that are not ancestors of self.bases
274 /// Return all the ancestors of revs that are not ancestors of self.bases
276 ///
275 ///
277 /// This may include elements from revs.
276 /// This may include elements from revs.
278 ///
277 ///
279 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
278 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
280 /// revision number order, which is a topological order.
279 /// revision number order, which is a topological order.
281 pub fn missing_ancestors(
280 pub fn missing_ancestors(
282 &mut self,
281 &mut self,
283 revs: impl IntoIterator<Item = Revision>,
282 revs: impl IntoIterator<Item = Revision>,
284 ) -> Result<Vec<Revision>, GraphError> {
283 ) -> Result<Vec<Revision>, GraphError> {
285 // just for convenience and comparison with Python version
284 // just for convenience and comparison with Python version
286 let bases_visit = &mut self.bases;
285 let bases_visit = &mut self.bases;
287 let mut revs: HashSet<Revision> = revs
286 let mut revs: HashSet<Revision> = revs
288 .into_iter()
287 .into_iter()
289 .filter(|r| !bases_visit.contains(r))
288 .filter(|r| !bases_visit.contains(r))
290 .collect();
289 .collect();
291 let revs_visit = &mut revs;
290 let revs_visit = &mut revs;
292 let mut both_visit: HashSet<Revision> =
291 let mut both_visit: HashSet<Revision> =
293 revs_visit.intersection(bases_visit).cloned().collect();
292 revs_visit.intersection(bases_visit).cloned().collect();
294 if revs_visit.is_empty() {
293 if revs_visit.is_empty() {
295 return Ok(Vec::new());
294 return Ok(Vec::new());
296 }
295 }
297 let max_revs = revs_visit.iter().cloned().max().unwrap();
296 let max_revs = revs_visit.iter().cloned().max().unwrap();
298 let start = max(self.max_base, max_revs);
297 let start = max(self.max_base, max_revs);
299
298
300 // TODO heuristics for with_capacity()?
299 // TODO heuristics for with_capacity()?
301 let mut missing: Vec<Revision> = Vec::new();
300 let mut missing: Vec<Revision> = Vec::new();
302 for curr in (0..=start.0).rev() {
301 for curr in (0..=start.0).rev() {
303 if revs_visit.is_empty() {
302 if revs_visit.is_empty() {
304 break;
303 break;
305 }
304 }
306 if both_visit.remove(&Revision(curr)) {
305 if both_visit.remove(&Revision(curr)) {
307 // curr's parents might have made it into revs_visit through
306 // curr's parents might have made it into revs_visit through
308 // another path
307 // another path
309 for p in self.graph.parents(Revision(curr))?.iter().cloned() {
308 for p in self.graph.parents(Revision(curr))?.iter().cloned() {
310 if p == NULL_REVISION {
309 if p == NULL_REVISION {
311 continue;
310 continue;
312 }
311 }
313 revs_visit.remove(&p);
312 revs_visit.remove(&p);
314 bases_visit.insert(p);
313 bases_visit.insert(p);
315 both_visit.insert(p);
314 both_visit.insert(p);
316 }
315 }
317 } else if revs_visit.remove(&Revision(curr)) {
316 } else if revs_visit.remove(&Revision(curr)) {
318 missing.push(Revision(curr));
317 missing.push(Revision(curr));
319 for p in self.graph.parents(Revision(curr))?.iter().cloned() {
318 for p in self.graph.parents(Revision(curr))?.iter().cloned() {
320 if p == NULL_REVISION {
319 if p == NULL_REVISION {
321 continue;
320 continue;
322 }
321 }
323 if bases_visit.contains(&p) {
322 if bases_visit.contains(&p) {
324 // p is already known to be an ancestor of revs_visit
323 // p is already known to be an ancestor of revs_visit
325 revs_visit.remove(&p);
324 revs_visit.remove(&p);
326 both_visit.insert(p);
325 both_visit.insert(p);
327 } else if both_visit.contains(&p) {
326 } else if both_visit.contains(&p) {
328 // p should have been in bases_visit
327 // p should have been in bases_visit
329 revs_visit.remove(&p);
328 revs_visit.remove(&p);
330 bases_visit.insert(p);
329 bases_visit.insert(p);
331 } else {
330 } else {
332 // visit later
331 // visit later
333 revs_visit.insert(p);
332 revs_visit.insert(p);
334 }
333 }
335 }
334 }
336 } else if bases_visit.contains(&Revision(curr)) {
335 } else if bases_visit.contains(&Revision(curr)) {
337 for p in self.graph.parents(Revision(curr))?.iter().cloned() {
336 for p in self.graph.parents(Revision(curr))?.iter().cloned() {
338 if p == NULL_REVISION {
337 if p == NULL_REVISION {
339 continue;
338 continue;
340 }
339 }
341 if revs_visit.remove(&p) || both_visit.contains(&p) {
340 if revs_visit.remove(&p) || both_visit.contains(&p) {
342 // p is an ancestor of bases_visit, and is implicitly
341 // p is an ancestor of bases_visit, and is implicitly
343 // in revs_visit, which means p is ::revs & ::bases.
342 // in revs_visit, which means p is ::revs & ::bases.
344 bases_visit.insert(p);
343 bases_visit.insert(p);
345 both_visit.insert(p);
344 both_visit.insert(p);
346 } else {
345 } else {
347 bases_visit.insert(p);
346 bases_visit.insert(p);
348 }
347 }
349 }
348 }
350 }
349 }
351 }
350 }
352 missing.reverse();
351 missing.reverse();
353 Ok(missing)
352 Ok(missing)
354 }
353 }
355 }
354 }
356
355
357 #[cfg(test)]
356 #[cfg(test)]
358 mod tests {
357 mod tests {
359
358
360 use super::*;
359 use super::*;
361 use crate::{
360 use crate::{
362 testing::{SampleGraph, VecGraph},
361 testing::{SampleGraph, VecGraph},
363 BaseRevision,
362 BaseRevision,
364 };
363 };
365
364
366 impl From<BaseRevision> for Revision {
365 impl From<BaseRevision> for Revision {
367 fn from(value: BaseRevision) -> Self {
366 fn from(value: BaseRevision) -> Self {
368 if !cfg!(test) {
367 if !cfg!(test) {
369 panic!("should only be used in tests")
368 panic!("should only be used in tests")
370 }
369 }
371 Revision(value)
370 Revision(value)
372 }
371 }
373 }
372 }
374
373
375 impl PartialEq<BaseRevision> for Revision {
374 impl PartialEq<BaseRevision> for Revision {
376 fn eq(&self, other: &BaseRevision) -> bool {
375 fn eq(&self, other: &BaseRevision) -> bool {
377 if !cfg!(test) {
376 if !cfg!(test) {
378 panic!("should only be used in tests")
377 panic!("should only be used in tests")
379 }
378 }
380 self.0.eq(other)
379 self.0.eq(other)
381 }
380 }
382 }
381 }
383
382
384 impl PartialEq<u32> for Revision {
383 impl PartialEq<u32> for Revision {
385 fn eq(&self, other: &u32) -> bool {
384 fn eq(&self, other: &u32) -> bool {
386 if !cfg!(test) {
385 if !cfg!(test) {
387 panic!("should only be used in tests")
386 panic!("should only be used in tests")
388 }
387 }
389 let check: Result<u32, _> = self.0.try_into();
388 let check: Result<u32, _> = self.0.try_into();
390 match check {
389 match check {
391 Ok(value) => value.eq(other),
390 Ok(value) => value.eq(other),
392 Err(_) => false,
391 Err(_) => false,
393 }
392 }
394 }
393 }
395 }
394 }
396
395
397 fn list_ancestors<G: Graph>(
396 fn list_ancestors<G: Graph>(
398 graph: G,
397 graph: G,
399 initrevs: Vec<Revision>,
398 initrevs: Vec<Revision>,
400 stoprev: Revision,
399 stoprev: Revision,
401 inclusive: bool,
400 inclusive: bool,
402 ) -> Vec<Revision> {
401 ) -> Vec<Revision> {
403 AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
402 AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
404 .unwrap()
403 .unwrap()
405 .map(|res| res.unwrap())
404 .map(|res| res.unwrap())
406 .collect()
405 .collect()
407 }
406 }
408
407
409 #[test]
408 #[test]
410 /// Same tests as test-ancestor.py, without membership
409 /// Same tests as test-ancestor.py, without membership
411 /// (see also test-ancestor.py.out)
410 /// (see also test-ancestor.py.out)
412 fn test_list_ancestor() {
411 fn test_list_ancestor() {
413 assert_eq!(
412 assert_eq!(
414 list_ancestors(SampleGraph, vec![], 0.into(), false),
413 list_ancestors(SampleGraph, vec![], 0.into(), false),
415 Vec::<Revision>::new()
414 Vec::<Revision>::new()
416 );
415 );
417 assert_eq!(
416 assert_eq!(
418 list_ancestors(
417 list_ancestors(
419 SampleGraph,
418 SampleGraph,
420 vec![11.into(), 13.into()],
419 vec![11.into(), 13.into()],
421 0.into(),
420 0.into(),
422 false
421 false
423 ),
422 ),
424 vec![8, 7, 4, 3, 2, 1, 0]
423 vec![8, 7, 4, 3, 2, 1, 0]
425 );
424 );
426 // it works as well on references, because &Graph implements Graph
425 // it works as well on references, because &Graph implements Graph
427 // this is needed as of this writing by RHGitaly
426 // this is needed as of this writing by RHGitaly
428 assert_eq!(
427 assert_eq!(
429 list_ancestors(
428 list_ancestors(
430 &SampleGraph,
429 &SampleGraph,
431 vec![11.into(), 13.into()],
430 vec![11.into(), 13.into()],
432 0.into(),
431 0.into(),
433 false
432 false
434 ),
433 ),
435 vec![8, 7, 4, 3, 2, 1, 0]
434 vec![8, 7, 4, 3, 2, 1, 0]
436 );
435 );
437
436
438 assert_eq!(
437 assert_eq!(
439 list_ancestors(
438 list_ancestors(
440 SampleGraph,
439 SampleGraph,
441 vec![1.into(), 3.into()],
440 vec![1.into(), 3.into()],
442 0.into(),
441 0.into(),
443 false
442 false
444 ),
443 ),
445 vec![1, 0]
444 vec![1, 0]
446 );
445 );
447 assert_eq!(
446 assert_eq!(
448 list_ancestors(
447 list_ancestors(
449 SampleGraph,
448 SampleGraph,
450 vec![11.into(), 13.into()],
449 vec![11.into(), 13.into()],
451 0.into(),
450 0.into(),
452 true
451 true
453 ),
452 ),
454 vec![13, 11, 8, 7, 4, 3, 2, 1, 0]
453 vec![13, 11, 8, 7, 4, 3, 2, 1, 0]
455 );
454 );
456 assert_eq!(
455 assert_eq!(
457 list_ancestors(
456 list_ancestors(
458 SampleGraph,
457 SampleGraph,
459 vec![11.into(), 13.into()],
458 vec![11.into(), 13.into()],
460 6.into(),
459 6.into(),
461 false
460 false
462 ),
461 ),
463 vec![8, 7]
462 vec![8, 7]
464 );
463 );
465 assert_eq!(
464 assert_eq!(
466 list_ancestors(
465 list_ancestors(
467 SampleGraph,
466 SampleGraph,
468 vec![11.into(), 13.into()],
467 vec![11.into(), 13.into()],
469 6.into(),
468 6.into(),
470 true
469 true
471 ),
470 ),
472 vec![13, 11, 8, 7]
471 vec![13, 11, 8, 7]
473 );
472 );
474 assert_eq!(
473 assert_eq!(
475 list_ancestors(
474 list_ancestors(
476 SampleGraph,
475 SampleGraph,
477 vec![11.into(), 13.into()],
476 vec![11.into(), 13.into()],
478 11.into(),
477 11.into(),
479 true
478 true
480 ),
479 ),
481 vec![13, 11]
480 vec![13, 11]
482 );
481 );
483 assert_eq!(
482 assert_eq!(
484 list_ancestors(
483 list_ancestors(
485 SampleGraph,
484 SampleGraph,
486 vec![11.into(), 13.into()],
485 vec![11.into(), 13.into()],
487 12.into(),
486 12.into(),
488 true
487 true
489 ),
488 ),
490 vec![13]
489 vec![13]
491 );
490 );
492 assert_eq!(
491 assert_eq!(
493 list_ancestors(
492 list_ancestors(
494 SampleGraph,
493 SampleGraph,
495 vec![10.into(), 1.into()],
494 vec![10.into(), 1.into()],
496 0.into(),
495 0.into(),
497 true
496 true
498 ),
497 ),
499 vec![10, 5, 4, 2, 1, 0]
498 vec![10, 5, 4, 2, 1, 0]
500 );
499 );
501 }
500 }
502
501
503 #[test]
502 #[test]
504 /// Corner case that's not directly in test-ancestors.py, but
503 /// Corner case that's not directly in test-ancestors.py, but
505 /// that happens quite often, as demonstrated by running the whole
504 /// that happens quite often, as demonstrated by running the whole
506 /// suite.
505 /// suite.
507 /// For instance, run tests/test-obsolete-checkheads.t
506 /// For instance, run tests/test-obsolete-checkheads.t
508 fn test_nullrev_input() {
507 fn test_nullrev_input() {
509 let mut iter = AncestorsIterator::new(
508 let mut iter = AncestorsIterator::new(
510 SampleGraph,
509 SampleGraph,
511 vec![Revision(-1)],
510 vec![Revision(-1)],
512 0.into(),
511 0.into(),
513 false,
512 false,
514 )
513 )
515 .unwrap();
514 .unwrap();
516 assert_eq!(iter.next(), None)
515 assert_eq!(iter.next(), None)
517 }
516 }
518
517
519 #[test]
518 #[test]
520 fn test_contains() {
519 fn test_contains() {
521 let mut lazy = AncestorsIterator::new(
520 let mut lazy = AncestorsIterator::new(
522 SampleGraph,
521 SampleGraph,
523 vec![10.into(), 1.into()],
522 vec![10.into(), 1.into()],
524 0.into(),
523 0.into(),
525 true,
524 true,
526 )
525 )
527 .unwrap();
526 .unwrap();
528 assert!(lazy.contains(1.into()).unwrap());
527 assert!(lazy.contains(1.into()).unwrap());
529 assert!(!lazy.contains(3.into()).unwrap());
528 assert!(!lazy.contains(3.into()).unwrap());
530
529
531 let mut lazy = AncestorsIterator::new(
530 let mut lazy = AncestorsIterator::new(
532 SampleGraph,
531 SampleGraph,
533 vec![0.into()],
532 vec![0.into()],
534 0.into(),
533 0.into(),
535 false,
534 false,
536 )
535 )
537 .unwrap();
536 .unwrap();
538 assert!(!lazy.contains(NULL_REVISION).unwrap());
537 assert!(!lazy.contains(NULL_REVISION).unwrap());
539 }
538 }
540
539
541 #[test]
540 #[test]
542 fn test_peek() {
541 fn test_peek() {
543 let mut iter = AncestorsIterator::new(
542 let mut iter = AncestorsIterator::new(
544 SampleGraph,
543 SampleGraph,
545 vec![10.into()],
544 vec![10.into()],
546 0.into(),
545 0.into(),
547 true,
546 true,
548 )
547 )
549 .unwrap();
548 .unwrap();
550 // peek() gives us the next value
549 // peek() gives us the next value
551 assert_eq!(iter.peek(), Some(10.into()));
550 assert_eq!(iter.peek(), Some(10.into()));
552 // but it's not been consumed
551 // but it's not been consumed
553 assert_eq!(iter.next(), Some(Ok(10.into())));
552 assert_eq!(iter.next(), Some(Ok(10.into())));
554 // and iteration resumes normally
553 // and iteration resumes normally
555 assert_eq!(iter.next(), Some(Ok(5.into())));
554 assert_eq!(iter.next(), Some(Ok(5.into())));
556
555
557 // let's drain the iterator to test peek() at the end
556 // let's drain the iterator to test peek() at the end
558 while iter.next().is_some() {}
557 while iter.next().is_some() {}
559 assert_eq!(iter.peek(), None);
558 assert_eq!(iter.peek(), None);
560 }
559 }
561
560
562 #[test]
561 #[test]
563 fn test_empty() {
562 fn test_empty() {
564 let mut iter = AncestorsIterator::new(
563 let mut iter = AncestorsIterator::new(
565 SampleGraph,
564 SampleGraph,
566 vec![10.into()],
565 vec![10.into()],
567 0.into(),
566 0.into(),
568 true,
567 true,
569 )
568 )
570 .unwrap();
569 .unwrap();
571 assert!(!iter.is_empty());
570 assert!(!iter.is_empty());
572 while iter.next().is_some() {}
571 while iter.next().is_some() {}
573 assert!(!iter.is_empty());
572 assert!(!iter.is_empty());
574
573
575 let iter = AncestorsIterator::new(SampleGraph, vec![], 0.into(), true)
574 let iter = AncestorsIterator::new(SampleGraph, vec![], 0.into(), true)
576 .unwrap();
575 .unwrap();
577 assert!(iter.is_empty());
576 assert!(iter.is_empty());
578
577
579 // case where iter.seen == {NULL_REVISION}
578 // case where iter.seen == {NULL_REVISION}
580 let iter = AncestorsIterator::new(
579 let iter = AncestorsIterator::new(
581 SampleGraph,
580 SampleGraph,
582 vec![0.into()],
581 vec![0.into()],
583 0.into(),
582 0.into(),
584 false,
583 false,
585 )
584 )
586 .unwrap();
585 .unwrap();
587 assert!(iter.is_empty());
586 assert!(iter.is_empty());
588 }
587 }
589
588
590 /// A corrupted Graph, supporting error handling tests
589 /// A corrupted Graph, supporting error handling tests
591 #[derive(Clone, Debug)]
590 #[derive(Clone, Debug)]
592 struct Corrupted;
591 struct Corrupted;
593
592
594 impl Graph for Corrupted {
593 impl Graph for Corrupted {
595 // FIXME what to do about this? Are we just not supposed to get them
594 // FIXME what to do about this? Are we just not supposed to get them
596 // anymore?
595 // anymore?
597 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
596 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
598 match rev {
597 match rev {
599 Revision(1) => Ok([0.into(), (-1).into()]),
598 Revision(1) => Ok([0.into(), (-1).into()]),
600 r => Err(GraphError::ParentOutOfRange(r)),
599 r => Err(GraphError::ParentOutOfRange(r)),
601 }
600 }
602 }
601 }
603 }
602 }
604
603
605 #[test]
604 #[test]
606 fn test_initrev_out_of_range() {
605 fn test_initrev_out_of_range() {
607 // inclusive=false looks up initrev's parents right away
606 // inclusive=false looks up initrev's parents right away
608 match AncestorsIterator::new(
607 match AncestorsIterator::new(
609 SampleGraph,
608 SampleGraph,
610 vec![25.into()],
609 vec![25.into()],
611 0.into(),
610 0.into(),
612 false,
611 false,
613 ) {
612 ) {
614 Ok(_) => panic!("Should have been ParentOutOfRange"),
613 Ok(_) => panic!("Should have been ParentOutOfRange"),
615 Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25.into())),
614 Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25.into())),
616 }
615 }
617 }
616 }
618
617
619 #[test]
618 #[test]
620 fn test_next_out_of_range() {
619 fn test_next_out_of_range() {
621 // inclusive=false looks up initrev's parents right away
620 // inclusive=false looks up initrev's parents right away
622 let mut iter =
621 let mut iter =
623 AncestorsIterator::new(Corrupted, vec![1.into()], 0.into(), false)
622 AncestorsIterator::new(Corrupted, vec![1.into()], 0.into(), false)
624 .unwrap();
623 .unwrap();
625 assert_eq!(
624 assert_eq!(
626 iter.next(),
625 iter.next(),
627 Some(Err(GraphError::ParentOutOfRange(0.into())))
626 Some(Err(GraphError::ParentOutOfRange(0.into())))
628 );
627 );
629 }
628 }
630
629
631 #[test]
630 #[test]
632 /// Test constructor, add/get bases and heads
631 /// Test constructor, add/get bases and heads
633 fn test_missing_bases() -> Result<(), GraphError> {
632 fn test_missing_bases() -> Result<(), GraphError> {
634 let mut missing_ancestors = MissingAncestors::new(
633 let mut missing_ancestors = MissingAncestors::new(
635 SampleGraph,
634 SampleGraph,
636 [5.into(), 3.into(), 1.into(), 3.into()].iter().cloned(),
635 [5.into(), 3.into(), 1.into(), 3.into()].iter().cloned(),
637 );
636 );
638 let mut as_vec: Vec<Revision> =
637 let mut as_vec: Vec<Revision> =
639 missing_ancestors.get_bases().iter().cloned().collect();
638 missing_ancestors.get_bases().iter().cloned().collect();
640 as_vec.sort_unstable();
639 as_vec.sort_unstable();
641 assert_eq!(as_vec, [1, 3, 5]);
640 assert_eq!(as_vec, [1, 3, 5]);
642 assert_eq!(missing_ancestors.max_base, 5);
641 assert_eq!(missing_ancestors.max_base, 5);
643
642
644 missing_ancestors
643 missing_ancestors
645 .add_bases([3.into(), 7.into(), 8.into()].iter().cloned());
644 .add_bases([3.into(), 7.into(), 8.into()].iter().cloned());
646 as_vec = missing_ancestors.get_bases().iter().cloned().collect();
645 as_vec = missing_ancestors.get_bases().iter().cloned().collect();
647 as_vec.sort_unstable();
646 as_vec.sort_unstable();
648 assert_eq!(as_vec, [1, 3, 5, 7, 8]);
647 assert_eq!(as_vec, [1, 3, 5, 7, 8]);
649 assert_eq!(missing_ancestors.max_base, 8);
648 assert_eq!(missing_ancestors.max_base, 8);
650
649
651 as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
650 as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
652 as_vec.sort_unstable();
651 as_vec.sort_unstable();
653 assert_eq!(as_vec, [3, 5, 7, 8]);
652 assert_eq!(as_vec, [3, 5, 7, 8]);
654 Ok(())
653 Ok(())
655 }
654 }
656
655
657 fn assert_missing_remove(
656 fn assert_missing_remove(
658 bases: &[BaseRevision],
657 bases: &[BaseRevision],
659 revs: &[BaseRevision],
658 revs: &[BaseRevision],
660 expected: &[BaseRevision],
659 expected: &[BaseRevision],
661 ) {
660 ) {
662 let mut missing_ancestors = MissingAncestors::new(
661 let mut missing_ancestors = MissingAncestors::new(
663 SampleGraph,
662 SampleGraph,
664 bases.iter().map(|r| Revision(*r)),
663 bases.iter().map(|r| Revision(*r)),
665 );
664 );
666 let mut revset: HashSet<Revision> =
665 let mut revset: HashSet<Revision> =
667 revs.iter().map(|r| Revision(*r)).collect();
666 revs.iter().map(|r| Revision(*r)).collect();
668 missing_ancestors
667 missing_ancestors
669 .remove_ancestors_from(&mut revset)
668 .remove_ancestors_from(&mut revset)
670 .unwrap();
669 .unwrap();
671 let mut as_vec: Vec<Revision> = revset.into_iter().collect();
670 let mut as_vec: Vec<Revision> = revset.into_iter().collect();
672 as_vec.sort_unstable();
671 as_vec.sort_unstable();
673 assert_eq!(as_vec.as_slice(), expected);
672 assert_eq!(as_vec.as_slice(), expected);
674 }
673 }
675
674
676 #[test]
675 #[test]
677 fn test_missing_remove() {
676 fn test_missing_remove() {
678 assert_missing_remove(
677 assert_missing_remove(
679 &[1, 2, 3, 4, 7],
678 &[1, 2, 3, 4, 7],
680 Vec::from_iter(1..10).as_slice(),
679 Vec::from_iter(1..10).as_slice(),
681 &[5, 6, 8, 9],
680 &[5, 6, 8, 9],
682 );
681 );
683 assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
682 assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
684 assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
683 assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
685 }
684 }
686
685
687 fn assert_missing_ancestors(
686 fn assert_missing_ancestors(
688 bases: &[BaseRevision],
687 bases: &[BaseRevision],
689 revs: &[BaseRevision],
688 revs: &[BaseRevision],
690 expected: &[BaseRevision],
689 expected: &[BaseRevision],
691 ) {
690 ) {
692 let mut missing_ancestors = MissingAncestors::new(
691 let mut missing_ancestors = MissingAncestors::new(
693 SampleGraph,
692 SampleGraph,
694 bases.iter().map(|r| Revision(*r)),
693 bases.iter().map(|r| Revision(*r)),
695 );
694 );
696 let missing = missing_ancestors
695 let missing = missing_ancestors
697 .missing_ancestors(revs.iter().map(|r| Revision(*r)))
696 .missing_ancestors(revs.iter().map(|r| Revision(*r)))
698 .unwrap();
697 .unwrap();
699 assert_eq!(missing.as_slice(), expected);
698 assert_eq!(missing.as_slice(), expected);
700 }
699 }
701
700
702 #[test]
701 #[test]
703 fn test_missing_ancestors() {
702 fn test_missing_ancestors() {
704 // examples taken from test-ancestors.py by having it run
703 // examples taken from test-ancestors.py by having it run
705 // on the same graph (both naive and fast Python algs)
704 // on the same graph (both naive and fast Python algs)
706 assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
705 assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
707 assert_missing_ancestors(&[11], &[10], &[5, 10]);
706 assert_missing_ancestors(&[11], &[10], &[5, 10]);
708 assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
707 assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
709 }
708 }
710
709
711 /// An interesting case found by a random generator similar to
710 /// An interesting case found by a random generator similar to
712 /// the one in test-ancestor.py. An early version of Rust MissingAncestors
711 /// the one in test-ancestor.py. An early version of Rust MissingAncestors
713 /// failed this, yet none of the integration tests of the whole suite
712 /// failed this, yet none of the integration tests of the whole suite
714 /// catched it.
713 /// catched it.
715 #[allow(clippy::unnecessary_cast)]
714 #[allow(clippy::unnecessary_cast)]
716 #[test]
715 #[test]
717 fn test_remove_ancestors_from_case1() {
716 fn test_remove_ancestors_from_case1() {
718 const FAKE_NULL_REVISION: BaseRevision = -1;
717 const FAKE_NULL_REVISION: BaseRevision = -1;
719 assert_eq!(FAKE_NULL_REVISION, NULL_REVISION.0);
718 assert_eq!(FAKE_NULL_REVISION, NULL_REVISION.0);
720 let graph: VecGraph = vec![
719 let graph: VecGraph = vec![
721 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
720 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
722 [0, FAKE_NULL_REVISION],
721 [0, FAKE_NULL_REVISION],
723 [1, 0],
722 [1, 0],
724 [2, 1],
723 [2, 1],
725 [3, FAKE_NULL_REVISION],
724 [3, FAKE_NULL_REVISION],
726 [4, FAKE_NULL_REVISION],
725 [4, FAKE_NULL_REVISION],
727 [5, 1],
726 [5, 1],
728 [2, FAKE_NULL_REVISION],
727 [2, FAKE_NULL_REVISION],
729 [7, FAKE_NULL_REVISION],
728 [7, FAKE_NULL_REVISION],
730 [8, FAKE_NULL_REVISION],
729 [8, FAKE_NULL_REVISION],
731 [9, FAKE_NULL_REVISION],
730 [9, FAKE_NULL_REVISION],
732 [10, 1],
731 [10, 1],
733 [3, FAKE_NULL_REVISION],
732 [3, FAKE_NULL_REVISION],
734 [12, FAKE_NULL_REVISION],
733 [12, FAKE_NULL_REVISION],
735 [13, FAKE_NULL_REVISION],
734 [13, FAKE_NULL_REVISION],
736 [14, FAKE_NULL_REVISION],
735 [14, FAKE_NULL_REVISION],
737 [4, FAKE_NULL_REVISION],
736 [4, FAKE_NULL_REVISION],
738 [16, FAKE_NULL_REVISION],
737 [16, FAKE_NULL_REVISION],
739 [17, FAKE_NULL_REVISION],
738 [17, FAKE_NULL_REVISION],
740 [18, FAKE_NULL_REVISION],
739 [18, FAKE_NULL_REVISION],
741 [19, 11],
740 [19, 11],
742 [20, FAKE_NULL_REVISION],
741 [20, FAKE_NULL_REVISION],
743 [21, FAKE_NULL_REVISION],
742 [21, FAKE_NULL_REVISION],
744 [22, FAKE_NULL_REVISION],
743 [22, FAKE_NULL_REVISION],
745 [23, FAKE_NULL_REVISION],
744 [23, FAKE_NULL_REVISION],
746 [2, FAKE_NULL_REVISION],
745 [2, FAKE_NULL_REVISION],
747 [3, FAKE_NULL_REVISION],
746 [3, FAKE_NULL_REVISION],
748 [26, 24],
747 [26, 24],
749 [27, FAKE_NULL_REVISION],
748 [27, FAKE_NULL_REVISION],
750 [28, FAKE_NULL_REVISION],
749 [28, FAKE_NULL_REVISION],
751 [12, FAKE_NULL_REVISION],
750 [12, FAKE_NULL_REVISION],
752 [1, FAKE_NULL_REVISION],
751 [1, FAKE_NULL_REVISION],
753 [1, 9],
752 [1, 9],
754 [32, FAKE_NULL_REVISION],
753 [32, FAKE_NULL_REVISION],
755 [33, FAKE_NULL_REVISION],
754 [33, FAKE_NULL_REVISION],
756 [34, 31],
755 [34, 31],
757 [35, FAKE_NULL_REVISION],
756 [35, FAKE_NULL_REVISION],
758 [36, 26],
757 [36, 26],
759 [37, FAKE_NULL_REVISION],
758 [37, FAKE_NULL_REVISION],
760 [38, FAKE_NULL_REVISION],
759 [38, FAKE_NULL_REVISION],
761 [39, FAKE_NULL_REVISION],
760 [39, FAKE_NULL_REVISION],
762 [40, FAKE_NULL_REVISION],
761 [40, FAKE_NULL_REVISION],
763 [41, FAKE_NULL_REVISION],
762 [41, FAKE_NULL_REVISION],
764 [42, 26],
763 [42, 26],
765 [0, FAKE_NULL_REVISION],
764 [0, FAKE_NULL_REVISION],
766 [44, FAKE_NULL_REVISION],
765 [44, FAKE_NULL_REVISION],
767 [45, 4],
766 [45, 4],
768 [40, FAKE_NULL_REVISION],
767 [40, FAKE_NULL_REVISION],
769 [47, FAKE_NULL_REVISION],
768 [47, FAKE_NULL_REVISION],
770 [36, 0],
769 [36, 0],
771 [49, FAKE_NULL_REVISION],
770 [49, FAKE_NULL_REVISION],
772 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
771 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
773 [51, FAKE_NULL_REVISION],
772 [51, FAKE_NULL_REVISION],
774 [52, FAKE_NULL_REVISION],
773 [52, FAKE_NULL_REVISION],
775 [53, FAKE_NULL_REVISION],
774 [53, FAKE_NULL_REVISION],
776 [14, FAKE_NULL_REVISION],
775 [14, FAKE_NULL_REVISION],
777 [55, FAKE_NULL_REVISION],
776 [55, FAKE_NULL_REVISION],
778 [15, FAKE_NULL_REVISION],
777 [15, FAKE_NULL_REVISION],
779 [23, FAKE_NULL_REVISION],
778 [23, FAKE_NULL_REVISION],
780 [58, FAKE_NULL_REVISION],
779 [58, FAKE_NULL_REVISION],
781 [59, FAKE_NULL_REVISION],
780 [59, FAKE_NULL_REVISION],
782 [2, FAKE_NULL_REVISION],
781 [2, FAKE_NULL_REVISION],
783 [61, 59],
782 [61, 59],
784 [62, FAKE_NULL_REVISION],
783 [62, FAKE_NULL_REVISION],
785 [63, FAKE_NULL_REVISION],
784 [63, FAKE_NULL_REVISION],
786 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
785 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
787 [65, FAKE_NULL_REVISION],
786 [65, FAKE_NULL_REVISION],
788 [66, FAKE_NULL_REVISION],
787 [66, FAKE_NULL_REVISION],
789 [67, FAKE_NULL_REVISION],
788 [67, FAKE_NULL_REVISION],
790 [68, FAKE_NULL_REVISION],
789 [68, FAKE_NULL_REVISION],
791 [37, 28],
790 [37, 28],
792 [69, 25],
791 [69, 25],
793 [71, FAKE_NULL_REVISION],
792 [71, FAKE_NULL_REVISION],
794 [72, FAKE_NULL_REVISION],
793 [72, FAKE_NULL_REVISION],
795 [50, 2],
794 [50, 2],
796 [74, FAKE_NULL_REVISION],
795 [74, FAKE_NULL_REVISION],
797 [12, FAKE_NULL_REVISION],
796 [12, FAKE_NULL_REVISION],
798 [18, FAKE_NULL_REVISION],
797 [18, FAKE_NULL_REVISION],
799 [77, FAKE_NULL_REVISION],
798 [77, FAKE_NULL_REVISION],
800 [78, FAKE_NULL_REVISION],
799 [78, FAKE_NULL_REVISION],
801 [79, FAKE_NULL_REVISION],
800 [79, FAKE_NULL_REVISION],
802 [43, 33],
801 [43, 33],
803 [81, FAKE_NULL_REVISION],
802 [81, FAKE_NULL_REVISION],
804 [82, FAKE_NULL_REVISION],
803 [82, FAKE_NULL_REVISION],
805 [83, FAKE_NULL_REVISION],
804 [83, FAKE_NULL_REVISION],
806 [84, 45],
805 [84, 45],
807 [85, FAKE_NULL_REVISION],
806 [85, FAKE_NULL_REVISION],
808 [86, FAKE_NULL_REVISION],
807 [86, FAKE_NULL_REVISION],
809 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
808 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
810 [88, FAKE_NULL_REVISION],
809 [88, FAKE_NULL_REVISION],
811 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
810 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
812 [76, 83],
811 [76, 83],
813 [44, FAKE_NULL_REVISION],
812 [44, FAKE_NULL_REVISION],
814 [92, FAKE_NULL_REVISION],
813 [92, FAKE_NULL_REVISION],
815 [93, FAKE_NULL_REVISION],
814 [93, FAKE_NULL_REVISION],
816 [9, FAKE_NULL_REVISION],
815 [9, FAKE_NULL_REVISION],
817 [95, 67],
816 [95, 67],
818 [96, FAKE_NULL_REVISION],
817 [96, FAKE_NULL_REVISION],
819 [97, FAKE_NULL_REVISION],
818 [97, FAKE_NULL_REVISION],
820 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
819 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
821 ]
820 ]
822 .into_iter()
821 .into_iter()
823 .map(|[a, b]| [Revision(a), Revision(b)])
822 .map(|[a, b]| [Revision(a), Revision(b)])
824 .collect();
823 .collect();
825 let problem_rev = 28.into();
824 let problem_rev = 28.into();
826 let problem_base = 70.into();
825 let problem_base = 70.into();
827 // making the problem obvious: problem_rev is a parent of problem_base
826 // making the problem obvious: problem_rev is a parent of problem_base
828 assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
827 assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
829
828
830 let mut missing_ancestors: MissingAncestors<VecGraph> =
829 let mut missing_ancestors: MissingAncestors<VecGraph> =
831 MissingAncestors::new(
830 MissingAncestors::new(
832 graph,
831 graph,
833 [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
832 [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
834 .iter()
833 .iter()
835 .map(|r| Revision(*r)),
834 .map(|r| Revision(*r)),
836 );
835 );
837 assert!(missing_ancestors.bases.contains(&problem_base));
836 assert!(missing_ancestors.bases.contains(&problem_base));
838
837
839 let mut revs: HashSet<Revision> =
838 let mut revs: HashSet<Revision> =
840 [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
839 [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
841 .iter()
840 .iter()
842 .map(|r| Revision(*r))
841 .map(|r| Revision(*r))
843 .collect();
842 .collect();
844 missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
843 missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
845 assert!(!revs.contains(&problem_rev));
844 assert!(!revs.contains(&problem_rev));
846 }
845 }
847 }
846 }
@@ -1,250 +1,250
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::exit_codes;
2 use crate::exit_codes;
3 use crate::repo::Repo;
3 use crate::repo::Repo;
4 use crate::revlog::path_encode::path_encode;
4 use crate::revlog::path_encode::path_encode;
5 use crate::revlog::NodePrefix;
5 use crate::revlog::NodePrefix;
6 use crate::revlog::Revision;
6 use crate::revlog::Revision;
7 use crate::revlog::RevlogEntry;
7 use crate::revlog::RevlogEntry;
8 use crate::revlog::{Revlog, RevlogError};
8 use crate::revlog::{Revlog, RevlogError};
9 use crate::utils::files::get_path_from_bytes;
9 use crate::utils::files::get_path_from_bytes;
10 use crate::utils::hg_path::HgPath;
10 use crate::utils::hg_path::HgPath;
11 use crate::utils::SliceExt;
11 use crate::utils::SliceExt;
12 use crate::Graph;
12 use crate::Graph;
13 use crate::GraphError;
13 use crate::GraphError;
14 use crate::UncheckedRevision;
14 use crate::UncheckedRevision;
15 use std::path::PathBuf;
15 use std::path::PathBuf;
16
16
17 use super::options::RevlogOpenOptions;
17 use super::options::RevlogOpenOptions;
18
18
19 /// A specialized `Revlog` to work with file data logs.
19 /// A specialized `Revlog` to work with file data logs.
20 pub struct Filelog {
20 pub struct Filelog {
21 /// The generic `revlog` format.
21 /// The generic `revlog` format.
22 revlog: Revlog,
22 revlog: Revlog,
23 }
23 }
24
24
25 impl Graph for Filelog {
25 impl Graph for Filelog {
26 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
26 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
27 self.revlog.parents(rev)
27 self.revlog.parents(rev)
28 }
28 }
29 }
29 }
30
30
31 impl Filelog {
31 impl Filelog {
32 pub fn open_vfs(
32 pub fn open_vfs(
33 store_vfs: &crate::vfs::VfsImpl,
33 store_vfs: &crate::vfs::VfsImpl,
34 file_path: &HgPath,
34 file_path: &HgPath,
35 options: RevlogOpenOptions,
35 options: RevlogOpenOptions,
36 ) -> Result<Self, HgError> {
36 ) -> Result<Self, HgError> {
37 let index_path = store_path(file_path, b".i");
37 let index_path = store_path(file_path, b".i");
38 let data_path = store_path(file_path, b".d");
38 let data_path = store_path(file_path, b".d");
39 let revlog =
39 let revlog =
40 Revlog::open(store_vfs, index_path, Some(&data_path), options)?;
40 Revlog::open(store_vfs, index_path, Some(&data_path), options)?;
41 Ok(Self { revlog })
41 Ok(Self { revlog })
42 }
42 }
43
43
44 pub fn open(
44 pub fn open(
45 repo: &Repo,
45 repo: &Repo,
46 file_path: &HgPath,
46 file_path: &HgPath,
47 options: RevlogOpenOptions,
47 options: RevlogOpenOptions,
48 ) -> Result<Self, HgError> {
48 ) -> Result<Self, HgError> {
49 Self::open_vfs(&repo.store_vfs(), file_path, options)
49 Self::open_vfs(&repo.store_vfs(), file_path, options)
50 }
50 }
51
51
52 /// The given node ID is that of the file as found in a filelog, not of a
52 /// The given node ID is that of the file as found in a filelog, not of a
53 /// changeset.
53 /// changeset.
54 pub fn data_for_node(
54 pub fn data_for_node(
55 &self,
55 &self,
56 file_node: impl Into<NodePrefix>,
56 file_node: impl Into<NodePrefix>,
57 ) -> Result<FilelogRevisionData, RevlogError> {
57 ) -> Result<FilelogRevisionData, RevlogError> {
58 let file_rev = self.revlog.rev_from_node(file_node.into())?;
58 let file_rev = self.revlog.rev_from_node(file_node.into())?;
59 self.data_for_unchecked_rev(file_rev.into())
59 self.data_for_unchecked_rev(file_rev.into())
60 }
60 }
61
61
62 /// The given revision is that of the file as found in a filelog, not of a
62 /// The given revision is that of the file as found in a filelog, not of a
63 /// changeset.
63 /// changeset.
64 pub fn data_for_unchecked_rev(
64 pub fn data_for_unchecked_rev(
65 &self,
65 &self,
66 file_rev: UncheckedRevision,
66 file_rev: UncheckedRevision,
67 ) -> Result<FilelogRevisionData, RevlogError> {
67 ) -> Result<FilelogRevisionData, RevlogError> {
68 let data: Vec<u8> = self
68 let data: Vec<u8> = self
69 .revlog
69 .revlog
70 .get_data_for_unchecked_rev(file_rev)?
70 .get_data_for_unchecked_rev(file_rev)?
71 .into_owned();
71 .into_owned();
72 Ok(FilelogRevisionData(data))
72 Ok(FilelogRevisionData(data))
73 }
73 }
74
74
75 /// The given node ID is that of the file as found in a filelog, not of a
75 /// The given node ID is that of the file as found in a filelog, not of a
76 /// changeset.
76 /// changeset.
77 pub fn entry_for_node(
77 pub fn entry_for_node(
78 &self,
78 &self,
79 file_node: impl Into<NodePrefix>,
79 file_node: impl Into<NodePrefix>,
80 ) -> Result<FilelogEntry, RevlogError> {
80 ) -> Result<FilelogEntry, RevlogError> {
81 let file_rev = self.revlog.rev_from_node(file_node.into())?;
81 let file_rev = self.revlog.rev_from_node(file_node.into())?;
82 self.entry(file_rev)
82 self.entry(file_rev)
83 }
83 }
84
84
85 /// The given revision is that of the file as found in a filelog, not of a
85 /// The given revision is that of the file as found in a filelog, not of a
86 /// changeset.
86 /// changeset.
87 pub fn entry_for_unchecked_rev(
87 pub fn entry_for_unchecked_rev(
88 &self,
88 &self,
89 file_rev: UncheckedRevision,
89 file_rev: UncheckedRevision,
90 ) -> Result<FilelogEntry, RevlogError> {
90 ) -> Result<FilelogEntry, RevlogError> {
91 Ok(FilelogEntry(
91 Ok(FilelogEntry(
92 self.revlog.get_entry_for_unchecked_rev(file_rev)?,
92 self.revlog.get_entry_for_unchecked_rev(file_rev)?,
93 ))
93 ))
94 }
94 }
95
95
96 /// Same as [`Self::entry_for_unchecked_rev`] for a checked revision.
96 /// Same as [`Self::entry_for_unchecked_rev`] for a checked revision.
97 pub fn entry(
97 pub fn entry(
98 &self,
98 &self,
99 file_rev: Revision,
99 file_rev: Revision,
100 ) -> Result<FilelogEntry, RevlogError> {
100 ) -> Result<FilelogEntry, RevlogError> {
101 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
101 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
102 }
102 }
103 }
103 }
104
104
105 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
105 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
106 let encoded_bytes =
106 let encoded_bytes =
107 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
107 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
108 get_path_from_bytes(&encoded_bytes).into()
108 get_path_from_bytes(&encoded_bytes).into()
109 }
109 }
110
110
111 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
111 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
112
112
113 impl FilelogEntry<'_> {
113 impl FilelogEntry<'_> {
114 /// `self.data()` can be expensive, with decompression and delta
114 /// `self.data()` can be expensive, with decompression and delta
115 /// resolution.
115 /// resolution.
116 ///
116 ///
117 /// *Without* paying this cost, based on revlog index information
117 /// *Without* paying this cost, based on revlog index information
118 /// including `RevlogEntry::uncompressed_len`:
118 /// including `RevlogEntry::uncompressed_len`:
119 ///
119 ///
120 /// * Returns `true` if the length that `self.data().file_data().len()`
120 /// * Returns `true` if the length that `self.data().file_data().len()`
121 /// would return is definitely **not equal** to `other_len`.
121 /// would return is definitely **not equal** to `other_len`.
122 /// * Returns `false` if available information is inconclusive.
122 /// * Returns `false` if available information is inconclusive.
123 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
123 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
124 // Relevant code that implement this behavior in Python code:
124 // Relevant code that implement this behavior in Python code:
125 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
125 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
126 // revlog.size, revlog.rawsize
126 // revlog.size, revlog.rawsize
127
127
128 // Let’s call `file_data_len` what would be returned by
128 // Let’s call `file_data_len` what would be returned by
129 // `self.data().file_data().len()`.
129 // `self.data().file_data().len()`.
130
130
131 if self.0.is_censored() {
131 if self.0.is_censored() {
132 let file_data_len = 0;
132 let file_data_len = 0;
133 return other_len != file_data_len;
133 return other_len != file_data_len;
134 }
134 }
135
135
136 if self.0.has_length_affecting_flag_processor() {
136 if self.0.has_length_affecting_flag_processor() {
137 // We can’t conclude anything about `file_data_len`.
137 // We can’t conclude anything about `file_data_len`.
138 return false;
138 return false;
139 }
139 }
140
140
141 // Revlog revisions (usually) have metadata for the size of
141 // Revlog revisions (usually) have metadata for the size of
142 // their data after decompression and delta resolution
142 // their data after decompression and delta resolution
143 // as would be returned by `Revlog::get_rev_data`.
143 // as would be returned by `Revlog::get_rev_data`.
144 //
144 //
145 // For filelogs this is the file’s contents preceded by an optional
145 // For filelogs this is the file’s contents preceded by an optional
146 // metadata block.
146 // metadata block.
147 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
147 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
148 l as u64
148 l as u64
149 } else {
149 } else {
150 // The field was set to -1, the actual uncompressed len is unknown.
150 // The field was set to -1, the actual uncompressed len is unknown.
151 // We need to decompress to say more.
151 // We need to decompress to say more.
152 return false;
152 return false;
153 };
153 };
154 // `uncompressed_len = file_data_len + optional_metadata_len`,
154 // `uncompressed_len = file_data_len + optional_metadata_len`,
155 // so `file_data_len <= uncompressed_len`.
155 // so `file_data_len <= uncompressed_len`.
156 if uncompressed_len < other_len {
156 if uncompressed_len < other_len {
157 // Transitively, `file_data_len < other_len`.
157 // Transitively, `file_data_len < other_len`.
158 // So `other_len != file_data_len` definitely.
158 // So `other_len != file_data_len` definitely.
159 return true;
159 return true;
160 }
160 }
161
161
162 if uncompressed_len == other_len + 4 {
162 if uncompressed_len == other_len + 4 {
163 // It’s possible that `file_data_len == other_len` with an empty
163 // It’s possible that `file_data_len == other_len` with an empty
164 // metadata block (2 start marker bytes + 2 end marker bytes).
164 // metadata block (2 start marker bytes + 2 end marker bytes).
165 // This happens when there wouldn’t otherwise be metadata, but
165 // This happens when there wouldn’t otherwise be metadata, but
166 // the first 2 bytes of file data happen to match a start marker
166 // the first 2 bytes of file data happen to match a start marker
167 // and would be ambiguous.
167 // and would be ambiguous.
168 return false;
168 return false;
169 }
169 }
170
170
171 if !self.0.has_p1() {
171 if !self.0.has_p1() {
172 // There may or may not be copy metadata, so we can’t deduce more
172 // There may or may not be copy metadata, so we can’t deduce more
173 // about `file_data_len` without computing file data.
173 // about `file_data_len` without computing file data.
174 return false;
174 return false;
175 }
175 }
176
176
177 // Filelog ancestry is not meaningful in the way changelog ancestry is.
177 // Filelog ancestry is not meaningful in the way changelog ancestry is.
178 // It only provides hints to delta generation.
178 // It only provides hints to delta generation.
179 // p1 and p2 are set to null when making a copy or rename since
179 // p1 and p2 are set to null when making a copy or rename since
180 // contents are likely unrelatedto what might have previously existed
180 // contents are likely unrelatedto what might have previously existed
181 // at the destination path.
181 // at the destination path.
182 //
182 //
183 // Conversely, since here p1 is non-null, there is no copy metadata.
183 // Conversely, since here p1 is non-null, there is no copy metadata.
184 // Note that this reasoning may be invalidated in the presence of
184 // Note that this reasoning may be invalidated in the presence of
185 // merges made by some previous versions of Mercurial that
185 // merges made by some previous versions of Mercurial that
186 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
186 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
187 // and `tests/test-issue6528.t`.
187 // and `tests/test-issue6528.t`.
188 //
188 //
189 // Since copy metadata is currently the only kind of metadata
189 // Since copy metadata is currently the only kind of metadata
190 // kept in revlog data of filelogs,
190 // kept in revlog data of filelogs,
191 // this `FilelogEntry` does not have such metadata:
191 // this `FilelogEntry` does not have such metadata:
192 let file_data_len = uncompressed_len;
192 let file_data_len = uncompressed_len;
193
193
194 file_data_len != other_len
194 file_data_len != other_len
195 }
195 }
196
196
197 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
197 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
198 let data = self.0.data();
198 let data = self.0.data();
199 match data {
199 match data {
200 Ok(data) => Ok(FilelogRevisionData(data.into_owned())),
200 Ok(data) => Ok(FilelogRevisionData(data.into_owned())),
201 // Errors other than `HgError` should not happen at this point
201 // Errors other than `HgError` should not happen at this point
202 Err(e) => match e {
202 Err(e) => match e {
203 RevlogError::Other(hg_error) => Err(hg_error),
203 RevlogError::Other(hg_error) => Err(hg_error),
204 revlog_error => Err(HgError::abort(
204 revlog_error => Err(HgError::abort(
205 revlog_error.to_string(),
205 revlog_error.to_string(),
206 exit_codes::ABORT,
206 exit_codes::ABORT,
207 None,
207 None,
208 )),
208 )),
209 },
209 },
210 }
210 }
211 }
211 }
212 }
212 }
213
213
214 /// The data for one revision in a filelog, uncompressed and delta-resolved.
214 /// The data for one revision in a filelog, uncompressed and delta-resolved.
215 pub struct FilelogRevisionData(Vec<u8>);
215 pub struct FilelogRevisionData(Vec<u8>);
216
216
217 impl FilelogRevisionData {
217 impl FilelogRevisionData {
218 /// Split into metadata and data
218 /// Split into metadata and data
219 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
219 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
220 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
220 const DELIMITER: &[u8; 2] = b"\x01\n";
221
221
222 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
222 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
223 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
223 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
224 Ok((Some(metadata), data))
224 Ok((Some(metadata), data))
225 } else {
225 } else {
226 Err(HgError::corrupted(
226 Err(HgError::corrupted(
227 "Missing metadata end delimiter in filelog entry",
227 "Missing metadata end delimiter in filelog entry",
228 ))
228 ))
229 }
229 }
230 } else {
230 } else {
231 Ok((None, &self.0))
231 Ok((None, &self.0))
232 }
232 }
233 }
233 }
234
234
235 /// Returns the file contents at this revision, stripped of any metadata
235 /// Returns the file contents at this revision, stripped of any metadata
236 pub fn file_data(&self) -> Result<&[u8], HgError> {
236 pub fn file_data(&self) -> Result<&[u8], HgError> {
237 let (_metadata, data) = self.split()?;
237 let (_metadata, data) = self.split()?;
238 Ok(data)
238 Ok(data)
239 }
239 }
240
240
241 /// Consume the entry, and convert it into data, discarding any metadata,
241 /// Consume the entry, and convert it into data, discarding any metadata,
242 /// if present.
242 /// if present.
243 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
243 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
244 if let (Some(_metadata), data) = self.split()? {
244 if let (Some(_metadata), data) = self.split()? {
245 Ok(data.to_owned())
245 Ok(data.to_owned())
246 } else {
246 } else {
247 Ok(self.0)
247 Ok(self.0)
248 }
248 }
249 }
249 }
250 }
250 }
@@ -1,1351 +1,1350
1 //! A layer of lower-level revlog functionality to encapsulate most of the
1 //! A layer of lower-level revlog functionality to encapsulate most of the
2 //! IO work and expensive operations.
2 //! IO work and expensive operations.
3 use std::{
3 use std::{
4 borrow::Cow,
4 borrow::Cow,
5 cell::RefCell,
5 cell::RefCell,
6 io::{ErrorKind, Seek, SeekFrom, Write},
6 io::{ErrorKind, Seek, SeekFrom, Write},
7 ops::Deref,
7 ops::Deref,
8 path::PathBuf,
8 path::PathBuf,
9 sync::{Arc, Mutex},
9 sync::{Arc, Mutex},
10 };
10 };
11
11
12 use schnellru::{ByMemoryUsage, LruMap};
12 use schnellru::{ByMemoryUsage, LruMap};
13 use sha1::{Digest, Sha1};
13 use sha1::{Digest, Sha1};
14
14
15 use crate::{
15 use crate::{
16 errors::{HgError, IoResultExt},
16 errors::{HgError, IoResultExt},
17 exit_codes,
17 exit_codes,
18 transaction::Transaction,
18 transaction::Transaction,
19 vfs::Vfs,
19 vfs::Vfs,
20 };
20 };
21
21
22 use super::{
22 use super::{
23 compression::{
23 compression::{
24 uncompressed_zstd_data, CompressionConfig, Compressor, NoneCompressor,
24 uncompressed_zstd_data, CompressionConfig, Compressor, NoneCompressor,
25 ZlibCompressor, ZstdCompressor, ZLIB_BYTE, ZSTD_BYTE,
25 ZlibCompressor, ZstdCompressor, ZLIB_BYTE, ZSTD_BYTE,
26 },
26 },
27 file_io::{DelayedBuffer, FileHandle, RandomAccessFile, WriteHandles},
27 file_io::{DelayedBuffer, FileHandle, RandomAccessFile, WriteHandles},
28 index::{Index, IndexHeader, INDEX_ENTRY_SIZE},
28 index::{Index, IndexHeader, INDEX_ENTRY_SIZE},
29 node::{NODE_BYTES_LENGTH, NULL_NODE},
29 node::{NODE_BYTES_LENGTH, NULL_NODE},
30 options::{RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig},
30 options::{RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig},
31 BaseRevision, Node, Revision, RevlogEntry, RevlogError, RevlogIndex,
31 BaseRevision, Node, Revision, RevlogEntry, RevlogError, RevlogIndex,
32 UncheckedRevision, NULL_REVISION, NULL_REVLOG_ENTRY_FLAGS,
32 UncheckedRevision, NULL_REVISION, NULL_REVLOG_ENTRY_FLAGS,
33 };
33 };
34
34
35 /// Matches the `_InnerRevlog` class in the Python code, as an arbitrary
35 /// Matches the `_InnerRevlog` class in the Python code, as an arbitrary
36 /// boundary to incrementally rewrite higher-level revlog functionality in
36 /// boundary to incrementally rewrite higher-level revlog functionality in
37 /// Rust.
37 /// Rust.
38 pub struct InnerRevlog {
38 pub struct InnerRevlog {
39 /// When index and data are not interleaved: bytes of the revlog index.
39 /// When index and data are not interleaved: bytes of the revlog index.
40 /// When index and data are interleaved (inline revlog): bytes of the
40 /// When index and data are interleaved (inline revlog): bytes of the
41 /// revlog index and data.
41 /// revlog index and data.
42 pub index: Index,
42 pub index: Index,
43 /// The store vfs that is used to interact with the filesystem
43 /// The store vfs that is used to interact with the filesystem
44 vfs: Box<dyn Vfs>,
44 vfs: Box<dyn Vfs>,
45 /// The index file path, relative to the vfs root
45 /// The index file path, relative to the vfs root
46 pub index_file: PathBuf,
46 pub index_file: PathBuf,
47 /// The data file path, relative to the vfs root (same as `index_file`
47 /// The data file path, relative to the vfs root (same as `index_file`
48 /// if inline)
48 /// if inline)
49 data_file: PathBuf,
49 data_file: PathBuf,
50 /// Data config that applies to this revlog
50 /// Data config that applies to this revlog
51 data_config: RevlogDataConfig,
51 data_config: RevlogDataConfig,
52 /// Delta config that applies to this revlog
52 /// Delta config that applies to this revlog
53 delta_config: RevlogDeltaConfig,
53 delta_config: RevlogDeltaConfig,
54 /// Feature config that applies to this revlog
54 /// Feature config that applies to this revlog
55 feature_config: RevlogFeatureConfig,
55 feature_config: RevlogFeatureConfig,
56 /// A view into this revlog's data file
56 /// A view into this revlog's data file
57 segment_file: RandomAccessFile,
57 segment_file: RandomAccessFile,
58 /// A cache of uncompressed chunks that have previously been restored.
58 /// A cache of uncompressed chunks that have previously been restored.
59 /// Its eviction policy is defined in [`Self::new`].
59 /// Its eviction policy is defined in [`Self::new`].
60 uncompressed_chunk_cache: Option<UncompressedChunkCache>,
60 uncompressed_chunk_cache: Option<UncompressedChunkCache>,
61 /// Used to keep track of the actual target during diverted writes
61 /// Used to keep track of the actual target during diverted writes
62 /// for the changelog
62 /// for the changelog
63 original_index_file: Option<PathBuf>,
63 original_index_file: Option<PathBuf>,
64 /// Write handles to the index and data files
64 /// Write handles to the index and data files
65 /// XXX why duplicate from `index` and `segment_file`?
65 /// XXX why duplicate from `index` and `segment_file`?
66 writing_handles: Option<WriteHandles>,
66 writing_handles: Option<WriteHandles>,
67 /// See [`DelayedBuffer`].
67 /// See [`DelayedBuffer`].
68 delayed_buffer: Option<Arc<Mutex<DelayedBuffer>>>,
68 delayed_buffer: Option<Arc<Mutex<DelayedBuffer>>>,
69 /// Whether this revlog is inline. XXX why duplicate from `index`?
69 /// Whether this revlog is inline. XXX why duplicate from `index`?
70 pub inline: bool,
70 pub inline: bool,
71 /// A cache of the last revision, which is usually accessed multiple
71 /// A cache of the last revision, which is usually accessed multiple
72 /// times.
72 /// times.
73 pub last_revision_cache: Mutex<Option<SingleRevisionCache>>,
73 pub last_revision_cache: Mutex<Option<SingleRevisionCache>>,
74 }
74 }
75
75
76 impl InnerRevlog {
76 impl InnerRevlog {
77 pub fn new(
77 pub fn new(
78 vfs: Box<dyn Vfs>,
78 vfs: Box<dyn Vfs>,
79 index: Index,
79 index: Index,
80 index_file: PathBuf,
80 index_file: PathBuf,
81 data_file: PathBuf,
81 data_file: PathBuf,
82 data_config: RevlogDataConfig,
82 data_config: RevlogDataConfig,
83 delta_config: RevlogDeltaConfig,
83 delta_config: RevlogDeltaConfig,
84 feature_config: RevlogFeatureConfig,
84 feature_config: RevlogFeatureConfig,
85 ) -> Self {
85 ) -> Self {
86 assert!(index_file.is_relative());
86 assert!(index_file.is_relative());
87 assert!(data_file.is_relative());
87 assert!(data_file.is_relative());
88 let segment_file = RandomAccessFile::new(
88 let segment_file = RandomAccessFile::new(
89 dyn_clone::clone_box(&*vfs),
89 dyn_clone::clone_box(&*vfs),
90 if index.is_inline() {
90 if index.is_inline() {
91 index_file.to_owned()
91 index_file.to_owned()
92 } else {
92 } else {
93 data_file.to_owned()
93 data_file.to_owned()
94 },
94 },
95 );
95 );
96
96
97 let uncompressed_chunk_cache =
97 let uncompressed_chunk_cache =
98 data_config.uncompressed_cache_factor.map(
98 data_config.uncompressed_cache_factor.map(
99 // Arbitrary initial value
99 // Arbitrary initial value
100 // TODO check if using a hasher specific to integers is useful
100 // TODO check if using a hasher specific to integers is useful
101 |_factor| RefCell::new(LruMap::with_memory_budget(65536)),
101 |_factor| RefCell::new(LruMap::with_memory_budget(65536)),
102 );
102 );
103
103
104 let inline = index.is_inline();
104 let inline = index.is_inline();
105 Self {
105 Self {
106 index,
106 index,
107 vfs,
107 vfs,
108 index_file,
108 index_file,
109 data_file,
109 data_file,
110 data_config,
110 data_config,
111 delta_config,
111 delta_config,
112 feature_config,
112 feature_config,
113 segment_file,
113 segment_file,
114 uncompressed_chunk_cache,
114 uncompressed_chunk_cache,
115 original_index_file: None,
115 original_index_file: None,
116 writing_handles: None,
116 writing_handles: None,
117 delayed_buffer: None,
117 delayed_buffer: None,
118 inline,
118 inline,
119 last_revision_cache: Mutex::new(None),
119 last_revision_cache: Mutex::new(None),
120 }
120 }
121 }
121 }
122
122
123 /// Return number of entries of the revlog index
123 /// Return number of entries of the revlog index
124 pub fn len(&self) -> usize {
124 pub fn len(&self) -> usize {
125 self.index.len()
125 self.index.len()
126 }
126 }
127
127
128 /// Return `true` if this revlog has no entries
128 /// Return `true` if this revlog has no entries
129 pub fn is_empty(&self) -> bool {
129 pub fn is_empty(&self) -> bool {
130 self.len() == 0
130 self.len() == 0
131 }
131 }
132
132
133 /// Return whether this revlog is inline (mixed index and data)
133 /// Return whether this revlog is inline (mixed index and data)
134 pub fn is_inline(&self) -> bool {
134 pub fn is_inline(&self) -> bool {
135 self.inline
135 self.inline
136 }
136 }
137
137
138 /// Clear all caches from this revlog
138 /// Clear all caches from this revlog
139 pub fn clear_cache(&mut self) {
139 pub fn clear_cache(&mut self) {
140 assert!(!self.is_delaying());
140 assert!(!self.is_delaying());
141 if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
141 if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
142 // We don't clear the allocation here because it's probably faster.
142 // We don't clear the allocation here because it's probably faster.
143 // We could change our minds later if this ends up being a problem
143 // We could change our minds later if this ends up being a problem
144 // with regards to memory consumption.
144 // with regards to memory consumption.
145 cache.borrow_mut().clear();
145 cache.borrow_mut().clear();
146 }
146 }
147 }
147 }
148
148
149 /// Return an entry for the null revision
149 /// Return an entry for the null revision
150 pub fn make_null_entry(&self) -> RevlogEntry {
150 pub fn make_null_entry(&self) -> RevlogEntry {
151 RevlogEntry {
151 RevlogEntry {
152 revlog: self,
152 revlog: self,
153 rev: NULL_REVISION,
153 rev: NULL_REVISION,
154 uncompressed_len: 0,
154 uncompressed_len: 0,
155 p1: NULL_REVISION,
155 p1: NULL_REVISION,
156 p2: NULL_REVISION,
156 p2: NULL_REVISION,
157 flags: NULL_REVLOG_ENTRY_FLAGS,
157 flags: NULL_REVLOG_ENTRY_FLAGS,
158 hash: NULL_NODE,
158 hash: NULL_NODE,
159 }
159 }
160 }
160 }
161
161
162 /// Return the [`RevlogEntry`] for a [`Revision`] that is known to exist
162 /// Return the [`RevlogEntry`] for a [`Revision`] that is known to exist
163 pub fn get_entry(
163 pub fn get_entry(
164 &self,
164 &self,
165 rev: Revision,
165 rev: Revision,
166 ) -> Result<RevlogEntry, RevlogError> {
166 ) -> Result<RevlogEntry, RevlogError> {
167 if rev == NULL_REVISION {
167 if rev == NULL_REVISION {
168 return Ok(self.make_null_entry());
168 return Ok(self.make_null_entry());
169 }
169 }
170 let index_entry = self
170 let index_entry = self
171 .index
171 .index
172 .get_entry(rev)
172 .get_entry(rev)
173 .ok_or_else(|| RevlogError::InvalidRevision(rev.to_string()))?;
173 .ok_or_else(|| RevlogError::InvalidRevision(rev.to_string()))?;
174 let p1 =
174 let p1 =
175 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
175 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
176 RevlogError::corrupted(format!(
176 RevlogError::corrupted(format!(
177 "p1 for rev {} is invalid",
177 "p1 for rev {} is invalid",
178 rev
178 rev
179 ))
179 ))
180 })?;
180 })?;
181 let p2 =
181 let p2 =
182 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
182 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
183 RevlogError::corrupted(format!(
183 RevlogError::corrupted(format!(
184 "p2 for rev {} is invalid",
184 "p2 for rev {} is invalid",
185 rev
185 rev
186 ))
186 ))
187 })?;
187 })?;
188 let entry = RevlogEntry {
188 let entry = RevlogEntry {
189 revlog: self,
189 revlog: self,
190 rev,
190 rev,
191 uncompressed_len: index_entry.uncompressed_len(),
191 uncompressed_len: index_entry.uncompressed_len(),
192 p1,
192 p1,
193 p2,
193 p2,
194 flags: index_entry.flags(),
194 flags: index_entry.flags(),
195 hash: *index_entry.hash(),
195 hash: *index_entry.hash(),
196 };
196 };
197 Ok(entry)
197 Ok(entry)
198 }
198 }
199
199
200 /// Return the [`RevlogEntry`] for `rev`. If `rev` fails to check, this
200 /// Return the [`RevlogEntry`] for `rev`. If `rev` fails to check, this
201 /// returns a [`RevlogError`].
201 /// returns a [`RevlogError`].
202 pub fn get_entry_for_unchecked_rev(
202 pub fn get_entry_for_unchecked_rev(
203 &self,
203 &self,
204 rev: UncheckedRevision,
204 rev: UncheckedRevision,
205 ) -> Result<RevlogEntry, RevlogError> {
205 ) -> Result<RevlogEntry, RevlogError> {
206 if rev == NULL_REVISION.into() {
206 if rev == NULL_REVISION.into() {
207 return Ok(self.make_null_entry());
207 return Ok(self.make_null_entry());
208 }
208 }
209 let rev = self.index.check_revision(rev).ok_or_else(|| {
209 let rev = self.index.check_revision(rev).ok_or_else(|| {
210 RevlogError::corrupted(format!("rev {} is invalid", rev))
210 RevlogError::corrupted(format!("rev {} is invalid", rev))
211 })?;
211 })?;
212 self.get_entry(rev)
212 self.get_entry(rev)
213 }
213 }
214
214
215 /// Is the revlog currently delaying the visibility of written data?
215 /// Is the revlog currently delaying the visibility of written data?
216 ///
216 ///
217 /// The delaying mechanism can be either in-memory or written on disk in a
217 /// The delaying mechanism can be either in-memory or written on disk in a
218 /// side-file.
218 /// side-file.
219 pub fn is_delaying(&self) -> bool {
219 pub fn is_delaying(&self) -> bool {
220 self.delayed_buffer.is_some() || self.original_index_file.is_some()
220 self.delayed_buffer.is_some() || self.original_index_file.is_some()
221 }
221 }
222
222
223 /// The offset of the data chunk for this revision
223 /// The offset of the data chunk for this revision
224 #[inline(always)]
224 #[inline(always)]
225 pub fn start(&self, rev: Revision) -> usize {
225 pub fn start(&self, rev: Revision) -> usize {
226 self.index.start(
226 self.index.start(
227 rev,
227 rev,
228 &self
228 &self
229 .index
229 .index
230 .get_entry(rev)
230 .get_entry(rev)
231 .unwrap_or_else(|| self.index.make_null_entry()),
231 .unwrap_or_else(|| self.index.make_null_entry()),
232 )
232 )
233 }
233 }
234
234
235 /// The length of the data chunk for this revision
235 /// The length of the data chunk for this revision
236 /// TODO rename this method and others to more explicit names than the
236 /// TODO rename this method and others to more explicit names than the
237 /// existing ones that were copied over from Python
237 /// existing ones that were copied over from Python
238 #[inline(always)]
238 #[inline(always)]
239 pub fn length(&self, rev: Revision) -> usize {
239 pub fn length(&self, rev: Revision) -> usize {
240 self.index
240 self.index
241 .get_entry(rev)
241 .get_entry(rev)
242 .unwrap_or_else(|| self.index.make_null_entry())
242 .unwrap_or_else(|| self.index.make_null_entry())
243 .compressed_len() as usize
243 .compressed_len() as usize
244 }
244 }
245
245
246 /// The end of the data chunk for this revision
246 /// The end of the data chunk for this revision
247 #[inline(always)]
247 #[inline(always)]
248 pub fn end(&self, rev: Revision) -> usize {
248 pub fn end(&self, rev: Revision) -> usize {
249 self.start(rev) + self.length(rev)
249 self.start(rev) + self.length(rev)
250 }
250 }
251
251
252 /// Return the delta parent of the given revision
252 /// Return the delta parent of the given revision
253 pub fn delta_parent(&self, rev: Revision) -> Revision {
253 pub fn delta_parent(&self, rev: Revision) -> Revision {
254 let base = self
254 let base = self
255 .index
255 .index
256 .get_entry(rev)
256 .get_entry(rev)
257 .unwrap()
257 .unwrap()
258 .base_revision_or_base_of_delta_chain();
258 .base_revision_or_base_of_delta_chain();
259 if base.0 == rev.0 {
259 if base.0 == rev.0 {
260 NULL_REVISION
260 NULL_REVISION
261 } else if self.delta_config.general_delta {
261 } else if self.delta_config.general_delta {
262 Revision(base.0)
262 Revision(base.0)
263 } else {
263 } else {
264 Revision(rev.0 - 1)
264 Revision(rev.0 - 1)
265 }
265 }
266 }
266 }
267
267
268 /// Return whether `rev` points to a snapshot revision (i.e. does not have
268 /// Return whether `rev` points to a snapshot revision (i.e. does not have
269 /// a delta base).
269 /// a delta base).
270 pub fn is_snapshot(&self, rev: Revision) -> Result<bool, RevlogError> {
270 pub fn is_snapshot(&self, rev: Revision) -> Result<bool, RevlogError> {
271 if !self.delta_config.sparse_revlog {
271 if !self.delta_config.sparse_revlog {
272 return Ok(self.delta_parent(rev) == NULL_REVISION);
272 return Ok(self.delta_parent(rev) == NULL_REVISION);
273 }
273 }
274 self.index.is_snapshot_unchecked(rev)
274 self.index.is_snapshot_unchecked(rev)
275 }
275 }
276
276
277 /// Return the delta chain for `rev` according to this revlog's config.
277 /// Return the delta chain for `rev` according to this revlog's config.
278 /// See [`Index::delta_chain`] for more information.
278 /// See [`Index::delta_chain`] for more information.
279 pub fn delta_chain(
279 pub fn delta_chain(
280 &self,
280 &self,
281 rev: Revision,
281 rev: Revision,
282 stop_rev: Option<Revision>,
282 stop_rev: Option<Revision>,
283 ) -> Result<(Vec<Revision>, bool), HgError> {
283 ) -> Result<(Vec<Revision>, bool), HgError> {
284 self.index.delta_chain(
284 self.index.delta_chain(
285 rev,
285 rev,
286 stop_rev,
286 stop_rev,
287 self.delta_config.general_delta.into(),
287 self.delta_config.general_delta.into(),
288 )
288 )
289 }
289 }
290
290
291 fn compressor(&self) -> Result<Box<dyn Compressor>, HgError> {
291 fn compressor(&self) -> Result<Box<dyn Compressor>, HgError> {
292 // TODO cache the compressor?
292 // TODO cache the compressor?
293 Ok(match self.feature_config.compression_engine {
293 Ok(match self.feature_config.compression_engine {
294 CompressionConfig::Zlib { level } => {
294 CompressionConfig::Zlib { level } => {
295 Box::new(ZlibCompressor::new(level))
295 Box::new(ZlibCompressor::new(level))
296 }
296 }
297 CompressionConfig::Zstd { level, threads } => {
297 CompressionConfig::Zstd { level, threads } => {
298 Box::new(ZstdCompressor::new(level, threads))
298 Box::new(ZstdCompressor::new(level, threads))
299 }
299 }
300 CompressionConfig::None => Box::new(NoneCompressor),
300 CompressionConfig::None => Box::new(NoneCompressor),
301 })
301 })
302 }
302 }
303
303
304 /// Generate a possibly-compressed representation of data.
304 /// Generate a possibly-compressed representation of data.
305 /// Returns `None` if the data was not compressed.
305 /// Returns `None` if the data was not compressed.
306 pub fn compress<'data>(
306 pub fn compress<'data>(
307 &self,
307 &self,
308 data: &'data [u8],
308 data: &'data [u8],
309 ) -> Result<Option<Cow<'data, [u8]>>, RevlogError> {
309 ) -> Result<Option<Cow<'data, [u8]>>, RevlogError> {
310 if data.is_empty() {
310 if data.is_empty() {
311 return Ok(Some(data.into()));
311 return Ok(Some(data.into()));
312 }
312 }
313 let res = self.compressor()?.compress(data)?;
313 let res = self.compressor()?.compress(data)?;
314 if let Some(compressed) = res {
314 if let Some(compressed) = res {
315 // The revlog compressor added the header in the returned data.
315 // The revlog compressor added the header in the returned data.
316 return Ok(Some(compressed.into()));
316 return Ok(Some(compressed.into()));
317 }
317 }
318
318
319 if data[0] == b'\0' {
319 if data[0] == b'\0' {
320 return Ok(Some(data.into()));
320 return Ok(Some(data.into()));
321 }
321 }
322 Ok(None)
322 Ok(None)
323 }
323 }
324
324
325 /// Decompress a revlog chunk.
325 /// Decompress a revlog chunk.
326 ///
326 ///
327 /// The chunk is expected to begin with a header identifying the
327 /// The chunk is expected to begin with a header identifying the
328 /// format type so it can be routed to an appropriate decompressor.
328 /// format type so it can be routed to an appropriate decompressor.
329 pub fn decompress<'a>(
329 pub fn decompress<'a>(
330 &'a self,
330 &'a self,
331 data: &'a [u8],
331 data: &'a [u8],
332 ) -> Result<Cow<[u8]>, RevlogError> {
332 ) -> Result<Cow<[u8]>, RevlogError> {
333 if data.is_empty() {
333 if data.is_empty() {
334 return Ok(data.into());
334 return Ok(data.into());
335 }
335 }
336
336
337 // Revlogs are read much more frequently than they are written and many
337 // Revlogs are read much more frequently than they are written and many
338 // chunks only take microseconds to decompress, so performance is
338 // chunks only take microseconds to decompress, so performance is
339 // important here.
339 // important here.
340
340
341 let header = data[0];
341 let header = data[0];
342 match header {
342 match header {
343 // Settings don't matter as they only affect compression
343 // Settings don't matter as they only affect compression
344 ZLIB_BYTE => Ok(ZlibCompressor::new(0).decompress(data)?.into()),
344 ZLIB_BYTE => Ok(ZlibCompressor::new(0).decompress(data)?.into()),
345 // Settings don't matter as they only affect compression
345 // Settings don't matter as they only affect compression
346 ZSTD_BYTE => {
346 ZSTD_BYTE => {
347 Ok(ZstdCompressor::new(0, 0).decompress(data)?.into())
347 Ok(ZstdCompressor::new(0, 0).decompress(data)?.into())
348 }
348 }
349 b'\0' => Ok(data.into()),
349 b'\0' => Ok(data.into()),
350 b'u' => Ok((&data[1..]).into()),
350 b'u' => Ok((&data[1..]).into()),
351 other => Err(HgError::UnsupportedFeature(format!(
351 other => Err(HgError::UnsupportedFeature(format!(
352 "unknown compression header '{}'",
352 "unknown compression header '{}'",
353 other
353 other
354 ))
354 ))
355 .into()),
355 .into()),
356 }
356 }
357 }
357 }
358
358
359 /// Obtain a segment of raw data corresponding to a range of revisions.
359 /// Obtain a segment of raw data corresponding to a range of revisions.
360 ///
360 ///
361 /// Requests for data may be satisfied by a cache.
361 /// Requests for data may be satisfied by a cache.
362 ///
362 ///
363 /// Returns a 2-tuple of (offset, data) for the requested range of
363 /// Returns a 2-tuple of (offset, data) for the requested range of
364 /// revisions. Offset is the integer offset from the beginning of the
364 /// revisions. Offset is the integer offset from the beginning of the
365 /// revlog and data is a slice of the raw byte data.
365 /// revlog and data is a slice of the raw byte data.
366 ///
366 ///
367 /// Callers will need to call `self.start(rev)` and `self.length(rev)`
367 /// Callers will need to call `self.start(rev)` and `self.length(rev)`
368 /// to determine where each revision's data begins and ends.
368 /// to determine where each revision's data begins and ends.
369 pub fn get_segment_for_revs(
369 pub fn get_segment_for_revs(
370 &self,
370 &self,
371 start_rev: Revision,
371 start_rev: Revision,
372 end_rev: Revision,
372 end_rev: Revision,
373 ) -> Result<(usize, Vec<u8>), HgError> {
373 ) -> Result<(usize, Vec<u8>), HgError> {
374 let start = if start_rev == NULL_REVISION {
374 let start = if start_rev == NULL_REVISION {
375 0
375 0
376 } else {
376 } else {
377 let start_entry = self
377 let start_entry = self
378 .index
378 .index
379 .get_entry(start_rev)
379 .get_entry(start_rev)
380 .expect("null revision segment");
380 .expect("null revision segment");
381 self.index.start(start_rev, &start_entry)
381 self.index.start(start_rev, &start_entry)
382 };
382 };
383 let end_entry = self
383 let end_entry = self
384 .index
384 .index
385 .get_entry(end_rev)
385 .get_entry(end_rev)
386 .expect("null revision segment");
386 .expect("null revision segment");
387 let end = self.index.start(end_rev, &end_entry) + self.length(end_rev);
387 let end = self.index.start(end_rev, &end_entry) + self.length(end_rev);
388
388
389 let length = end - start;
389 let length = end - start;
390
390
391 // XXX should we use mmap instead of doing this for platforms that
391 // XXX should we use mmap instead of doing this for platforms that
392 // support madvise/populate?
392 // support madvise/populate?
393 Ok((start, self.segment_file.read_chunk(start, length)?))
393 Ok((start, self.segment_file.read_chunk(start, length)?))
394 }
394 }
395
395
396 /// Return the uncompressed raw data for `rev`
396 /// Return the uncompressed raw data for `rev`
397 pub fn chunk_for_rev(&self, rev: Revision) -> Result<Arc<[u8]>, HgError> {
397 pub fn chunk_for_rev(&self, rev: Revision) -> Result<Arc<[u8]>, HgError> {
398 if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
398 if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
399 if let Some(chunk) = cache.borrow_mut().get(&rev) {
399 if let Some(chunk) = cache.borrow_mut().get(&rev) {
400 return Ok(chunk.clone());
400 return Ok(chunk.clone());
401 }
401 }
402 }
402 }
403 // TODO revlogv2 should check the compression mode
403 // TODO revlogv2 should check the compression mode
404 let data = self.get_segment_for_revs(rev, rev)?.1;
404 let data = self.get_segment_for_revs(rev, rev)?.1;
405 let uncompressed = self.decompress(&data).map_err(|e| {
405 let uncompressed = self.decompress(&data).map_err(|e| {
406 HgError::abort(
406 HgError::abort(
407 format!("revlog decompression error: {}", e),
407 format!("revlog decompression error: {}", e),
408 exit_codes::ABORT,
408 exit_codes::ABORT,
409 None,
409 None,
410 )
410 )
411 })?;
411 })?;
412 let uncompressed: Arc<[u8]> = Arc::from(uncompressed.into_owned());
412 let uncompressed: Arc<[u8]> = Arc::from(uncompressed.into_owned());
413 if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
413 if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
414 cache.borrow_mut().insert(rev, uncompressed.clone());
414 cache.borrow_mut().insert(rev, uncompressed.clone());
415 }
415 }
416 Ok(uncompressed)
416 Ok(uncompressed)
417 }
417 }
418
418
419 /// Execute `func` within a read context for the data file, meaning that
419 /// Execute `func` within a read context for the data file, meaning that
420 /// the read handle will be taken and discarded after the operation.
420 /// the read handle will be taken and discarded after the operation.
421 pub fn with_read<R>(
421 pub fn with_read<R>(
422 &self,
422 &self,
423 func: impl FnOnce() -> Result<R, RevlogError>,
423 func: impl FnOnce() -> Result<R, RevlogError>,
424 ) -> Result<R, RevlogError> {
424 ) -> Result<R, RevlogError> {
425 self.enter_reading_context()?;
425 self.enter_reading_context()?;
426 let res = func();
426 let res = func();
427 self.exit_reading_context();
427 self.exit_reading_context();
428 res.map_err(Into::into)
428 res.map_err(Into::into)
429 }
429 }
430
430
431 /// `pub` only for use in hg-cpython
431 /// `pub` only for use in hg-cpython
432 #[doc(hidden)]
432 #[doc(hidden)]
433 pub fn enter_reading_context(&self) -> Result<(), HgError> {
433 pub fn enter_reading_context(&self) -> Result<(), HgError> {
434 if self.is_empty() {
434 if self.is_empty() {
435 // Nothing to be read
435 // Nothing to be read
436 return Ok(());
436 return Ok(());
437 }
437 }
438 if self.delayed_buffer.is_some() && self.is_inline() {
438 if self.delayed_buffer.is_some() && self.is_inline() {
439 return Err(HgError::abort(
439 return Err(HgError::abort(
440 "revlog with delayed write should not be inline",
440 "revlog with delayed write should not be inline",
441 exit_codes::ABORT,
441 exit_codes::ABORT,
442 None,
442 None,
443 ));
443 ));
444 }
444 }
445 self.segment_file.get_read_handle()?;
445 self.segment_file.get_read_handle()?;
446 Ok(())
446 Ok(())
447 }
447 }
448
448
449 /// `pub` only for use in hg-cpython
449 /// `pub` only for use in hg-cpython
450 #[doc(hidden)]
450 #[doc(hidden)]
451 pub fn exit_reading_context(&self) {
451 pub fn exit_reading_context(&self) {
452 self.segment_file.exit_reading_context()
452 self.segment_file.exit_reading_context()
453 }
453 }
454
454
455 /// Fill the buffer returned by `get_buffer` with the possibly un-validated
455 /// Fill the buffer returned by `get_buffer` with the possibly un-validated
456 /// raw text for a revision. It can be already validated if it comes
456 /// raw text for a revision. It can be already validated if it comes
457 /// from the cache.
457 /// from the cache.
458 pub fn raw_text<G, T>(
458 pub fn raw_text<G, T>(
459 &self,
459 &self,
460 rev: Revision,
460 rev: Revision,
461 get_buffer: G,
461 get_buffer: G,
462 ) -> Result<(), RevlogError>
462 ) -> Result<(), RevlogError>
463 where
463 where
464 G: FnOnce(
464 G: FnOnce(
465 usize,
465 usize,
466 &mut dyn FnMut(
466 &mut dyn FnMut(
467 &mut dyn RevisionBuffer<Target = T>,
467 &mut dyn RevisionBuffer<Target = T>,
468 ) -> Result<(), RevlogError>,
468 ) -> Result<(), RevlogError>,
469 ) -> Result<(), RevlogError>,
469 ) -> Result<(), RevlogError>,
470 {
470 {
471 let entry = &self.get_entry(rev)?;
471 let entry = &self.get_entry(rev)?;
472 let raw_size = entry.uncompressed_len();
472 let raw_size = entry.uncompressed_len();
473 let mut mutex_guard = self
473 let mut mutex_guard = self
474 .last_revision_cache
474 .last_revision_cache
475 .lock()
475 .lock()
476 .expect("lock should not be held");
476 .expect("lock should not be held");
477 let cached_rev = if let Some((_node, rev, data)) = &*mutex_guard {
477 let cached_rev = if let Some((_node, rev, data)) = &*mutex_guard {
478 Some((*rev, data.deref().as_ref()))
478 Some((*rev, data.deref().as_ref()))
479 } else {
479 } else {
480 None
480 None
481 };
481 };
482 if let Some(cache) = &self.uncompressed_chunk_cache {
482 if let Some(cache) = &self.uncompressed_chunk_cache {
483 let cache = &mut cache.borrow_mut();
483 let cache = &mut cache.borrow_mut();
484 if let Some(size) = raw_size {
484 if let Some(size) = raw_size {
485 // Dynamically update the uncompressed_chunk_cache size to the
485 // Dynamically update the uncompressed_chunk_cache size to the
486 // largest revision we've seen in this revlog.
486 // largest revision we've seen in this revlog.
487 // Do it *before* restoration in case the current revision
487 // Do it *before* restoration in case the current revision
488 // is the largest.
488 // is the largest.
489 let factor = self
489 let factor = self
490 .data_config
490 .data_config
491 .uncompressed_cache_factor
491 .uncompressed_cache_factor
492 .expect("cache should not exist without factor");
492 .expect("cache should not exist without factor");
493 let candidate_size = (size as f64 * factor) as usize;
493 let candidate_size = (size as f64 * factor) as usize;
494 let limiter_mut = cache.limiter_mut();
494 let limiter_mut = cache.limiter_mut();
495 if candidate_size > limiter_mut.max_memory_usage() {
495 if candidate_size > limiter_mut.max_memory_usage() {
496 std::mem::swap(
496 std::mem::swap(
497 limiter_mut,
497 limiter_mut,
498 &mut ByMemoryUsage::new(candidate_size),
498 &mut ByMemoryUsage::new(candidate_size),
499 );
499 );
500 }
500 }
501 }
501 }
502 }
502 }
503 entry.rawdata(cached_rev, get_buffer)?;
503 entry.rawdata(cached_rev, get_buffer)?;
504 // drop cache to save memory, the caller is expected to update
504 // drop cache to save memory, the caller is expected to update
505 // the revision cache after validating the text
505 // the revision cache after validating the text
506 mutex_guard.take();
506 mutex_guard.take();
507 Ok(())
507 Ok(())
508 }
508 }
509
509
510 /// Only `pub` for `hg-cpython`.
510 /// Only `pub` for `hg-cpython`.
511 /// Obtain decompressed raw data for the specified revisions that are
511 /// Obtain decompressed raw data for the specified revisions that are
512 /// assumed to be in ascending order.
512 /// assumed to be in ascending order.
513 ///
513 ///
514 /// Returns a list with decompressed data for each requested revision.
514 /// Returns a list with decompressed data for each requested revision.
515 #[doc(hidden)]
515 #[doc(hidden)]
516 pub fn chunks(
516 pub fn chunks(
517 &self,
517 &self,
518 revs: Vec<Revision>,
518 revs: Vec<Revision>,
519 target_size: Option<u64>,
519 target_size: Option<u64>,
520 ) -> Result<Vec<Arc<[u8]>>, RevlogError> {
520 ) -> Result<Vec<Arc<[u8]>>, RevlogError> {
521 if revs.is_empty() {
521 if revs.is_empty() {
522 return Ok(vec![]);
522 return Ok(vec![]);
523 }
523 }
524 let mut fetched_revs = vec![];
524 let mut fetched_revs = vec![];
525 let mut chunks = Vec::with_capacity(revs.len());
525 let mut chunks = Vec::with_capacity(revs.len());
526
526
527 match self.uncompressed_chunk_cache.as_ref() {
527 match self.uncompressed_chunk_cache.as_ref() {
528 Some(cache) => {
528 Some(cache) => {
529 let mut cache = cache.borrow_mut();
529 let mut cache = cache.borrow_mut();
530 for rev in revs.iter() {
530 for rev in revs.iter() {
531 match cache.get(rev) {
531 match cache.get(rev) {
532 Some(hit) => chunks.push((*rev, hit.to_owned())),
532 Some(hit) => chunks.push((*rev, hit.to_owned())),
533 None => fetched_revs.push(*rev),
533 None => fetched_revs.push(*rev),
534 }
534 }
535 }
535 }
536 }
536 }
537 None => fetched_revs = revs,
537 None => fetched_revs = revs,
538 }
538 }
539
539
540 let already_cached = chunks.len();
540 let already_cached = chunks.len();
541
541
542 let sliced_chunks = if fetched_revs.is_empty() {
542 let sliced_chunks = if fetched_revs.is_empty() {
543 vec![]
543 vec![]
544 } else if !self.data_config.with_sparse_read || self.is_inline() {
544 } else if !self.data_config.with_sparse_read || self.is_inline() {
545 vec![fetched_revs]
545 vec![fetched_revs]
546 } else {
546 } else {
547 self.slice_chunk(&fetched_revs, target_size)?
547 self.slice_chunk(&fetched_revs, target_size)?
548 };
548 };
549
549
550 self.with_read(|| {
550 self.with_read(|| {
551 for revs_chunk in sliced_chunks {
551 for revs_chunk in sliced_chunks {
552 let first_rev = revs_chunk[0];
552 let first_rev = revs_chunk[0];
553 // Skip trailing revisions with empty diff
553 // Skip trailing revisions with empty diff
554 let last_rev_idx = revs_chunk
554 let last_rev_idx = revs_chunk
555 .iter()
555 .iter()
556 .rposition(|r| self.length(*r) != 0)
556 .rposition(|r| self.length(*r) != 0)
557 .unwrap_or(revs_chunk.len() - 1);
557 .unwrap_or(revs_chunk.len() - 1);
558
558
559 let last_rev = revs_chunk[last_rev_idx];
559 let last_rev = revs_chunk[last_rev_idx];
560
560
561 let (offset, data) =
561 let (offset, data) =
562 self.get_segment_for_revs(first_rev, last_rev)?;
562 self.get_segment_for_revs(first_rev, last_rev)?;
563
563
564 let revs_chunk = &revs_chunk[..=last_rev_idx];
564 let revs_chunk = &revs_chunk[..=last_rev_idx];
565
565
566 for rev in revs_chunk {
566 for rev in revs_chunk {
567 let chunk_start = self.start(*rev);
567 let chunk_start = self.start(*rev);
568 let chunk_length = self.length(*rev);
568 let chunk_length = self.length(*rev);
569 // TODO revlogv2 should check the compression mode
569 // TODO revlogv2 should check the compression mode
570 let bytes = &data[chunk_start - offset..][..chunk_length];
570 let bytes = &data[chunk_start - offset..][..chunk_length];
571 let chunk = if !bytes.is_empty() && bytes[0] == ZSTD_BYTE {
571 let chunk = if !bytes.is_empty() && bytes[0] == ZSTD_BYTE {
572 // If we're using `zstd`, we want to try a more
572 // If we're using `zstd`, we want to try a more
573 // specialized decompression
573 // specialized decompression
574 let entry = self.index.get_entry(*rev).unwrap();
574 let entry = self.index.get_entry(*rev).unwrap();
575 let is_delta = entry
575 let is_delta = entry
576 .base_revision_or_base_of_delta_chain()
576 .base_revision_or_base_of_delta_chain()
577 != (*rev).into();
577 != (*rev).into();
578 let uncompressed = uncompressed_zstd_data(
578 let uncompressed = uncompressed_zstd_data(
579 bytes,
579 bytes,
580 is_delta,
580 is_delta,
581 entry.uncompressed_len(),
581 entry.uncompressed_len(),
582 )?;
582 )?;
583 Cow::Owned(uncompressed)
583 Cow::Owned(uncompressed)
584 } else {
584 } else {
585 // Otherwise just fallback to generic decompression.
585 // Otherwise just fallback to generic decompression.
586 self.decompress(bytes)?
586 self.decompress(bytes)?
587 };
587 };
588
588
589 chunks.push((*rev, chunk.into()));
589 chunks.push((*rev, chunk.into()));
590 }
590 }
591 }
591 }
592 Ok(())
592 Ok(())
593 })?;
593 })?;
594
594
595 if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
595 if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
596 let mut cache = cache.borrow_mut();
596 let mut cache = cache.borrow_mut();
597 for (rev, chunk) in chunks.iter().skip(already_cached) {
597 for (rev, chunk) in chunks.iter().skip(already_cached) {
598 cache.insert(*rev, chunk.clone());
598 cache.insert(*rev, chunk.clone());
599 }
599 }
600 }
600 }
601 // Use stable sort here since it's *mostly* sorted
601 // Use stable sort here since it's *mostly* sorted
602 chunks.sort_by(|a, b| a.0.cmp(&b.0));
602 chunks.sort_by(|a, b| a.0.cmp(&b.0));
603 Ok(chunks.into_iter().map(|(_r, chunk)| chunk).collect())
603 Ok(chunks.into_iter().map(|(_r, chunk)| chunk).collect())
604 }
604 }
605
605
606 /// Slice revs to reduce the amount of unrelated data to be read from disk.
606 /// Slice revs to reduce the amount of unrelated data to be read from disk.
607 ///
607 ///
608 /// ``revs`` is sliced into groups that should be read in one time.
608 /// ``revs`` is sliced into groups that should be read in one time.
609 /// Assume that revs are sorted.
609 /// Assume that revs are sorted.
610 ///
610 ///
611 /// The initial chunk is sliced until the overall density
611 /// The initial chunk is sliced until the overall density
612 /// (payload/chunks-span ratio) is above
612 /// (payload/chunks-span ratio) is above
613 /// `revlog.data_config.sr_density_threshold`.
613 /// `revlog.data_config.sr_density_threshold`.
614 /// No gap smaller than `revlog.data_config.sr_min_gap_size` is skipped.
614 /// No gap smaller than `revlog.data_config.sr_min_gap_size` is skipped.
615 ///
615 ///
616 /// If `target_size` is set, no chunk larger than `target_size`
616 /// If `target_size` is set, no chunk larger than `target_size`
617 /// will be returned.
617 /// will be returned.
618 /// For consistency with other slicing choices, this limit won't go lower
618 /// For consistency with other slicing choices, this limit won't go lower
619 /// than `revlog.data_config.sr_min_gap_size`.
619 /// than `revlog.data_config.sr_min_gap_size`.
620 ///
620 ///
621 /// If individual revision chunks are larger than this limit, they will
621 /// If individual revision chunks are larger than this limit, they will
622 /// still be raised individually.
622 /// still be raised individually.
623 pub fn slice_chunk(
623 pub fn slice_chunk(
624 &self,
624 &self,
625 revs: &[Revision],
625 revs: &[Revision],
626 target_size: Option<u64>,
626 target_size: Option<u64>,
627 ) -> Result<Vec<Vec<Revision>>, RevlogError> {
627 ) -> Result<Vec<Vec<Revision>>, RevlogError> {
628 let target_size =
628 let target_size =
629 target_size.map(|size| size.max(self.data_config.sr_min_gap_size));
629 target_size.map(|size| size.max(self.data_config.sr_min_gap_size));
630
630
631 let target_density = self.data_config.sr_density_threshold;
631 let target_density = self.data_config.sr_density_threshold;
632 let min_gap_size = self.data_config.sr_min_gap_size as usize;
632 let min_gap_size = self.data_config.sr_min_gap_size as usize;
633 let to_density = self.index.slice_chunk_to_density(
633 let to_density = self.index.slice_chunk_to_density(
634 revs,
634 revs,
635 target_density,
635 target_density,
636 min_gap_size,
636 min_gap_size,
637 );
637 );
638
638
639 let mut sliced = vec![];
639 let mut sliced = vec![];
640
640
641 for chunk in to_density {
641 for chunk in to_density {
642 sliced.extend(
642 sliced.extend(
643 self.slice_chunk_to_size(&chunk, target_size)?
643 self.slice_chunk_to_size(&chunk, target_size)?
644 .into_iter()
644 .into_iter()
645 .map(ToOwned::to_owned),
645 .map(ToOwned::to_owned),
646 );
646 );
647 }
647 }
648
648
649 Ok(sliced)
649 Ok(sliced)
650 }
650 }
651
651
652 /// Slice revs to match the target size
652 /// Slice revs to match the target size
653 ///
653 ///
654 /// This is intended to be used on chunks that density slicing selected,
654 /// This is intended to be used on chunks that density slicing selected,
655 /// but that are still too large compared to the read guarantee of revlogs.
655 /// but that are still too large compared to the read guarantee of revlogs.
656 /// This might happen when the "minimal gap size" interrupted the slicing
656 /// This might happen when the "minimal gap size" interrupted the slicing
657 /// or when chains are built in a way that create large blocks next to
657 /// or when chains are built in a way that create large blocks next to
658 /// each other.
658 /// each other.
659 fn slice_chunk_to_size<'a>(
659 fn slice_chunk_to_size<'a>(
660 &self,
660 &self,
661 revs: &'a [Revision],
661 revs: &'a [Revision],
662 target_size: Option<u64>,
662 target_size: Option<u64>,
663 ) -> Result<Vec<&'a [Revision]>, RevlogError> {
663 ) -> Result<Vec<&'a [Revision]>, RevlogError> {
664 let mut start_data = self.start(revs[0]);
664 let mut start_data = self.start(revs[0]);
665 let end_data = self.end(revs[revs.len() - 1]);
665 let end_data = self.end(revs[revs.len() - 1]);
666 let full_span = end_data - start_data;
666 let full_span = end_data - start_data;
667
667
668 let nothing_to_do = target_size
668 let nothing_to_do = target_size
669 .map(|size| full_span <= size as usize)
669 .map(|size| full_span <= size as usize)
670 .unwrap_or(true);
670 .unwrap_or(true);
671
671
672 if nothing_to_do {
672 if nothing_to_do {
673 return Ok(vec![revs]);
673 return Ok(vec![revs]);
674 }
674 }
675 let target_size = target_size.expect("target_size is set") as usize;
675 let target_size = target_size.expect("target_size is set") as usize;
676
676
677 let mut start_rev_idx = 0;
677 let mut start_rev_idx = 0;
678 let mut end_rev_idx = 1;
678 let mut end_rev_idx = 1;
679 let mut chunks = vec![];
679 let mut chunks = vec![];
680
680
681 for (idx, rev) in revs.iter().enumerate().skip(1) {
681 for (idx, rev) in revs.iter().enumerate().skip(1) {
682 let span = self.end(*rev) - start_data;
682 let span = self.end(*rev) - start_data;
683 let is_snapshot = self.is_snapshot(*rev)?;
683 let is_snapshot = self.is_snapshot(*rev)?;
684 if span <= target_size && is_snapshot {
684 if span <= target_size && is_snapshot {
685 end_rev_idx = idx + 1;
685 end_rev_idx = idx + 1;
686 } else {
686 } else {
687 let chunk =
687 let chunk =
688 self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
688 self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
689 if !chunk.is_empty() {
689 if !chunk.is_empty() {
690 chunks.push(chunk);
690 chunks.push(chunk);
691 }
691 }
692 start_rev_idx = idx;
692 start_rev_idx = idx;
693 start_data = self.start(*rev);
693 start_data = self.start(*rev);
694 end_rev_idx = idx + 1;
694 end_rev_idx = idx + 1;
695 }
695 }
696 if !is_snapshot {
696 if !is_snapshot {
697 break;
697 break;
698 }
698 }
699 }
699 }
700
700
701 // For the others, we use binary slicing to quickly converge towards
701 // For the others, we use binary slicing to quickly converge towards
702 // valid chunks (otherwise, we might end up looking for the start/end
702 // valid chunks (otherwise, we might end up looking for the start/end
703 // of many revisions). This logic is not looking for the perfect
703 // of many revisions). This logic is not looking for the perfect
704 // slicing point, it quickly converges towards valid chunks.
704 // slicing point, it quickly converges towards valid chunks.
705 let number_of_items = revs.len();
705 let number_of_items = revs.len();
706
706
707 while (end_data - start_data) > target_size {
707 while (end_data - start_data) > target_size {
708 end_rev_idx = number_of_items;
708 end_rev_idx = number_of_items;
709 if number_of_items - start_rev_idx <= 1 {
709 if number_of_items - start_rev_idx <= 1 {
710 // Protect against individual chunks larger than the limit
710 // Protect against individual chunks larger than the limit
711 break;
711 break;
712 }
712 }
713 let mut local_end_data = self.end(revs[end_rev_idx - 1]);
713 let mut local_end_data = self.end(revs[end_rev_idx - 1]);
714 let mut span = local_end_data - start_data;
714 let mut span = local_end_data - start_data;
715 while span > target_size {
715 while span > target_size {
716 if end_rev_idx - start_rev_idx <= 1 {
716 if end_rev_idx - start_rev_idx <= 1 {
717 // Protect against individual chunks larger than the limit
717 // Protect against individual chunks larger than the limit
718 break;
718 break;
719 }
719 }
720 end_rev_idx -= (end_rev_idx - start_rev_idx) / 2;
720 end_rev_idx -= (end_rev_idx - start_rev_idx) / 2;
721 local_end_data = self.end(revs[end_rev_idx - 1]);
721 local_end_data = self.end(revs[end_rev_idx - 1]);
722 span = local_end_data - start_data;
722 span = local_end_data - start_data;
723 }
723 }
724 let chunk =
724 let chunk =
725 self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
725 self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
726 if !chunk.is_empty() {
726 if !chunk.is_empty() {
727 chunks.push(chunk);
727 chunks.push(chunk);
728 }
728 }
729 start_rev_idx = end_rev_idx;
729 start_rev_idx = end_rev_idx;
730 start_data = self.start(revs[start_rev_idx]);
730 start_data = self.start(revs[start_rev_idx]);
731 }
731 }
732
732
733 let chunk = self.trim_chunk(revs, start_rev_idx, None);
733 let chunk = self.trim_chunk(revs, start_rev_idx, None);
734 if !chunk.is_empty() {
734 if !chunk.is_empty() {
735 chunks.push(chunk);
735 chunks.push(chunk);
736 }
736 }
737
737
738 Ok(chunks)
738 Ok(chunks)
739 }
739 }
740
740
741 /// Returns `revs[startidx..endidx]` without empty trailing revs
741 /// Returns `revs[startidx..endidx]` without empty trailing revs
742 fn trim_chunk<'a>(
742 fn trim_chunk<'a>(
743 &self,
743 &self,
744 revs: &'a [Revision],
744 revs: &'a [Revision],
745 start_rev_idx: usize,
745 start_rev_idx: usize,
746 end_rev_idx: Option<usize>,
746 end_rev_idx: Option<usize>,
747 ) -> &'a [Revision] {
747 ) -> &'a [Revision] {
748 let mut end_rev_idx = end_rev_idx.unwrap_or(revs.len());
748 let mut end_rev_idx = end_rev_idx.unwrap_or(revs.len());
749
749
750 // If we have a non-empty delta candidate, there is nothing to trim
750 // If we have a non-empty delta candidate, there is nothing to trim
751 if revs[end_rev_idx - 1].0 < self.len() as BaseRevision {
751 if revs[end_rev_idx - 1].0 < self.len() as BaseRevision {
752 // Trim empty revs at the end, except the very first rev of a chain
752 // Trim empty revs at the end, except the very first rev of a chain
753 while end_rev_idx > 1
753 while end_rev_idx > 1
754 && end_rev_idx > start_rev_idx
754 && end_rev_idx > start_rev_idx
755 && self.length(revs[end_rev_idx - 1]) == 0
755 && self.length(revs[end_rev_idx - 1]) == 0
756 {
756 {
757 end_rev_idx -= 1
757 end_rev_idx -= 1
758 }
758 }
759 }
759 }
760
760
761 &revs[start_rev_idx..end_rev_idx]
761 &revs[start_rev_idx..end_rev_idx]
762 }
762 }
763
763
764 /// Check the hash of some given data against the recorded hash.
764 /// Check the hash of some given data against the recorded hash.
765 pub fn check_hash(
765 pub fn check_hash(
766 &self,
766 &self,
767 p1: Revision,
767 p1: Revision,
768 p2: Revision,
768 p2: Revision,
769 expected: &[u8],
769 expected: &[u8],
770 data: &[u8],
770 data: &[u8],
771 ) -> bool {
771 ) -> bool {
772 let e1 = self.index.get_entry(p1);
772 let e1 = self.index.get_entry(p1);
773 let h1 = match e1 {
773 let h1 = match e1 {
774 Some(ref entry) => entry.hash(),
774 Some(ref entry) => entry.hash(),
775 None => &NULL_NODE,
775 None => &NULL_NODE,
776 };
776 };
777 let e2 = self.index.get_entry(p2);
777 let e2 = self.index.get_entry(p2);
778 let h2 = match e2 {
778 let h2 = match e2 {
779 Some(ref entry) => entry.hash(),
779 Some(ref entry) => entry.hash(),
780 None => &NULL_NODE,
780 None => &NULL_NODE,
781 };
781 };
782
782
783 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
783 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
784 }
784 }
785
785
786 /// Returns whether we are currently in a [`Self::with_write`] context
786 /// Returns whether we are currently in a [`Self::with_write`] context
787 pub fn is_writing(&self) -> bool {
787 pub fn is_writing(&self) -> bool {
788 self.writing_handles.is_some()
788 self.writing_handles.is_some()
789 }
789 }
790
790
791 /// Open the revlog files for writing
791 /// Open the revlog files for writing
792 ///
792 ///
793 /// Adding content to a revlog should be done within this context.
793 /// Adding content to a revlog should be done within this context.
794 /// TODO try using `BufRead` and `BufWrite` and see if performance improves
794 /// TODO try using `BufRead` and `BufWrite` and see if performance improves
795 pub fn with_write<R>(
795 pub fn with_write<R>(
796 &mut self,
796 &mut self,
797 transaction: &mut impl Transaction,
797 transaction: &mut impl Transaction,
798 data_end: Option<usize>,
798 data_end: Option<usize>,
799 func: impl FnOnce() -> R,
799 func: impl FnOnce() -> R,
800 ) -> Result<R, HgError> {
800 ) -> Result<R, HgError> {
801 if self.is_writing() {
801 if self.is_writing() {
802 return Ok(func());
802 return Ok(func());
803 }
803 }
804 self.enter_writing_context(data_end, transaction)
804 self.enter_writing_context(data_end, transaction)
805 .map_err(|e| {
805 .inspect_err(|_| {
806 self.exit_writing_context();
806 self.exit_writing_context();
807 e
808 })?;
807 })?;
809 let res = func();
808 let res = func();
810 self.exit_writing_context();
809 self.exit_writing_context();
811 Ok(res)
810 Ok(res)
812 }
811 }
813
812
814 /// `pub` only for use in hg-cpython
813 /// `pub` only for use in hg-cpython
815 #[doc(hidden)]
814 #[doc(hidden)]
816 pub fn exit_writing_context(&mut self) {
815 pub fn exit_writing_context(&mut self) {
817 self.writing_handles.take();
816 self.writing_handles.take();
818 self.segment_file.writing_handle.take();
817 self.segment_file.writing_handle.take();
819 self.segment_file.reading_handle.take();
818 self.segment_file.reading_handle.take();
820 }
819 }
821
820
822 /// `pub` only for use in hg-cpython
821 /// `pub` only for use in hg-cpython
823 #[doc(hidden)]
822 #[doc(hidden)]
824 pub fn python_writing_handles(&self) -> Option<&WriteHandles> {
823 pub fn python_writing_handles(&self) -> Option<&WriteHandles> {
825 self.writing_handles.as_ref()
824 self.writing_handles.as_ref()
826 }
825 }
827
826
828 /// `pub` only for use in hg-cpython
827 /// `pub` only for use in hg-cpython
829 #[doc(hidden)]
828 #[doc(hidden)]
830 pub fn enter_writing_context(
829 pub fn enter_writing_context(
831 &mut self,
830 &mut self,
832 data_end: Option<usize>,
831 data_end: Option<usize>,
833 transaction: &mut impl Transaction,
832 transaction: &mut impl Transaction,
834 ) -> Result<(), HgError> {
833 ) -> Result<(), HgError> {
835 let data_size = if self.is_empty() {
834 let data_size = if self.is_empty() {
836 0
835 0
837 } else {
836 } else {
838 self.end(Revision((self.len() - 1) as BaseRevision))
837 self.end(Revision((self.len() - 1) as BaseRevision))
839 };
838 };
840 let data_handle = if !self.is_inline() {
839 let data_handle = if !self.is_inline() {
841 let data_handle = match self.vfs.open(&self.data_file) {
840 let data_handle = match self.vfs.open(&self.data_file) {
842 Ok(mut f) => {
841 Ok(mut f) => {
843 if let Some(end) = data_end {
842 if let Some(end) = data_end {
844 f.seek(SeekFrom::Start(end as u64))
843 f.seek(SeekFrom::Start(end as u64))
845 .when_reading_file(&self.data_file)?;
844 .when_reading_file(&self.data_file)?;
846 } else {
845 } else {
847 f.seek(SeekFrom::End(0))
846 f.seek(SeekFrom::End(0))
848 .when_reading_file(&self.data_file)?;
847 .when_reading_file(&self.data_file)?;
849 }
848 }
850 f
849 f
851 }
850 }
852 Err(e) => match e {
851 Err(e) => match e {
853 HgError::IoError { error, context } => {
852 HgError::IoError { error, context } => {
854 if error.kind() != ErrorKind::NotFound {
853 if error.kind() != ErrorKind::NotFound {
855 return Err(HgError::IoError { error, context });
854 return Err(HgError::IoError { error, context });
856 }
855 }
857 self.vfs.create(&self.data_file, true)?
856 self.vfs.create(&self.data_file, true)?
858 }
857 }
859 e => return Err(e),
858 e => return Err(e),
860 },
859 },
861 };
860 };
862 transaction.add(&self.data_file, data_size);
861 transaction.add(&self.data_file, data_size);
863 Some(FileHandle::from_file(
862 Some(FileHandle::from_file(
864 data_handle,
863 data_handle,
865 dyn_clone::clone_box(&*self.vfs),
864 dyn_clone::clone_box(&*self.vfs),
866 &self.data_file,
865 &self.data_file,
867 ))
866 ))
868 } else {
867 } else {
869 None
868 None
870 };
869 };
871 let index_size = self.len() * INDEX_ENTRY_SIZE;
870 let index_size = self.len() * INDEX_ENTRY_SIZE;
872 let index_handle = self.index_write_handle()?;
871 let index_handle = self.index_write_handle()?;
873 if self.is_inline() {
872 if self.is_inline() {
874 transaction.add(&self.index_file, data_size);
873 transaction.add(&self.index_file, data_size);
875 } else {
874 } else {
876 transaction.add(&self.index_file, index_size);
875 transaction.add(&self.index_file, index_size);
877 }
876 }
878 self.writing_handles = Some(WriteHandles {
877 self.writing_handles = Some(WriteHandles {
879 index_handle: index_handle.clone(),
878 index_handle: index_handle.clone(),
880 data_handle: data_handle.clone(),
879 data_handle: data_handle.clone(),
881 });
880 });
882 *self.segment_file.reading_handle.borrow_mut() = if self.is_inline() {
881 *self.segment_file.reading_handle.borrow_mut() = if self.is_inline() {
883 Some(index_handle)
882 Some(index_handle)
884 } else {
883 } else {
885 data_handle
884 data_handle
886 };
885 };
887 Ok(())
886 Ok(())
888 }
887 }
889
888
890 /// Get a write handle to the index, sought to the end of its data.
889 /// Get a write handle to the index, sought to the end of its data.
891 fn index_write_handle(&self) -> Result<FileHandle, HgError> {
890 fn index_write_handle(&self) -> Result<FileHandle, HgError> {
892 let res = if self.delayed_buffer.is_none() {
891 let res = if self.delayed_buffer.is_none() {
893 if self.data_config.check_ambig {
892 if self.data_config.check_ambig {
894 self.vfs.open_check_ambig(&self.index_file)
893 self.vfs.open_check_ambig(&self.index_file)
895 } else {
894 } else {
896 self.vfs.open(&self.index_file)
895 self.vfs.open(&self.index_file)
897 }
896 }
898 } else {
897 } else {
899 self.vfs.open(&self.index_file)
898 self.vfs.open(&self.index_file)
900 };
899 };
901 match res {
900 match res {
902 Ok(mut handle) => {
901 Ok(mut handle) => {
903 handle
902 handle
904 .seek(SeekFrom::End(0))
903 .seek(SeekFrom::End(0))
905 .when_reading_file(&self.index_file)?;
904 .when_reading_file(&self.index_file)?;
906 Ok(
905 Ok(
907 if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
906 if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
908 {
907 {
909 FileHandle::from_file_delayed(
908 FileHandle::from_file_delayed(
910 handle,
909 handle,
911 dyn_clone::clone_box(&*self.vfs),
910 dyn_clone::clone_box(&*self.vfs),
912 &self.index_file,
911 &self.index_file,
913 delayed_buffer.clone(),
912 delayed_buffer.clone(),
914 )?
913 )?
915 } else {
914 } else {
916 FileHandle::from_file(
915 FileHandle::from_file(
917 handle,
916 handle,
918 dyn_clone::clone_box(&*self.vfs),
917 dyn_clone::clone_box(&*self.vfs),
919 &self.index_file,
918 &self.index_file,
920 )
919 )
921 },
920 },
922 )
921 )
923 }
922 }
924 Err(e) => match e {
923 Err(e) => match e {
925 HgError::IoError { error, context } => {
924 HgError::IoError { error, context } => {
926 if error.kind() != ErrorKind::NotFound {
925 if error.kind() != ErrorKind::NotFound {
927 return Err(HgError::IoError { error, context });
926 return Err(HgError::IoError { error, context });
928 };
927 };
929 if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
928 if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
930 {
929 {
931 FileHandle::new_delayed(
930 FileHandle::new_delayed(
932 dyn_clone::clone_box(&*self.vfs),
931 dyn_clone::clone_box(&*self.vfs),
933 &self.index_file,
932 &self.index_file,
934 true,
933 true,
935 delayed_buffer.clone(),
934 delayed_buffer.clone(),
936 )
935 )
937 } else {
936 } else {
938 FileHandle::new(
937 FileHandle::new(
939 dyn_clone::clone_box(&*self.vfs),
938 dyn_clone::clone_box(&*self.vfs),
940 &self.index_file,
939 &self.index_file,
941 true,
940 true,
942 true,
941 true,
943 )
942 )
944 }
943 }
945 }
944 }
946 e => Err(e),
945 e => Err(e),
947 },
946 },
948 }
947 }
949 }
948 }
950
949
951 /// Split the data of an inline revlog into an index and a data file
950 /// Split the data of an inline revlog into an index and a data file
952 pub fn split_inline(
951 pub fn split_inline(
953 &mut self,
952 &mut self,
954 header: IndexHeader,
953 header: IndexHeader,
955 new_index_file_path: Option<PathBuf>,
954 new_index_file_path: Option<PathBuf>,
956 ) -> Result<PathBuf, RevlogError> {
955 ) -> Result<PathBuf, RevlogError> {
957 assert!(self.delayed_buffer.is_none());
956 assert!(self.delayed_buffer.is_none());
958 let existing_handles = self.writing_handles.is_some();
957 let existing_handles = self.writing_handles.is_some();
959 if let Some(handles) = &mut self.writing_handles {
958 if let Some(handles) = &mut self.writing_handles {
960 handles.index_handle.flush()?;
959 handles.index_handle.flush()?;
961 self.writing_handles.take();
960 self.writing_handles.take();
962 self.segment_file.writing_handle.take();
961 self.segment_file.writing_handle.take();
963 }
962 }
964 let mut new_data_file_handle =
963 let mut new_data_file_handle =
965 self.vfs.create(&self.data_file, true)?;
964 self.vfs.create(&self.data_file, true)?;
966 // Drop any potential data, possibly redundant with the VFS impl.
965 // Drop any potential data, possibly redundant with the VFS impl.
967 new_data_file_handle
966 new_data_file_handle
968 .set_len(0)
967 .set_len(0)
969 .when_writing_file(&self.data_file)?;
968 .when_writing_file(&self.data_file)?;
970
969
971 self.with_read(|| -> Result<(), RevlogError> {
970 self.with_read(|| -> Result<(), RevlogError> {
972 for r in 0..self.index.len() {
971 for r in 0..self.index.len() {
973 let rev = Revision(r as BaseRevision);
972 let rev = Revision(r as BaseRevision);
974 let rev_segment = self.get_segment_for_revs(rev, rev)?.1;
973 let rev_segment = self.get_segment_for_revs(rev, rev)?.1;
975 new_data_file_handle
974 new_data_file_handle
976 .write_all(&rev_segment)
975 .write_all(&rev_segment)
977 .when_writing_file(&self.data_file)?;
976 .when_writing_file(&self.data_file)?;
978 }
977 }
979 new_data_file_handle
978 new_data_file_handle
980 .flush()
979 .flush()
981 .when_writing_file(&self.data_file)?;
980 .when_writing_file(&self.data_file)?;
982 Ok(())
981 Ok(())
983 })?;
982 })?;
984
983
985 if let Some(index_path) = new_index_file_path {
984 if let Some(index_path) = new_index_file_path {
986 self.index_file = index_path
985 self.index_file = index_path
987 }
986 }
988
987
989 let mut new_index_handle = self.vfs.create(&self.index_file, true)?;
988 let mut new_index_handle = self.vfs.create(&self.index_file, true)?;
990 let mut new_data = Vec::with_capacity(self.len() * INDEX_ENTRY_SIZE);
989 let mut new_data = Vec::with_capacity(self.len() * INDEX_ENTRY_SIZE);
991 for r in 0..self.len() {
990 for r in 0..self.len() {
992 let rev = Revision(r as BaseRevision);
991 let rev = Revision(r as BaseRevision);
993 let entry = self.index.entry_binary(rev).unwrap_or_else(|| {
992 let entry = self.index.entry_binary(rev).unwrap_or_else(|| {
994 panic!(
993 panic!(
995 "entry {} should exist in {}",
994 "entry {} should exist in {}",
996 r,
995 r,
997 self.index_file.display()
996 self.index_file.display()
998 )
997 )
999 });
998 });
1000 if r == 0 {
999 if r == 0 {
1001 new_data.extend(header.header_bytes);
1000 new_data.extend(header.header_bytes);
1002 }
1001 }
1003 new_data.extend(entry);
1002 new_data.extend(entry);
1004 }
1003 }
1005 new_index_handle
1004 new_index_handle
1006 .write_all(&new_data)
1005 .write_all(&new_data)
1007 .when_writing_file(&self.index_file)?;
1006 .when_writing_file(&self.index_file)?;
1008 // Replace the index with a new one because the buffer contains inline
1007 // Replace the index with a new one because the buffer contains inline
1009 // data
1008 // data
1010 self.index = Index::new(Box::new(new_data), header)?;
1009 self.index = Index::new(Box::new(new_data), header)?;
1011 self.inline = false;
1010 self.inline = false;
1012
1011
1013 self.segment_file = RandomAccessFile::new(
1012 self.segment_file = RandomAccessFile::new(
1014 dyn_clone::clone_box(&*self.vfs),
1013 dyn_clone::clone_box(&*self.vfs),
1015 self.data_file.to_owned(),
1014 self.data_file.to_owned(),
1016 );
1015 );
1017 if existing_handles {
1016 if existing_handles {
1018 // Switched from inline to conventional, reopen the index
1017 // Switched from inline to conventional, reopen the index
1019 let new_data_handle = Some(FileHandle::from_file(
1018 let new_data_handle = Some(FileHandle::from_file(
1020 new_data_file_handle,
1019 new_data_file_handle,
1021 dyn_clone::clone_box(&*self.vfs),
1020 dyn_clone::clone_box(&*self.vfs),
1022 &self.data_file,
1021 &self.data_file,
1023 ));
1022 ));
1024 self.writing_handles = Some(WriteHandles {
1023 self.writing_handles = Some(WriteHandles {
1025 index_handle: self.index_write_handle()?,
1024 index_handle: self.index_write_handle()?,
1026 data_handle: new_data_handle.clone(),
1025 data_handle: new_data_handle.clone(),
1027 });
1026 });
1028 *self.segment_file.writing_handle.borrow_mut() = new_data_handle;
1027 *self.segment_file.writing_handle.borrow_mut() = new_data_handle;
1029 }
1028 }
1030
1029
1031 Ok(self.index_file.to_owned())
1030 Ok(self.index_file.to_owned())
1032 }
1031 }
1033
1032
1034 /// Write a new entry to this revlog.
1033 /// Write a new entry to this revlog.
1035 /// - `entry` is the index bytes
1034 /// - `entry` is the index bytes
1036 /// - `header_and_data` is the compression header and the revision data
1035 /// - `header_and_data` is the compression header and the revision data
1037 /// - `offset` is the position in the data file to write to
1036 /// - `offset` is the position in the data file to write to
1038 /// - `index_end` is the overwritten position in the index in revlog-v2,
1037 /// - `index_end` is the overwritten position in the index in revlog-v2,
1039 /// since the format may allow a rewrite of garbage data at the end.
1038 /// since the format may allow a rewrite of garbage data at the end.
1040 /// - `data_end` is the overwritten position in the data-file in revlog-v2,
1039 /// - `data_end` is the overwritten position in the data-file in revlog-v2,
1041 /// since the format may allow a rewrite of garbage data at the end.
1040 /// since the format may allow a rewrite of garbage data at the end.
1042 ///
1041 ///
1043 /// XXX Why do we have `data_end` *and* `offset`? Same question in Python
1042 /// XXX Why do we have `data_end` *and* `offset`? Same question in Python
1044 pub fn write_entry(
1043 pub fn write_entry(
1045 &mut self,
1044 &mut self,
1046 mut transaction: impl Transaction,
1045 mut transaction: impl Transaction,
1047 entry: &[u8],
1046 entry: &[u8],
1048 header_and_data: (&[u8], &[u8]),
1047 header_and_data: (&[u8], &[u8]),
1049 mut offset: usize,
1048 mut offset: usize,
1050 index_end: Option<u64>,
1049 index_end: Option<u64>,
1051 data_end: Option<u64>,
1050 data_end: Option<u64>,
1052 ) -> Result<(u64, Option<u64>), HgError> {
1051 ) -> Result<(u64, Option<u64>), HgError> {
1053 let current_revision = self.len() - 1;
1052 let current_revision = self.len() - 1;
1054 let canonical_index_file = self.canonical_index_file();
1053 let canonical_index_file = self.canonical_index_file();
1055
1054
1056 let is_inline = self.is_inline();
1055 let is_inline = self.is_inline();
1057 let handles = match &mut self.writing_handles {
1056 let handles = match &mut self.writing_handles {
1058 None => {
1057 None => {
1059 return Err(HgError::abort(
1058 return Err(HgError::abort(
1060 "adding revision outside of the `with_write` context",
1059 "adding revision outside of the `with_write` context",
1061 exit_codes::ABORT,
1060 exit_codes::ABORT,
1062 None,
1061 None,
1063 ));
1062 ));
1064 }
1063 }
1065 Some(handles) => handles,
1064 Some(handles) => handles,
1066 };
1065 };
1067 let index_handle = &mut handles.index_handle;
1066 let index_handle = &mut handles.index_handle;
1068 let data_handle = &mut handles.data_handle;
1067 let data_handle = &mut handles.data_handle;
1069 if let Some(end) = index_end {
1068 if let Some(end) = index_end {
1070 index_handle
1069 index_handle
1071 .seek(SeekFrom::Start(end))
1070 .seek(SeekFrom::Start(end))
1072 .when_reading_file(&self.index_file)?;
1071 .when_reading_file(&self.index_file)?;
1073 } else {
1072 } else {
1074 index_handle
1073 index_handle
1075 .seek(SeekFrom::End(0))
1074 .seek(SeekFrom::End(0))
1076 .when_reading_file(&self.index_file)?;
1075 .when_reading_file(&self.index_file)?;
1077 }
1076 }
1078 if let Some(data_handle) = data_handle {
1077 if let Some(data_handle) = data_handle {
1079 if let Some(end) = data_end {
1078 if let Some(end) = data_end {
1080 data_handle
1079 data_handle
1081 .seek(SeekFrom::Start(end))
1080 .seek(SeekFrom::Start(end))
1082 .when_reading_file(&self.data_file)?;
1081 .when_reading_file(&self.data_file)?;
1083 } else {
1082 } else {
1084 data_handle
1083 data_handle
1085 .seek(SeekFrom::End(0))
1084 .seek(SeekFrom::End(0))
1086 .when_reading_file(&self.data_file)?;
1085 .when_reading_file(&self.data_file)?;
1087 }
1086 }
1088 }
1087 }
1089 let (header, data) = header_and_data;
1088 let (header, data) = header_and_data;
1090
1089
1091 if !is_inline {
1090 if !is_inline {
1092 transaction.add(&self.data_file, offset);
1091 transaction.add(&self.data_file, offset);
1093 transaction
1092 transaction
1094 .add(&canonical_index_file, current_revision * entry.len());
1093 .add(&canonical_index_file, current_revision * entry.len());
1095 let data_handle = data_handle
1094 let data_handle = data_handle
1096 .as_mut()
1095 .as_mut()
1097 .expect("data handle should exist when not inline");
1096 .expect("data handle should exist when not inline");
1098 if !header.is_empty() {
1097 if !header.is_empty() {
1099 data_handle.write_all(header)?;
1098 data_handle.write_all(header)?;
1100 }
1099 }
1101 data_handle.write_all(data)?;
1100 data_handle.write_all(data)?;
1102 match &mut self.delayed_buffer {
1101 match &mut self.delayed_buffer {
1103 Some(buf) => {
1102 Some(buf) => {
1104 buf.lock()
1103 buf.lock()
1105 .expect("propagate the panic")
1104 .expect("propagate the panic")
1106 .buffer
1105 .buffer
1107 .write_all(entry)
1106 .write_all(entry)
1108 .expect("write to delay buffer should succeed");
1107 .expect("write to delay buffer should succeed");
1109 }
1108 }
1110 None => index_handle.write_all(entry)?,
1109 None => index_handle.write_all(entry)?,
1111 }
1110 }
1112 } else if self.delayed_buffer.is_some() {
1111 } else if self.delayed_buffer.is_some() {
1113 return Err(HgError::abort(
1112 return Err(HgError::abort(
1114 "invalid delayed write on inline revlog",
1113 "invalid delayed write on inline revlog",
1115 exit_codes::ABORT,
1114 exit_codes::ABORT,
1116 None,
1115 None,
1117 ));
1116 ));
1118 } else {
1117 } else {
1119 offset += current_revision * entry.len();
1118 offset += current_revision * entry.len();
1120 transaction.add(&canonical_index_file, offset);
1119 transaction.add(&canonical_index_file, offset);
1121 index_handle.write_all(entry)?;
1120 index_handle.write_all(entry)?;
1122 index_handle.write_all(header)?;
1121 index_handle.write_all(header)?;
1123 index_handle.write_all(data)?;
1122 index_handle.write_all(data)?;
1124 }
1123 }
1125 let data_position = match data_handle {
1124 let data_position = match data_handle {
1126 Some(h) => Some(h.position()?),
1125 Some(h) => Some(h.position()?),
1127 None => None,
1126 None => None,
1128 };
1127 };
1129 Ok((index_handle.position()?, data_position))
1128 Ok((index_handle.position()?, data_position))
1130 }
1129 }
1131
1130
1132 /// Return the real target index file and not the temporary when diverting
1131 /// Return the real target index file and not the temporary when diverting
1133 pub fn canonical_index_file(&self) -> PathBuf {
1132 pub fn canonical_index_file(&self) -> PathBuf {
1134 self.original_index_file
1133 self.original_index_file
1135 .as_ref()
1134 .as_ref()
1136 .map(ToOwned::to_owned)
1135 .map(ToOwned::to_owned)
1137 .unwrap_or_else(|| self.index_file.to_owned())
1136 .unwrap_or_else(|| self.index_file.to_owned())
1138 }
1137 }
1139
1138
1140 /// Return the path to the diverted index
1139 /// Return the path to the diverted index
1141 fn diverted_index(&self) -> PathBuf {
1140 fn diverted_index(&self) -> PathBuf {
1142 self.index_file.with_extension("i.a")
1141 self.index_file.with_extension("i.a")
1143 }
1142 }
1144
1143
1145 /// True if we're in a [`Self::with_write`] or [`Self::with_read`] context
1144 /// True if we're in a [`Self::with_write`] or [`Self::with_read`] context
1146 pub fn is_open(&self) -> bool {
1145 pub fn is_open(&self) -> bool {
1147 self.segment_file.is_open()
1146 self.segment_file.is_open()
1148 }
1147 }
1149
1148
1150 /// Set this revlog to delay its writes to a buffer
1149 /// Set this revlog to delay its writes to a buffer
1151 pub fn delay(&mut self) -> Result<Option<PathBuf>, HgError> {
1150 pub fn delay(&mut self) -> Result<Option<PathBuf>, HgError> {
1152 assert!(!self.is_open());
1151 assert!(!self.is_open());
1153 if self.is_inline() {
1152 if self.is_inline() {
1154 return Err(HgError::abort(
1153 return Err(HgError::abort(
1155 "revlog with delayed write should not be inline",
1154 "revlog with delayed write should not be inline",
1156 exit_codes::ABORT,
1155 exit_codes::ABORT,
1157 None,
1156 None,
1158 ));
1157 ));
1159 }
1158 }
1160 if self.delayed_buffer.is_some() || self.original_index_file.is_some()
1159 if self.delayed_buffer.is_some() || self.original_index_file.is_some()
1161 {
1160 {
1162 // Delay or divert already happening
1161 // Delay or divert already happening
1163 return Ok(None);
1162 return Ok(None);
1164 }
1163 }
1165 if self.is_empty() {
1164 if self.is_empty() {
1166 self.original_index_file = Some(self.index_file.to_owned());
1165 self.original_index_file = Some(self.index_file.to_owned());
1167 self.index_file = self.diverted_index();
1166 self.index_file = self.diverted_index();
1168 if self.vfs.exists(&self.index_file) {
1167 if self.vfs.exists(&self.index_file) {
1169 self.vfs.unlink(&self.index_file)?;
1168 self.vfs.unlink(&self.index_file)?;
1170 }
1169 }
1171 Ok(Some(self.index_file.to_owned()))
1170 Ok(Some(self.index_file.to_owned()))
1172 } else {
1171 } else {
1173 self.delayed_buffer =
1172 self.delayed_buffer =
1174 Some(Arc::new(Mutex::new(DelayedBuffer::default())));
1173 Some(Arc::new(Mutex::new(DelayedBuffer::default())));
1175 Ok(None)
1174 Ok(None)
1176 }
1175 }
1177 }
1176 }
1178
1177
1179 /// Write the pending data (in memory) if any to the diverted index file
1178 /// Write the pending data (in memory) if any to the diverted index file
1180 /// (on disk temporary file)
1179 /// (on disk temporary file)
1181 pub fn write_pending(
1180 pub fn write_pending(
1182 &mut self,
1181 &mut self,
1183 ) -> Result<(Option<PathBuf>, bool), HgError> {
1182 ) -> Result<(Option<PathBuf>, bool), HgError> {
1184 assert!(!self.is_open());
1183 assert!(!self.is_open());
1185 if self.is_inline() {
1184 if self.is_inline() {
1186 return Err(HgError::abort(
1185 return Err(HgError::abort(
1187 "revlog with delayed write should not be inline",
1186 "revlog with delayed write should not be inline",
1188 exit_codes::ABORT,
1187 exit_codes::ABORT,
1189 None,
1188 None,
1190 ));
1189 ));
1191 }
1190 }
1192 if self.original_index_file.is_some() {
1191 if self.original_index_file.is_some() {
1193 return Ok((None, true));
1192 return Ok((None, true));
1194 }
1193 }
1195 let mut any_pending = false;
1194 let mut any_pending = false;
1196 let pending_index_file = self.diverted_index();
1195 let pending_index_file = self.diverted_index();
1197 if self.vfs.exists(&pending_index_file) {
1196 if self.vfs.exists(&pending_index_file) {
1198 self.vfs.unlink(&pending_index_file)?;
1197 self.vfs.unlink(&pending_index_file)?;
1199 }
1198 }
1200 self.vfs.copy(&self.index_file, &pending_index_file)?;
1199 self.vfs.copy(&self.index_file, &pending_index_file)?;
1201 if let Some(delayed_buffer) = self.delayed_buffer.take() {
1200 if let Some(delayed_buffer) = self.delayed_buffer.take() {
1202 let mut index_file_handle = self.vfs.open(&pending_index_file)?;
1201 let mut index_file_handle = self.vfs.open(&pending_index_file)?;
1203 index_file_handle
1202 index_file_handle
1204 .seek(SeekFrom::End(0))
1203 .seek(SeekFrom::End(0))
1205 .when_writing_file(&pending_index_file)?;
1204 .when_writing_file(&pending_index_file)?;
1206 let delayed_data =
1205 let delayed_data =
1207 &delayed_buffer.lock().expect("propagate the panic").buffer;
1206 &delayed_buffer.lock().expect("propagate the panic").buffer;
1208 index_file_handle
1207 index_file_handle
1209 .write_all(delayed_data)
1208 .write_all(delayed_data)
1210 .when_writing_file(&pending_index_file)?;
1209 .when_writing_file(&pending_index_file)?;
1211 any_pending = true;
1210 any_pending = true;
1212 }
1211 }
1213 self.original_index_file = Some(self.index_file.to_owned());
1212 self.original_index_file = Some(self.index_file.to_owned());
1214 self.index_file = pending_index_file;
1213 self.index_file = pending_index_file;
1215 Ok((Some(self.index_file.to_owned()), any_pending))
1214 Ok((Some(self.index_file.to_owned()), any_pending))
1216 }
1215 }
1217
1216
1218 /// Overwrite the canonical file with the diverted file, or write out the
1217 /// Overwrite the canonical file with the diverted file, or write out the
1219 /// delayed buffer.
1218 /// delayed buffer.
1220 /// Returns an error if the revlog is neither diverted nor delayed.
1219 /// Returns an error if the revlog is neither diverted nor delayed.
1221 pub fn finalize_pending(&mut self) -> Result<PathBuf, HgError> {
1220 pub fn finalize_pending(&mut self) -> Result<PathBuf, HgError> {
1222 assert!(!self.is_open());
1221 assert!(!self.is_open());
1223 if self.is_inline() {
1222 if self.is_inline() {
1224 return Err(HgError::abort(
1223 return Err(HgError::abort(
1225 "revlog with delayed write should not be inline",
1224 "revlog with delayed write should not be inline",
1226 exit_codes::ABORT,
1225 exit_codes::ABORT,
1227 None,
1226 None,
1228 ));
1227 ));
1229 }
1228 }
1230 match (
1229 match (
1231 self.delayed_buffer.as_ref(),
1230 self.delayed_buffer.as_ref(),
1232 self.original_index_file.as_ref(),
1231 self.original_index_file.as_ref(),
1233 ) {
1232 ) {
1234 (None, None) => {
1233 (None, None) => {
1235 return Err(HgError::abort(
1234 return Err(HgError::abort(
1236 "neither delay nor divert found on this revlog",
1235 "neither delay nor divert found on this revlog",
1237 exit_codes::ABORT,
1236 exit_codes::ABORT,
1238 None,
1237 None,
1239 ));
1238 ));
1240 }
1239 }
1241 (Some(delay), None) => {
1240 (Some(delay), None) => {
1242 let mut index_file_handle = self.vfs.open(&self.index_file)?;
1241 let mut index_file_handle = self.vfs.open(&self.index_file)?;
1243 index_file_handle
1242 index_file_handle
1244 .seek(SeekFrom::End(0))
1243 .seek(SeekFrom::End(0))
1245 .when_writing_file(&self.index_file)?;
1244 .when_writing_file(&self.index_file)?;
1246 index_file_handle
1245 index_file_handle
1247 .write_all(
1246 .write_all(
1248 &delay.lock().expect("propagate the panic").buffer,
1247 &delay.lock().expect("propagate the panic").buffer,
1249 )
1248 )
1250 .when_writing_file(&self.index_file)?;
1249 .when_writing_file(&self.index_file)?;
1251 self.delayed_buffer = None;
1250 self.delayed_buffer = None;
1252 }
1251 }
1253 (None, Some(divert)) => {
1252 (None, Some(divert)) => {
1254 if self.vfs.exists(&self.index_file) {
1253 if self.vfs.exists(&self.index_file) {
1255 self.vfs.rename(&self.index_file, divert, true)?;
1254 self.vfs.rename(&self.index_file, divert, true)?;
1256 }
1255 }
1257 divert.clone_into(&mut self.index_file);
1256 divert.clone_into(&mut self.index_file);
1258 self.original_index_file = None;
1257 self.original_index_file = None;
1259 }
1258 }
1260 (Some(_), Some(_)) => unreachable!(
1259 (Some(_), Some(_)) => unreachable!(
1261 "{} is in an inconsistent state of both delay and divert",
1260 "{} is in an inconsistent state of both delay and divert",
1262 self.canonical_index_file().display(),
1261 self.canonical_index_file().display(),
1263 ),
1262 ),
1264 }
1263 }
1265 Ok(self.canonical_index_file())
1264 Ok(self.canonical_index_file())
1266 }
1265 }
1267
1266
1268 /// `pub` only for `hg-cpython`. This is made a different method than
1267 /// `pub` only for `hg-cpython`. This is made a different method than
1269 /// [`Revlog::index`] in case there is a different invariant that pops up
1268 /// [`Revlog::index`] in case there is a different invariant that pops up
1270 /// later.
1269 /// later.
1271 #[doc(hidden)]
1270 #[doc(hidden)]
1272 pub fn shared_index(&self) -> &Index {
1271 pub fn shared_index(&self) -> &Index {
1273 &self.index
1272 &self.index
1274 }
1273 }
1275 }
1274 }
1276
1275
1277 /// The use of a [`Refcell`] assumes that a given revlog will only
1276 /// The use of a [`Refcell`] assumes that a given revlog will only
1278 /// be accessed (read or write) by a single thread.
1277 /// be accessed (read or write) by a single thread.
1279 type UncompressedChunkCache =
1278 type UncompressedChunkCache =
1280 RefCell<LruMap<Revision, Arc<[u8]>, ByMemoryUsage>>;
1279 RefCell<LruMap<Revision, Arc<[u8]>, ByMemoryUsage>>;
1281
1280
1282 /// The node, revision and data for the last revision we've seen. Speeds up
1281 /// The node, revision and data for the last revision we've seen. Speeds up
1283 /// a lot of sequential operations of the revlog.
1282 /// a lot of sequential operations of the revlog.
1284 ///
1283 ///
1285 /// The data is not just bytes since it can come from Python and we want to
1284 /// The data is not just bytes since it can come from Python and we want to
1286 /// avoid copies if possible.
1285 /// avoid copies if possible.
1287 type SingleRevisionCache =
1286 type SingleRevisionCache =
1288 (Node, Revision, Box<dyn Deref<Target = [u8]> + Send>);
1287 (Node, Revision, Box<dyn Deref<Target = [u8]> + Send>);
1289
1288
1290 /// A way of progressively filling a buffer with revision data, then return
1289 /// A way of progressively filling a buffer with revision data, then return
1291 /// that buffer. Used to abstract away Python-allocated code to reduce copying
1290 /// that buffer. Used to abstract away Python-allocated code to reduce copying
1292 /// for performance reasons.
1291 /// for performance reasons.
1293 pub trait RevisionBuffer {
1292 pub trait RevisionBuffer {
1294 /// The owned buffer type to return
1293 /// The owned buffer type to return
1295 type Target;
1294 type Target;
1296 /// Copies the slice into the buffer
1295 /// Copies the slice into the buffer
1297 fn extend_from_slice(&mut self, slice: &[u8]);
1296 fn extend_from_slice(&mut self, slice: &[u8]);
1298 /// Returns the now finished owned buffer
1297 /// Returns the now finished owned buffer
1299 fn finish(self) -> Self::Target;
1298 fn finish(self) -> Self::Target;
1300 }
1299 }
1301
1300
1302 /// A simple vec-based buffer. This is uselessly complicated for the pure Rust
1301 /// A simple vec-based buffer. This is uselessly complicated for the pure Rust
1303 /// case, but it's the price to pay for Python compatibility.
1302 /// case, but it's the price to pay for Python compatibility.
1304 #[derive(Debug)]
1303 #[derive(Debug)]
1305 pub(super) struct CoreRevisionBuffer {
1304 pub(super) struct CoreRevisionBuffer {
1306 buf: Vec<u8>,
1305 buf: Vec<u8>,
1307 }
1306 }
1308
1307
1309 impl CoreRevisionBuffer {
1308 impl CoreRevisionBuffer {
1310 pub fn new() -> Self {
1309 pub fn new() -> Self {
1311 Self { buf: vec![] }
1310 Self { buf: vec![] }
1312 }
1311 }
1313
1312
1314 #[inline]
1313 #[inline]
1315 pub fn resize(&mut self, size: usize) {
1314 pub fn resize(&mut self, size: usize) {
1316 self.buf.reserve_exact(size - self.buf.capacity());
1315 self.buf.reserve_exact(size - self.buf.capacity());
1317 }
1316 }
1318 }
1317 }
1319
1318
1320 impl RevisionBuffer for CoreRevisionBuffer {
1319 impl RevisionBuffer for CoreRevisionBuffer {
1321 type Target = Vec<u8>;
1320 type Target = Vec<u8>;
1322
1321
1323 #[inline]
1322 #[inline]
1324 fn extend_from_slice(&mut self, slice: &[u8]) {
1323 fn extend_from_slice(&mut self, slice: &[u8]) {
1325 self.buf.extend_from_slice(slice);
1324 self.buf.extend_from_slice(slice);
1326 }
1325 }
1327
1326
1328 #[inline]
1327 #[inline]
1329 fn finish(self) -> Self::Target {
1328 fn finish(self) -> Self::Target {
1330 self.buf
1329 self.buf
1331 }
1330 }
1332 }
1331 }
1333
1332
1334 /// Calculate the hash of a revision given its data and its parents.
1333 /// Calculate the hash of a revision given its data and its parents.
1335 pub fn hash(
1334 pub fn hash(
1336 data: &[u8],
1335 data: &[u8],
1337 p1_hash: &[u8],
1336 p1_hash: &[u8],
1338 p2_hash: &[u8],
1337 p2_hash: &[u8],
1339 ) -> [u8; NODE_BYTES_LENGTH] {
1338 ) -> [u8; NODE_BYTES_LENGTH] {
1340 let mut hasher = Sha1::new();
1339 let mut hasher = Sha1::new();
1341 let (a, b) = (p1_hash, p2_hash);
1340 let (a, b) = (p1_hash, p2_hash);
1342 if a > b {
1341 if a > b {
1343 hasher.update(b);
1342 hasher.update(b);
1344 hasher.update(a);
1343 hasher.update(a);
1345 } else {
1344 } else {
1346 hasher.update(a);
1345 hasher.update(a);
1347 hasher.update(b);
1346 hasher.update(b);
1348 }
1347 }
1349 hasher.update(data);
1348 hasher.update(data);
1350 *hasher.finalize().as_ref()
1349 *hasher.finalize().as_ref()
1351 }
1350 }
General Comments 0
You need to be logged in to leave comments. Login now