// ancestors.rs
//
// Copyright 2018 Georges Racinet <gracinet@anybox.fr>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.

//! Rust versions of generic DAG ancestors algorithms for Mercurial
10 | use super::{Graph, GraphError, Revision, NULL_REVISION}; |
|
10 | use super::{Graph, GraphError, Revision, NULL_REVISION}; | |
11 | use crate::dagops; |
|
11 | use crate::dagops; | |
12 | use std::cmp::max; |
|
12 | use std::cmp::max; | |
13 | use std::collections::{BinaryHeap, HashSet}; |
|
13 | use std::collections::{BinaryHeap, HashSet}; | |
14 |
|
14 | |||
15 | /// Iterator over the ancestors of a given list of revisions |
|
15 | /// Iterator over the ancestors of a given list of revisions | |
16 | /// This is a generic type, defined and implemented for any Graph, so that |
|
16 | /// This is a generic type, defined and implemented for any Graph, so that | |
17 | /// it's easy to |
|
17 | /// it's easy to | |
18 | /// |
|
18 | /// | |
19 | /// - unit test in pure Rust |
|
19 | /// - unit test in pure Rust | |
20 | /// - bind to main Mercurial code, potentially in several ways and have these |
|
20 | /// - bind to main Mercurial code, potentially in several ways and have these | |
21 | /// bindings evolve over time |
|
21 | /// bindings evolve over time | |
22 | pub struct AncestorsIterator<G: Graph> { |
|
22 | pub struct AncestorsIterator<G: Graph> { | |
23 | graph: G, |
|
23 | graph: G, | |
24 | visit: BinaryHeap<Revision>, |
|
24 | visit: BinaryHeap<Revision>, | |
25 | seen: HashSet<Revision>, |
|
25 | seen: HashSet<Revision>, | |
26 | stoprev: Revision, |
|
26 | stoprev: Revision, | |
27 | } |
|
27 | } | |
28 |
|
28 | |||
29 | pub struct MissingAncestors<G: Graph> { |
|
29 | pub struct MissingAncestors<G: Graph> { | |
30 | graph: G, |
|
30 | graph: G, | |
31 | bases: HashSet<Revision>, |
|
31 | bases: HashSet<Revision>, | |
32 | max_base: Revision, |
|
32 | max_base: Revision, | |
33 | } |
|
33 | } | |
34 |
|
34 | |||
35 | impl<G: Graph> AncestorsIterator<G> { |
|
35 | impl<G: Graph> AncestorsIterator<G> { | |
36 | /// Constructor. |
|
36 | /// Constructor. | |
37 | /// |
|
37 | /// | |
38 | /// if `inclusive` is true, then the init revisions are emitted in |
|
38 | /// if `inclusive` is true, then the init revisions are emitted in | |
39 | /// particular, otherwise iteration starts from their parents. |
|
39 | /// particular, otherwise iteration starts from their parents. | |
40 | pub fn new( |
|
40 | pub fn new( | |
41 | graph: G, |
|
41 | graph: G, | |
42 | initrevs: impl IntoIterator<Item = Revision>, |
|
42 | initrevs: impl IntoIterator<Item = Revision>, | |
43 | stoprev: Revision, |
|
43 | stoprev: Revision, | |
44 | inclusive: bool, |
|
44 | inclusive: bool, | |
45 | ) -> Result<Self, GraphError> { |
|
45 | ) -> Result<Self, GraphError> { | |
46 | let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev); |
|
46 | let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev); | |
47 | if inclusive { |
|
47 | if inclusive { | |
48 | let visit: BinaryHeap<Revision> = filtered_initrevs.collect(); |
|
48 | let visit: BinaryHeap<Revision> = filtered_initrevs.collect(); | |
49 | let seen = visit.iter().cloned().collect(); |
|
49 | let seen = visit.iter().cloned().collect(); | |
50 | return Ok(AncestorsIterator { |
|
50 | return Ok(AncestorsIterator { | |
51 | visit, |
|
51 | visit, | |
52 | seen, |
|
52 | seen, | |
53 | stoprev, |
|
53 | stoprev, | |
54 | graph, |
|
54 | graph, | |
55 | }); |
|
55 | }); | |
56 | } |
|
56 | } | |
57 | let mut this = AncestorsIterator { |
|
57 | let mut this = AncestorsIterator { | |
58 | visit: BinaryHeap::new(), |
|
58 | visit: BinaryHeap::new(), | |
59 | seen: HashSet::new(), |
|
59 | seen: HashSet::new(), | |
60 | stoprev, |
|
60 | stoprev, | |
61 | graph, |
|
61 | graph, | |
62 | }; |
|
62 | }; | |
63 | this.seen.insert(NULL_REVISION); |
|
63 | this.seen.insert(NULL_REVISION); | |
64 | for rev in filtered_initrevs { |
|
64 | for rev in filtered_initrevs { | |
65 | for parent in this.graph.parents(rev)?.iter().cloned() { |
|
65 | for parent in this.graph.parents(rev)?.iter().cloned() { | |
66 | this.conditionally_push_rev(parent); |
|
66 | this.conditionally_push_rev(parent); | |
67 | } |
|
67 | } | |
68 | } |
|
68 | } | |
69 | Ok(this) |
|
69 | Ok(this) | |
70 | } |
|
70 | } | |
71 |
|
71 | |||
72 | #[inline] |
|
72 | #[inline] | |
73 | fn conditionally_push_rev(&mut self, rev: Revision) { |
|
73 | fn conditionally_push_rev(&mut self, rev: Revision) { | |
74 | if self.stoprev <= rev && self.seen.insert(rev) { |
|
74 | if self.stoprev <= rev && self.seen.insert(rev) { | |
75 | self.visit.push(rev); |
|
75 | self.visit.push(rev); | |
76 | } |
|
76 | } | |
77 | } |
|
77 | } | |
78 |
|
78 | |||
79 | /// Consumes partially the iterator to tell if the given target |
|
79 | /// Consumes partially the iterator to tell if the given target | |
80 | /// revision |
|
80 | /// revision | |
81 | /// is in the ancestors it emits. |
|
81 | /// is in the ancestors it emits. | |
82 | /// This is meant for iterators actually dedicated to that kind of |
|
82 | /// This is meant for iterators actually dedicated to that kind of | |
83 | /// purpose |
|
83 | /// purpose | |
84 | pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> { |
|
84 | pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> { | |
85 | if self.seen.contains(&target) && target != NULL_REVISION { |
|
85 | if self.seen.contains(&target) && target != NULL_REVISION { | |
86 | return Ok(true); |
|
86 | return Ok(true); | |
87 | } |
|
87 | } | |
88 | for item in self { |
|
88 | for item in self { | |
89 | let rev = item?; |
|
89 | let rev = item?; | |
90 | if rev == target { |
|
90 | if rev == target { | |
91 | return Ok(true); |
|
91 | return Ok(true); | |
92 | } |
|
92 | } | |
93 | if rev < target { |
|
93 | if rev < target { | |
94 | return Ok(false); |
|
94 | return Ok(false); | |
95 | } |
|
95 | } | |
96 | } |
|
96 | } | |
97 | Ok(false) |
|
97 | Ok(false) | |
98 | } |
|
98 | } | |
99 |
|
99 | |||
100 | pub fn peek(&self) -> Option<Revision> { |
|
100 | pub fn peek(&self) -> Option<Revision> { | |
101 | self.visit.peek().cloned() |
|
101 | self.visit.peek().cloned() | |
102 | } |
|
102 | } | |
103 |
|
103 | |||
104 | /// Tell if the iterator is about an empty set |
|
104 | /// Tell if the iterator is about an empty set | |
105 | /// |
|
105 | /// | |
106 | /// The result does not depend whether the iterator has been consumed |
|
106 | /// The result does not depend whether the iterator has been consumed | |
107 | /// or not. |
|
107 | /// or not. | |
108 | /// This is mostly meant for iterators backing a lazy ancestors set |
|
108 | /// This is mostly meant for iterators backing a lazy ancestors set | |
109 | pub fn is_empty(&self) -> bool { |
|
109 | pub fn is_empty(&self) -> bool { | |
110 | if self.visit.len() > 0 { |
|
110 | if self.visit.len() > 0 { | |
111 | return false; |
|
111 | return false; | |
112 | } |
|
112 | } | |
113 | if self.seen.len() > 1 { |
|
113 | if self.seen.len() > 1 { | |
114 | return false; |
|
114 | return false; | |
115 | } |
|
115 | } | |
116 | // at this point, the seen set is at most a singleton. |
|
116 | // at this point, the seen set is at most a singleton. | |
117 | // If not `self.inclusive`, it's still possible that it has only |
|
117 | // If not `self.inclusive`, it's still possible that it has only | |
118 | // the null revision |
|
118 | // the null revision | |
119 | self.seen.is_empty() || self.seen.contains(&NULL_REVISION) |
|
119 | self.seen.is_empty() || self.seen.contains(&NULL_REVISION) | |
120 | } |
|
120 | } | |
121 | } |
|
121 | } | |
122 |
|
122 | |||
123 | /// Main implementation for the iterator |
|
123 | /// Main implementation for the iterator | |
124 | /// |
|
124 | /// | |
125 | /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py` |
|
125 | /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py` | |
126 | /// with a few non crucial differences: |
|
126 | /// with a few non crucial differences: | |
127 | /// |
|
127 | /// | |
128 | /// - there's no filtering of invalid parent revisions. Actually, it should be |
|
128 | /// - there's no filtering of invalid parent revisions. Actually, it should be | |
129 | /// consistent and more efficient to filter them from the end caller. |
|
129 | /// consistent and more efficient to filter them from the end caller. | |
130 | /// - we don't have the optimization for adjacent revisions (i.e., the case |
|
130 | /// - we don't have the optimization for adjacent revisions (i.e., the case | |
131 | /// where `p1 == rev - 1`), because it amounts to update the first element of |
|
131 | /// where `p1 == rev - 1`), because it amounts to update the first element of | |
132 | /// the heap without sifting, which Rust's BinaryHeap doesn't let us do. |
|
132 | /// the heap without sifting, which Rust's BinaryHeap doesn't let us do. | |
133 | /// - we save a few pushes by comparing with `stoprev` before pushing |
|
133 | /// - we save a few pushes by comparing with `stoprev` before pushing | |
134 | impl<G: Graph> Iterator for AncestorsIterator<G> { |
|
134 | impl<G: Graph> Iterator for AncestorsIterator<G> { | |
135 | type Item = Result<Revision, GraphError>; |
|
135 | type Item = Result<Revision, GraphError>; | |
136 |
|
136 | |||
137 | fn next(&mut self) -> Option<Self::Item> { |
|
137 | fn next(&mut self) -> Option<Self::Item> { | |
138 | let current = match self.visit.peek() { |
|
138 | let current = match self.visit.peek() { | |
139 | None => { |
|
139 | None => { | |
140 | return None; |
|
140 | return None; | |
141 | } |
|
141 | } | |
142 | Some(c) => *c, |
|
142 | Some(c) => *c, | |
143 | }; |
|
143 | }; | |
144 | let [p1, p2] = match self.graph.parents(current) { |
|
144 | let [p1, p2] = match self.graph.parents(current) { | |
145 | Ok(ps) => ps, |
|
145 | Ok(ps) => ps, | |
146 | Err(e) => return Some(Err(e)), |
|
146 | Err(e) => return Some(Err(e)), | |
147 | }; |
|
147 | }; | |
148 | if p1 < self.stoprev || !self.seen.insert(p1) { |
|
148 | if p1 < self.stoprev || !self.seen.insert(p1) { | |
149 | self.visit.pop(); |
|
149 | self.visit.pop(); | |
150 | } else { |
|
150 | } else { | |
151 | *(self.visit.peek_mut().unwrap()) = p1; |
|
151 | *(self.visit.peek_mut().unwrap()) = p1; | |
152 | }; |
|
152 | }; | |
153 |
|
153 | |||
154 | self.conditionally_push_rev(p2); |
|
154 | self.conditionally_push_rev(p2); | |
155 | Some(Ok(current)) |
|
155 | Some(Ok(current)) | |
156 | } |
|
156 | } | |
157 | } |
|
157 | } | |
158 |
|
158 | |||
159 | impl<G: Graph> MissingAncestors<G> { |
|
159 | impl<G: Graph> MissingAncestors<G> { | |
160 | pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self { |
|
160 | pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self { | |
161 | let mut created = MissingAncestors { |
|
161 | let mut created = MissingAncestors { | |
162 | graph, |
|
162 | graph, | |
163 | bases: HashSet::new(), |
|
163 | bases: HashSet::new(), | |
164 | max_base: NULL_REVISION, |
|
164 | max_base: NULL_REVISION, | |
165 | }; |
|
165 | }; | |
166 | created.add_bases(bases); |
|
166 | created.add_bases(bases); | |
167 | created |
|
167 | created | |
168 | } |
|
168 | } | |
169 |
|
169 | |||
170 | pub fn has_bases(&self) -> bool { |
|
170 | pub fn has_bases(&self) -> bool { | |
171 | !self.bases.is_empty() |
|
171 | !self.bases.is_empty() | |
172 | } |
|
172 | } | |
173 |
|
173 | |||
174 | /// Return a reference to current bases. |
|
174 | /// Return a reference to current bases. | |
175 | /// |
|
175 | /// | |
176 | /// This is useful in unit tests, but also setdiscovery.py does |
|
176 | /// This is useful in unit tests, but also setdiscovery.py does | |
177 | /// read the bases attribute of a ancestor.missingancestors instance. |
|
177 | /// read the bases attribute of a ancestor.missingancestors instance. | |
178 | pub fn get_bases(&self) -> &HashSet<Revision> { |
|
178 | pub fn get_bases(&self) -> &HashSet<Revision> { | |
179 | &self.bases |
|
179 | &self.bases | |
180 | } |
|
180 | } | |
181 |
|
181 | |||
182 | /// Computes the relative heads of current bases. |
|
182 | /// Computes the relative heads of current bases. | |
183 | /// |
|
183 | /// | |
184 | /// The object is still usable after this. |
|
184 | /// The object is still usable after this. | |
185 | pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> { |
|
185 | pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> { | |
186 | dagops::heads(&self.graph, self.bases.iter()) |
|
186 | dagops::heads(&self.graph, self.bases.iter()) | |
187 | } |
|
187 | } | |
188 |
|
188 | |||
189 | /// Consumes the object and returns the relative heads of its bases. |
|
189 | /// Consumes the object and returns the relative heads of its bases. | |
190 | pub fn into_bases_heads( |
|
190 | pub fn into_bases_heads( | |
191 | mut self, |
|
191 | mut self, | |
192 | ) -> Result<HashSet<Revision>, GraphError> { |
|
192 | ) -> Result<HashSet<Revision>, GraphError> { | |
193 | dagops::retain_heads(&self.graph, &mut self.bases)?; |
|
193 | dagops::retain_heads(&self.graph, &mut self.bases)?; | |
194 | Ok(self.bases) |
|
194 | Ok(self.bases) | |
195 | } |
|
195 | } | |
196 |
|
196 | |||
197 | /// Add some revisions to `self.bases` |
|
197 | /// Add some revisions to `self.bases` | |
198 | /// |
|
198 | /// | |
199 | /// Takes care of keeping `self.max_base` up to date. |
|
199 | /// Takes care of keeping `self.max_base` up to date. | |
200 | pub fn add_bases( |
|
200 | pub fn add_bases( | |
201 | &mut self, |
|
201 | &mut self, | |
202 | new_bases: impl IntoIterator<Item = Revision>, |
|
202 | new_bases: impl IntoIterator<Item = Revision>, | |
203 | ) { |
|
203 | ) { | |
204 | let mut max_base = self.max_base; |
|
204 | let mut max_base = self.max_base; | |
205 | self.bases.extend( |
|
205 | self.bases.extend( | |
206 | new_bases |
|
206 | new_bases | |
207 | .into_iter() |
|
207 | .into_iter() | |
208 | .filter(|&rev| rev != NULL_REVISION) |
|
208 | .filter(|&rev| rev != NULL_REVISION) | |
209 |
. |
|
209 | .inspect(|&r| { | |
210 | if r > max_base { |
|
210 | if r > max_base { | |
211 | max_base = r; |
|
211 | max_base = r; | |
212 | } |
|
212 | } | |
213 | r |
|
|||
214 | }), |
|
213 | }), | |
215 | ); |
|
214 | ); | |
216 | self.max_base = max_base; |
|
215 | self.max_base = max_base; | |
217 | } |
|
216 | } | |
218 |
|
217 | |||
219 | /// Remove all ancestors of self.bases from the revs set (in place) |
|
218 | /// Remove all ancestors of self.bases from the revs set (in place) | |
220 | pub fn remove_ancestors_from( |
|
219 | pub fn remove_ancestors_from( | |
221 | &mut self, |
|
220 | &mut self, | |
222 | revs: &mut HashSet<Revision>, |
|
221 | revs: &mut HashSet<Revision>, | |
223 | ) -> Result<(), GraphError> { |
|
222 | ) -> Result<(), GraphError> { | |
224 | revs.retain(|r| !self.bases.contains(r)); |
|
223 | revs.retain(|r| !self.bases.contains(r)); | |
225 | // the null revision is always an ancestor. Logically speaking |
|
224 | // the null revision is always an ancestor. Logically speaking | |
226 | // it's debatable in case bases is empty, but the Python |
|
225 | // it's debatable in case bases is empty, but the Python | |
227 | // implementation always adds NULL_REVISION to bases, making it |
|
226 | // implementation always adds NULL_REVISION to bases, making it | |
228 | // unconditionnally true. |
|
227 | // unconditionnally true. | |
229 | revs.remove(&NULL_REVISION); |
|
228 | revs.remove(&NULL_REVISION); | |
230 | if revs.is_empty() { |
|
229 | if revs.is_empty() { | |
231 | return Ok(()); |
|
230 | return Ok(()); | |
232 | } |
|
231 | } | |
233 | // anything in revs > start is definitely not an ancestor of bases |
|
232 | // anything in revs > start is definitely not an ancestor of bases | |
234 | // revs <= start need to be investigated |
|
233 | // revs <= start need to be investigated | |
235 | if self.max_base == NULL_REVISION { |
|
234 | if self.max_base == NULL_REVISION { | |
236 | return Ok(()); |
|
235 | return Ok(()); | |
237 | } |
|
236 | } | |
238 |
|
237 | |||
239 | // whatever happens, we'll keep at least keepcount of them |
|
238 | // whatever happens, we'll keep at least keepcount of them | |
240 | // knowing this gives us a earlier stop condition than |
|
239 | // knowing this gives us a earlier stop condition than | |
241 | // going all the way to the root |
|
240 | // going all the way to the root | |
242 | let keepcount = revs.iter().filter(|r| **r > self.max_base).count(); |
|
241 | let keepcount = revs.iter().filter(|r| **r > self.max_base).count(); | |
243 |
|
242 | |||
244 | let mut curr = self.max_base; |
|
243 | let mut curr = self.max_base; | |
245 | while curr != NULL_REVISION && revs.len() > keepcount { |
|
244 | while curr != NULL_REVISION && revs.len() > keepcount { | |
246 | if self.bases.contains(&curr) { |
|
245 | if self.bases.contains(&curr) { | |
247 | revs.remove(&curr); |
|
246 | revs.remove(&curr); | |
248 | self.add_parents(curr)?; |
|
247 | self.add_parents(curr)?; | |
249 | } |
|
248 | } | |
250 | // We know this revision is safe because we've checked the bounds |
|
249 | // We know this revision is safe because we've checked the bounds | |
251 | // before. |
|
250 | // before. | |
252 | curr = Revision(curr.0 - 1); |
|
251 | curr = Revision(curr.0 - 1); | |
253 | } |
|
252 | } | |
254 | Ok(()) |
|
253 | Ok(()) | |
255 | } |
|
254 | } | |
256 |
|
255 | |||
257 | /// Add the parents of `rev` to `self.bases` |
|
256 | /// Add the parents of `rev` to `self.bases` | |
258 | /// |
|
257 | /// | |
259 | /// This has no effect on `self.max_base` |
|
258 | /// This has no effect on `self.max_base` | |
260 | #[inline] |
|
259 | #[inline] | |
261 | fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> { |
|
260 | fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> { | |
262 | if rev == NULL_REVISION { |
|
261 | if rev == NULL_REVISION { | |
263 | return Ok(()); |
|
262 | return Ok(()); | |
264 | } |
|
263 | } | |
265 | for p in self.graph.parents(rev)?.iter().cloned() { |
|
264 | for p in self.graph.parents(rev)?.iter().cloned() { | |
266 | // No need to bother the set with inserting NULL_REVISION over and |
|
265 | // No need to bother the set with inserting NULL_REVISION over and | |
267 | // over |
|
266 | // over | |
268 | if p != NULL_REVISION { |
|
267 | if p != NULL_REVISION { | |
269 | self.bases.insert(p); |
|
268 | self.bases.insert(p); | |
270 | } |
|
269 | } | |
271 | } |
|
270 | } | |
272 | Ok(()) |
|
271 | Ok(()) | |
273 | } |
|
272 | } | |
274 |
|
273 | |||
275 | /// Return all the ancestors of revs that are not ancestors of self.bases |
|
274 | /// Return all the ancestors of revs that are not ancestors of self.bases | |
276 | /// |
|
275 | /// | |
277 | /// This may include elements from revs. |
|
276 | /// This may include elements from revs. | |
278 | /// |
|
277 | /// | |
279 | /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in |
|
278 | /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in | |
280 | /// revision number order, which is a topological order. |
|
279 | /// revision number order, which is a topological order. | |
281 | pub fn missing_ancestors( |
|
280 | pub fn missing_ancestors( | |
282 | &mut self, |
|
281 | &mut self, | |
283 | revs: impl IntoIterator<Item = Revision>, |
|
282 | revs: impl IntoIterator<Item = Revision>, | |
284 | ) -> Result<Vec<Revision>, GraphError> { |
|
283 | ) -> Result<Vec<Revision>, GraphError> { | |
285 | // just for convenience and comparison with Python version |
|
284 | // just for convenience and comparison with Python version | |
286 | let bases_visit = &mut self.bases; |
|
285 | let bases_visit = &mut self.bases; | |
287 | let mut revs: HashSet<Revision> = revs |
|
286 | let mut revs: HashSet<Revision> = revs | |
288 | .into_iter() |
|
287 | .into_iter() | |
289 | .filter(|r| !bases_visit.contains(r)) |
|
288 | .filter(|r| !bases_visit.contains(r)) | |
290 | .collect(); |
|
289 | .collect(); | |
291 | let revs_visit = &mut revs; |
|
290 | let revs_visit = &mut revs; | |
292 | let mut both_visit: HashSet<Revision> = |
|
291 | let mut both_visit: HashSet<Revision> = | |
293 | revs_visit.intersection(bases_visit).cloned().collect(); |
|
292 | revs_visit.intersection(bases_visit).cloned().collect(); | |
294 | if revs_visit.is_empty() { |
|
293 | if revs_visit.is_empty() { | |
295 | return Ok(Vec::new()); |
|
294 | return Ok(Vec::new()); | |
296 | } |
|
295 | } | |
297 | let max_revs = revs_visit.iter().cloned().max().unwrap(); |
|
296 | let max_revs = revs_visit.iter().cloned().max().unwrap(); | |
298 | let start = max(self.max_base, max_revs); |
|
297 | let start = max(self.max_base, max_revs); | |
299 |
|
298 | |||
300 | // TODO heuristics for with_capacity()? |
|
299 | // TODO heuristics for with_capacity()? | |
301 | let mut missing: Vec<Revision> = Vec::new(); |
|
300 | let mut missing: Vec<Revision> = Vec::new(); | |
302 | for curr in (0..=start.0).rev() { |
|
301 | for curr in (0..=start.0).rev() { | |
303 | if revs_visit.is_empty() { |
|
302 | if revs_visit.is_empty() { | |
304 | break; |
|
303 | break; | |
305 | } |
|
304 | } | |
306 | if both_visit.remove(&Revision(curr)) { |
|
305 | if both_visit.remove(&Revision(curr)) { | |
307 | // curr's parents might have made it into revs_visit through |
|
306 | // curr's parents might have made it into revs_visit through | |
308 | // another path |
|
307 | // another path | |
309 | for p in self.graph.parents(Revision(curr))?.iter().cloned() { |
|
308 | for p in self.graph.parents(Revision(curr))?.iter().cloned() { | |
310 | if p == NULL_REVISION { |
|
309 | if p == NULL_REVISION { | |
311 | continue; |
|
310 | continue; | |
312 | } |
|
311 | } | |
313 | revs_visit.remove(&p); |
|
312 | revs_visit.remove(&p); | |
314 | bases_visit.insert(p); |
|
313 | bases_visit.insert(p); | |
315 | both_visit.insert(p); |
|
314 | both_visit.insert(p); | |
316 | } |
|
315 | } | |
317 | } else if revs_visit.remove(&Revision(curr)) { |
|
316 | } else if revs_visit.remove(&Revision(curr)) { | |
318 | missing.push(Revision(curr)); |
|
317 | missing.push(Revision(curr)); | |
319 | for p in self.graph.parents(Revision(curr))?.iter().cloned() { |
|
318 | for p in self.graph.parents(Revision(curr))?.iter().cloned() { | |
320 | if p == NULL_REVISION { |
|
319 | if p == NULL_REVISION { | |
321 | continue; |
|
320 | continue; | |
322 | } |
|
321 | } | |
323 | if bases_visit.contains(&p) { |
|
322 | if bases_visit.contains(&p) { | |
324 | // p is already known to be an ancestor of revs_visit |
|
323 | // p is already known to be an ancestor of revs_visit | |
325 | revs_visit.remove(&p); |
|
324 | revs_visit.remove(&p); | |
326 | both_visit.insert(p); |
|
325 | both_visit.insert(p); | |
327 | } else if both_visit.contains(&p) { |
|
326 | } else if both_visit.contains(&p) { | |
328 | // p should have been in bases_visit |
|
327 | // p should have been in bases_visit | |
329 | revs_visit.remove(&p); |
|
328 | revs_visit.remove(&p); | |
330 | bases_visit.insert(p); |
|
329 | bases_visit.insert(p); | |
331 | } else { |
|
330 | } else { | |
332 | // visit later |
|
331 | // visit later | |
333 | revs_visit.insert(p); |
|
332 | revs_visit.insert(p); | |
334 | } |
|
333 | } | |
335 | } |
|
334 | } | |
336 | } else if bases_visit.contains(&Revision(curr)) { |
|
335 | } else if bases_visit.contains(&Revision(curr)) { | |
337 | for p in self.graph.parents(Revision(curr))?.iter().cloned() { |
|
336 | for p in self.graph.parents(Revision(curr))?.iter().cloned() { | |
338 | if p == NULL_REVISION { |
|
337 | if p == NULL_REVISION { | |
339 | continue; |
|
338 | continue; | |
340 | } |
|
339 | } | |
341 | if revs_visit.remove(&p) || both_visit.contains(&p) { |
|
340 | if revs_visit.remove(&p) || both_visit.contains(&p) { | |
342 | // p is an ancestor of bases_visit, and is implicitly |
|
341 | // p is an ancestor of bases_visit, and is implicitly | |
343 | // in revs_visit, which means p is ::revs & ::bases. |
|
342 | // in revs_visit, which means p is ::revs & ::bases. | |
344 | bases_visit.insert(p); |
|
343 | bases_visit.insert(p); | |
345 | both_visit.insert(p); |
|
344 | both_visit.insert(p); | |
346 | } else { |
|
345 | } else { | |
347 | bases_visit.insert(p); |
|
346 | bases_visit.insert(p); | |
348 | } |
|
347 | } | |
349 | } |
|
348 | } | |
350 | } |
|
349 | } | |
351 | } |
|
350 | } | |
352 | missing.reverse(); |
|
351 | missing.reverse(); | |
353 | Ok(missing) |
|
352 | Ok(missing) | |
354 | } |
|
353 | } | |
355 | } |
|
354 | } | |
356 |
|
355 | |||
357 | #[cfg(test)] |
|
356 | #[cfg(test)] | |
358 | mod tests { |
|
357 | mod tests { | |
359 |
|
358 | |||
360 | use super::*; |
|
359 | use super::*; | |
361 | use crate::{ |
|
360 | use crate::{ | |
362 | testing::{SampleGraph, VecGraph}, |
|
361 | testing::{SampleGraph, VecGraph}, | |
363 | BaseRevision, |
|
362 | BaseRevision, | |
364 | }; |
|
363 | }; | |
365 |
|
364 | |||
366 | impl From<BaseRevision> for Revision { |
|
365 | impl From<BaseRevision> for Revision { | |
367 | fn from(value: BaseRevision) -> Self { |
|
366 | fn from(value: BaseRevision) -> Self { | |
368 | if !cfg!(test) { |
|
367 | if !cfg!(test) { | |
369 | panic!("should only be used in tests") |
|
368 | panic!("should only be used in tests") | |
370 | } |
|
369 | } | |
371 | Revision(value) |
|
370 | Revision(value) | |
372 | } |
|
371 | } | |
373 | } |
|
372 | } | |
374 |
|
373 | |||
375 | impl PartialEq<BaseRevision> for Revision { |
|
374 | impl PartialEq<BaseRevision> for Revision { | |
376 | fn eq(&self, other: &BaseRevision) -> bool { |
|
375 | fn eq(&self, other: &BaseRevision) -> bool { | |
377 | if !cfg!(test) { |
|
376 | if !cfg!(test) { | |
378 | panic!("should only be used in tests") |
|
377 | panic!("should only be used in tests") | |
379 | } |
|
378 | } | |
380 | self.0.eq(other) |
|
379 | self.0.eq(other) | |
381 | } |
|
380 | } | |
382 | } |
|
381 | } | |
383 |
|
382 | |||
384 | impl PartialEq<u32> for Revision { |
|
383 | impl PartialEq<u32> for Revision { | |
385 | fn eq(&self, other: &u32) -> bool { |
|
384 | fn eq(&self, other: &u32) -> bool { | |
386 | if !cfg!(test) { |
|
385 | if !cfg!(test) { | |
387 | panic!("should only be used in tests") |
|
386 | panic!("should only be used in tests") | |
388 | } |
|
387 | } | |
389 | let check: Result<u32, _> = self.0.try_into(); |
|
388 | let check: Result<u32, _> = self.0.try_into(); | |
390 | match check { |
|
389 | match check { | |
391 | Ok(value) => value.eq(other), |
|
390 | Ok(value) => value.eq(other), | |
392 | Err(_) => false, |
|
391 | Err(_) => false, | |
393 | } |
|
392 | } | |
394 | } |
|
393 | } | |
395 | } |
|
394 | } | |
396 |
|
395 | |||
397 | fn list_ancestors<G: Graph>( |
|
396 | fn list_ancestors<G: Graph>( | |
398 | graph: G, |
|
397 | graph: G, | |
399 | initrevs: Vec<Revision>, |
|
398 | initrevs: Vec<Revision>, | |
400 | stoprev: Revision, |
|
399 | stoprev: Revision, | |
401 | inclusive: bool, |
|
400 | inclusive: bool, | |
402 | ) -> Vec<Revision> { |
|
401 | ) -> Vec<Revision> { | |
403 | AncestorsIterator::new(graph, initrevs, stoprev, inclusive) |
|
402 | AncestorsIterator::new(graph, initrevs, stoprev, inclusive) | |
404 | .unwrap() |
|
403 | .unwrap() | |
405 | .map(|res| res.unwrap()) |
|
404 | .map(|res| res.unwrap()) | |
406 | .collect() |
|
405 | .collect() | |
407 | } |
|
406 | } | |
408 |
|
407 | |||
409 | #[test] |
|
408 | #[test] | |
410 | /// Same tests as test-ancestor.py, without membership |
|
409 | /// Same tests as test-ancestor.py, without membership | |
411 | /// (see also test-ancestor.py.out) |
|
410 | /// (see also test-ancestor.py.out) | |
412 | fn test_list_ancestor() { |
|
411 | fn test_list_ancestor() { | |
413 | assert_eq!( |
|
412 | assert_eq!( | |
414 | list_ancestors(SampleGraph, vec![], 0.into(), false), |
|
413 | list_ancestors(SampleGraph, vec![], 0.into(), false), | |
415 | Vec::<Revision>::new() |
|
414 | Vec::<Revision>::new() | |
416 | ); |
|
415 | ); | |
417 | assert_eq!( |
|
416 | assert_eq!( | |
418 | list_ancestors( |
|
417 | list_ancestors( | |
419 | SampleGraph, |
|
418 | SampleGraph, | |
420 | vec![11.into(), 13.into()], |
|
419 | vec![11.into(), 13.into()], | |
421 | 0.into(), |
|
420 | 0.into(), | |
422 | false |
|
421 | false | |
423 | ), |
|
422 | ), | |
424 | vec![8, 7, 4, 3, 2, 1, 0] |
|
423 | vec![8, 7, 4, 3, 2, 1, 0] | |
425 | ); |
|
424 | ); | |
426 | // it works as well on references, because &Graph implements Graph |
|
425 | // it works as well on references, because &Graph implements Graph | |
427 | // this is needed as of this writing by RHGitaly |
|
426 | // this is needed as of this writing by RHGitaly | |
428 | assert_eq!( |
|
427 | assert_eq!( | |
429 | list_ancestors( |
|
428 | list_ancestors( | |
430 | &SampleGraph, |
|
429 | &SampleGraph, | |
431 | vec![11.into(), 13.into()], |
|
430 | vec![11.into(), 13.into()], | |
432 | 0.into(), |
|
431 | 0.into(), | |
433 | false |
|
432 | false | |
434 | ), |
|
433 | ), | |
435 | vec![8, 7, 4, 3, 2, 1, 0] |
|
434 | vec![8, 7, 4, 3, 2, 1, 0] | |
436 | ); |
|
435 | ); | |
437 |
|
436 | |||
438 | assert_eq!( |
|
437 | assert_eq!( | |
439 | list_ancestors( |
|
438 | list_ancestors( | |
440 | SampleGraph, |
|
439 | SampleGraph, | |
441 | vec![1.into(), 3.into()], |
|
440 | vec![1.into(), 3.into()], | |
442 | 0.into(), |
|
441 | 0.into(), | |
443 | false |
|
442 | false | |
444 | ), |
|
443 | ), | |
445 | vec![1, 0] |
|
444 | vec![1, 0] | |
446 | ); |
|
445 | ); | |
447 | assert_eq!( |
|
446 | assert_eq!( | |
448 | list_ancestors( |
|
447 | list_ancestors( | |
449 | SampleGraph, |
|
448 | SampleGraph, | |
450 | vec![11.into(), 13.into()], |
|
449 | vec![11.into(), 13.into()], | |
451 | 0.into(), |
|
450 | 0.into(), | |
452 | true |
|
451 | true | |
453 | ), |
|
452 | ), | |
454 | vec![13, 11, 8, 7, 4, 3, 2, 1, 0] |
|
453 | vec![13, 11, 8, 7, 4, 3, 2, 1, 0] | |
455 | ); |
|
454 | ); | |
456 | assert_eq!( |
|
455 | assert_eq!( | |
457 | list_ancestors( |
|
456 | list_ancestors( | |
458 | SampleGraph, |
|
457 | SampleGraph, | |
459 | vec![11.into(), 13.into()], |
|
458 | vec![11.into(), 13.into()], | |
460 | 6.into(), |
|
459 | 6.into(), | |
461 | false |
|
460 | false | |
462 | ), |
|
461 | ), | |
463 | vec![8, 7] |
|
462 | vec![8, 7] | |
464 | ); |
|
463 | ); | |
465 | assert_eq!( |
|
464 | assert_eq!( | |
466 | list_ancestors( |
|
465 | list_ancestors( | |
467 | SampleGraph, |
|
466 | SampleGraph, | |
468 | vec![11.into(), 13.into()], |
|
467 | vec![11.into(), 13.into()], | |
469 | 6.into(), |
|
468 | 6.into(), | |
470 | true |
|
469 | true | |
471 | ), |
|
470 | ), | |
472 | vec![13, 11, 8, 7] |
|
471 | vec![13, 11, 8, 7] | |
473 | ); |
|
472 | ); | |
474 | assert_eq!( |
|
473 | assert_eq!( | |
475 | list_ancestors( |
|
474 | list_ancestors( | |
476 | SampleGraph, |
|
475 | SampleGraph, | |
477 | vec![11.into(), 13.into()], |
|
476 | vec![11.into(), 13.into()], | |
478 | 11.into(), |
|
477 | 11.into(), | |
479 | true |
|
478 | true | |
480 | ), |
|
479 | ), | |
481 | vec![13, 11] |
|
480 | vec![13, 11] | |
482 | ); |
|
481 | ); | |
483 | assert_eq!( |
|
482 | assert_eq!( | |
484 | list_ancestors( |
|
483 | list_ancestors( | |
485 | SampleGraph, |
|
484 | SampleGraph, | |
486 | vec![11.into(), 13.into()], |
|
485 | vec![11.into(), 13.into()], | |
487 | 12.into(), |
|
486 | 12.into(), | |
488 | true |
|
487 | true | |
489 | ), |
|
488 | ), | |
490 | vec![13] |
|
489 | vec![13] | |
491 | ); |
|
490 | ); | |
492 | assert_eq!( |
|
491 | assert_eq!( | |
493 | list_ancestors( |
|
492 | list_ancestors( | |
494 | SampleGraph, |
|
493 | SampleGraph, | |
495 | vec![10.into(), 1.into()], |
|
494 | vec![10.into(), 1.into()], | |
496 | 0.into(), |
|
495 | 0.into(), | |
497 | true |
|
496 | true | |
498 | ), |
|
497 | ), | |
499 | vec![10, 5, 4, 2, 1, 0] |
|
498 | vec![10, 5, 4, 2, 1, 0] | |
500 | ); |
|
499 | ); | |
501 | } |
|
500 | } | |
502 |
|
501 | |||
503 | #[test] |
|
502 | #[test] | |
504 | /// Corner case that's not directly in test-ancestors.py, but |
|
503 | /// Corner case that's not directly in test-ancestors.py, but | |
505 | /// that happens quite often, as demonstrated by running the whole |
|
504 | /// that happens quite often, as demonstrated by running the whole | |
506 | /// suite. |
|
505 | /// suite. | |
507 | /// For instance, run tests/test-obsolete-checkheads.t |
|
506 | /// For instance, run tests/test-obsolete-checkheads.t | |
508 | fn test_nullrev_input() { |
|
507 | fn test_nullrev_input() { | |
509 | let mut iter = AncestorsIterator::new( |
|
508 | let mut iter = AncestorsIterator::new( | |
510 | SampleGraph, |
|
509 | SampleGraph, | |
511 | vec![Revision(-1)], |
|
510 | vec![Revision(-1)], | |
512 | 0.into(), |
|
511 | 0.into(), | |
513 | false, |
|
512 | false, | |
514 | ) |
|
513 | ) | |
515 | .unwrap(); |
|
514 | .unwrap(); | |
516 | assert_eq!(iter.next(), None) |
|
515 | assert_eq!(iter.next(), None) | |
517 | } |
|
516 | } | |
518 |
|
517 | |||
519 | #[test] |
|
518 | #[test] | |
520 | fn test_contains() { |
|
519 | fn test_contains() { | |
521 | let mut lazy = AncestorsIterator::new( |
|
520 | let mut lazy = AncestorsIterator::new( | |
522 | SampleGraph, |
|
521 | SampleGraph, | |
523 | vec![10.into(), 1.into()], |
|
522 | vec![10.into(), 1.into()], | |
524 | 0.into(), |
|
523 | 0.into(), | |
525 | true, |
|
524 | true, | |
526 | ) |
|
525 | ) | |
527 | .unwrap(); |
|
526 | .unwrap(); | |
528 | assert!(lazy.contains(1.into()).unwrap()); |
|
527 | assert!(lazy.contains(1.into()).unwrap()); | |
529 | assert!(!lazy.contains(3.into()).unwrap()); |
|
528 | assert!(!lazy.contains(3.into()).unwrap()); | |
530 |
|
529 | |||
531 | let mut lazy = AncestorsIterator::new( |
|
530 | let mut lazy = AncestorsIterator::new( | |
532 | SampleGraph, |
|
531 | SampleGraph, | |
533 | vec![0.into()], |
|
532 | vec![0.into()], | |
534 | 0.into(), |
|
533 | 0.into(), | |
535 | false, |
|
534 | false, | |
536 | ) |
|
535 | ) | |
537 | .unwrap(); |
|
536 | .unwrap(); | |
538 | assert!(!lazy.contains(NULL_REVISION).unwrap()); |
|
537 | assert!(!lazy.contains(NULL_REVISION).unwrap()); | |
539 | } |
|
538 | } | |
540 |
|
539 | |||
541 | #[test] |
|
540 | #[test] | |
542 | fn test_peek() { |
|
541 | fn test_peek() { | |
543 | let mut iter = AncestorsIterator::new( |
|
542 | let mut iter = AncestorsIterator::new( | |
544 | SampleGraph, |
|
543 | SampleGraph, | |
545 | vec![10.into()], |
|
544 | vec![10.into()], | |
546 | 0.into(), |
|
545 | 0.into(), | |
547 | true, |
|
546 | true, | |
548 | ) |
|
547 | ) | |
549 | .unwrap(); |
|
548 | .unwrap(); | |
550 | // peek() gives us the next value |
|
549 | // peek() gives us the next value | |
551 | assert_eq!(iter.peek(), Some(10.into())); |
|
550 | assert_eq!(iter.peek(), Some(10.into())); | |
552 | // but it's not been consumed |
|
551 | // but it's not been consumed | |
553 | assert_eq!(iter.next(), Some(Ok(10.into()))); |
|
552 | assert_eq!(iter.next(), Some(Ok(10.into()))); | |
554 | // and iteration resumes normally |
|
553 | // and iteration resumes normally | |
555 | assert_eq!(iter.next(), Some(Ok(5.into()))); |
|
554 | assert_eq!(iter.next(), Some(Ok(5.into()))); | |
556 |
|
555 | |||
557 | // let's drain the iterator to test peek() at the end |
|
556 | // let's drain the iterator to test peek() at the end | |
558 | while iter.next().is_some() {} |
|
557 | while iter.next().is_some() {} | |
559 | assert_eq!(iter.peek(), None); |
|
558 | assert_eq!(iter.peek(), None); | |
560 | } |
|
559 | } | |
561 |
|
560 | |||
562 | #[test] |
|
561 | #[test] | |
563 | fn test_empty() { |
|
562 | fn test_empty() { | |
564 | let mut iter = AncestorsIterator::new( |
|
563 | let mut iter = AncestorsIterator::new( | |
565 | SampleGraph, |
|
564 | SampleGraph, | |
566 | vec![10.into()], |
|
565 | vec![10.into()], | |
567 | 0.into(), |
|
566 | 0.into(), | |
568 | true, |
|
567 | true, | |
569 | ) |
|
568 | ) | |
570 | .unwrap(); |
|
569 | .unwrap(); | |
571 | assert!(!iter.is_empty()); |
|
570 | assert!(!iter.is_empty()); | |
572 | while iter.next().is_some() {} |
|
571 | while iter.next().is_some() {} | |
573 | assert!(!iter.is_empty()); |
|
572 | assert!(!iter.is_empty()); | |
574 |
|
573 | |||
575 | let iter = AncestorsIterator::new(SampleGraph, vec![], 0.into(), true) |
|
574 | let iter = AncestorsIterator::new(SampleGraph, vec![], 0.into(), true) | |
576 | .unwrap(); |
|
575 | .unwrap(); | |
577 | assert!(iter.is_empty()); |
|
576 | assert!(iter.is_empty()); | |
578 |
|
577 | |||
579 | // case where iter.seen == {NULL_REVISION} |
|
578 | // case where iter.seen == {NULL_REVISION} | |
580 | let iter = AncestorsIterator::new( |
|
579 | let iter = AncestorsIterator::new( | |
581 | SampleGraph, |
|
580 | SampleGraph, | |
582 | vec![0.into()], |
|
581 | vec![0.into()], | |
583 | 0.into(), |
|
582 | 0.into(), | |
584 | false, |
|
583 | false, | |
585 | ) |
|
584 | ) | |
586 | .unwrap(); |
|
585 | .unwrap(); | |
587 | assert!(iter.is_empty()); |
|
586 | assert!(iter.is_empty()); | |
588 | } |
|
587 | } | |
589 |
|
588 | |||
590 | /// A corrupted Graph, supporting error handling tests |
|
589 | /// A corrupted Graph, supporting error handling tests | |
591 | #[derive(Clone, Debug)] |
|
590 | #[derive(Clone, Debug)] | |
592 | struct Corrupted; |
|
591 | struct Corrupted; | |
593 |
|
592 | |||
594 | impl Graph for Corrupted { |
|
593 | impl Graph for Corrupted { | |
595 | // FIXME what to do about this? Are we just not supposed to get them |
|
594 | // FIXME what to do about this? Are we just not supposed to get them | |
596 | // anymore? |
|
595 | // anymore? | |
597 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { |
|
596 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { | |
598 | match rev { |
|
597 | match rev { | |
599 | Revision(1) => Ok([0.into(), (-1).into()]), |
|
598 | Revision(1) => Ok([0.into(), (-1).into()]), | |
600 | r => Err(GraphError::ParentOutOfRange(r)), |
|
599 | r => Err(GraphError::ParentOutOfRange(r)), | |
601 | } |
|
600 | } | |
602 | } |
|
601 | } | |
603 | } |
|
602 | } | |
604 |
|
603 | |||
605 | #[test] |
|
604 | #[test] | |
606 | fn test_initrev_out_of_range() { |
|
605 | fn test_initrev_out_of_range() { | |
607 | // inclusive=false looks up initrev's parents right away |
|
606 | // inclusive=false looks up initrev's parents right away | |
608 | match AncestorsIterator::new( |
|
607 | match AncestorsIterator::new( | |
609 | SampleGraph, |
|
608 | SampleGraph, | |
610 | vec![25.into()], |
|
609 | vec![25.into()], | |
611 | 0.into(), |
|
610 | 0.into(), | |
612 | false, |
|
611 | false, | |
613 | ) { |
|
612 | ) { | |
614 | Ok(_) => panic!("Should have been ParentOutOfRange"), |
|
613 | Ok(_) => panic!("Should have been ParentOutOfRange"), | |
615 | Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25.into())), |
|
614 | Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25.into())), | |
616 | } |
|
615 | } | |
617 | } |
|
616 | } | |
618 |
|
617 | |||
619 | #[test] |
|
618 | #[test] | |
620 | fn test_next_out_of_range() { |
|
619 | fn test_next_out_of_range() { | |
621 | // inclusive=false looks up initrev's parents right away |
|
620 | // inclusive=false looks up initrev's parents right away | |
622 | let mut iter = |
|
621 | let mut iter = | |
623 | AncestorsIterator::new(Corrupted, vec![1.into()], 0.into(), false) |
|
622 | AncestorsIterator::new(Corrupted, vec![1.into()], 0.into(), false) | |
624 | .unwrap(); |
|
623 | .unwrap(); | |
625 | assert_eq!( |
|
624 | assert_eq!( | |
626 | iter.next(), |
|
625 | iter.next(), | |
627 | Some(Err(GraphError::ParentOutOfRange(0.into()))) |
|
626 | Some(Err(GraphError::ParentOutOfRange(0.into()))) | |
628 | ); |
|
627 | ); | |
629 | } |
|
628 | } | |
630 |
|
629 | |||
631 | #[test] |
|
630 | #[test] | |
632 | /// Test constructor, add/get bases and heads |
|
631 | /// Test constructor, add/get bases and heads | |
633 | fn test_missing_bases() -> Result<(), GraphError> { |
|
632 | fn test_missing_bases() -> Result<(), GraphError> { | |
634 | let mut missing_ancestors = MissingAncestors::new( |
|
633 | let mut missing_ancestors = MissingAncestors::new( | |
635 | SampleGraph, |
|
634 | SampleGraph, | |
636 | [5.into(), 3.into(), 1.into(), 3.into()].iter().cloned(), |
|
635 | [5.into(), 3.into(), 1.into(), 3.into()].iter().cloned(), | |
637 | ); |
|
636 | ); | |
638 | let mut as_vec: Vec<Revision> = |
|
637 | let mut as_vec: Vec<Revision> = | |
639 | missing_ancestors.get_bases().iter().cloned().collect(); |
|
638 | missing_ancestors.get_bases().iter().cloned().collect(); | |
640 | as_vec.sort_unstable(); |
|
639 | as_vec.sort_unstable(); | |
641 | assert_eq!(as_vec, [1, 3, 5]); |
|
640 | assert_eq!(as_vec, [1, 3, 5]); | |
642 | assert_eq!(missing_ancestors.max_base, 5); |
|
641 | assert_eq!(missing_ancestors.max_base, 5); | |
643 |
|
642 | |||
644 | missing_ancestors |
|
643 | missing_ancestors | |
645 | .add_bases([3.into(), 7.into(), 8.into()].iter().cloned()); |
|
644 | .add_bases([3.into(), 7.into(), 8.into()].iter().cloned()); | |
646 | as_vec = missing_ancestors.get_bases().iter().cloned().collect(); |
|
645 | as_vec = missing_ancestors.get_bases().iter().cloned().collect(); | |
647 | as_vec.sort_unstable(); |
|
646 | as_vec.sort_unstable(); | |
648 | assert_eq!(as_vec, [1, 3, 5, 7, 8]); |
|
647 | assert_eq!(as_vec, [1, 3, 5, 7, 8]); | |
649 | assert_eq!(missing_ancestors.max_base, 8); |
|
648 | assert_eq!(missing_ancestors.max_base, 8); | |
650 |
|
649 | |||
651 | as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect(); |
|
650 | as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect(); | |
652 | as_vec.sort_unstable(); |
|
651 | as_vec.sort_unstable(); | |
653 | assert_eq!(as_vec, [3, 5, 7, 8]); |
|
652 | assert_eq!(as_vec, [3, 5, 7, 8]); | |
654 | Ok(()) |
|
653 | Ok(()) | |
655 | } |
|
654 | } | |
656 |
|
655 | |||
657 | fn assert_missing_remove( |
|
656 | fn assert_missing_remove( | |
658 | bases: &[BaseRevision], |
|
657 | bases: &[BaseRevision], | |
659 | revs: &[BaseRevision], |
|
658 | revs: &[BaseRevision], | |
660 | expected: &[BaseRevision], |
|
659 | expected: &[BaseRevision], | |
661 | ) { |
|
660 | ) { | |
662 | let mut missing_ancestors = MissingAncestors::new( |
|
661 | let mut missing_ancestors = MissingAncestors::new( | |
663 | SampleGraph, |
|
662 | SampleGraph, | |
664 | bases.iter().map(|r| Revision(*r)), |
|
663 | bases.iter().map(|r| Revision(*r)), | |
665 | ); |
|
664 | ); | |
666 | let mut revset: HashSet<Revision> = |
|
665 | let mut revset: HashSet<Revision> = | |
667 | revs.iter().map(|r| Revision(*r)).collect(); |
|
666 | revs.iter().map(|r| Revision(*r)).collect(); | |
668 | missing_ancestors |
|
667 | missing_ancestors | |
669 | .remove_ancestors_from(&mut revset) |
|
668 | .remove_ancestors_from(&mut revset) | |
670 | .unwrap(); |
|
669 | .unwrap(); | |
671 | let mut as_vec: Vec<Revision> = revset.into_iter().collect(); |
|
670 | let mut as_vec: Vec<Revision> = revset.into_iter().collect(); | |
672 | as_vec.sort_unstable(); |
|
671 | as_vec.sort_unstable(); | |
673 | assert_eq!(as_vec.as_slice(), expected); |
|
672 | assert_eq!(as_vec.as_slice(), expected); | |
674 | } |
|
673 | } | |
675 |
|
674 | |||
676 | #[test] |
|
675 | #[test] | |
677 | fn test_missing_remove() { |
|
676 | fn test_missing_remove() { | |
678 | assert_missing_remove( |
|
677 | assert_missing_remove( | |
679 | &[1, 2, 3, 4, 7], |
|
678 | &[1, 2, 3, 4, 7], | |
680 | Vec::from_iter(1..10).as_slice(), |
|
679 | Vec::from_iter(1..10).as_slice(), | |
681 | &[5, 6, 8, 9], |
|
680 | &[5, 6, 8, 9], | |
682 | ); |
|
681 | ); | |
683 | assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]); |
|
682 | assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]); | |
684 | assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]); |
|
683 | assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]); | |
685 | } |
|
684 | } | |
686 |
|
685 | |||
687 | fn assert_missing_ancestors( |
|
686 | fn assert_missing_ancestors( | |
688 | bases: &[BaseRevision], |
|
687 | bases: &[BaseRevision], | |
689 | revs: &[BaseRevision], |
|
688 | revs: &[BaseRevision], | |
690 | expected: &[BaseRevision], |
|
689 | expected: &[BaseRevision], | |
691 | ) { |
|
690 | ) { | |
692 | let mut missing_ancestors = MissingAncestors::new( |
|
691 | let mut missing_ancestors = MissingAncestors::new( | |
693 | SampleGraph, |
|
692 | SampleGraph, | |
694 | bases.iter().map(|r| Revision(*r)), |
|
693 | bases.iter().map(|r| Revision(*r)), | |
695 | ); |
|
694 | ); | |
696 | let missing = missing_ancestors |
|
695 | let missing = missing_ancestors | |
697 | .missing_ancestors(revs.iter().map(|r| Revision(*r))) |
|
696 | .missing_ancestors(revs.iter().map(|r| Revision(*r))) | |
698 | .unwrap(); |
|
697 | .unwrap(); | |
699 | assert_eq!(missing.as_slice(), expected); |
|
698 | assert_eq!(missing.as_slice(), expected); | |
700 | } |
|
699 | } | |
701 |
|
700 | |||
702 | #[test] |
|
701 | #[test] | |
703 | fn test_missing_ancestors() { |
|
702 | fn test_missing_ancestors() { | |
704 | // examples taken from test-ancestors.py by having it run |
|
703 | // examples taken from test-ancestors.py by having it run | |
705 | // on the same graph (both naive and fast Python algs) |
|
704 | // on the same graph (both naive and fast Python algs) | |
706 | assert_missing_ancestors(&[10], &[11], &[3, 7, 11]); |
|
705 | assert_missing_ancestors(&[10], &[11], &[3, 7, 11]); | |
707 | assert_missing_ancestors(&[11], &[10], &[5, 10]); |
|
706 | assert_missing_ancestors(&[11], &[10], &[5, 10]); | |
708 | assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]); |
|
707 | assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]); | |
709 | } |
|
708 | } | |
710 |
|
709 | |||
711 | /// An interesting case found by a random generator similar to |
|
710 | /// An interesting case found by a random generator similar to | |
712 | /// the one in test-ancestor.py. An early version of Rust MissingAncestors |
|
711 | /// the one in test-ancestor.py. An early version of Rust MissingAncestors | |
713 | /// failed this, yet none of the integration tests of the whole suite |
|
712 | /// failed this, yet none of the integration tests of the whole suite | |
714 | /// catched it. |
|
713 | /// catched it. | |
715 | #[allow(clippy::unnecessary_cast)] |
|
714 | #[allow(clippy::unnecessary_cast)] | |
716 | #[test] |
|
715 | #[test] | |
717 | fn test_remove_ancestors_from_case1() { |
|
716 | fn test_remove_ancestors_from_case1() { | |
718 | const FAKE_NULL_REVISION: BaseRevision = -1; |
|
717 | const FAKE_NULL_REVISION: BaseRevision = -1; | |
719 | assert_eq!(FAKE_NULL_REVISION, NULL_REVISION.0); |
|
718 | assert_eq!(FAKE_NULL_REVISION, NULL_REVISION.0); | |
720 | let graph: VecGraph = vec![ |
|
719 | let graph: VecGraph = vec![ | |
721 | [FAKE_NULL_REVISION, FAKE_NULL_REVISION], |
|
720 | [FAKE_NULL_REVISION, FAKE_NULL_REVISION], | |
722 | [0, FAKE_NULL_REVISION], |
|
721 | [0, FAKE_NULL_REVISION], | |
723 | [1, 0], |
|
722 | [1, 0], | |
724 | [2, 1], |
|
723 | [2, 1], | |
725 | [3, FAKE_NULL_REVISION], |
|
724 | [3, FAKE_NULL_REVISION], | |
726 | [4, FAKE_NULL_REVISION], |
|
725 | [4, FAKE_NULL_REVISION], | |
727 | [5, 1], |
|
726 | [5, 1], | |
728 | [2, FAKE_NULL_REVISION], |
|
727 | [2, FAKE_NULL_REVISION], | |
729 | [7, FAKE_NULL_REVISION], |
|
728 | [7, FAKE_NULL_REVISION], | |
730 | [8, FAKE_NULL_REVISION], |
|
729 | [8, FAKE_NULL_REVISION], | |
731 | [9, FAKE_NULL_REVISION], |
|
730 | [9, FAKE_NULL_REVISION], | |
732 | [10, 1], |
|
731 | [10, 1], | |
733 | [3, FAKE_NULL_REVISION], |
|
732 | [3, FAKE_NULL_REVISION], | |
734 | [12, FAKE_NULL_REVISION], |
|
733 | [12, FAKE_NULL_REVISION], | |
735 | [13, FAKE_NULL_REVISION], |
|
734 | [13, FAKE_NULL_REVISION], | |
736 | [14, FAKE_NULL_REVISION], |
|
735 | [14, FAKE_NULL_REVISION], | |
737 | [4, FAKE_NULL_REVISION], |
|
736 | [4, FAKE_NULL_REVISION], | |
738 | [16, FAKE_NULL_REVISION], |
|
737 | [16, FAKE_NULL_REVISION], | |
739 | [17, FAKE_NULL_REVISION], |
|
738 | [17, FAKE_NULL_REVISION], | |
740 | [18, FAKE_NULL_REVISION], |
|
739 | [18, FAKE_NULL_REVISION], | |
741 | [19, 11], |
|
740 | [19, 11], | |
742 | [20, FAKE_NULL_REVISION], |
|
741 | [20, FAKE_NULL_REVISION], | |
743 | [21, FAKE_NULL_REVISION], |
|
742 | [21, FAKE_NULL_REVISION], | |
744 | [22, FAKE_NULL_REVISION], |
|
743 | [22, FAKE_NULL_REVISION], | |
745 | [23, FAKE_NULL_REVISION], |
|
744 | [23, FAKE_NULL_REVISION], | |
746 | [2, FAKE_NULL_REVISION], |
|
745 | [2, FAKE_NULL_REVISION], | |
747 | [3, FAKE_NULL_REVISION], |
|
746 | [3, FAKE_NULL_REVISION], | |
748 | [26, 24], |
|
747 | [26, 24], | |
749 | [27, FAKE_NULL_REVISION], |
|
748 | [27, FAKE_NULL_REVISION], | |
750 | [28, FAKE_NULL_REVISION], |
|
749 | [28, FAKE_NULL_REVISION], | |
751 | [12, FAKE_NULL_REVISION], |
|
750 | [12, FAKE_NULL_REVISION], | |
752 | [1, FAKE_NULL_REVISION], |
|
751 | [1, FAKE_NULL_REVISION], | |
753 | [1, 9], |
|
752 | [1, 9], | |
754 | [32, FAKE_NULL_REVISION], |
|
753 | [32, FAKE_NULL_REVISION], | |
755 | [33, FAKE_NULL_REVISION], |
|
754 | [33, FAKE_NULL_REVISION], | |
756 | [34, 31], |
|
755 | [34, 31], | |
757 | [35, FAKE_NULL_REVISION], |
|
756 | [35, FAKE_NULL_REVISION], | |
758 | [36, 26], |
|
757 | [36, 26], | |
759 | [37, FAKE_NULL_REVISION], |
|
758 | [37, FAKE_NULL_REVISION], | |
760 | [38, FAKE_NULL_REVISION], |
|
759 | [38, FAKE_NULL_REVISION], | |
761 | [39, FAKE_NULL_REVISION], |
|
760 | [39, FAKE_NULL_REVISION], | |
762 | [40, FAKE_NULL_REVISION], |
|
761 | [40, FAKE_NULL_REVISION], | |
763 | [41, FAKE_NULL_REVISION], |
|
762 | [41, FAKE_NULL_REVISION], | |
764 | [42, 26], |
|
763 | [42, 26], | |
765 | [0, FAKE_NULL_REVISION], |
|
764 | [0, FAKE_NULL_REVISION], | |
766 | [44, FAKE_NULL_REVISION], |
|
765 | [44, FAKE_NULL_REVISION], | |
767 | [45, 4], |
|
766 | [45, 4], | |
768 | [40, FAKE_NULL_REVISION], |
|
767 | [40, FAKE_NULL_REVISION], | |
769 | [47, FAKE_NULL_REVISION], |
|
768 | [47, FAKE_NULL_REVISION], | |
770 | [36, 0], |
|
769 | [36, 0], | |
771 | [49, FAKE_NULL_REVISION], |
|
770 | [49, FAKE_NULL_REVISION], | |
772 | [FAKE_NULL_REVISION, FAKE_NULL_REVISION], |
|
771 | [FAKE_NULL_REVISION, FAKE_NULL_REVISION], | |
773 | [51, FAKE_NULL_REVISION], |
|
772 | [51, FAKE_NULL_REVISION], | |
774 | [52, FAKE_NULL_REVISION], |
|
773 | [52, FAKE_NULL_REVISION], | |
775 | [53, FAKE_NULL_REVISION], |
|
774 | [53, FAKE_NULL_REVISION], | |
776 | [14, FAKE_NULL_REVISION], |
|
775 | [14, FAKE_NULL_REVISION], | |
777 | [55, FAKE_NULL_REVISION], |
|
776 | [55, FAKE_NULL_REVISION], | |
778 | [15, FAKE_NULL_REVISION], |
|
777 | [15, FAKE_NULL_REVISION], | |
779 | [23, FAKE_NULL_REVISION], |
|
778 | [23, FAKE_NULL_REVISION], | |
780 | [58, FAKE_NULL_REVISION], |
|
779 | [58, FAKE_NULL_REVISION], | |
781 | [59, FAKE_NULL_REVISION], |
|
780 | [59, FAKE_NULL_REVISION], | |
782 | [2, FAKE_NULL_REVISION], |
|
781 | [2, FAKE_NULL_REVISION], | |
783 | [61, 59], |
|
782 | [61, 59], | |
784 | [62, FAKE_NULL_REVISION], |
|
783 | [62, FAKE_NULL_REVISION], | |
785 | [63, FAKE_NULL_REVISION], |
|
784 | [63, FAKE_NULL_REVISION], | |
786 | [FAKE_NULL_REVISION, FAKE_NULL_REVISION], |
|
785 | [FAKE_NULL_REVISION, FAKE_NULL_REVISION], | |
787 | [65, FAKE_NULL_REVISION], |
|
786 | [65, FAKE_NULL_REVISION], | |
788 | [66, FAKE_NULL_REVISION], |
|
787 | [66, FAKE_NULL_REVISION], | |
789 | [67, FAKE_NULL_REVISION], |
|
788 | [67, FAKE_NULL_REVISION], | |
790 | [68, FAKE_NULL_REVISION], |
|
789 | [68, FAKE_NULL_REVISION], | |
791 | [37, 28], |
|
790 | [37, 28], | |
792 | [69, 25], |
|
791 | [69, 25], | |
793 | [71, FAKE_NULL_REVISION], |
|
792 | [71, FAKE_NULL_REVISION], | |
794 | [72, FAKE_NULL_REVISION], |
|
793 | [72, FAKE_NULL_REVISION], | |
795 | [50, 2], |
|
794 | [50, 2], | |
796 | [74, FAKE_NULL_REVISION], |
|
795 | [74, FAKE_NULL_REVISION], | |
797 | [12, FAKE_NULL_REVISION], |
|
796 | [12, FAKE_NULL_REVISION], | |
798 | [18, FAKE_NULL_REVISION], |
|
797 | [18, FAKE_NULL_REVISION], | |
799 | [77, FAKE_NULL_REVISION], |
|
798 | [77, FAKE_NULL_REVISION], | |
800 | [78, FAKE_NULL_REVISION], |
|
799 | [78, FAKE_NULL_REVISION], | |
801 | [79, FAKE_NULL_REVISION], |
|
800 | [79, FAKE_NULL_REVISION], | |
802 | [43, 33], |
|
801 | [43, 33], | |
803 | [81, FAKE_NULL_REVISION], |
|
802 | [81, FAKE_NULL_REVISION], | |
804 | [82, FAKE_NULL_REVISION], |
|
803 | [82, FAKE_NULL_REVISION], | |
805 | [83, FAKE_NULL_REVISION], |
|
804 | [83, FAKE_NULL_REVISION], | |
806 | [84, 45], |
|
805 | [84, 45], | |
807 | [85, FAKE_NULL_REVISION], |
|
806 | [85, FAKE_NULL_REVISION], | |
808 | [86, FAKE_NULL_REVISION], |
|
807 | [86, FAKE_NULL_REVISION], | |
809 | [FAKE_NULL_REVISION, FAKE_NULL_REVISION], |
|
808 | [FAKE_NULL_REVISION, FAKE_NULL_REVISION], | |
810 | [88, FAKE_NULL_REVISION], |
|
809 | [88, FAKE_NULL_REVISION], | |
811 | [FAKE_NULL_REVISION, FAKE_NULL_REVISION], |
|
810 | [FAKE_NULL_REVISION, FAKE_NULL_REVISION], | |
812 | [76, 83], |
|
811 | [76, 83], | |
813 | [44, FAKE_NULL_REVISION], |
|
812 | [44, FAKE_NULL_REVISION], | |
814 | [92, FAKE_NULL_REVISION], |
|
813 | [92, FAKE_NULL_REVISION], | |
815 | [93, FAKE_NULL_REVISION], |
|
814 | [93, FAKE_NULL_REVISION], | |
816 | [9, FAKE_NULL_REVISION], |
|
815 | [9, FAKE_NULL_REVISION], | |
817 | [95, 67], |
|
816 | [95, 67], | |
818 | [96, FAKE_NULL_REVISION], |
|
817 | [96, FAKE_NULL_REVISION], | |
819 | [97, FAKE_NULL_REVISION], |
|
818 | [97, FAKE_NULL_REVISION], | |
820 | [FAKE_NULL_REVISION, FAKE_NULL_REVISION], |
|
819 | [FAKE_NULL_REVISION, FAKE_NULL_REVISION], | |
821 | ] |
|
820 | ] | |
822 | .into_iter() |
|
821 | .into_iter() | |
823 | .map(|[a, b]| [Revision(a), Revision(b)]) |
|
822 | .map(|[a, b]| [Revision(a), Revision(b)]) | |
824 | .collect(); |
|
823 | .collect(); | |
825 | let problem_rev = 28.into(); |
|
824 | let problem_rev = 28.into(); | |
826 | let problem_base = 70.into(); |
|
825 | let problem_base = 70.into(); | |
827 | // making the problem obvious: problem_rev is a parent of problem_base |
|
826 | // making the problem obvious: problem_rev is a parent of problem_base | |
828 | assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev); |
|
827 | assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev); | |
829 |
|
828 | |||
830 | let mut missing_ancestors: MissingAncestors<VecGraph> = |
|
829 | let mut missing_ancestors: MissingAncestors<VecGraph> = | |
831 | MissingAncestors::new( |
|
830 | MissingAncestors::new( | |
832 | graph, |
|
831 | graph, | |
833 | [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6] |
|
832 | [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6] | |
834 | .iter() |
|
833 | .iter() | |
835 | .map(|r| Revision(*r)), |
|
834 | .map(|r| Revision(*r)), | |
836 | ); |
|
835 | ); | |
837 | assert!(missing_ancestors.bases.contains(&problem_base)); |
|
836 | assert!(missing_ancestors.bases.contains(&problem_base)); | |
838 |
|
837 | |||
839 | let mut revs: HashSet<Revision> = |
|
838 | let mut revs: HashSet<Revision> = | |
840 | [4, 12, 41, 28, 68, 38, 1, 30, 56, 44] |
|
839 | [4, 12, 41, 28, 68, 38, 1, 30, 56, 44] | |
841 | .iter() |
|
840 | .iter() | |
842 | .map(|r| Revision(*r)) |
|
841 | .map(|r| Revision(*r)) | |
843 | .collect(); |
|
842 | .collect(); | |
844 | missing_ancestors.remove_ancestors_from(&mut revs).unwrap(); |
|
843 | missing_ancestors.remove_ancestors_from(&mut revs).unwrap(); | |
845 | assert!(!revs.contains(&problem_rev)); |
|
844 | assert!(!revs.contains(&problem_rev)); | |
846 | } |
|
845 | } | |
847 | } |
|
846 | } |
@@ -1,250 +1,250 | |||||
1 | use crate::errors::HgError; |
|
1 | use crate::errors::HgError; | |
2 | use crate::exit_codes; |
|
2 | use crate::exit_codes; | |
3 | use crate::repo::Repo; |
|
3 | use crate::repo::Repo; | |
4 | use crate::revlog::path_encode::path_encode; |
|
4 | use crate::revlog::path_encode::path_encode; | |
5 | use crate::revlog::NodePrefix; |
|
5 | use crate::revlog::NodePrefix; | |
6 | use crate::revlog::Revision; |
|
6 | use crate::revlog::Revision; | |
7 | use crate::revlog::RevlogEntry; |
|
7 | use crate::revlog::RevlogEntry; | |
8 | use crate::revlog::{Revlog, RevlogError}; |
|
8 | use crate::revlog::{Revlog, RevlogError}; | |
9 | use crate::utils::files::get_path_from_bytes; |
|
9 | use crate::utils::files::get_path_from_bytes; | |
10 | use crate::utils::hg_path::HgPath; |
|
10 | use crate::utils::hg_path::HgPath; | |
11 | use crate::utils::SliceExt; |
|
11 | use crate::utils::SliceExt; | |
12 | use crate::Graph; |
|
12 | use crate::Graph; | |
13 | use crate::GraphError; |
|
13 | use crate::GraphError; | |
14 | use crate::UncheckedRevision; |
|
14 | use crate::UncheckedRevision; | |
15 | use std::path::PathBuf; |
|
15 | use std::path::PathBuf; | |
16 |
|
16 | |||
17 | use super::options::RevlogOpenOptions; |
|
17 | use super::options::RevlogOpenOptions; | |
18 |
|
18 | |||
19 | /// A specialized `Revlog` to work with file data logs. |
|
19 | /// A specialized `Revlog` to work with file data logs. | |
20 | pub struct Filelog { |
|
20 | pub struct Filelog { | |
21 | /// The generic `revlog` format. |
|
21 | /// The generic `revlog` format. | |
22 | revlog: Revlog, |
|
22 | revlog: Revlog, | |
23 | } |
|
23 | } | |
24 |
|
24 | |||
25 | impl Graph for Filelog { |
|
25 | impl Graph for Filelog { | |
26 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { |
|
26 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { | |
27 | self.revlog.parents(rev) |
|
27 | self.revlog.parents(rev) | |
28 | } |
|
28 | } | |
29 | } |
|
29 | } | |
30 |
|
30 | |||
31 | impl Filelog { |
|
31 | impl Filelog { | |
32 | pub fn open_vfs( |
|
32 | pub fn open_vfs( | |
33 | store_vfs: &crate::vfs::VfsImpl, |
|
33 | store_vfs: &crate::vfs::VfsImpl, | |
34 | file_path: &HgPath, |
|
34 | file_path: &HgPath, | |
35 | options: RevlogOpenOptions, |
|
35 | options: RevlogOpenOptions, | |
36 | ) -> Result<Self, HgError> { |
|
36 | ) -> Result<Self, HgError> { | |
37 | let index_path = store_path(file_path, b".i"); |
|
37 | let index_path = store_path(file_path, b".i"); | |
38 | let data_path = store_path(file_path, b".d"); |
|
38 | let data_path = store_path(file_path, b".d"); | |
39 | let revlog = |
|
39 | let revlog = | |
40 | Revlog::open(store_vfs, index_path, Some(&data_path), options)?; |
|
40 | Revlog::open(store_vfs, index_path, Some(&data_path), options)?; | |
41 | Ok(Self { revlog }) |
|
41 | Ok(Self { revlog }) | |
42 | } |
|
42 | } | |
43 |
|
43 | |||
44 | pub fn open( |
|
44 | pub fn open( | |
45 | repo: &Repo, |
|
45 | repo: &Repo, | |
46 | file_path: &HgPath, |
|
46 | file_path: &HgPath, | |
47 | options: RevlogOpenOptions, |
|
47 | options: RevlogOpenOptions, | |
48 | ) -> Result<Self, HgError> { |
|
48 | ) -> Result<Self, HgError> { | |
49 | Self::open_vfs(&repo.store_vfs(), file_path, options) |
|
49 | Self::open_vfs(&repo.store_vfs(), file_path, options) | |
50 | } |
|
50 | } | |
51 |
|
51 | |||
52 | /// The given node ID is that of the file as found in a filelog, not of a |
|
52 | /// The given node ID is that of the file as found in a filelog, not of a | |
53 | /// changeset. |
|
53 | /// changeset. | |
54 | pub fn data_for_node( |
|
54 | pub fn data_for_node( | |
55 | &self, |
|
55 | &self, | |
56 | file_node: impl Into<NodePrefix>, |
|
56 | file_node: impl Into<NodePrefix>, | |
57 | ) -> Result<FilelogRevisionData, RevlogError> { |
|
57 | ) -> Result<FilelogRevisionData, RevlogError> { | |
58 | let file_rev = self.revlog.rev_from_node(file_node.into())?; |
|
58 | let file_rev = self.revlog.rev_from_node(file_node.into())?; | |
59 | self.data_for_unchecked_rev(file_rev.into()) |
|
59 | self.data_for_unchecked_rev(file_rev.into()) | |
60 | } |
|
60 | } | |
61 |
|
61 | |||
62 | /// The given revision is that of the file as found in a filelog, not of a |
|
62 | /// The given revision is that of the file as found in a filelog, not of a | |
63 | /// changeset. |
|
63 | /// changeset. | |
64 | pub fn data_for_unchecked_rev( |
|
64 | pub fn data_for_unchecked_rev( | |
65 | &self, |
|
65 | &self, | |
66 | file_rev: UncheckedRevision, |
|
66 | file_rev: UncheckedRevision, | |
67 | ) -> Result<FilelogRevisionData, RevlogError> { |
|
67 | ) -> Result<FilelogRevisionData, RevlogError> { | |
68 | let data: Vec<u8> = self |
|
68 | let data: Vec<u8> = self | |
69 | .revlog |
|
69 | .revlog | |
70 | .get_data_for_unchecked_rev(file_rev)? |
|
70 | .get_data_for_unchecked_rev(file_rev)? | |
71 | .into_owned(); |
|
71 | .into_owned(); | |
72 | Ok(FilelogRevisionData(data)) |
|
72 | Ok(FilelogRevisionData(data)) | |
73 | } |
|
73 | } | |
74 |
|
74 | |||
75 | /// The given node ID is that of the file as found in a filelog, not of a |
|
75 | /// The given node ID is that of the file as found in a filelog, not of a | |
76 | /// changeset. |
|
76 | /// changeset. | |
77 | pub fn entry_for_node( |
|
77 | pub fn entry_for_node( | |
78 | &self, |
|
78 | &self, | |
79 | file_node: impl Into<NodePrefix>, |
|
79 | file_node: impl Into<NodePrefix>, | |
80 | ) -> Result<FilelogEntry, RevlogError> { |
|
80 | ) -> Result<FilelogEntry, RevlogError> { | |
81 | let file_rev = self.revlog.rev_from_node(file_node.into())?; |
|
81 | let file_rev = self.revlog.rev_from_node(file_node.into())?; | |
82 | self.entry(file_rev) |
|
82 | self.entry(file_rev) | |
83 | } |
|
83 | } | |
84 |
|
84 | |||
85 | /// The given revision is that of the file as found in a filelog, not of a |
|
85 | /// The given revision is that of the file as found in a filelog, not of a | |
86 | /// changeset. |
|
86 | /// changeset. | |
87 | pub fn entry_for_unchecked_rev( |
|
87 | pub fn entry_for_unchecked_rev( | |
88 | &self, |
|
88 | &self, | |
89 | file_rev: UncheckedRevision, |
|
89 | file_rev: UncheckedRevision, | |
90 | ) -> Result<FilelogEntry, RevlogError> { |
|
90 | ) -> Result<FilelogEntry, RevlogError> { | |
91 | Ok(FilelogEntry( |
|
91 | Ok(FilelogEntry( | |
92 | self.revlog.get_entry_for_unchecked_rev(file_rev)?, |
|
92 | self.revlog.get_entry_for_unchecked_rev(file_rev)?, | |
93 | )) |
|
93 | )) | |
94 | } |
|
94 | } | |
95 |
|
95 | |||
96 | /// Same as [`Self::entry_for_unchecked_rev`] for a checked revision. |
|
96 | /// Same as [`Self::entry_for_unchecked_rev`] for a checked revision. | |
97 | pub fn entry( |
|
97 | pub fn entry( | |
98 | &self, |
|
98 | &self, | |
99 | file_rev: Revision, |
|
99 | file_rev: Revision, | |
100 | ) -> Result<FilelogEntry, RevlogError> { |
|
100 | ) -> Result<FilelogEntry, RevlogError> { | |
101 | Ok(FilelogEntry(self.revlog.get_entry(file_rev)?)) |
|
101 | Ok(FilelogEntry(self.revlog.get_entry(file_rev)?)) | |
102 | } |
|
102 | } | |
103 | } |
|
103 | } | |
104 |
|
104 | |||
105 | fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf { |
|
105 | fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf { | |
106 | let encoded_bytes = |
|
106 | let encoded_bytes = | |
107 | path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat()); |
|
107 | path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat()); | |
108 | get_path_from_bytes(&encoded_bytes).into() |
|
108 | get_path_from_bytes(&encoded_bytes).into() | |
109 | } |
|
109 | } | |
110 |
|
110 | |||
111 | pub struct FilelogEntry<'a>(RevlogEntry<'a>); |
|
111 | pub struct FilelogEntry<'a>(RevlogEntry<'a>); | |
112 |
|
112 | |||
impl FilelogEntry<'_> {
    /// `self.data()` can be expensive, with decompression and delta
    /// resolution.
    ///
    /// *Without* paying this cost, based on revlog index information
    /// including `RevlogEntry::uncompressed_len`:
    ///
    /// * Returns `true` if the length that `self.data().file_data().len()`
    ///   would return is definitely **not equal** to `other_len`.
    /// * Returns `false` if available information is inconclusive.
    pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
        // Relevant code that implement this behavior in Python code:
        // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
        // revlog.size, revlog.rawsize

        // Let’s call `file_data_len` what would be returned by
        // `self.data().file_data().len()`.

        if self.0.is_censored() {
            // Censored revisions yield empty file data.
            let file_data_len = 0;
            return other_len != file_data_len;
        }

        if self.0.has_length_affecting_flag_processor() {
            // We can’t conclude anything about `file_data_len`.
            return false;
        }

        // Revlog revisions (usually) have metadata for the size of
        // their data after decompression and delta resolution
        // as would be returned by `Revlog::get_rev_data`.
        //
        // For filelogs this is the file’s contents preceded by an optional
        // metadata block.
        let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
            l as u64
        } else {
            // The field was set to -1, the actual uncompressed len is unknown.
            // We need to decompress to say more.
            return false;
        };
        // `uncompressed_len = file_data_len + optional_metadata_len`,
        // so `file_data_len <= uncompressed_len`.
        if uncompressed_len < other_len {
            // Transitively, `file_data_len < other_len`.
            // So `other_len != file_data_len` definitely.
            return true;
        }

        if uncompressed_len == other_len + 4 {
            // It’s possible that `file_data_len == other_len` with an empty
            // metadata block (2 start marker bytes + 2 end marker bytes).
            // This happens when there wouldn’t otherwise be metadata, but
            // the first 2 bytes of file data happen to match a start marker
            // and would be ambiguous.
            return false;
        }

        if !self.0.has_p1() {
            // There may or may not be copy metadata, so we can’t deduce more
            // about `file_data_len` without computing file data.
            return false;
        }

        // Filelog ancestry is not meaningful in the way changelog ancestry is.
        // It only provides hints to delta generation.
        // p1 and p2 are set to null when making a copy or rename since
        // contents are likely unrelated to what might have previously existed
        // at the destination path.
        //
        // Conversely, since here p1 is non-null, there is no copy metadata.
        // Note that this reasoning may be invalidated in the presence of
        // merges made by some previous versions of Mercurial that
        // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
        // and `tests/test-issue6528.t`.
        //
        // Since copy metadata is currently the only kind of metadata
        // kept in revlog data of filelogs,
        // this `FilelogEntry` does not have such metadata:
        let file_data_len = uncompressed_len;

        file_data_len != other_len
    }

    /// Fully resolve this entry's data (decompression + delta resolution)
    /// and wrap it as [`FilelogRevisionData`].
    ///
    /// Any revlog error other than [`RevlogError::Other`] is converted into
    /// an abort, since it should not happen at this point.
    pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
        let data = self.0.data();
        match data {
            Ok(data) => Ok(FilelogRevisionData(data.into_owned())),
            // Errors other than `HgError` should not happen at this point
            Err(e) => match e {
                RevlogError::Other(hg_error) => Err(hg_error),
                revlog_error => Err(HgError::abort(
                    revlog_error.to_string(),
                    exit_codes::ABORT,
                    None,
                )),
            },
        }
    }
}
213 |
|
213 | |||
214 | /// The data for one revision in a filelog, uncompressed and delta-resolved. |
|
214 | /// The data for one revision in a filelog, uncompressed and delta-resolved. | |
/// The data for one revision in a filelog, uncompressed and delta-resolved.
///
/// May begin with an optional metadata block delimited by `\x01\n` markers;
/// see [`Self::split`].
pub struct FilelogRevisionData(Vec<u8>);
216 |
|
216 | |||
217 | impl FilelogRevisionData { |
|
217 | impl FilelogRevisionData { | |
218 | /// Split into metadata and data |
|
218 | /// Split into metadata and data | |
219 | pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> { |
|
219 | pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> { | |
220 |
const DELIMITER: &[u8; 2] = |
|
220 | const DELIMITER: &[u8; 2] = b"\x01\n"; | |
221 |
|
221 | |||
222 | if let Some(rest) = self.0.drop_prefix(DELIMITER) { |
|
222 | if let Some(rest) = self.0.drop_prefix(DELIMITER) { | |
223 | if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) { |
|
223 | if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) { | |
224 | Ok((Some(metadata), data)) |
|
224 | Ok((Some(metadata), data)) | |
225 | } else { |
|
225 | } else { | |
226 | Err(HgError::corrupted( |
|
226 | Err(HgError::corrupted( | |
227 | "Missing metadata end delimiter in filelog entry", |
|
227 | "Missing metadata end delimiter in filelog entry", | |
228 | )) |
|
228 | )) | |
229 | } |
|
229 | } | |
230 | } else { |
|
230 | } else { | |
231 | Ok((None, &self.0)) |
|
231 | Ok((None, &self.0)) | |
232 | } |
|
232 | } | |
233 | } |
|
233 | } | |
234 |
|
234 | |||
235 | /// Returns the file contents at this revision, stripped of any metadata |
|
235 | /// Returns the file contents at this revision, stripped of any metadata | |
236 | pub fn file_data(&self) -> Result<&[u8], HgError> { |
|
236 | pub fn file_data(&self) -> Result<&[u8], HgError> { | |
237 | let (_metadata, data) = self.split()?; |
|
237 | let (_metadata, data) = self.split()?; | |
238 | Ok(data) |
|
238 | Ok(data) | |
239 | } |
|
239 | } | |
240 |
|
240 | |||
241 | /// Consume the entry, and convert it into data, discarding any metadata, |
|
241 | /// Consume the entry, and convert it into data, discarding any metadata, | |
242 | /// if present. |
|
242 | /// if present. | |
243 | pub fn into_file_data(self) -> Result<Vec<u8>, HgError> { |
|
243 | pub fn into_file_data(self) -> Result<Vec<u8>, HgError> { | |
244 | if let (Some(_metadata), data) = self.split()? { |
|
244 | if let (Some(_metadata), data) = self.split()? { | |
245 | Ok(data.to_owned()) |
|
245 | Ok(data.to_owned()) | |
246 | } else { |
|
246 | } else { | |
247 | Ok(self.0) |
|
247 | Ok(self.0) | |
248 | } |
|
248 | } | |
249 | } |
|
249 | } | |
250 | } |
|
250 | } |
@@ -1,1351 +1,1350 | |||||
1 | //! A layer of lower-level revlog functionality to encapsulate most of the |
|
1 | //! A layer of lower-level revlog functionality to encapsulate most of the | |
2 | //! IO work and expensive operations. |
|
2 | //! IO work and expensive operations. | |
3 | use std::{ |
|
3 | use std::{ | |
4 | borrow::Cow, |
|
4 | borrow::Cow, | |
5 | cell::RefCell, |
|
5 | cell::RefCell, | |
6 | io::{ErrorKind, Seek, SeekFrom, Write}, |
|
6 | io::{ErrorKind, Seek, SeekFrom, Write}, | |
7 | ops::Deref, |
|
7 | ops::Deref, | |
8 | path::PathBuf, |
|
8 | path::PathBuf, | |
9 | sync::{Arc, Mutex}, |
|
9 | sync::{Arc, Mutex}, | |
10 | }; |
|
10 | }; | |
11 |
|
11 | |||
12 | use schnellru::{ByMemoryUsage, LruMap}; |
|
12 | use schnellru::{ByMemoryUsage, LruMap}; | |
13 | use sha1::{Digest, Sha1}; |
|
13 | use sha1::{Digest, Sha1}; | |
14 |
|
14 | |||
15 | use crate::{ |
|
15 | use crate::{ | |
16 | errors::{HgError, IoResultExt}, |
|
16 | errors::{HgError, IoResultExt}, | |
17 | exit_codes, |
|
17 | exit_codes, | |
18 | transaction::Transaction, |
|
18 | transaction::Transaction, | |
19 | vfs::Vfs, |
|
19 | vfs::Vfs, | |
20 | }; |
|
20 | }; | |
21 |
|
21 | |||
22 | use super::{ |
|
22 | use super::{ | |
23 | compression::{ |
|
23 | compression::{ | |
24 | uncompressed_zstd_data, CompressionConfig, Compressor, NoneCompressor, |
|
24 | uncompressed_zstd_data, CompressionConfig, Compressor, NoneCompressor, | |
25 | ZlibCompressor, ZstdCompressor, ZLIB_BYTE, ZSTD_BYTE, |
|
25 | ZlibCompressor, ZstdCompressor, ZLIB_BYTE, ZSTD_BYTE, | |
26 | }, |
|
26 | }, | |
27 | file_io::{DelayedBuffer, FileHandle, RandomAccessFile, WriteHandles}, |
|
27 | file_io::{DelayedBuffer, FileHandle, RandomAccessFile, WriteHandles}, | |
28 | index::{Index, IndexHeader, INDEX_ENTRY_SIZE}, |
|
28 | index::{Index, IndexHeader, INDEX_ENTRY_SIZE}, | |
29 | node::{NODE_BYTES_LENGTH, NULL_NODE}, |
|
29 | node::{NODE_BYTES_LENGTH, NULL_NODE}, | |
30 | options::{RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig}, |
|
30 | options::{RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig}, | |
31 | BaseRevision, Node, Revision, RevlogEntry, RevlogError, RevlogIndex, |
|
31 | BaseRevision, Node, Revision, RevlogEntry, RevlogError, RevlogIndex, | |
32 | UncheckedRevision, NULL_REVISION, NULL_REVLOG_ENTRY_FLAGS, |
|
32 | UncheckedRevision, NULL_REVISION, NULL_REVLOG_ENTRY_FLAGS, | |
33 | }; |
|
33 | }; | |
34 |
|
34 | |||
/// Matches the `_InnerRevlog` class in the Python code, as an arbitrary
/// boundary to incrementally rewrite higher-level revlog functionality in
/// Rust.
pub struct InnerRevlog {
    /// When index and data are not interleaved: bytes of the revlog index.
    /// When index and data are interleaved (inline revlog): bytes of the
    /// revlog index and data.
    pub index: Index,
    /// The store vfs that is used to interact with the filesystem
    vfs: Box<dyn Vfs>,
    /// The index file path, relative to the vfs root
    pub index_file: PathBuf,
    /// The data file path, relative to the vfs root (same as `index_file`
    /// if inline)
    data_file: PathBuf,
    /// Data config that applies to this revlog
    data_config: RevlogDataConfig,
    /// Delta config that applies to this revlog
    delta_config: RevlogDeltaConfig,
    /// Feature config that applies to this revlog
    feature_config: RevlogFeatureConfig,
    /// A view into this revlog's data file
    segment_file: RandomAccessFile,
    /// A cache of uncompressed chunks that have previously been restored.
    /// Its eviction policy is defined in [`Self::new`].
    uncompressed_chunk_cache: Option<UncompressedChunkCache>,
    /// Used to keep track of the actual target during diverted writes
    /// for the changelog
    original_index_file: Option<PathBuf>,
    /// Write handles to the index and data files
    /// XXX why duplicate from `index` and `segment_file`?
    writing_handles: Option<WriteHandles>,
    /// See [`DelayedBuffer`].
    delayed_buffer: Option<Arc<Mutex<DelayedBuffer>>>,
    /// Whether this revlog is inline. XXX why duplicate from `index`?
    pub inline: bool,
    /// A cache of the last revision, which is usually accessed multiple
    /// times.
    pub last_revision_cache: Mutex<Option<SingleRevisionCache>>,
}
75 |
|
75 | |||
76 | impl InnerRevlog { |
|
76 | impl InnerRevlog { | |
77 | pub fn new( |
|
77 | pub fn new( | |
78 | vfs: Box<dyn Vfs>, |
|
78 | vfs: Box<dyn Vfs>, | |
79 | index: Index, |
|
79 | index: Index, | |
80 | index_file: PathBuf, |
|
80 | index_file: PathBuf, | |
81 | data_file: PathBuf, |
|
81 | data_file: PathBuf, | |
82 | data_config: RevlogDataConfig, |
|
82 | data_config: RevlogDataConfig, | |
83 | delta_config: RevlogDeltaConfig, |
|
83 | delta_config: RevlogDeltaConfig, | |
84 | feature_config: RevlogFeatureConfig, |
|
84 | feature_config: RevlogFeatureConfig, | |
85 | ) -> Self { |
|
85 | ) -> Self { | |
86 | assert!(index_file.is_relative()); |
|
86 | assert!(index_file.is_relative()); | |
87 | assert!(data_file.is_relative()); |
|
87 | assert!(data_file.is_relative()); | |
88 | let segment_file = RandomAccessFile::new( |
|
88 | let segment_file = RandomAccessFile::new( | |
89 | dyn_clone::clone_box(&*vfs), |
|
89 | dyn_clone::clone_box(&*vfs), | |
90 | if index.is_inline() { |
|
90 | if index.is_inline() { | |
91 | index_file.to_owned() |
|
91 | index_file.to_owned() | |
92 | } else { |
|
92 | } else { | |
93 | data_file.to_owned() |
|
93 | data_file.to_owned() | |
94 | }, |
|
94 | }, | |
95 | ); |
|
95 | ); | |
96 |
|
96 | |||
97 | let uncompressed_chunk_cache = |
|
97 | let uncompressed_chunk_cache = | |
98 | data_config.uncompressed_cache_factor.map( |
|
98 | data_config.uncompressed_cache_factor.map( | |
99 | // Arbitrary initial value |
|
99 | // Arbitrary initial value | |
100 | // TODO check if using a hasher specific to integers is useful |
|
100 | // TODO check if using a hasher specific to integers is useful | |
101 | |_factor| RefCell::new(LruMap::with_memory_budget(65536)), |
|
101 | |_factor| RefCell::new(LruMap::with_memory_budget(65536)), | |
102 | ); |
|
102 | ); | |
103 |
|
103 | |||
104 | let inline = index.is_inline(); |
|
104 | let inline = index.is_inline(); | |
105 | Self { |
|
105 | Self { | |
106 | index, |
|
106 | index, | |
107 | vfs, |
|
107 | vfs, | |
108 | index_file, |
|
108 | index_file, | |
109 | data_file, |
|
109 | data_file, | |
110 | data_config, |
|
110 | data_config, | |
111 | delta_config, |
|
111 | delta_config, | |
112 | feature_config, |
|
112 | feature_config, | |
113 | segment_file, |
|
113 | segment_file, | |
114 | uncompressed_chunk_cache, |
|
114 | uncompressed_chunk_cache, | |
115 | original_index_file: None, |
|
115 | original_index_file: None, | |
116 | writing_handles: None, |
|
116 | writing_handles: None, | |
117 | delayed_buffer: None, |
|
117 | delayed_buffer: None, | |
118 | inline, |
|
118 | inline, | |
119 | last_revision_cache: Mutex::new(None), |
|
119 | last_revision_cache: Mutex::new(None), | |
120 | } |
|
120 | } | |
121 | } |
|
121 | } | |
122 |
|
122 | |||
123 | /// Return number of entries of the revlog index |
|
123 | /// Return number of entries of the revlog index | |
124 | pub fn len(&self) -> usize { |
|
124 | pub fn len(&self) -> usize { | |
125 | self.index.len() |
|
125 | self.index.len() | |
126 | } |
|
126 | } | |
127 |
|
127 | |||
128 | /// Return `true` if this revlog has no entries |
|
128 | /// Return `true` if this revlog has no entries | |
129 | pub fn is_empty(&self) -> bool { |
|
129 | pub fn is_empty(&self) -> bool { | |
130 | self.len() == 0 |
|
130 | self.len() == 0 | |
131 | } |
|
131 | } | |
132 |
|
132 | |||
133 | /// Return whether this revlog is inline (mixed index and data) |
|
133 | /// Return whether this revlog is inline (mixed index and data) | |
134 | pub fn is_inline(&self) -> bool { |
|
134 | pub fn is_inline(&self) -> bool { | |
135 | self.inline |
|
135 | self.inline | |
136 | } |
|
136 | } | |
137 |
|
137 | |||
138 | /// Clear all caches from this revlog |
|
138 | /// Clear all caches from this revlog | |
139 | pub fn clear_cache(&mut self) { |
|
139 | pub fn clear_cache(&mut self) { | |
140 | assert!(!self.is_delaying()); |
|
140 | assert!(!self.is_delaying()); | |
141 | if let Some(cache) = self.uncompressed_chunk_cache.as_ref() { |
|
141 | if let Some(cache) = self.uncompressed_chunk_cache.as_ref() { | |
142 | // We don't clear the allocation here because it's probably faster. |
|
142 | // We don't clear the allocation here because it's probably faster. | |
143 | // We could change our minds later if this ends up being a problem |
|
143 | // We could change our minds later if this ends up being a problem | |
144 | // with regards to memory consumption. |
|
144 | // with regards to memory consumption. | |
145 | cache.borrow_mut().clear(); |
|
145 | cache.borrow_mut().clear(); | |
146 | } |
|
146 | } | |
147 | } |
|
147 | } | |
148 |
|
148 | |||
    /// Return an entry for the null revision
    ///
    /// All fields carry the null/neutral values: null parents, null node,
    /// zero length, null flags.
    pub fn make_null_entry(&self) -> RevlogEntry {
        RevlogEntry {
            revlog: self,
            rev: NULL_REVISION,
            uncompressed_len: 0,
            p1: NULL_REVISION,
            p2: NULL_REVISION,
            flags: NULL_REVLOG_ENTRY_FLAGS,
            hash: NULL_NODE,
        }
    }
161 |
|
161 | |||
162 | /// Return the [`RevlogEntry`] for a [`Revision`] that is known to exist |
|
162 | /// Return the [`RevlogEntry`] for a [`Revision`] that is known to exist | |
163 | pub fn get_entry( |
|
163 | pub fn get_entry( | |
164 | &self, |
|
164 | &self, | |
165 | rev: Revision, |
|
165 | rev: Revision, | |
166 | ) -> Result<RevlogEntry, RevlogError> { |
|
166 | ) -> Result<RevlogEntry, RevlogError> { | |
167 | if rev == NULL_REVISION { |
|
167 | if rev == NULL_REVISION { | |
168 | return Ok(self.make_null_entry()); |
|
168 | return Ok(self.make_null_entry()); | |
169 | } |
|
169 | } | |
170 | let index_entry = self |
|
170 | let index_entry = self | |
171 | .index |
|
171 | .index | |
172 | .get_entry(rev) |
|
172 | .get_entry(rev) | |
173 | .ok_or_else(|| RevlogError::InvalidRevision(rev.to_string()))?; |
|
173 | .ok_or_else(|| RevlogError::InvalidRevision(rev.to_string()))?; | |
174 | let p1 = |
|
174 | let p1 = | |
175 | self.index.check_revision(index_entry.p1()).ok_or_else(|| { |
|
175 | self.index.check_revision(index_entry.p1()).ok_or_else(|| { | |
176 | RevlogError::corrupted(format!( |
|
176 | RevlogError::corrupted(format!( | |
177 | "p1 for rev {} is invalid", |
|
177 | "p1 for rev {} is invalid", | |
178 | rev |
|
178 | rev | |
179 | )) |
|
179 | )) | |
180 | })?; |
|
180 | })?; | |
181 | let p2 = |
|
181 | let p2 = | |
182 | self.index.check_revision(index_entry.p2()).ok_or_else(|| { |
|
182 | self.index.check_revision(index_entry.p2()).ok_or_else(|| { | |
183 | RevlogError::corrupted(format!( |
|
183 | RevlogError::corrupted(format!( | |
184 | "p2 for rev {} is invalid", |
|
184 | "p2 for rev {} is invalid", | |
185 | rev |
|
185 | rev | |
186 | )) |
|
186 | )) | |
187 | })?; |
|
187 | })?; | |
188 | let entry = RevlogEntry { |
|
188 | let entry = RevlogEntry { | |
189 | revlog: self, |
|
189 | revlog: self, | |
190 | rev, |
|
190 | rev, | |
191 | uncompressed_len: index_entry.uncompressed_len(), |
|
191 | uncompressed_len: index_entry.uncompressed_len(), | |
192 | p1, |
|
192 | p1, | |
193 | p2, |
|
193 | p2, | |
194 | flags: index_entry.flags(), |
|
194 | flags: index_entry.flags(), | |
195 | hash: *index_entry.hash(), |
|
195 | hash: *index_entry.hash(), | |
196 | }; |
|
196 | }; | |
197 | Ok(entry) |
|
197 | Ok(entry) | |
198 | } |
|
198 | } | |
199 |
|
199 | |||
200 | /// Return the [`RevlogEntry`] for `rev`. If `rev` fails to check, this |
|
200 | /// Return the [`RevlogEntry`] for `rev`. If `rev` fails to check, this | |
201 | /// returns a [`RevlogError`]. |
|
201 | /// returns a [`RevlogError`]. | |
202 | pub fn get_entry_for_unchecked_rev( |
|
202 | pub fn get_entry_for_unchecked_rev( | |
203 | &self, |
|
203 | &self, | |
204 | rev: UncheckedRevision, |
|
204 | rev: UncheckedRevision, | |
205 | ) -> Result<RevlogEntry, RevlogError> { |
|
205 | ) -> Result<RevlogEntry, RevlogError> { | |
206 | if rev == NULL_REVISION.into() { |
|
206 | if rev == NULL_REVISION.into() { | |
207 | return Ok(self.make_null_entry()); |
|
207 | return Ok(self.make_null_entry()); | |
208 | } |
|
208 | } | |
209 | let rev = self.index.check_revision(rev).ok_or_else(|| { |
|
209 | let rev = self.index.check_revision(rev).ok_or_else(|| { | |
210 | RevlogError::corrupted(format!("rev {} is invalid", rev)) |
|
210 | RevlogError::corrupted(format!("rev {} is invalid", rev)) | |
211 | })?; |
|
211 | })?; | |
212 | self.get_entry(rev) |
|
212 | self.get_entry(rev) | |
213 | } |
|
213 | } | |
214 |
|
214 | |||
    /// Is the revlog currently delaying the visibility of written data?
    ///
    /// The delaying mechanism can be either in-memory or written on disk in a
    /// side-file.
    pub fn is_delaying(&self) -> bool {
        // In-memory delaying uses `delayed_buffer`; on-disk diversion keeps
        // the real target in `original_index_file`.
        self.delayed_buffer.is_some() || self.original_index_file.is_some()
    }
222 |
|
222 | |||
223 | /// The offset of the data chunk for this revision |
|
223 | /// The offset of the data chunk for this revision | |
224 | #[inline(always)] |
|
224 | #[inline(always)] | |
225 | pub fn start(&self, rev: Revision) -> usize { |
|
225 | pub fn start(&self, rev: Revision) -> usize { | |
226 | self.index.start( |
|
226 | self.index.start( | |
227 | rev, |
|
227 | rev, | |
228 | &self |
|
228 | &self | |
229 | .index |
|
229 | .index | |
230 | .get_entry(rev) |
|
230 | .get_entry(rev) | |
231 | .unwrap_or_else(|| self.index.make_null_entry()), |
|
231 | .unwrap_or_else(|| self.index.make_null_entry()), | |
232 | ) |
|
232 | ) | |
233 | } |
|
233 | } | |
234 |
|
234 | |||
235 | /// The length of the data chunk for this revision |
|
235 | /// The length of the data chunk for this revision | |
236 | /// TODO rename this method and others to more explicit names than the |
|
236 | /// TODO rename this method and others to more explicit names than the | |
237 | /// existing ones that were copied over from Python |
|
237 | /// existing ones that were copied over from Python | |
238 | #[inline(always)] |
|
238 | #[inline(always)] | |
239 | pub fn length(&self, rev: Revision) -> usize { |
|
239 | pub fn length(&self, rev: Revision) -> usize { | |
240 | self.index |
|
240 | self.index | |
241 | .get_entry(rev) |
|
241 | .get_entry(rev) | |
242 | .unwrap_or_else(|| self.index.make_null_entry()) |
|
242 | .unwrap_or_else(|| self.index.make_null_entry()) | |
243 | .compressed_len() as usize |
|
243 | .compressed_len() as usize | |
244 | } |
|
244 | } | |
245 |
|
245 | |||
246 | /// The end of the data chunk for this revision |
|
246 | /// The end of the data chunk for this revision | |
247 | #[inline(always)] |
|
247 | #[inline(always)] | |
248 | pub fn end(&self, rev: Revision) -> usize { |
|
248 | pub fn end(&self, rev: Revision) -> usize { | |
249 | self.start(rev) + self.length(rev) |
|
249 | self.start(rev) + self.length(rev) | |
250 | } |
|
250 | } | |
251 |
|
251 | |||
252 | /// Return the delta parent of the given revision |
|
252 | /// Return the delta parent of the given revision | |
253 | pub fn delta_parent(&self, rev: Revision) -> Revision { |
|
253 | pub fn delta_parent(&self, rev: Revision) -> Revision { | |
254 | let base = self |
|
254 | let base = self | |
255 | .index |
|
255 | .index | |
256 | .get_entry(rev) |
|
256 | .get_entry(rev) | |
257 | .unwrap() |
|
257 | .unwrap() | |
258 | .base_revision_or_base_of_delta_chain(); |
|
258 | .base_revision_or_base_of_delta_chain(); | |
259 | if base.0 == rev.0 { |
|
259 | if base.0 == rev.0 { | |
260 | NULL_REVISION |
|
260 | NULL_REVISION | |
261 | } else if self.delta_config.general_delta { |
|
261 | } else if self.delta_config.general_delta { | |
262 | Revision(base.0) |
|
262 | Revision(base.0) | |
263 | } else { |
|
263 | } else { | |
264 | Revision(rev.0 - 1) |
|
264 | Revision(rev.0 - 1) | |
265 | } |
|
265 | } | |
266 | } |
|
266 | } | |
267 |
|
267 | |||
268 | /// Return whether `rev` points to a snapshot revision (i.e. does not have |
|
268 | /// Return whether `rev` points to a snapshot revision (i.e. does not have | |
269 | /// a delta base). |
|
269 | /// a delta base). | |
270 | pub fn is_snapshot(&self, rev: Revision) -> Result<bool, RevlogError> { |
|
270 | pub fn is_snapshot(&self, rev: Revision) -> Result<bool, RevlogError> { | |
271 | if !self.delta_config.sparse_revlog { |
|
271 | if !self.delta_config.sparse_revlog { | |
272 | return Ok(self.delta_parent(rev) == NULL_REVISION); |
|
272 | return Ok(self.delta_parent(rev) == NULL_REVISION); | |
273 | } |
|
273 | } | |
274 | self.index.is_snapshot_unchecked(rev) |
|
274 | self.index.is_snapshot_unchecked(rev) | |
275 | } |
|
275 | } | |
276 |
|
276 | |||
277 | /// Return the delta chain for `rev` according to this revlog's config. |
|
277 | /// Return the delta chain for `rev` according to this revlog's config. | |
278 | /// See [`Index::delta_chain`] for more information. |
|
278 | /// See [`Index::delta_chain`] for more information. | |
279 | pub fn delta_chain( |
|
279 | pub fn delta_chain( | |
280 | &self, |
|
280 | &self, | |
281 | rev: Revision, |
|
281 | rev: Revision, | |
282 | stop_rev: Option<Revision>, |
|
282 | stop_rev: Option<Revision>, | |
283 | ) -> Result<(Vec<Revision>, bool), HgError> { |
|
283 | ) -> Result<(Vec<Revision>, bool), HgError> { | |
284 | self.index.delta_chain( |
|
284 | self.index.delta_chain( | |
285 | rev, |
|
285 | rev, | |
286 | stop_rev, |
|
286 | stop_rev, | |
287 | self.delta_config.general_delta.into(), |
|
287 | self.delta_config.general_delta.into(), | |
288 | ) |
|
288 | ) | |
289 | } |
|
289 | } | |
290 |
|
290 | |||
291 | fn compressor(&self) -> Result<Box<dyn Compressor>, HgError> { |
|
291 | fn compressor(&self) -> Result<Box<dyn Compressor>, HgError> { | |
292 | // TODO cache the compressor? |
|
292 | // TODO cache the compressor? | |
293 | Ok(match self.feature_config.compression_engine { |
|
293 | Ok(match self.feature_config.compression_engine { | |
294 | CompressionConfig::Zlib { level } => { |
|
294 | CompressionConfig::Zlib { level } => { | |
295 | Box::new(ZlibCompressor::new(level)) |
|
295 | Box::new(ZlibCompressor::new(level)) | |
296 | } |
|
296 | } | |
297 | CompressionConfig::Zstd { level, threads } => { |
|
297 | CompressionConfig::Zstd { level, threads } => { | |
298 | Box::new(ZstdCompressor::new(level, threads)) |
|
298 | Box::new(ZstdCompressor::new(level, threads)) | |
299 | } |
|
299 | } | |
300 | CompressionConfig::None => Box::new(NoneCompressor), |
|
300 | CompressionConfig::None => Box::new(NoneCompressor), | |
301 | }) |
|
301 | }) | |
302 | } |
|
302 | } | |
303 |
|
303 | |||
304 | /// Generate a possibly-compressed representation of data. |
|
304 | /// Generate a possibly-compressed representation of data. | |
305 | /// Returns `None` if the data was not compressed. |
|
305 | /// Returns `None` if the data was not compressed. | |
306 | pub fn compress<'data>( |
|
306 | pub fn compress<'data>( | |
307 | &self, |
|
307 | &self, | |
308 | data: &'data [u8], |
|
308 | data: &'data [u8], | |
309 | ) -> Result<Option<Cow<'data, [u8]>>, RevlogError> { |
|
309 | ) -> Result<Option<Cow<'data, [u8]>>, RevlogError> { | |
310 | if data.is_empty() { |
|
310 | if data.is_empty() { | |
311 | return Ok(Some(data.into())); |
|
311 | return Ok(Some(data.into())); | |
312 | } |
|
312 | } | |
313 | let res = self.compressor()?.compress(data)?; |
|
313 | let res = self.compressor()?.compress(data)?; | |
314 | if let Some(compressed) = res { |
|
314 | if let Some(compressed) = res { | |
315 | // The revlog compressor added the header in the returned data. |
|
315 | // The revlog compressor added the header in the returned data. | |
316 | return Ok(Some(compressed.into())); |
|
316 | return Ok(Some(compressed.into())); | |
317 | } |
|
317 | } | |
318 |
|
318 | |||
319 | if data[0] == b'\0' { |
|
319 | if data[0] == b'\0' { | |
320 | return Ok(Some(data.into())); |
|
320 | return Ok(Some(data.into())); | |
321 | } |
|
321 | } | |
322 | Ok(None) |
|
322 | Ok(None) | |
323 | } |
|
323 | } | |
324 |
|
324 | |||
325 | /// Decompress a revlog chunk. |
|
325 | /// Decompress a revlog chunk. | |
326 | /// |
|
326 | /// | |
327 | /// The chunk is expected to begin with a header identifying the |
|
327 | /// The chunk is expected to begin with a header identifying the | |
328 | /// format type so it can be routed to an appropriate decompressor. |
|
328 | /// format type so it can be routed to an appropriate decompressor. | |
329 | pub fn decompress<'a>( |
|
329 | pub fn decompress<'a>( | |
330 | &'a self, |
|
330 | &'a self, | |
331 | data: &'a [u8], |
|
331 | data: &'a [u8], | |
332 | ) -> Result<Cow<[u8]>, RevlogError> { |
|
332 | ) -> Result<Cow<[u8]>, RevlogError> { | |
333 | if data.is_empty() { |
|
333 | if data.is_empty() { | |
334 | return Ok(data.into()); |
|
334 | return Ok(data.into()); | |
335 | } |
|
335 | } | |
336 |
|
336 | |||
337 | // Revlogs are read much more frequently than they are written and many |
|
337 | // Revlogs are read much more frequently than they are written and many | |
338 | // chunks only take microseconds to decompress, so performance is |
|
338 | // chunks only take microseconds to decompress, so performance is | |
339 | // important here. |
|
339 | // important here. | |
340 |
|
340 | |||
341 | let header = data[0]; |
|
341 | let header = data[0]; | |
342 | match header { |
|
342 | match header { | |
343 | // Settings don't matter as they only affect compression |
|
343 | // Settings don't matter as they only affect compression | |
344 | ZLIB_BYTE => Ok(ZlibCompressor::new(0).decompress(data)?.into()), |
|
344 | ZLIB_BYTE => Ok(ZlibCompressor::new(0).decompress(data)?.into()), | |
345 | // Settings don't matter as they only affect compression |
|
345 | // Settings don't matter as they only affect compression | |
346 | ZSTD_BYTE => { |
|
346 | ZSTD_BYTE => { | |
347 | Ok(ZstdCompressor::new(0, 0).decompress(data)?.into()) |
|
347 | Ok(ZstdCompressor::new(0, 0).decompress(data)?.into()) | |
348 | } |
|
348 | } | |
349 | b'\0' => Ok(data.into()), |
|
349 | b'\0' => Ok(data.into()), | |
350 | b'u' => Ok((&data[1..]).into()), |
|
350 | b'u' => Ok((&data[1..]).into()), | |
351 | other => Err(HgError::UnsupportedFeature(format!( |
|
351 | other => Err(HgError::UnsupportedFeature(format!( | |
352 | "unknown compression header '{}'", |
|
352 | "unknown compression header '{}'", | |
353 | other |
|
353 | other | |
354 | )) |
|
354 | )) | |
355 | .into()), |
|
355 | .into()), | |
356 | } |
|
356 | } | |
357 | } |
|
357 | } | |
358 |
|
358 | |||
359 | /// Obtain a segment of raw data corresponding to a range of revisions. |
|
359 | /// Obtain a segment of raw data corresponding to a range of revisions. | |
360 | /// |
|
360 | /// | |
361 | /// Requests for data may be satisfied by a cache. |
|
361 | /// Requests for data may be satisfied by a cache. | |
362 | /// |
|
362 | /// | |
363 | /// Returns a 2-tuple of (offset, data) for the requested range of |
|
363 | /// Returns a 2-tuple of (offset, data) for the requested range of | |
364 | /// revisions. Offset is the integer offset from the beginning of the |
|
364 | /// revisions. Offset is the integer offset from the beginning of the | |
365 | /// revlog and data is a slice of the raw byte data. |
|
365 | /// revlog and data is a slice of the raw byte data. | |
366 | /// |
|
366 | /// | |
367 | /// Callers will need to call `self.start(rev)` and `self.length(rev)` |
|
367 | /// Callers will need to call `self.start(rev)` and `self.length(rev)` | |
368 | /// to determine where each revision's data begins and ends. |
|
368 | /// to determine where each revision's data begins and ends. | |
369 | pub fn get_segment_for_revs( |
|
369 | pub fn get_segment_for_revs( | |
370 | &self, |
|
370 | &self, | |
371 | start_rev: Revision, |
|
371 | start_rev: Revision, | |
372 | end_rev: Revision, |
|
372 | end_rev: Revision, | |
373 | ) -> Result<(usize, Vec<u8>), HgError> { |
|
373 | ) -> Result<(usize, Vec<u8>), HgError> { | |
374 | let start = if start_rev == NULL_REVISION { |
|
374 | let start = if start_rev == NULL_REVISION { | |
375 | 0 |
|
375 | 0 | |
376 | } else { |
|
376 | } else { | |
377 | let start_entry = self |
|
377 | let start_entry = self | |
378 | .index |
|
378 | .index | |
379 | .get_entry(start_rev) |
|
379 | .get_entry(start_rev) | |
380 | .expect("null revision segment"); |
|
380 | .expect("null revision segment"); | |
381 | self.index.start(start_rev, &start_entry) |
|
381 | self.index.start(start_rev, &start_entry) | |
382 | }; |
|
382 | }; | |
383 | let end_entry = self |
|
383 | let end_entry = self | |
384 | .index |
|
384 | .index | |
385 | .get_entry(end_rev) |
|
385 | .get_entry(end_rev) | |
386 | .expect("null revision segment"); |
|
386 | .expect("null revision segment"); | |
387 | let end = self.index.start(end_rev, &end_entry) + self.length(end_rev); |
|
387 | let end = self.index.start(end_rev, &end_entry) + self.length(end_rev); | |
388 |
|
388 | |||
389 | let length = end - start; |
|
389 | let length = end - start; | |
390 |
|
390 | |||
391 | // XXX should we use mmap instead of doing this for platforms that |
|
391 | // XXX should we use mmap instead of doing this for platforms that | |
392 | // support madvise/populate? |
|
392 | // support madvise/populate? | |
393 | Ok((start, self.segment_file.read_chunk(start, length)?)) |
|
393 | Ok((start, self.segment_file.read_chunk(start, length)?)) | |
394 | } |
|
394 | } | |
395 |
|
395 | |||
396 | /// Return the uncompressed raw data for `rev` |
|
396 | /// Return the uncompressed raw data for `rev` | |
397 | pub fn chunk_for_rev(&self, rev: Revision) -> Result<Arc<[u8]>, HgError> { |
|
397 | pub fn chunk_for_rev(&self, rev: Revision) -> Result<Arc<[u8]>, HgError> { | |
398 | if let Some(cache) = self.uncompressed_chunk_cache.as_ref() { |
|
398 | if let Some(cache) = self.uncompressed_chunk_cache.as_ref() { | |
399 | if let Some(chunk) = cache.borrow_mut().get(&rev) { |
|
399 | if let Some(chunk) = cache.borrow_mut().get(&rev) { | |
400 | return Ok(chunk.clone()); |
|
400 | return Ok(chunk.clone()); | |
401 | } |
|
401 | } | |
402 | } |
|
402 | } | |
403 | // TODO revlogv2 should check the compression mode |
|
403 | // TODO revlogv2 should check the compression mode | |
404 | let data = self.get_segment_for_revs(rev, rev)?.1; |
|
404 | let data = self.get_segment_for_revs(rev, rev)?.1; | |
405 | let uncompressed = self.decompress(&data).map_err(|e| { |
|
405 | let uncompressed = self.decompress(&data).map_err(|e| { | |
406 | HgError::abort( |
|
406 | HgError::abort( | |
407 | format!("revlog decompression error: {}", e), |
|
407 | format!("revlog decompression error: {}", e), | |
408 | exit_codes::ABORT, |
|
408 | exit_codes::ABORT, | |
409 | None, |
|
409 | None, | |
410 | ) |
|
410 | ) | |
411 | })?; |
|
411 | })?; | |
412 | let uncompressed: Arc<[u8]> = Arc::from(uncompressed.into_owned()); |
|
412 | let uncompressed: Arc<[u8]> = Arc::from(uncompressed.into_owned()); | |
413 | if let Some(cache) = self.uncompressed_chunk_cache.as_ref() { |
|
413 | if let Some(cache) = self.uncompressed_chunk_cache.as_ref() { | |
414 | cache.borrow_mut().insert(rev, uncompressed.clone()); |
|
414 | cache.borrow_mut().insert(rev, uncompressed.clone()); | |
415 | } |
|
415 | } | |
416 | Ok(uncompressed) |
|
416 | Ok(uncompressed) | |
417 | } |
|
417 | } | |
418 |
|
418 | |||
419 | /// Execute `func` within a read context for the data file, meaning that |
|
419 | /// Execute `func` within a read context for the data file, meaning that | |
420 | /// the read handle will be taken and discarded after the operation. |
|
420 | /// the read handle will be taken and discarded after the operation. | |
421 | pub fn with_read<R>( |
|
421 | pub fn with_read<R>( | |
422 | &self, |
|
422 | &self, | |
423 | func: impl FnOnce() -> Result<R, RevlogError>, |
|
423 | func: impl FnOnce() -> Result<R, RevlogError>, | |
424 | ) -> Result<R, RevlogError> { |
|
424 | ) -> Result<R, RevlogError> { | |
425 | self.enter_reading_context()?; |
|
425 | self.enter_reading_context()?; | |
426 | let res = func(); |
|
426 | let res = func(); | |
427 | self.exit_reading_context(); |
|
427 | self.exit_reading_context(); | |
428 | res.map_err(Into::into) |
|
428 | res.map_err(Into::into) | |
429 | } |
|
429 | } | |
430 |
|
430 | |||
431 | /// `pub` only for use in hg-cpython |
|
431 | /// `pub` only for use in hg-cpython | |
432 | #[doc(hidden)] |
|
432 | #[doc(hidden)] | |
433 | pub fn enter_reading_context(&self) -> Result<(), HgError> { |
|
433 | pub fn enter_reading_context(&self) -> Result<(), HgError> { | |
434 | if self.is_empty() { |
|
434 | if self.is_empty() { | |
435 | // Nothing to be read |
|
435 | // Nothing to be read | |
436 | return Ok(()); |
|
436 | return Ok(()); | |
437 | } |
|
437 | } | |
438 | if self.delayed_buffer.is_some() && self.is_inline() { |
|
438 | if self.delayed_buffer.is_some() && self.is_inline() { | |
439 | return Err(HgError::abort( |
|
439 | return Err(HgError::abort( | |
440 | "revlog with delayed write should not be inline", |
|
440 | "revlog with delayed write should not be inline", | |
441 | exit_codes::ABORT, |
|
441 | exit_codes::ABORT, | |
442 | None, |
|
442 | None, | |
443 | )); |
|
443 | )); | |
444 | } |
|
444 | } | |
445 | self.segment_file.get_read_handle()?; |
|
445 | self.segment_file.get_read_handle()?; | |
446 | Ok(()) |
|
446 | Ok(()) | |
447 | } |
|
447 | } | |
448 |
|
448 | |||
449 | /// `pub` only for use in hg-cpython |
|
449 | /// `pub` only for use in hg-cpython | |
450 | #[doc(hidden)] |
|
450 | #[doc(hidden)] | |
451 | pub fn exit_reading_context(&self) { |
|
451 | pub fn exit_reading_context(&self) { | |
452 | self.segment_file.exit_reading_context() |
|
452 | self.segment_file.exit_reading_context() | |
453 | } |
|
453 | } | |
454 |
|
454 | |||
455 | /// Fill the buffer returned by `get_buffer` with the possibly un-validated |
|
455 | /// Fill the buffer returned by `get_buffer` with the possibly un-validated | |
456 | /// raw text for a revision. It can be already validated if it comes |
|
456 | /// raw text for a revision. It can be already validated if it comes | |
457 | /// from the cache. |
|
457 | /// from the cache. | |
458 | pub fn raw_text<G, T>( |
|
458 | pub fn raw_text<G, T>( | |
459 | &self, |
|
459 | &self, | |
460 | rev: Revision, |
|
460 | rev: Revision, | |
461 | get_buffer: G, |
|
461 | get_buffer: G, | |
462 | ) -> Result<(), RevlogError> |
|
462 | ) -> Result<(), RevlogError> | |
463 | where |
|
463 | where | |
464 | G: FnOnce( |
|
464 | G: FnOnce( | |
465 | usize, |
|
465 | usize, | |
466 | &mut dyn FnMut( |
|
466 | &mut dyn FnMut( | |
467 | &mut dyn RevisionBuffer<Target = T>, |
|
467 | &mut dyn RevisionBuffer<Target = T>, | |
468 | ) -> Result<(), RevlogError>, |
|
468 | ) -> Result<(), RevlogError>, | |
469 | ) -> Result<(), RevlogError>, |
|
469 | ) -> Result<(), RevlogError>, | |
470 | { |
|
470 | { | |
471 | let entry = &self.get_entry(rev)?; |
|
471 | let entry = &self.get_entry(rev)?; | |
472 | let raw_size = entry.uncompressed_len(); |
|
472 | let raw_size = entry.uncompressed_len(); | |
473 | let mut mutex_guard = self |
|
473 | let mut mutex_guard = self | |
474 | .last_revision_cache |
|
474 | .last_revision_cache | |
475 | .lock() |
|
475 | .lock() | |
476 | .expect("lock should not be held"); |
|
476 | .expect("lock should not be held"); | |
477 | let cached_rev = if let Some((_node, rev, data)) = &*mutex_guard { |
|
477 | let cached_rev = if let Some((_node, rev, data)) = &*mutex_guard { | |
478 | Some((*rev, data.deref().as_ref())) |
|
478 | Some((*rev, data.deref().as_ref())) | |
479 | } else { |
|
479 | } else { | |
480 | None |
|
480 | None | |
481 | }; |
|
481 | }; | |
482 | if let Some(cache) = &self.uncompressed_chunk_cache { |
|
482 | if let Some(cache) = &self.uncompressed_chunk_cache { | |
483 | let cache = &mut cache.borrow_mut(); |
|
483 | let cache = &mut cache.borrow_mut(); | |
484 | if let Some(size) = raw_size { |
|
484 | if let Some(size) = raw_size { | |
485 | // Dynamically update the uncompressed_chunk_cache size to the |
|
485 | // Dynamically update the uncompressed_chunk_cache size to the | |
486 | // largest revision we've seen in this revlog. |
|
486 | // largest revision we've seen in this revlog. | |
487 | // Do it *before* restoration in case the current revision |
|
487 | // Do it *before* restoration in case the current revision | |
488 | // is the largest. |
|
488 | // is the largest. | |
489 | let factor = self |
|
489 | let factor = self | |
490 | .data_config |
|
490 | .data_config | |
491 | .uncompressed_cache_factor |
|
491 | .uncompressed_cache_factor | |
492 | .expect("cache should not exist without factor"); |
|
492 | .expect("cache should not exist without factor"); | |
493 | let candidate_size = (size as f64 * factor) as usize; |
|
493 | let candidate_size = (size as f64 * factor) as usize; | |
494 | let limiter_mut = cache.limiter_mut(); |
|
494 | let limiter_mut = cache.limiter_mut(); | |
495 | if candidate_size > limiter_mut.max_memory_usage() { |
|
495 | if candidate_size > limiter_mut.max_memory_usage() { | |
496 | std::mem::swap( |
|
496 | std::mem::swap( | |
497 | limiter_mut, |
|
497 | limiter_mut, | |
498 | &mut ByMemoryUsage::new(candidate_size), |
|
498 | &mut ByMemoryUsage::new(candidate_size), | |
499 | ); |
|
499 | ); | |
500 | } |
|
500 | } | |
501 | } |
|
501 | } | |
502 | } |
|
502 | } | |
503 | entry.rawdata(cached_rev, get_buffer)?; |
|
503 | entry.rawdata(cached_rev, get_buffer)?; | |
504 | // drop cache to save memory, the caller is expected to update |
|
504 | // drop cache to save memory, the caller is expected to update | |
505 | // the revision cache after validating the text |
|
505 | // the revision cache after validating the text | |
506 | mutex_guard.take(); |
|
506 | mutex_guard.take(); | |
507 | Ok(()) |
|
507 | Ok(()) | |
508 | } |
|
508 | } | |
509 |
|
509 | |||
510 | /// Only `pub` for `hg-cpython`. |
|
510 | /// Only `pub` for `hg-cpython`. | |
511 | /// Obtain decompressed raw data for the specified revisions that are |
|
511 | /// Obtain decompressed raw data for the specified revisions that are | |
512 | /// assumed to be in ascending order. |
|
512 | /// assumed to be in ascending order. | |
513 | /// |
|
513 | /// | |
514 | /// Returns a list with decompressed data for each requested revision. |
|
514 | /// Returns a list with decompressed data for each requested revision. | |
515 | #[doc(hidden)] |
|
515 | #[doc(hidden)] | |
516 | pub fn chunks( |
|
516 | pub fn chunks( | |
517 | &self, |
|
517 | &self, | |
518 | revs: Vec<Revision>, |
|
518 | revs: Vec<Revision>, | |
519 | target_size: Option<u64>, |
|
519 | target_size: Option<u64>, | |
520 | ) -> Result<Vec<Arc<[u8]>>, RevlogError> { |
|
520 | ) -> Result<Vec<Arc<[u8]>>, RevlogError> { | |
521 | if revs.is_empty() { |
|
521 | if revs.is_empty() { | |
522 | return Ok(vec![]); |
|
522 | return Ok(vec![]); | |
523 | } |
|
523 | } | |
524 | let mut fetched_revs = vec![]; |
|
524 | let mut fetched_revs = vec![]; | |
525 | let mut chunks = Vec::with_capacity(revs.len()); |
|
525 | let mut chunks = Vec::with_capacity(revs.len()); | |
526 |
|
526 | |||
527 | match self.uncompressed_chunk_cache.as_ref() { |
|
527 | match self.uncompressed_chunk_cache.as_ref() { | |
528 | Some(cache) => { |
|
528 | Some(cache) => { | |
529 | let mut cache = cache.borrow_mut(); |
|
529 | let mut cache = cache.borrow_mut(); | |
530 | for rev in revs.iter() { |
|
530 | for rev in revs.iter() { | |
531 | match cache.get(rev) { |
|
531 | match cache.get(rev) { | |
532 | Some(hit) => chunks.push((*rev, hit.to_owned())), |
|
532 | Some(hit) => chunks.push((*rev, hit.to_owned())), | |
533 | None => fetched_revs.push(*rev), |
|
533 | None => fetched_revs.push(*rev), | |
534 | } |
|
534 | } | |
535 | } |
|
535 | } | |
536 | } |
|
536 | } | |
537 | None => fetched_revs = revs, |
|
537 | None => fetched_revs = revs, | |
538 | } |
|
538 | } | |
539 |
|
539 | |||
540 | let already_cached = chunks.len(); |
|
540 | let already_cached = chunks.len(); | |
541 |
|
541 | |||
542 | let sliced_chunks = if fetched_revs.is_empty() { |
|
542 | let sliced_chunks = if fetched_revs.is_empty() { | |
543 | vec![] |
|
543 | vec![] | |
544 | } else if !self.data_config.with_sparse_read || self.is_inline() { |
|
544 | } else if !self.data_config.with_sparse_read || self.is_inline() { | |
545 | vec![fetched_revs] |
|
545 | vec![fetched_revs] | |
546 | } else { |
|
546 | } else { | |
547 | self.slice_chunk(&fetched_revs, target_size)? |
|
547 | self.slice_chunk(&fetched_revs, target_size)? | |
548 | }; |
|
548 | }; | |
549 |
|
549 | |||
550 | self.with_read(|| { |
|
550 | self.with_read(|| { | |
551 | for revs_chunk in sliced_chunks { |
|
551 | for revs_chunk in sliced_chunks { | |
552 | let first_rev = revs_chunk[0]; |
|
552 | let first_rev = revs_chunk[0]; | |
553 | // Skip trailing revisions with empty diff |
|
553 | // Skip trailing revisions with empty diff | |
554 | let last_rev_idx = revs_chunk |
|
554 | let last_rev_idx = revs_chunk | |
555 | .iter() |
|
555 | .iter() | |
556 | .rposition(|r| self.length(*r) != 0) |
|
556 | .rposition(|r| self.length(*r) != 0) | |
557 | .unwrap_or(revs_chunk.len() - 1); |
|
557 | .unwrap_or(revs_chunk.len() - 1); | |
558 |
|
558 | |||
559 | let last_rev = revs_chunk[last_rev_idx]; |
|
559 | let last_rev = revs_chunk[last_rev_idx]; | |
560 |
|
560 | |||
561 | let (offset, data) = |
|
561 | let (offset, data) = | |
562 | self.get_segment_for_revs(first_rev, last_rev)?; |
|
562 | self.get_segment_for_revs(first_rev, last_rev)?; | |
563 |
|
563 | |||
564 | let revs_chunk = &revs_chunk[..=last_rev_idx]; |
|
564 | let revs_chunk = &revs_chunk[..=last_rev_idx]; | |
565 |
|
565 | |||
566 | for rev in revs_chunk { |
|
566 | for rev in revs_chunk { | |
567 | let chunk_start = self.start(*rev); |
|
567 | let chunk_start = self.start(*rev); | |
568 | let chunk_length = self.length(*rev); |
|
568 | let chunk_length = self.length(*rev); | |
569 | // TODO revlogv2 should check the compression mode |
|
569 | // TODO revlogv2 should check the compression mode | |
570 | let bytes = &data[chunk_start - offset..][..chunk_length]; |
|
570 | let bytes = &data[chunk_start - offset..][..chunk_length]; | |
571 | let chunk = if !bytes.is_empty() && bytes[0] == ZSTD_BYTE { |
|
571 | let chunk = if !bytes.is_empty() && bytes[0] == ZSTD_BYTE { | |
572 | // If we're using `zstd`, we want to try a more |
|
572 | // If we're using `zstd`, we want to try a more | |
573 | // specialized decompression |
|
573 | // specialized decompression | |
574 | let entry = self.index.get_entry(*rev).unwrap(); |
|
574 | let entry = self.index.get_entry(*rev).unwrap(); | |
575 | let is_delta = entry |
|
575 | let is_delta = entry | |
576 | .base_revision_or_base_of_delta_chain() |
|
576 | .base_revision_or_base_of_delta_chain() | |
577 | != (*rev).into(); |
|
577 | != (*rev).into(); | |
578 | let uncompressed = uncompressed_zstd_data( |
|
578 | let uncompressed = uncompressed_zstd_data( | |
579 | bytes, |
|
579 | bytes, | |
580 | is_delta, |
|
580 | is_delta, | |
581 | entry.uncompressed_len(), |
|
581 | entry.uncompressed_len(), | |
582 | )?; |
|
582 | )?; | |
583 | Cow::Owned(uncompressed) |
|
583 | Cow::Owned(uncompressed) | |
584 | } else { |
|
584 | } else { | |
585 | // Otherwise just fallback to generic decompression. |
|
585 | // Otherwise just fallback to generic decompression. | |
586 | self.decompress(bytes)? |
|
586 | self.decompress(bytes)? | |
587 | }; |
|
587 | }; | |
588 |
|
588 | |||
589 | chunks.push((*rev, chunk.into())); |
|
589 | chunks.push((*rev, chunk.into())); | |
590 | } |
|
590 | } | |
591 | } |
|
591 | } | |
592 | Ok(()) |
|
592 | Ok(()) | |
593 | })?; |
|
593 | })?; | |
594 |
|
594 | |||
595 | if let Some(cache) = self.uncompressed_chunk_cache.as_ref() { |
|
595 | if let Some(cache) = self.uncompressed_chunk_cache.as_ref() { | |
596 | let mut cache = cache.borrow_mut(); |
|
596 | let mut cache = cache.borrow_mut(); | |
597 | for (rev, chunk) in chunks.iter().skip(already_cached) { |
|
597 | for (rev, chunk) in chunks.iter().skip(already_cached) { | |
598 | cache.insert(*rev, chunk.clone()); |
|
598 | cache.insert(*rev, chunk.clone()); | |
599 | } |
|
599 | } | |
600 | } |
|
600 | } | |
601 | // Use stable sort here since it's *mostly* sorted |
|
601 | // Use stable sort here since it's *mostly* sorted | |
602 | chunks.sort_by(|a, b| a.0.cmp(&b.0)); |
|
602 | chunks.sort_by(|a, b| a.0.cmp(&b.0)); | |
603 | Ok(chunks.into_iter().map(|(_r, chunk)| chunk).collect()) |
|
603 | Ok(chunks.into_iter().map(|(_r, chunk)| chunk).collect()) | |
604 | } |
|
604 | } | |
605 |
|
605 | |||
606 | /// Slice revs to reduce the amount of unrelated data to be read from disk. |
|
606 | /// Slice revs to reduce the amount of unrelated data to be read from disk. | |
607 | /// |
|
607 | /// | |
608 | /// ``revs`` is sliced into groups that should be read in one time. |
|
608 | /// ``revs`` is sliced into groups that should be read in one time. | |
609 | /// Assume that revs are sorted. |
|
609 | /// Assume that revs are sorted. | |
610 | /// |
|
610 | /// | |
611 | /// The initial chunk is sliced until the overall density |
|
611 | /// The initial chunk is sliced until the overall density | |
612 | /// (payload/chunks-span ratio) is above |
|
612 | /// (payload/chunks-span ratio) is above | |
613 | /// `revlog.data_config.sr_density_threshold`. |
|
613 | /// `revlog.data_config.sr_density_threshold`. | |
614 | /// No gap smaller than `revlog.data_config.sr_min_gap_size` is skipped. |
|
614 | /// No gap smaller than `revlog.data_config.sr_min_gap_size` is skipped. | |
615 | /// |
|
615 | /// | |
616 | /// If `target_size` is set, no chunk larger than `target_size` |
|
616 | /// If `target_size` is set, no chunk larger than `target_size` | |
617 | /// will be returned. |
|
617 | /// will be returned. | |
618 | /// For consistency with other slicing choices, this limit won't go lower |
|
618 | /// For consistency with other slicing choices, this limit won't go lower | |
619 | /// than `revlog.data_config.sr_min_gap_size`. |
|
619 | /// than `revlog.data_config.sr_min_gap_size`. | |
620 | /// |
|
620 | /// | |
621 | /// If individual revision chunks are larger than this limit, they will |
|
621 | /// If individual revision chunks are larger than this limit, they will | |
622 | /// still be raised individually. |
|
622 | /// still be raised individually. | |
623 | pub fn slice_chunk( |
|
623 | pub fn slice_chunk( | |
624 | &self, |
|
624 | &self, | |
625 | revs: &[Revision], |
|
625 | revs: &[Revision], | |
626 | target_size: Option<u64>, |
|
626 | target_size: Option<u64>, | |
627 | ) -> Result<Vec<Vec<Revision>>, RevlogError> { |
|
627 | ) -> Result<Vec<Vec<Revision>>, RevlogError> { | |
628 | let target_size = |
|
628 | let target_size = | |
629 | target_size.map(|size| size.max(self.data_config.sr_min_gap_size)); |
|
629 | target_size.map(|size| size.max(self.data_config.sr_min_gap_size)); | |
630 |
|
630 | |||
631 | let target_density = self.data_config.sr_density_threshold; |
|
631 | let target_density = self.data_config.sr_density_threshold; | |
632 | let min_gap_size = self.data_config.sr_min_gap_size as usize; |
|
632 | let min_gap_size = self.data_config.sr_min_gap_size as usize; | |
633 | let to_density = self.index.slice_chunk_to_density( |
|
633 | let to_density = self.index.slice_chunk_to_density( | |
634 | revs, |
|
634 | revs, | |
635 | target_density, |
|
635 | target_density, | |
636 | min_gap_size, |
|
636 | min_gap_size, | |
637 | ); |
|
637 | ); | |
638 |
|
638 | |||
639 | let mut sliced = vec![]; |
|
639 | let mut sliced = vec![]; | |
640 |
|
640 | |||
641 | for chunk in to_density { |
|
641 | for chunk in to_density { | |
642 | sliced.extend( |
|
642 | sliced.extend( | |
643 | self.slice_chunk_to_size(&chunk, target_size)? |
|
643 | self.slice_chunk_to_size(&chunk, target_size)? | |
644 | .into_iter() |
|
644 | .into_iter() | |
645 | .map(ToOwned::to_owned), |
|
645 | .map(ToOwned::to_owned), | |
646 | ); |
|
646 | ); | |
647 | } |
|
647 | } | |
648 |
|
648 | |||
649 | Ok(sliced) |
|
649 | Ok(sliced) | |
650 | } |
|
650 | } | |
651 |
|
651 | |||
652 | /// Slice revs to match the target size |
|
652 | /// Slice revs to match the target size | |
653 | /// |
|
653 | /// | |
654 | /// This is intended to be used on chunks that density slicing selected, |
|
654 | /// This is intended to be used on chunks that density slicing selected, | |
655 | /// but that are still too large compared to the read guarantee of revlogs. |
|
655 | /// but that are still too large compared to the read guarantee of revlogs. | |
656 | /// This might happen when the "minimal gap size" interrupted the slicing |
|
656 | /// This might happen when the "minimal gap size" interrupted the slicing | |
657 | /// or when chains are built in a way that create large blocks next to |
|
657 | /// or when chains are built in a way that create large blocks next to | |
658 | /// each other. |
|
658 | /// each other. | |
659 | fn slice_chunk_to_size<'a>( |
|
659 | fn slice_chunk_to_size<'a>( | |
660 | &self, |
|
660 | &self, | |
661 | revs: &'a [Revision], |
|
661 | revs: &'a [Revision], | |
662 | target_size: Option<u64>, |
|
662 | target_size: Option<u64>, | |
663 | ) -> Result<Vec<&'a [Revision]>, RevlogError> { |
|
663 | ) -> Result<Vec<&'a [Revision]>, RevlogError> { | |
664 | let mut start_data = self.start(revs[0]); |
|
664 | let mut start_data = self.start(revs[0]); | |
665 | let end_data = self.end(revs[revs.len() - 1]); |
|
665 | let end_data = self.end(revs[revs.len() - 1]); | |
666 | let full_span = end_data - start_data; |
|
666 | let full_span = end_data - start_data; | |
667 |
|
667 | |||
668 | let nothing_to_do = target_size |
|
668 | let nothing_to_do = target_size | |
669 | .map(|size| full_span <= size as usize) |
|
669 | .map(|size| full_span <= size as usize) | |
670 | .unwrap_or(true); |
|
670 | .unwrap_or(true); | |
671 |
|
671 | |||
672 | if nothing_to_do { |
|
672 | if nothing_to_do { | |
673 | return Ok(vec![revs]); |
|
673 | return Ok(vec![revs]); | |
674 | } |
|
674 | } | |
675 | let target_size = target_size.expect("target_size is set") as usize; |
|
675 | let target_size = target_size.expect("target_size is set") as usize; | |
676 |
|
676 | |||
677 | let mut start_rev_idx = 0; |
|
677 | let mut start_rev_idx = 0; | |
678 | let mut end_rev_idx = 1; |
|
678 | let mut end_rev_idx = 1; | |
679 | let mut chunks = vec![]; |
|
679 | let mut chunks = vec![]; | |
680 |
|
680 | |||
681 | for (idx, rev) in revs.iter().enumerate().skip(1) { |
|
681 | for (idx, rev) in revs.iter().enumerate().skip(1) { | |
682 | let span = self.end(*rev) - start_data; |
|
682 | let span = self.end(*rev) - start_data; | |
683 | let is_snapshot = self.is_snapshot(*rev)?; |
|
683 | let is_snapshot = self.is_snapshot(*rev)?; | |
684 | if span <= target_size && is_snapshot { |
|
684 | if span <= target_size && is_snapshot { | |
685 | end_rev_idx = idx + 1; |
|
685 | end_rev_idx = idx + 1; | |
686 | } else { |
|
686 | } else { | |
687 | let chunk = |
|
687 | let chunk = | |
688 | self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx)); |
|
688 | self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx)); | |
689 | if !chunk.is_empty() { |
|
689 | if !chunk.is_empty() { | |
690 | chunks.push(chunk); |
|
690 | chunks.push(chunk); | |
691 | } |
|
691 | } | |
692 | start_rev_idx = idx; |
|
692 | start_rev_idx = idx; | |
693 | start_data = self.start(*rev); |
|
693 | start_data = self.start(*rev); | |
694 | end_rev_idx = idx + 1; |
|
694 | end_rev_idx = idx + 1; | |
695 | } |
|
695 | } | |
696 | if !is_snapshot { |
|
696 | if !is_snapshot { | |
697 | break; |
|
697 | break; | |
698 | } |
|
698 | } | |
699 | } |
|
699 | } | |
700 |
|
700 | |||
701 | // For the others, we use binary slicing to quickly converge towards |
|
701 | // For the others, we use binary slicing to quickly converge towards | |
702 | // valid chunks (otherwise, we might end up looking for the start/end |
|
702 | // valid chunks (otherwise, we might end up looking for the start/end | |
703 | // of many revisions). This logic is not looking for the perfect |
|
703 | // of many revisions). This logic is not looking for the perfect | |
704 | // slicing point, it quickly converges towards valid chunks. |
|
704 | // slicing point, it quickly converges towards valid chunks. | |
705 | let number_of_items = revs.len(); |
|
705 | let number_of_items = revs.len(); | |
706 |
|
706 | |||
707 | while (end_data - start_data) > target_size { |
|
707 | while (end_data - start_data) > target_size { | |
708 | end_rev_idx = number_of_items; |
|
708 | end_rev_idx = number_of_items; | |
709 | if number_of_items - start_rev_idx <= 1 { |
|
709 | if number_of_items - start_rev_idx <= 1 { | |
710 | // Protect against individual chunks larger than the limit |
|
710 | // Protect against individual chunks larger than the limit | |
711 | break; |
|
711 | break; | |
712 | } |
|
712 | } | |
713 | let mut local_end_data = self.end(revs[end_rev_idx - 1]); |
|
713 | let mut local_end_data = self.end(revs[end_rev_idx - 1]); | |
714 | let mut span = local_end_data - start_data; |
|
714 | let mut span = local_end_data - start_data; | |
715 | while span > target_size { |
|
715 | while span > target_size { | |
716 | if end_rev_idx - start_rev_idx <= 1 { |
|
716 | if end_rev_idx - start_rev_idx <= 1 { | |
717 | // Protect against individual chunks larger than the limit |
|
717 | // Protect against individual chunks larger than the limit | |
718 | break; |
|
718 | break; | |
719 | } |
|
719 | } | |
720 | end_rev_idx -= (end_rev_idx - start_rev_idx) / 2; |
|
720 | end_rev_idx -= (end_rev_idx - start_rev_idx) / 2; | |
721 | local_end_data = self.end(revs[end_rev_idx - 1]); |
|
721 | local_end_data = self.end(revs[end_rev_idx - 1]); | |
722 | span = local_end_data - start_data; |
|
722 | span = local_end_data - start_data; | |
723 | } |
|
723 | } | |
724 | let chunk = |
|
724 | let chunk = | |
725 | self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx)); |
|
725 | self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx)); | |
726 | if !chunk.is_empty() { |
|
726 | if !chunk.is_empty() { | |
727 | chunks.push(chunk); |
|
727 | chunks.push(chunk); | |
728 | } |
|
728 | } | |
729 | start_rev_idx = end_rev_idx; |
|
729 | start_rev_idx = end_rev_idx; | |
730 | start_data = self.start(revs[start_rev_idx]); |
|
730 | start_data = self.start(revs[start_rev_idx]); | |
731 | } |
|
731 | } | |
732 |
|
732 | |||
733 | let chunk = self.trim_chunk(revs, start_rev_idx, None); |
|
733 | let chunk = self.trim_chunk(revs, start_rev_idx, None); | |
734 | if !chunk.is_empty() { |
|
734 | if !chunk.is_empty() { | |
735 | chunks.push(chunk); |
|
735 | chunks.push(chunk); | |
736 | } |
|
736 | } | |
737 |
|
737 | |||
738 | Ok(chunks) |
|
738 | Ok(chunks) | |
739 | } |
|
739 | } | |
740 |
|
740 | |||
741 | /// Returns `revs[startidx..endidx]` without empty trailing revs |
|
741 | /// Returns `revs[startidx..endidx]` without empty trailing revs | |
742 | fn trim_chunk<'a>( |
|
742 | fn trim_chunk<'a>( | |
743 | &self, |
|
743 | &self, | |
744 | revs: &'a [Revision], |
|
744 | revs: &'a [Revision], | |
745 | start_rev_idx: usize, |
|
745 | start_rev_idx: usize, | |
746 | end_rev_idx: Option<usize>, |
|
746 | end_rev_idx: Option<usize>, | |
747 | ) -> &'a [Revision] { |
|
747 | ) -> &'a [Revision] { | |
748 | let mut end_rev_idx = end_rev_idx.unwrap_or(revs.len()); |
|
748 | let mut end_rev_idx = end_rev_idx.unwrap_or(revs.len()); | |
749 |
|
749 | |||
750 | // If we have a non-empty delta candidate, there is nothing to trim |
|
750 | // If we have a non-empty delta candidate, there is nothing to trim | |
751 | if revs[end_rev_idx - 1].0 < self.len() as BaseRevision { |
|
751 | if revs[end_rev_idx - 1].0 < self.len() as BaseRevision { | |
752 | // Trim empty revs at the end, except the very first rev of a chain |
|
752 | // Trim empty revs at the end, except the very first rev of a chain | |
753 | while end_rev_idx > 1 |
|
753 | while end_rev_idx > 1 | |
754 | && end_rev_idx > start_rev_idx |
|
754 | && end_rev_idx > start_rev_idx | |
755 | && self.length(revs[end_rev_idx - 1]) == 0 |
|
755 | && self.length(revs[end_rev_idx - 1]) == 0 | |
756 | { |
|
756 | { | |
757 | end_rev_idx -= 1 |
|
757 | end_rev_idx -= 1 | |
758 | } |
|
758 | } | |
759 | } |
|
759 | } | |
760 |
|
760 | |||
761 | &revs[start_rev_idx..end_rev_idx] |
|
761 | &revs[start_rev_idx..end_rev_idx] | |
762 | } |
|
762 | } | |
763 |
|
763 | |||
764 | /// Check the hash of some given data against the recorded hash. |
|
764 | /// Check the hash of some given data against the recorded hash. | |
765 | pub fn check_hash( |
|
765 | pub fn check_hash( | |
766 | &self, |
|
766 | &self, | |
767 | p1: Revision, |
|
767 | p1: Revision, | |
768 | p2: Revision, |
|
768 | p2: Revision, | |
769 | expected: &[u8], |
|
769 | expected: &[u8], | |
770 | data: &[u8], |
|
770 | data: &[u8], | |
771 | ) -> bool { |
|
771 | ) -> bool { | |
772 | let e1 = self.index.get_entry(p1); |
|
772 | let e1 = self.index.get_entry(p1); | |
773 | let h1 = match e1 { |
|
773 | let h1 = match e1 { | |
774 | Some(ref entry) => entry.hash(), |
|
774 | Some(ref entry) => entry.hash(), | |
775 | None => &NULL_NODE, |
|
775 | None => &NULL_NODE, | |
776 | }; |
|
776 | }; | |
777 | let e2 = self.index.get_entry(p2); |
|
777 | let e2 = self.index.get_entry(p2); | |
778 | let h2 = match e2 { |
|
778 | let h2 = match e2 { | |
779 | Some(ref entry) => entry.hash(), |
|
779 | Some(ref entry) => entry.hash(), | |
780 | None => &NULL_NODE, |
|
780 | None => &NULL_NODE, | |
781 | }; |
|
781 | }; | |
782 |
|
782 | |||
783 | hash(data, h1.as_bytes(), h2.as_bytes()) == expected |
|
783 | hash(data, h1.as_bytes(), h2.as_bytes()) == expected | |
784 | } |
|
784 | } | |
785 |
|
785 | |||
786 | /// Returns whether we are currently in a [`Self::with_write`] context |
|
786 | /// Returns whether we are currently in a [`Self::with_write`] context | |
787 | pub fn is_writing(&self) -> bool { |
|
787 | pub fn is_writing(&self) -> bool { | |
788 | self.writing_handles.is_some() |
|
788 | self.writing_handles.is_some() | |
789 | } |
|
789 | } | |
790 |
|
790 | |||
791 | /// Open the revlog files for writing |
|
791 | /// Open the revlog files for writing | |
792 | /// |
|
792 | /// | |
793 | /// Adding content to a revlog should be done within this context. |
|
793 | /// Adding content to a revlog should be done within this context. | |
794 | /// TODO try using `BufRead` and `BufWrite` and see if performance improves |
|
794 | /// TODO try using `BufRead` and `BufWrite` and see if performance improves | |
795 | pub fn with_write<R>( |
|
795 | pub fn with_write<R>( | |
796 | &mut self, |
|
796 | &mut self, | |
797 | transaction: &mut impl Transaction, |
|
797 | transaction: &mut impl Transaction, | |
798 | data_end: Option<usize>, |
|
798 | data_end: Option<usize>, | |
799 | func: impl FnOnce() -> R, |
|
799 | func: impl FnOnce() -> R, | |
800 | ) -> Result<R, HgError> { |
|
800 | ) -> Result<R, HgError> { | |
801 | if self.is_writing() { |
|
801 | if self.is_writing() { | |
802 | return Ok(func()); |
|
802 | return Ok(func()); | |
803 | } |
|
803 | } | |
804 | self.enter_writing_context(data_end, transaction) |
|
804 | self.enter_writing_context(data_end, transaction) | |
805 |
. |
|
805 | .inspect_err(|_| { | |
806 | self.exit_writing_context(); |
|
806 | self.exit_writing_context(); | |
807 | e |
|
|||
808 | })?; |
|
807 | })?; | |
809 | let res = func(); |
|
808 | let res = func(); | |
810 | self.exit_writing_context(); |
|
809 | self.exit_writing_context(); | |
811 | Ok(res) |
|
810 | Ok(res) | |
812 | } |
|
811 | } | |
813 |
|
812 | |||
814 | /// `pub` only for use in hg-cpython |
|
813 | /// `pub` only for use in hg-cpython | |
815 | #[doc(hidden)] |
|
814 | #[doc(hidden)] | |
816 | pub fn exit_writing_context(&mut self) { |
|
815 | pub fn exit_writing_context(&mut self) { | |
817 | self.writing_handles.take(); |
|
816 | self.writing_handles.take(); | |
818 | self.segment_file.writing_handle.take(); |
|
817 | self.segment_file.writing_handle.take(); | |
819 | self.segment_file.reading_handle.take(); |
|
818 | self.segment_file.reading_handle.take(); | |
820 | } |
|
819 | } | |
821 |
|
820 | |||
822 | /// `pub` only for use in hg-cpython |
|
821 | /// `pub` only for use in hg-cpython | |
823 | #[doc(hidden)] |
|
822 | #[doc(hidden)] | |
824 | pub fn python_writing_handles(&self) -> Option<&WriteHandles> { |
|
823 | pub fn python_writing_handles(&self) -> Option<&WriteHandles> { | |
825 | self.writing_handles.as_ref() |
|
824 | self.writing_handles.as_ref() | |
826 | } |
|
825 | } | |
827 |
|
826 | |||
828 | /// `pub` only for use in hg-cpython |
|
827 | /// `pub` only for use in hg-cpython | |
829 | #[doc(hidden)] |
|
828 | #[doc(hidden)] | |
830 | pub fn enter_writing_context( |
|
829 | pub fn enter_writing_context( | |
831 | &mut self, |
|
830 | &mut self, | |
832 | data_end: Option<usize>, |
|
831 | data_end: Option<usize>, | |
833 | transaction: &mut impl Transaction, |
|
832 | transaction: &mut impl Transaction, | |
834 | ) -> Result<(), HgError> { |
|
833 | ) -> Result<(), HgError> { | |
835 | let data_size = if self.is_empty() { |
|
834 | let data_size = if self.is_empty() { | |
836 | 0 |
|
835 | 0 | |
837 | } else { |
|
836 | } else { | |
838 | self.end(Revision((self.len() - 1) as BaseRevision)) |
|
837 | self.end(Revision((self.len() - 1) as BaseRevision)) | |
839 | }; |
|
838 | }; | |
840 | let data_handle = if !self.is_inline() { |
|
839 | let data_handle = if !self.is_inline() { | |
841 | let data_handle = match self.vfs.open(&self.data_file) { |
|
840 | let data_handle = match self.vfs.open(&self.data_file) { | |
842 | Ok(mut f) => { |
|
841 | Ok(mut f) => { | |
843 | if let Some(end) = data_end { |
|
842 | if let Some(end) = data_end { | |
844 | f.seek(SeekFrom::Start(end as u64)) |
|
843 | f.seek(SeekFrom::Start(end as u64)) | |
845 | .when_reading_file(&self.data_file)?; |
|
844 | .when_reading_file(&self.data_file)?; | |
846 | } else { |
|
845 | } else { | |
847 | f.seek(SeekFrom::End(0)) |
|
846 | f.seek(SeekFrom::End(0)) | |
848 | .when_reading_file(&self.data_file)?; |
|
847 | .when_reading_file(&self.data_file)?; | |
849 | } |
|
848 | } | |
850 | f |
|
849 | f | |
851 | } |
|
850 | } | |
852 | Err(e) => match e { |
|
851 | Err(e) => match e { | |
853 | HgError::IoError { error, context } => { |
|
852 | HgError::IoError { error, context } => { | |
854 | if error.kind() != ErrorKind::NotFound { |
|
853 | if error.kind() != ErrorKind::NotFound { | |
855 | return Err(HgError::IoError { error, context }); |
|
854 | return Err(HgError::IoError { error, context }); | |
856 | } |
|
855 | } | |
857 | self.vfs.create(&self.data_file, true)? |
|
856 | self.vfs.create(&self.data_file, true)? | |
858 | } |
|
857 | } | |
859 | e => return Err(e), |
|
858 | e => return Err(e), | |
860 | }, |
|
859 | }, | |
861 | }; |
|
860 | }; | |
862 | transaction.add(&self.data_file, data_size); |
|
861 | transaction.add(&self.data_file, data_size); | |
863 | Some(FileHandle::from_file( |
|
862 | Some(FileHandle::from_file( | |
864 | data_handle, |
|
863 | data_handle, | |
865 | dyn_clone::clone_box(&*self.vfs), |
|
864 | dyn_clone::clone_box(&*self.vfs), | |
866 | &self.data_file, |
|
865 | &self.data_file, | |
867 | )) |
|
866 | )) | |
868 | } else { |
|
867 | } else { | |
869 | None |
|
868 | None | |
870 | }; |
|
869 | }; | |
871 | let index_size = self.len() * INDEX_ENTRY_SIZE; |
|
870 | let index_size = self.len() * INDEX_ENTRY_SIZE; | |
872 | let index_handle = self.index_write_handle()?; |
|
871 | let index_handle = self.index_write_handle()?; | |
873 | if self.is_inline() { |
|
872 | if self.is_inline() { | |
874 | transaction.add(&self.index_file, data_size); |
|
873 | transaction.add(&self.index_file, data_size); | |
875 | } else { |
|
874 | } else { | |
876 | transaction.add(&self.index_file, index_size); |
|
875 | transaction.add(&self.index_file, index_size); | |
877 | } |
|
876 | } | |
878 | self.writing_handles = Some(WriteHandles { |
|
877 | self.writing_handles = Some(WriteHandles { | |
879 | index_handle: index_handle.clone(), |
|
878 | index_handle: index_handle.clone(), | |
880 | data_handle: data_handle.clone(), |
|
879 | data_handle: data_handle.clone(), | |
881 | }); |
|
880 | }); | |
882 | *self.segment_file.reading_handle.borrow_mut() = if self.is_inline() { |
|
881 | *self.segment_file.reading_handle.borrow_mut() = if self.is_inline() { | |
883 | Some(index_handle) |
|
882 | Some(index_handle) | |
884 | } else { |
|
883 | } else { | |
885 | data_handle |
|
884 | data_handle | |
886 | }; |
|
885 | }; | |
887 | Ok(()) |
|
886 | Ok(()) | |
888 | } |
|
887 | } | |
889 |
|
888 | |||
890 | /// Get a write handle to the index, sought to the end of its data. |
|
889 | /// Get a write handle to the index, sought to the end of its data. | |
891 | fn index_write_handle(&self) -> Result<FileHandle, HgError> { |
|
890 | fn index_write_handle(&self) -> Result<FileHandle, HgError> { | |
892 | let res = if self.delayed_buffer.is_none() { |
|
891 | let res = if self.delayed_buffer.is_none() { | |
893 | if self.data_config.check_ambig { |
|
892 | if self.data_config.check_ambig { | |
894 | self.vfs.open_check_ambig(&self.index_file) |
|
893 | self.vfs.open_check_ambig(&self.index_file) | |
895 | } else { |
|
894 | } else { | |
896 | self.vfs.open(&self.index_file) |
|
895 | self.vfs.open(&self.index_file) | |
897 | } |
|
896 | } | |
898 | } else { |
|
897 | } else { | |
899 | self.vfs.open(&self.index_file) |
|
898 | self.vfs.open(&self.index_file) | |
900 | }; |
|
899 | }; | |
901 | match res { |
|
900 | match res { | |
902 | Ok(mut handle) => { |
|
901 | Ok(mut handle) => { | |
903 | handle |
|
902 | handle | |
904 | .seek(SeekFrom::End(0)) |
|
903 | .seek(SeekFrom::End(0)) | |
905 | .when_reading_file(&self.index_file)?; |
|
904 | .when_reading_file(&self.index_file)?; | |
906 | Ok( |
|
905 | Ok( | |
907 | if let Some(delayed_buffer) = self.delayed_buffer.as_ref() |
|
906 | if let Some(delayed_buffer) = self.delayed_buffer.as_ref() | |
908 | { |
|
907 | { | |
909 | FileHandle::from_file_delayed( |
|
908 | FileHandle::from_file_delayed( | |
910 | handle, |
|
909 | handle, | |
911 | dyn_clone::clone_box(&*self.vfs), |
|
910 | dyn_clone::clone_box(&*self.vfs), | |
912 | &self.index_file, |
|
911 | &self.index_file, | |
913 | delayed_buffer.clone(), |
|
912 | delayed_buffer.clone(), | |
914 | )? |
|
913 | )? | |
915 | } else { |
|
914 | } else { | |
916 | FileHandle::from_file( |
|
915 | FileHandle::from_file( | |
917 | handle, |
|
916 | handle, | |
918 | dyn_clone::clone_box(&*self.vfs), |
|
917 | dyn_clone::clone_box(&*self.vfs), | |
919 | &self.index_file, |
|
918 | &self.index_file, | |
920 | ) |
|
919 | ) | |
921 | }, |
|
920 | }, | |
922 | ) |
|
921 | ) | |
923 | } |
|
922 | } | |
924 | Err(e) => match e { |
|
923 | Err(e) => match e { | |
925 | HgError::IoError { error, context } => { |
|
924 | HgError::IoError { error, context } => { | |
926 | if error.kind() != ErrorKind::NotFound { |
|
925 | if error.kind() != ErrorKind::NotFound { | |
927 | return Err(HgError::IoError { error, context }); |
|
926 | return Err(HgError::IoError { error, context }); | |
928 | }; |
|
927 | }; | |
929 | if let Some(delayed_buffer) = self.delayed_buffer.as_ref() |
|
928 | if let Some(delayed_buffer) = self.delayed_buffer.as_ref() | |
930 | { |
|
929 | { | |
931 | FileHandle::new_delayed( |
|
930 | FileHandle::new_delayed( | |
932 | dyn_clone::clone_box(&*self.vfs), |
|
931 | dyn_clone::clone_box(&*self.vfs), | |
933 | &self.index_file, |
|
932 | &self.index_file, | |
934 | true, |
|
933 | true, | |
935 | delayed_buffer.clone(), |
|
934 | delayed_buffer.clone(), | |
936 | ) |
|
935 | ) | |
937 | } else { |
|
936 | } else { | |
938 | FileHandle::new( |
|
937 | FileHandle::new( | |
939 | dyn_clone::clone_box(&*self.vfs), |
|
938 | dyn_clone::clone_box(&*self.vfs), | |
940 | &self.index_file, |
|
939 | &self.index_file, | |
941 | true, |
|
940 | true, | |
942 | true, |
|
941 | true, | |
943 | ) |
|
942 | ) | |
944 | } |
|
943 | } | |
945 | } |
|
944 | } | |
946 | e => Err(e), |
|
945 | e => Err(e), | |
947 | }, |
|
946 | }, | |
948 | } |
|
947 | } | |
949 | } |
|
948 | } | |
950 |
|
949 | |||
951 | /// Split the data of an inline revlog into an index and a data file |
|
950 | /// Split the data of an inline revlog into an index and a data file | |
952 | pub fn split_inline( |
|
951 | pub fn split_inline( | |
953 | &mut self, |
|
952 | &mut self, | |
954 | header: IndexHeader, |
|
953 | header: IndexHeader, | |
955 | new_index_file_path: Option<PathBuf>, |
|
954 | new_index_file_path: Option<PathBuf>, | |
956 | ) -> Result<PathBuf, RevlogError> { |
|
955 | ) -> Result<PathBuf, RevlogError> { | |
957 | assert!(self.delayed_buffer.is_none()); |
|
956 | assert!(self.delayed_buffer.is_none()); | |
958 | let existing_handles = self.writing_handles.is_some(); |
|
957 | let existing_handles = self.writing_handles.is_some(); | |
959 | if let Some(handles) = &mut self.writing_handles { |
|
958 | if let Some(handles) = &mut self.writing_handles { | |
960 | handles.index_handle.flush()?; |
|
959 | handles.index_handle.flush()?; | |
961 | self.writing_handles.take(); |
|
960 | self.writing_handles.take(); | |
962 | self.segment_file.writing_handle.take(); |
|
961 | self.segment_file.writing_handle.take(); | |
963 | } |
|
962 | } | |
964 | let mut new_data_file_handle = |
|
963 | let mut new_data_file_handle = | |
965 | self.vfs.create(&self.data_file, true)?; |
|
964 | self.vfs.create(&self.data_file, true)?; | |
966 | // Drop any potential data, possibly redundant with the VFS impl. |
|
965 | // Drop any potential data, possibly redundant with the VFS impl. | |
967 | new_data_file_handle |
|
966 | new_data_file_handle | |
968 | .set_len(0) |
|
967 | .set_len(0) | |
969 | .when_writing_file(&self.data_file)?; |
|
968 | .when_writing_file(&self.data_file)?; | |
970 |
|
969 | |||
971 | self.with_read(|| -> Result<(), RevlogError> { |
|
970 | self.with_read(|| -> Result<(), RevlogError> { | |
972 | for r in 0..self.index.len() { |
|
971 | for r in 0..self.index.len() { | |
973 | let rev = Revision(r as BaseRevision); |
|
972 | let rev = Revision(r as BaseRevision); | |
974 | let rev_segment = self.get_segment_for_revs(rev, rev)?.1; |
|
973 | let rev_segment = self.get_segment_for_revs(rev, rev)?.1; | |
975 | new_data_file_handle |
|
974 | new_data_file_handle | |
976 | .write_all(&rev_segment) |
|
975 | .write_all(&rev_segment) | |
977 | .when_writing_file(&self.data_file)?; |
|
976 | .when_writing_file(&self.data_file)?; | |
978 | } |
|
977 | } | |
979 | new_data_file_handle |
|
978 | new_data_file_handle | |
980 | .flush() |
|
979 | .flush() | |
981 | .when_writing_file(&self.data_file)?; |
|
980 | .when_writing_file(&self.data_file)?; | |
982 | Ok(()) |
|
981 | Ok(()) | |
983 | })?; |
|
982 | })?; | |
984 |
|
983 | |||
985 | if let Some(index_path) = new_index_file_path { |
|
984 | if let Some(index_path) = new_index_file_path { | |
986 | self.index_file = index_path |
|
985 | self.index_file = index_path | |
987 | } |
|
986 | } | |
988 |
|
987 | |||
989 | let mut new_index_handle = self.vfs.create(&self.index_file, true)?; |
|
988 | let mut new_index_handle = self.vfs.create(&self.index_file, true)?; | |
990 | let mut new_data = Vec::with_capacity(self.len() * INDEX_ENTRY_SIZE); |
|
989 | let mut new_data = Vec::with_capacity(self.len() * INDEX_ENTRY_SIZE); | |
991 | for r in 0..self.len() { |
|
990 | for r in 0..self.len() { | |
992 | let rev = Revision(r as BaseRevision); |
|
991 | let rev = Revision(r as BaseRevision); | |
993 | let entry = self.index.entry_binary(rev).unwrap_or_else(|| { |
|
992 | let entry = self.index.entry_binary(rev).unwrap_or_else(|| { | |
994 | panic!( |
|
993 | panic!( | |
995 | "entry {} should exist in {}", |
|
994 | "entry {} should exist in {}", | |
996 | r, |
|
995 | r, | |
997 | self.index_file.display() |
|
996 | self.index_file.display() | |
998 | ) |
|
997 | ) | |
999 | }); |
|
998 | }); | |
1000 | if r == 0 { |
|
999 | if r == 0 { | |
1001 | new_data.extend(header.header_bytes); |
|
1000 | new_data.extend(header.header_bytes); | |
1002 | } |
|
1001 | } | |
1003 | new_data.extend(entry); |
|
1002 | new_data.extend(entry); | |
1004 | } |
|
1003 | } | |
1005 | new_index_handle |
|
1004 | new_index_handle | |
1006 | .write_all(&new_data) |
|
1005 | .write_all(&new_data) | |
1007 | .when_writing_file(&self.index_file)?; |
|
1006 | .when_writing_file(&self.index_file)?; | |
1008 | // Replace the index with a new one because the buffer contains inline |
|
1007 | // Replace the index with a new one because the buffer contains inline | |
1009 | // data |
|
1008 | // data | |
1010 | self.index = Index::new(Box::new(new_data), header)?; |
|
1009 | self.index = Index::new(Box::new(new_data), header)?; | |
1011 | self.inline = false; |
|
1010 | self.inline = false; | |
1012 |
|
1011 | |||
1013 | self.segment_file = RandomAccessFile::new( |
|
1012 | self.segment_file = RandomAccessFile::new( | |
1014 | dyn_clone::clone_box(&*self.vfs), |
|
1013 | dyn_clone::clone_box(&*self.vfs), | |
1015 | self.data_file.to_owned(), |
|
1014 | self.data_file.to_owned(), | |
1016 | ); |
|
1015 | ); | |
1017 | if existing_handles { |
|
1016 | if existing_handles { | |
1018 | // Switched from inline to conventional, reopen the index |
|
1017 | // Switched from inline to conventional, reopen the index | |
1019 | let new_data_handle = Some(FileHandle::from_file( |
|
1018 | let new_data_handle = Some(FileHandle::from_file( | |
1020 | new_data_file_handle, |
|
1019 | new_data_file_handle, | |
1021 | dyn_clone::clone_box(&*self.vfs), |
|
1020 | dyn_clone::clone_box(&*self.vfs), | |
1022 | &self.data_file, |
|
1021 | &self.data_file, | |
1023 | )); |
|
1022 | )); | |
1024 | self.writing_handles = Some(WriteHandles { |
|
1023 | self.writing_handles = Some(WriteHandles { | |
1025 | index_handle: self.index_write_handle()?, |
|
1024 | index_handle: self.index_write_handle()?, | |
1026 | data_handle: new_data_handle.clone(), |
|
1025 | data_handle: new_data_handle.clone(), | |
1027 | }); |
|
1026 | }); | |
1028 | *self.segment_file.writing_handle.borrow_mut() = new_data_handle; |
|
1027 | *self.segment_file.writing_handle.borrow_mut() = new_data_handle; | |
1029 | } |
|
1028 | } | |
1030 |
|
1029 | |||
1031 | Ok(self.index_file.to_owned()) |
|
1030 | Ok(self.index_file.to_owned()) | |
1032 | } |
|
1031 | } | |
1033 |
|
1032 | |||
1034 | /// Write a new entry to this revlog. |
|
1033 | /// Write a new entry to this revlog. | |
1035 | /// - `entry` is the index bytes |
|
1034 | /// - `entry` is the index bytes | |
1036 | /// - `header_and_data` is the compression header and the revision data |
|
1035 | /// - `header_and_data` is the compression header and the revision data | |
1037 | /// - `offset` is the position in the data file to write to |
|
1036 | /// - `offset` is the position in the data file to write to | |
1038 | /// - `index_end` is the overwritten position in the index in revlog-v2, |
|
1037 | /// - `index_end` is the overwritten position in the index in revlog-v2, | |
1039 | /// since the format may allow a rewrite of garbage data at the end. |
|
1038 | /// since the format may allow a rewrite of garbage data at the end. | |
1040 | /// - `data_end` is the overwritten position in the data-file in revlog-v2, |
|
1039 | /// - `data_end` is the overwritten position in the data-file in revlog-v2, | |
1041 | /// since the format may allow a rewrite of garbage data at the end. |
|
1040 | /// since the format may allow a rewrite of garbage data at the end. | |
1042 | /// |
|
1041 | /// | |
1043 | /// XXX Why do we have `data_end` *and* `offset`? Same question in Python |
|
1042 | /// XXX Why do we have `data_end` *and* `offset`? Same question in Python | |
1044 | pub fn write_entry( |
|
1043 | pub fn write_entry( | |
1045 | &mut self, |
|
1044 | &mut self, | |
1046 | mut transaction: impl Transaction, |
|
1045 | mut transaction: impl Transaction, | |
1047 | entry: &[u8], |
|
1046 | entry: &[u8], | |
1048 | header_and_data: (&[u8], &[u8]), |
|
1047 | header_and_data: (&[u8], &[u8]), | |
1049 | mut offset: usize, |
|
1048 | mut offset: usize, | |
1050 | index_end: Option<u64>, |
|
1049 | index_end: Option<u64>, | |
1051 | data_end: Option<u64>, |
|
1050 | data_end: Option<u64>, | |
1052 | ) -> Result<(u64, Option<u64>), HgError> { |
|
1051 | ) -> Result<(u64, Option<u64>), HgError> { | |
1053 | let current_revision = self.len() - 1; |
|
1052 | let current_revision = self.len() - 1; | |
1054 | let canonical_index_file = self.canonical_index_file(); |
|
1053 | let canonical_index_file = self.canonical_index_file(); | |
1055 |
|
1054 | |||
1056 | let is_inline = self.is_inline(); |
|
1055 | let is_inline = self.is_inline(); | |
1057 | let handles = match &mut self.writing_handles { |
|
1056 | let handles = match &mut self.writing_handles { | |
1058 | None => { |
|
1057 | None => { | |
1059 | return Err(HgError::abort( |
|
1058 | return Err(HgError::abort( | |
1060 | "adding revision outside of the `with_write` context", |
|
1059 | "adding revision outside of the `with_write` context", | |
1061 | exit_codes::ABORT, |
|
1060 | exit_codes::ABORT, | |
1062 | None, |
|
1061 | None, | |
1063 | )); |
|
1062 | )); | |
1064 | } |
|
1063 | } | |
1065 | Some(handles) => handles, |
|
1064 | Some(handles) => handles, | |
1066 | }; |
|
1065 | }; | |
1067 | let index_handle = &mut handles.index_handle; |
|
1066 | let index_handle = &mut handles.index_handle; | |
1068 | let data_handle = &mut handles.data_handle; |
|
1067 | let data_handle = &mut handles.data_handle; | |
1069 | if let Some(end) = index_end { |
|
1068 | if let Some(end) = index_end { | |
1070 | index_handle |
|
1069 | index_handle | |
1071 | .seek(SeekFrom::Start(end)) |
|
1070 | .seek(SeekFrom::Start(end)) | |
1072 | .when_reading_file(&self.index_file)?; |
|
1071 | .when_reading_file(&self.index_file)?; | |
1073 | } else { |
|
1072 | } else { | |
1074 | index_handle |
|
1073 | index_handle | |
1075 | .seek(SeekFrom::End(0)) |
|
1074 | .seek(SeekFrom::End(0)) | |
1076 | .when_reading_file(&self.index_file)?; |
|
1075 | .when_reading_file(&self.index_file)?; | |
1077 | } |
|
1076 | } | |
1078 | if let Some(data_handle) = data_handle { |
|
1077 | if let Some(data_handle) = data_handle { | |
1079 | if let Some(end) = data_end { |
|
1078 | if let Some(end) = data_end { | |
1080 | data_handle |
|
1079 | data_handle | |
1081 | .seek(SeekFrom::Start(end)) |
|
1080 | .seek(SeekFrom::Start(end)) | |
1082 | .when_reading_file(&self.data_file)?; |
|
1081 | .when_reading_file(&self.data_file)?; | |
1083 | } else { |
|
1082 | } else { | |
1084 | data_handle |
|
1083 | data_handle | |
1085 | .seek(SeekFrom::End(0)) |
|
1084 | .seek(SeekFrom::End(0)) | |
1086 | .when_reading_file(&self.data_file)?; |
|
1085 | .when_reading_file(&self.data_file)?; | |
1087 | } |
|
1086 | } | |
1088 | } |
|
1087 | } | |
1089 | let (header, data) = header_and_data; |
|
1088 | let (header, data) = header_and_data; | |
1090 |
|
1089 | |||
1091 | if !is_inline { |
|
1090 | if !is_inline { | |
1092 | transaction.add(&self.data_file, offset); |
|
1091 | transaction.add(&self.data_file, offset); | |
1093 | transaction |
|
1092 | transaction | |
1094 | .add(&canonical_index_file, current_revision * entry.len()); |
|
1093 | .add(&canonical_index_file, current_revision * entry.len()); | |
1095 | let data_handle = data_handle |
|
1094 | let data_handle = data_handle | |
1096 | .as_mut() |
|
1095 | .as_mut() | |
1097 | .expect("data handle should exist when not inline"); |
|
1096 | .expect("data handle should exist when not inline"); | |
1098 | if !header.is_empty() { |
|
1097 | if !header.is_empty() { | |
1099 | data_handle.write_all(header)?; |
|
1098 | data_handle.write_all(header)?; | |
1100 | } |
|
1099 | } | |
1101 | data_handle.write_all(data)?; |
|
1100 | data_handle.write_all(data)?; | |
1102 | match &mut self.delayed_buffer { |
|
1101 | match &mut self.delayed_buffer { | |
1103 | Some(buf) => { |
|
1102 | Some(buf) => { | |
1104 | buf.lock() |
|
1103 | buf.lock() | |
1105 | .expect("propagate the panic") |
|
1104 | .expect("propagate the panic") | |
1106 | .buffer |
|
1105 | .buffer | |
1107 | .write_all(entry) |
|
1106 | .write_all(entry) | |
1108 | .expect("write to delay buffer should succeed"); |
|
1107 | .expect("write to delay buffer should succeed"); | |
1109 | } |
|
1108 | } | |
1110 | None => index_handle.write_all(entry)?, |
|
1109 | None => index_handle.write_all(entry)?, | |
1111 | } |
|
1110 | } | |
1112 | } else if self.delayed_buffer.is_some() { |
|
1111 | } else if self.delayed_buffer.is_some() { | |
1113 | return Err(HgError::abort( |
|
1112 | return Err(HgError::abort( | |
1114 | "invalid delayed write on inline revlog", |
|
1113 | "invalid delayed write on inline revlog", | |
1115 | exit_codes::ABORT, |
|
1114 | exit_codes::ABORT, | |
1116 | None, |
|
1115 | None, | |
1117 | )); |
|
1116 | )); | |
1118 | } else { |
|
1117 | } else { | |
1119 | offset += current_revision * entry.len(); |
|
1118 | offset += current_revision * entry.len(); | |
1120 | transaction.add(&canonical_index_file, offset); |
|
1119 | transaction.add(&canonical_index_file, offset); | |
1121 | index_handle.write_all(entry)?; |
|
1120 | index_handle.write_all(entry)?; | |
1122 | index_handle.write_all(header)?; |
|
1121 | index_handle.write_all(header)?; | |
1123 | index_handle.write_all(data)?; |
|
1122 | index_handle.write_all(data)?; | |
1124 | } |
|
1123 | } | |
1125 | let data_position = match data_handle { |
|
1124 | let data_position = match data_handle { | |
1126 | Some(h) => Some(h.position()?), |
|
1125 | Some(h) => Some(h.position()?), | |
1127 | None => None, |
|
1126 | None => None, | |
1128 | }; |
|
1127 | }; | |
1129 | Ok((index_handle.position()?, data_position)) |
|
1128 | Ok((index_handle.position()?, data_position)) | |
1130 | } |
|
1129 | } | |
1131 |
|
1130 | |||
1132 | /// Return the real target index file and not the temporary when diverting |
|
1131 | /// Return the real target index file and not the temporary when diverting | |
1133 | pub fn canonical_index_file(&self) -> PathBuf { |
|
1132 | pub fn canonical_index_file(&self) -> PathBuf { | |
1134 | self.original_index_file |
|
1133 | self.original_index_file | |
1135 | .as_ref() |
|
1134 | .as_ref() | |
1136 | .map(ToOwned::to_owned) |
|
1135 | .map(ToOwned::to_owned) | |
1137 | .unwrap_or_else(|| self.index_file.to_owned()) |
|
1136 | .unwrap_or_else(|| self.index_file.to_owned()) | |
1138 | } |
|
1137 | } | |
1139 |
|
1138 | |||
1140 | /// Return the path to the diverted index |
|
1139 | /// Return the path to the diverted index | |
1141 | fn diverted_index(&self) -> PathBuf { |
|
1140 | fn diverted_index(&self) -> PathBuf { | |
1142 | self.index_file.with_extension("i.a") |
|
1141 | self.index_file.with_extension("i.a") | |
1143 | } |
|
1142 | } | |
1144 |
|
1143 | |||
1145 | /// True if we're in a [`Self::with_write`] or [`Self::with_read`] context |
|
1144 | /// True if we're in a [`Self::with_write`] or [`Self::with_read`] context | |
1146 | pub fn is_open(&self) -> bool { |
|
1145 | pub fn is_open(&self) -> bool { | |
1147 | self.segment_file.is_open() |
|
1146 | self.segment_file.is_open() | |
1148 | } |
|
1147 | } | |
1149 |
|
1148 | |||
1150 | /// Set this revlog to delay its writes to a buffer |
|
1149 | /// Set this revlog to delay its writes to a buffer | |
1151 | pub fn delay(&mut self) -> Result<Option<PathBuf>, HgError> { |
|
1150 | pub fn delay(&mut self) -> Result<Option<PathBuf>, HgError> { | |
1152 | assert!(!self.is_open()); |
|
1151 | assert!(!self.is_open()); | |
1153 | if self.is_inline() { |
|
1152 | if self.is_inline() { | |
1154 | return Err(HgError::abort( |
|
1153 | return Err(HgError::abort( | |
1155 | "revlog with delayed write should not be inline", |
|
1154 | "revlog with delayed write should not be inline", | |
1156 | exit_codes::ABORT, |
|
1155 | exit_codes::ABORT, | |
1157 | None, |
|
1156 | None, | |
1158 | )); |
|
1157 | )); | |
1159 | } |
|
1158 | } | |
1160 | if self.delayed_buffer.is_some() || self.original_index_file.is_some() |
|
1159 | if self.delayed_buffer.is_some() || self.original_index_file.is_some() | |
1161 | { |
|
1160 | { | |
1162 | // Delay or divert already happening |
|
1161 | // Delay or divert already happening | |
1163 | return Ok(None); |
|
1162 | return Ok(None); | |
1164 | } |
|
1163 | } | |
1165 | if self.is_empty() { |
|
1164 | if self.is_empty() { | |
1166 | self.original_index_file = Some(self.index_file.to_owned()); |
|
1165 | self.original_index_file = Some(self.index_file.to_owned()); | |
1167 | self.index_file = self.diverted_index(); |
|
1166 | self.index_file = self.diverted_index(); | |
1168 | if self.vfs.exists(&self.index_file) { |
|
1167 | if self.vfs.exists(&self.index_file) { | |
1169 | self.vfs.unlink(&self.index_file)?; |
|
1168 | self.vfs.unlink(&self.index_file)?; | |
1170 | } |
|
1169 | } | |
1171 | Ok(Some(self.index_file.to_owned())) |
|
1170 | Ok(Some(self.index_file.to_owned())) | |
1172 | } else { |
|
1171 | } else { | |
1173 | self.delayed_buffer = |
|
1172 | self.delayed_buffer = | |
1174 | Some(Arc::new(Mutex::new(DelayedBuffer::default()))); |
|
1173 | Some(Arc::new(Mutex::new(DelayedBuffer::default()))); | |
1175 | Ok(None) |
|
1174 | Ok(None) | |
1176 | } |
|
1175 | } | |
1177 | } |
|
1176 | } | |
1178 |
|
1177 | |||
/// Write the pending data (in memory) if any to the diverted index file
/// (on disk temporary file)
///
/// Returns the path of the diverted index file (if one is now in use)
/// and a boolean telling whether any pending data was written.
pub fn write_pending(
    &mut self,
) -> Result<(Option<PathBuf>, bool), HgError> {
    assert!(!self.is_open());
    if self.is_inline() {
        // Delayed writes are only supported for non-inline revlogs.
        return Err(HgError::abort(
            "revlog with delayed write should not be inline",
            exit_codes::ABORT,
            None,
        ));
    }
    if self.original_index_file.is_some() {
        // Already diverted: nothing to copy out, but data is pending.
        return Ok((None, true));
    }
    let mut any_pending = false;
    let pending_index_file = self.diverted_index();
    if self.vfs.exists(&pending_index_file) {
        // Remove any stale diverted file from a previous run.
        self.vfs.unlink(&pending_index_file)?;
    }
    // Seed the diverted file with the current on-disk index contents.
    self.vfs.copy(&self.index_file, &pending_index_file)?;
    if let Some(delayed_buffer) = self.delayed_buffer.take() {
        // Append the delayed in-memory data to the diverted file.
        let mut index_file_handle = self.vfs.open(&pending_index_file)?;
        index_file_handle
            .seek(SeekFrom::End(0))
            .when_writing_file(&pending_index_file)?;
        let delayed_data =
            &delayed_buffer.lock().expect("propagate the panic").buffer;
        index_file_handle
            .write_all(delayed_data)
            .when_writing_file(&pending_index_file)?;
        any_pending = true;
    }
    // Switch over: subsequent operations use the diverted file, and the
    // canonical path is remembered for `finalize_pending`.
    self.original_index_file = Some(self.index_file.to_owned());
    self.index_file = pending_index_file;
    Ok((Some(self.index_file.to_owned()), any_pending))
}
1217 |
|
1216 | |||
/// Overwrite the canonical file with the diverted file, or write out the
/// delayed buffer.
/// Returns an error if the revlog is neither diverted nor delayed.
pub fn finalize_pending(&mut self) -> Result<PathBuf, HgError> {
    assert!(!self.is_open());
    if self.is_inline() {
        // Delayed writes are only supported for non-inline revlogs.
        return Err(HgError::abort(
            "revlog with delayed write should not be inline",
            exit_codes::ABORT,
            None,
        ));
    }
    match (
        self.delayed_buffer.as_ref(),
        self.original_index_file.as_ref(),
    ) {
        (None, None) => {
            // `delay()` was never called, or the revlog was already
            // finalized.
            return Err(HgError::abort(
                "neither delay nor divert found on this revlog",
                exit_codes::ABORT,
                None,
            ));
        }
        (Some(delay), None) => {
            // Delayed case: append the in-memory buffer directly to
            // the canonical index file.
            let mut index_file_handle = self.vfs.open(&self.index_file)?;
            index_file_handle
                .seek(SeekFrom::End(0))
                .when_writing_file(&self.index_file)?;
            index_file_handle
                .write_all(
                    &delay.lock().expect("propagate the panic").buffer,
                )
                .when_writing_file(&self.index_file)?;
            self.delayed_buffer = None;
        }
        (None, Some(divert)) => {
            // Diverted case: move the diverted file over the canonical
            // one (it may never have been created, hence the check).
            if self.vfs.exists(&self.index_file) {
                self.vfs.rename(&self.index_file, divert, true)?;
            }
            divert.clone_into(&mut self.index_file);
            self.original_index_file = None;
        }
        // Delay and divert are mutually exclusive by construction in
        // `delay()`; both being set means the bookkeeping broke.
        (Some(_), Some(_)) => unreachable!(
            "{} is in an inconsistent state of both delay and divert",
            self.canonical_index_file().display(),
        ),
    }
    Ok(self.canonical_index_file())
}
1267 |
|
1266 | |||
/// Borrow the underlying [`Index`].
///
/// `pub` only for `hg-cpython`. This is made a different method than
/// [`Revlog::index`] in case there is a different invariant that pops up
/// later.
#[doc(hidden)]
pub fn shared_index(&self) -> &Index {
    &self.index
}
1275 | } |
|
1274 | } | |
1276 |
|
1275 | |||
/// Per-revision cache of uncompressed chunks, with eviction driven by the
/// memory usage of the cached entries (see the `ByMemoryUsage` limiter).
///
/// The use of a [`RefCell`] assumes that a given revlog will only
/// be accessed (read or write) by a single thread.
type UncompressedChunkCache =
    RefCell<LruMap<Revision, Arc<[u8]>, ByMemoryUsage>>;
1281 |
|
1280 | |||
/// The node, revision and data for the last revision we've seen. Speeds up
/// a lot of sequential operations of the revlog.
///
/// The data is not just bytes since it can come from Python and we want to
/// avoid copies if possible: the `Box<dyn Deref<…>>` hides whether the
/// bytes are owned on the Rust side or borrowed from elsewhere.
type SingleRevisionCache =
    (Node, Revision, Box<dyn Deref<Target = [u8]> + Send>);
1289 |
|
1288 | |||
/// A way of progressively filling a buffer with revision data, then return
/// that buffer. Used to abstract away Python-allocated code to reduce copying
/// for performance reasons.
pub trait RevisionBuffer {
    /// The owned buffer type to return
    type Target;
    /// Copies the slice into the buffer
    fn extend_from_slice(&mut self, slice: &[u8]);
    /// Returns the now finished owned buffer
    fn finish(self) -> Self::Target;
}
1301 |
|
1300 | |||
/// A simple vec-based buffer. This is uselessly complicated for the pure Rust
/// case, but it's the price to pay for Python compatibility.
#[derive(Debug)]
pub(super) struct CoreRevisionBuffer {
    // Backing storage, filled through `RevisionBuffer::extend_from_slice`
    // and handed out by `RevisionBuffer::finish`.
    buf: Vec<u8>,
}
1308 |
|
1307 | |||
1309 | impl CoreRevisionBuffer { |
|
1308 | impl CoreRevisionBuffer { | |
1310 | pub fn new() -> Self { |
|
1309 | pub fn new() -> Self { | |
1311 | Self { buf: vec![] } |
|
1310 | Self { buf: vec![] } | |
1312 | } |
|
1311 | } | |
1313 |
|
1312 | |||
1314 | #[inline] |
|
1313 | #[inline] | |
1315 | pub fn resize(&mut self, size: usize) { |
|
1314 | pub fn resize(&mut self, size: usize) { | |
1316 | self.buf.reserve_exact(size - self.buf.capacity()); |
|
1315 | self.buf.reserve_exact(size - self.buf.capacity()); | |
1317 | } |
|
1316 | } | |
1318 | } |
|
1317 | } | |
1319 |
|
1318 | |||
impl RevisionBuffer for CoreRevisionBuffer {
    /// A plain owned byte vector is returned to pure-Rust callers.
    type Target = Vec<u8>;

    #[inline]
    fn extend_from_slice(&mut self, slice: &[u8]) {
        self.buf.extend_from_slice(slice);
    }

    #[inline]
    fn finish(self) -> Self::Target {
        self.buf
    }
}
1333 |
|
1332 | |||
1334 | /// Calculate the hash of a revision given its data and its parents. |
|
1333 | /// Calculate the hash of a revision given its data and its parents. | |
1335 | pub fn hash( |
|
1334 | pub fn hash( | |
1336 | data: &[u8], |
|
1335 | data: &[u8], | |
1337 | p1_hash: &[u8], |
|
1336 | p1_hash: &[u8], | |
1338 | p2_hash: &[u8], |
|
1337 | p2_hash: &[u8], | |
1339 | ) -> [u8; NODE_BYTES_LENGTH] { |
|
1338 | ) -> [u8; NODE_BYTES_LENGTH] { | |
1340 | let mut hasher = Sha1::new(); |
|
1339 | let mut hasher = Sha1::new(); | |
1341 | let (a, b) = (p1_hash, p2_hash); |
|
1340 | let (a, b) = (p1_hash, p2_hash); | |
1342 | if a > b { |
|
1341 | if a > b { | |
1343 | hasher.update(b); |
|
1342 | hasher.update(b); | |
1344 | hasher.update(a); |
|
1343 | hasher.update(a); | |
1345 | } else { |
|
1344 | } else { | |
1346 | hasher.update(a); |
|
1345 | hasher.update(a); | |
1347 | hasher.update(b); |
|
1346 | hasher.update(b); | |
1348 | } |
|
1347 | } | |
1349 | hasher.update(data); |
|
1348 | hasher.update(data); | |
1350 | *hasher.finalize().as_ref() |
|
1349 | *hasher.finalize().as_ref() | |
1351 | } |
|
1350 | } |
@@ -1,121 +1,121 | |||||
1 | // ref_sharing.rs |
|
1 | // ref_sharing.rs | |
2 | // |
|
2 | // | |
3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> |
|
3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> | |
4 | // |
|
4 | // | |
5 | // Permission is hereby granted, free of charge, to any person obtaining a copy |
|
5 | // Permission is hereby granted, free of charge, to any person obtaining a copy | |
6 | // of this software and associated documentation files (the "Software"), to |
|
6 | // of this software and associated documentation files (the "Software"), to | |
7 | // deal in the Software without restriction, including without limitation the |
|
7 | // deal in the Software without restriction, including without limitation the | |
8 | // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
|
8 | // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | |
9 | // sell copies of the Software, and to permit persons to whom the Software is |
|
9 | // sell copies of the Software, and to permit persons to whom the Software is | |
10 | // furnished to do so, subject to the following conditions: |
|
10 | // furnished to do so, subject to the following conditions: | |
11 | // |
|
11 | // | |
12 | // The above copyright notice and this permission notice shall be included in |
|
12 | // The above copyright notice and this permission notice shall be included in | |
13 | // all copies or substantial portions of the Software. |
|
13 | // all copies or substantial portions of the Software. | |
14 | // |
|
14 | // | |
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
20 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
|
20 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
21 | // IN THE SOFTWARE. |
|
21 | // IN THE SOFTWARE. | |
22 |
|
22 | |||
23 | //! Macros for use in the `hg-cpython` bridge library. |
|
23 | //! Macros for use in the `hg-cpython` bridge library. | |
24 |
|
24 | |||
/// Defines a `py_class!` that acts as a Python iterator over a Rust iterator.
///
/// TODO: this is a bit awkward to use, and a better (more complicated)
/// procedural macro would simplify the interface a lot.
///
/// # Parameters
///
/// * `$name` is the identifier to give to the resulting Rust struct.
/// * `$leaked` corresponds to `UnsafePyLeaked` in the matching `@shared data`
/// declaration.
/// * `$iterator_type` is the type of the Rust iterator.
/// * `$success_func` is a function for processing the Rust `(key, value)`
/// tuple on iteration success, turning it into something Python understands.
/// * `$success_type` is the return type of `$success_func`
///
/// # Safety
///
/// `$success_func` may take a reference, but its lifetime may be cheated.
/// Do not copy it out of the function call.
///
/// # Example
///
/// ```
/// struct MyStruct {
///     inner: HashMap<Vec<u8>, Vec<u8>>;
/// }
///
/// py_class!(pub class MyType |py| {
///     @shared data inner: MyStruct;
///
///     def __iter__(&self) -> PyResult<MyTypeItemsIterator> {
///         let leaked_ref = self.inner_shared(py).leak_immutable();
///         MyTypeItemsIterator::from_inner(
///             py,
///             unsafe { leaked_ref.map(py, |o| o.iter()) },
///         )
///     }
/// });
///
/// impl MyType {
///     fn translate_key_value(
///         py: Python,
///         res: (&Vec<u8>, &Vec<u8>),
///     ) -> PyResult<Option<(PyBytes, PyBytes)>> {
///         let (f, entry) = res;
///         Ok(Some((
///             PyBytes::new(py, f),
///             PyBytes::new(py, entry),
///         )))
///     }
/// }
///
/// py_shared_iterator!(
///     MyTypeItemsIterator,
///     UnsafePyLeaked<HashMap<'static, Vec<u8>, Vec<u8>>>,
///     MyType::translate_key_value,
///     Option<(PyBytes, PyBytes)>
/// );
/// ```
macro_rules! py_shared_iterator {
    (
        $name: ident,
        $leaked: ty,
        $success_func: expr,
        $success_type: ty
    ) => {
        py_class!(pub class $name |py| {
            // The leaked Rust iterator, wrapped in a `RefCell` so that
            // `__next__` can advance it through a shared `&self`.
            data inner: RefCell<$leaked>;

            def __next__(&self) -> PyResult<$success_type> {
                let mut leaked = self.inner(py).borrow_mut();
                let mut iter = unsafe { leaked.try_borrow_mut(py)? };
                match iter.next() {
                    None => Ok(None),
                    // res may be a reference of cheated 'static lifetime
                    Some(res) => $success_func(py, res),
                }
            }

            def __iter__(&self) -> PyResult<Self> {
                Ok(self.clone_ref(py))
            }
        });

        impl $name {
            // Build the Python iterator object from an already-leaked
            // Rust iterator.
            pub fn from_inner(
                py: Python,
                leaked: $leaked,
            ) -> PyResult<Self> {
                Self::create_instance(
                    py,
                    RefCell::new(leaked),
                )
            }
        }
    };
}
General Comments 0
You need to be logged in to leave comments.
Login now