##// END OF EJS Templates
rust: apply more formatting fixes...
Yuya Nishihara -
r43109:ce6797ef default
parent child Browse files
Show More
@@ -1,788 +1,787 b''
1 // ancestors.rs
1 // ancestors.rs
2 //
2 //
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Rust versions of generic DAG ancestors algorithms for Mercurial
8 //! Rust versions of generic DAG ancestors algorithms for Mercurial
9
9
10 use super::{Graph, GraphError, Revision, NULL_REVISION};
10 use super::{Graph, GraphError, Revision, NULL_REVISION};
11 use crate::dagops;
11 use crate::dagops;
12 use std::cmp::max;
12 use std::cmp::max;
13 use std::collections::{BinaryHeap, HashSet};
13 use std::collections::{BinaryHeap, HashSet};
14
14
15 /// Iterator over the ancestors of a given list of revisions
15 /// Iterator over the ancestors of a given list of revisions
16 /// This is a generic type, defined and implemented for any Graph, so that
16 /// This is a generic type, defined and implemented for any Graph, so that
17 /// it's easy to
17 /// it's easy to
18 ///
18 ///
19 /// - unit test in pure Rust
19 /// - unit test in pure Rust
20 /// - bind to main Mercurial code, potentially in several ways and have these
20 /// - bind to main Mercurial code, potentially in several ways and have these
21 /// bindings evolve over time
21 /// bindings evolve over time
22 pub struct AncestorsIterator<G: Graph> {
22 pub struct AncestorsIterator<G: Graph> {
23 graph: G,
23 graph: G,
24 visit: BinaryHeap<Revision>,
24 visit: BinaryHeap<Revision>,
25 seen: HashSet<Revision>,
25 seen: HashSet<Revision>,
26 stoprev: Revision,
26 stoprev: Revision,
27 }
27 }
28
28
29 /// Lazy ancestors set, backed by AncestorsIterator
29 /// Lazy ancestors set, backed by AncestorsIterator
30 pub struct LazyAncestors<G: Graph + Clone> {
30 pub struct LazyAncestors<G: Graph + Clone> {
31 graph: G,
31 graph: G,
32 containsiter: AncestorsIterator<G>,
32 containsiter: AncestorsIterator<G>,
33 initrevs: Vec<Revision>,
33 initrevs: Vec<Revision>,
34 stoprev: Revision,
34 stoprev: Revision,
35 inclusive: bool,
35 inclusive: bool,
36 }
36 }
37
37
38 pub struct MissingAncestors<G: Graph> {
38 pub struct MissingAncestors<G: Graph> {
39 graph: G,
39 graph: G,
40 bases: HashSet<Revision>,
40 bases: HashSet<Revision>,
41 max_base: Revision,
41 max_base: Revision,
42 }
42 }
43
43
44 impl<G: Graph> AncestorsIterator<G> {
44 impl<G: Graph> AncestorsIterator<G> {
45 /// Constructor.
45 /// Constructor.
46 ///
46 ///
47 /// if `inclusive` is true, then the init revisions are emitted in
47 /// if `inclusive` is true, then the init revisions are emitted in
48 /// particular, otherwise iteration starts from their parents.
48 /// particular, otherwise iteration starts from their parents.
49 pub fn new(
49 pub fn new(
50 graph: G,
50 graph: G,
51 initrevs: impl IntoIterator<Item = Revision>,
51 initrevs: impl IntoIterator<Item = Revision>,
52 stoprev: Revision,
52 stoprev: Revision,
53 inclusive: bool,
53 inclusive: bool,
54 ) -> Result<Self, GraphError> {
54 ) -> Result<Self, GraphError> {
55 let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
55 let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
56 if inclusive {
56 if inclusive {
57 let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
57 let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
58 let seen = visit.iter().map(|&x| x).collect();
58 let seen = visit.iter().map(|&x| x).collect();
59 return Ok(AncestorsIterator {
59 return Ok(AncestorsIterator {
60 visit: visit,
60 visit: visit,
61 seen: seen,
61 seen: seen,
62 stoprev: stoprev,
62 stoprev: stoprev,
63 graph: graph,
63 graph: graph,
64 });
64 });
65 }
65 }
66 let mut this = AncestorsIterator {
66 let mut this = AncestorsIterator {
67 visit: BinaryHeap::new(),
67 visit: BinaryHeap::new(),
68 seen: HashSet::new(),
68 seen: HashSet::new(),
69 stoprev: stoprev,
69 stoprev: stoprev,
70 graph: graph,
70 graph: graph,
71 };
71 };
72 this.seen.insert(NULL_REVISION);
72 this.seen.insert(NULL_REVISION);
73 for rev in filtered_initrevs {
73 for rev in filtered_initrevs {
74 for parent in this.graph.parents(rev)?.iter().cloned() {
74 for parent in this.graph.parents(rev)?.iter().cloned() {
75 this.conditionally_push_rev(parent);
75 this.conditionally_push_rev(parent);
76 }
76 }
77 }
77 }
78 Ok(this)
78 Ok(this)
79 }
79 }
80
80
81 #[inline]
81 #[inline]
82 fn conditionally_push_rev(&mut self, rev: Revision) {
82 fn conditionally_push_rev(&mut self, rev: Revision) {
83 if self.stoprev <= rev && self.seen.insert(rev) {
83 if self.stoprev <= rev && self.seen.insert(rev) {
84 self.visit.push(rev);
84 self.visit.push(rev);
85 }
85 }
86 }
86 }
87
87
88 /// Consumes partially the iterator to tell if the given target
88 /// Consumes partially the iterator to tell if the given target
89 /// revision
89 /// revision
90 /// is in the ancestors it emits.
90 /// is in the ancestors it emits.
91 /// This is meant for iterators actually dedicated to that kind of
91 /// This is meant for iterators actually dedicated to that kind of
92 /// purpose
92 /// purpose
93 pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
93 pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
94 if self.seen.contains(&target) && target != NULL_REVISION {
94 if self.seen.contains(&target) && target != NULL_REVISION {
95 return Ok(true);
95 return Ok(true);
96 }
96 }
97 for item in self {
97 for item in self {
98 let rev = item?;
98 let rev = item?;
99 if rev == target {
99 if rev == target {
100 return Ok(true);
100 return Ok(true);
101 }
101 }
102 if rev < target {
102 if rev < target {
103 return Ok(false);
103 return Ok(false);
104 }
104 }
105 }
105 }
106 Ok(false)
106 Ok(false)
107 }
107 }
108
108
109 pub fn peek(&self) -> Option<Revision> {
109 pub fn peek(&self) -> Option<Revision> {
110 self.visit.peek().map(|&r| r)
110 self.visit.peek().map(|&r| r)
111 }
111 }
112
112
113 /// Tell if the iterator is about an empty set
113 /// Tell if the iterator is about an empty set
114 ///
114 ///
115 /// The result does not depend whether the iterator has been consumed
115 /// The result does not depend whether the iterator has been consumed
116 /// or not.
116 /// or not.
117 /// This is mostly meant for iterators backing a lazy ancestors set
117 /// This is mostly meant for iterators backing a lazy ancestors set
118 pub fn is_empty(&self) -> bool {
118 pub fn is_empty(&self) -> bool {
119 if self.visit.len() > 0 {
119 if self.visit.len() > 0 {
120 return false;
120 return false;
121 }
121 }
122 if self.seen.len() > 1 {
122 if self.seen.len() > 1 {
123 return false;
123 return false;
124 }
124 }
125 // at this point, the seen set is at most a singleton.
125 // at this point, the seen set is at most a singleton.
126 // If not `self.inclusive`, it's still possible that it has only
126 // If not `self.inclusive`, it's still possible that it has only
127 // the null revision
127 // the null revision
128 self.seen.is_empty() || self.seen.contains(&NULL_REVISION)
128 self.seen.is_empty() || self.seen.contains(&NULL_REVISION)
129 }
129 }
130 }
130 }
131
131
132 /// Main implementation for the iterator
132 /// Main implementation for the iterator
133 ///
133 ///
134 /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py`
134 /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py`
135 /// with a few non crucial differences:
135 /// with a few non crucial differences:
136 ///
136 ///
137 /// - there's no filtering of invalid parent revisions. Actually, it should be
137 /// - there's no filtering of invalid parent revisions. Actually, it should be
138 /// consistent and more efficient to filter them from the end caller.
138 /// consistent and more efficient to filter them from the end caller.
139 /// - we don't have the optimization for adjacent revisions (i.e., the case
139 /// - we don't have the optimization for adjacent revisions (i.e., the case
140 /// where `p1 == rev - 1`), because it amounts to update the first element of
140 /// where `p1 == rev - 1`), because it amounts to update the first element of
141 /// the heap without sifting, which Rust's BinaryHeap doesn't let us do.
141 /// the heap without sifting, which Rust's BinaryHeap doesn't let us do.
142 /// - we save a few pushes by comparing with `stoprev` before pushing
142 /// - we save a few pushes by comparing with `stoprev` before pushing
143 impl<G: Graph> Iterator for AncestorsIterator<G> {
143 impl<G: Graph> Iterator for AncestorsIterator<G> {
144 type Item = Result<Revision, GraphError>;
144 type Item = Result<Revision, GraphError>;
145
145
146 fn next(&mut self) -> Option<Self::Item> {
146 fn next(&mut self) -> Option<Self::Item> {
147 let current = match self.visit.peek() {
147 let current = match self.visit.peek() {
148 None => {
148 None => {
149 return None;
149 return None;
150 }
150 }
151 Some(c) => *c,
151 Some(c) => *c,
152 };
152 };
153 let [p1, p2] = match self.graph.parents(current) {
153 let [p1, p2] = match self.graph.parents(current) {
154 Ok(ps) => ps,
154 Ok(ps) => ps,
155 Err(e) => return Some(Err(e)),
155 Err(e) => return Some(Err(e)),
156 };
156 };
157 if p1 < self.stoprev || !self.seen.insert(p1) {
157 if p1 < self.stoprev || !self.seen.insert(p1) {
158 self.visit.pop();
158 self.visit.pop();
159 } else {
159 } else {
160 *(self.visit.peek_mut().unwrap()) = p1;
160 *(self.visit.peek_mut().unwrap()) = p1;
161 };
161 };
162
162
163 self.conditionally_push_rev(p2);
163 self.conditionally_push_rev(p2);
164 Some(Ok(current))
164 Some(Ok(current))
165 }
165 }
166 }
166 }
167
167
168 impl<G: Graph + Clone> LazyAncestors<G> {
168 impl<G: Graph + Clone> LazyAncestors<G> {
169 pub fn new(
169 pub fn new(
170 graph: G,
170 graph: G,
171 initrevs: impl IntoIterator<Item = Revision>,
171 initrevs: impl IntoIterator<Item = Revision>,
172 stoprev: Revision,
172 stoprev: Revision,
173 inclusive: bool,
173 inclusive: bool,
174 ) -> Result<Self, GraphError> {
174 ) -> Result<Self, GraphError> {
175 let v: Vec<Revision> = initrevs.into_iter().collect();
175 let v: Vec<Revision> = initrevs.into_iter().collect();
176 Ok(LazyAncestors {
176 Ok(LazyAncestors {
177 graph: graph.clone(),
177 graph: graph.clone(),
178 containsiter: AncestorsIterator::new(
178 containsiter: AncestorsIterator::new(
179 graph,
179 graph,
180 v.iter().cloned(),
180 v.iter().cloned(),
181 stoprev,
181 stoprev,
182 inclusive,
182 inclusive,
183 )?,
183 )?,
184 initrevs: v,
184 initrevs: v,
185 stoprev: stoprev,
185 stoprev: stoprev,
186 inclusive: inclusive,
186 inclusive: inclusive,
187 })
187 })
188 }
188 }
189
189
190 pub fn contains(&mut self, rev: Revision) -> Result<bool, GraphError> {
190 pub fn contains(&mut self, rev: Revision) -> Result<bool, GraphError> {
191 self.containsiter.contains(rev)
191 self.containsiter.contains(rev)
192 }
192 }
193
193
194 pub fn is_empty(&self) -> bool {
194 pub fn is_empty(&self) -> bool {
195 self.containsiter.is_empty()
195 self.containsiter.is_empty()
196 }
196 }
197
197
198 pub fn iter(&self) -> AncestorsIterator<G> {
198 pub fn iter(&self) -> AncestorsIterator<G> {
199 // the arguments being the same as for self.containsiter, we know
199 // the arguments being the same as for self.containsiter, we know
200 // for sure that AncestorsIterator constructor can't fail
200 // for sure that AncestorsIterator constructor can't fail
201 AncestorsIterator::new(
201 AncestorsIterator::new(
202 self.graph.clone(),
202 self.graph.clone(),
203 self.initrevs.iter().cloned(),
203 self.initrevs.iter().cloned(),
204 self.stoprev,
204 self.stoprev,
205 self.inclusive,
205 self.inclusive,
206 )
206 )
207 .unwrap()
207 .unwrap()
208 }
208 }
209 }
209 }
210
210
211 impl<G: Graph> MissingAncestors<G> {
211 impl<G: Graph> MissingAncestors<G> {
212 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
212 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
213 let mut created = MissingAncestors {
213 let mut created = MissingAncestors {
214 graph: graph,
214 graph: graph,
215 bases: HashSet::new(),
215 bases: HashSet::new(),
216 max_base: NULL_REVISION,
216 max_base: NULL_REVISION,
217 };
217 };
218 created.add_bases(bases);
218 created.add_bases(bases);
219 created
219 created
220 }
220 }
221
221
222 pub fn has_bases(&self) -> bool {
222 pub fn has_bases(&self) -> bool {
223 !self.bases.is_empty()
223 !self.bases.is_empty()
224 }
224 }
225
225
226 /// Return a reference to current bases.
226 /// Return a reference to current bases.
227 ///
227 ///
228 /// This is useful in unit tests, but also setdiscovery.py does
228 /// This is useful in unit tests, but also setdiscovery.py does
229 /// read the bases attribute of a ancestor.missingancestors instance.
229 /// read the bases attribute of a ancestor.missingancestors instance.
230 pub fn get_bases<'a>(&'a self) -> &'a HashSet<Revision> {
230 pub fn get_bases<'a>(&'a self) -> &'a HashSet<Revision> {
231 &self.bases
231 &self.bases
232 }
232 }
233
233
234 /// Computes the relative heads of current bases.
234 /// Computes the relative heads of current bases.
235 ///
235 ///
236 /// The object is still usable after this.
236 /// The object is still usable after this.
237 pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
237 pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
238 dagops::heads(&self.graph, self.bases.iter())
238 dagops::heads(&self.graph, self.bases.iter())
239 }
239 }
240
240
241 /// Consumes the object and returns the relative heads of its bases.
241 /// Consumes the object and returns the relative heads of its bases.
242 pub fn into_bases_heads(
242 pub fn into_bases_heads(
243 mut self,
243 mut self,
244 ) -> Result<HashSet<Revision>, GraphError> {
244 ) -> Result<HashSet<Revision>, GraphError> {
245 dagops::retain_heads(&self.graph, &mut self.bases)?;
245 dagops::retain_heads(&self.graph, &mut self.bases)?;
246 Ok(self.bases)
246 Ok(self.bases)
247 }
247 }
248
248
249 /// Add some revisions to `self.bases`
249 /// Add some revisions to `self.bases`
250 ///
250 ///
251 /// Takes care of keeping `self.max_base` up to date.
251 /// Takes care of keeping `self.max_base` up to date.
252 pub fn add_bases(
252 pub fn add_bases(
253 &mut self,
253 &mut self,
254 new_bases: impl IntoIterator<Item = Revision>,
254 new_bases: impl IntoIterator<Item = Revision>,
255 ) {
255 ) {
256 let mut max_base = self.max_base;
256 let mut max_base = self.max_base;
257 self.bases.extend(
257 self.bases.extend(
258 new_bases
258 new_bases
259 .into_iter()
259 .into_iter()
260 .filter(|&rev| rev != NULL_REVISION)
260 .filter(|&rev| rev != NULL_REVISION)
261 .map(|r| {
261 .map(|r| {
262 if r > max_base {
262 if r > max_base {
263 max_base = r;
263 max_base = r;
264 }
264 }
265 r
265 r
266 }),
266 }),
267 );
267 );
268 self.max_base = max_base;
268 self.max_base = max_base;
269 }
269 }
270
270
271 /// Remove all ancestors of self.bases from the revs set (in place)
271 /// Remove all ancestors of self.bases from the revs set (in place)
272 pub fn remove_ancestors_from(
272 pub fn remove_ancestors_from(
273 &mut self,
273 &mut self,
274 revs: &mut HashSet<Revision>,
274 revs: &mut HashSet<Revision>,
275 ) -> Result<(), GraphError> {
275 ) -> Result<(), GraphError> {
276 revs.retain(|r| !self.bases.contains(r));
276 revs.retain(|r| !self.bases.contains(r));
277 // the null revision is always an ancestor. Logically speaking
277 // the null revision is always an ancestor. Logically speaking
278 // it's debatable in case bases is empty, but the Python
278 // it's debatable in case bases is empty, but the Python
279 // implementation always adds NULL_REVISION to bases, making it
279 // implementation always adds NULL_REVISION to bases, making it
280 // unconditionnally true.
280 // unconditionnally true.
281 revs.remove(&NULL_REVISION);
281 revs.remove(&NULL_REVISION);
282 if revs.is_empty() {
282 if revs.is_empty() {
283 return Ok(());
283 return Ok(());
284 }
284 }
285 // anything in revs > start is definitely not an ancestor of bases
285 // anything in revs > start is definitely not an ancestor of bases
286 // revs <= start need to be investigated
286 // revs <= start need to be investigated
287 if self.max_base == NULL_REVISION {
287 if self.max_base == NULL_REVISION {
288 return Ok(());
288 return Ok(());
289 }
289 }
290
290
291 // whatever happens, we'll keep at least keepcount of them
291 // whatever happens, we'll keep at least keepcount of them
292 // knowing this gives us a earlier stop condition than
292 // knowing this gives us a earlier stop condition than
293 // going all the way to the root
293 // going all the way to the root
294 let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
294 let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
295
295
296 let mut curr = self.max_base;
296 let mut curr = self.max_base;
297 while curr != NULL_REVISION && revs.len() > keepcount {
297 while curr != NULL_REVISION && revs.len() > keepcount {
298 if self.bases.contains(&curr) {
298 if self.bases.contains(&curr) {
299 revs.remove(&curr);
299 revs.remove(&curr);
300 self.add_parents(curr)?;
300 self.add_parents(curr)?;
301 }
301 }
302 curr -= 1;
302 curr -= 1;
303 }
303 }
304 Ok(())
304 Ok(())
305 }
305 }
306
306
307 /// Add the parents of `rev` to `self.bases`
307 /// Add the parents of `rev` to `self.bases`
308 ///
308 ///
309 /// This has no effect on `self.max_base`
309 /// This has no effect on `self.max_base`
310 #[inline]
310 #[inline]
311 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
311 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
312 if rev == NULL_REVISION {
312 if rev == NULL_REVISION {
313 return Ok(());
313 return Ok(());
314 }
314 }
315 for p in self.graph.parents(rev)?.iter().cloned() {
315 for p in self.graph.parents(rev)?.iter().cloned() {
316 // No need to bother the set with inserting NULL_REVISION over and
316 // No need to bother the set with inserting NULL_REVISION over and
317 // over
317 // over
318 if p != NULL_REVISION {
318 if p != NULL_REVISION {
319 self.bases.insert(p);
319 self.bases.insert(p);
320 }
320 }
321 }
321 }
322 Ok(())
322 Ok(())
323 }
323 }
324
324
325 /// Return all the ancestors of revs that are not ancestors of self.bases
325 /// Return all the ancestors of revs that are not ancestors of self.bases
326 ///
326 ///
327 /// This may include elements from revs.
327 /// This may include elements from revs.
328 ///
328 ///
329 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
329 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
330 /// revision number order, which is a topological order.
330 /// revision number order, which is a topological order.
331 pub fn missing_ancestors(
331 pub fn missing_ancestors(
332 &mut self,
332 &mut self,
333 revs: impl IntoIterator<Item = Revision>,
333 revs: impl IntoIterator<Item = Revision>,
334 ) -> Result<Vec<Revision>, GraphError> {
334 ) -> Result<Vec<Revision>, GraphError> {
335 // just for convenience and comparison with Python version
335 // just for convenience and comparison with Python version
336 let bases_visit = &mut self.bases;
336 let bases_visit = &mut self.bases;
337 let mut revs: HashSet<Revision> = revs
337 let mut revs: HashSet<Revision> = revs
338 .into_iter()
338 .into_iter()
339 .filter(|r| !bases_visit.contains(r))
339 .filter(|r| !bases_visit.contains(r))
340 .collect();
340 .collect();
341 let revs_visit = &mut revs;
341 let revs_visit = &mut revs;
342 let mut both_visit: HashSet<Revision> =
342 let mut both_visit: HashSet<Revision> =
343 revs_visit.intersection(&bases_visit).cloned().collect();
343 revs_visit.intersection(&bases_visit).cloned().collect();
344 if revs_visit.is_empty() {
344 if revs_visit.is_empty() {
345 return Ok(Vec::new());
345 return Ok(Vec::new());
346 }
346 }
347 let max_revs = revs_visit.iter().cloned().max().unwrap();
347 let max_revs = revs_visit.iter().cloned().max().unwrap();
348 let start = max(self.max_base, max_revs);
348 let start = max(self.max_base, max_revs);
349
349
350 // TODO heuristics for with_capacity()?
350 // TODO heuristics for with_capacity()?
351 let mut missing: Vec<Revision> = Vec::new();
351 let mut missing: Vec<Revision> = Vec::new();
352 for curr in (0..=start).rev() {
352 for curr in (0..=start).rev() {
353 if revs_visit.is_empty() {
353 if revs_visit.is_empty() {
354 break;
354 break;
355 }
355 }
356 if both_visit.remove(&curr) {
356 if both_visit.remove(&curr) {
357 // curr's parents might have made it into revs_visit through
357 // curr's parents might have made it into revs_visit through
358 // another path
358 // another path
359 for p in self.graph.parents(curr)?.iter().cloned() {
359 for p in self.graph.parents(curr)?.iter().cloned() {
360 if p == NULL_REVISION {
360 if p == NULL_REVISION {
361 continue;
361 continue;
362 }
362 }
363 revs_visit.remove(&p);
363 revs_visit.remove(&p);
364 bases_visit.insert(p);
364 bases_visit.insert(p);
365 both_visit.insert(p);
365 both_visit.insert(p);
366 }
366 }
367 } else if revs_visit.remove(&curr) {
367 } else if revs_visit.remove(&curr) {
368 missing.push(curr);
368 missing.push(curr);
369 for p in self.graph.parents(curr)?.iter().cloned() {
369 for p in self.graph.parents(curr)?.iter().cloned() {
370 if p == NULL_REVISION {
370 if p == NULL_REVISION {
371 continue;
371 continue;
372 }
372 }
373 if bases_visit.contains(&p) {
373 if bases_visit.contains(&p) {
374 // p is already known to be an ancestor of revs_visit
374 // p is already known to be an ancestor of revs_visit
375 revs_visit.remove(&p);
375 revs_visit.remove(&p);
376 both_visit.insert(p);
376 both_visit.insert(p);
377 } else if both_visit.contains(&p) {
377 } else if both_visit.contains(&p) {
378 // p should have been in bases_visit
378 // p should have been in bases_visit
379 revs_visit.remove(&p);
379 revs_visit.remove(&p);
380 bases_visit.insert(p);
380 bases_visit.insert(p);
381 } else {
381 } else {
382 // visit later
382 // visit later
383 revs_visit.insert(p);
383 revs_visit.insert(p);
384 }
384 }
385 }
385 }
386 } else if bases_visit.contains(&curr) {
386 } else if bases_visit.contains(&curr) {
387 for p in self.graph.parents(curr)?.iter().cloned() {
387 for p in self.graph.parents(curr)?.iter().cloned() {
388 if p == NULL_REVISION {
388 if p == NULL_REVISION {
389 continue;
389 continue;
390 }
390 }
391 if revs_visit.remove(&p) || both_visit.contains(&p) {
391 if revs_visit.remove(&p) || both_visit.contains(&p) {
392 // p is an ancestor of bases_visit, and is implicitly
392 // p is an ancestor of bases_visit, and is implicitly
393 // in revs_visit, which means p is ::revs & ::bases.
393 // in revs_visit, which means p is ::revs & ::bases.
394 bases_visit.insert(p);
394 bases_visit.insert(p);
395 both_visit.insert(p);
395 both_visit.insert(p);
396 } else {
396 } else {
397 bases_visit.insert(p);
397 bases_visit.insert(p);
398 }
398 }
399 }
399 }
400 }
400 }
401 }
401 }
402 missing.reverse();
402 missing.reverse();
403 Ok(missing)
403 Ok(missing)
404 }
404 }
405 }
405 }
406
406
407 #[cfg(test)]
407 #[cfg(test)]
408 mod tests {
408 mod tests {
409
409
410 use super::*;
410 use super::*;
411 use crate::testing::{SampleGraph, VecGraph};
411 use crate::testing::{SampleGraph, VecGraph};
412 use std::iter::FromIterator;
412 use std::iter::FromIterator;
413
413
414 fn list_ancestors<G: Graph>(
414 fn list_ancestors<G: Graph>(
415 graph: G,
415 graph: G,
416 initrevs: Vec<Revision>,
416 initrevs: Vec<Revision>,
417 stoprev: Revision,
417 stoprev: Revision,
418 inclusive: bool,
418 inclusive: bool,
419 ) -> Vec<Revision> {
419 ) -> Vec<Revision> {
420 AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
420 AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
421 .unwrap()
421 .unwrap()
422 .map(|res| res.unwrap())
422 .map(|res| res.unwrap())
423 .collect()
423 .collect()
424 }
424 }
425
425
426 #[test]
426 #[test]
427 /// Same tests as test-ancestor.py, without membership
427 /// Same tests as test-ancestor.py, without membership
428 /// (see also test-ancestor.py.out)
428 /// (see also test-ancestor.py.out)
429 fn test_list_ancestor() {
429 fn test_list_ancestor() {
430 assert_eq!(list_ancestors(SampleGraph, vec![], 0, false), vec![]);
430 assert_eq!(list_ancestors(SampleGraph, vec![], 0, false), vec![]);
431 assert_eq!(
431 assert_eq!(
432 list_ancestors(SampleGraph, vec![11, 13], 0, false),
432 list_ancestors(SampleGraph, vec![11, 13], 0, false),
433 vec![8, 7, 4, 3, 2, 1, 0]
433 vec![8, 7, 4, 3, 2, 1, 0]
434 );
434 );
435 assert_eq!(
435 assert_eq!(
436 list_ancestors(SampleGraph, vec![1, 3], 0, false),
436 list_ancestors(SampleGraph, vec![1, 3], 0, false),
437 vec![1, 0]
437 vec![1, 0]
438 );
438 );
439 assert_eq!(
439 assert_eq!(
440 list_ancestors(SampleGraph, vec![11, 13], 0, true),
440 list_ancestors(SampleGraph, vec![11, 13], 0, true),
441 vec![13, 11, 8, 7, 4, 3, 2, 1, 0]
441 vec![13, 11, 8, 7, 4, 3, 2, 1, 0]
442 );
442 );
443 assert_eq!(
443 assert_eq!(
444 list_ancestors(SampleGraph, vec![11, 13], 6, false),
444 list_ancestors(SampleGraph, vec![11, 13], 6, false),
445 vec![8, 7]
445 vec![8, 7]
446 );
446 );
447 assert_eq!(
447 assert_eq!(
448 list_ancestors(SampleGraph, vec![11, 13], 6, true),
448 list_ancestors(SampleGraph, vec![11, 13], 6, true),
449 vec![13, 11, 8, 7]
449 vec![13, 11, 8, 7]
450 );
450 );
451 assert_eq!(
451 assert_eq!(
452 list_ancestors(SampleGraph, vec![11, 13], 11, true),
452 list_ancestors(SampleGraph, vec![11, 13], 11, true),
453 vec![13, 11]
453 vec![13, 11]
454 );
454 );
455 assert_eq!(
455 assert_eq!(
456 list_ancestors(SampleGraph, vec![11, 13], 12, true),
456 list_ancestors(SampleGraph, vec![11, 13], 12, true),
457 vec![13]
457 vec![13]
458 );
458 );
459 assert_eq!(
459 assert_eq!(
460 list_ancestors(SampleGraph, vec![10, 1], 0, true),
460 list_ancestors(SampleGraph, vec![10, 1], 0, true),
461 vec![10, 5, 4, 2, 1, 0]
461 vec![10, 5, 4, 2, 1, 0]
462 );
462 );
463 }
463 }
464
464
465 #[test]
465 #[test]
466 /// Corner case that's not directly in test-ancestors.py, but
466 /// Corner case that's not directly in test-ancestors.py, but
467 /// that happens quite often, as demonstrated by running the whole
467 /// that happens quite often, as demonstrated by running the whole
468 /// suite.
468 /// suite.
469 /// For instance, run tests/test-obsolete-checkheads.t
469 /// For instance, run tests/test-obsolete-checkheads.t
470 fn test_nullrev_input() {
470 fn test_nullrev_input() {
471 let mut iter =
471 let mut iter =
472 AncestorsIterator::new(SampleGraph, vec![-1], 0, false).unwrap();
472 AncestorsIterator::new(SampleGraph, vec![-1], 0, false).unwrap();
473 assert_eq!(iter.next(), None)
473 assert_eq!(iter.next(), None)
474 }
474 }
475
475
476 #[test]
476 #[test]
477 fn test_contains() {
477 fn test_contains() {
478 let mut lazy =
478 let mut lazy =
479 AncestorsIterator::new(SampleGraph, vec![10, 1], 0, true).unwrap();
479 AncestorsIterator::new(SampleGraph, vec![10, 1], 0, true).unwrap();
480 assert!(lazy.contains(1).unwrap());
480 assert!(lazy.contains(1).unwrap());
481 assert!(!lazy.contains(3).unwrap());
481 assert!(!lazy.contains(3).unwrap());
482
482
483 let mut lazy =
483 let mut lazy =
484 AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
484 AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
485 assert!(!lazy.contains(NULL_REVISION).unwrap());
485 assert!(!lazy.contains(NULL_REVISION).unwrap());
486 }
486 }
487
487
488 #[test]
488 #[test]
489 fn test_peek() {
489 fn test_peek() {
490 let mut iter =
490 let mut iter =
491 AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
491 AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
492 // peek() gives us the next value
492 // peek() gives us the next value
493 assert_eq!(iter.peek(), Some(10));
493 assert_eq!(iter.peek(), Some(10));
494 // but it's not been consumed
494 // but it's not been consumed
495 assert_eq!(iter.next(), Some(Ok(10)));
495 assert_eq!(iter.next(), Some(Ok(10)));
496 // and iteration resumes normally
496 // and iteration resumes normally
497 assert_eq!(iter.next(), Some(Ok(5)));
497 assert_eq!(iter.next(), Some(Ok(5)));
498
498
499 // let's drain the iterator to test peek() at the end
499 // let's drain the iterator to test peek() at the end
500 while iter.next().is_some() {}
500 while iter.next().is_some() {}
501 assert_eq!(iter.peek(), None);
501 assert_eq!(iter.peek(), None);
502 }
502 }
503
503
504 #[test]
504 #[test]
505 fn test_empty() {
505 fn test_empty() {
506 let mut iter =
506 let mut iter =
507 AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
507 AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
508 assert!(!iter.is_empty());
508 assert!(!iter.is_empty());
509 while iter.next().is_some() {}
509 while iter.next().is_some() {}
510 assert!(!iter.is_empty());
510 assert!(!iter.is_empty());
511
511
512 let iter =
512 let iter =
513 AncestorsIterator::new(SampleGraph, vec![], 0, true).unwrap();
513 AncestorsIterator::new(SampleGraph, vec![], 0, true).unwrap();
514 assert!(iter.is_empty());
514 assert!(iter.is_empty());
515
515
516 // case where iter.seen == {NULL_REVISION}
516 // case where iter.seen == {NULL_REVISION}
517 let iter =
517 let iter =
518 AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
518 AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
519 assert!(iter.is_empty());
519 assert!(iter.is_empty());
520 }
520 }
521
521
522 /// A corrupted Graph, supporting error handling tests
522 /// A corrupted Graph, supporting error handling tests
523 #[derive(Clone, Debug)]
523 #[derive(Clone, Debug)]
524 struct Corrupted;
524 struct Corrupted;
525
525
526 impl Graph for Corrupted {
526 impl Graph for Corrupted {
527 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
527 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
528 match rev {
528 match rev {
529 1 => Ok([0, -1]),
529 1 => Ok([0, -1]),
530 r => Err(GraphError::ParentOutOfRange(r)),
530 r => Err(GraphError::ParentOutOfRange(r)),
531 }
531 }
532 }
532 }
533 }
533 }
534
534
535 #[test]
535 #[test]
536 fn test_initrev_out_of_range() {
536 fn test_initrev_out_of_range() {
537 // inclusive=false looks up initrev's parents right away
537 // inclusive=false looks up initrev's parents right away
538 match AncestorsIterator::new(SampleGraph, vec![25], 0, false) {
538 match AncestorsIterator::new(SampleGraph, vec![25], 0, false) {
539 Ok(_) => panic!("Should have been ParentOutOfRange"),
539 Ok(_) => panic!("Should have been ParentOutOfRange"),
540 Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25)),
540 Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25)),
541 }
541 }
542 }
542 }
543
543
544 #[test]
544 #[test]
545 fn test_next_out_of_range() {
545 fn test_next_out_of_range() {
546 // inclusive=false looks up initrev's parents right away
546 // inclusive=false looks up initrev's parents right away
547 let mut iter =
547 let mut iter =
548 AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
548 AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
549 assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0))));
549 assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0))));
550 }
550 }
551
551
552 #[test]
552 #[test]
553 fn test_lazy_iter_contains() {
553 fn test_lazy_iter_contains() {
554 let mut lazy =
554 let mut lazy =
555 LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap();
555 LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap();
556
556
557 let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
557 let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
558 // compare with iterator tests on the same initial revisions
558 // compare with iterator tests on the same initial revisions
559 assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);
559 assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);
560
560
561 // contains() results are correct, unaffected by the fact that
561 // contains() results are correct, unaffected by the fact that
562 // we consumed entirely an iterator out of lazy
562 // we consumed entirely an iterator out of lazy
563 assert_eq!(lazy.contains(2), Ok(true));
563 assert_eq!(lazy.contains(2), Ok(true));
564 assert_eq!(lazy.contains(9), Ok(false));
564 assert_eq!(lazy.contains(9), Ok(false));
565 }
565 }
566
566
567 #[test]
567 #[test]
568 fn test_lazy_contains_iter() {
568 fn test_lazy_contains_iter() {
569 let mut lazy =
569 let mut lazy =
570 LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap(); // reminder: [8, 7, 4, 3, 2, 1, 0]
570 LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap(); // reminder: [8, 7, 4, 3, 2, 1, 0]
571
571
572 assert_eq!(lazy.contains(2), Ok(true));
572 assert_eq!(lazy.contains(2), Ok(true));
573 assert_eq!(lazy.contains(6), Ok(false));
573 assert_eq!(lazy.contains(6), Ok(false));
574
574
575 // after consumption of 2 by the inner iterator, results stay
575 // after consumption of 2 by the inner iterator, results stay
576 // consistent
576 // consistent
577 assert_eq!(lazy.contains(2), Ok(true));
577 assert_eq!(lazy.contains(2), Ok(true));
578 assert_eq!(lazy.contains(5), Ok(false));
578 assert_eq!(lazy.contains(5), Ok(false));
579
579
580 // iter() still gives us a fresh iterator
580 // iter() still gives us a fresh iterator
581 let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
581 let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
582 assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);
582 assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);
583 }
583 }
584
584
585 #[test]
585 #[test]
586 /// Test constructor, add/get bases and heads
586 /// Test constructor, add/get bases and heads
587 fn test_missing_bases() -> Result<(), GraphError> {
587 fn test_missing_bases() -> Result<(), GraphError> {
588 let mut missing_ancestors =
588 let mut missing_ancestors =
589 MissingAncestors::new(SampleGraph, [5, 3, 1, 3].iter().cloned());
589 MissingAncestors::new(SampleGraph, [5, 3, 1, 3].iter().cloned());
590 let mut as_vec: Vec<Revision> =
590 let mut as_vec: Vec<Revision> =
591 missing_ancestors.get_bases().iter().cloned().collect();
591 missing_ancestors.get_bases().iter().cloned().collect();
592 as_vec.sort();
592 as_vec.sort();
593 assert_eq!(as_vec, [1, 3, 5]);
593 assert_eq!(as_vec, [1, 3, 5]);
594 assert_eq!(missing_ancestors.max_base, 5);
594 assert_eq!(missing_ancestors.max_base, 5);
595
595
596 missing_ancestors.add_bases([3, 7, 8].iter().cloned());
596 missing_ancestors.add_bases([3, 7, 8].iter().cloned());
597 as_vec = missing_ancestors.get_bases().iter().cloned().collect();
597 as_vec = missing_ancestors.get_bases().iter().cloned().collect();
598 as_vec.sort();
598 as_vec.sort();
599 assert_eq!(as_vec, [1, 3, 5, 7, 8]);
599 assert_eq!(as_vec, [1, 3, 5, 7, 8]);
600 assert_eq!(missing_ancestors.max_base, 8);
600 assert_eq!(missing_ancestors.max_base, 8);
601
601
602 as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
602 as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
603 as_vec.sort();
603 as_vec.sort();
604 assert_eq!(as_vec, [3, 5, 7, 8]);
604 assert_eq!(as_vec, [3, 5, 7, 8]);
605 Ok(())
605 Ok(())
606 }
606 }
607
607
608 fn assert_missing_remove(
608 fn assert_missing_remove(
609 bases: &[Revision],
609 bases: &[Revision],
610 revs: &[Revision],
610 revs: &[Revision],
611 expected: &[Revision],
611 expected: &[Revision],
612 ) {
612 ) {
613 let mut missing_ancestors =
613 let mut missing_ancestors =
614 MissingAncestors::new(SampleGraph, bases.iter().cloned());
614 MissingAncestors::new(SampleGraph, bases.iter().cloned());
615 let mut revset: HashSet<Revision> = revs.iter().cloned().collect();
615 let mut revset: HashSet<Revision> = revs.iter().cloned().collect();
616 missing_ancestors
616 missing_ancestors
617 .remove_ancestors_from(&mut revset)
617 .remove_ancestors_from(&mut revset)
618 .unwrap();
618 .unwrap();
619 let mut as_vec: Vec<Revision> = revset.into_iter().collect();
619 let mut as_vec: Vec<Revision> = revset.into_iter().collect();
620 as_vec.sort();
620 as_vec.sort();
621 assert_eq!(as_vec.as_slice(), expected);
621 assert_eq!(as_vec.as_slice(), expected);
622 }
622 }
623
623
624 #[test]
624 #[test]
625 fn test_missing_remove() {
625 fn test_missing_remove() {
626 assert_missing_remove(
626 assert_missing_remove(
627 &[1, 2, 3, 4, 7],
627 &[1, 2, 3, 4, 7],
628 Vec::from_iter(1..10).as_slice(),
628 Vec::from_iter(1..10).as_slice(),
629 &[5, 6, 8, 9],
629 &[5, 6, 8, 9],
630 );
630 );
631 assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
631 assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
632 assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
632 assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
633 }
633 }
634
634
635 fn assert_missing_ancestors(
635 fn assert_missing_ancestors(
636 bases: &[Revision],
636 bases: &[Revision],
637 revs: &[Revision],
637 revs: &[Revision],
638 expected: &[Revision],
638 expected: &[Revision],
639 ) {
639 ) {
640 let mut missing_ancestors =
640 let mut missing_ancestors =
641 MissingAncestors::new(SampleGraph, bases.iter().cloned());
641 MissingAncestors::new(SampleGraph, bases.iter().cloned());
642 let missing = missing_ancestors
642 let missing = missing_ancestors
643 .missing_ancestors(revs.iter().cloned())
643 .missing_ancestors(revs.iter().cloned())
644 .unwrap();
644 .unwrap();
645 assert_eq!(missing.as_slice(), expected);
645 assert_eq!(missing.as_slice(), expected);
646 }
646 }
647
647
648 #[test]
648 #[test]
649 fn test_missing_ancestors() {
649 fn test_missing_ancestors() {
650 // examples taken from test-ancestors.py by having it run
650 // examples taken from test-ancestors.py by having it run
651 // on the same graph (both naive and fast Python algs)
651 // on the same graph (both naive and fast Python algs)
652 assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
652 assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
653 assert_missing_ancestors(&[11], &[10], &[5, 10]);
653 assert_missing_ancestors(&[11], &[10], &[5, 10]);
654 assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
654 assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
655 }
655 }
656
656
657 /// An interesting case found by a random generator similar to
657 /// An interesting case found by a random generator similar to
658 /// the one in test-ancestor.py. An early version of Rust MissingAncestors
658 /// the one in test-ancestor.py. An early version of Rust MissingAncestors
659 /// failed this, yet none of the integration tests of the whole suite
659 /// failed this, yet none of the integration tests of the whole suite
660 /// catched it.
660 /// catched it.
661 #[test]
661 #[test]
662 fn test_remove_ancestors_from_case1() {
662 fn test_remove_ancestors_from_case1() {
663 let graph: VecGraph = vec![
663 let graph: VecGraph = vec![
664 [NULL_REVISION, NULL_REVISION],
664 [NULL_REVISION, NULL_REVISION],
665 [0, NULL_REVISION],
665 [0, NULL_REVISION],
666 [1, 0],
666 [1, 0],
667 [2, 1],
667 [2, 1],
668 [3, NULL_REVISION],
668 [3, NULL_REVISION],
669 [4, NULL_REVISION],
669 [4, NULL_REVISION],
670 [5, 1],
670 [5, 1],
671 [2, NULL_REVISION],
671 [2, NULL_REVISION],
672 [7, NULL_REVISION],
672 [7, NULL_REVISION],
673 [8, NULL_REVISION],
673 [8, NULL_REVISION],
674 [9, NULL_REVISION],
674 [9, NULL_REVISION],
675 [10, 1],
675 [10, 1],
676 [3, NULL_REVISION],
676 [3, NULL_REVISION],
677 [12, NULL_REVISION],
677 [12, NULL_REVISION],
678 [13, NULL_REVISION],
678 [13, NULL_REVISION],
679 [14, NULL_REVISION],
679 [14, NULL_REVISION],
680 [4, NULL_REVISION],
680 [4, NULL_REVISION],
681 [16, NULL_REVISION],
681 [16, NULL_REVISION],
682 [17, NULL_REVISION],
682 [17, NULL_REVISION],
683 [18, NULL_REVISION],
683 [18, NULL_REVISION],
684 [19, 11],
684 [19, 11],
685 [20, NULL_REVISION],
685 [20, NULL_REVISION],
686 [21, NULL_REVISION],
686 [21, NULL_REVISION],
687 [22, NULL_REVISION],
687 [22, NULL_REVISION],
688 [23, NULL_REVISION],
688 [23, NULL_REVISION],
689 [2, NULL_REVISION],
689 [2, NULL_REVISION],
690 [3, NULL_REVISION],
690 [3, NULL_REVISION],
691 [26, 24],
691 [26, 24],
692 [27, NULL_REVISION],
692 [27, NULL_REVISION],
693 [28, NULL_REVISION],
693 [28, NULL_REVISION],
694 [12, NULL_REVISION],
694 [12, NULL_REVISION],
695 [1, NULL_REVISION],
695 [1, NULL_REVISION],
696 [1, 9],
696 [1, 9],
697 [32, NULL_REVISION],
697 [32, NULL_REVISION],
698 [33, NULL_REVISION],
698 [33, NULL_REVISION],
699 [34, 31],
699 [34, 31],
700 [35, NULL_REVISION],
700 [35, NULL_REVISION],
701 [36, 26],
701 [36, 26],
702 [37, NULL_REVISION],
702 [37, NULL_REVISION],
703 [38, NULL_REVISION],
703 [38, NULL_REVISION],
704 [39, NULL_REVISION],
704 [39, NULL_REVISION],
705 [40, NULL_REVISION],
705 [40, NULL_REVISION],
706 [41, NULL_REVISION],
706 [41, NULL_REVISION],
707 [42, 26],
707 [42, 26],
708 [0, NULL_REVISION],
708 [0, NULL_REVISION],
709 [44, NULL_REVISION],
709 [44, NULL_REVISION],
710 [45, 4],
710 [45, 4],
711 [40, NULL_REVISION],
711 [40, NULL_REVISION],
712 [47, NULL_REVISION],
712 [47, NULL_REVISION],
713 [36, 0],
713 [36, 0],
714 [49, NULL_REVISION],
714 [49, NULL_REVISION],
715 [NULL_REVISION, NULL_REVISION],
715 [NULL_REVISION, NULL_REVISION],
716 [51, NULL_REVISION],
716 [51, NULL_REVISION],
717 [52, NULL_REVISION],
717 [52, NULL_REVISION],
718 [53, NULL_REVISION],
718 [53, NULL_REVISION],
719 [14, NULL_REVISION],
719 [14, NULL_REVISION],
720 [55, NULL_REVISION],
720 [55, NULL_REVISION],
721 [15, NULL_REVISION],
721 [15, NULL_REVISION],
722 [23, NULL_REVISION],
722 [23, NULL_REVISION],
723 [58, NULL_REVISION],
723 [58, NULL_REVISION],
724 [59, NULL_REVISION],
724 [59, NULL_REVISION],
725 [2, NULL_REVISION],
725 [2, NULL_REVISION],
726 [61, 59],
726 [61, 59],
727 [62, NULL_REVISION],
727 [62, NULL_REVISION],
728 [63, NULL_REVISION],
728 [63, NULL_REVISION],
729 [NULL_REVISION, NULL_REVISION],
729 [NULL_REVISION, NULL_REVISION],
730 [65, NULL_REVISION],
730 [65, NULL_REVISION],
731 [66, NULL_REVISION],
731 [66, NULL_REVISION],
732 [67, NULL_REVISION],
732 [67, NULL_REVISION],
733 [68, NULL_REVISION],
733 [68, NULL_REVISION],
734 [37, 28],
734 [37, 28],
735 [69, 25],
735 [69, 25],
736 [71, NULL_REVISION],
736 [71, NULL_REVISION],
737 [72, NULL_REVISION],
737 [72, NULL_REVISION],
738 [50, 2],
738 [50, 2],
739 [74, NULL_REVISION],
739 [74, NULL_REVISION],
740 [12, NULL_REVISION],
740 [12, NULL_REVISION],
741 [18, NULL_REVISION],
741 [18, NULL_REVISION],
742 [77, NULL_REVISION],
742 [77, NULL_REVISION],
743 [78, NULL_REVISION],
743 [78, NULL_REVISION],
744 [79, NULL_REVISION],
744 [79, NULL_REVISION],
745 [43, 33],
745 [43, 33],
746 [81, NULL_REVISION],
746 [81, NULL_REVISION],
747 [82, NULL_REVISION],
747 [82, NULL_REVISION],
748 [83, NULL_REVISION],
748 [83, NULL_REVISION],
749 [84, 45],
749 [84, 45],
750 [85, NULL_REVISION],
750 [85, NULL_REVISION],
751 [86, NULL_REVISION],
751 [86, NULL_REVISION],
752 [NULL_REVISION, NULL_REVISION],
752 [NULL_REVISION, NULL_REVISION],
753 [88, NULL_REVISION],
753 [88, NULL_REVISION],
754 [NULL_REVISION, NULL_REVISION],
754 [NULL_REVISION, NULL_REVISION],
755 [76, 83],
755 [76, 83],
756 [44, NULL_REVISION],
756 [44, NULL_REVISION],
757 [92, NULL_REVISION],
757 [92, NULL_REVISION],
758 [93, NULL_REVISION],
758 [93, NULL_REVISION],
759 [9, NULL_REVISION],
759 [9, NULL_REVISION],
760 [95, 67],
760 [95, 67],
761 [96, NULL_REVISION],
761 [96, NULL_REVISION],
762 [97, NULL_REVISION],
762 [97, NULL_REVISION],
763 [NULL_REVISION, NULL_REVISION],
763 [NULL_REVISION, NULL_REVISION],
764 ];
764 ];
765 let problem_rev = 28 as Revision;
765 let problem_rev = 28 as Revision;
766 let problem_base = 70 as Revision;
766 let problem_base = 70 as Revision;
767 // making the problem obvious: problem_rev is a parent of problem_base
767 // making the problem obvious: problem_rev is a parent of problem_base
768 assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
768 assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
769
769
770 let mut missing_ancestors: MissingAncestors<VecGraph> =
770 let mut missing_ancestors: MissingAncestors<VecGraph> =
771 MissingAncestors::new(
771 MissingAncestors::new(
772 graph,
772 graph,
773 [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
773 [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
774 .iter()
774 .iter()
775 .cloned(),
775 .cloned(),
776 );
776 );
777 assert!(missing_ancestors.bases.contains(&problem_base));
777 assert!(missing_ancestors.bases.contains(&problem_base));
778
778
779 let mut revs: HashSet<Revision> =
779 let mut revs: HashSet<Revision> =
780 [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
780 [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
781 .iter()
781 .iter()
782 .cloned()
782 .cloned()
783 .collect();
783 .collect();
784 missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
784 missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
785 assert!(!revs.contains(&problem_rev));
785 assert!(!revs.contains(&problem_rev));
786 }
786 }
787
788 }
787 }
@@ -1,276 +1,275 b''
1 // dagops.rs
1 // dagops.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Miscellaneous DAG operations
8 //! Miscellaneous DAG operations
9 //!
9 //!
10 //! # Terminology
10 //! # Terminology
11 //! - By *relative heads* of a collection of revision numbers (`Revision`),
11 //! - By *relative heads* of a collection of revision numbers (`Revision`), we
12 //! we mean those revisions that have no children among the collection.
12 //! mean those revisions that have no children among the collection.
13 //! - Similarly *relative roots* of a collection of `Revision`, we mean
13 //! - Similarly *relative roots* of a collection of `Revision`, we mean those
14 //! those whose parents, if any, don't belong to the collection.
14 //! whose parents, if any, don't belong to the collection.
15 use super::{Graph, GraphError, Revision, NULL_REVISION};
15 use super::{Graph, GraphError, Revision, NULL_REVISION};
16 use crate::ancestors::AncestorsIterator;
16 use crate::ancestors::AncestorsIterator;
17 use std::collections::{BTreeSet, HashSet};
17 use std::collections::{BTreeSet, HashSet};
18
18
19 fn remove_parents(
19 fn remove_parents(
20 graph: &impl Graph,
20 graph: &impl Graph,
21 rev: Revision,
21 rev: Revision,
22 set: &mut HashSet<Revision>,
22 set: &mut HashSet<Revision>,
23 ) -> Result<(), GraphError> {
23 ) -> Result<(), GraphError> {
24 for parent in graph.parents(rev)?.iter() {
24 for parent in graph.parents(rev)?.iter() {
25 if *parent != NULL_REVISION {
25 if *parent != NULL_REVISION {
26 set.remove(parent);
26 set.remove(parent);
27 }
27 }
28 }
28 }
29 Ok(())
29 Ok(())
30 }
30 }
31
31
32 /// Relative heads out of some revisions, passed as an iterator.
32 /// Relative heads out of some revisions, passed as an iterator.
33 ///
33 ///
34 /// These heads are defined as those revisions that have no children
34 /// These heads are defined as those revisions that have no children
35 /// among those emitted by the iterator.
35 /// among those emitted by the iterator.
36 ///
36 ///
37 /// # Performance notes
37 /// # Performance notes
38 /// Internally, this clones the iterator, and builds a `HashSet` out of it.
38 /// Internally, this clones the iterator, and builds a `HashSet` out of it.
39 ///
39 ///
40 /// This function takes an `Iterator` instead of `impl IntoIterator` to
40 /// This function takes an `Iterator` instead of `impl IntoIterator` to
41 /// guarantee that cloning the iterator doesn't result in cloning the full
41 /// guarantee that cloning the iterator doesn't result in cloning the full
42 /// construct it comes from.
42 /// construct it comes from.
43 pub fn heads<'a>(
43 pub fn heads<'a>(
44 graph: &impl Graph,
44 graph: &impl Graph,
45 iter_revs: impl Clone + Iterator<Item = &'a Revision>,
45 iter_revs: impl Clone + Iterator<Item = &'a Revision>,
46 ) -> Result<HashSet<Revision>, GraphError> {
46 ) -> Result<HashSet<Revision>, GraphError> {
47 let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
47 let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
48 heads.remove(&NULL_REVISION);
48 heads.remove(&NULL_REVISION);
49 for rev in iter_revs {
49 for rev in iter_revs {
50 if *rev != NULL_REVISION {
50 if *rev != NULL_REVISION {
51 remove_parents(graph, *rev, &mut heads)?;
51 remove_parents(graph, *rev, &mut heads)?;
52 }
52 }
53 }
53 }
54 Ok(heads)
54 Ok(heads)
55 }
55 }
56
56
57 /// Retain in `revs` only its relative heads.
57 /// Retain in `revs` only its relative heads.
58 ///
58 ///
59 /// This is an in-place operation, so that control of the incoming
59 /// This is an in-place operation, so that control of the incoming
60 /// set is left to the caller.
60 /// set is left to the caller.
61 /// - a direct Python binding would probably need to build its own `HashSet`
61 /// - a direct Python binding would probably need to build its own `HashSet`
62 /// from an incoming iterable, even if its sole purpose is to extract the
62 /// from an incoming iterable, even if its sole purpose is to extract the
63 /// heads.
63 /// heads.
64 /// - a Rust caller can decide whether cloning beforehand is appropriate
64 /// - a Rust caller can decide whether cloning beforehand is appropriate
65 ///
65 ///
66 /// # Performance notes
66 /// # Performance notes
67 /// Internally, this function will store a full copy of `revs` in a `Vec`.
67 /// Internally, this function will store a full copy of `revs` in a `Vec`.
68 pub fn retain_heads(
68 pub fn retain_heads(
69 graph: &impl Graph,
69 graph: &impl Graph,
70 revs: &mut HashSet<Revision>,
70 revs: &mut HashSet<Revision>,
71 ) -> Result<(), GraphError> {
71 ) -> Result<(), GraphError> {
72 revs.remove(&NULL_REVISION);
72 revs.remove(&NULL_REVISION);
73 // we need to construct an iterable copy of revs to avoid itering while
73 // we need to construct an iterable copy of revs to avoid itering while
74 // mutating
74 // mutating
75 let as_vec: Vec<Revision> = revs.iter().cloned().collect();
75 let as_vec: Vec<Revision> = revs.iter().cloned().collect();
76 for rev in as_vec {
76 for rev in as_vec {
77 if rev != NULL_REVISION {
77 if rev != NULL_REVISION {
78 remove_parents(graph, rev, revs)?;
78 remove_parents(graph, rev, revs)?;
79 }
79 }
80 }
80 }
81 Ok(())
81 Ok(())
82 }
82 }
83
83
84 /// Roots of `revs`, passed as a `HashSet`
84 /// Roots of `revs`, passed as a `HashSet`
85 ///
85 ///
86 /// They are returned in arbitrary order
86 /// They are returned in arbitrary order
87 pub fn roots<G: Graph>(
87 pub fn roots<G: Graph>(
88 graph: &G,
88 graph: &G,
89 revs: &HashSet<Revision>,
89 revs: &HashSet<Revision>,
90 ) -> Result<Vec<Revision>, GraphError> {
90 ) -> Result<Vec<Revision>, GraphError> {
91 let mut roots: Vec<Revision> = Vec::new();
91 let mut roots: Vec<Revision> = Vec::new();
92 for rev in revs {
92 for rev in revs {
93 if graph
93 if graph
94 .parents(*rev)?
94 .parents(*rev)?
95 .iter()
95 .iter()
96 .filter(|p| **p != NULL_REVISION)
96 .filter(|p| **p != NULL_REVISION)
97 .all(|p| !revs.contains(p))
97 .all(|p| !revs.contains(p))
98 {
98 {
99 roots.push(*rev);
99 roots.push(*rev);
100 }
100 }
101 }
101 }
102 Ok(roots)
102 Ok(roots)
103 }
103 }
104
104
105 /// Compute the topological range between two collections of revisions
105 /// Compute the topological range between two collections of revisions
106 ///
106 ///
107 /// This is equivalent to the revset `<roots>::<heads>`.
107 /// This is equivalent to the revset `<roots>::<heads>`.
108 ///
108 ///
109 /// Currently, the given `Graph` has to implement `Clone`, which means
109 /// Currently, the given `Graph` has to implement `Clone`, which means
110 /// actually cloning just a reference-counted Python pointer if
110 /// actually cloning just a reference-counted Python pointer if
111 /// it's passed over through `rust-cpython`. This is due to the internal
111 /// it's passed over through `rust-cpython`. This is due to the internal
112 /// use of `AncestorsIterator`
112 /// use of `AncestorsIterator`
113 ///
113 ///
114 /// # Algorithmic details
114 /// # Algorithmic details
115 ///
115 ///
116 /// This is a two-pass swipe inspired from what `reachableroots2` from
116 /// This is a two-pass swipe inspired from what `reachableroots2` from
117 /// `mercurial.cext.parsers` does to obtain the same results.
117 /// `mercurial.cext.parsers` does to obtain the same results.
118 ///
118 ///
119 /// - first, we climb up the DAG from `heads` in topological order, keeping
119 /// - first, we climb up the DAG from `heads` in topological order, keeping
120 /// them in the vector `heads_ancestors` vector, and adding any element of
120 /// them in the vector `heads_ancestors` vector, and adding any element of
121 /// `roots` we find among them to the resulting range.
121 /// `roots` we find among them to the resulting range.
122 /// - Then, we iterate on that recorded vector so that a revision is always
122 /// - Then, we iterate on that recorded vector so that a revision is always
123 /// emitted after its parents and add all revisions whose parents are already
123 /// emitted after its parents and add all revisions whose parents are already
124 /// in the range to the results.
124 /// in the range to the results.
125 ///
125 ///
126 /// # Performance notes
126 /// # Performance notes
127 ///
127 ///
128 /// The main difference with the C implementation is that
128 /// The main difference with the C implementation is that
129 /// the latter uses a flat array with bit flags, instead of complex structures
129 /// the latter uses a flat array with bit flags, instead of complex structures
130 /// like `HashSet`, making it faster in most scenarios. In theory, it's
130 /// like `HashSet`, making it faster in most scenarios. In theory, it's
131 /// possible that the present implementation could be more memory efficient
131 /// possible that the present implementation could be more memory efficient
132 /// for very large repositories with many branches.
132 /// for very large repositories with many branches.
133 pub fn range(
133 pub fn range(
134 graph: &(impl Graph + Clone),
134 graph: &(impl Graph + Clone),
135 roots: impl IntoIterator<Item = Revision>,
135 roots: impl IntoIterator<Item = Revision>,
136 heads: impl IntoIterator<Item = Revision>,
136 heads: impl IntoIterator<Item = Revision>,
137 ) -> Result<BTreeSet<Revision>, GraphError> {
137 ) -> Result<BTreeSet<Revision>, GraphError> {
138 let mut range = BTreeSet::new();
138 let mut range = BTreeSet::new();
139 let roots: HashSet<Revision> = roots.into_iter().collect();
139 let roots: HashSet<Revision> = roots.into_iter().collect();
140 let min_root: Revision = match roots.iter().cloned().min() {
140 let min_root: Revision = match roots.iter().cloned().min() {
141 None => {
141 None => {
142 return Ok(range);
142 return Ok(range);
143 }
143 }
144 Some(r) => r,
144 Some(r) => r,
145 };
145 };
146
146
147 // Internally, AncestorsIterator currently maintains a `HashSet`
147 // Internally, AncestorsIterator currently maintains a `HashSet`
148 // of all seen revision, which is also what we record, albeit in an ordered
148 // of all seen revision, which is also what we record, albeit in an ordered
149 // way. There's room for improvement on this duplication.
149 // way. There's room for improvement on this duplication.
150 let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
150 let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
151 let mut heads_ancestors: Vec<Revision> = Vec::new();
151 let mut heads_ancestors: Vec<Revision> = Vec::new();
152 for revres in ait {
152 for revres in ait {
153 let rev = revres?;
153 let rev = revres?;
154 if roots.contains(&rev) {
154 if roots.contains(&rev) {
155 range.insert(rev);
155 range.insert(rev);
156 }
156 }
157 heads_ancestors.push(rev);
157 heads_ancestors.push(rev);
158 }
158 }
159
159
160 for rev in heads_ancestors.into_iter().rev() {
160 for rev in heads_ancestors.into_iter().rev() {
161 for parent in graph.parents(rev)?.iter() {
161 for parent in graph.parents(rev)?.iter() {
162 if *parent != NULL_REVISION && range.contains(parent) {
162 if *parent != NULL_REVISION && range.contains(parent) {
163 range.insert(rev);
163 range.insert(rev);
164 }
164 }
165 }
165 }
166 }
166 }
167 Ok(range)
167 Ok(range)
168 }
168 }
169
169
170 #[cfg(test)]
170 #[cfg(test)]
171 mod tests {
171 mod tests {
172
172
173 use super::*;
173 use super::*;
174 use crate::testing::SampleGraph;
174 use crate::testing::SampleGraph;
175
175
176 /// Apply `retain_heads()` to the given slice and return as a sorted `Vec`
176 /// Apply `retain_heads()` to the given slice and return as a sorted `Vec`
177 fn retain_heads_sorted(
177 fn retain_heads_sorted(
178 graph: &impl Graph,
178 graph: &impl Graph,
179 revs: &[Revision],
179 revs: &[Revision],
180 ) -> Result<Vec<Revision>, GraphError> {
180 ) -> Result<Vec<Revision>, GraphError> {
181 let mut revs: HashSet<Revision> = revs.iter().cloned().collect();
181 let mut revs: HashSet<Revision> = revs.iter().cloned().collect();
182 retain_heads(graph, &mut revs)?;
182 retain_heads(graph, &mut revs)?;
183 let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
183 let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
184 as_vec.sort();
184 as_vec.sort();
185 Ok(as_vec)
185 Ok(as_vec)
186 }
186 }
187
187
188 #[test]
188 #[test]
189 fn test_retain_heads() -> Result<(), GraphError> {
189 fn test_retain_heads() -> Result<(), GraphError> {
190 assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
190 assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
191 assert_eq!(
191 assert_eq!(
192 retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
192 retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
193 vec![1, 6, 12]
193 vec![1, 6, 12]
194 );
194 );
195 assert_eq!(
195 assert_eq!(
196 retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
196 retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
197 vec![3, 5, 8, 9]
197 vec![3, 5, 8, 9]
198 );
198 );
199 Ok(())
199 Ok(())
200 }
200 }
201
201
202 /// Apply `heads()` to the given slice and return as a sorted `Vec`
202 /// Apply `heads()` to the given slice and return as a sorted `Vec`
203 fn heads_sorted(
203 fn heads_sorted(
204 graph: &impl Graph,
204 graph: &impl Graph,
205 revs: &[Revision],
205 revs: &[Revision],
206 ) -> Result<Vec<Revision>, GraphError> {
206 ) -> Result<Vec<Revision>, GraphError> {
207 let heads = heads(graph, revs.iter())?;
207 let heads = heads(graph, revs.iter())?;
208 let mut as_vec: Vec<Revision> = heads.iter().cloned().collect();
208 let mut as_vec: Vec<Revision> = heads.iter().cloned().collect();
209 as_vec.sort();
209 as_vec.sort();
210 Ok(as_vec)
210 Ok(as_vec)
211 }
211 }
212
212
213 #[test]
213 #[test]
214 fn test_heads() -> Result<(), GraphError> {
214 fn test_heads() -> Result<(), GraphError> {
215 assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
215 assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
216 assert_eq!(
216 assert_eq!(
217 heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
217 heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
218 vec![1, 6, 12]
218 vec![1, 6, 12]
219 );
219 );
220 assert_eq!(
220 assert_eq!(
221 heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
221 heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
222 vec![3, 5, 8, 9]
222 vec![3, 5, 8, 9]
223 );
223 );
224 Ok(())
224 Ok(())
225 }
225 }
226
226
227 /// Apply `roots()` and sort the result for easier comparison
227 /// Apply `roots()` and sort the result for easier comparison
228 fn roots_sorted(
228 fn roots_sorted(
229 graph: &impl Graph,
229 graph: &impl Graph,
230 revs: &[Revision],
230 revs: &[Revision],
231 ) -> Result<Vec<Revision>, GraphError> {
231 ) -> Result<Vec<Revision>, GraphError> {
232 let mut as_vec = roots(graph, &revs.iter().cloned().collect())?;
232 let mut as_vec = roots(graph, &revs.iter().cloned().collect())?;
233 as_vec.sort();
233 as_vec.sort();
234 Ok(as_vec)
234 Ok(as_vec)
235 }
235 }
236
236
237 #[test]
237 #[test]
238 fn test_roots() -> Result<(), GraphError> {
238 fn test_roots() -> Result<(), GraphError> {
239 assert_eq!(roots_sorted(&SampleGraph, &[4, 5, 6])?, vec![4]);
239 assert_eq!(roots_sorted(&SampleGraph, &[4, 5, 6])?, vec![4]);
240 assert_eq!(
240 assert_eq!(
241 roots_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
241 roots_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
242 vec![0, 4, 12]
242 vec![0, 4, 12]
243 );
243 );
244 assert_eq!(
244 assert_eq!(
245 roots_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
245 roots_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
246 vec![1, 8]
246 vec![1, 8]
247 );
247 );
248 Ok(())
248 Ok(())
249 }
249 }
250
250
251 /// Apply `range()` and convert the result into a Vec for easier comparison
251 /// Apply `range()` and convert the result into a Vec for easier comparison
252 fn range_vec(
252 fn range_vec(
253 graph: impl Graph + Clone,
253 graph: impl Graph + Clone,
254 roots: &[Revision],
254 roots: &[Revision],
255 heads: &[Revision],
255 heads: &[Revision],
256 ) -> Result<Vec<Revision>, GraphError> {
256 ) -> Result<Vec<Revision>, GraphError> {
257 range(&graph, roots.iter().cloned(), heads.iter().cloned())
257 range(&graph, roots.iter().cloned(), heads.iter().cloned())
258 .map(|bs| bs.into_iter().collect())
258 .map(|bs| bs.into_iter().collect())
259 }
259 }
260
260
261 #[test]
261 #[test]
262 fn test_range() -> Result<(), GraphError> {
262 fn test_range() -> Result<(), GraphError> {
263 assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
263 assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
264 assert_eq!(range_vec(SampleGraph, &[0], &[8])?, vec![]);
264 assert_eq!(range_vec(SampleGraph, &[0], &[8])?, vec![]);
265 assert_eq!(
265 assert_eq!(
266 range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
266 range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
267 vec![5, 10]
267 vec![5, 10]
268 );
268 );
269 assert_eq!(
269 assert_eq!(
270 range_vec(SampleGraph, &[5, 6], &[10, 12])?,
270 range_vec(SampleGraph, &[5, 6], &[10, 12])?,
271 vec![5, 6, 9, 10, 12]
271 vec![5, 6, 9, 10, 12]
272 );
272 );
273 Ok(())
273 Ok(())
274 }
274 }
275
276 }
275 }
@@ -1,320 +1,319 b''
1 // dirs_multiset.rs
1 // dirs_multiset.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! A multiset of directory names.
8 //! A multiset of directory names.
9 //!
9 //!
10 //! Used to counts the references to directories in a manifest or dirstate.
10 //! Used to counts the references to directories in a manifest or dirstate.
11 use crate::{
11 use crate::{
12 dirstate::EntryState, utils::files, DirstateEntry, DirstateMapError,
12 dirstate::EntryState, utils::files, DirstateEntry, DirstateMapError,
13 };
13 };
14 use std::collections::hash_map::Entry;
14 use std::collections::hash_map::Entry;
15 use std::collections::HashMap;
15 use std::collections::HashMap;
16
16
17 #[derive(PartialEq, Debug)]
17 #[derive(PartialEq, Debug)]
18 pub struct DirsMultiset {
18 pub struct DirsMultiset {
19 inner: HashMap<Vec<u8>, u32>,
19 inner: HashMap<Vec<u8>, u32>,
20 }
20 }
21
21
22 impl DirsMultiset {
22 impl DirsMultiset {
23 /// Initializes the multiset from a dirstate.
23 /// Initializes the multiset from a dirstate.
24 ///
24 ///
25 /// If `skip_state` is provided, skips dirstate entries with equal state.
25 /// If `skip_state` is provided, skips dirstate entries with equal state.
26 pub fn from_dirstate(
26 pub fn from_dirstate(
27 vec: &HashMap<Vec<u8>, DirstateEntry>,
27 vec: &HashMap<Vec<u8>, DirstateEntry>,
28 skip_state: Option<EntryState>,
28 skip_state: Option<EntryState>,
29 ) -> Self {
29 ) -> Self {
30 let mut multiset = DirsMultiset {
30 let mut multiset = DirsMultiset {
31 inner: HashMap::new(),
31 inner: HashMap::new(),
32 };
32 };
33
33
34 for (filename, DirstateEntry { state, .. }) in vec {
34 for (filename, DirstateEntry { state, .. }) in vec {
35 // This `if` is optimized out of the loop
35 // This `if` is optimized out of the loop
36 if let Some(skip) = skip_state {
36 if let Some(skip) = skip_state {
37 if skip != *state {
37 if skip != *state {
38 multiset.add_path(filename);
38 multiset.add_path(filename);
39 }
39 }
40 } else {
40 } else {
41 multiset.add_path(filename);
41 multiset.add_path(filename);
42 }
42 }
43 }
43 }
44
44
45 multiset
45 multiset
46 }
46 }
47
47
48 /// Initializes the multiset from a manifest.
48 /// Initializes the multiset from a manifest.
49 pub fn from_manifest(vec: &Vec<Vec<u8>>) -> Self {
49 pub fn from_manifest(vec: &Vec<Vec<u8>>) -> Self {
50 let mut multiset = DirsMultiset {
50 let mut multiset = DirsMultiset {
51 inner: HashMap::new(),
51 inner: HashMap::new(),
52 };
52 };
53
53
54 for filename in vec {
54 for filename in vec {
55 multiset.add_path(filename);
55 multiset.add_path(filename);
56 }
56 }
57
57
58 multiset
58 multiset
59 }
59 }
60
60
61 /// Increases the count of deepest directory contained in the path.
61 /// Increases the count of deepest directory contained in the path.
62 ///
62 ///
63 /// If the directory is not yet in the map, adds its parents.
63 /// If the directory is not yet in the map, adds its parents.
64 pub fn add_path(&mut self, path: &[u8]) {
64 pub fn add_path(&mut self, path: &[u8]) {
65 for subpath in files::find_dirs(path) {
65 for subpath in files::find_dirs(path) {
66 if let Some(val) = self.inner.get_mut(subpath) {
66 if let Some(val) = self.inner.get_mut(subpath) {
67 *val += 1;
67 *val += 1;
68 break;
68 break;
69 }
69 }
70 self.inner.insert(subpath.to_owned(), 1);
70 self.inner.insert(subpath.to_owned(), 1);
71 }
71 }
72 }
72 }
73
73
74 /// Decreases the count of deepest directory contained in the path.
74 /// Decreases the count of deepest directory contained in the path.
75 ///
75 ///
76 /// If it is the only reference, decreases all parents until one is
76 /// If it is the only reference, decreases all parents until one is
77 /// removed.
77 /// removed.
78 /// If the directory is not in the map, something horrible has happened.
78 /// If the directory is not in the map, something horrible has happened.
79 pub fn delete_path(
79 pub fn delete_path(
80 &mut self,
80 &mut self,
81 path: &[u8],
81 path: &[u8],
82 ) -> Result<(), DirstateMapError> {
82 ) -> Result<(), DirstateMapError> {
83 for subpath in files::find_dirs(path) {
83 for subpath in files::find_dirs(path) {
84 match self.inner.entry(subpath.to_owned()) {
84 match self.inner.entry(subpath.to_owned()) {
85 Entry::Occupied(mut entry) => {
85 Entry::Occupied(mut entry) => {
86 let val = entry.get().clone();
86 let val = entry.get().clone();
87 if val > 1 {
87 if val > 1 {
88 entry.insert(val - 1);
88 entry.insert(val - 1);
89 break;
89 break;
90 }
90 }
91 entry.remove();
91 entry.remove();
92 }
92 }
93 Entry::Vacant(_) => {
93 Entry::Vacant(_) => {
94 return Err(DirstateMapError::PathNotFound(
94 return Err(DirstateMapError::PathNotFound(
95 path.to_owned(),
95 path.to_owned(),
96 ))
96 ))
97 }
97 }
98 };
98 };
99 }
99 }
100
100
101 Ok(())
101 Ok(())
102 }
102 }
103
103
104 pub fn contains(&self, key: &[u8]) -> bool {
104 pub fn contains(&self, key: &[u8]) -> bool {
105 self.inner.contains_key(key)
105 self.inner.contains_key(key)
106 }
106 }
107
107
108 pub fn iter(&self) -> impl Iterator<Item = &Vec<u8>> {
108 pub fn iter(&self) -> impl Iterator<Item = &Vec<u8>> {
109 self.inner.keys()
109 self.inner.keys()
110 }
110 }
111
111
112 pub fn len(&self) -> usize {
112 pub fn len(&self) -> usize {
113 self.inner.len()
113 self.inner.len()
114 }
114 }
115 }
115 }
116
116
117 #[cfg(test)]
117 #[cfg(test)]
118 mod tests {
118 mod tests {
119 use super::*;
119 use super::*;
120 use std::collections::HashMap;
120 use std::collections::HashMap;
121
121
122 #[test]
122 #[test]
123 fn test_delete_path_path_not_found() {
123 fn test_delete_path_path_not_found() {
124 let mut map = DirsMultiset::from_manifest(&vec![]);
124 let mut map = DirsMultiset::from_manifest(&vec![]);
125 let path = b"doesnotexist/";
125 let path = b"doesnotexist/";
126 assert_eq!(
126 assert_eq!(
127 Err(DirstateMapError::PathNotFound(path.to_vec())),
127 Err(DirstateMapError::PathNotFound(path.to_vec())),
128 map.delete_path(path)
128 map.delete_path(path)
129 );
129 );
130 }
130 }
131
131
132 #[test]
132 #[test]
133 fn test_delete_path_empty_path() {
133 fn test_delete_path_empty_path() {
134 let mut map = DirsMultiset::from_manifest(&vec![vec![]]);
134 let mut map = DirsMultiset::from_manifest(&vec![vec![]]);
135 let path = b"";
135 let path = b"";
136 assert_eq!(Ok(()), map.delete_path(path));
136 assert_eq!(Ok(()), map.delete_path(path));
137 assert_eq!(
137 assert_eq!(
138 Err(DirstateMapError::PathNotFound(path.to_vec())),
138 Err(DirstateMapError::PathNotFound(path.to_vec())),
139 map.delete_path(path)
139 map.delete_path(path)
140 );
140 );
141 }
141 }
142
142
143 #[test]
143 #[test]
144 fn test_delete_path_successful() {
144 fn test_delete_path_successful() {
145 let mut map = DirsMultiset {
145 let mut map = DirsMultiset {
146 inner: [("", 5), ("a", 3), ("a/b", 2), ("a/c", 1)]
146 inner: [("", 5), ("a", 3), ("a/b", 2), ("a/c", 1)]
147 .iter()
147 .iter()
148 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
148 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
149 .collect(),
149 .collect(),
150 };
150 };
151
151
152 assert_eq!(Ok(()), map.delete_path(b"a/b/"));
152 assert_eq!(Ok(()), map.delete_path(b"a/b/"));
153 assert_eq!(Ok(()), map.delete_path(b"a/b/"));
153 assert_eq!(Ok(()), map.delete_path(b"a/b/"));
154 assert_eq!(
154 assert_eq!(
155 Err(DirstateMapError::PathNotFound(b"a/b/".to_vec())),
155 Err(DirstateMapError::PathNotFound(b"a/b/".to_vec())),
156 map.delete_path(b"a/b/")
156 map.delete_path(b"a/b/")
157 );
157 );
158
158
159 assert_eq!(2, *map.inner.get(&b"a".to_vec()).unwrap());
159 assert_eq!(2, *map.inner.get(&b"a".to_vec()).unwrap());
160 assert_eq!(1, *map.inner.get(&b"a/c".to_vec()).unwrap());
160 assert_eq!(1, *map.inner.get(&b"a/c".to_vec()).unwrap());
161 eprintln!("{:?}", map);
161 eprintln!("{:?}", map);
162 assert_eq!(Ok(()), map.delete_path(b"a/"));
162 assert_eq!(Ok(()), map.delete_path(b"a/"));
163 eprintln!("{:?}", map);
163 eprintln!("{:?}", map);
164
164
165 assert_eq!(Ok(()), map.delete_path(b"a/c/"));
165 assert_eq!(Ok(()), map.delete_path(b"a/c/"));
166 assert_eq!(
166 assert_eq!(
167 Err(DirstateMapError::PathNotFound(b"a/c/".to_vec())),
167 Err(DirstateMapError::PathNotFound(b"a/c/".to_vec())),
168 map.delete_path(b"a/c/")
168 map.delete_path(b"a/c/")
169 );
169 );
170 }
170 }
171
171
172 #[test]
172 #[test]
173 fn test_add_path_empty_path() {
173 fn test_add_path_empty_path() {
174 let mut map = DirsMultiset::from_manifest(&vec![]);
174 let mut map = DirsMultiset::from_manifest(&vec![]);
175 let path = b"";
175 let path = b"";
176 map.add_path(path);
176 map.add_path(path);
177
177
178 assert_eq!(1, map.len());
178 assert_eq!(1, map.len());
179 }
179 }
180
180
181 #[test]
181 #[test]
182 fn test_add_path_successful() {
182 fn test_add_path_successful() {
183 let mut map = DirsMultiset::from_manifest(&vec![]);
183 let mut map = DirsMultiset::from_manifest(&vec![]);
184
184
185 map.add_path(b"a/");
185 map.add_path(b"a/");
186 assert_eq!(1, *map.inner.get(&b"a".to_vec()).unwrap());
186 assert_eq!(1, *map.inner.get(&b"a".to_vec()).unwrap());
187 assert_eq!(1, *map.inner.get(&Vec::new()).unwrap());
187 assert_eq!(1, *map.inner.get(&Vec::new()).unwrap());
188 assert_eq!(2, map.len());
188 assert_eq!(2, map.len());
189
189
190 // Non directory should be ignored
190 // Non directory should be ignored
191 map.add_path(b"a");
191 map.add_path(b"a");
192 assert_eq!(1, *map.inner.get(&b"a".to_vec()).unwrap());
192 assert_eq!(1, *map.inner.get(&b"a".to_vec()).unwrap());
193 assert_eq!(2, map.len());
193 assert_eq!(2, map.len());
194
194
195 // Non directory will still add its base
195 // Non directory will still add its base
196 map.add_path(b"a/b");
196 map.add_path(b"a/b");
197 assert_eq!(2, *map.inner.get(&b"a".to_vec()).unwrap());
197 assert_eq!(2, *map.inner.get(&b"a".to_vec()).unwrap());
198 assert_eq!(2, map.len());
198 assert_eq!(2, map.len());
199
199
200 // Duplicate path works
200 // Duplicate path works
201 map.add_path(b"a/");
201 map.add_path(b"a/");
202 assert_eq!(3, *map.inner.get(&b"a".to_vec()).unwrap());
202 assert_eq!(3, *map.inner.get(&b"a".to_vec()).unwrap());
203
203
204 // Nested dir adds to its base
204 // Nested dir adds to its base
205 map.add_path(b"a/b/");
205 map.add_path(b"a/b/");
206 assert_eq!(4, *map.inner.get(&b"a".to_vec()).unwrap());
206 assert_eq!(4, *map.inner.get(&b"a".to_vec()).unwrap());
207 assert_eq!(1, *map.inner.get(&b"a/b".to_vec()).unwrap());
207 assert_eq!(1, *map.inner.get(&b"a/b".to_vec()).unwrap());
208
208
209 // but not its base's base, because it already existed
209 // but not its base's base, because it already existed
210 map.add_path(b"a/b/c/");
210 map.add_path(b"a/b/c/");
211 assert_eq!(4, *map.inner.get(&b"a".to_vec()).unwrap());
211 assert_eq!(4, *map.inner.get(&b"a".to_vec()).unwrap());
212 assert_eq!(2, *map.inner.get(&b"a/b".to_vec()).unwrap());
212 assert_eq!(2, *map.inner.get(&b"a/b".to_vec()).unwrap());
213
213
214 map.add_path(b"a/c/");
214 map.add_path(b"a/c/");
215 assert_eq!(1, *map.inner.get(&b"a/c".to_vec()).unwrap());
215 assert_eq!(1, *map.inner.get(&b"a/c".to_vec()).unwrap());
216
216
217 let expected = DirsMultiset {
217 let expected = DirsMultiset {
218 inner: [("", 2), ("a", 5), ("a/b", 2), ("a/b/c", 1), ("a/c", 1)]
218 inner: [("", 2), ("a", 5), ("a/b", 2), ("a/b/c", 1), ("a/c", 1)]
219 .iter()
219 .iter()
220 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
220 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
221 .collect(),
221 .collect(),
222 };
222 };
223 assert_eq!(map, expected);
223 assert_eq!(map, expected);
224 }
224 }
225
225
226 #[test]
226 #[test]
227 fn test_dirsmultiset_new_empty() {
227 fn test_dirsmultiset_new_empty() {
228 let new = DirsMultiset::from_manifest(&vec![]);
228 let new = DirsMultiset::from_manifest(&vec![]);
229 let expected = DirsMultiset {
229 let expected = DirsMultiset {
230 inner: HashMap::new(),
230 inner: HashMap::new(),
231 };
231 };
232 assert_eq!(expected, new);
232 assert_eq!(expected, new);
233
233
234 let new = DirsMultiset::from_dirstate(&HashMap::new(), None);
234 let new = DirsMultiset::from_dirstate(&HashMap::new(), None);
235 let expected = DirsMultiset {
235 let expected = DirsMultiset {
236 inner: HashMap::new(),
236 inner: HashMap::new(),
237 };
237 };
238 assert_eq!(expected, new);
238 assert_eq!(expected, new);
239 }
239 }
240
240
241 #[test]
241 #[test]
242 fn test_dirsmultiset_new_no_skip() {
242 fn test_dirsmultiset_new_no_skip() {
243 let input_vec = ["a/", "b/", "a/c", "a/d/"]
243 let input_vec = ["a/", "b/", "a/c", "a/d/"]
244 .iter()
244 .iter()
245 .map(|e| e.as_bytes().to_vec())
245 .map(|e| e.as_bytes().to_vec())
246 .collect();
246 .collect();
247 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
247 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
248 .iter()
248 .iter()
249 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
249 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
250 .collect();
250 .collect();
251
251
252 let new = DirsMultiset::from_manifest(&input_vec);
252 let new = DirsMultiset::from_manifest(&input_vec);
253 let expected = DirsMultiset {
253 let expected = DirsMultiset {
254 inner: expected_inner,
254 inner: expected_inner,
255 };
255 };
256 assert_eq!(expected, new);
256 assert_eq!(expected, new);
257
257
258 let input_map = ["a/", "b/", "a/c", "a/d/"]
258 let input_map = ["a/", "b/", "a/c", "a/d/"]
259 .iter()
259 .iter()
260 .map(|f| {
260 .map(|f| {
261 (
261 (
262 f.as_bytes().to_vec(),
262 f.as_bytes().to_vec(),
263 DirstateEntry {
263 DirstateEntry {
264 state: EntryState::Normal,
264 state: EntryState::Normal,
265 mode: 0,
265 mode: 0,
266 mtime: 0,
266 mtime: 0,
267 size: 0,
267 size: 0,
268 },
268 },
269 )
269 )
270 })
270 })
271 .collect();
271 .collect();
272 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
272 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
273 .iter()
273 .iter()
274 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
274 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
275 .collect();
275 .collect();
276
276
277 let new = DirsMultiset::from_dirstate(&input_map, None);
277 let new = DirsMultiset::from_dirstate(&input_map, None);
278 let expected = DirsMultiset {
278 let expected = DirsMultiset {
279 inner: expected_inner,
279 inner: expected_inner,
280 };
280 };
281 assert_eq!(expected, new);
281 assert_eq!(expected, new);
282 }
282 }
283
283
284 #[test]
284 #[test]
285 fn test_dirsmultiset_new_skip() {
285 fn test_dirsmultiset_new_skip() {
286 let input_map = [
286 let input_map = [
287 ("a/", EntryState::Normal),
287 ("a/", EntryState::Normal),
288 ("a/b/", EntryState::Normal),
288 ("a/b/", EntryState::Normal),
289 ("a/c", EntryState::Removed),
289 ("a/c", EntryState::Removed),
290 ("a/d/", EntryState::Merged),
290 ("a/d/", EntryState::Merged),
291 ]
291 ]
292 .iter()
292 .iter()
293 .map(|(f, state)| {
293 .map(|(f, state)| {
294 (
294 (
295 f.as_bytes().to_vec(),
295 f.as_bytes().to_vec(),
296 DirstateEntry {
296 DirstateEntry {
297 state: *state,
297 state: *state,
298 mode: 0,
298 mode: 0,
299 mtime: 0,
299 mtime: 0,
300 size: 0,
300 size: 0,
301 },
301 },
302 )
302 )
303 })
303 })
304 .collect();
304 .collect();
305
305
306 // "a" incremented with "a/c" and "a/d/"
306 // "a" incremented with "a/c" and "a/d/"
307 let expected_inner = [("", 1), ("a", 2), ("a/d", 1)]
307 let expected_inner = [("", 1), ("a", 2), ("a/d", 1)]
308 .iter()
308 .iter()
309 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
309 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
310 .collect();
310 .collect();
311
311
312 let new =
312 let new =
313 DirsMultiset::from_dirstate(&input_map, Some(EntryState::Normal));
313 DirsMultiset::from_dirstate(&input_map, Some(EntryState::Normal));
314 let expected = DirsMultiset {
314 let expected = DirsMultiset {
315 inner: expected_inner,
315 inner: expected_inner,
316 };
316 };
317 assert_eq!(expected, new);
317 assert_eq!(expected, new);
318 }
318 }
319
320 }
319 }
@@ -1,695 +1,694 b''
1 // discovery.rs
1 // discovery.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Discovery operations
8 //! Discovery operations
9 //!
9 //!
10 //! This is a Rust counterpart to the `partialdiscovery` class of
10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 //! `mercurial.setdiscovery`
11 //! `mercurial.setdiscovery`
12
12
13 use super::{Graph, GraphError, Revision, NULL_REVISION};
13 use super::{Graph, GraphError, Revision, NULL_REVISION};
14 use crate::ancestors::MissingAncestors;
14 use crate::ancestors::MissingAncestors;
15 use crate::dagops;
15 use crate::dagops;
16 use rand::seq::SliceRandom;
16 use rand::seq::SliceRandom;
17 use rand::{thread_rng, RngCore, SeedableRng};
17 use rand::{thread_rng, RngCore, SeedableRng};
18 use std::cmp::{max, min};
18 use std::cmp::{max, min};
19 use std::collections::{HashMap, HashSet, VecDeque};
19 use std::collections::{HashMap, HashSet, VecDeque};
20
20
21 type Rng = rand_pcg::Pcg32;
21 type Rng = rand_pcg::Pcg32;
22
22
23 pub struct PartialDiscovery<G: Graph + Clone> {
23 pub struct PartialDiscovery<G: Graph + Clone> {
24 target_heads: Option<Vec<Revision>>,
24 target_heads: Option<Vec<Revision>>,
25 graph: G, // plays the role of self._repo
25 graph: G, // plays the role of self._repo
26 common: MissingAncestors<G>,
26 common: MissingAncestors<G>,
27 undecided: Option<HashSet<Revision>>,
27 undecided: Option<HashSet<Revision>>,
28 children_cache: Option<HashMap<Revision, Vec<Revision>>>,
28 children_cache: Option<HashMap<Revision, Vec<Revision>>>,
29 missing: HashSet<Revision>,
29 missing: HashSet<Revision>,
30 rng: Rng,
30 rng: Rng,
31 respect_size: bool,
31 respect_size: bool,
32 randomize: bool,
32 randomize: bool,
33 }
33 }
34
34
35 pub struct DiscoveryStats {
35 pub struct DiscoveryStats {
36 pub undecided: Option<usize>,
36 pub undecided: Option<usize>,
37 }
37 }
38
38
39 /// Update an existing sample to match the expected size
39 /// Update an existing sample to match the expected size
40 ///
40 ///
41 /// The sample is updated with revisions exponentially distant from each
41 /// The sample is updated with revisions exponentially distant from each
42 /// element of `heads`.
42 /// element of `heads`.
43 ///
43 ///
44 /// If a target size is specified, the sampling will stop once this size is
44 /// If a target size is specified, the sampling will stop once this size is
45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
46 /// reached.
46 /// reached.
47 ///
47 ///
48 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
48 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
49 /// represented by `parentfn`
49 /// represented by `parentfn`
50 /// - `heads`: set of DAG head revs
50 /// - `heads`: set of DAG head revs
51 /// - `sample`: a sample to update
51 /// - `sample`: a sample to update
52 /// - `parentfn`: a callable to resolve parents for a revision
52 /// - `parentfn`: a callable to resolve parents for a revision
53 /// - `quicksamplesize`: optional target size of the sample
53 /// - `quicksamplesize`: optional target size of the sample
54 fn update_sample<I>(
54 fn update_sample<I>(
55 revs: Option<&HashSet<Revision>>,
55 revs: Option<&HashSet<Revision>>,
56 heads: impl IntoIterator<Item = Revision>,
56 heads: impl IntoIterator<Item = Revision>,
57 sample: &mut HashSet<Revision>,
57 sample: &mut HashSet<Revision>,
58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
59 quicksamplesize: Option<usize>,
59 quicksamplesize: Option<usize>,
60 ) -> Result<(), GraphError>
60 ) -> Result<(), GraphError>
61 where
61 where
62 I: Iterator<Item = Revision>,
62 I: Iterator<Item = Revision>,
63 {
63 {
64 let mut distances: HashMap<Revision, u32> = HashMap::new();
64 let mut distances: HashMap<Revision, u32> = HashMap::new();
65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
66 let mut factor: u32 = 1;
66 let mut factor: u32 = 1;
67 let mut seen: HashSet<Revision> = HashSet::new();
67 let mut seen: HashSet<Revision> = HashSet::new();
68 while let Some(current) = visit.pop_front() {
68 while let Some(current) = visit.pop_front() {
69 if !seen.insert(current) {
69 if !seen.insert(current) {
70 continue;
70 continue;
71 }
71 }
72
72
73 let d = *distances.entry(current).or_insert(1);
73 let d = *distances.entry(current).or_insert(1);
74 if d > factor {
74 if d > factor {
75 factor *= 2;
75 factor *= 2;
76 }
76 }
77 if d == factor {
77 if d == factor {
78 sample.insert(current);
78 sample.insert(current);
79 if let Some(sz) = quicksamplesize {
79 if let Some(sz) = quicksamplesize {
80 if sample.len() >= sz {
80 if sample.len() >= sz {
81 return Ok(());
81 return Ok(());
82 }
82 }
83 }
83 }
84 }
84 }
85 for p in parentsfn(current)? {
85 for p in parentsfn(current)? {
86 if let Some(revs) = revs {
86 if let Some(revs) = revs {
87 if !revs.contains(&p) {
87 if !revs.contains(&p) {
88 continue;
88 continue;
89 }
89 }
90 }
90 }
91 distances.entry(p).or_insert(d + 1);
91 distances.entry(p).or_insert(d + 1);
92 visit.push_back(p);
92 visit.push_back(p);
93 }
93 }
94 }
94 }
95 Ok(())
95 Ok(())
96 }
96 }
97
97
98 struct ParentsIterator {
98 struct ParentsIterator {
99 parents: [Revision; 2],
99 parents: [Revision; 2],
100 cur: usize,
100 cur: usize,
101 }
101 }
102
102
103 impl ParentsIterator {
103 impl ParentsIterator {
104 fn graph_parents(
104 fn graph_parents(
105 graph: &impl Graph,
105 graph: &impl Graph,
106 r: Revision,
106 r: Revision,
107 ) -> Result<ParentsIterator, GraphError> {
107 ) -> Result<ParentsIterator, GraphError> {
108 Ok(ParentsIterator {
108 Ok(ParentsIterator {
109 parents: graph.parents(r)?,
109 parents: graph.parents(r)?,
110 cur: 0,
110 cur: 0,
111 })
111 })
112 }
112 }
113 }
113 }
114
114
115 impl Iterator for ParentsIterator {
115 impl Iterator for ParentsIterator {
116 type Item = Revision;
116 type Item = Revision;
117
117
118 fn next(&mut self) -> Option<Revision> {
118 fn next(&mut self) -> Option<Revision> {
119 if self.cur > 1 {
119 if self.cur > 1 {
120 return None;
120 return None;
121 }
121 }
122 let rev = self.parents[self.cur];
122 let rev = self.parents[self.cur];
123 self.cur += 1;
123 self.cur += 1;
124 if rev == NULL_REVISION {
124 if rev == NULL_REVISION {
125 return self.next();
125 return self.next();
126 }
126 }
127 Some(rev)
127 Some(rev)
128 }
128 }
129 }
129 }
130
130
131 impl<G: Graph + Clone> PartialDiscovery<G> {
131 impl<G: Graph + Clone> PartialDiscovery<G> {
132 /// Create a PartialDiscovery object, with the intent
132 /// Create a PartialDiscovery object, with the intent
133 /// of comparing our `::<target_heads>` revset to the contents of another
133 /// of comparing our `::<target_heads>` revset to the contents of another
134 /// repo.
134 /// repo.
135 ///
135 ///
136 /// For now `target_heads` is passed as a vector, and will be used
136 /// For now `target_heads` is passed as a vector, and will be used
137 /// at the first call to `ensure_undecided()`.
137 /// at the first call to `ensure_undecided()`.
138 ///
138 ///
139 /// If we want to make the signature more flexible,
139 /// If we want to make the signature more flexible,
140 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
140 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
141 /// object since we'll keep it in the meanwhile
141 /// object since we'll keep it in the meanwhile
142 ///
142 ///
143 /// The `respect_size` boolean controls how the sampling methods
143 /// The `respect_size` boolean controls how the sampling methods
144 /// will interpret the size argument requested by the caller. If it's
144 /// will interpret the size argument requested by the caller. If it's
145 /// `false`, they are allowed to produce a sample whose size is more
145 /// `false`, they are allowed to produce a sample whose size is more
146 /// appropriate to the situation (typically bigger).
146 /// appropriate to the situation (typically bigger).
147 ///
147 ///
148 /// The `randomize` boolean affects sampling, and specifically how
148 /// The `randomize` boolean affects sampling, and specifically how
149 /// limiting or last-minute expanding is been done:
149 /// limiting or last-minute expanding is been done:
150 ///
150 ///
151 /// If `true`, both will perform random picking from `self.undecided`.
151 /// If `true`, both will perform random picking from `self.undecided`.
152 /// This is currently the best for actual discoveries.
152 /// This is currently the best for actual discoveries.
153 ///
153 ///
154 /// If `false`, a reproductible picking strategy is performed. This is
154 /// If `false`, a reproductible picking strategy is performed. This is
155 /// useful for integration tests.
155 /// useful for integration tests.
156 pub fn new(
156 pub fn new(
157 graph: G,
157 graph: G,
158 target_heads: Vec<Revision>,
158 target_heads: Vec<Revision>,
159 respect_size: bool,
159 respect_size: bool,
160 randomize: bool,
160 randomize: bool,
161 ) -> Self {
161 ) -> Self {
162 let mut seed: [u8; 16] = [0; 16];
162 let mut seed: [u8; 16] = [0; 16];
163 if randomize {
163 if randomize {
164 thread_rng().fill_bytes(&mut seed);
164 thread_rng().fill_bytes(&mut seed);
165 }
165 }
166 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
166 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
167 }
167 }
168
168
169 pub fn new_with_seed(
169 pub fn new_with_seed(
170 graph: G,
170 graph: G,
171 target_heads: Vec<Revision>,
171 target_heads: Vec<Revision>,
172 seed: [u8; 16],
172 seed: [u8; 16],
173 respect_size: bool,
173 respect_size: bool,
174 randomize: bool,
174 randomize: bool,
175 ) -> Self {
175 ) -> Self {
176 PartialDiscovery {
176 PartialDiscovery {
177 undecided: None,
177 undecided: None,
178 children_cache: None,
178 children_cache: None,
179 target_heads: Some(target_heads),
179 target_heads: Some(target_heads),
180 graph: graph.clone(),
180 graph: graph.clone(),
181 common: MissingAncestors::new(graph, vec![]),
181 common: MissingAncestors::new(graph, vec![]),
182 missing: HashSet::new(),
182 missing: HashSet::new(),
183 rng: Rng::from_seed(seed),
183 rng: Rng::from_seed(seed),
184 respect_size: respect_size,
184 respect_size: respect_size,
185 randomize: randomize,
185 randomize: randomize,
186 }
186 }
187 }
187 }
188
188
189 /// Extract at most `size` random elements from sample and return them
189 /// Extract at most `size` random elements from sample and return them
190 /// as a vector
190 /// as a vector
191 fn limit_sample(
191 fn limit_sample(
192 &mut self,
192 &mut self,
193 mut sample: Vec<Revision>,
193 mut sample: Vec<Revision>,
194 size: usize,
194 size: usize,
195 ) -> Vec<Revision> {
195 ) -> Vec<Revision> {
196 if !self.randomize {
196 if !self.randomize {
197 sample.sort();
197 sample.sort();
198 sample.truncate(size);
198 sample.truncate(size);
199 return sample;
199 return sample;
200 }
200 }
201 let sample_len = sample.len();
201 let sample_len = sample.len();
202 if sample_len <= size {
202 if sample_len <= size {
203 return sample;
203 return sample;
204 }
204 }
205 let rng = &mut self.rng;
205 let rng = &mut self.rng;
206 let dropped_size = sample_len - size;
206 let dropped_size = sample_len - size;
207 let limited_slice = if size < dropped_size {
207 let limited_slice = if size < dropped_size {
208 sample.partial_shuffle(rng, size).0
208 sample.partial_shuffle(rng, size).0
209 } else {
209 } else {
210 sample.partial_shuffle(rng, dropped_size).1
210 sample.partial_shuffle(rng, dropped_size).1
211 };
211 };
212 limited_slice.to_owned()
212 limited_slice.to_owned()
213 }
213 }
214
214
215 /// Register revisions known as being common
215 /// Register revisions known as being common
216 pub fn add_common_revisions(
216 pub fn add_common_revisions(
217 &mut self,
217 &mut self,
218 common: impl IntoIterator<Item = Revision>,
218 common: impl IntoIterator<Item = Revision>,
219 ) -> Result<(), GraphError> {
219 ) -> Result<(), GraphError> {
220 let before_len = self.common.get_bases().len();
220 let before_len = self.common.get_bases().len();
221 self.common.add_bases(common);
221 self.common.add_bases(common);
222 if self.common.get_bases().len() == before_len {
222 if self.common.get_bases().len() == before_len {
223 return Ok(());
223 return Ok(());
224 }
224 }
225 if let Some(ref mut undecided) = self.undecided {
225 if let Some(ref mut undecided) = self.undecided {
226 self.common.remove_ancestors_from(undecided)?;
226 self.common.remove_ancestors_from(undecided)?;
227 }
227 }
228 Ok(())
228 Ok(())
229 }
229 }
230
230
231 /// Register revisions known as being missing
231 /// Register revisions known as being missing
232 ///
232 ///
233 /// # Performance note
233 /// # Performance note
234 ///
234 ///
235 /// Except in the most trivial case, the first call of this method has
235 /// Except in the most trivial case, the first call of this method has
236 /// the side effect of computing `self.undecided` set for the first time,
236 /// the side effect of computing `self.undecided` set for the first time,
237 /// and the related caches it might need for efficiency of its internal
237 /// and the related caches it might need for efficiency of its internal
238 /// computation. This is typically faster if more information is
238 /// computation. This is typically faster if more information is
239 /// available in `self.common`. Therefore, for good performance, the
239 /// available in `self.common`. Therefore, for good performance, the
240 /// caller should avoid calling this too early.
240 /// caller should avoid calling this too early.
241 pub fn add_missing_revisions(
241 pub fn add_missing_revisions(
242 &mut self,
242 &mut self,
243 missing: impl IntoIterator<Item = Revision>,
243 missing: impl IntoIterator<Item = Revision>,
244 ) -> Result<(), GraphError> {
244 ) -> Result<(), GraphError> {
245 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
245 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
246 if tovisit.is_empty() {
246 if tovisit.is_empty() {
247 return Ok(());
247 return Ok(());
248 }
248 }
249 self.ensure_children_cache()?;
249 self.ensure_children_cache()?;
250 self.ensure_undecided()?; // for safety of possible future refactors
250 self.ensure_undecided()?; // for safety of possible future refactors
251 let children = self.children_cache.as_ref().unwrap();
251 let children = self.children_cache.as_ref().unwrap();
252 let mut seen: HashSet<Revision> = HashSet::new();
252 let mut seen: HashSet<Revision> = HashSet::new();
253 let undecided_mut = self.undecided.as_mut().unwrap();
253 let undecided_mut = self.undecided.as_mut().unwrap();
254 while let Some(rev) = tovisit.pop_front() {
254 while let Some(rev) = tovisit.pop_front() {
255 if !self.missing.insert(rev) {
255 if !self.missing.insert(rev) {
256 // either it's known to be missing from a previous
256 // either it's known to be missing from a previous
257 // invocation, and there's no need to iterate on its
257 // invocation, and there's no need to iterate on its
258 // children (we now they are all missing)
258 // children (we now they are all missing)
259 // or it's from a previous iteration of this loop
259 // or it's from a previous iteration of this loop
260 // and its children have already been queued
260 // and its children have already been queued
261 continue;
261 continue;
262 }
262 }
263 undecided_mut.remove(&rev);
263 undecided_mut.remove(&rev);
264 match children.get(&rev) {
264 match children.get(&rev) {
265 None => {
265 None => {
266 continue;
266 continue;
267 }
267 }
268 Some(this_children) => {
268 Some(this_children) => {
269 for child in this_children.iter().cloned() {
269 for child in this_children.iter().cloned() {
270 if seen.insert(child) {
270 if seen.insert(child) {
271 tovisit.push_back(child);
271 tovisit.push_back(child);
272 }
272 }
273 }
273 }
274 }
274 }
275 }
275 }
276 }
276 }
277 Ok(())
277 Ok(())
278 }
278 }
279
279
280 /// Do we have any information about the peer?
280 /// Do we have any information about the peer?
281 pub fn has_info(&self) -> bool {
281 pub fn has_info(&self) -> bool {
282 self.common.has_bases()
282 self.common.has_bases()
283 }
283 }
284
284
285 /// Did we acquire full knowledge of our Revisions that the peer has?
285 /// Did we acquire full knowledge of our Revisions that the peer has?
286 pub fn is_complete(&self) -> bool {
286 pub fn is_complete(&self) -> bool {
287 self.undecided.as_ref().map_or(false, |s| s.is_empty())
287 self.undecided.as_ref().map_or(false, |s| s.is_empty())
288 }
288 }
289
289
290 /// Return the heads of the currently known common set of revisions.
290 /// Return the heads of the currently known common set of revisions.
291 ///
291 ///
292 /// If the discovery process is not complete (see `is_complete()`), the
292 /// If the discovery process is not complete (see `is_complete()`), the
293 /// caller must be aware that this is an intermediate state.
293 /// caller must be aware that this is an intermediate state.
294 ///
294 ///
295 /// On the other hand, if it is complete, then this is currently
295 /// On the other hand, if it is complete, then this is currently
296 /// the only way to retrieve the end results of the discovery process.
296 /// the only way to retrieve the end results of the discovery process.
297 ///
297 ///
298 /// We may introduce in the future an `into_common_heads` call that
298 /// We may introduce in the future an `into_common_heads` call that
299 /// would be more appropriate for normal Rust callers, dropping `self`
299 /// would be more appropriate for normal Rust callers, dropping `self`
300 /// if it is complete.
300 /// if it is complete.
301 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
301 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
302 self.common.bases_heads()
302 self.common.bases_heads()
303 }
303 }
304
304
305 /// Force first computation of `self.undecided`
305 /// Force first computation of `self.undecided`
306 ///
306 ///
307 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
307 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
308 /// unwrapped to get workable immutable or mutable references without
308 /// unwrapped to get workable immutable or mutable references without
309 /// any panic.
309 /// any panic.
310 ///
310 ///
311 /// This is an imperative call instead of an access with added lazyness
311 /// This is an imperative call instead of an access with added lazyness
312 /// to reduce easily the scope of mutable borrow for the caller,
312 /// to reduce easily the scope of mutable borrow for the caller,
313 /// compared to undecided(&'a mut self) -> &'a… that would keep it
313 /// compared to undecided(&'a mut self) -> &'a… that would keep it
314 /// as long as the resulting immutable one.
314 /// as long as the resulting immutable one.
315 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
315 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
316 if self.undecided.is_some() {
316 if self.undecided.is_some() {
317 return Ok(());
317 return Ok(());
318 }
318 }
319 let tgt = self.target_heads.take().unwrap();
319 let tgt = self.target_heads.take().unwrap();
320 self.undecided =
320 self.undecided =
321 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
321 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
322 Ok(())
322 Ok(())
323 }
323 }
324
324
325 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
325 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
326 if self.children_cache.is_some() {
326 if self.children_cache.is_some() {
327 return Ok(());
327 return Ok(());
328 }
328 }
329 self.ensure_undecided()?;
329 self.ensure_undecided()?;
330
330
331 let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();
331 let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();
332 for &rev in self.undecided.as_ref().unwrap() {
332 for &rev in self.undecided.as_ref().unwrap() {
333 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
333 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
334 children.entry(p).or_insert_with(|| Vec::new()).push(rev);
334 children.entry(p).or_insert_with(|| Vec::new()).push(rev);
335 }
335 }
336 }
336 }
337 self.children_cache = Some(children);
337 self.children_cache = Some(children);
338 Ok(())
338 Ok(())
339 }
339 }
340
340
341 /// Provide statistics about the current state of the discovery process
341 /// Provide statistics about the current state of the discovery process
342 pub fn stats(&self) -> DiscoveryStats {
342 pub fn stats(&self) -> DiscoveryStats {
343 DiscoveryStats {
343 DiscoveryStats {
344 undecided: self.undecided.as_ref().map(|s| s.len()),
344 undecided: self.undecided.as_ref().map(|s| s.len()),
345 }
345 }
346 }
346 }
347
347
348 pub fn take_quick_sample(
348 pub fn take_quick_sample(
349 &mut self,
349 &mut self,
350 headrevs: impl IntoIterator<Item = Revision>,
350 headrevs: impl IntoIterator<Item = Revision>,
351 size: usize,
351 size: usize,
352 ) -> Result<Vec<Revision>, GraphError> {
352 ) -> Result<Vec<Revision>, GraphError> {
353 self.ensure_undecided()?;
353 self.ensure_undecided()?;
354 let mut sample = {
354 let mut sample = {
355 let undecided = self.undecided.as_ref().unwrap();
355 let undecided = self.undecided.as_ref().unwrap();
356 if undecided.len() <= size {
356 if undecided.len() <= size {
357 return Ok(undecided.iter().cloned().collect());
357 return Ok(undecided.iter().cloned().collect());
358 }
358 }
359 dagops::heads(&self.graph, undecided.iter())?
359 dagops::heads(&self.graph, undecided.iter())?
360 };
360 };
361 if sample.len() >= size {
361 if sample.len() >= size {
362 return Ok(self.limit_sample(sample.into_iter().collect(), size));
362 return Ok(self.limit_sample(sample.into_iter().collect(), size));
363 }
363 }
364 update_sample(
364 update_sample(
365 None,
365 None,
366 headrevs,
366 headrevs,
367 &mut sample,
367 &mut sample,
368 |r| ParentsIterator::graph_parents(&self.graph, r),
368 |r| ParentsIterator::graph_parents(&self.graph, r),
369 Some(size),
369 Some(size),
370 )?;
370 )?;
371 Ok(sample.into_iter().collect())
371 Ok(sample.into_iter().collect())
372 }
372 }
373
373
374 /// Extract a sample from `self.undecided`, going from its heads and roots.
374 /// Extract a sample from `self.undecided`, going from its heads and roots.
375 ///
375 ///
376 /// The `size` parameter is used to avoid useless computations if
376 /// The `size` parameter is used to avoid useless computations if
377 /// it turns out to be bigger than the whole set of undecided Revisions.
377 /// it turns out to be bigger than the whole set of undecided Revisions.
378 ///
378 ///
379 /// The sample is taken by using `update_sample` from the heads, then
379 /// The sample is taken by using `update_sample` from the heads, then
380 /// from the roots, working on the reverse DAG,
380 /// from the roots, working on the reverse DAG,
381 /// expressed by `self.children_cache`.
381 /// expressed by `self.children_cache`.
382 ///
382 ///
383 /// No effort is being made to complete or limit the sample to `size`
383 /// No effort is being made to complete or limit the sample to `size`
384 /// but this method returns another interesting size that it derives
384 /// but this method returns another interesting size that it derives
385 /// from its knowledge of the structure of the various sets, leaving
385 /// from its knowledge of the structure of the various sets, leaving
386 /// to the caller the decision to use it or not.
386 /// to the caller the decision to use it or not.
387 fn bidirectional_sample(
387 fn bidirectional_sample(
388 &mut self,
388 &mut self,
389 size: usize,
389 size: usize,
390 ) -> Result<(HashSet<Revision>, usize), GraphError> {
390 ) -> Result<(HashSet<Revision>, usize), GraphError> {
391 self.ensure_undecided()?;
391 self.ensure_undecided()?;
392 {
392 {
393 // we don't want to compute children_cache before this
393 // we don't want to compute children_cache before this
394 // but doing it after extracting self.undecided takes a mutable
394 // but doing it after extracting self.undecided takes a mutable
395 // ref to self while a shareable one is still active.
395 // ref to self while a shareable one is still active.
396 let undecided = self.undecided.as_ref().unwrap();
396 let undecided = self.undecided.as_ref().unwrap();
397 if undecided.len() <= size {
397 if undecided.len() <= size {
398 return Ok((undecided.clone(), size));
398 return Ok((undecided.clone(), size));
399 }
399 }
400 }
400 }
401
401
402 self.ensure_children_cache()?;
402 self.ensure_children_cache()?;
403 let revs = self.undecided.as_ref().unwrap();
403 let revs = self.undecided.as_ref().unwrap();
404 let mut sample: HashSet<Revision> = revs.clone();
404 let mut sample: HashSet<Revision> = revs.clone();
405
405
406 // it's possible that leveraging the children cache would be more
406 // it's possible that leveraging the children cache would be more
407 // efficient here
407 // efficient here
408 dagops::retain_heads(&self.graph, &mut sample)?;
408 dagops::retain_heads(&self.graph, &mut sample)?;
409 let revsheads = sample.clone(); // was again heads(revs) in python
409 let revsheads = sample.clone(); // was again heads(revs) in python
410
410
411 // update from heads
411 // update from heads
412 update_sample(
412 update_sample(
413 Some(revs),
413 Some(revs),
414 revsheads.iter().cloned(),
414 revsheads.iter().cloned(),
415 &mut sample,
415 &mut sample,
416 |r| ParentsIterator::graph_parents(&self.graph, r),
416 |r| ParentsIterator::graph_parents(&self.graph, r),
417 None,
417 None,
418 )?;
418 )?;
419
419
420 // update from roots
420 // update from roots
421 let revroots: HashSet<Revision> =
421 let revroots: HashSet<Revision> =
422 dagops::roots(&self.graph, revs)?.into_iter().collect();
422 dagops::roots(&self.graph, revs)?.into_iter().collect();
423 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
423 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
424
424
425 let children = self.children_cache.as_ref().unwrap();
425 let children = self.children_cache.as_ref().unwrap();
426 let empty_vec: Vec<Revision> = Vec::new();
426 let empty_vec: Vec<Revision> = Vec::new();
427 update_sample(
427 update_sample(
428 Some(revs),
428 Some(revs),
429 revroots,
429 revroots,
430 &mut sample,
430 &mut sample,
431 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
431 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
432 None,
432 None,
433 )?;
433 )?;
434 Ok((sample, prescribed_size))
434 Ok((sample, prescribed_size))
435 }
435 }
436
436
437 /// Fill up sample up to the wished size with random undecided Revisions.
437 /// Fill up sample up to the wished size with random undecided Revisions.
438 ///
438 ///
439 /// This is intended to be used as a last resort completion if the
439 /// This is intended to be used as a last resort completion if the
440 /// regular sampling algorithm returns too few elements.
440 /// regular sampling algorithm returns too few elements.
441 fn random_complete_sample(
441 fn random_complete_sample(
442 &mut self,
442 &mut self,
443 sample: &mut Vec<Revision>,
443 sample: &mut Vec<Revision>,
444 size: usize,
444 size: usize,
445 ) {
445 ) {
446 let sample_len = sample.len();
446 let sample_len = sample.len();
447 if size <= sample_len {
447 if size <= sample_len {
448 return;
448 return;
449 }
449 }
450 let take_from: Vec<Revision> = self
450 let take_from: Vec<Revision> = self
451 .undecided
451 .undecided
452 .as_ref()
452 .as_ref()
453 .unwrap()
453 .unwrap()
454 .iter()
454 .iter()
455 .filter(|&r| !sample.contains(r))
455 .filter(|&r| !sample.contains(r))
456 .cloned()
456 .cloned()
457 .collect();
457 .collect();
458 sample.extend(self.limit_sample(take_from, size - sample_len));
458 sample.extend(self.limit_sample(take_from, size - sample_len));
459 }
459 }
460
460
461 pub fn take_full_sample(
461 pub fn take_full_sample(
462 &mut self,
462 &mut self,
463 size: usize,
463 size: usize,
464 ) -> Result<Vec<Revision>, GraphError> {
464 ) -> Result<Vec<Revision>, GraphError> {
465 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
465 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
466 let size = if self.respect_size {
466 let size = if self.respect_size {
467 size
467 size
468 } else {
468 } else {
469 prescribed_size
469 prescribed_size
470 };
470 };
471 let mut sample =
471 let mut sample =
472 self.limit_sample(sample_set.into_iter().collect(), size);
472 self.limit_sample(sample_set.into_iter().collect(), size);
473 self.random_complete_sample(&mut sample, size);
473 self.random_complete_sample(&mut sample, size);
474 Ok(sample)
474 Ok(sample)
475 }
475 }
476 }
476 }
477
477
478 #[cfg(test)]
478 #[cfg(test)]
479 mod tests {
479 mod tests {
480 use super::*;
480 use super::*;
481 use crate::testing::SampleGraph;
481 use crate::testing::SampleGraph;
482
482
483 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
483 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
484 ///
484 ///
485 /// To avoid actual randomness in these tests, we give it a fixed
485 /// To avoid actual randomness in these tests, we give it a fixed
486 /// random seed, but by default we'll test the random version.
486 /// random seed, but by default we'll test the random version.
487 fn full_disco() -> PartialDiscovery<SampleGraph> {
487 fn full_disco() -> PartialDiscovery<SampleGraph> {
488 PartialDiscovery::new_with_seed(
488 PartialDiscovery::new_with_seed(
489 SampleGraph,
489 SampleGraph,
490 vec![10, 11, 12, 13],
490 vec![10, 11, 12, 13],
491 [0; 16],
491 [0; 16],
492 true,
492 true,
493 true,
493 true,
494 )
494 )
495 }
495 }
496
496
497 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
497 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
498 ///
498 ///
499 /// To avoid actual randomness in tests, we give it a fixed random seed.
499 /// To avoid actual randomness in tests, we give it a fixed random seed.
500 fn disco12() -> PartialDiscovery<SampleGraph> {
500 fn disco12() -> PartialDiscovery<SampleGraph> {
501 PartialDiscovery::new_with_seed(
501 PartialDiscovery::new_with_seed(
502 SampleGraph,
502 SampleGraph,
503 vec![12],
503 vec![12],
504 [0; 16],
504 [0; 16],
505 true,
505 true,
506 true,
506 true,
507 )
507 )
508 }
508 }
509
509
510 fn sorted_undecided(
510 fn sorted_undecided(
511 disco: &PartialDiscovery<SampleGraph>,
511 disco: &PartialDiscovery<SampleGraph>,
512 ) -> Vec<Revision> {
512 ) -> Vec<Revision> {
513 let mut as_vec: Vec<Revision> =
513 let mut as_vec: Vec<Revision> =
514 disco.undecided.as_ref().unwrap().iter().cloned().collect();
514 disco.undecided.as_ref().unwrap().iter().cloned().collect();
515 as_vec.sort();
515 as_vec.sort();
516 as_vec
516 as_vec
517 }
517 }
518
518
519 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
519 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
520 let mut as_vec: Vec<Revision> =
520 let mut as_vec: Vec<Revision> =
521 disco.missing.iter().cloned().collect();
521 disco.missing.iter().cloned().collect();
522 as_vec.sort();
522 as_vec.sort();
523 as_vec
523 as_vec
524 }
524 }
525
525
526 fn sorted_common_heads(
526 fn sorted_common_heads(
527 disco: &PartialDiscovery<SampleGraph>,
527 disco: &PartialDiscovery<SampleGraph>,
528 ) -> Result<Vec<Revision>, GraphError> {
528 ) -> Result<Vec<Revision>, GraphError> {
529 let mut as_vec: Vec<Revision> =
529 let mut as_vec: Vec<Revision> =
530 disco.common_heads()?.iter().cloned().collect();
530 disco.common_heads()?.iter().cloned().collect();
531 as_vec.sort();
531 as_vec.sort();
532 Ok(as_vec)
532 Ok(as_vec)
533 }
533 }
534
534
535 #[test]
535 #[test]
536 fn test_add_common_get_undecided() -> Result<(), GraphError> {
536 fn test_add_common_get_undecided() -> Result<(), GraphError> {
537 let mut disco = full_disco();
537 let mut disco = full_disco();
538 assert_eq!(disco.undecided, None);
538 assert_eq!(disco.undecided, None);
539 assert!(!disco.has_info());
539 assert!(!disco.has_info());
540 assert_eq!(disco.stats().undecided, None);
540 assert_eq!(disco.stats().undecided, None);
541
541
542 disco.add_common_revisions(vec![11, 12])?;
542 disco.add_common_revisions(vec![11, 12])?;
543 assert!(disco.has_info());
543 assert!(disco.has_info());
544 assert!(!disco.is_complete());
544 assert!(!disco.is_complete());
545 assert!(disco.missing.is_empty());
545 assert!(disco.missing.is_empty());
546
546
547 // add_common_revisions did not trigger a premature computation
547 // add_common_revisions did not trigger a premature computation
548 // of `undecided`, let's check that and ask for them
548 // of `undecided`, let's check that and ask for them
549 assert_eq!(disco.undecided, None);
549 assert_eq!(disco.undecided, None);
550 disco.ensure_undecided()?;
550 disco.ensure_undecided()?;
551 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
551 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
552 assert_eq!(disco.stats().undecided, Some(4));
552 assert_eq!(disco.stats().undecided, Some(4));
553 Ok(())
553 Ok(())
554 }
554 }
555
555
556 /// in this test, we pretend that our peer misses exactly (8+10)::
556 /// in this test, we pretend that our peer misses exactly (8+10)::
557 /// and we're comparing all our repo to it (as in a bare push)
557 /// and we're comparing all our repo to it (as in a bare push)
558 #[test]
558 #[test]
559 fn test_discovery() -> Result<(), GraphError> {
559 fn test_discovery() -> Result<(), GraphError> {
560 let mut disco = full_disco();
560 let mut disco = full_disco();
561 disco.add_common_revisions(vec![11, 12])?;
561 disco.add_common_revisions(vec![11, 12])?;
562 disco.add_missing_revisions(vec![8, 10])?;
562 disco.add_missing_revisions(vec![8, 10])?;
563 assert_eq!(sorted_undecided(&disco), vec![5]);
563 assert_eq!(sorted_undecided(&disco), vec![5]);
564 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
564 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
565 assert!(!disco.is_complete());
565 assert!(!disco.is_complete());
566
566
567 disco.add_common_revisions(vec![5])?;
567 disco.add_common_revisions(vec![5])?;
568 assert_eq!(sorted_undecided(&disco), vec![]);
568 assert_eq!(sorted_undecided(&disco), vec![]);
569 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
569 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
570 assert!(disco.is_complete());
570 assert!(disco.is_complete());
571 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
571 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
572 Ok(())
572 Ok(())
573 }
573 }
574
574
575 #[test]
575 #[test]
576 fn test_add_missing_early_continue() -> Result<(), GraphError> {
576 fn test_add_missing_early_continue() -> Result<(), GraphError> {
577 eprintln!("test_add_missing_early_stop");
577 eprintln!("test_add_missing_early_stop");
578 let mut disco = full_disco();
578 let mut disco = full_disco();
579 disco.add_common_revisions(vec![13, 3, 4])?;
579 disco.add_common_revisions(vec![13, 3, 4])?;
580 disco.ensure_children_cache()?;
580 disco.ensure_children_cache()?;
581 // 12 is grand-child of 6 through 9
581 // 12 is grand-child of 6 through 9
582 // passing them in this order maximizes the chances of the
582 // passing them in this order maximizes the chances of the
583 // early continue to do the wrong thing
583 // early continue to do the wrong thing
584 disco.add_missing_revisions(vec![6, 9, 12])?;
584 disco.add_missing_revisions(vec![6, 9, 12])?;
585 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
585 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
586 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
586 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
587 assert!(!disco.is_complete());
587 assert!(!disco.is_complete());
588 Ok(())
588 Ok(())
589 }
589 }
590
590
591 #[test]
591 #[test]
592 fn test_limit_sample_no_need_to() {
592 fn test_limit_sample_no_need_to() {
593 let sample = vec![1, 2, 3, 4];
593 let sample = vec![1, 2, 3, 4];
594 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
594 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
595 }
595 }
596
596
597 #[test]
597 #[test]
598 fn test_limit_sample_less_than_half() {
598 fn test_limit_sample_less_than_half() {
599 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
599 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
600 }
600 }
601
601
602 #[test]
602 #[test]
603 fn test_limit_sample_more_than_half() {
603 fn test_limit_sample_more_than_half() {
604 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
604 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
605 }
605 }
606
606
607 #[test]
607 #[test]
608 fn test_limit_sample_no_random() {
608 fn test_limit_sample_no_random() {
609 let mut disco = full_disco();
609 let mut disco = full_disco();
610 disco.randomize = false;
610 disco.randomize = false;
611 assert_eq!(
611 assert_eq!(
612 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
612 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
613 vec![1, 3, 5, 7]
613 vec![1, 3, 5, 7]
614 );
614 );
615 }
615 }
616
616
617 #[test]
617 #[test]
618 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
618 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
619 let mut disco = full_disco();
619 let mut disco = full_disco();
620 disco.undecided = Some((1..=13).collect());
620 disco.undecided = Some((1..=13).collect());
621
621
622 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
622 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
623 sample_vec.sort();
623 sample_vec.sort();
624 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
624 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
625 Ok(())
625 Ok(())
626 }
626 }
627
627
628 #[test]
628 #[test]
629 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
629 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
630 let mut disco = disco12();
630 let mut disco = disco12();
631 disco.ensure_undecided()?;
631 disco.ensure_undecided()?;
632
632
633 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
633 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
634 sample_vec.sort();
634 sample_vec.sort();
635 // r12's only parent is r9, whose unique grand-parent through the
635 // r12's only parent is r9, whose unique grand-parent through the
636 // diamond shape is r4. This ends there because the distance from r4
636 // diamond shape is r4. This ends there because the distance from r4
637 // to the root is only 3.
637 // to the root is only 3.
638 assert_eq!(sample_vec, vec![4, 9, 12]);
638 assert_eq!(sample_vec, vec![4, 9, 12]);
639 Ok(())
639 Ok(())
640 }
640 }
641
641
642 #[test]
642 #[test]
643 fn test_children_cache() -> Result<(), GraphError> {
643 fn test_children_cache() -> Result<(), GraphError> {
644 let mut disco = full_disco();
644 let mut disco = full_disco();
645 disco.ensure_children_cache()?;
645 disco.ensure_children_cache()?;
646
646
647 let cache = disco.children_cache.unwrap();
647 let cache = disco.children_cache.unwrap();
648 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
648 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
649 assert_eq!(cache.get(&10).cloned(), None);
649 assert_eq!(cache.get(&10).cloned(), None);
650
650
651 let mut children_4 = cache.get(&4).cloned().unwrap();
651 let mut children_4 = cache.get(&4).cloned().unwrap();
652 children_4.sort();
652 children_4.sort();
653 assert_eq!(children_4, vec![5, 6, 7]);
653 assert_eq!(children_4, vec![5, 6, 7]);
654
654
655 let mut children_7 = cache.get(&7).cloned().unwrap();
655 let mut children_7 = cache.get(&7).cloned().unwrap();
656 children_7.sort();
656 children_7.sort();
657 assert_eq!(children_7, vec![9, 11]);
657 assert_eq!(children_7, vec![9, 11]);
658
658
659 Ok(())
659 Ok(())
660 }
660 }
661
661
662 #[test]
662 #[test]
663 fn test_complete_sample() {
663 fn test_complete_sample() {
664 let mut disco = full_disco();
664 let mut disco = full_disco();
665 let undecided: HashSet<Revision> =
665 let undecided: HashSet<Revision> =
666 [4, 7, 9, 2, 3].iter().cloned().collect();
666 [4, 7, 9, 2, 3].iter().cloned().collect();
667 disco.undecided = Some(undecided);
667 disco.undecided = Some(undecided);
668
668
669 let mut sample = vec![0];
669 let mut sample = vec![0];
670 disco.random_complete_sample(&mut sample, 3);
670 disco.random_complete_sample(&mut sample, 3);
671 assert_eq!(sample.len(), 3);
671 assert_eq!(sample.len(), 3);
672
672
673 let mut sample = vec![2, 4, 7];
673 let mut sample = vec![2, 4, 7];
674 disco.random_complete_sample(&mut sample, 1);
674 disco.random_complete_sample(&mut sample, 1);
675 assert_eq!(sample.len(), 3);
675 assert_eq!(sample.len(), 3);
676 }
676 }
677
677
678 #[test]
678 #[test]
679 fn test_bidirectional_sample() -> Result<(), GraphError> {
679 fn test_bidirectional_sample() -> Result<(), GraphError> {
680 let mut disco = full_disco();
680 let mut disco = full_disco();
681 disco.undecided = Some((0..=13).into_iter().collect());
681 disco.undecided = Some((0..=13).into_iter().collect());
682
682
683 let (sample_set, size) = disco.bidirectional_sample(7)?;
683 let (sample_set, size) = disco.bidirectional_sample(7)?;
684 assert_eq!(size, 7);
684 assert_eq!(size, 7);
685 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
685 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
686 sample.sort();
686 sample.sort();
687 // our DAG is a bit too small for the results to be really interesting
687 // our DAG is a bit too small for the results to be really interesting
688 // at least it shows that
688 // at least it shows that
689 // - we went both ways
689 // - we went both ways
690 // - we didn't take all Revisions (6 is not in the sample)
690 // - we didn't take all Revisions (6 is not in the sample)
691 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
691 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
692 Ok(())
692 Ok(())
693 }
693 }
694
695 }
694 }
@@ -1,382 +1,383 b''
1 // filepatterns.rs
1 // filepatterns.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Handling of Mercurial-specific patterns.
8 //! Handling of Mercurial-specific patterns.
9
9
10 use crate::{
10 use crate::{
11 utils::{files::get_path_from_bytes, SliceExt},
11 utils::{files::get_path_from_bytes, SliceExt},
12 LineNumber, PatternError, PatternFileError,
12 LineNumber, PatternError, PatternFileError,
13 };
13 };
14 use lazy_static::lazy_static;
14 use lazy_static::lazy_static;
15 use regex::bytes::{NoExpand, Regex};
15 use regex::bytes::{NoExpand, Regex};
16 use std::collections::HashMap;
16 use std::collections::HashMap;
17 use std::fs::File;
17 use std::fs::File;
18 use std::io::Read;
18 use std::io::Read;
19 use std::vec::Vec;
19 use std::vec::Vec;
20
20
21 lazy_static! {
21 lazy_static! {
22 static ref RE_ESCAPE: Vec<Vec<u8>> = {
22 static ref RE_ESCAPE: Vec<Vec<u8>> = {
23 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
23 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
24 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
24 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
25 for byte in to_escape {
25 for byte in to_escape {
26 v[*byte as usize].insert(0, b'\\');
26 v[*byte as usize].insert(0, b'\\');
27 }
27 }
28 v
28 v
29 };
29 };
30 }
30 }
31
31
32 /// These are matched in order
32 /// These are matched in order
33 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
33 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
34 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
34 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
35
35
36 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
36 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
37 pub enum PatternSyntax {
37 pub enum PatternSyntax {
38 Regexp,
38 Regexp,
39 /// Glob that matches at the front of the path
39 /// Glob that matches at the front of the path
40 RootGlob,
40 RootGlob,
41 /// Glob that matches at any suffix of the path (still anchored at slashes)
41 /// Glob that matches at any suffix of the path (still anchored at
42 /// slashes)
42 Glob,
43 Glob,
43 Path,
44 Path,
44 RelPath,
45 RelPath,
45 RelGlob,
46 RelGlob,
46 RelRegexp,
47 RelRegexp,
47 RootFiles,
48 RootFiles,
48 }
49 }
49
50
50 /// Transforms a glob pattern into a regex
51 /// Transforms a glob pattern into a regex
51 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
52 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
52 let mut input = pat;
53 let mut input = pat;
53 let mut res: Vec<u8> = vec![];
54 let mut res: Vec<u8> = vec![];
54 let mut group_depth = 0;
55 let mut group_depth = 0;
55
56
56 while let Some((c, rest)) = input.split_first() {
57 while let Some((c, rest)) = input.split_first() {
57 input = rest;
58 input = rest;
58
59
59 match c {
60 match c {
60 b'*' => {
61 b'*' => {
61 for (source, repl) in GLOB_REPLACEMENTS {
62 for (source, repl) in GLOB_REPLACEMENTS {
62 if input.starts_with(source) {
63 if input.starts_with(source) {
63 input = &input[source.len()..];
64 input = &input[source.len()..];
64 res.extend(*repl);
65 res.extend(*repl);
65 break;
66 break;
66 }
67 }
67 }
68 }
68 }
69 }
69 b'?' => res.extend(b"."),
70 b'?' => res.extend(b"."),
70 b'[' => {
71 b'[' => {
71 match input.iter().skip(1).position(|b| *b == b']') {
72 match input.iter().skip(1).position(|b| *b == b']') {
72 None => res.extend(b"\\["),
73 None => res.extend(b"\\["),
73 Some(end) => {
74 Some(end) => {
74 // Account for the one we skipped
75 // Account for the one we skipped
75 let end = end + 1;
76 let end = end + 1;
76
77
77 res.extend(b"[");
78 res.extend(b"[");
78
79
79 for (i, b) in input[..end].iter().enumerate() {
80 for (i, b) in input[..end].iter().enumerate() {
80 if *b == b'!' && i == 0 {
81 if *b == b'!' && i == 0 {
81 res.extend(b"^")
82 res.extend(b"^")
82 } else if *b == b'^' && i == 0 {
83 } else if *b == b'^' && i == 0 {
83 res.extend(b"\\^")
84 res.extend(b"\\^")
84 } else if *b == b'\\' {
85 } else if *b == b'\\' {
85 res.extend(b"\\\\")
86 res.extend(b"\\\\")
86 } else {
87 } else {
87 res.push(*b)
88 res.push(*b)
88 }
89 }
89 }
90 }
90 res.extend(b"]");
91 res.extend(b"]");
91 input = &input[end + 1..];
92 input = &input[end + 1..];
92 }
93 }
93 }
94 }
94 }
95 }
95 b'{' => {
96 b'{' => {
96 group_depth += 1;
97 group_depth += 1;
97 res.extend(b"(?:")
98 res.extend(b"(?:")
98 }
99 }
99 b'}' if group_depth > 0 => {
100 b'}' if group_depth > 0 => {
100 group_depth -= 1;
101 group_depth -= 1;
101 res.extend(b")");
102 res.extend(b")");
102 }
103 }
103 b',' if group_depth > 0 => res.extend(b"|"),
104 b',' if group_depth > 0 => res.extend(b"|"),
104 b'\\' => {
105 b'\\' => {
105 let c = {
106 let c = {
106 if let Some((c, rest)) = input.split_first() {
107 if let Some((c, rest)) = input.split_first() {
107 input = rest;
108 input = rest;
108 c
109 c
109 } else {
110 } else {
110 c
111 c
111 }
112 }
112 };
113 };
113 res.extend(&RE_ESCAPE[*c as usize])
114 res.extend(&RE_ESCAPE[*c as usize])
114 }
115 }
115 _ => res.extend(&RE_ESCAPE[*c as usize]),
116 _ => res.extend(&RE_ESCAPE[*c as usize]),
116 }
117 }
117 }
118 }
118 res
119 res
119 }
120 }
120
121
121 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
122 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
122 pattern
123 pattern
123 .iter()
124 .iter()
124 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
125 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
125 .collect()
126 .collect()
126 }
127 }
127
128
128 fn parse_pattern_syntax(kind: &[u8]) -> Result<PatternSyntax, PatternError> {
129 fn parse_pattern_syntax(kind: &[u8]) -> Result<PatternSyntax, PatternError> {
129 match kind {
130 match kind {
130 b"re" => Ok(PatternSyntax::Regexp),
131 b"re" => Ok(PatternSyntax::Regexp),
131 b"path" => Ok(PatternSyntax::Path),
132 b"path" => Ok(PatternSyntax::Path),
132 b"relpath" => Ok(PatternSyntax::RelPath),
133 b"relpath" => Ok(PatternSyntax::RelPath),
133 b"rootfilesin" => Ok(PatternSyntax::RootFiles),
134 b"rootfilesin" => Ok(PatternSyntax::RootFiles),
134 b"relglob" => Ok(PatternSyntax::RelGlob),
135 b"relglob" => Ok(PatternSyntax::RelGlob),
135 b"relre" => Ok(PatternSyntax::RelRegexp),
136 b"relre" => Ok(PatternSyntax::RelRegexp),
136 b"glob" => Ok(PatternSyntax::Glob),
137 b"glob" => Ok(PatternSyntax::Glob),
137 b"rootglob" => Ok(PatternSyntax::RootGlob),
138 b"rootglob" => Ok(PatternSyntax::RootGlob),
138 _ => Err(PatternError::UnsupportedSyntax(
139 _ => Err(PatternError::UnsupportedSyntax(
139 String::from_utf8_lossy(kind).to_string(),
140 String::from_utf8_lossy(kind).to_string(),
140 )),
141 )),
141 }
142 }
142 }
143 }
143
144
144 /// Builds the regex that corresponds to the given pattern.
145 /// Builds the regex that corresponds to the given pattern.
145 /// If within a `syntax: regexp` context, returns the pattern,
146 /// If within a `syntax: regexp` context, returns the pattern,
146 /// otherwise, returns the corresponding regex.
147 /// otherwise, returns the corresponding regex.
147 fn _build_single_regex(
148 fn _build_single_regex(
148 syntax: PatternSyntax,
149 syntax: PatternSyntax,
149 pattern: &[u8],
150 pattern: &[u8],
150 globsuffix: &[u8],
151 globsuffix: &[u8],
151 ) -> Vec<u8> {
152 ) -> Vec<u8> {
152 if pattern.is_empty() {
153 if pattern.is_empty() {
153 return vec![];
154 return vec![];
154 }
155 }
155 match syntax {
156 match syntax {
156 PatternSyntax::Regexp => pattern.to_owned(),
157 PatternSyntax::Regexp => pattern.to_owned(),
157 PatternSyntax::RelRegexp => {
158 PatternSyntax::RelRegexp => {
158 if pattern[0] == b'^' {
159 if pattern[0] == b'^' {
159 return pattern.to_owned();
160 return pattern.to_owned();
160 }
161 }
161 let mut res = b".*".to_vec();
162 let mut res = b".*".to_vec();
162 res.extend(pattern);
163 res.extend(pattern);
163 res
164 res
164 }
165 }
165 PatternSyntax::Path | PatternSyntax::RelPath => {
166 PatternSyntax::Path | PatternSyntax::RelPath => {
166 if pattern == b"." {
167 if pattern == b"." {
167 return vec![];
168 return vec![];
168 }
169 }
169 let mut pattern = escape_pattern(pattern);
170 let mut pattern = escape_pattern(pattern);
170 pattern.extend(b"(?:/|$)");
171 pattern.extend(b"(?:/|$)");
171 pattern
172 pattern
172 }
173 }
173 PatternSyntax::RootFiles => {
174 PatternSyntax::RootFiles => {
174 let mut res = if pattern == b"." {
175 let mut res = if pattern == b"." {
175 vec![]
176 vec![]
176 } else {
177 } else {
177 // Pattern is a directory name.
178 // Pattern is a directory name.
178 let mut as_vec: Vec<u8> = escape_pattern(pattern);
179 let mut as_vec: Vec<u8> = escape_pattern(pattern);
179 as_vec.push(b'/');
180 as_vec.push(b'/');
180 as_vec
181 as_vec
181 };
182 };
182
183
183 // Anything after the pattern must be a non-directory.
184 // Anything after the pattern must be a non-directory.
184 res.extend(b"[^/]+$");
185 res.extend(b"[^/]+$");
185 res
186 res
186 }
187 }
187 PatternSyntax::Glob
188 PatternSyntax::Glob
188 | PatternSyntax::RelGlob
189 | PatternSyntax::RelGlob
189 | PatternSyntax::RootGlob => {
190 | PatternSyntax::RootGlob => {
190 let mut res: Vec<u8> = vec![];
191 let mut res: Vec<u8> = vec![];
191 if syntax == PatternSyntax::RelGlob {
192 if syntax == PatternSyntax::RelGlob {
192 res.extend(b"(?:|.*/)");
193 res.extend(b"(?:|.*/)");
193 }
194 }
194
195
195 res.extend(glob_to_re(pattern));
196 res.extend(glob_to_re(pattern));
196 res.extend(globsuffix.iter());
197 res.extend(globsuffix.iter());
197 res
198 res
198 }
199 }
199 }
200 }
200 }
201 }
201
202
202 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
203 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
203 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
204 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
204
205
205 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
206 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
206 /// that don't need to be transformed into a regex.
207 /// that don't need to be transformed into a regex.
207 pub fn build_single_regex(
208 pub fn build_single_regex(
208 kind: &[u8],
209 kind: &[u8],
209 pat: &[u8],
210 pat: &[u8],
210 globsuffix: &[u8],
211 globsuffix: &[u8],
211 ) -> Result<Vec<u8>, PatternError> {
212 ) -> Result<Vec<u8>, PatternError> {
212 let enum_kind = parse_pattern_syntax(kind)?;
213 let enum_kind = parse_pattern_syntax(kind)?;
213 if enum_kind == PatternSyntax::RootGlob
214 if enum_kind == PatternSyntax::RootGlob
214 && !pat.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
215 && !pat.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
215 {
216 {
216 let mut escaped = escape_pattern(pat);
217 let mut escaped = escape_pattern(pat);
217 escaped.extend(b"(?:/|$)");
218 escaped.extend(b"(?:/|$)");
218 Ok(escaped)
219 Ok(escaped)
219 } else {
220 } else {
220 Ok(_build_single_regex(enum_kind, pat, globsuffix))
221 Ok(_build_single_regex(enum_kind, pat, globsuffix))
221 }
222 }
222 }
223 }
223
224
224 lazy_static! {
225 lazy_static! {
225 static ref SYNTAXES: HashMap<&'static [u8], &'static [u8]> = {
226 static ref SYNTAXES: HashMap<&'static [u8], &'static [u8]> = {
226 let mut m = HashMap::new();
227 let mut m = HashMap::new();
227
228
228 m.insert(b"re".as_ref(), b"relre:".as_ref());
229 m.insert(b"re".as_ref(), b"relre:".as_ref());
229 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
230 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
230 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
231 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
231 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
232 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
232 m.insert(b"include".as_ref(), b"include".as_ref());
233 m.insert(b"include".as_ref(), b"include".as_ref());
233 m.insert(b"subinclude".as_ref(), b"subinclude".as_ref());
234 m.insert(b"subinclude".as_ref(), b"subinclude".as_ref());
234 m
235 m
235 };
236 };
236 }
237 }
237
238
238 pub type PatternTuple = (Vec<u8>, LineNumber, Vec<u8>);
239 pub type PatternTuple = (Vec<u8>, LineNumber, Vec<u8>);
239 type WarningTuple = (Vec<u8>, Vec<u8>);
240 type WarningTuple = (Vec<u8>, Vec<u8>);
240
241
241 pub fn parse_pattern_file_contents(
242 pub fn parse_pattern_file_contents(
242 lines: &[u8],
243 lines: &[u8],
243 file_path: &[u8],
244 file_path: &[u8],
244 warn: bool,
245 warn: bool,
245 ) -> (Vec<PatternTuple>, Vec<WarningTuple>) {
246 ) -> (Vec<PatternTuple>, Vec<WarningTuple>) {
246 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
247 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
247 let comment_escape_regex = Regex::new(r"\\#").unwrap();
248 let comment_escape_regex = Regex::new(r"\\#").unwrap();
248 let mut inputs: Vec<PatternTuple> = vec![];
249 let mut inputs: Vec<PatternTuple> = vec![];
249 let mut warnings: Vec<WarningTuple> = vec![];
250 let mut warnings: Vec<WarningTuple> = vec![];
250
251
251 let mut current_syntax = b"relre:".as_ref();
252 let mut current_syntax = b"relre:".as_ref();
252
253
253 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
254 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
254 let line_number = line_number + 1;
255 let line_number = line_number + 1;
255
256
256 let line_buf;
257 let line_buf;
257 if line.contains(&b'#') {
258 if line.contains(&b'#') {
258 if let Some(cap) = comment_regex.captures(line) {
259 if let Some(cap) = comment_regex.captures(line) {
259 line = &line[..cap.get(1).unwrap().end()]
260 line = &line[..cap.get(1).unwrap().end()]
260 }
261 }
261 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
262 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
262 line = &line_buf;
263 line = &line_buf;
263 }
264 }
264
265
265 let mut line = line.trim_end();
266 let mut line = line.trim_end();
266
267
267 if line.is_empty() {
268 if line.is_empty() {
268 continue;
269 continue;
269 }
270 }
270
271
271 if line.starts_with(b"syntax:") {
272 if line.starts_with(b"syntax:") {
272 let syntax = line[b"syntax:".len()..].trim();
273 let syntax = line[b"syntax:".len()..].trim();
273
274
274 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
275 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
275 current_syntax = rel_syntax;
276 current_syntax = rel_syntax;
276 } else if warn {
277 } else if warn {
277 warnings.push((file_path.to_owned(), syntax.to_owned()));
278 warnings.push((file_path.to_owned(), syntax.to_owned()));
278 }
279 }
279 continue;
280 continue;
280 }
281 }
281
282
282 let mut line_syntax: &[u8] = &current_syntax;
283 let mut line_syntax: &[u8] = &current_syntax;
283
284
284 for (s, rels) in SYNTAXES.iter() {
285 for (s, rels) in SYNTAXES.iter() {
285 if line.starts_with(rels) {
286 if line.starts_with(rels) {
286 line_syntax = rels;
287 line_syntax = rels;
287 line = &line[rels.len()..];
288 line = &line[rels.len()..];
288 break;
289 break;
289 } else if line.starts_with(&[s, b":".as_ref()].concat()) {
290 } else if line.starts_with(&[s, b":".as_ref()].concat()) {
290 line_syntax = rels;
291 line_syntax = rels;
291 line = &line[s.len() + 1..];
292 line = &line[s.len() + 1..];
292 break;
293 break;
293 }
294 }
294 }
295 }
295
296
296 inputs.push((
297 inputs.push((
297 [line_syntax, line].concat(),
298 [line_syntax, line].concat(),
298 line_number,
299 line_number,
299 line.to_owned(),
300 line.to_owned(),
300 ));
301 ));
301 }
302 }
302 (inputs, warnings)
303 (inputs, warnings)
303 }
304 }
304
305
305 pub fn read_pattern_file(
306 pub fn read_pattern_file(
306 file_path: &[u8],
307 file_path: &[u8],
307 warn: bool,
308 warn: bool,
308 ) -> Result<(Vec<PatternTuple>, Vec<WarningTuple>), PatternFileError> {
309 ) -> Result<(Vec<PatternTuple>, Vec<WarningTuple>), PatternFileError> {
309 let mut f = File::open(get_path_from_bytes(file_path))?;
310 let mut f = File::open(get_path_from_bytes(file_path))?;
310 let mut contents = Vec::new();
311 let mut contents = Vec::new();
311
312
312 f.read_to_end(&mut contents)?;
313 f.read_to_end(&mut contents)?;
313
314
314 Ok(parse_pattern_file_contents(&contents, file_path, warn))
315 Ok(parse_pattern_file_contents(&contents, file_path, warn))
315 }
316 }
316
317
317 #[cfg(test)]
318 #[cfg(test)]
318 mod tests {
319 mod tests {
319 use super::*;
320 use super::*;
320
321
321 #[test]
322 #[test]
322 fn escape_pattern_test() {
323 fn escape_pattern_test() {
323 let untouched = br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
324 let untouched = br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
324 assert_eq!(escape_pattern(untouched), untouched.to_vec());
325 assert_eq!(escape_pattern(untouched), untouched.to_vec());
325 // All escape codes
326 // All escape codes
326 assert_eq!(
327 assert_eq!(
327 escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
328 escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
328 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
329 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
329 .to_vec()
330 .to_vec()
330 );
331 );
331 }
332 }
332
333
333 #[test]
334 #[test]
334 fn glob_test() {
335 fn glob_test() {
335 assert_eq!(glob_to_re(br#"?"#), br#"."#);
336 assert_eq!(glob_to_re(br#"?"#), br#"."#);
336 assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
337 assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
337 assert_eq!(glob_to_re(br#"**"#), br#".*"#);
338 assert_eq!(glob_to_re(br#"**"#), br#".*"#);
338 assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
339 assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
339 assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
340 assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
340 assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
341 assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
341 assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
342 assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
342 assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
343 assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
343 }
344 }
344
345
345 #[test]
346 #[test]
346 fn test_parse_pattern_file_contents() {
347 fn test_parse_pattern_file_contents() {
347 let lines = b"syntax: glob\n*.elc";
348 let lines = b"syntax: glob\n*.elc";
348
349
349 assert_eq!(
350 assert_eq!(
350 vec![(b"relglob:*.elc".to_vec(), 2, b"*.elc".to_vec())],
351 vec![(b"relglob:*.elc".to_vec(), 2, b"*.elc".to_vec())],
351 parse_pattern_file_contents(lines, b"file_path", false).0,
352 parse_pattern_file_contents(lines, b"file_path", false).0,
352 );
353 );
353
354
354 let lines = b"syntax: include\nsyntax: glob";
355 let lines = b"syntax: include\nsyntax: glob";
355
356
356 assert_eq!(
357 assert_eq!(
357 parse_pattern_file_contents(lines, b"file_path", false).0,
358 parse_pattern_file_contents(lines, b"file_path", false).0,
358 vec![]
359 vec![]
359 );
360 );
360 let lines = b"glob:**.o";
361 let lines = b"glob:**.o";
361 assert_eq!(
362 assert_eq!(
362 parse_pattern_file_contents(lines, b"file_path", false).0,
363 parse_pattern_file_contents(lines, b"file_path", false).0,
363 vec![(b"relglob:**.o".to_vec(), 1, b"**.o".to_vec())]
364 vec![(b"relglob:**.o".to_vec(), 1, b"**.o".to_vec())]
364 );
365 );
365 }
366 }
366
367
367 #[test]
368 #[test]
368 fn test_build_single_regex_shortcut() {
369 fn test_build_single_regex_shortcut() {
369 assert_eq!(
370 assert_eq!(
370 br"(?:/|$)".to_vec(),
371 br"(?:/|$)".to_vec(),
371 build_single_regex(b"rootglob", b"", b"").unwrap()
372 build_single_regex(b"rootglob", b"", b"").unwrap()
372 );
373 );
373 assert_eq!(
374 assert_eq!(
374 br"whatever(?:/|$)".to_vec(),
375 br"whatever(?:/|$)".to_vec(),
375 build_single_regex(b"rootglob", b"whatever", b"").unwrap()
376 build_single_regex(b"rootglob", b"whatever", b"").unwrap()
376 );
377 );
377 assert_eq!(
378 assert_eq!(
378 br"[^/]*\.o".to_vec(),
379 br"[^/]*\.o".to_vec(),
379 build_single_regex(b"rootglob", b"*.o", b"").unwrap()
380 build_single_regex(b"rootglob", b"*.o", b"").unwrap()
380 );
381 );
381 }
382 }
382 }
383 }
@@ -1,84 +1,83 b''
1 // utils module
1 // utils module
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Contains useful functions, traits, structs, etc. for use in core.
8 //! Contains useful functions, traits, structs, etc. for use in core.
9
9
10 pub mod files;
10 pub mod files;
11
11
12 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
12 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
13 ///
13 ///
14 /// # Examples
14 /// # Examples
15 ///
15 ///
16 /// ```
16 /// ```
17 /// use crate::hg::utils::replace_slice;
17 /// use crate::hg::utils::replace_slice;
18 /// let mut line = b"I hate writing tests!".to_vec();
18 /// let mut line = b"I hate writing tests!".to_vec();
19 /// replace_slice(&mut line, b"hate", b"love");
19 /// replace_slice(&mut line, b"hate", b"love");
20 /// assert_eq!(
20 /// assert_eq!(
21 /// line,
21 /// line,
22 /// b"I love writing tests!".to_vec()
22 /// b"I love writing tests!".to_vec()
23 ///);
23 /// );
24 ///
25 /// ```
24 /// ```
26 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
25 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
27 where
26 where
28 T: Clone + PartialEq,
27 T: Clone + PartialEq,
29 {
28 {
30 if buf.len() < from.len() || from.len() != to.len() {
29 if buf.len() < from.len() || from.len() != to.len() {
31 return;
30 return;
32 }
31 }
33 for i in 0..=buf.len() - from.len() {
32 for i in 0..=buf.len() - from.len() {
34 if buf[i..].starts_with(from) {
33 if buf[i..].starts_with(from) {
35 buf[i..(i + from.len())].clone_from_slice(to);
34 buf[i..(i + from.len())].clone_from_slice(to);
36 }
35 }
37 }
36 }
38 }
37 }
39
38
40 pub trait SliceExt {
39 pub trait SliceExt {
41 fn trim_end(&self) -> &Self;
40 fn trim_end(&self) -> &Self;
42 fn trim_start(&self) -> &Self;
41 fn trim_start(&self) -> &Self;
43 fn trim(&self) -> &Self;
42 fn trim(&self) -> &Self;
44 }
43 }
45
44
46 fn is_not_whitespace(c: &u8) -> bool {
45 fn is_not_whitespace(c: &u8) -> bool {
47 !(*c as char).is_whitespace()
46 !(*c as char).is_whitespace()
48 }
47 }
49
48
50 impl SliceExt for [u8] {
49 impl SliceExt for [u8] {
51 fn trim_end(&self) -> &[u8] {
50 fn trim_end(&self) -> &[u8] {
52 if let Some(last) = self.iter().rposition(is_not_whitespace) {
51 if let Some(last) = self.iter().rposition(is_not_whitespace) {
53 &self[..last + 1]
52 &self[..last + 1]
54 } else {
53 } else {
55 &[]
54 &[]
56 }
55 }
57 }
56 }
58 fn trim_start(&self) -> &[u8] {
57 fn trim_start(&self) -> &[u8] {
59 if let Some(first) = self.iter().position(is_not_whitespace) {
58 if let Some(first) = self.iter().position(is_not_whitespace) {
60 &self[first..]
59 &self[first..]
61 } else {
60 } else {
62 &[]
61 &[]
63 }
62 }
64 }
63 }
65
64
66 /// ```
65 /// ```
67 /// use hg::utils::SliceExt;
66 /// use hg::utils::SliceExt;
68 /// assert_eq!(
67 /// assert_eq!(
69 /// b" to trim ".trim(),
68 /// b" to trim ".trim(),
70 /// b"to trim"
69 /// b"to trim"
71 /// );
70 /// );
72 /// assert_eq!(
71 /// assert_eq!(
73 /// b"to trim ".trim(),
72 /// b"to trim ".trim(),
74 /// b"to trim"
73 /// b"to trim"
75 /// );
74 /// );
76 /// assert_eq!(
75 /// assert_eq!(
77 /// b" to trim".trim(),
76 /// b" to trim".trim(),
78 /// b"to trim"
77 /// b"to trim"
79 /// );
78 /// );
80 /// ```
79 /// ```
81 fn trim(&self) -> &[u8] {
80 fn trim(&self) -> &[u8] {
82 self.trim_start().trim_end()
81 self.trim_start().trim_end()
83 }
82 }
84 }
83 }
@@ -1,124 +1,125 b''
1 // filepatterns.rs
1 // filepatterns.rs
2 //
2 //
3 // Copyright 2019, Georges Racinet <gracinet@anybox.fr>,
3 // Copyright 2019, Georges Racinet <gracinet@anybox.fr>,
4 // Raphaël Gomès <rgomes@octobus.net>
4 // Raphaël Gomès <rgomes@octobus.net>
5 //
5 //
6 // This software may be used and distributed according to the terms of the
6 // This software may be used and distributed according to the terms of the
7 // GNU General Public License version 2 or any later version.
7 // GNU General Public License version 2 or any later version.
8
8
9 //! Bindings for the `hg::filepatterns` module provided by the
9 //! Bindings for the `hg::filepatterns` module provided by the
10 //! `hg-core` crate. From Python, this will be seen as `rustext.filepatterns`
10 //! `hg-core` crate. From Python, this will be seen as `rustext.filepatterns`
11 //! and can be used as replacement for the the pure `filepatterns` Python module.
11 //! and can be used as replacement for the the pure `filepatterns` Python
12 //! module.
12 //!
13 //!
13 use crate::exceptions::{PatternError, PatternFileError};
14 use crate::exceptions::{PatternError, PatternFileError};
14 use cpython::{
15 use cpython::{
15 PyBytes, PyDict, PyModule, PyObject, PyResult, PyTuple, Python, ToPyObject,
16 PyBytes, PyDict, PyModule, PyObject, PyResult, PyTuple, Python, ToPyObject,
16 };
17 };
17 use hg::{build_single_regex, read_pattern_file, LineNumber, PatternTuple};
18 use hg::{build_single_regex, read_pattern_file, LineNumber, PatternTuple};
18
19
19 /// Rust does not like functions with different return signatures.
20 /// Rust does not like functions with different return signatures.
20 /// The 3-tuple version is always returned by the hg-core function,
21 /// The 3-tuple version is always returned by the hg-core function,
21 /// the (potential) conversion is handled at this level since it is not likely
22 /// the (potential) conversion is handled at this level since it is not likely
22 /// to have any measurable impact on performance.
23 /// to have any measurable impact on performance.
23 ///
24 ///
24 /// The Python implementation passes a function reference for `warn` instead
25 /// The Python implementation passes a function reference for `warn` instead
25 /// of a boolean that is used to emit warnings while parsing. The Rust
26 /// of a boolean that is used to emit warnings while parsing. The Rust
26 /// implementation chooses to accumulate the warnings and propagate them to
27 /// implementation chooses to accumulate the warnings and propagate them to
27 /// Python upon completion. See the `readpatternfile` function in `match.py`
28 /// Python upon completion. See the `readpatternfile` function in `match.py`
28 /// for more details.
29 /// for more details.
29 fn read_pattern_file_wrapper(
30 fn read_pattern_file_wrapper(
30 py: Python,
31 py: Python,
31 file_path: PyObject,
32 file_path: PyObject,
32 warn: bool,
33 warn: bool,
33 source_info: bool,
34 source_info: bool,
34 ) -> PyResult<PyTuple> {
35 ) -> PyResult<PyTuple> {
35 match read_pattern_file(file_path.extract::<PyBytes>(py)?.data(py), warn) {
36 match read_pattern_file(file_path.extract::<PyBytes>(py)?.data(py), warn) {
36 Ok((patterns, warnings)) => {
37 Ok((patterns, warnings)) => {
37 if source_info {
38 if source_info {
38 let itemgetter = |x: &PatternTuple| {
39 let itemgetter = |x: &PatternTuple| {
39 (PyBytes::new(py, &x.0), x.1, PyBytes::new(py, &x.2))
40 (PyBytes::new(py, &x.0), x.1, PyBytes::new(py, &x.2))
40 };
41 };
41 let results: Vec<(PyBytes, LineNumber, PyBytes)> =
42 let results: Vec<(PyBytes, LineNumber, PyBytes)> =
42 patterns.iter().map(itemgetter).collect();
43 patterns.iter().map(itemgetter).collect();
43 return Ok((results, warnings_to_py_bytes(py, &warnings))
44 return Ok((results, warnings_to_py_bytes(py, &warnings))
44 .to_py_object(py));
45 .to_py_object(py));
45 }
46 }
46 let itemgetter = |x: &PatternTuple| PyBytes::new(py, &x.0);
47 let itemgetter = |x: &PatternTuple| PyBytes::new(py, &x.0);
47 let results: Vec<PyBytes> =
48 let results: Vec<PyBytes> =
48 patterns.iter().map(itemgetter).collect();
49 patterns.iter().map(itemgetter).collect();
49 Ok(
50 Ok(
50 (results, warnings_to_py_bytes(py, &warnings))
51 (results, warnings_to_py_bytes(py, &warnings))
51 .to_py_object(py),
52 .to_py_object(py),
52 )
53 )
53 }
54 }
54 Err(e) => Err(PatternFileError::pynew(py, e)),
55 Err(e) => Err(PatternFileError::pynew(py, e)),
55 }
56 }
56 }
57 }
57
58
58 fn warnings_to_py_bytes(
59 fn warnings_to_py_bytes(
59 py: Python,
60 py: Python,
60 warnings: &[(Vec<u8>, Vec<u8>)],
61 warnings: &[(Vec<u8>, Vec<u8>)],
61 ) -> Vec<(PyBytes, PyBytes)> {
62 ) -> Vec<(PyBytes, PyBytes)> {
62 warnings
63 warnings
63 .iter()
64 .iter()
64 .map(|(path, syn)| (PyBytes::new(py, path), PyBytes::new(py, syn)))
65 .map(|(path, syn)| (PyBytes::new(py, path), PyBytes::new(py, syn)))
65 .collect()
66 .collect()
66 }
67 }
67
68
68 fn build_single_regex_wrapper(
69 fn build_single_regex_wrapper(
69 py: Python,
70 py: Python,
70 kind: PyObject,
71 kind: PyObject,
71 pat: PyObject,
72 pat: PyObject,
72 globsuffix: PyObject,
73 globsuffix: PyObject,
73 ) -> PyResult<PyBytes> {
74 ) -> PyResult<PyBytes> {
74 match build_single_regex(
75 match build_single_regex(
75 kind.extract::<PyBytes>(py)?.data(py),
76 kind.extract::<PyBytes>(py)?.data(py),
76 pat.extract::<PyBytes>(py)?.data(py),
77 pat.extract::<PyBytes>(py)?.data(py),
77 globsuffix.extract::<PyBytes>(py)?.data(py),
78 globsuffix.extract::<PyBytes>(py)?.data(py),
78 ) {
79 ) {
79 Ok(regex) => Ok(PyBytes::new(py, &regex)),
80 Ok(regex) => Ok(PyBytes::new(py, &regex)),
80 Err(e) => Err(PatternError::pynew(py, e)),
81 Err(e) => Err(PatternError::pynew(py, e)),
81 }
82 }
82 }
83 }
83
84
84 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
85 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
85 let dotted_name = &format!("{}.filepatterns", package);
86 let dotted_name = &format!("{}.filepatterns", package);
86 let m = PyModule::new(py, dotted_name)?;
87 let m = PyModule::new(py, dotted_name)?;
87
88
88 m.add(py, "__package__", package)?;
89 m.add(py, "__package__", package)?;
89 m.add(
90 m.add(
90 py,
91 py,
91 "__doc__",
92 "__doc__",
92 "Patterns files parsing - Rust implementation",
93 "Patterns files parsing - Rust implementation",
93 )?;
94 )?;
94 m.add(
95 m.add(
95 py,
96 py,
96 "build_single_regex",
97 "build_single_regex",
97 py_fn!(
98 py_fn!(
98 py,
99 py,
99 build_single_regex_wrapper(
100 build_single_regex_wrapper(
100 kind: PyObject,
101 kind: PyObject,
101 pat: PyObject,
102 pat: PyObject,
102 globsuffix: PyObject
103 globsuffix: PyObject
103 )
104 )
104 ),
105 ),
105 )?;
106 )?;
106 m.add(
107 m.add(
107 py,
108 py,
108 "read_pattern_file",
109 "read_pattern_file",
109 py_fn!(
110 py_fn!(
110 py,
111 py,
111 read_pattern_file_wrapper(
112 read_pattern_file_wrapper(
112 file_path: PyObject,
113 file_path: PyObject,
113 warn: bool,
114 warn: bool,
114 source_info: bool
115 source_info: bool
115 )
116 )
116 ),
117 ),
117 )?;
118 )?;
118 m.add(py, "PatternError", py.get_type::<PatternError>())?;
119 m.add(py, "PatternError", py.get_type::<PatternError>())?;
119 let sys = PyModule::import(py, "sys")?;
120 let sys = PyModule::import(py, "sys")?;
120 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
121 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
121 sys_modules.set_item(py, dotted_name, &m)?;
122 sys_modules.set_item(py, dotted_name, &m)?;
122
123
123 Ok(m)
124 Ok(m)
124 }
125 }
General Comments 0
You need to be logged in to leave comments. Login now