##// END OF EJS Templates
rust: do a clippy pass...
Raphaël Gomès -
r45500:26114bd6 default
parent child Browse files
Show More
@@ -1,787 +1,787
// ancestors.rs
//
// Copyright 2018 Georges Racinet <gracinet@anybox.fr>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.

//! Rust versions of generic DAG ancestors algorithms for Mercurial
use super::{Graph, GraphError, Revision, NULL_REVISION};
use crate::dagops;
use std::cmp::max;
use std::collections::{BinaryHeap, HashSet};
14
14
15 /// Iterator over the ancestors of a given list of revisions
15 /// Iterator over the ancestors of a given list of revisions
16 /// This is a generic type, defined and implemented for any Graph, so that
16 /// This is a generic type, defined and implemented for any Graph, so that
17 /// it's easy to
17 /// it's easy to
18 ///
18 ///
19 /// - unit test in pure Rust
19 /// - unit test in pure Rust
20 /// - bind to main Mercurial code, potentially in several ways and have these
20 /// - bind to main Mercurial code, potentially in several ways and have these
21 /// bindings evolve over time
21 /// bindings evolve over time
22 pub struct AncestorsIterator<G: Graph> {
22 pub struct AncestorsIterator<G: Graph> {
23 graph: G,
23 graph: G,
24 visit: BinaryHeap<Revision>,
24 visit: BinaryHeap<Revision>,
25 seen: HashSet<Revision>,
25 seen: HashSet<Revision>,
26 stoprev: Revision,
26 stoprev: Revision,
27 }
27 }
28
28
29 /// Lazy ancestors set, backed by AncestorsIterator
29 /// Lazy ancestors set, backed by AncestorsIterator
30 pub struct LazyAncestors<G: Graph + Clone> {
30 pub struct LazyAncestors<G: Graph + Clone> {
31 graph: G,
31 graph: G,
32 containsiter: AncestorsIterator<G>,
32 containsiter: AncestorsIterator<G>,
33 initrevs: Vec<Revision>,
33 initrevs: Vec<Revision>,
34 stoprev: Revision,
34 stoprev: Revision,
35 inclusive: bool,
35 inclusive: bool,
36 }
36 }
37
37
38 pub struct MissingAncestors<G: Graph> {
38 pub struct MissingAncestors<G: Graph> {
39 graph: G,
39 graph: G,
40 bases: HashSet<Revision>,
40 bases: HashSet<Revision>,
41 max_base: Revision,
41 max_base: Revision,
42 }
42 }
43
43
44 impl<G: Graph> AncestorsIterator<G> {
44 impl<G: Graph> AncestorsIterator<G> {
45 /// Constructor.
45 /// Constructor.
46 ///
46 ///
47 /// if `inclusive` is true, then the init revisions are emitted in
47 /// if `inclusive` is true, then the init revisions are emitted in
48 /// particular, otherwise iteration starts from their parents.
48 /// particular, otherwise iteration starts from their parents.
49 pub fn new(
49 pub fn new(
50 graph: G,
50 graph: G,
51 initrevs: impl IntoIterator<Item = Revision>,
51 initrevs: impl IntoIterator<Item = Revision>,
52 stoprev: Revision,
52 stoprev: Revision,
53 inclusive: bool,
53 inclusive: bool,
54 ) -> Result<Self, GraphError> {
54 ) -> Result<Self, GraphError> {
55 let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
55 let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
56 if inclusive {
56 if inclusive {
57 let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
57 let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
58 let seen = visit.iter().map(|&x| x).collect();
58 let seen = visit.iter().cloned().collect();
59 return Ok(AncestorsIterator {
59 return Ok(AncestorsIterator {
60 visit: visit,
60 visit,
61 seen: seen,
61 seen,
62 stoprev: stoprev,
62 stoprev,
63 graph: graph,
63 graph,
64 });
64 });
65 }
65 }
66 let mut this = AncestorsIterator {
66 let mut this = AncestorsIterator {
67 visit: BinaryHeap::new(),
67 visit: BinaryHeap::new(),
68 seen: HashSet::new(),
68 seen: HashSet::new(),
69 stoprev: stoprev,
69 stoprev,
70 graph: graph,
70 graph,
71 };
71 };
72 this.seen.insert(NULL_REVISION);
72 this.seen.insert(NULL_REVISION);
73 for rev in filtered_initrevs {
73 for rev in filtered_initrevs {
74 for parent in this.graph.parents(rev)?.iter().cloned() {
74 for parent in this.graph.parents(rev)?.iter().cloned() {
75 this.conditionally_push_rev(parent);
75 this.conditionally_push_rev(parent);
76 }
76 }
77 }
77 }
78 Ok(this)
78 Ok(this)
79 }
79 }
80
80
81 #[inline]
81 #[inline]
82 fn conditionally_push_rev(&mut self, rev: Revision) {
82 fn conditionally_push_rev(&mut self, rev: Revision) {
83 if self.stoprev <= rev && self.seen.insert(rev) {
83 if self.stoprev <= rev && self.seen.insert(rev) {
84 self.visit.push(rev);
84 self.visit.push(rev);
85 }
85 }
86 }
86 }
87
87
88 /// Consumes partially the iterator to tell if the given target
88 /// Consumes partially the iterator to tell if the given target
89 /// revision
89 /// revision
90 /// is in the ancestors it emits.
90 /// is in the ancestors it emits.
91 /// This is meant for iterators actually dedicated to that kind of
91 /// This is meant for iterators actually dedicated to that kind of
92 /// purpose
92 /// purpose
93 pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
93 pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
94 if self.seen.contains(&target) && target != NULL_REVISION {
94 if self.seen.contains(&target) && target != NULL_REVISION {
95 return Ok(true);
95 return Ok(true);
96 }
96 }
97 for item in self {
97 for item in self {
98 let rev = item?;
98 let rev = item?;
99 if rev == target {
99 if rev == target {
100 return Ok(true);
100 return Ok(true);
101 }
101 }
102 if rev < target {
102 if rev < target {
103 return Ok(false);
103 return Ok(false);
104 }
104 }
105 }
105 }
106 Ok(false)
106 Ok(false)
107 }
107 }
108
108
109 pub fn peek(&self) -> Option<Revision> {
109 pub fn peek(&self) -> Option<Revision> {
110 self.visit.peek().map(|&r| r)
110 self.visit.peek().cloned()
111 }
111 }
112
112
113 /// Tell if the iterator is about an empty set
113 /// Tell if the iterator is about an empty set
114 ///
114 ///
115 /// The result does not depend whether the iterator has been consumed
115 /// The result does not depend whether the iterator has been consumed
116 /// or not.
116 /// or not.
117 /// This is mostly meant for iterators backing a lazy ancestors set
117 /// This is mostly meant for iterators backing a lazy ancestors set
118 pub fn is_empty(&self) -> bool {
118 pub fn is_empty(&self) -> bool {
119 if self.visit.len() > 0 {
119 if self.visit.len() > 0 {
120 return false;
120 return false;
121 }
121 }
122 if self.seen.len() > 1 {
122 if self.seen.len() > 1 {
123 return false;
123 return false;
124 }
124 }
125 // at this point, the seen set is at most a singleton.
125 // at this point, the seen set is at most a singleton.
126 // If not `self.inclusive`, it's still possible that it has only
126 // If not `self.inclusive`, it's still possible that it has only
127 // the null revision
127 // the null revision
128 self.seen.is_empty() || self.seen.contains(&NULL_REVISION)
128 self.seen.is_empty() || self.seen.contains(&NULL_REVISION)
129 }
129 }
130 }
130 }
131
131
132 /// Main implementation for the iterator
132 /// Main implementation for the iterator
133 ///
133 ///
134 /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py`
134 /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py`
135 /// with a few non crucial differences:
135 /// with a few non crucial differences:
136 ///
136 ///
137 /// - there's no filtering of invalid parent revisions. Actually, it should be
137 /// - there's no filtering of invalid parent revisions. Actually, it should be
138 /// consistent and more efficient to filter them from the end caller.
138 /// consistent and more efficient to filter them from the end caller.
139 /// - we don't have the optimization for adjacent revisions (i.e., the case
139 /// - we don't have the optimization for adjacent revisions (i.e., the case
140 /// where `p1 == rev - 1`), because it amounts to update the first element of
140 /// where `p1 == rev - 1`), because it amounts to update the first element of
141 /// the heap without sifting, which Rust's BinaryHeap doesn't let us do.
141 /// the heap without sifting, which Rust's BinaryHeap doesn't let us do.
142 /// - we save a few pushes by comparing with `stoprev` before pushing
142 /// - we save a few pushes by comparing with `stoprev` before pushing
143 impl<G: Graph> Iterator for AncestorsIterator<G> {
143 impl<G: Graph> Iterator for AncestorsIterator<G> {
144 type Item = Result<Revision, GraphError>;
144 type Item = Result<Revision, GraphError>;
145
145
146 fn next(&mut self) -> Option<Self::Item> {
146 fn next(&mut self) -> Option<Self::Item> {
147 let current = match self.visit.peek() {
147 let current = match self.visit.peek() {
148 None => {
148 None => {
149 return None;
149 return None;
150 }
150 }
151 Some(c) => *c,
151 Some(c) => *c,
152 };
152 };
153 let [p1, p2] = match self.graph.parents(current) {
153 let [p1, p2] = match self.graph.parents(current) {
154 Ok(ps) => ps,
154 Ok(ps) => ps,
155 Err(e) => return Some(Err(e)),
155 Err(e) => return Some(Err(e)),
156 };
156 };
157 if p1 < self.stoprev || !self.seen.insert(p1) {
157 if p1 < self.stoprev || !self.seen.insert(p1) {
158 self.visit.pop();
158 self.visit.pop();
159 } else {
159 } else {
160 *(self.visit.peek_mut().unwrap()) = p1;
160 *(self.visit.peek_mut().unwrap()) = p1;
161 };
161 };
162
162
163 self.conditionally_push_rev(p2);
163 self.conditionally_push_rev(p2);
164 Some(Ok(current))
164 Some(Ok(current))
165 }
165 }
166 }
166 }
167
167
168 impl<G: Graph + Clone> LazyAncestors<G> {
168 impl<G: Graph + Clone> LazyAncestors<G> {
169 pub fn new(
169 pub fn new(
170 graph: G,
170 graph: G,
171 initrevs: impl IntoIterator<Item = Revision>,
171 initrevs: impl IntoIterator<Item = Revision>,
172 stoprev: Revision,
172 stoprev: Revision,
173 inclusive: bool,
173 inclusive: bool,
174 ) -> Result<Self, GraphError> {
174 ) -> Result<Self, GraphError> {
175 let v: Vec<Revision> = initrevs.into_iter().collect();
175 let v: Vec<Revision> = initrevs.into_iter().collect();
176 Ok(LazyAncestors {
176 Ok(LazyAncestors {
177 graph: graph.clone(),
177 graph: graph.clone(),
178 containsiter: AncestorsIterator::new(
178 containsiter: AncestorsIterator::new(
179 graph,
179 graph,
180 v.iter().cloned(),
180 v.iter().cloned(),
181 stoprev,
181 stoprev,
182 inclusive,
182 inclusive,
183 )?,
183 )?,
184 initrevs: v,
184 initrevs: v,
185 stoprev: stoprev,
185 stoprev,
186 inclusive: inclusive,
186 inclusive,
187 })
187 })
188 }
188 }
189
189
190 pub fn contains(&mut self, rev: Revision) -> Result<bool, GraphError> {
190 pub fn contains(&mut self, rev: Revision) -> Result<bool, GraphError> {
191 self.containsiter.contains(rev)
191 self.containsiter.contains(rev)
192 }
192 }
193
193
194 pub fn is_empty(&self) -> bool {
194 pub fn is_empty(&self) -> bool {
195 self.containsiter.is_empty()
195 self.containsiter.is_empty()
196 }
196 }
197
197
198 pub fn iter(&self) -> AncestorsIterator<G> {
198 pub fn iter(&self) -> AncestorsIterator<G> {
199 // the arguments being the same as for self.containsiter, we know
199 // the arguments being the same as for self.containsiter, we know
200 // for sure that AncestorsIterator constructor can't fail
200 // for sure that AncestorsIterator constructor can't fail
201 AncestorsIterator::new(
201 AncestorsIterator::new(
202 self.graph.clone(),
202 self.graph.clone(),
203 self.initrevs.iter().cloned(),
203 self.initrevs.iter().cloned(),
204 self.stoprev,
204 self.stoprev,
205 self.inclusive,
205 self.inclusive,
206 )
206 )
207 .unwrap()
207 .unwrap()
208 }
208 }
209 }
209 }
210
210
211 impl<G: Graph> MissingAncestors<G> {
211 impl<G: Graph> MissingAncestors<G> {
212 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
212 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
213 let mut created = MissingAncestors {
213 let mut created = MissingAncestors {
214 graph: graph,
214 graph,
215 bases: HashSet::new(),
215 bases: HashSet::new(),
216 max_base: NULL_REVISION,
216 max_base: NULL_REVISION,
217 };
217 };
218 created.add_bases(bases);
218 created.add_bases(bases);
219 created
219 created
220 }
220 }
221
221
222 pub fn has_bases(&self) -> bool {
222 pub fn has_bases(&self) -> bool {
223 !self.bases.is_empty()
223 !self.bases.is_empty()
224 }
224 }
225
225
226 /// Return a reference to current bases.
226 /// Return a reference to current bases.
227 ///
227 ///
228 /// This is useful in unit tests, but also setdiscovery.py does
228 /// This is useful in unit tests, but also setdiscovery.py does
229 /// read the bases attribute of a ancestor.missingancestors instance.
229 /// read the bases attribute of a ancestor.missingancestors instance.
230 pub fn get_bases<'a>(&'a self) -> &'a HashSet<Revision> {
230 pub fn get_bases<'a>(&'a self) -> &'a HashSet<Revision> {
231 &self.bases
231 &self.bases
232 }
232 }
233
233
234 /// Computes the relative heads of current bases.
234 /// Computes the relative heads of current bases.
235 ///
235 ///
236 /// The object is still usable after this.
236 /// The object is still usable after this.
237 pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
237 pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
238 dagops::heads(&self.graph, self.bases.iter())
238 dagops::heads(&self.graph, self.bases.iter())
239 }
239 }
240
240
241 /// Consumes the object and returns the relative heads of its bases.
241 /// Consumes the object and returns the relative heads of its bases.
242 pub fn into_bases_heads(
242 pub fn into_bases_heads(
243 mut self,
243 mut self,
244 ) -> Result<HashSet<Revision>, GraphError> {
244 ) -> Result<HashSet<Revision>, GraphError> {
245 dagops::retain_heads(&self.graph, &mut self.bases)?;
245 dagops::retain_heads(&self.graph, &mut self.bases)?;
246 Ok(self.bases)
246 Ok(self.bases)
247 }
247 }
248
248
249 /// Add some revisions to `self.bases`
249 /// Add some revisions to `self.bases`
250 ///
250 ///
251 /// Takes care of keeping `self.max_base` up to date.
251 /// Takes care of keeping `self.max_base` up to date.
252 pub fn add_bases(
252 pub fn add_bases(
253 &mut self,
253 &mut self,
254 new_bases: impl IntoIterator<Item = Revision>,
254 new_bases: impl IntoIterator<Item = Revision>,
255 ) {
255 ) {
256 let mut max_base = self.max_base;
256 let mut max_base = self.max_base;
257 self.bases.extend(
257 self.bases.extend(
258 new_bases
258 new_bases
259 .into_iter()
259 .into_iter()
260 .filter(|&rev| rev != NULL_REVISION)
260 .filter(|&rev| rev != NULL_REVISION)
261 .map(|r| {
261 .map(|r| {
262 if r > max_base {
262 if r > max_base {
263 max_base = r;
263 max_base = r;
264 }
264 }
265 r
265 r
266 }),
266 }),
267 );
267 );
268 self.max_base = max_base;
268 self.max_base = max_base;
269 }
269 }
270
270
271 /// Remove all ancestors of self.bases from the revs set (in place)
271 /// Remove all ancestors of self.bases from the revs set (in place)
272 pub fn remove_ancestors_from(
272 pub fn remove_ancestors_from(
273 &mut self,
273 &mut self,
274 revs: &mut HashSet<Revision>,
274 revs: &mut HashSet<Revision>,
275 ) -> Result<(), GraphError> {
275 ) -> Result<(), GraphError> {
276 revs.retain(|r| !self.bases.contains(r));
276 revs.retain(|r| !self.bases.contains(r));
277 // the null revision is always an ancestor. Logically speaking
277 // the null revision is always an ancestor. Logically speaking
278 // it's debatable in case bases is empty, but the Python
278 // it's debatable in case bases is empty, but the Python
279 // implementation always adds NULL_REVISION to bases, making it
279 // implementation always adds NULL_REVISION to bases, making it
280 // unconditionnally true.
280 // unconditionnally true.
281 revs.remove(&NULL_REVISION);
281 revs.remove(&NULL_REVISION);
282 if revs.is_empty() {
282 if revs.is_empty() {
283 return Ok(());
283 return Ok(());
284 }
284 }
285 // anything in revs > start is definitely not an ancestor of bases
285 // anything in revs > start is definitely not an ancestor of bases
286 // revs <= start need to be investigated
286 // revs <= start need to be investigated
287 if self.max_base == NULL_REVISION {
287 if self.max_base == NULL_REVISION {
288 return Ok(());
288 return Ok(());
289 }
289 }
290
290
291 // whatever happens, we'll keep at least keepcount of them
291 // whatever happens, we'll keep at least keepcount of them
292 // knowing this gives us a earlier stop condition than
292 // knowing this gives us a earlier stop condition than
293 // going all the way to the root
293 // going all the way to the root
294 let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
294 let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
295
295
296 let mut curr = self.max_base;
296 let mut curr = self.max_base;
297 while curr != NULL_REVISION && revs.len() > keepcount {
297 while curr != NULL_REVISION && revs.len() > keepcount {
298 if self.bases.contains(&curr) {
298 if self.bases.contains(&curr) {
299 revs.remove(&curr);
299 revs.remove(&curr);
300 self.add_parents(curr)?;
300 self.add_parents(curr)?;
301 }
301 }
302 curr -= 1;
302 curr -= 1;
303 }
303 }
304 Ok(())
304 Ok(())
305 }
305 }
306
306
307 /// Add the parents of `rev` to `self.bases`
307 /// Add the parents of `rev` to `self.bases`
308 ///
308 ///
309 /// This has no effect on `self.max_base`
309 /// This has no effect on `self.max_base`
310 #[inline]
310 #[inline]
311 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
311 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
312 if rev == NULL_REVISION {
312 if rev == NULL_REVISION {
313 return Ok(());
313 return Ok(());
314 }
314 }
315 for p in self.graph.parents(rev)?.iter().cloned() {
315 for p in self.graph.parents(rev)?.iter().cloned() {
316 // No need to bother the set with inserting NULL_REVISION over and
316 // No need to bother the set with inserting NULL_REVISION over and
317 // over
317 // over
318 if p != NULL_REVISION {
318 if p != NULL_REVISION {
319 self.bases.insert(p);
319 self.bases.insert(p);
320 }
320 }
321 }
321 }
322 Ok(())
322 Ok(())
323 }
323 }
324
324
325 /// Return all the ancestors of revs that are not ancestors of self.bases
325 /// Return all the ancestors of revs that are not ancestors of self.bases
326 ///
326 ///
327 /// This may include elements from revs.
327 /// This may include elements from revs.
328 ///
328 ///
329 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
329 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
330 /// revision number order, which is a topological order.
330 /// revision number order, which is a topological order.
331 pub fn missing_ancestors(
331 pub fn missing_ancestors(
332 &mut self,
332 &mut self,
333 revs: impl IntoIterator<Item = Revision>,
333 revs: impl IntoIterator<Item = Revision>,
334 ) -> Result<Vec<Revision>, GraphError> {
334 ) -> Result<Vec<Revision>, GraphError> {
335 // just for convenience and comparison with Python version
335 // just for convenience and comparison with Python version
336 let bases_visit = &mut self.bases;
336 let bases_visit = &mut self.bases;
337 let mut revs: HashSet<Revision> = revs
337 let mut revs: HashSet<Revision> = revs
338 .into_iter()
338 .into_iter()
339 .filter(|r| !bases_visit.contains(r))
339 .filter(|r| !bases_visit.contains(r))
340 .collect();
340 .collect();
341 let revs_visit = &mut revs;
341 let revs_visit = &mut revs;
342 let mut both_visit: HashSet<Revision> =
342 let mut both_visit: HashSet<Revision> =
343 revs_visit.intersection(&bases_visit).cloned().collect();
343 revs_visit.intersection(&bases_visit).cloned().collect();
344 if revs_visit.is_empty() {
344 if revs_visit.is_empty() {
345 return Ok(Vec::new());
345 return Ok(Vec::new());
346 }
346 }
347 let max_revs = revs_visit.iter().cloned().max().unwrap();
347 let max_revs = revs_visit.iter().cloned().max().unwrap();
348 let start = max(self.max_base, max_revs);
348 let start = max(self.max_base, max_revs);
349
349
350 // TODO heuristics for with_capacity()?
350 // TODO heuristics for with_capacity()?
351 let mut missing: Vec<Revision> = Vec::new();
351 let mut missing: Vec<Revision> = Vec::new();
352 for curr in (0..=start).rev() {
352 for curr in (0..=start).rev() {
353 if revs_visit.is_empty() {
353 if revs_visit.is_empty() {
354 break;
354 break;
355 }
355 }
356 if both_visit.remove(&curr) {
356 if both_visit.remove(&curr) {
357 // curr's parents might have made it into revs_visit through
357 // curr's parents might have made it into revs_visit through
358 // another path
358 // another path
359 for p in self.graph.parents(curr)?.iter().cloned() {
359 for p in self.graph.parents(curr)?.iter().cloned() {
360 if p == NULL_REVISION {
360 if p == NULL_REVISION {
361 continue;
361 continue;
362 }
362 }
363 revs_visit.remove(&p);
363 revs_visit.remove(&p);
364 bases_visit.insert(p);
364 bases_visit.insert(p);
365 both_visit.insert(p);
365 both_visit.insert(p);
366 }
366 }
367 } else if revs_visit.remove(&curr) {
367 } else if revs_visit.remove(&curr) {
368 missing.push(curr);
368 missing.push(curr);
369 for p in self.graph.parents(curr)?.iter().cloned() {
369 for p in self.graph.parents(curr)?.iter().cloned() {
370 if p == NULL_REVISION {
370 if p == NULL_REVISION {
371 continue;
371 continue;
372 }
372 }
373 if bases_visit.contains(&p) {
373 if bases_visit.contains(&p) {
374 // p is already known to be an ancestor of revs_visit
374 // p is already known to be an ancestor of revs_visit
375 revs_visit.remove(&p);
375 revs_visit.remove(&p);
376 both_visit.insert(p);
376 both_visit.insert(p);
377 } else if both_visit.contains(&p) {
377 } else if both_visit.contains(&p) {
378 // p should have been in bases_visit
378 // p should have been in bases_visit
379 revs_visit.remove(&p);
379 revs_visit.remove(&p);
380 bases_visit.insert(p);
380 bases_visit.insert(p);
381 } else {
381 } else {
382 // visit later
382 // visit later
383 revs_visit.insert(p);
383 revs_visit.insert(p);
384 }
384 }
385 }
385 }
386 } else if bases_visit.contains(&curr) {
386 } else if bases_visit.contains(&curr) {
387 for p in self.graph.parents(curr)?.iter().cloned() {
387 for p in self.graph.parents(curr)?.iter().cloned() {
388 if p == NULL_REVISION {
388 if p == NULL_REVISION {
389 continue;
389 continue;
390 }
390 }
391 if revs_visit.remove(&p) || both_visit.contains(&p) {
391 if revs_visit.remove(&p) || both_visit.contains(&p) {
392 // p is an ancestor of bases_visit, and is implicitly
392 // p is an ancestor of bases_visit, and is implicitly
393 // in revs_visit, which means p is ::revs & ::bases.
393 // in revs_visit, which means p is ::revs & ::bases.
394 bases_visit.insert(p);
394 bases_visit.insert(p);
395 both_visit.insert(p);
395 both_visit.insert(p);
396 } else {
396 } else {
397 bases_visit.insert(p);
397 bases_visit.insert(p);
398 }
398 }
399 }
399 }
400 }
400 }
401 }
401 }
402 missing.reverse();
402 missing.reverse();
403 Ok(missing)
403 Ok(missing)
404 }
404 }
405 }
405 }
406
406
407 #[cfg(test)]
407 #[cfg(test)]
408 mod tests {
408 mod tests {
409
409
410 use super::*;
410 use super::*;
411 use crate::testing::{SampleGraph, VecGraph};
411 use crate::testing::{SampleGraph, VecGraph};
412 use std::iter::FromIterator;
412 use std::iter::FromIterator;
413
413
414 fn list_ancestors<G: Graph>(
414 fn list_ancestors<G: Graph>(
415 graph: G,
415 graph: G,
416 initrevs: Vec<Revision>,
416 initrevs: Vec<Revision>,
417 stoprev: Revision,
417 stoprev: Revision,
418 inclusive: bool,
418 inclusive: bool,
419 ) -> Vec<Revision> {
419 ) -> Vec<Revision> {
420 AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
420 AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
421 .unwrap()
421 .unwrap()
422 .map(|res| res.unwrap())
422 .map(|res| res.unwrap())
423 .collect()
423 .collect()
424 }
424 }
425
425
426 #[test]
426 #[test]
427 /// Same tests as test-ancestor.py, without membership
427 /// Same tests as test-ancestor.py, without membership
428 /// (see also test-ancestor.py.out)
428 /// (see also test-ancestor.py.out)
429 fn test_list_ancestor() {
429 fn test_list_ancestor() {
430 assert_eq!(list_ancestors(SampleGraph, vec![], 0, false), vec![]);
430 assert_eq!(list_ancestors(SampleGraph, vec![], 0, false), vec![]);
431 assert_eq!(
431 assert_eq!(
432 list_ancestors(SampleGraph, vec![11, 13], 0, false),
432 list_ancestors(SampleGraph, vec![11, 13], 0, false),
433 vec![8, 7, 4, 3, 2, 1, 0]
433 vec![8, 7, 4, 3, 2, 1, 0]
434 );
434 );
435 assert_eq!(
435 assert_eq!(
436 list_ancestors(SampleGraph, vec![1, 3], 0, false),
436 list_ancestors(SampleGraph, vec![1, 3], 0, false),
437 vec![1, 0]
437 vec![1, 0]
438 );
438 );
439 assert_eq!(
439 assert_eq!(
440 list_ancestors(SampleGraph, vec![11, 13], 0, true),
440 list_ancestors(SampleGraph, vec![11, 13], 0, true),
441 vec![13, 11, 8, 7, 4, 3, 2, 1, 0]
441 vec![13, 11, 8, 7, 4, 3, 2, 1, 0]
442 );
442 );
443 assert_eq!(
443 assert_eq!(
444 list_ancestors(SampleGraph, vec![11, 13], 6, false),
444 list_ancestors(SampleGraph, vec![11, 13], 6, false),
445 vec![8, 7]
445 vec![8, 7]
446 );
446 );
447 assert_eq!(
447 assert_eq!(
448 list_ancestors(SampleGraph, vec![11, 13], 6, true),
448 list_ancestors(SampleGraph, vec![11, 13], 6, true),
449 vec![13, 11, 8, 7]
449 vec![13, 11, 8, 7]
450 );
450 );
451 assert_eq!(
451 assert_eq!(
452 list_ancestors(SampleGraph, vec![11, 13], 11, true),
452 list_ancestors(SampleGraph, vec![11, 13], 11, true),
453 vec![13, 11]
453 vec![13, 11]
454 );
454 );
455 assert_eq!(
455 assert_eq!(
456 list_ancestors(SampleGraph, vec![11, 13], 12, true),
456 list_ancestors(SampleGraph, vec![11, 13], 12, true),
457 vec![13]
457 vec![13]
458 );
458 );
459 assert_eq!(
459 assert_eq!(
460 list_ancestors(SampleGraph, vec![10, 1], 0, true),
460 list_ancestors(SampleGraph, vec![10, 1], 0, true),
461 vec![10, 5, 4, 2, 1, 0]
461 vec![10, 5, 4, 2, 1, 0]
462 );
462 );
463 }
463 }
464
464
465 #[test]
465 #[test]
466 /// Corner case that's not directly in test-ancestors.py, but
466 /// Corner case that's not directly in test-ancestors.py, but
467 /// that happens quite often, as demonstrated by running the whole
467 /// that happens quite often, as demonstrated by running the whole
468 /// suite.
468 /// suite.
469 /// For instance, run tests/test-obsolete-checkheads.t
469 /// For instance, run tests/test-obsolete-checkheads.t
470 fn test_nullrev_input() {
470 fn test_nullrev_input() {
471 let mut iter =
471 let mut iter =
472 AncestorsIterator::new(SampleGraph, vec![-1], 0, false).unwrap();
472 AncestorsIterator::new(SampleGraph, vec![-1], 0, false).unwrap();
473 assert_eq!(iter.next(), None)
473 assert_eq!(iter.next(), None)
474 }
474 }
475
475
476 #[test]
476 #[test]
477 fn test_contains() {
477 fn test_contains() {
478 let mut lazy =
478 let mut lazy =
479 AncestorsIterator::new(SampleGraph, vec![10, 1], 0, true).unwrap();
479 AncestorsIterator::new(SampleGraph, vec![10, 1], 0, true).unwrap();
480 assert!(lazy.contains(1).unwrap());
480 assert!(lazy.contains(1).unwrap());
481 assert!(!lazy.contains(3).unwrap());
481 assert!(!lazy.contains(3).unwrap());
482
482
483 let mut lazy =
483 let mut lazy =
484 AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
484 AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
485 assert!(!lazy.contains(NULL_REVISION).unwrap());
485 assert!(!lazy.contains(NULL_REVISION).unwrap());
486 }
486 }
487
487
488 #[test]
488 #[test]
489 fn test_peek() {
489 fn test_peek() {
490 let mut iter =
490 let mut iter =
491 AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
491 AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
492 // peek() gives us the next value
492 // peek() gives us the next value
493 assert_eq!(iter.peek(), Some(10));
493 assert_eq!(iter.peek(), Some(10));
494 // but it's not been consumed
494 // but it's not been consumed
495 assert_eq!(iter.next(), Some(Ok(10)));
495 assert_eq!(iter.next(), Some(Ok(10)));
496 // and iteration resumes normally
496 // and iteration resumes normally
497 assert_eq!(iter.next(), Some(Ok(5)));
497 assert_eq!(iter.next(), Some(Ok(5)));
498
498
499 // let's drain the iterator to test peek() at the end
499 // let's drain the iterator to test peek() at the end
500 while iter.next().is_some() {}
500 while iter.next().is_some() {}
501 assert_eq!(iter.peek(), None);
501 assert_eq!(iter.peek(), None);
502 }
502 }
503
503
504 #[test]
504 #[test]
505 fn test_empty() {
505 fn test_empty() {
506 let mut iter =
506 let mut iter =
507 AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
507 AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
508 assert!(!iter.is_empty());
508 assert!(!iter.is_empty());
509 while iter.next().is_some() {}
509 while iter.next().is_some() {}
510 assert!(!iter.is_empty());
510 assert!(!iter.is_empty());
511
511
512 let iter =
512 let iter =
513 AncestorsIterator::new(SampleGraph, vec![], 0, true).unwrap();
513 AncestorsIterator::new(SampleGraph, vec![], 0, true).unwrap();
514 assert!(iter.is_empty());
514 assert!(iter.is_empty());
515
515
516 // case where iter.seen == {NULL_REVISION}
516 // case where iter.seen == {NULL_REVISION}
517 let iter =
517 let iter =
518 AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
518 AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
519 assert!(iter.is_empty());
519 assert!(iter.is_empty());
520 }
520 }
521
521
522 /// A corrupted Graph, supporting error handling tests
522 /// A corrupted Graph, supporting error handling tests
523 #[derive(Clone, Debug)]
523 #[derive(Clone, Debug)]
524 struct Corrupted;
524 struct Corrupted;
525
525
526 impl Graph for Corrupted {
526 impl Graph for Corrupted {
527 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
527 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
528 match rev {
528 match rev {
529 1 => Ok([0, -1]),
529 1 => Ok([0, -1]),
530 r => Err(GraphError::ParentOutOfRange(r)),
530 r => Err(GraphError::ParentOutOfRange(r)),
531 }
531 }
532 }
532 }
533 }
533 }
534
534
535 #[test]
535 #[test]
536 fn test_initrev_out_of_range() {
536 fn test_initrev_out_of_range() {
537 // inclusive=false looks up initrev's parents right away
537 // inclusive=false looks up initrev's parents right away
538 match AncestorsIterator::new(SampleGraph, vec![25], 0, false) {
538 match AncestorsIterator::new(SampleGraph, vec![25], 0, false) {
539 Ok(_) => panic!("Should have been ParentOutOfRange"),
539 Ok(_) => panic!("Should have been ParentOutOfRange"),
540 Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25)),
540 Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25)),
541 }
541 }
542 }
542 }
543
543
544 #[test]
544 #[test]
545 fn test_next_out_of_range() {
545 fn test_next_out_of_range() {
546 // inclusive=false looks up initrev's parents right away
546 // inclusive=false looks up initrev's parents right away
547 let mut iter =
547 let mut iter =
548 AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
548 AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
549 assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0))));
549 assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0))));
550 }
550 }
551
551
552 #[test]
552 #[test]
553 fn test_lazy_iter_contains() {
553 fn test_lazy_iter_contains() {
554 let mut lazy =
554 let mut lazy =
555 LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap();
555 LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap();
556
556
557 let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
557 let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
558 // compare with iterator tests on the same initial revisions
558 // compare with iterator tests on the same initial revisions
559 assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);
559 assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);
560
560
561 // contains() results are correct, unaffected by the fact that
561 // contains() results are correct, unaffected by the fact that
562 // we consumed entirely an iterator out of lazy
562 // we consumed entirely an iterator out of lazy
563 assert_eq!(lazy.contains(2), Ok(true));
563 assert_eq!(lazy.contains(2), Ok(true));
564 assert_eq!(lazy.contains(9), Ok(false));
564 assert_eq!(lazy.contains(9), Ok(false));
565 }
565 }
566
566
567 #[test]
567 #[test]
568 fn test_lazy_contains_iter() {
568 fn test_lazy_contains_iter() {
569 let mut lazy =
569 let mut lazy =
570 LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap(); // reminder: [8, 7, 4, 3, 2, 1, 0]
570 LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap(); // reminder: [8, 7, 4, 3, 2, 1, 0]
571
571
572 assert_eq!(lazy.contains(2), Ok(true));
572 assert_eq!(lazy.contains(2), Ok(true));
573 assert_eq!(lazy.contains(6), Ok(false));
573 assert_eq!(lazy.contains(6), Ok(false));
574
574
575 // after consumption of 2 by the inner iterator, results stay
575 // after consumption of 2 by the inner iterator, results stay
576 // consistent
576 // consistent
577 assert_eq!(lazy.contains(2), Ok(true));
577 assert_eq!(lazy.contains(2), Ok(true));
578 assert_eq!(lazy.contains(5), Ok(false));
578 assert_eq!(lazy.contains(5), Ok(false));
579
579
580 // iter() still gives us a fresh iterator
580 // iter() still gives us a fresh iterator
581 let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
581 let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
582 assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);
582 assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);
583 }
583 }
584
584
585 #[test]
585 #[test]
586 /// Test constructor, add/get bases and heads
586 /// Test constructor, add/get bases and heads
587 fn test_missing_bases() -> Result<(), GraphError> {
587 fn test_missing_bases() -> Result<(), GraphError> {
588 let mut missing_ancestors =
588 let mut missing_ancestors =
589 MissingAncestors::new(SampleGraph, [5, 3, 1, 3].iter().cloned());
589 MissingAncestors::new(SampleGraph, [5, 3, 1, 3].iter().cloned());
590 let mut as_vec: Vec<Revision> =
590 let mut as_vec: Vec<Revision> =
591 missing_ancestors.get_bases().iter().cloned().collect();
591 missing_ancestors.get_bases().iter().cloned().collect();
592 as_vec.sort();
592 as_vec.sort();
593 assert_eq!(as_vec, [1, 3, 5]);
593 assert_eq!(as_vec, [1, 3, 5]);
594 assert_eq!(missing_ancestors.max_base, 5);
594 assert_eq!(missing_ancestors.max_base, 5);
595
595
596 missing_ancestors.add_bases([3, 7, 8].iter().cloned());
596 missing_ancestors.add_bases([3, 7, 8].iter().cloned());
597 as_vec = missing_ancestors.get_bases().iter().cloned().collect();
597 as_vec = missing_ancestors.get_bases().iter().cloned().collect();
598 as_vec.sort();
598 as_vec.sort();
599 assert_eq!(as_vec, [1, 3, 5, 7, 8]);
599 assert_eq!(as_vec, [1, 3, 5, 7, 8]);
600 assert_eq!(missing_ancestors.max_base, 8);
600 assert_eq!(missing_ancestors.max_base, 8);
601
601
602 as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
602 as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
603 as_vec.sort();
603 as_vec.sort();
604 assert_eq!(as_vec, [3, 5, 7, 8]);
604 assert_eq!(as_vec, [3, 5, 7, 8]);
605 Ok(())
605 Ok(())
606 }
606 }
607
607
608 fn assert_missing_remove(
608 fn assert_missing_remove(
609 bases: &[Revision],
609 bases: &[Revision],
610 revs: &[Revision],
610 revs: &[Revision],
611 expected: &[Revision],
611 expected: &[Revision],
612 ) {
612 ) {
613 let mut missing_ancestors =
613 let mut missing_ancestors =
614 MissingAncestors::new(SampleGraph, bases.iter().cloned());
614 MissingAncestors::new(SampleGraph, bases.iter().cloned());
615 let mut revset: HashSet<Revision> = revs.iter().cloned().collect();
615 let mut revset: HashSet<Revision> = revs.iter().cloned().collect();
616 missing_ancestors
616 missing_ancestors
617 .remove_ancestors_from(&mut revset)
617 .remove_ancestors_from(&mut revset)
618 .unwrap();
618 .unwrap();
619 let mut as_vec: Vec<Revision> = revset.into_iter().collect();
619 let mut as_vec: Vec<Revision> = revset.into_iter().collect();
620 as_vec.sort();
620 as_vec.sort();
621 assert_eq!(as_vec.as_slice(), expected);
621 assert_eq!(as_vec.as_slice(), expected);
622 }
622 }
623
623
624 #[test]
624 #[test]
625 fn test_missing_remove() {
625 fn test_missing_remove() {
626 assert_missing_remove(
626 assert_missing_remove(
627 &[1, 2, 3, 4, 7],
627 &[1, 2, 3, 4, 7],
628 Vec::from_iter(1..10).as_slice(),
628 Vec::from_iter(1..10).as_slice(),
629 &[5, 6, 8, 9],
629 &[5, 6, 8, 9],
630 );
630 );
631 assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
631 assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
632 assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
632 assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
633 }
633 }
634
634
635 fn assert_missing_ancestors(
635 fn assert_missing_ancestors(
636 bases: &[Revision],
636 bases: &[Revision],
637 revs: &[Revision],
637 revs: &[Revision],
638 expected: &[Revision],
638 expected: &[Revision],
639 ) {
639 ) {
640 let mut missing_ancestors =
640 let mut missing_ancestors =
641 MissingAncestors::new(SampleGraph, bases.iter().cloned());
641 MissingAncestors::new(SampleGraph, bases.iter().cloned());
642 let missing = missing_ancestors
642 let missing = missing_ancestors
643 .missing_ancestors(revs.iter().cloned())
643 .missing_ancestors(revs.iter().cloned())
644 .unwrap();
644 .unwrap();
645 assert_eq!(missing.as_slice(), expected);
645 assert_eq!(missing.as_slice(), expected);
646 }
646 }
647
647
648 #[test]
648 #[test]
649 fn test_missing_ancestors() {
649 fn test_missing_ancestors() {
650 // examples taken from test-ancestors.py by having it run
650 // examples taken from test-ancestors.py by having it run
651 // on the same graph (both naive and fast Python algs)
651 // on the same graph (both naive and fast Python algs)
652 assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
652 assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
653 assert_missing_ancestors(&[11], &[10], &[5, 10]);
653 assert_missing_ancestors(&[11], &[10], &[5, 10]);
654 assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
654 assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
655 }
655 }
656
656
657 /// An interesting case found by a random generator similar to
657 /// An interesting case found by a random generator similar to
658 /// the one in test-ancestor.py. An early version of Rust MissingAncestors
658 /// the one in test-ancestor.py. An early version of Rust MissingAncestors
659 /// failed this, yet none of the integration tests of the whole suite
659 /// failed this, yet none of the integration tests of the whole suite
660 /// catched it.
660 /// catched it.
661 #[test]
661 #[test]
662 fn test_remove_ancestors_from_case1() {
662 fn test_remove_ancestors_from_case1() {
663 let graph: VecGraph = vec![
663 let graph: VecGraph = vec![
664 [NULL_REVISION, NULL_REVISION],
664 [NULL_REVISION, NULL_REVISION],
665 [0, NULL_REVISION],
665 [0, NULL_REVISION],
666 [1, 0],
666 [1, 0],
667 [2, 1],
667 [2, 1],
668 [3, NULL_REVISION],
668 [3, NULL_REVISION],
669 [4, NULL_REVISION],
669 [4, NULL_REVISION],
670 [5, 1],
670 [5, 1],
671 [2, NULL_REVISION],
671 [2, NULL_REVISION],
672 [7, NULL_REVISION],
672 [7, NULL_REVISION],
673 [8, NULL_REVISION],
673 [8, NULL_REVISION],
674 [9, NULL_REVISION],
674 [9, NULL_REVISION],
675 [10, 1],
675 [10, 1],
676 [3, NULL_REVISION],
676 [3, NULL_REVISION],
677 [12, NULL_REVISION],
677 [12, NULL_REVISION],
678 [13, NULL_REVISION],
678 [13, NULL_REVISION],
679 [14, NULL_REVISION],
679 [14, NULL_REVISION],
680 [4, NULL_REVISION],
680 [4, NULL_REVISION],
681 [16, NULL_REVISION],
681 [16, NULL_REVISION],
682 [17, NULL_REVISION],
682 [17, NULL_REVISION],
683 [18, NULL_REVISION],
683 [18, NULL_REVISION],
684 [19, 11],
684 [19, 11],
685 [20, NULL_REVISION],
685 [20, NULL_REVISION],
686 [21, NULL_REVISION],
686 [21, NULL_REVISION],
687 [22, NULL_REVISION],
687 [22, NULL_REVISION],
688 [23, NULL_REVISION],
688 [23, NULL_REVISION],
689 [2, NULL_REVISION],
689 [2, NULL_REVISION],
690 [3, NULL_REVISION],
690 [3, NULL_REVISION],
691 [26, 24],
691 [26, 24],
692 [27, NULL_REVISION],
692 [27, NULL_REVISION],
693 [28, NULL_REVISION],
693 [28, NULL_REVISION],
694 [12, NULL_REVISION],
694 [12, NULL_REVISION],
695 [1, NULL_REVISION],
695 [1, NULL_REVISION],
696 [1, 9],
696 [1, 9],
697 [32, NULL_REVISION],
697 [32, NULL_REVISION],
698 [33, NULL_REVISION],
698 [33, NULL_REVISION],
699 [34, 31],
699 [34, 31],
700 [35, NULL_REVISION],
700 [35, NULL_REVISION],
701 [36, 26],
701 [36, 26],
702 [37, NULL_REVISION],
702 [37, NULL_REVISION],
703 [38, NULL_REVISION],
703 [38, NULL_REVISION],
704 [39, NULL_REVISION],
704 [39, NULL_REVISION],
705 [40, NULL_REVISION],
705 [40, NULL_REVISION],
706 [41, NULL_REVISION],
706 [41, NULL_REVISION],
707 [42, 26],
707 [42, 26],
708 [0, NULL_REVISION],
708 [0, NULL_REVISION],
709 [44, NULL_REVISION],
709 [44, NULL_REVISION],
710 [45, 4],
710 [45, 4],
711 [40, NULL_REVISION],
711 [40, NULL_REVISION],
712 [47, NULL_REVISION],
712 [47, NULL_REVISION],
713 [36, 0],
713 [36, 0],
714 [49, NULL_REVISION],
714 [49, NULL_REVISION],
715 [NULL_REVISION, NULL_REVISION],
715 [NULL_REVISION, NULL_REVISION],
716 [51, NULL_REVISION],
716 [51, NULL_REVISION],
717 [52, NULL_REVISION],
717 [52, NULL_REVISION],
718 [53, NULL_REVISION],
718 [53, NULL_REVISION],
719 [14, NULL_REVISION],
719 [14, NULL_REVISION],
720 [55, NULL_REVISION],
720 [55, NULL_REVISION],
721 [15, NULL_REVISION],
721 [15, NULL_REVISION],
722 [23, NULL_REVISION],
722 [23, NULL_REVISION],
723 [58, NULL_REVISION],
723 [58, NULL_REVISION],
724 [59, NULL_REVISION],
724 [59, NULL_REVISION],
725 [2, NULL_REVISION],
725 [2, NULL_REVISION],
726 [61, 59],
726 [61, 59],
727 [62, NULL_REVISION],
727 [62, NULL_REVISION],
728 [63, NULL_REVISION],
728 [63, NULL_REVISION],
729 [NULL_REVISION, NULL_REVISION],
729 [NULL_REVISION, NULL_REVISION],
730 [65, NULL_REVISION],
730 [65, NULL_REVISION],
731 [66, NULL_REVISION],
731 [66, NULL_REVISION],
732 [67, NULL_REVISION],
732 [67, NULL_REVISION],
733 [68, NULL_REVISION],
733 [68, NULL_REVISION],
734 [37, 28],
734 [37, 28],
735 [69, 25],
735 [69, 25],
736 [71, NULL_REVISION],
736 [71, NULL_REVISION],
737 [72, NULL_REVISION],
737 [72, NULL_REVISION],
738 [50, 2],
738 [50, 2],
739 [74, NULL_REVISION],
739 [74, NULL_REVISION],
740 [12, NULL_REVISION],
740 [12, NULL_REVISION],
741 [18, NULL_REVISION],
741 [18, NULL_REVISION],
742 [77, NULL_REVISION],
742 [77, NULL_REVISION],
743 [78, NULL_REVISION],
743 [78, NULL_REVISION],
744 [79, NULL_REVISION],
744 [79, NULL_REVISION],
745 [43, 33],
745 [43, 33],
746 [81, NULL_REVISION],
746 [81, NULL_REVISION],
747 [82, NULL_REVISION],
747 [82, NULL_REVISION],
748 [83, NULL_REVISION],
748 [83, NULL_REVISION],
749 [84, 45],
749 [84, 45],
750 [85, NULL_REVISION],
750 [85, NULL_REVISION],
751 [86, NULL_REVISION],
751 [86, NULL_REVISION],
752 [NULL_REVISION, NULL_REVISION],
752 [NULL_REVISION, NULL_REVISION],
753 [88, NULL_REVISION],
753 [88, NULL_REVISION],
754 [NULL_REVISION, NULL_REVISION],
754 [NULL_REVISION, NULL_REVISION],
755 [76, 83],
755 [76, 83],
756 [44, NULL_REVISION],
756 [44, NULL_REVISION],
757 [92, NULL_REVISION],
757 [92, NULL_REVISION],
758 [93, NULL_REVISION],
758 [93, NULL_REVISION],
759 [9, NULL_REVISION],
759 [9, NULL_REVISION],
760 [95, 67],
760 [95, 67],
761 [96, NULL_REVISION],
761 [96, NULL_REVISION],
762 [97, NULL_REVISION],
762 [97, NULL_REVISION],
763 [NULL_REVISION, NULL_REVISION],
763 [NULL_REVISION, NULL_REVISION],
764 ];
764 ];
765 let problem_rev = 28 as Revision;
765 let problem_rev = 28 as Revision;
766 let problem_base = 70 as Revision;
766 let problem_base = 70 as Revision;
767 // making the problem obvious: problem_rev is a parent of problem_base
767 // making the problem obvious: problem_rev is a parent of problem_base
768 assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
768 assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
769
769
770 let mut missing_ancestors: MissingAncestors<VecGraph> =
770 let mut missing_ancestors: MissingAncestors<VecGraph> =
771 MissingAncestors::new(
771 MissingAncestors::new(
772 graph,
772 graph,
773 [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
773 [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
774 .iter()
774 .iter()
775 .cloned(),
775 .cloned(),
776 );
776 );
777 assert!(missing_ancestors.bases.contains(&problem_base));
777 assert!(missing_ancestors.bases.contains(&problem_base));
778
778
779 let mut revs: HashSet<Revision> =
779 let mut revs: HashSet<Revision> =
780 [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
780 [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
781 .iter()
781 .iter()
782 .cloned()
782 .cloned()
783 .collect();
783 .collect();
784 missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
784 missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
785 assert!(!revs.contains(&problem_rev));
785 assert!(!revs.contains(&problem_rev));
786 }
786 }
787 }
787 }
@@ -1,275 +1,276
1 // dagops.rs
1 // dagops.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Miscellaneous DAG operations
8 //! Miscellaneous DAG operations
9 //!
9 //!
10 //! # Terminology
10 //! # Terminology
11 //! - By *relative heads* of a collection of revision numbers (`Revision`), we
11 //! - By *relative heads* of a collection of revision numbers (`Revision`), we
12 //! mean those revisions that have no children among the collection.
12 //! mean those revisions that have no children among the collection.
13 //! - Similarly *relative roots* of a collection of `Revision`, we mean those
13 //! - Similarly *relative roots* of a collection of `Revision`, we mean those
14 //! whose parents, if any, don't belong to the collection.
14 //! whose parents, if any, don't belong to the collection.
15 use super::{Graph, GraphError, Revision, NULL_REVISION};
15 use super::{Graph, GraphError, Revision, NULL_REVISION};
16 use crate::ancestors::AncestorsIterator;
16 use crate::ancestors::AncestorsIterator;
17 use std::collections::{BTreeSet, HashSet};
17 use std::collections::{BTreeSet, HashSet};
18
18
19 fn remove_parents(
19 fn remove_parents<S: std::hash::BuildHasher>(
20 graph: &impl Graph,
20 graph: &impl Graph,
21 rev: Revision,
21 rev: Revision,
22 set: &mut HashSet<Revision>,
22 set: &mut HashSet<Revision, S>,
23 ) -> Result<(), GraphError> {
23 ) -> Result<(), GraphError> {
24 for parent in graph.parents(rev)?.iter() {
24 for parent in graph.parents(rev)?.iter() {
25 if *parent != NULL_REVISION {
25 if *parent != NULL_REVISION {
26 set.remove(parent);
26 set.remove(parent);
27 }
27 }
28 }
28 }
29 Ok(())
29 Ok(())
30 }
30 }
31
31
32 /// Relative heads out of some revisions, passed as an iterator.
32 /// Relative heads out of some revisions, passed as an iterator.
33 ///
33 ///
34 /// These heads are defined as those revisions that have no children
34 /// These heads are defined as those revisions that have no children
35 /// among those emitted by the iterator.
35 /// among those emitted by the iterator.
36 ///
36 ///
37 /// # Performance notes
37 /// # Performance notes
38 /// Internally, this clones the iterator, and builds a `HashSet` out of it.
38 /// Internally, this clones the iterator, and builds a `HashSet` out of it.
39 ///
39 ///
40 /// This function takes an `Iterator` instead of `impl IntoIterator` to
40 /// This function takes an `Iterator` instead of `impl IntoIterator` to
41 /// guarantee that cloning the iterator doesn't result in cloning the full
41 /// guarantee that cloning the iterator doesn't result in cloning the full
42 /// construct it comes from.
42 /// construct it comes from.
43 pub fn heads<'a>(
43 pub fn heads<'a>(
44 graph: &impl Graph,
44 graph: &impl Graph,
45 iter_revs: impl Clone + Iterator<Item = &'a Revision>,
45 iter_revs: impl Clone + Iterator<Item = &'a Revision>,
46 ) -> Result<HashSet<Revision>, GraphError> {
46 ) -> Result<HashSet<Revision>, GraphError> {
47 let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
47 let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
48 heads.remove(&NULL_REVISION);
48 heads.remove(&NULL_REVISION);
49 for rev in iter_revs {
49 for rev in iter_revs {
50 if *rev != NULL_REVISION {
50 if *rev != NULL_REVISION {
51 remove_parents(graph, *rev, &mut heads)?;
51 remove_parents(graph, *rev, &mut heads)?;
52 }
52 }
53 }
53 }
54 Ok(heads)
54 Ok(heads)
55 }
55 }
56
56
57 /// Retain in `revs` only its relative heads.
57 /// Retain in `revs` only its relative heads.
58 ///
58 ///
59 /// This is an in-place operation, so that control of the incoming
59 /// This is an in-place operation, so that control of the incoming
60 /// set is left to the caller.
60 /// set is left to the caller.
61 /// - a direct Python binding would probably need to build its own `HashSet`
61 /// - a direct Python binding would probably need to build its own `HashSet`
62 /// from an incoming iterable, even if its sole purpose is to extract the
62 /// from an incoming iterable, even if its sole purpose is to extract the
63 /// heads.
63 /// heads.
64 /// - a Rust caller can decide whether cloning beforehand is appropriate
64 /// - a Rust caller can decide whether cloning beforehand is appropriate
65 ///
65 ///
66 /// # Performance notes
66 /// # Performance notes
67 /// Internally, this function will store a full copy of `revs` in a `Vec`.
67 /// Internally, this function will store a full copy of `revs` in a `Vec`.
68 pub fn retain_heads(
68 pub fn retain_heads<S: std::hash::BuildHasher>(
69 graph: &impl Graph,
69 graph: &impl Graph,
70 revs: &mut HashSet<Revision>,
70 revs: &mut HashSet<Revision, S>,
71 ) -> Result<(), GraphError> {
71 ) -> Result<(), GraphError> {
72 revs.remove(&NULL_REVISION);
72 revs.remove(&NULL_REVISION);
73 // we need to construct an iterable copy of revs to avoid itering while
73 // we need to construct an iterable copy of revs to avoid itering while
74 // mutating
74 // mutating
75 let as_vec: Vec<Revision> = revs.iter().cloned().collect();
75 let as_vec: Vec<Revision> = revs.iter().cloned().collect();
76 for rev in as_vec {
76 for rev in as_vec {
77 if rev != NULL_REVISION {
77 if rev != NULL_REVISION {
78 remove_parents(graph, rev, revs)?;
78 remove_parents(graph, rev, revs)?;
79 }
79 }
80 }
80 }
81 Ok(())
81 Ok(())
82 }
82 }
83
83
84 /// Roots of `revs`, passed as a `HashSet`
84 /// Roots of `revs`, passed as a `HashSet`
85 ///
85 ///
86 /// They are returned in arbitrary order
86 /// They are returned in arbitrary order
87 pub fn roots<G: Graph>(
87 pub fn roots<G: Graph, S: std::hash::BuildHasher>(
88 graph: &G,
88 graph: &G,
89 revs: &HashSet<Revision>,
89 revs: &HashSet<Revision, S>,
90 ) -> Result<Vec<Revision>, GraphError> {
90 ) -> Result<Vec<Revision>, GraphError> {
91 let mut roots: Vec<Revision> = Vec::new();
91 let mut roots: Vec<Revision> = Vec::new();
92 for rev in revs {
92 for rev in revs {
93 if graph
93 if graph
94 .parents(*rev)?
94 .parents(*rev)?
95 .iter()
95 .iter()
96 .filter(|p| **p != NULL_REVISION)
96 .filter(|p| **p != NULL_REVISION)
97 .all(|p| !revs.contains(p))
97 .all(|p| !revs.contains(p))
98 {
98 {
99 roots.push(*rev);
99 roots.push(*rev);
100 }
100 }
101 }
101 }
102 Ok(roots)
102 Ok(roots)
103 }
103 }
104
104
105 /// Compute the topological range between two collections of revisions
105 /// Compute the topological range between two collections of revisions
106 ///
106 ///
107 /// This is equivalent to the revset `<roots>::<heads>`.
107 /// This is equivalent to the revset `<roots>::<heads>`.
108 ///
108 ///
109 /// Currently, the given `Graph` has to implement `Clone`, which means
109 /// Currently, the given `Graph` has to implement `Clone`, which means
110 /// actually cloning just a reference-counted Python pointer if
110 /// actually cloning just a reference-counted Python pointer if
111 /// it's passed over through `rust-cpython`. This is due to the internal
111 /// it's passed over through `rust-cpython`. This is due to the internal
112 /// use of `AncestorsIterator`
112 /// use of `AncestorsIterator`
113 ///
113 ///
114 /// # Algorithmic details
114 /// # Algorithmic details
115 ///
115 ///
116 /// This is a two-pass swipe inspired from what `reachableroots2` from
116 /// This is a two-pass swipe inspired from what `reachableroots2` from
117 /// `mercurial.cext.parsers` does to obtain the same results.
117 /// `mercurial.cext.parsers` does to obtain the same results.
118 ///
118 ///
119 /// - first, we climb up the DAG from `heads` in topological order, keeping
119 /// - first, we climb up the DAG from `heads` in topological order, keeping
120 /// them in the vector `heads_ancestors` vector, and adding any element of
120 /// them in the vector `heads_ancestors` vector, and adding any element of
121 /// `roots` we find among them to the resulting range.
121 /// `roots` we find among them to the resulting range.
122 /// - Then, we iterate on that recorded vector so that a revision is always
122 /// - Then, we iterate on that recorded vector so that a revision is always
123 /// emitted after its parents and add all revisions whose parents are already
123 /// emitted after its parents and add all revisions whose parents are already
124 /// in the range to the results.
124 /// in the range to the results.
125 ///
125 ///
126 /// # Performance notes
126 /// # Performance notes
127 ///
127 ///
128 /// The main difference with the C implementation is that
128 /// The main difference with the C implementation is that
129 /// the latter uses a flat array with bit flags, instead of complex structures
129 /// the latter uses a flat array with bit flags, instead of complex structures
130 /// like `HashSet`, making it faster in most scenarios. In theory, it's
130 /// like `HashSet`, making it faster in most scenarios. In theory, it's
131 /// possible that the present implementation could be more memory efficient
131 /// possible that the present implementation could be more memory efficient
132 /// for very large repositories with many branches.
132 /// for very large repositories with many branches.
133 pub fn range(
133 pub fn range(
134 graph: &(impl Graph + Clone),
134 graph: &(impl Graph + Clone),
135 roots: impl IntoIterator<Item = Revision>,
135 roots: impl IntoIterator<Item = Revision>,
136 heads: impl IntoIterator<Item = Revision>,
136 heads: impl IntoIterator<Item = Revision>,
137 ) -> Result<BTreeSet<Revision>, GraphError> {
137 ) -> Result<BTreeSet<Revision>, GraphError> {
138 let mut range = BTreeSet::new();
138 let mut range = BTreeSet::new();
139 let roots: HashSet<Revision> = roots.into_iter().collect();
139 let roots: HashSet<Revision> = roots.into_iter().collect();
140 let min_root: Revision = match roots.iter().cloned().min() {
140 let min_root: Revision = match roots.iter().cloned().min() {
141 None => {
141 None => {
142 return Ok(range);
142 return Ok(range);
143 }
143 }
144 Some(r) => r,
144 Some(r) => r,
145 };
145 };
146
146
147 // Internally, AncestorsIterator currently maintains a `HashSet`
147 // Internally, AncestorsIterator currently maintains a `HashSet`
148 // of all seen revision, which is also what we record, albeit in an ordered
148 // of all seen revision, which is also what we record, albeit in an ordered
149 // way. There's room for improvement on this duplication.
149 // way. There's room for improvement on this duplication.
150 let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
150 let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
151 let mut heads_ancestors: Vec<Revision> = Vec::new();
151 let mut heads_ancestors: Vec<Revision> = Vec::new();
152 for revres in ait {
152 for revres in ait {
153 let rev = revres?;
153 let rev = revres?;
154 if roots.contains(&rev) {
154 if roots.contains(&rev) {
155 range.insert(rev);
155 range.insert(rev);
156 }
156 }
157 heads_ancestors.push(rev);
157 heads_ancestors.push(rev);
158 }
158 }
159
159
160 for rev in heads_ancestors.into_iter().rev() {
160 for rev in heads_ancestors.into_iter().rev() {
161 for parent in graph.parents(rev)?.iter() {
161 for parent in graph.parents(rev)?.iter() {
162 if *parent != NULL_REVISION && range.contains(parent) {
162 if *parent != NULL_REVISION && range.contains(parent) {
163 range.insert(rev);
163 range.insert(rev);
164 }
164 }
165 }
165 }
166 }
166 }
167 Ok(range)
167 Ok(range)
168 }
168 }
169
169
170 #[cfg(test)]
170 #[cfg(test)]
171 mod tests {
171 mod tests {
172
172
173 use super::*;
173 use super::*;
174 use crate::testing::SampleGraph;
174 use crate::testing::SampleGraph;
175
175
176 /// Apply `retain_heads()` to the given slice and return as a sorted `Vec`
176 /// Apply `retain_heads()` to the given slice and return as a sorted `Vec`
177 fn retain_heads_sorted(
177 fn retain_heads_sorted(
178 graph: &impl Graph,
178 graph: &impl Graph,
179 revs: &[Revision],
179 revs: &[Revision],
180 ) -> Result<Vec<Revision>, GraphError> {
180 ) -> Result<Vec<Revision>, GraphError> {
181 let mut revs: HashSet<Revision> = revs.iter().cloned().collect();
181 let mut revs: HashSet<Revision> = revs.iter().cloned().collect();
182 retain_heads(graph, &mut revs)?;
182 retain_heads(graph, &mut revs)?;
183 let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
183 let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
184 as_vec.sort();
184 as_vec.sort();
185 Ok(as_vec)
185 Ok(as_vec)
186 }
186 }
187
187
188 #[test]
188 #[test]
189 fn test_retain_heads() -> Result<(), GraphError> {
189 fn test_retain_heads() -> Result<(), GraphError> {
190 assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
190 assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
191 assert_eq!(
191 assert_eq!(
192 retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
192 retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
193 vec![1, 6, 12]
193 vec![1, 6, 12]
194 );
194 );
195 assert_eq!(
195 assert_eq!(
196 retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
196 retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
197 vec![3, 5, 8, 9]
197 vec![3, 5, 8, 9]
198 );
198 );
199 Ok(())
199 Ok(())
200 }
200 }
201
201
202 /// Apply `heads()` to the given slice and return as a sorted `Vec`
202 /// Apply `heads()` to the given slice and return as a sorted `Vec`
203 fn heads_sorted(
203 fn heads_sorted(
204 graph: &impl Graph,
204 graph: &impl Graph,
205 revs: &[Revision],
205 revs: &[Revision],
206 ) -> Result<Vec<Revision>, GraphError> {
206 ) -> Result<Vec<Revision>, GraphError> {
207 let heads = heads(graph, revs.iter())?;
207 let heads = heads(graph, revs.iter())?;
208 let mut as_vec: Vec<Revision> = heads.iter().cloned().collect();
208 let mut as_vec: Vec<Revision> = heads.iter().cloned().collect();
209 as_vec.sort();
209 as_vec.sort();
210 Ok(as_vec)
210 Ok(as_vec)
211 }
211 }
212
212
213 #[test]
213 #[test]
214 fn test_heads() -> Result<(), GraphError> {
214 fn test_heads() -> Result<(), GraphError> {
215 assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
215 assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
216 assert_eq!(
216 assert_eq!(
217 heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
217 heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
218 vec![1, 6, 12]
218 vec![1, 6, 12]
219 );
219 );
220 assert_eq!(
220 assert_eq!(
221 heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
221 heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
222 vec![3, 5, 8, 9]
222 vec![3, 5, 8, 9]
223 );
223 );
224 Ok(())
224 Ok(())
225 }
225 }
226
226
227 /// Apply `roots()` and sort the result for easier comparison
227 /// Apply `roots()` and sort the result for easier comparison
228 fn roots_sorted(
228 fn roots_sorted(
229 graph: &impl Graph,
229 graph: &impl Graph,
230 revs: &[Revision],
230 revs: &[Revision],
231 ) -> Result<Vec<Revision>, GraphError> {
231 ) -> Result<Vec<Revision>, GraphError> {
232 let mut as_vec = roots(graph, &revs.iter().cloned().collect())?;
232 let set: HashSet<_> = revs.iter().cloned().collect();
233 let mut as_vec = roots(graph, &set)?;
233 as_vec.sort();
234 as_vec.sort();
234 Ok(as_vec)
235 Ok(as_vec)
235 }
236 }
236
237
237 #[test]
238 #[test]
238 fn test_roots() -> Result<(), GraphError> {
239 fn test_roots() -> Result<(), GraphError> {
239 assert_eq!(roots_sorted(&SampleGraph, &[4, 5, 6])?, vec![4]);
240 assert_eq!(roots_sorted(&SampleGraph, &[4, 5, 6])?, vec![4]);
240 assert_eq!(
241 assert_eq!(
241 roots_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
242 roots_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
242 vec![0, 4, 12]
243 vec![0, 4, 12]
243 );
244 );
244 assert_eq!(
245 assert_eq!(
245 roots_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
246 roots_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
246 vec![1, 8]
247 vec![1, 8]
247 );
248 );
248 Ok(())
249 Ok(())
249 }
250 }
250
251
251 /// Apply `range()` and convert the result into a Vec for easier comparison
252 /// Apply `range()` and convert the result into a Vec for easier comparison
252 fn range_vec(
253 fn range_vec(
253 graph: impl Graph + Clone,
254 graph: impl Graph + Clone,
254 roots: &[Revision],
255 roots: &[Revision],
255 heads: &[Revision],
256 heads: &[Revision],
256 ) -> Result<Vec<Revision>, GraphError> {
257 ) -> Result<Vec<Revision>, GraphError> {
257 range(&graph, roots.iter().cloned(), heads.iter().cloned())
258 range(&graph, roots.iter().cloned(), heads.iter().cloned())
258 .map(|bs| bs.into_iter().collect())
259 .map(|bs| bs.into_iter().collect())
259 }
260 }
260
261
261 #[test]
262 #[test]
262 fn test_range() -> Result<(), GraphError> {
263 fn test_range() -> Result<(), GraphError> {
263 assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
264 assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
264 assert_eq!(range_vec(SampleGraph, &[0], &[8])?, vec![]);
265 assert_eq!(range_vec(SampleGraph, &[0], &[8])?, vec![]);
265 assert_eq!(
266 assert_eq!(
266 range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
267 range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
267 vec![5, 10]
268 vec![5, 10]
268 );
269 );
269 assert_eq!(
270 assert_eq!(
270 range_vec(SampleGraph, &[5, 6], &[10, 12])?,
271 range_vec(SampleGraph, &[5, 6], &[10, 12])?,
271 vec![5, 6, 9, 10, 12]
272 vec![5, 6, 9, 10, 12]
272 );
273 );
273 Ok(())
274 Ok(())
274 }
275 }
275 }
276 }
@@ -1,418 +1,422
1 // dirs_multiset.rs
1 // dirs_multiset.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! A multiset of directory names.
8 //! A multiset of directory names.
9 //!
9 //!
10 //! Used to counts the references to directories in a manifest or dirstate.
10 //! Used to counts the references to directories in a manifest or dirstate.
11 use crate::{
11 use crate::{
12 dirstate::EntryState,
12 dirstate::EntryState,
13 utils::{
13 utils::{
14 files,
14 files,
15 hg_path::{HgPath, HgPathBuf, HgPathError},
15 hg_path::{HgPath, HgPathBuf, HgPathError},
16 },
16 },
17 DirstateEntry, DirstateMapError, FastHashMap,
17 DirstateEntry, DirstateMapError, FastHashMap,
18 };
18 };
19 use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet};
19 use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet};
20
20
21 // could be encapsulated if we care API stability more seriously
21 // could be encapsulated if we care API stability more seriously
22 pub type DirsMultisetIter<'a> = hash_map::Keys<'a, HgPathBuf, u32>;
22 pub type DirsMultisetIter<'a> = hash_map::Keys<'a, HgPathBuf, u32>;
23
23
24 #[derive(PartialEq, Debug)]
24 #[derive(PartialEq, Debug)]
25 pub struct DirsMultiset {
25 pub struct DirsMultiset {
26 inner: FastHashMap<HgPathBuf, u32>,
26 inner: FastHashMap<HgPathBuf, u32>,
27 }
27 }
28
28
29 impl DirsMultiset {
29 impl DirsMultiset {
30 /// Initializes the multiset from a dirstate.
30 /// Initializes the multiset from a dirstate.
31 ///
31 ///
32 /// If `skip_state` is provided, skips dirstate entries with equal state.
32 /// If `skip_state` is provided, skips dirstate entries with equal state.
33 pub fn from_dirstate(
33 pub fn from_dirstate(
34 dirstate: &FastHashMap<HgPathBuf, DirstateEntry>,
34 dirstate: &FastHashMap<HgPathBuf, DirstateEntry>,
35 skip_state: Option<EntryState>,
35 skip_state: Option<EntryState>,
36 ) -> Result<Self, DirstateMapError> {
36 ) -> Result<Self, DirstateMapError> {
37 let mut multiset = DirsMultiset {
37 let mut multiset = DirsMultiset {
38 inner: FastHashMap::default(),
38 inner: FastHashMap::default(),
39 };
39 };
40
40
41 for (filename, DirstateEntry { state, .. }) in dirstate {
41 for (filename, DirstateEntry { state, .. }) in dirstate {
42 // This `if` is optimized out of the loop
42 // This `if` is optimized out of the loop
43 if let Some(skip) = skip_state {
43 if let Some(skip) = skip_state {
44 if skip != *state {
44 if skip != *state {
45 multiset.add_path(filename)?;
45 multiset.add_path(filename)?;
46 }
46 }
47 } else {
47 } else {
48 multiset.add_path(filename)?;
48 multiset.add_path(filename)?;
49 }
49 }
50 }
50 }
51
51
52 Ok(multiset)
52 Ok(multiset)
53 }
53 }
54
54
55 /// Initializes the multiset from a manifest.
55 /// Initializes the multiset from a manifest.
56 pub fn from_manifest(
56 pub fn from_manifest(
57 manifest: &[impl AsRef<HgPath>],
57 manifest: &[impl AsRef<HgPath>],
58 ) -> Result<Self, DirstateMapError> {
58 ) -> Result<Self, DirstateMapError> {
59 let mut multiset = DirsMultiset {
59 let mut multiset = DirsMultiset {
60 inner: FastHashMap::default(),
60 inner: FastHashMap::default(),
61 };
61 };
62
62
63 for filename in manifest {
63 for filename in manifest {
64 multiset.add_path(filename.as_ref())?;
64 multiset.add_path(filename.as_ref())?;
65 }
65 }
66
66
67 Ok(multiset)
67 Ok(multiset)
68 }
68 }
69
69
70 /// Increases the count of deepest directory contained in the path.
70 /// Increases the count of deepest directory contained in the path.
71 ///
71 ///
72 /// If the directory is not yet in the map, adds its parents.
72 /// If the directory is not yet in the map, adds its parents.
73 pub fn add_path(
73 pub fn add_path(
74 &mut self,
74 &mut self,
75 path: impl AsRef<HgPath>,
75 path: impl AsRef<HgPath>,
76 ) -> Result<(), DirstateMapError> {
76 ) -> Result<(), DirstateMapError> {
77 for subpath in files::find_dirs(path.as_ref()) {
77 for subpath in files::find_dirs(path.as_ref()) {
78 if subpath.as_bytes().last() == Some(&b'/') {
78 if subpath.as_bytes().last() == Some(&b'/') {
79 // TODO Remove this once PathAuditor is certified
79 // TODO Remove this once PathAuditor is certified
80 // as the only entrypoint for path data
80 // as the only entrypoint for path data
81 let second_slash_index = subpath.len() - 1;
81 let second_slash_index = subpath.len() - 1;
82
82
83 return Err(DirstateMapError::InvalidPath(
83 return Err(DirstateMapError::InvalidPath(
84 HgPathError::ConsecutiveSlashes {
84 HgPathError::ConsecutiveSlashes {
85 bytes: path.as_ref().as_bytes().to_owned(),
85 bytes: path.as_ref().as_bytes().to_owned(),
86 second_slash_index,
86 second_slash_index,
87 },
87 },
88 ));
88 ));
89 }
89 }
90 if let Some(val) = self.inner.get_mut(subpath) {
90 if let Some(val) = self.inner.get_mut(subpath) {
91 *val += 1;
91 *val += 1;
92 break;
92 break;
93 }
93 }
94 self.inner.insert(subpath.to_owned(), 1);
94 self.inner.insert(subpath.to_owned(), 1);
95 }
95 }
96 Ok(())
96 Ok(())
97 }
97 }
98
98
99 /// Decreases the count of deepest directory contained in the path.
99 /// Decreases the count of deepest directory contained in the path.
100 ///
100 ///
101 /// If it is the only reference, decreases all parents until one is
101 /// If it is the only reference, decreases all parents until one is
102 /// removed.
102 /// removed.
103 /// If the directory is not in the map, something horrible has happened.
103 /// If the directory is not in the map, something horrible has happened.
104 pub fn delete_path(
104 pub fn delete_path(
105 &mut self,
105 &mut self,
106 path: impl AsRef<HgPath>,
106 path: impl AsRef<HgPath>,
107 ) -> Result<(), DirstateMapError> {
107 ) -> Result<(), DirstateMapError> {
108 for subpath in files::find_dirs(path.as_ref()) {
108 for subpath in files::find_dirs(path.as_ref()) {
109 match self.inner.entry(subpath.to_owned()) {
109 match self.inner.entry(subpath.to_owned()) {
110 Entry::Occupied(mut entry) => {
110 Entry::Occupied(mut entry) => {
111 let val = entry.get().clone();
111 let val = *entry.get();
112 if val > 1 {
112 if val > 1 {
113 entry.insert(val - 1);
113 entry.insert(val - 1);
114 break;
114 break;
115 }
115 }
116 entry.remove();
116 entry.remove();
117 }
117 }
118 Entry::Vacant(_) => {
118 Entry::Vacant(_) => {
119 return Err(DirstateMapError::PathNotFound(
119 return Err(DirstateMapError::PathNotFound(
120 path.as_ref().to_owned(),
120 path.as_ref().to_owned(),
121 ))
121 ))
122 }
122 }
123 };
123 };
124 }
124 }
125
125
126 Ok(())
126 Ok(())
127 }
127 }
128
128
129 pub fn contains(&self, key: impl AsRef<HgPath>) -> bool {
129 pub fn contains(&self, key: impl AsRef<HgPath>) -> bool {
130 self.inner.contains_key(key.as_ref())
130 self.inner.contains_key(key.as_ref())
131 }
131 }
132
132
133 pub fn iter(&self) -> DirsMultisetIter {
133 pub fn iter(&self) -> DirsMultisetIter {
134 self.inner.keys()
134 self.inner.keys()
135 }
135 }
136
136
137 pub fn len(&self) -> usize {
137 pub fn len(&self) -> usize {
138 self.inner.len()
138 self.inner.len()
139 }
139 }
140
141 pub fn is_empty(&self) -> bool {
142 self.len() == 0
143 }
140 }
144 }
141
145
142 /// This is basically a reimplementation of `DirsMultiset` that stores the
146 /// This is basically a reimplementation of `DirsMultiset` that stores the
143 /// children instead of just a count of them, plus a small optional
147 /// children instead of just a count of them, plus a small optional
144 /// optimization to avoid some directories we don't need.
148 /// optimization to avoid some directories we don't need.
145 #[derive(PartialEq, Debug)]
149 #[derive(PartialEq, Debug)]
146 pub struct DirsChildrenMultiset<'a> {
150 pub struct DirsChildrenMultiset<'a> {
147 inner: FastHashMap<&'a HgPath, HashSet<&'a HgPath>>,
151 inner: FastHashMap<&'a HgPath, HashSet<&'a HgPath>>,
148 only_include: Option<HashSet<&'a HgPath>>,
152 only_include: Option<HashSet<&'a HgPath>>,
149 }
153 }
150
154
151 impl<'a> DirsChildrenMultiset<'a> {
155 impl<'a> DirsChildrenMultiset<'a> {
152 pub fn new(
156 pub fn new(
153 paths: impl Iterator<Item = &'a HgPathBuf>,
157 paths: impl Iterator<Item = &'a HgPathBuf>,
154 only_include: Option<&'a HashSet<impl AsRef<HgPath> + 'a>>,
158 only_include: Option<&'a HashSet<impl AsRef<HgPath> + 'a>>,
155 ) -> Self {
159 ) -> Self {
156 let mut new = Self {
160 let mut new = Self {
157 inner: HashMap::default(),
161 inner: HashMap::default(),
158 only_include: only_include
162 only_include: only_include
159 .map(|s| s.iter().map(|p| p.as_ref()).collect()),
163 .map(|s| s.iter().map(AsRef::as_ref).collect()),
160 };
164 };
161
165
162 for path in paths {
166 for path in paths {
163 new.add_path(path)
167 new.add_path(path)
164 }
168 }
165
169
166 new
170 new
167 }
171 }
168 fn add_path(&mut self, path: &'a (impl AsRef<HgPath> + 'a)) {
172 fn add_path(&mut self, path: &'a (impl AsRef<HgPath> + 'a)) {
169 if path.as_ref().is_empty() {
173 if path.as_ref().is_empty() {
170 return;
174 return;
171 }
175 }
172 for (directory, basename) in files::find_dirs_with_base(path.as_ref())
176 for (directory, basename) in files::find_dirs_with_base(path.as_ref())
173 {
177 {
174 if !self.is_dir_included(directory) {
178 if !self.is_dir_included(directory) {
175 continue;
179 continue;
176 }
180 }
177 self.inner
181 self.inner
178 .entry(directory)
182 .entry(directory)
179 .and_modify(|e| {
183 .and_modify(|e| {
180 e.insert(basename);
184 e.insert(basename);
181 })
185 })
182 .or_insert_with(|| {
186 .or_insert_with(|| {
183 let mut set = HashSet::new();
187 let mut set = HashSet::new();
184 set.insert(basename);
188 set.insert(basename);
185 set
189 set
186 });
190 });
187 }
191 }
188 }
192 }
189 fn is_dir_included(&self, dir: impl AsRef<HgPath>) -> bool {
193 fn is_dir_included(&self, dir: impl AsRef<HgPath>) -> bool {
190 match &self.only_include {
194 match &self.only_include {
191 None => false,
195 None => false,
192 Some(i) => i.contains(dir.as_ref()),
196 Some(i) => i.contains(dir.as_ref()),
193 }
197 }
194 }
198 }
195
199
196 pub fn get(
200 pub fn get(
197 &self,
201 &self,
198 path: impl AsRef<HgPath>,
202 path: impl AsRef<HgPath>,
199 ) -> Option<&HashSet<&'a HgPath>> {
203 ) -> Option<&HashSet<&'a HgPath>> {
200 self.inner.get(path.as_ref())
204 self.inner.get(path.as_ref())
201 }
205 }
202 }
206 }
203
207
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `DirsMultiset` directly from `(path, count)` pairs.
    fn multiset(counts: &[(&str, u32)]) -> DirsMultiset {
        DirsMultiset {
            inner: counts
                .iter()
                .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
                .collect(),
        }
    }

    #[test]
    fn test_delete_path_path_not_found() {
        let manifest: Vec<HgPathBuf> = vec![];
        let mut map = DirsMultiset::from_manifest(&manifest).unwrap();
        let path = HgPathBuf::from_bytes(b"doesnotexist/");
        assert_eq!(
            Err(DirstateMapError::PathNotFound(path.to_owned())),
            map.delete_path(&path)
        );
    }

    #[test]
    fn test_delete_path_empty_path() {
        let mut map =
            DirsMultiset::from_manifest(&vec![HgPathBuf::new()]).unwrap();
        let path = HgPath::new(b"");
        // The root entry exists once, so the first deletion succeeds...
        assert_eq!(Ok(()), map.delete_path(path));
        // ...and the second one reports the path as gone.
        assert_eq!(
            Err(DirstateMapError::PathNotFound(path.to_owned())),
            map.delete_path(path)
        );
    }

    #[test]
    fn test_delete_path_successful() {
        let mut map = multiset(&[("", 5), ("a", 3), ("a/b", 2), ("a/c", 1)]);

        assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/b/")));
        eprintln!("{:?}", map);
        assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/b/")));
        eprintln!("{:?}", map);
        // "a/b" dropped to zero references and was removed entirely.
        assert_eq!(
            Err(DirstateMapError::PathNotFound(HgPathBuf::from_bytes(
                b"a/b/"
            ))),
            map.delete_path(HgPath::new(b"a/b/"))
        );

        assert_eq!(2, *map.inner.get(HgPath::new(b"a")).unwrap());
        assert_eq!(1, *map.inner.get(HgPath::new(b"a/c")).unwrap());
        eprintln!("{:?}", map);
        assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/")));
        eprintln!("{:?}", map);

        assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/c/")));
        assert_eq!(
            Err(DirstateMapError::PathNotFound(HgPathBuf::from_bytes(
                b"a/c/"
            ))),
            map.delete_path(HgPath::new(b"a/c/"))
        );
    }

    #[test]
    fn test_add_path_empty_path() {
        let manifest: Vec<HgPathBuf> = vec![];
        let mut map = DirsMultiset::from_manifest(&manifest).unwrap();
        let path = HgPath::new(b"");
        map.add_path(path).unwrap();

        // The empty path still registers the root directory.
        assert_eq!(1, map.len());
    }

    #[test]
    fn test_add_path_successful() {
        let manifest: Vec<HgPathBuf> = vec![];
        let mut map = DirsMultiset::from_manifest(&manifest).unwrap();

        map.add_path(HgPath::new(b"a/")).unwrap();
        assert_eq!(1, *map.inner.get(HgPath::new(b"a")).unwrap());
        assert_eq!(1, *map.inner.get(HgPath::new(b"")).unwrap());
        assert_eq!(2, map.len());

        // Non directory should be ignored
        map.add_path(HgPath::new(b"a")).unwrap();
        assert_eq!(1, *map.inner.get(HgPath::new(b"a")).unwrap());
        assert_eq!(2, map.len());

        // Non directory will still add its base
        map.add_path(HgPath::new(b"a/b")).unwrap();
        assert_eq!(2, *map.inner.get(HgPath::new(b"a")).unwrap());
        assert_eq!(2, map.len());

        // Duplicate path works
        map.add_path(HgPath::new(b"a/")).unwrap();
        assert_eq!(3, *map.inner.get(HgPath::new(b"a")).unwrap());

        // Nested dir adds to its base
        map.add_path(HgPath::new(b"a/b/")).unwrap();
        assert_eq!(4, *map.inner.get(HgPath::new(b"a")).unwrap());
        assert_eq!(1, *map.inner.get(HgPath::new(b"a/b")).unwrap());

        // but not its base's base, because it already existed
        map.add_path(HgPath::new(b"a/b/c/")).unwrap();
        assert_eq!(4, *map.inner.get(HgPath::new(b"a")).unwrap());
        assert_eq!(2, *map.inner.get(HgPath::new(b"a/b")).unwrap());

        map.add_path(HgPath::new(b"a/c/")).unwrap();
        assert_eq!(1, *map.inner.get(HgPath::new(b"a/c")).unwrap());

        let expected = multiset(&[
            ("", 2),
            ("a", 5),
            ("a/b", 2),
            ("a/b/c", 1),
            ("a/c", 1),
        ]);
        assert_eq!(map, expected);
    }

    #[test]
    fn test_dirsmultiset_new_empty() {
        let manifest: Vec<HgPathBuf> = vec![];
        let new = DirsMultiset::from_manifest(&manifest).unwrap();
        assert_eq!(multiset(&[]), new);

        let new = DirsMultiset::from_dirstate(&FastHashMap::default(), None)
            .unwrap();
        assert_eq!(multiset(&[]), new);
    }

    #[test]
    fn test_dirsmultiset_new_no_skip() {
        let input_vec: Vec<HgPathBuf> = ["a/", "b/", "a/c", "a/d/"]
            .iter()
            .map(|e| HgPathBuf::from_bytes(e.as_bytes()))
            .collect();
        let expected = multiset(&[("", 2), ("a", 3), ("b", 1), ("a/d", 1)]);

        let new = DirsMultiset::from_manifest(&input_vec).unwrap();
        assert_eq!(expected, new);

        let input_map = ["a/", "b/", "a/c", "a/d/"]
            .iter()
            .map(|f| {
                (
                    HgPathBuf::from_bytes(f.as_bytes()),
                    DirstateEntry {
                        state: EntryState::Normal,
                        mode: 0,
                        mtime: 0,
                        size: 0,
                    },
                )
            })
            .collect();

        let new = DirsMultiset::from_dirstate(&input_map, None).unwrap();
        assert_eq!(expected, new);
    }

    #[test]
    fn test_dirsmultiset_new_skip() {
        let input_map = [
            ("a/", EntryState::Normal),
            ("a/b/", EntryState::Normal),
            ("a/c", EntryState::Removed),
            ("a/d/", EntryState::Merged),
        ]
        .iter()
        .map(|(f, state)| {
            (
                HgPathBuf::from_bytes(f.as_bytes()),
                DirstateEntry {
                    state: *state,
                    mode: 0,
                    mtime: 0,
                    size: 0,
                },
            )
        })
        .collect();

        // `Normal` entries are skipped, so "a" is only incremented
        // by "a/c" and "a/d/".
        let new =
            DirsMultiset::from_dirstate(&input_map, Some(EntryState::Normal))
                .unwrap();
        assert_eq!(multiset(&[("", 1), ("a", 2), ("a/d", 1)]), new);
    }
}
@@ -1,497 +1,497
1 // dirstate_map.rs
1 // dirstate_map.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::{
8 use crate::{
9 dirstate::{parsers::PARENT_SIZE, EntryState, SIZE_FROM_OTHER_PARENT},
9 dirstate::{parsers::PARENT_SIZE, EntryState, SIZE_FROM_OTHER_PARENT},
10 pack_dirstate, parse_dirstate,
10 pack_dirstate, parse_dirstate,
11 utils::{
11 utils::{
12 files::normalize_case,
12 files::normalize_case,
13 hg_path::{HgPath, HgPathBuf},
13 hg_path::{HgPath, HgPathBuf},
14 },
14 },
15 CopyMap, DirsMultiset, DirstateEntry, DirstateError, DirstateMapError,
15 CopyMap, DirsMultiset, DirstateEntry, DirstateError, DirstateMapError,
16 DirstateParents, DirstateParseError, FastHashMap, StateMap,
16 DirstateParents, DirstateParseError, FastHashMap, StateMap,
17 };
17 };
18 use core::borrow::Borrow;
18 use core::borrow::Borrow;
19 use std::collections::HashSet;
19 use std::collections::HashSet;
20 use std::convert::TryInto;
20 use std::convert::TryInto;
21 use std::iter::FromIterator;
21 use std::iter::FromIterator;
22 use std::ops::Deref;
22 use std::ops::Deref;
23 use std::time::Duration;
23 use std::time::Duration;
24
24
25 pub type FileFoldMap = FastHashMap<HgPathBuf, HgPathBuf>;
25 pub type FileFoldMap = FastHashMap<HgPathBuf, HgPathBuf>;
26
26
27 const NULL_ID: [u8; 20] = [0; 20];
27 const NULL_ID: [u8; 20] = [0; 20];
28 const MTIME_UNSET: i32 = -1;
28 const MTIME_UNSET: i32 = -1;
29
29
30 #[derive(Default)]
30 #[derive(Default)]
31 pub struct DirstateMap {
31 pub struct DirstateMap {
32 state_map: StateMap,
32 state_map: StateMap,
33 pub copy_map: CopyMap,
33 pub copy_map: CopyMap,
34 file_fold_map: Option<FileFoldMap>,
34 file_fold_map: Option<FileFoldMap>,
35 pub dirs: Option<DirsMultiset>,
35 pub dirs: Option<DirsMultiset>,
36 pub all_dirs: Option<DirsMultiset>,
36 pub all_dirs: Option<DirsMultiset>,
37 non_normal_set: Option<HashSet<HgPathBuf>>,
37 non_normal_set: Option<HashSet<HgPathBuf>>,
38 other_parent_set: Option<HashSet<HgPathBuf>>,
38 other_parent_set: Option<HashSet<HgPathBuf>>,
39 parents: Option<DirstateParents>,
39 parents: Option<DirstateParents>,
40 dirty_parents: bool,
40 dirty_parents: bool,
41 }
41 }
42
42
43 /// Should only really be used in python interface code, for clarity
43 /// Should only really be used in python interface code, for clarity
44 impl Deref for DirstateMap {
44 impl Deref for DirstateMap {
45 type Target = StateMap;
45 type Target = StateMap;
46
46
47 fn deref(&self) -> &Self::Target {
47 fn deref(&self) -> &Self::Target {
48 &self.state_map
48 &self.state_map
49 }
49 }
50 }
50 }
51
51
52 impl FromIterator<(HgPathBuf, DirstateEntry)> for DirstateMap {
52 impl FromIterator<(HgPathBuf, DirstateEntry)> for DirstateMap {
53 fn from_iter<I: IntoIterator<Item = (HgPathBuf, DirstateEntry)>>(
53 fn from_iter<I: IntoIterator<Item = (HgPathBuf, DirstateEntry)>>(
54 iter: I,
54 iter: I,
55 ) -> Self {
55 ) -> Self {
56 Self {
56 Self {
57 state_map: iter.into_iter().collect(),
57 state_map: iter.into_iter().collect(),
58 ..Self::default()
58 ..Self::default()
59 }
59 }
60 }
60 }
61 }
61 }
62
62
63 impl DirstateMap {
63 impl DirstateMap {
64 pub fn new() -> Self {
64 pub fn new() -> Self {
65 Self::default()
65 Self::default()
66 }
66 }
67
67
68 pub fn clear(&mut self) {
68 pub fn clear(&mut self) {
69 self.state_map.clear();
69 self.state_map.clear();
70 self.copy_map.clear();
70 self.copy_map.clear();
71 self.file_fold_map = None;
71 self.file_fold_map = None;
72 self.non_normal_set = None;
72 self.non_normal_set = None;
73 self.other_parent_set = None;
73 self.other_parent_set = None;
74 self.set_parents(&DirstateParents {
74 self.set_parents(&DirstateParents {
75 p1: NULL_ID,
75 p1: NULL_ID,
76 p2: NULL_ID,
76 p2: NULL_ID,
77 })
77 })
78 }
78 }
79
79
80 /// Add a tracked file to the dirstate
80 /// Add a tracked file to the dirstate
81 pub fn add_file(
81 pub fn add_file(
82 &mut self,
82 &mut self,
83 filename: &HgPath,
83 filename: &HgPath,
84 old_state: EntryState,
84 old_state: EntryState,
85 entry: DirstateEntry,
85 entry: DirstateEntry,
86 ) -> Result<(), DirstateMapError> {
86 ) -> Result<(), DirstateMapError> {
87 if old_state == EntryState::Unknown || old_state == EntryState::Removed
87 if old_state == EntryState::Unknown || old_state == EntryState::Removed
88 {
88 {
89 if let Some(ref mut dirs) = self.dirs {
89 if let Some(ref mut dirs) = self.dirs {
90 dirs.add_path(filename)?;
90 dirs.add_path(filename)?;
91 }
91 }
92 }
92 }
93 if old_state == EntryState::Unknown {
93 if old_state == EntryState::Unknown {
94 if let Some(ref mut all_dirs) = self.all_dirs {
94 if let Some(ref mut all_dirs) = self.all_dirs {
95 all_dirs.add_path(filename)?;
95 all_dirs.add_path(filename)?;
96 }
96 }
97 }
97 }
98 self.state_map.insert(filename.to_owned(), entry.to_owned());
98 self.state_map.insert(filename.to_owned(), entry.to_owned());
99
99
100 if entry.state != EntryState::Normal || entry.mtime == MTIME_UNSET {
100 if entry.state != EntryState::Normal || entry.mtime == MTIME_UNSET {
101 self.get_non_normal_other_parent_entries()
101 self.get_non_normal_other_parent_entries()
102 .0
102 .0
103 .insert(filename.to_owned());
103 .insert(filename.to_owned());
104 }
104 }
105
105
106 if entry.size == SIZE_FROM_OTHER_PARENT {
106 if entry.size == SIZE_FROM_OTHER_PARENT {
107 self.get_non_normal_other_parent_entries()
107 self.get_non_normal_other_parent_entries()
108 .1
108 .1
109 .insert(filename.to_owned());
109 .insert(filename.to_owned());
110 }
110 }
111 Ok(())
111 Ok(())
112 }
112 }
113
113
114 /// Mark a file as removed in the dirstate.
114 /// Mark a file as removed in the dirstate.
115 ///
115 ///
116 /// The `size` parameter is used to store sentinel values that indicate
116 /// The `size` parameter is used to store sentinel values that indicate
117 /// the file's previous state. In the future, we should refactor this
117 /// the file's previous state. In the future, we should refactor this
118 /// to be more explicit about what that state is.
118 /// to be more explicit about what that state is.
119 pub fn remove_file(
119 pub fn remove_file(
120 &mut self,
120 &mut self,
121 filename: &HgPath,
121 filename: &HgPath,
122 old_state: EntryState,
122 old_state: EntryState,
123 size: i32,
123 size: i32,
124 ) -> Result<(), DirstateMapError> {
124 ) -> Result<(), DirstateMapError> {
125 if old_state != EntryState::Unknown && old_state != EntryState::Removed
125 if old_state != EntryState::Unknown && old_state != EntryState::Removed
126 {
126 {
127 if let Some(ref mut dirs) = self.dirs {
127 if let Some(ref mut dirs) = self.dirs {
128 dirs.delete_path(filename)?;
128 dirs.delete_path(filename)?;
129 }
129 }
130 }
130 }
131 if old_state == EntryState::Unknown {
131 if old_state == EntryState::Unknown {
132 if let Some(ref mut all_dirs) = self.all_dirs {
132 if let Some(ref mut all_dirs) = self.all_dirs {
133 all_dirs.add_path(filename)?;
133 all_dirs.add_path(filename)?;
134 }
134 }
135 }
135 }
136
136
137 if let Some(ref mut file_fold_map) = self.file_fold_map {
137 if let Some(ref mut file_fold_map) = self.file_fold_map {
138 file_fold_map.remove(&normalize_case(filename));
138 file_fold_map.remove(&normalize_case(filename));
139 }
139 }
140 self.state_map.insert(
140 self.state_map.insert(
141 filename.to_owned(),
141 filename.to_owned(),
142 DirstateEntry {
142 DirstateEntry {
143 state: EntryState::Removed,
143 state: EntryState::Removed,
144 mode: 0,
144 mode: 0,
145 size,
145 size,
146 mtime: 0,
146 mtime: 0,
147 },
147 },
148 );
148 );
149 self.get_non_normal_other_parent_entries()
149 self.get_non_normal_other_parent_entries()
150 .0
150 .0
151 .insert(filename.to_owned());
151 .insert(filename.to_owned());
152 Ok(())
152 Ok(())
153 }
153 }
154
154
155 /// Remove a file from the dirstate.
155 /// Remove a file from the dirstate.
156 /// Returns `true` if the file was previously recorded.
156 /// Returns `true` if the file was previously recorded.
157 pub fn drop_file(
157 pub fn drop_file(
158 &mut self,
158 &mut self,
159 filename: &HgPath,
159 filename: &HgPath,
160 old_state: EntryState,
160 old_state: EntryState,
161 ) -> Result<bool, DirstateMapError> {
161 ) -> Result<bool, DirstateMapError> {
162 let exists = self.state_map.remove(filename).is_some();
162 let exists = self.state_map.remove(filename).is_some();
163
163
164 if exists {
164 if exists {
165 if old_state != EntryState::Removed {
165 if old_state != EntryState::Removed {
166 if let Some(ref mut dirs) = self.dirs {
166 if let Some(ref mut dirs) = self.dirs {
167 dirs.delete_path(filename)?;
167 dirs.delete_path(filename)?;
168 }
168 }
169 }
169 }
170 if let Some(ref mut all_dirs) = self.all_dirs {
170 if let Some(ref mut all_dirs) = self.all_dirs {
171 all_dirs.delete_path(filename)?;
171 all_dirs.delete_path(filename)?;
172 }
172 }
173 }
173 }
174 if let Some(ref mut file_fold_map) = self.file_fold_map {
174 if let Some(ref mut file_fold_map) = self.file_fold_map {
175 file_fold_map.remove(&normalize_case(filename));
175 file_fold_map.remove(&normalize_case(filename));
176 }
176 }
177 self.get_non_normal_other_parent_entries()
177 self.get_non_normal_other_parent_entries()
178 .0
178 .0
179 .remove(filename);
179 .remove(filename);
180
180
181 Ok(exists)
181 Ok(exists)
182 }
182 }
183
183
184 pub fn clear_ambiguous_times(
184 pub fn clear_ambiguous_times(
185 &mut self,
185 &mut self,
186 filenames: Vec<HgPathBuf>,
186 filenames: Vec<HgPathBuf>,
187 now: i32,
187 now: i32,
188 ) {
188 ) {
189 for filename in filenames {
189 for filename in filenames {
190 let mut changed = false;
190 let mut changed = false;
191 self.state_map
191 self.state_map
192 .entry(filename.to_owned())
192 .entry(filename.to_owned())
193 .and_modify(|entry| {
193 .and_modify(|entry| {
194 if entry.state == EntryState::Normal && entry.mtime == now
194 if entry.state == EntryState::Normal && entry.mtime == now
195 {
195 {
196 changed = true;
196 changed = true;
197 *entry = DirstateEntry {
197 *entry = DirstateEntry {
198 mtime: MTIME_UNSET,
198 mtime: MTIME_UNSET,
199 ..*entry
199 ..*entry
200 };
200 };
201 }
201 }
202 });
202 });
203 if changed {
203 if changed {
204 self.get_non_normal_other_parent_entries()
204 self.get_non_normal_other_parent_entries()
205 .0
205 .0
206 .insert(filename.to_owned());
206 .insert(filename.to_owned());
207 }
207 }
208 }
208 }
209 }
209 }
210
210
211 pub fn non_normal_entries_remove(
211 pub fn non_normal_entries_remove(
212 &mut self,
212 &mut self,
213 key: impl AsRef<HgPath>,
213 key: impl AsRef<HgPath>,
214 ) -> bool {
214 ) -> bool {
215 self.get_non_normal_other_parent_entries()
215 self.get_non_normal_other_parent_entries()
216 .0
216 .0
217 .remove(key.as_ref())
217 .remove(key.as_ref())
218 }
218 }
219 pub fn non_normal_entries_union(
219 pub fn non_normal_entries_union(
220 &mut self,
220 &mut self,
221 other: HashSet<HgPathBuf>,
221 other: HashSet<HgPathBuf>,
222 ) -> Vec<HgPathBuf> {
222 ) -> Vec<HgPathBuf> {
223 self.get_non_normal_other_parent_entries()
223 self.get_non_normal_other_parent_entries()
224 .0
224 .0
225 .union(&other)
225 .union(&other)
226 .map(|e| e.to_owned())
226 .map(ToOwned::to_owned)
227 .collect()
227 .collect()
228 }
228 }
229
229
230 pub fn get_non_normal_other_parent_entries(
230 pub fn get_non_normal_other_parent_entries(
231 &mut self,
231 &mut self,
232 ) -> (&mut HashSet<HgPathBuf>, &mut HashSet<HgPathBuf>) {
232 ) -> (&mut HashSet<HgPathBuf>, &mut HashSet<HgPathBuf>) {
233 self.set_non_normal_other_parent_entries(false);
233 self.set_non_normal_other_parent_entries(false);
234 (
234 (
235 self.non_normal_set.as_mut().unwrap(),
235 self.non_normal_set.as_mut().unwrap(),
236 self.other_parent_set.as_mut().unwrap(),
236 self.other_parent_set.as_mut().unwrap(),
237 )
237 )
238 }
238 }
239
239
240 /// Useful to get immutable references to those sets in contexts where
240 /// Useful to get immutable references to those sets in contexts where
241 /// you only have an immutable reference to the `DirstateMap`, like when
241 /// you only have an immutable reference to the `DirstateMap`, like when
242 /// sharing references with Python.
242 /// sharing references with Python.
243 ///
243 ///
244 /// TODO, get rid of this along with the other "setter/getter" stuff when
244 /// TODO, get rid of this along with the other "setter/getter" stuff when
245 /// a nice typestate plan is defined.
245 /// a nice typestate plan is defined.
246 ///
246 ///
247 /// # Panics
247 /// # Panics
248 ///
248 ///
249 /// Will panic if either set is `None`.
249 /// Will panic if either set is `None`.
250 pub fn get_non_normal_other_parent_entries_panic(
250 pub fn get_non_normal_other_parent_entries_panic(
251 &self,
251 &self,
252 ) -> (&HashSet<HgPathBuf>, &HashSet<HgPathBuf>) {
252 ) -> (&HashSet<HgPathBuf>, &HashSet<HgPathBuf>) {
253 (
253 (
254 self.non_normal_set.as_ref().unwrap(),
254 self.non_normal_set.as_ref().unwrap(),
255 self.other_parent_set.as_ref().unwrap(),
255 self.other_parent_set.as_ref().unwrap(),
256 )
256 )
257 }
257 }
258
258
259 pub fn set_non_normal_other_parent_entries(&mut self, force: bool) {
259 pub fn set_non_normal_other_parent_entries(&mut self, force: bool) {
260 if !force
260 if !force
261 && self.non_normal_set.is_some()
261 && self.non_normal_set.is_some()
262 && self.other_parent_set.is_some()
262 && self.other_parent_set.is_some()
263 {
263 {
264 return;
264 return;
265 }
265 }
266 let mut non_normal = HashSet::new();
266 let mut non_normal = HashSet::new();
267 let mut other_parent = HashSet::new();
267 let mut other_parent = HashSet::new();
268
268
269 for (
269 for (
270 filename,
270 filename,
271 DirstateEntry {
271 DirstateEntry {
272 state, size, mtime, ..
272 state, size, mtime, ..
273 },
273 },
274 ) in self.state_map.iter()
274 ) in self.state_map.iter()
275 {
275 {
276 if *state != EntryState::Normal || *mtime == MTIME_UNSET {
276 if *state != EntryState::Normal || *mtime == MTIME_UNSET {
277 non_normal.insert(filename.to_owned());
277 non_normal.insert(filename.to_owned());
278 }
278 }
279 if *state == EntryState::Normal && *size == SIZE_FROM_OTHER_PARENT
279 if *state == EntryState::Normal && *size == SIZE_FROM_OTHER_PARENT
280 {
280 {
281 other_parent.insert(filename.to_owned());
281 other_parent.insert(filename.to_owned());
282 }
282 }
283 }
283 }
284 self.non_normal_set = Some(non_normal);
284 self.non_normal_set = Some(non_normal);
285 self.other_parent_set = Some(other_parent);
285 self.other_parent_set = Some(other_parent);
286 }
286 }
287
287
288 /// Both of these setters and their uses appear to be the simplest way to
288 /// Both of these setters and their uses appear to be the simplest way to
289 /// emulate a Python lazy property, but it is ugly and unidiomatic.
289 /// emulate a Python lazy property, but it is ugly and unidiomatic.
290 /// TODO One day, rewriting this struct using the typestate might be a
290 /// TODO One day, rewriting this struct using the typestate might be a
291 /// good idea.
291 /// good idea.
292 pub fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
292 pub fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
293 if self.all_dirs.is_none() {
293 if self.all_dirs.is_none() {
294 self.all_dirs =
294 self.all_dirs =
295 Some(DirsMultiset::from_dirstate(&self.state_map, None)?);
295 Some(DirsMultiset::from_dirstate(&self.state_map, None)?);
296 }
296 }
297 Ok(())
297 Ok(())
298 }
298 }
299
299
300 pub fn set_dirs(&mut self) -> Result<(), DirstateMapError> {
300 pub fn set_dirs(&mut self) -> Result<(), DirstateMapError> {
301 if self.dirs.is_none() {
301 if self.dirs.is_none() {
302 self.dirs = Some(DirsMultiset::from_dirstate(
302 self.dirs = Some(DirsMultiset::from_dirstate(
303 &self.state_map,
303 &self.state_map,
304 Some(EntryState::Removed),
304 Some(EntryState::Removed),
305 )?);
305 )?);
306 }
306 }
307 Ok(())
307 Ok(())
308 }
308 }
309
309
310 pub fn has_tracked_dir(
310 pub fn has_tracked_dir(
311 &mut self,
311 &mut self,
312 directory: &HgPath,
312 directory: &HgPath,
313 ) -> Result<bool, DirstateMapError> {
313 ) -> Result<bool, DirstateMapError> {
314 self.set_dirs()?;
314 self.set_dirs()?;
315 Ok(self.dirs.as_ref().unwrap().contains(directory))
315 Ok(self.dirs.as_ref().unwrap().contains(directory))
316 }
316 }
317
317
318 pub fn has_dir(
318 pub fn has_dir(
319 &mut self,
319 &mut self,
320 directory: &HgPath,
320 directory: &HgPath,
321 ) -> Result<bool, DirstateMapError> {
321 ) -> Result<bool, DirstateMapError> {
322 self.set_all_dirs()?;
322 self.set_all_dirs()?;
323 Ok(self.all_dirs.as_ref().unwrap().contains(directory))
323 Ok(self.all_dirs.as_ref().unwrap().contains(directory))
324 }
324 }
325
325
326 pub fn parents(
326 pub fn parents(
327 &mut self,
327 &mut self,
328 file_contents: &[u8],
328 file_contents: &[u8],
329 ) -> Result<&DirstateParents, DirstateError> {
329 ) -> Result<&DirstateParents, DirstateError> {
330 if let Some(ref parents) = self.parents {
330 if let Some(ref parents) = self.parents {
331 return Ok(parents);
331 return Ok(parents);
332 }
332 }
333 let parents;
333 let parents;
334 if file_contents.len() == PARENT_SIZE * 2 {
334 if file_contents.len() == PARENT_SIZE * 2 {
335 parents = DirstateParents {
335 parents = DirstateParents {
336 p1: file_contents[..PARENT_SIZE].try_into().unwrap(),
336 p1: file_contents[..PARENT_SIZE].try_into().unwrap(),
337 p2: file_contents[PARENT_SIZE..PARENT_SIZE * 2]
337 p2: file_contents[PARENT_SIZE..PARENT_SIZE * 2]
338 .try_into()
338 .try_into()
339 .unwrap(),
339 .unwrap(),
340 };
340 };
341 } else if file_contents.is_empty() {
341 } else if file_contents.is_empty() {
342 parents = DirstateParents {
342 parents = DirstateParents {
343 p1: NULL_ID,
343 p1: NULL_ID,
344 p2: NULL_ID,
344 p2: NULL_ID,
345 };
345 };
346 } else {
346 } else {
347 return Err(DirstateError::Parse(DirstateParseError::Damaged));
347 return Err(DirstateError::Parse(DirstateParseError::Damaged));
348 }
348 }
349
349
350 self.parents = Some(parents);
350 self.parents = Some(parents);
351 Ok(self.parents.as_ref().unwrap())
351 Ok(self.parents.as_ref().unwrap())
352 }
352 }
353
353
354 pub fn set_parents(&mut self, parents: &DirstateParents) {
354 pub fn set_parents(&mut self, parents: &DirstateParents) {
355 self.parents = Some(parents.clone());
355 self.parents = Some(parents.clone());
356 self.dirty_parents = true;
356 self.dirty_parents = true;
357 }
357 }
358
358
359 pub fn read(
359 pub fn read(
360 &mut self,
360 &mut self,
361 file_contents: &[u8],
361 file_contents: &[u8],
362 ) -> Result<Option<DirstateParents>, DirstateError> {
362 ) -> Result<Option<DirstateParents>, DirstateError> {
363 if file_contents.is_empty() {
363 if file_contents.is_empty() {
364 return Ok(None);
364 return Ok(None);
365 }
365 }
366
366
367 let parents = parse_dirstate(
367 let parents = parse_dirstate(
368 &mut self.state_map,
368 &mut self.state_map,
369 &mut self.copy_map,
369 &mut self.copy_map,
370 file_contents,
370 file_contents,
371 )?;
371 )?;
372
372
373 if !self.dirty_parents {
373 if !self.dirty_parents {
374 self.set_parents(&parents);
374 self.set_parents(&parents);
375 }
375 }
376
376
377 Ok(Some(parents))
377 Ok(Some(parents))
378 }
378 }
379
379
380 pub fn pack(
380 pub fn pack(
381 &mut self,
381 &mut self,
382 parents: DirstateParents,
382 parents: DirstateParents,
383 now: Duration,
383 now: Duration,
384 ) -> Result<Vec<u8>, DirstateError> {
384 ) -> Result<Vec<u8>, DirstateError> {
385 let packed =
385 let packed =
386 pack_dirstate(&mut self.state_map, &self.copy_map, parents, now)?;
386 pack_dirstate(&mut self.state_map, &self.copy_map, parents, now)?;
387
387
388 self.dirty_parents = false;
388 self.dirty_parents = false;
389
389
390 self.set_non_normal_other_parent_entries(true);
390 self.set_non_normal_other_parent_entries(true);
391 Ok(packed)
391 Ok(packed)
392 }
392 }
393
393
394 pub fn build_file_fold_map(&mut self) -> &FileFoldMap {
394 pub fn build_file_fold_map(&mut self) -> &FileFoldMap {
395 if let Some(ref file_fold_map) = self.file_fold_map {
395 if let Some(ref file_fold_map) = self.file_fold_map {
396 return file_fold_map;
396 return file_fold_map;
397 }
397 }
398 let mut new_file_fold_map = FileFoldMap::default();
398 let mut new_file_fold_map = FileFoldMap::default();
399 for (filename, DirstateEntry { state, .. }) in self.state_map.borrow()
399 for (filename, DirstateEntry { state, .. }) in self.state_map.borrow()
400 {
400 {
401 if *state == EntryState::Removed {
401 if *state == EntryState::Removed {
402 new_file_fold_map
402 new_file_fold_map
403 .insert(normalize_case(filename), filename.to_owned());
403 .insert(normalize_case(filename), filename.to_owned());
404 }
404 }
405 }
405 }
406 self.file_fold_map = Some(new_file_fold_map);
406 self.file_fold_map = Some(new_file_fold_map);
407 self.file_fold_map.as_ref().unwrap()
407 self.file_fold_map.as_ref().unwrap()
408 }
408 }
409 }
409 }
410
410
411 #[cfg(test)]
411 #[cfg(test)]
412 mod tests {
412 mod tests {
413 use super::*;
413 use super::*;
414
414
415 #[test]
415 #[test]
416 fn test_dirs_multiset() {
416 fn test_dirs_multiset() {
417 let mut map = DirstateMap::new();
417 let mut map = DirstateMap::new();
418 assert!(map.dirs.is_none());
418 assert!(map.dirs.is_none());
419 assert!(map.all_dirs.is_none());
419 assert!(map.all_dirs.is_none());
420
420
421 assert_eq!(map.has_dir(HgPath::new(b"nope")).unwrap(), false);
421 assert_eq!(map.has_dir(HgPath::new(b"nope")).unwrap(), false);
422 assert!(map.all_dirs.is_some());
422 assert!(map.all_dirs.is_some());
423 assert!(map.dirs.is_none());
423 assert!(map.dirs.is_none());
424
424
425 assert_eq!(map.has_tracked_dir(HgPath::new(b"nope")).unwrap(), false);
425 assert_eq!(map.has_tracked_dir(HgPath::new(b"nope")).unwrap(), false);
426 assert!(map.dirs.is_some());
426 assert!(map.dirs.is_some());
427 }
427 }
428
428
429 #[test]
429 #[test]
430 fn test_add_file() {
430 fn test_add_file() {
431 let mut map = DirstateMap::new();
431 let mut map = DirstateMap::new();
432
432
433 assert_eq!(0, map.len());
433 assert_eq!(0, map.len());
434
434
435 map.add_file(
435 map.add_file(
436 HgPath::new(b"meh"),
436 HgPath::new(b"meh"),
437 EntryState::Normal,
437 EntryState::Normal,
438 DirstateEntry {
438 DirstateEntry {
439 state: EntryState::Normal,
439 state: EntryState::Normal,
440 mode: 1337,
440 mode: 1337,
441 mtime: 1337,
441 mtime: 1337,
442 size: 1337,
442 size: 1337,
443 },
443 },
444 )
444 )
445 .unwrap();
445 .unwrap();
446
446
447 assert_eq!(1, map.len());
447 assert_eq!(1, map.len());
448 assert_eq!(0, map.get_non_normal_other_parent_entries().0.len());
448 assert_eq!(0, map.get_non_normal_other_parent_entries().0.len());
449 assert_eq!(0, map.get_non_normal_other_parent_entries().1.len());
449 assert_eq!(0, map.get_non_normal_other_parent_entries().1.len());
450 }
450 }
451
451
452 #[test]
452 #[test]
453 fn test_non_normal_other_parent_entries() {
453 fn test_non_normal_other_parent_entries() {
454 let mut map: DirstateMap = [
454 let mut map: DirstateMap = [
455 (b"f1", (EntryState::Removed, 1337, 1337, 1337)),
455 (b"f1", (EntryState::Removed, 1337, 1337, 1337)),
456 (b"f2", (EntryState::Normal, 1337, 1337, -1)),
456 (b"f2", (EntryState::Normal, 1337, 1337, -1)),
457 (b"f3", (EntryState::Normal, 1337, 1337, 1337)),
457 (b"f3", (EntryState::Normal, 1337, 1337, 1337)),
458 (b"f4", (EntryState::Normal, 1337, -2, 1337)),
458 (b"f4", (EntryState::Normal, 1337, -2, 1337)),
459 (b"f5", (EntryState::Added, 1337, 1337, 1337)),
459 (b"f5", (EntryState::Added, 1337, 1337, 1337)),
460 (b"f6", (EntryState::Added, 1337, 1337, -1)),
460 (b"f6", (EntryState::Added, 1337, 1337, -1)),
461 (b"f7", (EntryState::Merged, 1337, 1337, -1)),
461 (b"f7", (EntryState::Merged, 1337, 1337, -1)),
462 (b"f8", (EntryState::Merged, 1337, 1337, 1337)),
462 (b"f8", (EntryState::Merged, 1337, 1337, 1337)),
463 (b"f9", (EntryState::Merged, 1337, -2, 1337)),
463 (b"f9", (EntryState::Merged, 1337, -2, 1337)),
464 (b"fa", (EntryState::Added, 1337, -2, 1337)),
464 (b"fa", (EntryState::Added, 1337, -2, 1337)),
465 (b"fb", (EntryState::Removed, 1337, -2, 1337)),
465 (b"fb", (EntryState::Removed, 1337, -2, 1337)),
466 ]
466 ]
467 .iter()
467 .iter()
468 .map(|(fname, (state, mode, size, mtime))| {
468 .map(|(fname, (state, mode, size, mtime))| {
469 (
469 (
470 HgPathBuf::from_bytes(fname.as_ref()),
470 HgPathBuf::from_bytes(fname.as_ref()),
471 DirstateEntry {
471 DirstateEntry {
472 state: *state,
472 state: *state,
473 mode: *mode,
473 mode: *mode,
474 size: *size,
474 size: *size,
475 mtime: *mtime,
475 mtime: *mtime,
476 },
476 },
477 )
477 )
478 })
478 })
479 .collect();
479 .collect();
480
480
481 let mut non_normal = [
481 let mut non_normal = [
482 b"f1", b"f2", b"f5", b"f6", b"f7", b"f8", b"f9", b"fa", b"fb",
482 b"f1", b"f2", b"f5", b"f6", b"f7", b"f8", b"f9", b"fa", b"fb",
483 ]
483 ]
484 .iter()
484 .iter()
485 .map(|x| HgPathBuf::from_bytes(x.as_ref()))
485 .map(|x| HgPathBuf::from_bytes(x.as_ref()))
486 .collect();
486 .collect();
487
487
488 let mut other_parent = HashSet::new();
488 let mut other_parent = HashSet::new();
489 other_parent.insert(HgPathBuf::from_bytes(b"f4"));
489 other_parent.insert(HgPathBuf::from_bytes(b"f4"));
490 let entries = map.get_non_normal_other_parent_entries();
490 let entries = map.get_non_normal_other_parent_entries();
491
491
492 assert_eq!(
492 assert_eq!(
493 (&mut non_normal, &mut other_parent),
493 (&mut non_normal, &mut other_parent),
494 (entries.0, entries.1)
494 (entries.0, entries.1)
495 );
495 );
496 }
496 }
497 }
497 }
@@ -1,435 +1,435
1 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
1 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
2 //
2 //
3 // This software may be used and distributed according to the terms of the
3 // This software may be used and distributed according to the terms of the
4 // GNU General Public License version 2 or any later version.
4 // GNU General Public License version 2 or any later version.
5
5
6 use crate::utils::hg_path::HgPath;
6 use crate::utils::hg_path::HgPath;
7 use crate::{
7 use crate::{
8 dirstate::{CopyMap, EntryState, StateMap},
8 dirstate::{CopyMap, EntryState, StateMap},
9 DirstateEntry, DirstatePackError, DirstateParents, DirstateParseError,
9 DirstateEntry, DirstatePackError, DirstateParents, DirstateParseError,
10 };
10 };
11 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
11 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
12 use micro_timer::timed;
12 use micro_timer::timed;
13 use std::convert::{TryFrom, TryInto};
13 use std::convert::{TryFrom, TryInto};
14 use std::io::Cursor;
14 use std::io::Cursor;
15 use std::time::Duration;
15 use std::time::Duration;
16
16
17 /// Parents are stored in the dirstate as byte hashes.
17 /// Parents are stored in the dirstate as byte hashes.
18 pub const PARENT_SIZE: usize = 20;
18 pub const PARENT_SIZE: usize = 20;
19 /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits.
19 /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits.
20 const MIN_ENTRY_SIZE: usize = 17;
20 const MIN_ENTRY_SIZE: usize = 17;
21
21
22 // TODO parse/pack: is mutate-on-loop better for performance?
22 // TODO parse/pack: is mutate-on-loop better for performance?
23
23
24 #[timed]
24 #[timed]
25 pub fn parse_dirstate(
25 pub fn parse_dirstate(
26 state_map: &mut StateMap,
26 state_map: &mut StateMap,
27 copy_map: &mut CopyMap,
27 copy_map: &mut CopyMap,
28 contents: &[u8],
28 contents: &[u8],
29 ) -> Result<DirstateParents, DirstateParseError> {
29 ) -> Result<DirstateParents, DirstateParseError> {
30 if contents.len() < PARENT_SIZE * 2 {
30 if contents.len() < PARENT_SIZE * 2 {
31 return Err(DirstateParseError::TooLittleData);
31 return Err(DirstateParseError::TooLittleData);
32 }
32 }
33
33
34 let mut curr_pos = PARENT_SIZE * 2;
34 let mut curr_pos = PARENT_SIZE * 2;
35 let parents = DirstateParents {
35 let parents = DirstateParents {
36 p1: contents[..PARENT_SIZE].try_into().unwrap(),
36 p1: contents[..PARENT_SIZE].try_into().unwrap(),
37 p2: contents[PARENT_SIZE..curr_pos].try_into().unwrap(),
37 p2: contents[PARENT_SIZE..curr_pos].try_into().unwrap(),
38 };
38 };
39
39
40 while curr_pos < contents.len() {
40 while curr_pos < contents.len() {
41 if curr_pos + MIN_ENTRY_SIZE > contents.len() {
41 if curr_pos + MIN_ENTRY_SIZE > contents.len() {
42 return Err(DirstateParseError::Overflow);
42 return Err(DirstateParseError::Overflow);
43 }
43 }
44 let entry_bytes = &contents[curr_pos..];
44 let entry_bytes = &contents[curr_pos..];
45
45
46 let mut cursor = Cursor::new(entry_bytes);
46 let mut cursor = Cursor::new(entry_bytes);
47 let state = EntryState::try_from(cursor.read_u8()?)?;
47 let state = EntryState::try_from(cursor.read_u8()?)?;
48 let mode = cursor.read_i32::<BigEndian>()?;
48 let mode = cursor.read_i32::<BigEndian>()?;
49 let size = cursor.read_i32::<BigEndian>()?;
49 let size = cursor.read_i32::<BigEndian>()?;
50 let mtime = cursor.read_i32::<BigEndian>()?;
50 let mtime = cursor.read_i32::<BigEndian>()?;
51 let path_len = cursor.read_i32::<BigEndian>()? as usize;
51 let path_len = cursor.read_i32::<BigEndian>()? as usize;
52
52
53 if path_len > contents.len() - curr_pos {
53 if path_len > contents.len() - curr_pos {
54 return Err(DirstateParseError::Overflow);
54 return Err(DirstateParseError::Overflow);
55 }
55 }
56
56
57 // Slice instead of allocating a Vec needed for `read_exact`
57 // Slice instead of allocating a Vec needed for `read_exact`
58 let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)];
58 let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)];
59
59
60 let (path, copy) = match memchr::memchr(0, path) {
60 let (path, copy) = match memchr::memchr(0, path) {
61 None => (path, None),
61 None => (path, None),
62 Some(i) => (&path[..i], Some(&path[(i + 1)..])),
62 Some(i) => (&path[..i], Some(&path[(i + 1)..])),
63 };
63 };
64
64
65 if let Some(copy_path) = copy {
65 if let Some(copy_path) = copy {
66 copy_map.insert(
66 copy_map.insert(
67 HgPath::new(path).to_owned(),
67 HgPath::new(path).to_owned(),
68 HgPath::new(copy_path).to_owned(),
68 HgPath::new(copy_path).to_owned(),
69 );
69 );
70 };
70 };
71 state_map.insert(
71 state_map.insert(
72 HgPath::new(path).to_owned(),
72 HgPath::new(path).to_owned(),
73 DirstateEntry {
73 DirstateEntry {
74 state,
74 state,
75 mode,
75 mode,
76 size,
76 size,
77 mtime,
77 mtime,
78 },
78 },
79 );
79 );
80 curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len);
80 curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len);
81 }
81 }
82
82
83 Ok(parents)
83 Ok(parents)
84 }
84 }
85
85
86 /// `now` is the duration in seconds since the Unix epoch
86 /// `now` is the duration in seconds since the Unix epoch
87 pub fn pack_dirstate(
87 pub fn pack_dirstate(
88 state_map: &mut StateMap,
88 state_map: &mut StateMap,
89 copy_map: &CopyMap,
89 copy_map: &CopyMap,
90 parents: DirstateParents,
90 parents: DirstateParents,
91 now: Duration,
91 now: Duration,
92 ) -> Result<Vec<u8>, DirstatePackError> {
92 ) -> Result<Vec<u8>, DirstatePackError> {
93 // TODO move away from i32 before 2038.
93 // TODO move away from i32 before 2038.
94 let now: i32 = now.as_secs().try_into().expect("time overflow");
94 let now: i32 = now.as_secs().try_into().expect("time overflow");
95
95
96 let expected_size: usize = state_map
96 let expected_size: usize = state_map
97 .iter()
97 .iter()
98 .map(|(filename, _)| {
98 .map(|(filename, _)| {
99 let mut length = MIN_ENTRY_SIZE + filename.len();
99 let mut length = MIN_ENTRY_SIZE + filename.len();
100 if let Some(copy) = copy_map.get(filename) {
100 if let Some(copy) = copy_map.get(filename) {
101 length += copy.len() + 1;
101 length += copy.len() + 1;
102 }
102 }
103 length
103 length
104 })
104 })
105 .sum();
105 .sum();
106 let expected_size = expected_size + PARENT_SIZE * 2;
106 let expected_size = expected_size + PARENT_SIZE * 2;
107
107
108 let mut packed = Vec::with_capacity(expected_size);
108 let mut packed = Vec::with_capacity(expected_size);
109 let mut new_state_map = vec![];
109 let mut new_state_map = vec![];
110
110
111 packed.extend(&parents.p1);
111 packed.extend(&parents.p1);
112 packed.extend(&parents.p2);
112 packed.extend(&parents.p2);
113
113
114 for (filename, entry) in state_map.iter() {
114 for (filename, entry) in state_map.iter() {
115 let new_filename = filename.to_owned();
115 let new_filename = filename.to_owned();
116 let mut new_mtime: i32 = entry.mtime;
116 let mut new_mtime: i32 = entry.mtime;
117 if entry.state == EntryState::Normal && entry.mtime == now {
117 if entry.state == EntryState::Normal && entry.mtime == now {
118 // The file was last modified "simultaneously" with the current
118 // The file was last modified "simultaneously" with the current
119 // write to dirstate (i.e. within the same second for file-
119 // write to dirstate (i.e. within the same second for file-
120 // systems with a granularity of 1 sec). This commonly happens
120 // systems with a granularity of 1 sec). This commonly happens
121 // for at least a couple of files on 'update'.
121 // for at least a couple of files on 'update'.
122 // The user could change the file without changing its size
122 // The user could change the file without changing its size
123 // within the same second. Invalidate the file's mtime in
123 // within the same second. Invalidate the file's mtime in
124 // dirstate, forcing future 'status' calls to compare the
124 // dirstate, forcing future 'status' calls to compare the
125 // contents of the file if the size is the same. This prevents
125 // contents of the file if the size is the same. This prevents
126 // mistakenly treating such files as clean.
126 // mistakenly treating such files as clean.
127 new_mtime = -1;
127 new_mtime = -1;
128 new_state_map.push((
128 new_state_map.push((
129 filename.to_owned(),
129 filename.to_owned(),
130 DirstateEntry {
130 DirstateEntry {
131 mtime: new_mtime,
131 mtime: new_mtime,
132 ..*entry
132 ..*entry
133 },
133 },
134 ));
134 ));
135 }
135 }
136 let mut new_filename = new_filename.into_vec();
136 let mut new_filename = new_filename.into_vec();
137 if let Some(copy) = copy_map.get(filename) {
137 if let Some(copy) = copy_map.get(filename) {
138 new_filename.push('\0' as u8);
138 new_filename.push(b'\0');
139 new_filename.extend(copy.bytes());
139 new_filename.extend(copy.bytes());
140 }
140 }
141
141
142 packed.write_u8(entry.state.into())?;
142 packed.write_u8(entry.state.into())?;
143 packed.write_i32::<BigEndian>(entry.mode)?;
143 packed.write_i32::<BigEndian>(entry.mode)?;
144 packed.write_i32::<BigEndian>(entry.size)?;
144 packed.write_i32::<BigEndian>(entry.size)?;
145 packed.write_i32::<BigEndian>(new_mtime)?;
145 packed.write_i32::<BigEndian>(new_mtime)?;
146 packed.write_i32::<BigEndian>(new_filename.len() as i32)?;
146 packed.write_i32::<BigEndian>(new_filename.len() as i32)?;
147 packed.extend(new_filename)
147 packed.extend(new_filename)
148 }
148 }
149
149
150 if packed.len() != expected_size {
150 if packed.len() != expected_size {
151 return Err(DirstatePackError::BadSize(expected_size, packed.len()));
151 return Err(DirstatePackError::BadSize(expected_size, packed.len()));
152 }
152 }
153
153
154 state_map.extend(new_state_map);
154 state_map.extend(new_state_map);
155
155
156 Ok(packed)
156 Ok(packed)
157 }
157 }
158
158
159 #[cfg(test)]
159 #[cfg(test)]
160 mod tests {
160 mod tests {
161 use super::*;
161 use super::*;
162 use crate::{utils::hg_path::HgPathBuf, FastHashMap};
162 use crate::{utils::hg_path::HgPathBuf, FastHashMap};
163
163
164 #[test]
164 #[test]
165 fn test_pack_dirstate_empty() {
165 fn test_pack_dirstate_empty() {
166 let mut state_map: StateMap = FastHashMap::default();
166 let mut state_map: StateMap = FastHashMap::default();
167 let copymap = FastHashMap::default();
167 let copymap = FastHashMap::default();
168 let parents = DirstateParents {
168 let parents = DirstateParents {
169 p1: *b"12345678910111213141",
169 p1: *b"12345678910111213141",
170 p2: *b"00000000000000000000",
170 p2: *b"00000000000000000000",
171 };
171 };
172 let now = Duration::new(15000000, 0);
172 let now = Duration::new(15000000, 0);
173 let expected = b"1234567891011121314100000000000000000000".to_vec();
173 let expected = b"1234567891011121314100000000000000000000".to_vec();
174
174
175 assert_eq!(
175 assert_eq!(
176 expected,
176 expected,
177 pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
177 pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
178 );
178 );
179
179
180 assert!(state_map.is_empty())
180 assert!(state_map.is_empty())
181 }
181 }
182 #[test]
182 #[test]
183 fn test_pack_dirstate_one_entry() {
183 fn test_pack_dirstate_one_entry() {
184 let expected_state_map: StateMap = [(
184 let expected_state_map: StateMap = [(
185 HgPathBuf::from_bytes(b"f1"),
185 HgPathBuf::from_bytes(b"f1"),
186 DirstateEntry {
186 DirstateEntry {
187 state: EntryState::Normal,
187 state: EntryState::Normal,
188 mode: 0o644,
188 mode: 0o644,
189 size: 0,
189 size: 0,
190 mtime: 791231220,
190 mtime: 791231220,
191 },
191 },
192 )]
192 )]
193 .iter()
193 .iter()
194 .cloned()
194 .cloned()
195 .collect();
195 .collect();
196 let mut state_map = expected_state_map.clone();
196 let mut state_map = expected_state_map.clone();
197
197
198 let copymap = FastHashMap::default();
198 let copymap = FastHashMap::default();
199 let parents = DirstateParents {
199 let parents = DirstateParents {
200 p1: *b"12345678910111213141",
200 p1: *b"12345678910111213141",
201 p2: *b"00000000000000000000",
201 p2: *b"00000000000000000000",
202 };
202 };
203 let now = Duration::new(15000000, 0);
203 let now = Duration::new(15000000, 0);
204 let expected = [
204 let expected = [
205 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
205 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
206 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
206 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
207 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
207 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
208 41, 58, 244, 0, 0, 0, 2, 102, 49,
208 41, 58, 244, 0, 0, 0, 2, 102, 49,
209 ]
209 ]
210 .to_vec();
210 .to_vec();
211
211
212 assert_eq!(
212 assert_eq!(
213 expected,
213 expected,
214 pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
214 pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
215 );
215 );
216
216
217 assert_eq!(expected_state_map, state_map);
217 assert_eq!(expected_state_map, state_map);
218 }
218 }
219 #[test]
219 #[test]
220 fn test_pack_dirstate_one_entry_with_copy() {
220 fn test_pack_dirstate_one_entry_with_copy() {
221 let expected_state_map: StateMap = [(
221 let expected_state_map: StateMap = [(
222 HgPathBuf::from_bytes(b"f1"),
222 HgPathBuf::from_bytes(b"f1"),
223 DirstateEntry {
223 DirstateEntry {
224 state: EntryState::Normal,
224 state: EntryState::Normal,
225 mode: 0o644,
225 mode: 0o644,
226 size: 0,
226 size: 0,
227 mtime: 791231220,
227 mtime: 791231220,
228 },
228 },
229 )]
229 )]
230 .iter()
230 .iter()
231 .cloned()
231 .cloned()
232 .collect();
232 .collect();
233 let mut state_map = expected_state_map.clone();
233 let mut state_map = expected_state_map.clone();
234 let mut copymap = FastHashMap::default();
234 let mut copymap = FastHashMap::default();
235 copymap.insert(
235 copymap.insert(
236 HgPathBuf::from_bytes(b"f1"),
236 HgPathBuf::from_bytes(b"f1"),
237 HgPathBuf::from_bytes(b"copyname"),
237 HgPathBuf::from_bytes(b"copyname"),
238 );
238 );
239 let parents = DirstateParents {
239 let parents = DirstateParents {
240 p1: *b"12345678910111213141",
240 p1: *b"12345678910111213141",
241 p2: *b"00000000000000000000",
241 p2: *b"00000000000000000000",
242 };
242 };
243 let now = Duration::new(15000000, 0);
243 let now = Duration::new(15000000, 0);
244 let expected = [
244 let expected = [
245 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
245 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
246 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
246 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
247 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
247 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
248 41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111, 112, 121, 110, 97,
248 41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111, 112, 121, 110, 97,
249 109, 101,
249 109, 101,
250 ]
250 ]
251 .to_vec();
251 .to_vec();
252
252
253 assert_eq!(
253 assert_eq!(
254 expected,
254 expected,
255 pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
255 pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
256 );
256 );
257 assert_eq!(expected_state_map, state_map);
257 assert_eq!(expected_state_map, state_map);
258 }
258 }
259
259
260 #[test]
260 #[test]
261 fn test_parse_pack_one_entry_with_copy() {
261 fn test_parse_pack_one_entry_with_copy() {
262 let mut state_map: StateMap = [(
262 let mut state_map: StateMap = [(
263 HgPathBuf::from_bytes(b"f1"),
263 HgPathBuf::from_bytes(b"f1"),
264 DirstateEntry {
264 DirstateEntry {
265 state: EntryState::Normal,
265 state: EntryState::Normal,
266 mode: 0o644,
266 mode: 0o644,
267 size: 0,
267 size: 0,
268 mtime: 791231220,
268 mtime: 791231220,
269 },
269 },
270 )]
270 )]
271 .iter()
271 .iter()
272 .cloned()
272 .cloned()
273 .collect();
273 .collect();
274 let mut copymap = FastHashMap::default();
274 let mut copymap = FastHashMap::default();
275 copymap.insert(
275 copymap.insert(
276 HgPathBuf::from_bytes(b"f1"),
276 HgPathBuf::from_bytes(b"f1"),
277 HgPathBuf::from_bytes(b"copyname"),
277 HgPathBuf::from_bytes(b"copyname"),
278 );
278 );
279 let parents = DirstateParents {
279 let parents = DirstateParents {
280 p1: *b"12345678910111213141",
280 p1: *b"12345678910111213141",
281 p2: *b"00000000000000000000",
281 p2: *b"00000000000000000000",
282 };
282 };
283 let now = Duration::new(15000000, 0);
283 let now = Duration::new(15000000, 0);
284 let result =
284 let result =
285 pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
285 pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
286 .unwrap();
286 .unwrap();
287
287
288 let mut new_state_map: StateMap = FastHashMap::default();
288 let mut new_state_map: StateMap = FastHashMap::default();
289 let mut new_copy_map: CopyMap = FastHashMap::default();
289 let mut new_copy_map: CopyMap = FastHashMap::default();
290 let new_parents = parse_dirstate(
290 let new_parents = parse_dirstate(
291 &mut new_state_map,
291 &mut new_state_map,
292 &mut new_copy_map,
292 &mut new_copy_map,
293 result.as_slice(),
293 result.as_slice(),
294 )
294 )
295 .unwrap();
295 .unwrap();
296 assert_eq!(
296 assert_eq!(
297 (parents, state_map, copymap),
297 (parents, state_map, copymap),
298 (new_parents, new_state_map, new_copy_map)
298 (new_parents, new_state_map, new_copy_map)
299 )
299 )
300 }
300 }
301
301
302 #[test]
302 #[test]
303 fn test_parse_pack_multiple_entries_with_copy() {
303 fn test_parse_pack_multiple_entries_with_copy() {
304 let mut state_map: StateMap = [
304 let mut state_map: StateMap = [
305 (
305 (
306 HgPathBuf::from_bytes(b"f1"),
306 HgPathBuf::from_bytes(b"f1"),
307 DirstateEntry {
307 DirstateEntry {
308 state: EntryState::Normal,
308 state: EntryState::Normal,
309 mode: 0o644,
309 mode: 0o644,
310 size: 0,
310 size: 0,
311 mtime: 791231220,
311 mtime: 791231220,
312 },
312 },
313 ),
313 ),
314 (
314 (
315 HgPathBuf::from_bytes(b"f2"),
315 HgPathBuf::from_bytes(b"f2"),
316 DirstateEntry {
316 DirstateEntry {
317 state: EntryState::Merged,
317 state: EntryState::Merged,
318 mode: 0o777,
318 mode: 0o777,
319 size: 1000,
319 size: 1000,
320 mtime: 791231220,
320 mtime: 791231220,
321 },
321 },
322 ),
322 ),
323 (
323 (
324 HgPathBuf::from_bytes(b"f3"),
324 HgPathBuf::from_bytes(b"f3"),
325 DirstateEntry {
325 DirstateEntry {
326 state: EntryState::Removed,
326 state: EntryState::Removed,
327 mode: 0o644,
327 mode: 0o644,
328 size: 234553,
328 size: 234553,
329 mtime: 791231220,
329 mtime: 791231220,
330 },
330 },
331 ),
331 ),
332 (
332 (
333 HgPathBuf::from_bytes(b"f4\xF6"),
333 HgPathBuf::from_bytes(b"f4\xF6"),
334 DirstateEntry {
334 DirstateEntry {
335 state: EntryState::Added,
335 state: EntryState::Added,
336 mode: 0o644,
336 mode: 0o644,
337 size: -1,
337 size: -1,
338 mtime: -1,
338 mtime: -1,
339 },
339 },
340 ),
340 ),
341 ]
341 ]
342 .iter()
342 .iter()
343 .cloned()
343 .cloned()
344 .collect();
344 .collect();
345 let mut copymap = FastHashMap::default();
345 let mut copymap = FastHashMap::default();
346 copymap.insert(
346 copymap.insert(
347 HgPathBuf::from_bytes(b"f1"),
347 HgPathBuf::from_bytes(b"f1"),
348 HgPathBuf::from_bytes(b"copyname"),
348 HgPathBuf::from_bytes(b"copyname"),
349 );
349 );
350 copymap.insert(
350 copymap.insert(
351 HgPathBuf::from_bytes(b"f4\xF6"),
351 HgPathBuf::from_bytes(b"f4\xF6"),
352 HgPathBuf::from_bytes(b"copyname2"),
352 HgPathBuf::from_bytes(b"copyname2"),
353 );
353 );
354 let parents = DirstateParents {
354 let parents = DirstateParents {
355 p1: *b"12345678910111213141",
355 p1: *b"12345678910111213141",
356 p2: *b"00000000000000000000",
356 p2: *b"00000000000000000000",
357 };
357 };
358 let now = Duration::new(15000000, 0);
358 let now = Duration::new(15000000, 0);
359 let result =
359 let result =
360 pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
360 pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
361 .unwrap();
361 .unwrap();
362
362
363 let mut new_state_map: StateMap = FastHashMap::default();
363 let mut new_state_map: StateMap = FastHashMap::default();
364 let mut new_copy_map: CopyMap = FastHashMap::default();
364 let mut new_copy_map: CopyMap = FastHashMap::default();
365 let new_parents = parse_dirstate(
365 let new_parents = parse_dirstate(
366 &mut new_state_map,
366 &mut new_state_map,
367 &mut new_copy_map,
367 &mut new_copy_map,
368 result.as_slice(),
368 result.as_slice(),
369 )
369 )
370 .unwrap();
370 .unwrap();
371 assert_eq!(
371 assert_eq!(
372 (parents, state_map, copymap),
372 (parents, state_map, copymap),
373 (new_parents, new_state_map, new_copy_map)
373 (new_parents, new_state_map, new_copy_map)
374 )
374 )
375 }
375 }
376
376
377 #[test]
377 #[test]
378 /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4
378 /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4
379 fn test_parse_pack_one_entry_with_copy_and_time_conflict() {
379 fn test_parse_pack_one_entry_with_copy_and_time_conflict() {
380 let mut state_map: StateMap = [(
380 let mut state_map: StateMap = [(
381 HgPathBuf::from_bytes(b"f1"),
381 HgPathBuf::from_bytes(b"f1"),
382 DirstateEntry {
382 DirstateEntry {
383 state: EntryState::Normal,
383 state: EntryState::Normal,
384 mode: 0o644,
384 mode: 0o644,
385 size: 0,
385 size: 0,
386 mtime: 15000000,
386 mtime: 15000000,
387 },
387 },
388 )]
388 )]
389 .iter()
389 .iter()
390 .cloned()
390 .cloned()
391 .collect();
391 .collect();
392 let mut copymap = FastHashMap::default();
392 let mut copymap = FastHashMap::default();
393 copymap.insert(
393 copymap.insert(
394 HgPathBuf::from_bytes(b"f1"),
394 HgPathBuf::from_bytes(b"f1"),
395 HgPathBuf::from_bytes(b"copyname"),
395 HgPathBuf::from_bytes(b"copyname"),
396 );
396 );
397 let parents = DirstateParents {
397 let parents = DirstateParents {
398 p1: *b"12345678910111213141",
398 p1: *b"12345678910111213141",
399 p2: *b"00000000000000000000",
399 p2: *b"00000000000000000000",
400 };
400 };
401 let now = Duration::new(15000000, 0);
401 let now = Duration::new(15000000, 0);
402 let result =
402 let result =
403 pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
403 pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
404 .unwrap();
404 .unwrap();
405
405
406 let mut new_state_map: StateMap = FastHashMap::default();
406 let mut new_state_map: StateMap = FastHashMap::default();
407 let mut new_copy_map: CopyMap = FastHashMap::default();
407 let mut new_copy_map: CopyMap = FastHashMap::default();
408 let new_parents = parse_dirstate(
408 let new_parents = parse_dirstate(
409 &mut new_state_map,
409 &mut new_state_map,
410 &mut new_copy_map,
410 &mut new_copy_map,
411 result.as_slice(),
411 result.as_slice(),
412 )
412 )
413 .unwrap();
413 .unwrap();
414
414
415 assert_eq!(
415 assert_eq!(
416 (
416 (
417 parents,
417 parents,
418 [(
418 [(
419 HgPathBuf::from_bytes(b"f1"),
419 HgPathBuf::from_bytes(b"f1"),
420 DirstateEntry {
420 DirstateEntry {
421 state: EntryState::Normal,
421 state: EntryState::Normal,
422 mode: 0o644,
422 mode: 0o644,
423 size: 0,
423 size: 0,
424 mtime: -1
424 mtime: -1
425 }
425 }
426 )]
426 )]
427 .iter()
427 .iter()
428 .cloned()
428 .cloned()
429 .collect::<StateMap>(),
429 .collect::<StateMap>(),
430 copymap,
430 copymap,
431 ),
431 ),
432 (new_parents, new_state_map, new_copy_map)
432 (new_parents, new_state_map, new_copy_map)
433 )
433 )
434 }
434 }
435 }
435 }
@@ -1,956 +1,953
1 // status.rs
1 // status.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Rust implementation of dirstate.status (dirstate.py).
8 //! Rust implementation of dirstate.status (dirstate.py).
9 //! It is currently missing a lot of functionality compared to the Python one
9 //! It is currently missing a lot of functionality compared to the Python one
10 //! and will only be triggered in narrow cases.
10 //! and will only be triggered in narrow cases.
11
11
12 use crate::{
12 use crate::{
13 dirstate::SIZE_FROM_OTHER_PARENT,
13 dirstate::SIZE_FROM_OTHER_PARENT,
14 filepatterns::PatternFileWarning,
14 filepatterns::PatternFileWarning,
15 matchers::{get_ignore_function, Matcher, VisitChildrenSet},
15 matchers::{get_ignore_function, Matcher, VisitChildrenSet},
16 utils::{
16 utils::{
17 files::{find_dirs, HgMetadata},
17 files::{find_dirs, HgMetadata},
18 hg_path::{
18 hg_path::{
19 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
19 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
20 HgPathError,
20 HgPathError,
21 },
21 },
22 path_auditor::PathAuditor,
22 path_auditor::PathAuditor,
23 },
23 },
24 CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
24 CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
25 PatternError,
25 PatternError,
26 };
26 };
27 use lazy_static::lazy_static;
27 use lazy_static::lazy_static;
28 use micro_timer::timed;
28 use micro_timer::timed;
29 use rayon::prelude::*;
29 use rayon::prelude::*;
30 use std::{
30 use std::{
31 borrow::Cow,
31 borrow::Cow,
32 collections::HashSet,
32 collections::HashSet,
33 fs::{read_dir, DirEntry},
33 fs::{read_dir, DirEntry},
34 io::ErrorKind,
34 io::ErrorKind,
35 ops::Deref,
35 ops::Deref,
36 path::{Path, PathBuf},
36 path::{Path, PathBuf},
37 };
37 };
38
38
39 /// Wrong type of file from a `BadMatch`
39 /// Wrong type of file from a `BadMatch`
40 /// Note: a lot of those don't exist on all platforms.
40 /// Note: a lot of those don't exist on all platforms.
41 #[derive(Debug, Copy, Clone)]
41 #[derive(Debug, Copy, Clone)]
42 pub enum BadType {
42 pub enum BadType {
43 CharacterDevice,
43 CharacterDevice,
44 BlockDevice,
44 BlockDevice,
45 FIFO,
45 FIFO,
46 Socket,
46 Socket,
47 Directory,
47 Directory,
48 Unknown,
48 Unknown,
49 }
49 }
50
50
51 impl ToString for BadType {
51 impl ToString for BadType {
52 fn to_string(&self) -> String {
52 fn to_string(&self) -> String {
53 match self {
53 match self {
54 BadType::CharacterDevice => "character device",
54 BadType::CharacterDevice => "character device",
55 BadType::BlockDevice => "block device",
55 BadType::BlockDevice => "block device",
56 BadType::FIFO => "fifo",
56 BadType::FIFO => "fifo",
57 BadType::Socket => "socket",
57 BadType::Socket => "socket",
58 BadType::Directory => "directory",
58 BadType::Directory => "directory",
59 BadType::Unknown => "unknown",
59 BadType::Unknown => "unknown",
60 }
60 }
61 .to_string()
61 .to_string()
62 }
62 }
63 }
63 }
64
64
65 /// Was explicitly matched but cannot be found/accessed
65 /// Was explicitly matched but cannot be found/accessed
66 #[derive(Debug, Copy, Clone)]
66 #[derive(Debug, Copy, Clone)]
67 pub enum BadMatch {
67 pub enum BadMatch {
68 OsError(i32),
68 OsError(i32),
69 BadType(BadType),
69 BadType(BadType),
70 }
70 }
71
71
72 /// Marker enum used to dispatch new status entries into the right collections.
72 /// Marker enum used to dispatch new status entries into the right collections.
73 /// Is similar to `crate::EntryState`, but represents the transient state of
73 /// Is similar to `crate::EntryState`, but represents the transient state of
74 /// entries during the lifetime of a command.
74 /// entries during the lifetime of a command.
75 #[derive(Debug, Copy, Clone)]
75 #[derive(Debug, Copy, Clone)]
76 enum Dispatch {
76 enum Dispatch {
77 Unsure,
77 Unsure,
78 Modified,
78 Modified,
79 Added,
79 Added,
80 Removed,
80 Removed,
81 Deleted,
81 Deleted,
82 Clean,
82 Clean,
83 Unknown,
83 Unknown,
84 Ignored,
84 Ignored,
85 /// Empty dispatch, the file is not worth listing
85 /// Empty dispatch, the file is not worth listing
86 None,
86 None,
87 /// Was explicitly matched but cannot be found/accessed
87 /// Was explicitly matched but cannot be found/accessed
88 Bad(BadMatch),
88 Bad(BadMatch),
89 Directory {
89 Directory {
90 /// True if the directory used to be a file in the dmap so we can say
90 /// True if the directory used to be a file in the dmap so we can say
91 /// that it's been removed.
91 /// that it's been removed.
92 was_file: bool,
92 was_file: bool,
93 },
93 },
94 }
94 }
95
95
96 type IoResult<T> = std::io::Result<T>;
96 type IoResult<T> = std::io::Result<T>;
97 /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait, 'static>`, so add
97 /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait, 'static>`, so add
98 /// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
98 /// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
99 type IgnoreFnType<'a> = Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
99 type IgnoreFnType<'a> = Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
100
100
101 /// Dates and times that are outside the 31-bit signed range are compared
101 /// Dates and times that are outside the 31-bit signed range are compared
102 /// modulo 2^31. This should prevent hg from behaving badly with very large
102 /// modulo 2^31. This should prevent hg from behaving badly with very large
103 /// files or corrupt dates while still having a high probability of detecting
103 /// files or corrupt dates while still having a high probability of detecting
104 /// changes. (issue2608)
104 /// changes. (issue2608)
105 /// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>`
105 /// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>`
106 /// is not defined for `i32`, and there is no `As` trait. This forces the
106 /// is not defined for `i32`, and there is no `As` trait. This forces the
107 /// caller to cast `b` as `i32`.
107 /// caller to cast `b` as `i32`.
108 fn mod_compare(a: i32, b: i32) -> bool {
108 fn mod_compare(a: i32, b: i32) -> bool {
109 a & i32::max_value() != b & i32::max_value()
109 a & i32::max_value() != b & i32::max_value()
110 }
110 }
111
111
112 /// Return a sorted list containing information about the entries
112 /// Return a sorted list containing information about the entries
113 /// in the directory.
113 /// in the directory.
114 ///
114 ///
115 /// * `skip_dot_hg` - Return an empty vec if `path` contains a `.hg` directory
115 /// * `skip_dot_hg` - Return an empty vec if `path` contains a `.hg` directory
116 fn list_directory(
116 fn list_directory(
117 path: impl AsRef<Path>,
117 path: impl AsRef<Path>,
118 skip_dot_hg: bool,
118 skip_dot_hg: bool,
119 ) -> std::io::Result<Vec<(HgPathBuf, DirEntry)>> {
119 ) -> std::io::Result<Vec<(HgPathBuf, DirEntry)>> {
120 let mut results = vec![];
120 let mut results = vec![];
121 let entries = read_dir(path.as_ref())?;
121 let entries = read_dir(path.as_ref())?;
122
122
123 for entry in entries {
123 for entry in entries {
124 let entry = entry?;
124 let entry = entry?;
125 let filename = os_string_to_hg_path_buf(entry.file_name())?;
125 let filename = os_string_to_hg_path_buf(entry.file_name())?;
126 let file_type = entry.file_type()?;
126 let file_type = entry.file_type()?;
127 if skip_dot_hg && filename.as_bytes() == b".hg" && file_type.is_dir() {
127 if skip_dot_hg && filename.as_bytes() == b".hg" && file_type.is_dir() {
128 return Ok(vec![]);
128 return Ok(vec![]);
129 } else {
129 } else {
130 results.push((HgPathBuf::from(filename), entry))
130 results.push((filename, entry))
131 }
131 }
132 }
132 }
133
133
134 results.sort_unstable_by_key(|e| e.0.clone());
134 results.sort_unstable_by_key(|e| e.0.clone());
135 Ok(results)
135 Ok(results)
136 }
136 }
137
137
138 /// The file corresponding to the dirstate entry was found on the filesystem.
138 /// The file corresponding to the dirstate entry was found on the filesystem.
139 fn dispatch_found(
139 fn dispatch_found(
140 filename: impl AsRef<HgPath>,
140 filename: impl AsRef<HgPath>,
141 entry: DirstateEntry,
141 entry: DirstateEntry,
142 metadata: HgMetadata,
142 metadata: HgMetadata,
143 copy_map: &CopyMap,
143 copy_map: &CopyMap,
144 options: StatusOptions,
144 options: StatusOptions,
145 ) -> Dispatch {
145 ) -> Dispatch {
146 let DirstateEntry {
146 let DirstateEntry {
147 state,
147 state,
148 mode,
148 mode,
149 mtime,
149 mtime,
150 size,
150 size,
151 } = entry;
151 } = entry;
152
152
153 let HgMetadata {
153 let HgMetadata {
154 st_mode,
154 st_mode,
155 st_size,
155 st_size,
156 st_mtime,
156 st_mtime,
157 ..
157 ..
158 } = metadata;
158 } = metadata;
159
159
160 match state {
160 match state {
161 EntryState::Normal => {
161 EntryState::Normal => {
162 let size_changed = mod_compare(size, st_size as i32);
162 let size_changed = mod_compare(size, st_size as i32);
163 let mode_changed =
163 let mode_changed =
164 (mode ^ st_mode as i32) & 0o100 != 0o000 && options.check_exec;
164 (mode ^ st_mode as i32) & 0o100 != 0o000 && options.check_exec;
165 let metadata_changed = size >= 0 && (size_changed || mode_changed);
165 let metadata_changed = size >= 0 && (size_changed || mode_changed);
166 let other_parent = size == SIZE_FROM_OTHER_PARENT;
166 let other_parent = size == SIZE_FROM_OTHER_PARENT;
167
167 if metadata_changed
168 if metadata_changed
168 || other_parent
169 || other_parent
169 || copy_map.contains_key(filename.as_ref())
170 || copy_map.contains_key(filename.as_ref())
170 {
171 {
171 Dispatch::Modified
172 Dispatch::Modified
172 } else if mod_compare(mtime, st_mtime as i32) {
173 } else if mod_compare(mtime, st_mtime as i32)
173 Dispatch::Unsure
174 || st_mtime == options.last_normal_time
174 } else if st_mtime == options.last_normal_time {
175 {
175 // the file may have just been marked as normal and
176 // the file may have just been marked as normal and
176 // it may have changed in the same second without
177 // it may have changed in the same second without
177 // changing its size. This can happen if we quickly
178 // changing its size. This can happen if we quickly
178 // do multiple commits. Force lookup, so we don't
179 // do multiple commits. Force lookup, so we don't
179 // miss such a racy file change.
180 // miss such a racy file change.
180 Dispatch::Unsure
181 Dispatch::Unsure
181 } else if options.list_clean {
182 } else if options.list_clean {
182 Dispatch::Clean
183 Dispatch::Clean
183 } else {
184 } else {
184 Dispatch::None
185 Dispatch::None
185 }
186 }
186 }
187 }
187 EntryState::Merged => Dispatch::Modified,
188 EntryState::Merged => Dispatch::Modified,
188 EntryState::Added => Dispatch::Added,
189 EntryState::Added => Dispatch::Added,
189 EntryState::Removed => Dispatch::Removed,
190 EntryState::Removed => Dispatch::Removed,
190 EntryState::Unknown => Dispatch::Unknown,
191 EntryState::Unknown => Dispatch::Unknown,
191 }
192 }
192 }
193 }
193
194
194 /// The file corresponding to this Dirstate entry is missing.
195 /// The file corresponding to this Dirstate entry is missing.
195 fn dispatch_missing(state: EntryState) -> Dispatch {
196 fn dispatch_missing(state: EntryState) -> Dispatch {
196 match state {
197 match state {
197 // File was removed from the filesystem during commands
198 // File was removed from the filesystem during commands
198 EntryState::Normal | EntryState::Merged | EntryState::Added => {
199 EntryState::Normal | EntryState::Merged | EntryState::Added => {
199 Dispatch::Deleted
200 Dispatch::Deleted
200 }
201 }
201 // File was removed, everything is normal
202 // File was removed, everything is normal
202 EntryState::Removed => Dispatch::Removed,
203 EntryState::Removed => Dispatch::Removed,
203 // File is unknown to Mercurial, everything is normal
204 // File is unknown to Mercurial, everything is normal
204 EntryState::Unknown => Dispatch::Unknown,
205 EntryState::Unknown => Dispatch::Unknown,
205 }
206 }
206 }
207 }
207
208
208 lazy_static! {
209 lazy_static! {
209 static ref DEFAULT_WORK: HashSet<&'static HgPath> = {
210 static ref DEFAULT_WORK: HashSet<&'static HgPath> = {
210 let mut h = HashSet::new();
211 let mut h = HashSet::new();
211 h.insert(HgPath::new(b""));
212 h.insert(HgPath::new(b""));
212 h
213 h
213 };
214 };
214 }
215 }
215
216
216 /// Get stat data about the files explicitly specified by match.
217 /// Get stat data about the files explicitly specified by match.
217 /// TODO subrepos
218 /// TODO subrepos
218 #[timed]
219 #[timed]
219 fn walk_explicit<'a>(
220 fn walk_explicit<'a>(
220 files: Option<&'a HashSet<&HgPath>>,
221 files: Option<&'a HashSet<&HgPath>>,
221 dmap: &'a DirstateMap,
222 dmap: &'a DirstateMap,
222 root_dir: impl AsRef<Path> + Sync + Send + 'a,
223 root_dir: impl AsRef<Path> + Sync + Send + 'a,
223 options: StatusOptions,
224 options: StatusOptions,
224 traversed_sender: crossbeam::Sender<HgPathBuf>,
225 traversed_sender: crossbeam::Sender<HgPathBuf>,
225 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
226 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
226 files
227 files
227 .unwrap_or(&DEFAULT_WORK)
228 .unwrap_or(&DEFAULT_WORK)
228 .par_iter()
229 .par_iter()
229 .map(move |filename| {
230 .map(move |&filename| {
230 // TODO normalization
231 // TODO normalization
231 let normalized = filename.as_ref();
232 let normalized = filename;
232
233
233 let buf = match hg_path_to_path_buf(normalized) {
234 let buf = match hg_path_to_path_buf(normalized) {
234 Ok(x) => x,
235 Ok(x) => x,
235 Err(e) => return Some(Err(e.into())),
236 Err(e) => return Some(Err(e.into())),
236 };
237 };
237 let target = root_dir.as_ref().join(buf);
238 let target = root_dir.as_ref().join(buf);
238 let st = target.symlink_metadata();
239 let st = target.symlink_metadata();
239 let in_dmap = dmap.get(normalized);
240 let in_dmap = dmap.get(normalized);
240 match st {
241 match st {
241 Ok(meta) => {
242 Ok(meta) => {
242 let file_type = meta.file_type();
243 let file_type = meta.file_type();
243 return if file_type.is_file() || file_type.is_symlink() {
244 return if file_type.is_file() || file_type.is_symlink() {
244 if let Some(entry) = in_dmap {
245 if let Some(entry) = in_dmap {
245 return Some(Ok((
246 return Some(Ok((
246 normalized,
247 normalized,
247 dispatch_found(
248 dispatch_found(
248 &normalized,
249 &normalized,
249 *entry,
250 *entry,
250 HgMetadata::from_metadata(meta),
251 HgMetadata::from_metadata(meta),
251 &dmap.copy_map,
252 &dmap.copy_map,
252 options,
253 options,
253 ),
254 ),
254 )));
255 )));
255 }
256 }
256 Some(Ok((normalized, Dispatch::Unknown)))
257 Some(Ok((normalized, Dispatch::Unknown)))
258 } else if file_type.is_dir() {
259 if options.collect_traversed_dirs {
260 traversed_sender
261 .send(normalized.to_owned())
262 .expect("receiver should outlive sender");
263 }
264 Some(Ok((
265 normalized,
266 Dispatch::Directory {
267 was_file: in_dmap.is_some(),
268 },
269 )))
257 } else {
270 } else {
258 if file_type.is_dir() {
271 Some(Ok((
259 if options.collect_traversed_dirs {
272 normalized,
260 traversed_sender
273 Dispatch::Bad(BadMatch::BadType(
261 .send(normalized.to_owned())
274 // TODO do more than unknown
262 .expect("receiver should outlive sender");
275 // Support for all `BadType` variant
263 }
276 // varies greatly between platforms.
264 Some(Ok((
277 // So far, no tests check the type and
265 normalized,
278 // this should be good enough for most
266 Dispatch::Directory {
279 // users.
267 was_file: in_dmap.is_some(),
280 BadType::Unknown,
268 },
281 )),
269 )))
282 )))
270 } else {
271 Some(Ok((
272 normalized,
273 Dispatch::Bad(BadMatch::BadType(
274 // TODO do more than unknown
275 // Support for all `BadType` variant
276 // varies greatly between platforms.
277 // So far, no tests check the type and
278 // this should be good enough for most
279 // users.
280 BadType::Unknown,
281 )),
282 )))
283 }
284 };
283 };
285 }
284 }
286 Err(_) => {
285 Err(_) => {
287 if let Some(entry) = in_dmap {
286 if let Some(entry) = in_dmap {
288 return Some(Ok((
287 return Some(Ok((
289 normalized,
288 normalized,
290 dispatch_missing(entry.state),
289 dispatch_missing(entry.state),
291 )));
290 )));
292 }
291 }
293 }
292 }
294 };
293 };
295 None
294 None
296 })
295 })
297 .flatten()
296 .flatten()
298 }
297 }
299
298
300 #[derive(Debug, Copy, Clone)]
299 #[derive(Debug, Copy, Clone)]
301 pub struct StatusOptions {
300 pub struct StatusOptions {
302 /// Remember the most recent modification timeslot for status, to make
301 /// Remember the most recent modification timeslot for status, to make
303 /// sure we won't miss future size-preserving file content modifications
302 /// sure we won't miss future size-preserving file content modifications
304 /// that happen within the same timeslot.
303 /// that happen within the same timeslot.
305 pub last_normal_time: i64,
304 pub last_normal_time: i64,
306 /// Whether we are on a filesystem with UNIX-like exec flags
305 /// Whether we are on a filesystem with UNIX-like exec flags
307 pub check_exec: bool,
306 pub check_exec: bool,
308 pub list_clean: bool,
307 pub list_clean: bool,
309 pub list_unknown: bool,
308 pub list_unknown: bool,
310 pub list_ignored: bool,
309 pub list_ignored: bool,
311 /// Whether to collect traversed dirs for applying a callback later.
310 /// Whether to collect traversed dirs for applying a callback later.
312 /// Used by `hg purge` for example.
311 /// Used by `hg purge` for example.
313 pub collect_traversed_dirs: bool,
312 pub collect_traversed_dirs: bool,
314 }
313 }
315
314
316 /// Dispatch a single entry (file, folder, symlink...) found during `traverse`.
315 /// Dispatch a single entry (file, folder, symlink...) found during `traverse`.
317 /// If the entry is a folder that needs to be traversed, it will be handled
316 /// If the entry is a folder that needs to be traversed, it will be handled
318 /// in a separate thread.
317 /// in a separate thread.
319 fn handle_traversed_entry<'a>(
318 fn handle_traversed_entry<'a>(
320 scope: &rayon::Scope<'a>,
319 scope: &rayon::Scope<'a>,
321 files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
320 files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
322 matcher: &'a (impl Matcher + Sync),
321 matcher: &'a (impl Matcher + Sync),
323 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a,
322 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a,
324 dmap: &'a DirstateMap,
323 dmap: &'a DirstateMap,
325 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
324 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
326 ignore_fn: &'a IgnoreFnType,
325 ignore_fn: &'a IgnoreFnType,
327 dir_ignore_fn: &'a IgnoreFnType,
326 dir_ignore_fn: &'a IgnoreFnType,
328 options: StatusOptions,
327 options: StatusOptions,
329 filename: HgPathBuf,
328 filename: HgPathBuf,
330 dir_entry: DirEntry,
329 dir_entry: DirEntry,
331 traversed_sender: crossbeam::Sender<HgPathBuf>,
330 traversed_sender: crossbeam::Sender<HgPathBuf>,
332 ) -> IoResult<()> {
331 ) -> IoResult<()> {
333 let file_type = dir_entry.file_type()?;
332 let file_type = dir_entry.file_type()?;
334 let entry_option = dmap.get(&filename);
333 let entry_option = dmap.get(&filename);
335
334
336 if filename.as_bytes() == b".hg" {
335 if filename.as_bytes() == b".hg" {
337 // Could be a directory or a symlink
336 // Could be a directory or a symlink
338 return Ok(());
337 return Ok(());
339 }
338 }
340
339
341 if file_type.is_dir() {
340 if file_type.is_dir() {
342 handle_traversed_dir(
341 handle_traversed_dir(
343 scope,
342 scope,
344 files_sender,
343 files_sender,
345 matcher,
344 matcher,
346 root_dir,
345 root_dir,
347 dmap,
346 dmap,
348 old_results,
347 old_results,
349 ignore_fn,
348 ignore_fn,
350 dir_ignore_fn,
349 dir_ignore_fn,
351 options,
350 options,
352 entry_option,
351 entry_option,
353 filename,
352 filename,
354 traversed_sender,
353 traversed_sender,
355 );
354 );
356 } else if file_type.is_file() || file_type.is_symlink() {
355 } else if file_type.is_file() || file_type.is_symlink() {
357 if let Some(entry) = entry_option {
356 if let Some(entry) = entry_option {
358 if matcher.matches_everything() || matcher.matches(&filename) {
357 if matcher.matches_everything() || matcher.matches(&filename) {
359 let metadata = dir_entry.metadata()?;
358 let metadata = dir_entry.metadata()?;
360 files_sender
359 files_sender
361 .send(Ok((
360 .send(Ok((
362 filename.to_owned(),
361 filename.to_owned(),
363 dispatch_found(
362 dispatch_found(
364 &filename,
363 &filename,
365 *entry,
364 *entry,
366 HgMetadata::from_metadata(metadata),
365 HgMetadata::from_metadata(metadata),
367 &dmap.copy_map,
366 &dmap.copy_map,
368 options,
367 options,
369 ),
368 ),
370 )))
369 )))
371 .unwrap();
370 .unwrap();
372 }
371 }
373 } else if (matcher.matches_everything() || matcher.matches(&filename))
372 } else if (matcher.matches_everything() || matcher.matches(&filename))
374 && !ignore_fn(&filename)
373 && !ignore_fn(&filename)
375 {
374 {
376 if (options.list_ignored || matcher.exact_match(&filename))
375 if (options.list_ignored || matcher.exact_match(&filename))
377 && dir_ignore_fn(&filename)
376 && dir_ignore_fn(&filename)
378 {
377 {
379 if options.list_ignored {
378 if options.list_ignored {
380 files_sender
379 files_sender
381 .send(Ok((filename.to_owned(), Dispatch::Ignored)))
380 .send(Ok((filename.to_owned(), Dispatch::Ignored)))
382 .unwrap();
381 .unwrap();
383 }
382 }
384 } else {
383 } else if options.list_unknown {
385 if options.list_unknown {
384 files_sender
386 files_sender
385 .send(Ok((filename.to_owned(), Dispatch::Unknown)))
387 .send(Ok((filename.to_owned(), Dispatch::Unknown)))
386 .unwrap();
388 .unwrap();
389 }
390 }
387 }
391 } else if ignore_fn(&filename) && options.list_ignored {
388 } else if ignore_fn(&filename) && options.list_ignored {
392 files_sender
389 files_sender
393 .send(Ok((filename.to_owned(), Dispatch::Ignored)))
390 .send(Ok((filename.to_owned(), Dispatch::Ignored)))
394 .unwrap();
391 .unwrap();
395 }
392 }
396 } else if let Some(entry) = entry_option {
393 } else if let Some(entry) = entry_option {
397 // Used to be a file or a folder, now something else.
394 // Used to be a file or a folder, now something else.
398 if matcher.matches_everything() || matcher.matches(&filename) {
395 if matcher.matches_everything() || matcher.matches(&filename) {
399 files_sender
396 files_sender
400 .send(Ok((filename.to_owned(), dispatch_missing(entry.state))))
397 .send(Ok((filename.to_owned(), dispatch_missing(entry.state))))
401 .unwrap();
398 .unwrap();
402 }
399 }
403 }
400 }
404
401
405 Ok(())
402 Ok(())
406 }
403 }
407
404
408 /// A directory was found in the filesystem and needs to be traversed
405 /// A directory was found in the filesystem and needs to be traversed
409 fn handle_traversed_dir<'a>(
406 fn handle_traversed_dir<'a>(
410 scope: &rayon::Scope<'a>,
407 scope: &rayon::Scope<'a>,
411 files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
408 files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
412 matcher: &'a (impl Matcher + Sync),
409 matcher: &'a (impl Matcher + Sync),
413 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a,
410 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a,
414 dmap: &'a DirstateMap,
411 dmap: &'a DirstateMap,
415 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
412 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
416 ignore_fn: &'a IgnoreFnType,
413 ignore_fn: &'a IgnoreFnType,
417 dir_ignore_fn: &'a IgnoreFnType,
414 dir_ignore_fn: &'a IgnoreFnType,
418 options: StatusOptions,
415 options: StatusOptions,
419 entry_option: Option<&'a DirstateEntry>,
416 entry_option: Option<&'a DirstateEntry>,
420 directory: HgPathBuf,
417 directory: HgPathBuf,
421 traversed_sender: crossbeam::Sender<HgPathBuf>,
418 traversed_sender: crossbeam::Sender<HgPathBuf>,
422 ) {
419 ) {
423 scope.spawn(move |_| {
420 scope.spawn(move |_| {
424 // Nested `if` until `rust-lang/rust#53668` is stable
421 // Nested `if` until `rust-lang/rust#53668` is stable
425 if let Some(entry) = entry_option {
422 if let Some(entry) = entry_option {
426 // Used to be a file, is now a folder
423 // Used to be a file, is now a folder
427 if matcher.matches_everything() || matcher.matches(&directory) {
424 if matcher.matches_everything() || matcher.matches(&directory) {
428 files_sender
425 files_sender
429 .send(Ok((
426 .send(Ok((
430 directory.to_owned(),
427 directory.to_owned(),
431 dispatch_missing(entry.state),
428 dispatch_missing(entry.state),
432 )))
429 )))
433 .unwrap();
430 .unwrap();
434 }
431 }
435 }
432 }
436 // Do we need to traverse it?
433 // Do we need to traverse it?
437 if !ignore_fn(&directory) || options.list_ignored {
434 if !ignore_fn(&directory) || options.list_ignored {
438 traverse_dir(
435 traverse_dir(
439 files_sender,
436 files_sender,
440 matcher,
437 matcher,
441 root_dir,
438 root_dir,
442 dmap,
439 dmap,
443 directory,
440 directory,
444 &old_results,
441 &old_results,
445 ignore_fn,
442 ignore_fn,
446 dir_ignore_fn,
443 dir_ignore_fn,
447 options,
444 options,
448 traversed_sender,
445 traversed_sender,
449 )
446 )
450 .unwrap_or_else(|e| files_sender.send(Err(e)).unwrap())
447 .unwrap_or_else(|e| files_sender.send(Err(e)).unwrap())
451 }
448 }
452 });
449 });
453 }
450 }
454
451
455 /// Decides whether the directory needs to be listed, and if so handles the
452 /// Decides whether the directory needs to be listed, and if so handles the
456 /// entries in a separate thread.
453 /// entries in a separate thread.
457 fn traverse_dir<'a>(
454 fn traverse_dir<'a>(
458 files_sender: &crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
455 files_sender: &crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
459 matcher: &'a (impl Matcher + Sync),
456 matcher: &'a (impl Matcher + Sync),
460 root_dir: impl AsRef<Path> + Sync + Send + Copy,
457 root_dir: impl AsRef<Path> + Sync + Send + Copy,
461 dmap: &'a DirstateMap,
458 dmap: &'a DirstateMap,
462 directory: impl AsRef<HgPath>,
459 directory: impl AsRef<HgPath>,
463 old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
460 old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
464 ignore_fn: &IgnoreFnType,
461 ignore_fn: &IgnoreFnType,
465 dir_ignore_fn: &IgnoreFnType,
462 dir_ignore_fn: &IgnoreFnType,
466 options: StatusOptions,
463 options: StatusOptions,
467 traversed_sender: crossbeam::Sender<HgPathBuf>,
464 traversed_sender: crossbeam::Sender<HgPathBuf>,
468 ) -> IoResult<()> {
465 ) -> IoResult<()> {
469 let directory = directory.as_ref();
466 let directory = directory.as_ref();
470
467
471 if options.collect_traversed_dirs {
468 if options.collect_traversed_dirs {
472 traversed_sender
469 traversed_sender
473 .send(directory.to_owned())
470 .send(directory.to_owned())
474 .expect("receiver should outlive sender");
471 .expect("receiver should outlive sender");
475 }
472 }
476
473
477 let visit_entries = match matcher.visit_children_set(directory) {
474 let visit_entries = match matcher.visit_children_set(directory) {
478 VisitChildrenSet::Empty => return Ok(()),
475 VisitChildrenSet::Empty => return Ok(()),
479 VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
476 VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
480 VisitChildrenSet::Set(set) => Some(set),
477 VisitChildrenSet::Set(set) => Some(set),
481 };
478 };
482 let buf = hg_path_to_path_buf(directory)?;
479 let buf = hg_path_to_path_buf(directory)?;
483 let dir_path = root_dir.as_ref().join(buf);
480 let dir_path = root_dir.as_ref().join(buf);
484
481
485 let skip_dot_hg = !directory.as_bytes().is_empty();
482 let skip_dot_hg = !directory.as_bytes().is_empty();
486 let entries = match list_directory(dir_path, skip_dot_hg) {
483 let entries = match list_directory(dir_path, skip_dot_hg) {
487 Err(e) => match e.kind() {
484 Err(e) => match e.kind() {
488 ErrorKind::NotFound | ErrorKind::PermissionDenied => {
485 ErrorKind::NotFound | ErrorKind::PermissionDenied => {
489 files_sender
486 files_sender
490 .send(Ok((
487 .send(Ok((
491 directory.to_owned(),
488 directory.to_owned(),
492 Dispatch::Bad(BadMatch::OsError(
489 Dispatch::Bad(BadMatch::OsError(
493 // Unwrapping here is OK because the error always
490 // Unwrapping here is OK because the error always
494 // is a real os error
491 // is a real os error
495 e.raw_os_error().unwrap(),
492 e.raw_os_error().unwrap(),
496 )),
493 )),
497 )))
494 )))
498 .unwrap();
495 .unwrap();
499 return Ok(());
496 return Ok(());
500 }
497 }
501 _ => return Err(e),
498 _ => return Err(e),
502 },
499 },
503 Ok(entries) => entries,
500 Ok(entries) => entries,
504 };
501 };
505
502
506 rayon::scope(|scope| -> IoResult<()> {
503 rayon::scope(|scope| -> IoResult<()> {
507 for (filename, dir_entry) in entries {
504 for (filename, dir_entry) in entries {
508 if let Some(ref set) = visit_entries {
505 if let Some(ref set) = visit_entries {
509 if !set.contains(filename.deref()) {
506 if !set.contains(filename.deref()) {
510 continue;
507 continue;
511 }
508 }
512 }
509 }
513 // TODO normalize
510 // TODO normalize
514 let filename = if directory.is_empty() {
511 let filename = if directory.is_empty() {
515 filename.to_owned()
512 filename.to_owned()
516 } else {
513 } else {
517 directory.join(&filename)
514 directory.join(&filename)
518 };
515 };
519
516
520 if !old_results.contains_key(filename.deref()) {
517 if !old_results.contains_key(filename.deref()) {
521 handle_traversed_entry(
518 handle_traversed_entry(
522 scope,
519 scope,
523 files_sender,
520 files_sender,
524 matcher,
521 matcher,
525 root_dir,
522 root_dir,
526 dmap,
523 dmap,
527 old_results,
524 old_results,
528 ignore_fn,
525 ignore_fn,
529 dir_ignore_fn,
526 dir_ignore_fn,
530 options,
527 options,
531 filename,
528 filename,
532 dir_entry,
529 dir_entry,
533 traversed_sender.clone(),
530 traversed_sender.clone(),
534 )?;
531 )?;
535 }
532 }
536 }
533 }
537 Ok(())
534 Ok(())
538 })
535 })
539 }
536 }
540
537
541 /// Walk the working directory recursively to look for changes compared to the
538 /// Walk the working directory recursively to look for changes compared to the
542 /// current `DirstateMap`.
539 /// current `DirstateMap`.
543 ///
540 ///
544 /// This takes a mutable reference to the results to account for the `extend`
541 /// This takes a mutable reference to the results to account for the `extend`
545 /// in timings
542 /// in timings
546 #[timed]
543 #[timed]
547 fn traverse<'a>(
544 fn traverse<'a>(
548 matcher: &'a (impl Matcher + Sync),
545 matcher: &'a (impl Matcher + Sync),
549 root_dir: impl AsRef<Path> + Sync + Send + Copy,
546 root_dir: impl AsRef<Path> + Sync + Send + Copy,
550 dmap: &'a DirstateMap,
547 dmap: &'a DirstateMap,
551 path: impl AsRef<HgPath>,
548 path: impl AsRef<HgPath>,
552 old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
549 old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
553 ignore_fn: &IgnoreFnType,
550 ignore_fn: &IgnoreFnType,
554 dir_ignore_fn: &IgnoreFnType,
551 dir_ignore_fn: &IgnoreFnType,
555 options: StatusOptions,
552 options: StatusOptions,
556 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
553 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
557 traversed_sender: crossbeam::Sender<HgPathBuf>,
554 traversed_sender: crossbeam::Sender<HgPathBuf>,
558 ) -> IoResult<()> {
555 ) -> IoResult<()> {
559 let root_dir = root_dir.as_ref();
556 let root_dir = root_dir.as_ref();
560
557
561 // The traversal is done in parallel, so use a channel to gather entries.
558 // The traversal is done in parallel, so use a channel to gather entries.
562 // `crossbeam::Sender` is `Sync`, while `mpsc::Sender` is not.
559 // `crossbeam::Sender` is `Sync`, while `mpsc::Sender` is not.
563 let (files_transmitter, files_receiver) = crossbeam::channel::unbounded();
560 let (files_transmitter, files_receiver) = crossbeam::channel::unbounded();
564
561
565 traverse_dir(
562 traverse_dir(
566 &files_transmitter,
563 &files_transmitter,
567 matcher,
564 matcher,
568 root_dir,
565 root_dir,
569 &dmap,
566 &dmap,
570 path,
567 path,
571 &old_results,
568 &old_results,
572 &ignore_fn,
569 &ignore_fn,
573 &dir_ignore_fn,
570 &dir_ignore_fn,
574 options,
571 options,
575 traversed_sender,
572 traversed_sender,
576 )?;
573 )?;
577
574
578 // Disconnect the channel so the receiver stops waiting
575 // Disconnect the channel so the receiver stops waiting
579 drop(files_transmitter);
576 drop(files_transmitter);
580
577
581 // TODO don't collect. Find a way of replicating the behavior of
578 // TODO don't collect. Find a way of replicating the behavior of
582 // `itertools::process_results`, but for `rayon::ParallelIterator`
579 // `itertools::process_results`, but for `rayon::ParallelIterator`
583 let new_results: IoResult<Vec<(Cow<'a, HgPath>, Dispatch)>> =
580 let new_results: IoResult<Vec<(Cow<'a, HgPath>, Dispatch)>> =
584 files_receiver
581 files_receiver
585 .into_iter()
582 .into_iter()
586 .map(|item| {
583 .map(|item| {
587 let (f, d) = item?;
584 let (f, d) = item?;
588 Ok((Cow::Owned(f), d))
585 Ok((Cow::Owned(f), d))
589 })
586 })
590 .collect();
587 .collect();
591
588
592 results.par_extend(new_results?);
589 results.par_extend(new_results?);
593
590
594 Ok(())
591 Ok(())
595 }
592 }
596
593
597 /// Stat all entries in the `DirstateMap` and mark them for dispatch.
594 /// Stat all entries in the `DirstateMap` and mark them for dispatch.
598 fn stat_dmap_entries(
595 fn stat_dmap_entries(
599 dmap: &DirstateMap,
596 dmap: &DirstateMap,
600 root_dir: impl AsRef<Path> + Sync + Send,
597 root_dir: impl AsRef<Path> + Sync + Send,
601 options: StatusOptions,
598 options: StatusOptions,
602 ) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
599 ) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
603 dmap.par_iter().map(move |(filename, entry)| {
600 dmap.par_iter().map(move |(filename, entry)| {
604 let filename: &HgPath = filename;
601 let filename: &HgPath = filename;
605 let filename_as_path = hg_path_to_path_buf(filename)?;
602 let filename_as_path = hg_path_to_path_buf(filename)?;
606 let meta = root_dir.as_ref().join(filename_as_path).symlink_metadata();
603 let meta = root_dir.as_ref().join(filename_as_path).symlink_metadata();
607
604
608 match meta {
605 match meta {
609 Ok(ref m)
606 Ok(ref m)
610 if !(m.file_type().is_file()
607 if !(m.file_type().is_file()
611 || m.file_type().is_symlink()) =>
608 || m.file_type().is_symlink()) =>
612 {
609 {
613 Ok((filename, dispatch_missing(entry.state)))
610 Ok((filename, dispatch_missing(entry.state)))
614 }
611 }
615 Ok(m) => Ok((
612 Ok(m) => Ok((
616 filename,
613 filename,
617 dispatch_found(
614 dispatch_found(
618 filename,
615 filename,
619 *entry,
616 *entry,
620 HgMetadata::from_metadata(m),
617 HgMetadata::from_metadata(m),
621 &dmap.copy_map,
618 &dmap.copy_map,
622 options,
619 options,
623 ),
620 ),
624 )),
621 )),
625 Err(ref e)
622 Err(ref e)
626 if e.kind() == ErrorKind::NotFound
623 if e.kind() == ErrorKind::NotFound
627 || e.raw_os_error() == Some(20) =>
624 || e.raw_os_error() == Some(20) =>
628 {
625 {
629 // Rust does not yet have an `ErrorKind` for
626 // Rust does not yet have an `ErrorKind` for
630 // `NotADirectory` (errno 20)
627 // `NotADirectory` (errno 20)
631 // It happens if the dirstate contains `foo/bar` and
628 // It happens if the dirstate contains `foo/bar` and
632 // foo is not a directory
629 // foo is not a directory
633 Ok((filename, dispatch_missing(entry.state)))
630 Ok((filename, dispatch_missing(entry.state)))
634 }
631 }
635 Err(e) => Err(e),
632 Err(e) => Err(e),
636 }
633 }
637 })
634 })
638 }
635 }
639
636
640 /// This takes a mutable reference to the results to account for the `extend`
637 /// This takes a mutable reference to the results to account for the `extend`
641 /// in timings
638 /// in timings
642 #[timed]
639 #[timed]
643 fn extend_from_dmap<'a>(
640 fn extend_from_dmap<'a>(
644 dmap: &'a DirstateMap,
641 dmap: &'a DirstateMap,
645 root_dir: impl AsRef<Path> + Sync + Send,
642 root_dir: impl AsRef<Path> + Sync + Send,
646 options: StatusOptions,
643 options: StatusOptions,
647 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
644 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
648 ) {
645 ) {
649 results.par_extend(
646 results.par_extend(
650 stat_dmap_entries(dmap, root_dir, options)
647 stat_dmap_entries(dmap, root_dir, options)
651 .flatten()
648 .flatten()
652 .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch)),
649 .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch)),
653 );
650 );
654 }
651 }
655
652
656 #[derive(Debug)]
653 #[derive(Debug)]
657 pub struct DirstateStatus<'a> {
654 pub struct DirstateStatus<'a> {
658 pub modified: Vec<Cow<'a, HgPath>>,
655 pub modified: Vec<Cow<'a, HgPath>>,
659 pub added: Vec<Cow<'a, HgPath>>,
656 pub added: Vec<Cow<'a, HgPath>>,
660 pub removed: Vec<Cow<'a, HgPath>>,
657 pub removed: Vec<Cow<'a, HgPath>>,
661 pub deleted: Vec<Cow<'a, HgPath>>,
658 pub deleted: Vec<Cow<'a, HgPath>>,
662 pub clean: Vec<Cow<'a, HgPath>>,
659 pub clean: Vec<Cow<'a, HgPath>>,
663 pub ignored: Vec<Cow<'a, HgPath>>,
660 pub ignored: Vec<Cow<'a, HgPath>>,
664 pub unknown: Vec<Cow<'a, HgPath>>,
661 pub unknown: Vec<Cow<'a, HgPath>>,
665 pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>,
662 pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>,
666 /// Only filled if `collect_traversed_dirs` is `true`
663 /// Only filled if `collect_traversed_dirs` is `true`
667 pub traversed: Vec<HgPathBuf>,
664 pub traversed: Vec<HgPathBuf>,
668 }
665 }
669
666
670 #[timed]
667 #[timed]
671 fn build_response<'a>(
668 fn build_response<'a>(
672 results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>,
669 results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>,
673 traversed: Vec<HgPathBuf>,
670 traversed: Vec<HgPathBuf>,
674 ) -> (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>) {
671 ) -> (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>) {
675 let mut lookup = vec![];
672 let mut lookup = vec![];
676 let mut modified = vec![];
673 let mut modified = vec![];
677 let mut added = vec![];
674 let mut added = vec![];
678 let mut removed = vec![];
675 let mut removed = vec![];
679 let mut deleted = vec![];
676 let mut deleted = vec![];
680 let mut clean = vec![];
677 let mut clean = vec![];
681 let mut ignored = vec![];
678 let mut ignored = vec![];
682 let mut unknown = vec![];
679 let mut unknown = vec![];
683 let mut bad = vec![];
680 let mut bad = vec![];
684
681
685 for (filename, dispatch) in results.into_iter() {
682 for (filename, dispatch) in results.into_iter() {
686 match dispatch {
683 match dispatch {
687 Dispatch::Unknown => unknown.push(filename),
684 Dispatch::Unknown => unknown.push(filename),
688 Dispatch::Unsure => lookup.push(filename),
685 Dispatch::Unsure => lookup.push(filename),
689 Dispatch::Modified => modified.push(filename),
686 Dispatch::Modified => modified.push(filename),
690 Dispatch::Added => added.push(filename),
687 Dispatch::Added => added.push(filename),
691 Dispatch::Removed => removed.push(filename),
688 Dispatch::Removed => removed.push(filename),
692 Dispatch::Deleted => deleted.push(filename),
689 Dispatch::Deleted => deleted.push(filename),
693 Dispatch::Clean => clean.push(filename),
690 Dispatch::Clean => clean.push(filename),
694 Dispatch::Ignored => ignored.push(filename),
691 Dispatch::Ignored => ignored.push(filename),
695 Dispatch::None => {}
692 Dispatch::None => {}
696 Dispatch::Bad(reason) => bad.push((filename, reason)),
693 Dispatch::Bad(reason) => bad.push((filename, reason)),
697 Dispatch::Directory { .. } => {}
694 Dispatch::Directory { .. } => {}
698 }
695 }
699 }
696 }
700
697
701 (
698 (
702 lookup,
699 lookup,
703 DirstateStatus {
700 DirstateStatus {
704 modified,
701 modified,
705 added,
702 added,
706 removed,
703 removed,
707 deleted,
704 deleted,
708 clean,
705 clean,
709 ignored,
706 ignored,
710 unknown,
707 unknown,
711 bad,
708 bad,
712 traversed,
709 traversed,
713 },
710 },
714 )
711 )
715 }
712 }
716
713
717 #[derive(Debug)]
714 #[derive(Debug)]
718 pub enum StatusError {
715 pub enum StatusError {
719 IO(std::io::Error),
716 IO(std::io::Error),
720 Path(HgPathError),
717 Path(HgPathError),
721 Pattern(PatternError),
718 Pattern(PatternError),
722 }
719 }
723
720
724 pub type StatusResult<T> = Result<T, StatusError>;
721 pub type StatusResult<T> = Result<T, StatusError>;
725
722
726 impl From<PatternError> for StatusError {
723 impl From<PatternError> for StatusError {
727 fn from(e: PatternError) -> Self {
724 fn from(e: PatternError) -> Self {
728 StatusError::Pattern(e)
725 StatusError::Pattern(e)
729 }
726 }
730 }
727 }
731 impl From<HgPathError> for StatusError {
728 impl From<HgPathError> for StatusError {
732 fn from(e: HgPathError) -> Self {
729 fn from(e: HgPathError) -> Self {
733 StatusError::Path(e)
730 StatusError::Path(e)
734 }
731 }
735 }
732 }
736 impl From<std::io::Error> for StatusError {
733 impl From<std::io::Error> for StatusError {
737 fn from(e: std::io::Error) -> Self {
734 fn from(e: std::io::Error) -> Self {
738 StatusError::IO(e)
735 StatusError::IO(e)
739 }
736 }
740 }
737 }
741
738
742 impl ToString for StatusError {
739 impl ToString for StatusError {
743 fn to_string(&self) -> String {
740 fn to_string(&self) -> String {
744 match self {
741 match self {
745 StatusError::IO(e) => e.to_string(),
742 StatusError::IO(e) => e.to_string(),
746 StatusError::Path(e) => e.to_string(),
743 StatusError::Path(e) => e.to_string(),
747 StatusError::Pattern(e) => e.to_string(),
744 StatusError::Pattern(e) => e.to_string(),
748 }
745 }
749 }
746 }
750 }
747 }
751
748
752 /// This takes a mutable reference to the results to account for the `extend`
749 /// This takes a mutable reference to the results to account for the `extend`
753 /// in timings
750 /// in timings
754 #[timed]
751 #[timed]
755 fn handle_unknowns<'a>(
752 fn handle_unknowns<'a>(
756 dmap: &'a DirstateMap,
753 dmap: &'a DirstateMap,
757 matcher: &(impl Matcher + Sync),
754 matcher: &(impl Matcher + Sync),
758 root_dir: impl AsRef<Path> + Sync + Send + Copy,
755 root_dir: impl AsRef<Path> + Sync + Send + Copy,
759 options: StatusOptions,
756 options: StatusOptions,
760 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
757 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
761 ) -> IoResult<()> {
758 ) -> IoResult<()> {
762 let to_visit: Vec<(&HgPath, &DirstateEntry)> = if results.is_empty()
759 let to_visit: Vec<(&HgPath, &DirstateEntry)> = if results.is_empty()
763 && matcher.matches_everything()
760 && matcher.matches_everything()
764 {
761 {
765 dmap.iter().map(|(f, e)| (f.deref(), e)).collect()
762 dmap.iter().map(|(f, e)| (f.deref(), e)).collect()
766 } else {
763 } else {
767 // Only convert to a hashmap if needed.
764 // Only convert to a hashmap if needed.
768 let old_results: FastHashMap<_, _> = results.iter().cloned().collect();
765 let old_results: FastHashMap<_, _> = results.iter().cloned().collect();
769 dmap.iter()
766 dmap.iter()
770 .filter_map(move |(f, e)| {
767 .filter_map(move |(f, e)| {
771 if !old_results.contains_key(f.deref()) && matcher.matches(f) {
768 if !old_results.contains_key(f.deref()) && matcher.matches(f) {
772 Some((f.deref(), e))
769 Some((f.deref(), e))
773 } else {
770 } else {
774 None
771 None
775 }
772 }
776 })
773 })
777 .collect()
774 .collect()
778 };
775 };
779
776
780 // We walked all dirs under the roots that weren't ignored, and
777 // We walked all dirs under the roots that weren't ignored, and
781 // everything that matched was stat'ed and is already in results.
778 // everything that matched was stat'ed and is already in results.
782 // The rest must thus be ignored or under a symlink.
779 // The rest must thus be ignored or under a symlink.
783 let path_auditor = PathAuditor::new(root_dir);
780 let path_auditor = PathAuditor::new(root_dir);
784
781
785 // TODO don't collect. Find a way of replicating the behavior of
782 // TODO don't collect. Find a way of replicating the behavior of
786 // `itertools::process_results`, but for `rayon::ParallelIterator`
783 // `itertools::process_results`, but for `rayon::ParallelIterator`
787 let new_results: IoResult<Vec<_>> = to_visit
784 let new_results: IoResult<Vec<_>> = to_visit
788 .into_par_iter()
785 .into_par_iter()
789 .filter_map(|(filename, entry)| -> Option<IoResult<_>> {
786 .filter_map(|(filename, entry)| -> Option<IoResult<_>> {
790 // Report ignored items in the dmap as long as they are not
787 // Report ignored items in the dmap as long as they are not
791 // under a symlink directory.
788 // under a symlink directory.
792 if path_auditor.check(filename) {
789 if path_auditor.check(filename) {
793 // TODO normalize for case-insensitive filesystems
790 // TODO normalize for case-insensitive filesystems
794 let buf = match hg_path_to_path_buf(filename) {
791 let buf = match hg_path_to_path_buf(filename) {
795 Ok(x) => x,
792 Ok(x) => x,
796 Err(e) => return Some(Err(e.into())),
793 Err(e) => return Some(Err(e.into())),
797 };
794 };
798 Some(Ok((
795 Some(Ok((
799 Cow::Borrowed(filename),
796 Cow::Borrowed(filename),
800 match root_dir.as_ref().join(&buf).symlink_metadata() {
797 match root_dir.as_ref().join(&buf).symlink_metadata() {
801 // File was just ignored, no links, and exists
798 // File was just ignored, no links, and exists
802 Ok(meta) => {
799 Ok(meta) => {
803 let metadata = HgMetadata::from_metadata(meta);
800 let metadata = HgMetadata::from_metadata(meta);
804 dispatch_found(
801 dispatch_found(
805 filename,
802 filename,
806 *entry,
803 *entry,
807 metadata,
804 metadata,
808 &dmap.copy_map,
805 &dmap.copy_map,
809 options,
806 options,
810 )
807 )
811 }
808 }
812 // File doesn't exist
809 // File doesn't exist
813 Err(_) => dispatch_missing(entry.state),
810 Err(_) => dispatch_missing(entry.state),
814 },
811 },
815 )))
812 )))
816 } else {
813 } else {
817 // It's either missing or under a symlink directory which
814 // It's either missing or under a symlink directory which
818 // we, in this case, report as missing.
815 // we, in this case, report as missing.
819 Some(Ok((
816 Some(Ok((
820 Cow::Borrowed(filename),
817 Cow::Borrowed(filename),
821 dispatch_missing(entry.state),
818 dispatch_missing(entry.state),
822 )))
819 )))
823 }
820 }
824 })
821 })
825 .collect();
822 .collect();
826
823
827 results.par_extend(new_results?);
824 results.par_extend(new_results?);
828
825
829 Ok(())
826 Ok(())
830 }
827 }
831
828
832 /// Get the status of files in the working directory.
829 /// Get the status of files in the working directory.
833 ///
830 ///
834 /// This is the current entry-point for `hg-core` and is realistically unusable
831 /// This is the current entry-point for `hg-core` and is realistically unusable
835 /// outside of a Python context because its arguments need to provide a lot of
832 /// outside of a Python context because its arguments need to provide a lot of
836 /// information that will not be necessary in the future.
833 /// information that will not be necessary in the future.
837 #[timed]
834 #[timed]
838 pub fn status<'a: 'c, 'b: 'c, 'c>(
835 pub fn status<'a: 'c, 'b: 'c, 'c>(
839 dmap: &'a DirstateMap,
836 dmap: &'a DirstateMap,
840 matcher: &'b (impl Matcher + Sync),
837 matcher: &'b (impl Matcher + Sync),
841 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'c,
838 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'c,
842 ignore_files: Vec<PathBuf>,
839 ignore_files: Vec<PathBuf>,
843 options: StatusOptions,
840 options: StatusOptions,
844 ) -> StatusResult<(
841 ) -> StatusResult<(
845 (Vec<Cow<'c, HgPath>>, DirstateStatus<'c>),
842 (Vec<Cow<'c, HgPath>>, DirstateStatus<'c>),
846 Vec<PatternFileWarning>,
843 Vec<PatternFileWarning>,
847 )> {
844 )> {
848 // Needs to outlive `dir_ignore_fn` since it's captured.
845 // Needs to outlive `dir_ignore_fn` since it's captured.
849 let ignore_fn: IgnoreFnType;
846 let ignore_fn: IgnoreFnType;
850
847
851 // Only involve real ignore mechanism if we're listing unknowns or ignored.
848 // Only involve real ignore mechanism if we're listing unknowns or ignored.
852 let (dir_ignore_fn, warnings): (IgnoreFnType, _) = if options.list_ignored
849 let (dir_ignore_fn, warnings): (IgnoreFnType, _) = if options.list_ignored
853 || options.list_unknown
850 || options.list_unknown
854 {
851 {
855 let (ignore, warnings) = get_ignore_function(ignore_files, root_dir)?;
852 let (ignore, warnings) = get_ignore_function(ignore_files, root_dir)?;
856
853
857 ignore_fn = ignore;
854 ignore_fn = ignore;
858 let dir_ignore_fn = Box::new(|dir: &_| {
855 let dir_ignore_fn = Box::new(|dir: &_| {
859 // Is the path or one of its ancestors ignored?
856 // Is the path or one of its ancestors ignored?
860 if ignore_fn(dir) {
857 if ignore_fn(dir) {
861 true
858 true
862 } else {
859 } else {
863 for p in find_dirs(dir) {
860 for p in find_dirs(dir) {
864 if ignore_fn(p) {
861 if ignore_fn(p) {
865 return true;
862 return true;
866 }
863 }
867 }
864 }
868 false
865 false
869 }
866 }
870 });
867 });
871 (dir_ignore_fn, warnings)
868 (dir_ignore_fn, warnings)
872 } else {
869 } else {
873 ignore_fn = Box::new(|&_| true);
870 ignore_fn = Box::new(|&_| true);
874 (Box::new(|&_| true), vec![])
871 (Box::new(|&_| true), vec![])
875 };
872 };
876
873
877 let files = matcher.file_set();
874 let files = matcher.file_set();
878
875
879 // `crossbeam::Sender` is `Sync`, while `mpsc::Sender` is not.
876 // `crossbeam::Sender` is `Sync`, while `mpsc::Sender` is not.
880 let (traversed_sender, traversed_recv) = crossbeam::channel::unbounded();
877 let (traversed_sender, traversed_recv) = crossbeam::channel::unbounded();
881
878
882 // Step 1: check the files explicitly mentioned by the user
879 // Step 1: check the files explicitly mentioned by the user
883 let explicit = walk_explicit(
880 let explicit = walk_explicit(
884 files,
881 files,
885 &dmap,
882 &dmap,
886 root_dir,
883 root_dir,
887 options,
884 options,
888 traversed_sender.clone(),
885 traversed_sender.clone(),
889 );
886 );
890
887
891 // Collect results into a `Vec` because we do very few lookups in most
888 // Collect results into a `Vec` because we do very few lookups in most
892 // cases.
889 // cases.
893 let (work, mut results): (Vec<_>, Vec<_>) = explicit
890 let (work, mut results): (Vec<_>, Vec<_>) = explicit
894 .filter_map(Result::ok)
891 .filter_map(Result::ok)
895 .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch))
892 .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch))
896 .partition(|(_, dispatch)| match dispatch {
893 .partition(|(_, dispatch)| match dispatch {
897 Dispatch::Directory { .. } => true,
894 Dispatch::Directory { .. } => true,
898 _ => false,
895 _ => false,
899 });
896 });
900
897
901 if !work.is_empty() {
898 if !work.is_empty() {
902 // Hashmaps are quite a bit slower to build than vecs, so only build it
899 // Hashmaps are quite a bit slower to build than vecs, so only build it
903 // if needed.
900 // if needed.
904 let old_results = results.iter().cloned().collect();
901 let old_results = results.iter().cloned().collect();
905
902
906 // Step 2: recursively check the working directory for changes if
903 // Step 2: recursively check the working directory for changes if
907 // needed
904 // needed
908 for (dir, dispatch) in work {
905 for (dir, dispatch) in work {
909 match dispatch {
906 match dispatch {
910 Dispatch::Directory { was_file } => {
907 Dispatch::Directory { was_file } => {
911 if was_file {
908 if was_file {
912 results.push((dir.to_owned(), Dispatch::Removed));
909 results.push((dir.to_owned(), Dispatch::Removed));
913 }
910 }
914 if options.list_ignored
911 if options.list_ignored
915 || options.list_unknown && !dir_ignore_fn(&dir)
912 || options.list_unknown && !dir_ignore_fn(&dir)
916 {
913 {
917 traverse(
914 traverse(
918 matcher,
915 matcher,
919 root_dir,
916 root_dir,
920 &dmap,
917 &dmap,
921 &dir,
918 &dir,
922 &old_results,
919 &old_results,
923 &ignore_fn,
920 &ignore_fn,
924 &dir_ignore_fn,
921 &dir_ignore_fn,
925 options,
922 options,
926 &mut results,
923 &mut results,
927 traversed_sender.clone(),
924 traversed_sender.clone(),
928 )?;
925 )?;
929 }
926 }
930 }
927 }
931 _ => unreachable!("There can only be directories in `work`"),
928 _ => unreachable!("There can only be directories in `work`"),
932 }
929 }
933 }
930 }
934 }
931 }
935
932
936 if !matcher.is_exact() {
933 if !matcher.is_exact() {
937 // Step 3: Check the remaining files from the dmap.
934 // Step 3: Check the remaining files from the dmap.
938 // If a dmap file is not in results yet, it was either
935 // If a dmap file is not in results yet, it was either
939 // a) not matched b) ignored, c) missing, or d) under a
936 // a) not matched b) ignored, c) missing, or d) under a
940 // symlink directory.
937 // symlink directory.
941
938
942 if options.list_unknown {
939 if options.list_unknown {
943 handle_unknowns(dmap, matcher, root_dir, options, &mut results)?;
940 handle_unknowns(dmap, matcher, root_dir, options, &mut results)?;
944 } else {
941 } else {
945 // We may not have walked the full directory tree above, so stat
942 // We may not have walked the full directory tree above, so stat
946 // and check everything we missed.
943 // and check everything we missed.
947 extend_from_dmap(&dmap, root_dir, options, &mut results);
944 extend_from_dmap(&dmap, root_dir, options, &mut results);
948 }
945 }
949 }
946 }
950
947
951 // Close the channel
948 // Close the channel
952 drop(traversed_sender);
949 drop(traversed_sender);
953 let traversed_dirs = traversed_recv.into_iter().collect();
950 let traversed_dirs = traversed_recv.into_iter().collect();
954
951
955 Ok((build_response(results, traversed_dirs), warnings))
952 Ok((build_response(results, traversed_dirs), warnings))
956 }
953 }
@@ -1,695 +1,695
1 // discovery.rs
1 // discovery.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Discovery operations
8 //! Discovery operations
9 //!
9 //!
10 //! This is a Rust counterpart to the `partialdiscovery` class of
10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 //! `mercurial.setdiscovery`
11 //! `mercurial.setdiscovery`
12
12
13 use super::{Graph, GraphError, Revision, NULL_REVISION};
13 use super::{Graph, GraphError, Revision, NULL_REVISION};
14 use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
14 use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
15 use rand::seq::SliceRandom;
15 use rand::seq::SliceRandom;
16 use rand::{thread_rng, RngCore, SeedableRng};
16 use rand::{thread_rng, RngCore, SeedableRng};
17 use std::cmp::{max, min};
17 use std::cmp::{max, min};
18 use std::collections::{HashSet, VecDeque};
18 use std::collections::{HashSet, VecDeque};
19
19
20 type Rng = rand_pcg::Pcg32;
20 type Rng = rand_pcg::Pcg32;
21 type Seed = [u8; 16];
21 type Seed = [u8; 16];
22
22
23 pub struct PartialDiscovery<G: Graph + Clone> {
23 pub struct PartialDiscovery<G: Graph + Clone> {
24 target_heads: Option<Vec<Revision>>,
24 target_heads: Option<Vec<Revision>>,
25 graph: G, // plays the role of self._repo
25 graph: G, // plays the role of self._repo
26 common: MissingAncestors<G>,
26 common: MissingAncestors<G>,
27 undecided: Option<HashSet<Revision>>,
27 undecided: Option<HashSet<Revision>>,
28 children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,
28 children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,
29 missing: HashSet<Revision>,
29 missing: HashSet<Revision>,
30 rng: Rng,
30 rng: Rng,
31 respect_size: bool,
31 respect_size: bool,
32 randomize: bool,
32 randomize: bool,
33 }
33 }
34
34
35 pub struct DiscoveryStats {
35 pub struct DiscoveryStats {
36 pub undecided: Option<usize>,
36 pub undecided: Option<usize>,
37 }
37 }
38
38
39 /// Update an existing sample to match the expected size
39 /// Update an existing sample to match the expected size
40 ///
40 ///
41 /// The sample is updated with revisions exponentially distant from each
41 /// The sample is updated with revisions exponentially distant from each
42 /// element of `heads`.
42 /// element of `heads`.
43 ///
43 ///
44 /// If a target size is specified, the sampling will stop once this size is
44 /// If a target size is specified, the sampling will stop once this size is
45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
46 /// reached.
46 /// reached.
47 ///
47 ///
48 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
48 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
49 /// represented by `parentfn`
49 /// represented by `parentfn`
50 /// - `heads`: set of DAG head revs
50 /// - `heads`: set of DAG head revs
51 /// - `sample`: a sample to update
51 /// - `sample`: a sample to update
52 /// - `parentfn`: a callable to resolve parents for a revision
52 /// - `parentfn`: a callable to resolve parents for a revision
53 /// - `quicksamplesize`: optional target size of the sample
53 /// - `quicksamplesize`: optional target size of the sample
54 fn update_sample<I>(
54 fn update_sample<I>(
55 revs: Option<&HashSet<Revision>>,
55 revs: Option<&HashSet<Revision>>,
56 heads: impl IntoIterator<Item = Revision>,
56 heads: impl IntoIterator<Item = Revision>,
57 sample: &mut HashSet<Revision>,
57 sample: &mut HashSet<Revision>,
58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
59 quicksamplesize: Option<usize>,
59 quicksamplesize: Option<usize>,
60 ) -> Result<(), GraphError>
60 ) -> Result<(), GraphError>
61 where
61 where
62 I: Iterator<Item = Revision>,
62 I: Iterator<Item = Revision>,
63 {
63 {
64 let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();
64 let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();
65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
66 let mut factor: u32 = 1;
66 let mut factor: u32 = 1;
67 let mut seen: HashSet<Revision> = HashSet::new();
67 let mut seen: HashSet<Revision> = HashSet::new();
68 while let Some(current) = visit.pop_front() {
68 while let Some(current) = visit.pop_front() {
69 if !seen.insert(current) {
69 if !seen.insert(current) {
70 continue;
70 continue;
71 }
71 }
72
72
73 let d = *distances.entry(current).or_insert(1);
73 let d = *distances.entry(current).or_insert(1);
74 if d > factor {
74 if d > factor {
75 factor *= 2;
75 factor *= 2;
76 }
76 }
77 if d == factor {
77 if d == factor {
78 sample.insert(current);
78 sample.insert(current);
79 if let Some(sz) = quicksamplesize {
79 if let Some(sz) = quicksamplesize {
80 if sample.len() >= sz {
80 if sample.len() >= sz {
81 return Ok(());
81 return Ok(());
82 }
82 }
83 }
83 }
84 }
84 }
85 for p in parentsfn(current)? {
85 for p in parentsfn(current)? {
86 if let Some(revs) = revs {
86 if let Some(revs) = revs {
87 if !revs.contains(&p) {
87 if !revs.contains(&p) {
88 continue;
88 continue;
89 }
89 }
90 }
90 }
91 distances.entry(p).or_insert(d + 1);
91 distances.entry(p).or_insert(d + 1);
92 visit.push_back(p);
92 visit.push_back(p);
93 }
93 }
94 }
94 }
95 Ok(())
95 Ok(())
96 }
96 }
97
97
98 struct ParentsIterator {
98 struct ParentsIterator {
99 parents: [Revision; 2],
99 parents: [Revision; 2],
100 cur: usize,
100 cur: usize,
101 }
101 }
102
102
103 impl ParentsIterator {
103 impl ParentsIterator {
104 fn graph_parents(
104 fn graph_parents(
105 graph: &impl Graph,
105 graph: &impl Graph,
106 r: Revision,
106 r: Revision,
107 ) -> Result<ParentsIterator, GraphError> {
107 ) -> Result<ParentsIterator, GraphError> {
108 Ok(ParentsIterator {
108 Ok(ParentsIterator {
109 parents: graph.parents(r)?,
109 parents: graph.parents(r)?,
110 cur: 0,
110 cur: 0,
111 })
111 })
112 }
112 }
113 }
113 }
114
114
115 impl Iterator for ParentsIterator {
115 impl Iterator for ParentsIterator {
116 type Item = Revision;
116 type Item = Revision;
117
117
118 fn next(&mut self) -> Option<Revision> {
118 fn next(&mut self) -> Option<Revision> {
119 if self.cur > 1 {
119 if self.cur > 1 {
120 return None;
120 return None;
121 }
121 }
122 let rev = self.parents[self.cur];
122 let rev = self.parents[self.cur];
123 self.cur += 1;
123 self.cur += 1;
124 if rev == NULL_REVISION {
124 if rev == NULL_REVISION {
125 return self.next();
125 return self.next();
126 }
126 }
127 Some(rev)
127 Some(rev)
128 }
128 }
129 }
129 }
130
130
131 impl<G: Graph + Clone> PartialDiscovery<G> {
131 impl<G: Graph + Clone> PartialDiscovery<G> {
132 /// Create a PartialDiscovery object, with the intent
132 /// Create a PartialDiscovery object, with the intent
133 /// of comparing our `::<target_heads>` revset to the contents of another
133 /// of comparing our `::<target_heads>` revset to the contents of another
134 /// repo.
134 /// repo.
135 ///
135 ///
136 /// For now `target_heads` is passed as a vector, and will be used
136 /// For now `target_heads` is passed as a vector, and will be used
137 /// at the first call to `ensure_undecided()`.
137 /// at the first call to `ensure_undecided()`.
138 ///
138 ///
139 /// If we want to make the signature more flexible,
139 /// If we want to make the signature more flexible,
140 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
140 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
141 /// object since we'll keep it in the meanwhile
141 /// object since we'll keep it in the meanwhile
142 ///
142 ///
143 /// The `respect_size` boolean controls how the sampling methods
143 /// The `respect_size` boolean controls how the sampling methods
144 /// will interpret the size argument requested by the caller. If it's
144 /// will interpret the size argument requested by the caller. If it's
145 /// `false`, they are allowed to produce a sample whose size is more
145 /// `false`, they are allowed to produce a sample whose size is more
146 /// appropriate to the situation (typically bigger).
146 /// appropriate to the situation (typically bigger).
147 ///
147 ///
148 /// The `randomize` boolean affects sampling, and specifically how
148 /// The `randomize` boolean affects sampling, and specifically how
149 /// limiting or last-minute expanding is been done:
149 /// limiting or last-minute expanding is been done:
150 ///
150 ///
151 /// If `true`, both will perform random picking from `self.undecided`.
151 /// If `true`, both will perform random picking from `self.undecided`.
152 /// This is currently the best for actual discoveries.
152 /// This is currently the best for actual discoveries.
153 ///
153 ///
154 /// If `false`, a reproductible picking strategy is performed. This is
154 /// If `false`, a reproductible picking strategy is performed. This is
155 /// useful for integration tests.
155 /// useful for integration tests.
156 pub fn new(
156 pub fn new(
157 graph: G,
157 graph: G,
158 target_heads: Vec<Revision>,
158 target_heads: Vec<Revision>,
159 respect_size: bool,
159 respect_size: bool,
160 randomize: bool,
160 randomize: bool,
161 ) -> Self {
161 ) -> Self {
162 let mut seed = [0; 16];
162 let mut seed = [0; 16];
163 if randomize {
163 if randomize {
164 thread_rng().fill_bytes(&mut seed);
164 thread_rng().fill_bytes(&mut seed);
165 }
165 }
166 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
166 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
167 }
167 }
168
168
169 pub fn new_with_seed(
169 pub fn new_with_seed(
170 graph: G,
170 graph: G,
171 target_heads: Vec<Revision>,
171 target_heads: Vec<Revision>,
172 seed: Seed,
172 seed: Seed,
173 respect_size: bool,
173 respect_size: bool,
174 randomize: bool,
174 randomize: bool,
175 ) -> Self {
175 ) -> Self {
176 PartialDiscovery {
176 PartialDiscovery {
177 undecided: None,
177 undecided: None,
178 children_cache: None,
178 children_cache: None,
179 target_heads: Some(target_heads),
179 target_heads: Some(target_heads),
180 graph: graph.clone(),
180 graph: graph.clone(),
181 common: MissingAncestors::new(graph, vec![]),
181 common: MissingAncestors::new(graph, vec![]),
182 missing: HashSet::new(),
182 missing: HashSet::new(),
183 rng: Rng::from_seed(seed),
183 rng: Rng::from_seed(seed),
184 respect_size: respect_size,
184 respect_size,
185 randomize: randomize,
185 randomize,
186 }
186 }
187 }
187 }
188
188
189 /// Extract at most `size` random elements from sample and return them
189 /// Extract at most `size` random elements from sample and return them
190 /// as a vector
190 /// as a vector
191 fn limit_sample(
191 fn limit_sample(
192 &mut self,
192 &mut self,
193 mut sample: Vec<Revision>,
193 mut sample: Vec<Revision>,
194 size: usize,
194 size: usize,
195 ) -> Vec<Revision> {
195 ) -> Vec<Revision> {
196 if !self.randomize {
196 if !self.randomize {
197 sample.sort();
197 sample.sort();
198 sample.truncate(size);
198 sample.truncate(size);
199 return sample;
199 return sample;
200 }
200 }
201 let sample_len = sample.len();
201 let sample_len = sample.len();
202 if sample_len <= size {
202 if sample_len <= size {
203 return sample;
203 return sample;
204 }
204 }
205 let rng = &mut self.rng;
205 let rng = &mut self.rng;
206 let dropped_size = sample_len - size;
206 let dropped_size = sample_len - size;
207 let limited_slice = if size < dropped_size {
207 let limited_slice = if size < dropped_size {
208 sample.partial_shuffle(rng, size).0
208 sample.partial_shuffle(rng, size).0
209 } else {
209 } else {
210 sample.partial_shuffle(rng, dropped_size).1
210 sample.partial_shuffle(rng, dropped_size).1
211 };
211 };
212 limited_slice.to_owned()
212 limited_slice.to_owned()
213 }
213 }
214
214
215 /// Register revisions known as being common
215 /// Register revisions known as being common
216 pub fn add_common_revisions(
216 pub fn add_common_revisions(
217 &mut self,
217 &mut self,
218 common: impl IntoIterator<Item = Revision>,
218 common: impl IntoIterator<Item = Revision>,
219 ) -> Result<(), GraphError> {
219 ) -> Result<(), GraphError> {
220 let before_len = self.common.get_bases().len();
220 let before_len = self.common.get_bases().len();
221 self.common.add_bases(common);
221 self.common.add_bases(common);
222 if self.common.get_bases().len() == before_len {
222 if self.common.get_bases().len() == before_len {
223 return Ok(());
223 return Ok(());
224 }
224 }
225 if let Some(ref mut undecided) = self.undecided {
225 if let Some(ref mut undecided) = self.undecided {
226 self.common.remove_ancestors_from(undecided)?;
226 self.common.remove_ancestors_from(undecided)?;
227 }
227 }
228 Ok(())
228 Ok(())
229 }
229 }
230
230
231 /// Register revisions known as being missing
231 /// Register revisions known as being missing
232 ///
232 ///
233 /// # Performance note
233 /// # Performance note
234 ///
234 ///
235 /// Except in the most trivial case, the first call of this method has
235 /// Except in the most trivial case, the first call of this method has
236 /// the side effect of computing `self.undecided` set for the first time,
236 /// the side effect of computing `self.undecided` set for the first time,
237 /// and the related caches it might need for efficiency of its internal
237 /// and the related caches it might need for efficiency of its internal
238 /// computation. This is typically faster if more information is
238 /// computation. This is typically faster if more information is
239 /// available in `self.common`. Therefore, for good performance, the
239 /// available in `self.common`. Therefore, for good performance, the
240 /// caller should avoid calling this too early.
240 /// caller should avoid calling this too early.
241 pub fn add_missing_revisions(
241 pub fn add_missing_revisions(
242 &mut self,
242 &mut self,
243 missing: impl IntoIterator<Item = Revision>,
243 missing: impl IntoIterator<Item = Revision>,
244 ) -> Result<(), GraphError> {
244 ) -> Result<(), GraphError> {
245 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
245 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
246 if tovisit.is_empty() {
246 if tovisit.is_empty() {
247 return Ok(());
247 return Ok(());
248 }
248 }
249 self.ensure_children_cache()?;
249 self.ensure_children_cache()?;
250 self.ensure_undecided()?; // for safety of possible future refactors
250 self.ensure_undecided()?; // for safety of possible future refactors
251 let children = self.children_cache.as_ref().unwrap();
251 let children = self.children_cache.as_ref().unwrap();
252 let mut seen: HashSet<Revision> = HashSet::new();
252 let mut seen: HashSet<Revision> = HashSet::new();
253 let undecided_mut = self.undecided.as_mut().unwrap();
253 let undecided_mut = self.undecided.as_mut().unwrap();
254 while let Some(rev) = tovisit.pop_front() {
254 while let Some(rev) = tovisit.pop_front() {
255 if !self.missing.insert(rev) {
255 if !self.missing.insert(rev) {
256 // either it's known to be missing from a previous
256 // either it's known to be missing from a previous
257 // invocation, and there's no need to iterate on its
257 // invocation, and there's no need to iterate on its
258 // children (we now they are all missing)
258 // children (we now they are all missing)
259 // or it's from a previous iteration of this loop
259 // or it's from a previous iteration of this loop
260 // and its children have already been queued
260 // and its children have already been queued
261 continue;
261 continue;
262 }
262 }
263 undecided_mut.remove(&rev);
263 undecided_mut.remove(&rev);
264 match children.get(&rev) {
264 match children.get(&rev) {
265 None => {
265 None => {
266 continue;
266 continue;
267 }
267 }
268 Some(this_children) => {
268 Some(this_children) => {
269 for child in this_children.iter().cloned() {
269 for child in this_children.iter().cloned() {
270 if seen.insert(child) {
270 if seen.insert(child) {
271 tovisit.push_back(child);
271 tovisit.push_back(child);
272 }
272 }
273 }
273 }
274 }
274 }
275 }
275 }
276 }
276 }
277 Ok(())
277 Ok(())
278 }
278 }
279
279
280 /// Do we have any information about the peer?
280 /// Do we have any information about the peer?
281 pub fn has_info(&self) -> bool {
281 pub fn has_info(&self) -> bool {
282 self.common.has_bases()
282 self.common.has_bases()
283 }
283 }
284
284
285 /// Did we acquire full knowledge of our Revisions that the peer has?
285 /// Did we acquire full knowledge of our Revisions that the peer has?
286 pub fn is_complete(&self) -> bool {
286 pub fn is_complete(&self) -> bool {
287 self.undecided.as_ref().map_or(false, |s| s.is_empty())
287 self.undecided.as_ref().map_or(false, HashSet::is_empty)
288 }
288 }
289
289
290 /// Return the heads of the currently known common set of revisions.
290 /// Return the heads of the currently known common set of revisions.
291 ///
291 ///
292 /// If the discovery process is not complete (see `is_complete()`), the
292 /// If the discovery process is not complete (see `is_complete()`), the
293 /// caller must be aware that this is an intermediate state.
293 /// caller must be aware that this is an intermediate state.
294 ///
294 ///
295 /// On the other hand, if it is complete, then this is currently
295 /// On the other hand, if it is complete, then this is currently
296 /// the only way to retrieve the end results of the discovery process.
296 /// the only way to retrieve the end results of the discovery process.
297 ///
297 ///
298 /// We may introduce in the future an `into_common_heads` call that
298 /// We may introduce in the future an `into_common_heads` call that
299 /// would be more appropriate for normal Rust callers, dropping `self`
299 /// would be more appropriate for normal Rust callers, dropping `self`
300 /// if it is complete.
300 /// if it is complete.
301 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
301 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
302 self.common.bases_heads()
302 self.common.bases_heads()
303 }
303 }
304
304
305 /// Force first computation of `self.undecided`
305 /// Force first computation of `self.undecided`
306 ///
306 ///
307 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
307 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
308 /// unwrapped to get workable immutable or mutable references without
308 /// unwrapped to get workable immutable or mutable references without
309 /// any panic.
309 /// any panic.
310 ///
310 ///
311 /// This is an imperative call instead of an access with added lazyness
311 /// This is an imperative call instead of an access with added lazyness
312 /// to reduce easily the scope of mutable borrow for the caller,
312 /// to reduce easily the scope of mutable borrow for the caller,
313 /// compared to undecided(&'a mut self) -> &'a… that would keep it
313 /// compared to undecided(&'a mut self) -> &'a… that would keep it
314 /// as long as the resulting immutable one.
314 /// as long as the resulting immutable one.
315 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
315 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
316 if self.undecided.is_some() {
316 if self.undecided.is_some() {
317 return Ok(());
317 return Ok(());
318 }
318 }
319 let tgt = self.target_heads.take().unwrap();
319 let tgt = self.target_heads.take().unwrap();
320 self.undecided =
320 self.undecided =
321 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
321 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
322 Ok(())
322 Ok(())
323 }
323 }
324
324
325 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
325 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
326 if self.children_cache.is_some() {
326 if self.children_cache.is_some() {
327 return Ok(());
327 return Ok(());
328 }
328 }
329 self.ensure_undecided()?;
329 self.ensure_undecided()?;
330
330
331 let mut children: FastHashMap<Revision, Vec<Revision>> =
331 let mut children: FastHashMap<Revision, Vec<Revision>> =
332 FastHashMap::default();
332 FastHashMap::default();
333 for &rev in self.undecided.as_ref().unwrap() {
333 for &rev in self.undecided.as_ref().unwrap() {
334 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
334 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
335 children.entry(p).or_insert_with(|| Vec::new()).push(rev);
335 children.entry(p).or_insert_with(Vec::new).push(rev);
336 }
336 }
337 }
337 }
338 self.children_cache = Some(children);
338 self.children_cache = Some(children);
339 Ok(())
339 Ok(())
340 }
340 }
341
341
342 /// Provide statistics about the current state of the discovery process
342 /// Provide statistics about the current state of the discovery process
343 pub fn stats(&self) -> DiscoveryStats {
343 pub fn stats(&self) -> DiscoveryStats {
344 DiscoveryStats {
344 DiscoveryStats {
345 undecided: self.undecided.as_ref().map(|s| s.len()),
345 undecided: self.undecided.as_ref().map(HashSet::len),
346 }
346 }
347 }
347 }
348
348
349 pub fn take_quick_sample(
349 pub fn take_quick_sample(
350 &mut self,
350 &mut self,
351 headrevs: impl IntoIterator<Item = Revision>,
351 headrevs: impl IntoIterator<Item = Revision>,
352 size: usize,
352 size: usize,
353 ) -> Result<Vec<Revision>, GraphError> {
353 ) -> Result<Vec<Revision>, GraphError> {
354 self.ensure_undecided()?;
354 self.ensure_undecided()?;
355 let mut sample = {
355 let mut sample = {
356 let undecided = self.undecided.as_ref().unwrap();
356 let undecided = self.undecided.as_ref().unwrap();
357 if undecided.len() <= size {
357 if undecided.len() <= size {
358 return Ok(undecided.iter().cloned().collect());
358 return Ok(undecided.iter().cloned().collect());
359 }
359 }
360 dagops::heads(&self.graph, undecided.iter())?
360 dagops::heads(&self.graph, undecided.iter())?
361 };
361 };
362 if sample.len() >= size {
362 if sample.len() >= size {
363 return Ok(self.limit_sample(sample.into_iter().collect(), size));
363 return Ok(self.limit_sample(sample.into_iter().collect(), size));
364 }
364 }
365 update_sample(
365 update_sample(
366 None,
366 None,
367 headrevs,
367 headrevs,
368 &mut sample,
368 &mut sample,
369 |r| ParentsIterator::graph_parents(&self.graph, r),
369 |r| ParentsIterator::graph_parents(&self.graph, r),
370 Some(size),
370 Some(size),
371 )?;
371 )?;
372 Ok(sample.into_iter().collect())
372 Ok(sample.into_iter().collect())
373 }
373 }
374
374
375 /// Extract a sample from `self.undecided`, going from its heads and roots.
375 /// Extract a sample from `self.undecided`, going from its heads and roots.
376 ///
376 ///
377 /// The `size` parameter is used to avoid useless computations if
377 /// The `size` parameter is used to avoid useless computations if
378 /// it turns out to be bigger than the whole set of undecided Revisions.
378 /// it turns out to be bigger than the whole set of undecided Revisions.
379 ///
379 ///
380 /// The sample is taken by using `update_sample` from the heads, then
380 /// The sample is taken by using `update_sample` from the heads, then
381 /// from the roots, working on the reverse DAG,
381 /// from the roots, working on the reverse DAG,
382 /// expressed by `self.children_cache`.
382 /// expressed by `self.children_cache`.
383 ///
383 ///
384 /// No effort is being made to complete or limit the sample to `size`
384 /// No effort is being made to complete or limit the sample to `size`
385 /// but this method returns another interesting size that it derives
385 /// but this method returns another interesting size that it derives
386 /// from its knowledge of the structure of the various sets, leaving
386 /// from its knowledge of the structure of the various sets, leaving
387 /// to the caller the decision to use it or not.
387 /// to the caller the decision to use it or not.
388 fn bidirectional_sample(
388 fn bidirectional_sample(
389 &mut self,
389 &mut self,
390 size: usize,
390 size: usize,
391 ) -> Result<(HashSet<Revision>, usize), GraphError> {
391 ) -> Result<(HashSet<Revision>, usize), GraphError> {
392 self.ensure_undecided()?;
392 self.ensure_undecided()?;
393 {
393 {
394 // we don't want to compute children_cache before this
394 // we don't want to compute children_cache before this
395 // but doing it after extracting self.undecided takes a mutable
395 // but doing it after extracting self.undecided takes a mutable
396 // ref to self while a shareable one is still active.
396 // ref to self while a shareable one is still active.
397 let undecided = self.undecided.as_ref().unwrap();
397 let undecided = self.undecided.as_ref().unwrap();
398 if undecided.len() <= size {
398 if undecided.len() <= size {
399 return Ok((undecided.clone(), size));
399 return Ok((undecided.clone(), size));
400 }
400 }
401 }
401 }
402
402
403 self.ensure_children_cache()?;
403 self.ensure_children_cache()?;
404 let revs = self.undecided.as_ref().unwrap();
404 let revs = self.undecided.as_ref().unwrap();
405 let mut sample: HashSet<Revision> = revs.clone();
405 let mut sample: HashSet<Revision> = revs.clone();
406
406
407 // it's possible that leveraging the children cache would be more
407 // it's possible that leveraging the children cache would be more
408 // efficient here
408 // efficient here
409 dagops::retain_heads(&self.graph, &mut sample)?;
409 dagops::retain_heads(&self.graph, &mut sample)?;
410 let revsheads = sample.clone(); // was again heads(revs) in python
410 let revsheads = sample.clone(); // was again heads(revs) in python
411
411
412 // update from heads
412 // update from heads
413 update_sample(
413 update_sample(
414 Some(revs),
414 Some(revs),
415 revsheads.iter().cloned(),
415 revsheads.iter().cloned(),
416 &mut sample,
416 &mut sample,
417 |r| ParentsIterator::graph_parents(&self.graph, r),
417 |r| ParentsIterator::graph_parents(&self.graph, r),
418 None,
418 None,
419 )?;
419 )?;
420
420
421 // update from roots
421 // update from roots
422 let revroots: HashSet<Revision> =
422 let revroots: HashSet<Revision> =
423 dagops::roots(&self.graph, revs)?.into_iter().collect();
423 dagops::roots(&self.graph, revs)?.into_iter().collect();
424 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
424 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
425
425
426 let children = self.children_cache.as_ref().unwrap();
426 let children = self.children_cache.as_ref().unwrap();
427 let empty_vec: Vec<Revision> = Vec::new();
427 let empty_vec: Vec<Revision> = Vec::new();
428 update_sample(
428 update_sample(
429 Some(revs),
429 Some(revs),
430 revroots,
430 revroots,
431 &mut sample,
431 &mut sample,
432 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
432 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
433 None,
433 None,
434 )?;
434 )?;
435 Ok((sample, prescribed_size))
435 Ok((sample, prescribed_size))
436 }
436 }
437
437
438 /// Fill up sample up to the wished size with random undecided Revisions.
438 /// Fill up sample up to the wished size with random undecided Revisions.
439 ///
439 ///
440 /// This is intended to be used as a last resort completion if the
440 /// This is intended to be used as a last resort completion if the
441 /// regular sampling algorithm returns too few elements.
441 /// regular sampling algorithm returns too few elements.
442 fn random_complete_sample(
442 fn random_complete_sample(
443 &mut self,
443 &mut self,
444 sample: &mut Vec<Revision>,
444 sample: &mut Vec<Revision>,
445 size: usize,
445 size: usize,
446 ) {
446 ) {
447 let sample_len = sample.len();
447 let sample_len = sample.len();
448 if size <= sample_len {
448 if size <= sample_len {
449 return;
449 return;
450 }
450 }
451 let take_from: Vec<Revision> = self
451 let take_from: Vec<Revision> = self
452 .undecided
452 .undecided
453 .as_ref()
453 .as_ref()
454 .unwrap()
454 .unwrap()
455 .iter()
455 .iter()
456 .filter(|&r| !sample.contains(r))
456 .filter(|&r| !sample.contains(r))
457 .cloned()
457 .cloned()
458 .collect();
458 .collect();
459 sample.extend(self.limit_sample(take_from, size - sample_len));
459 sample.extend(self.limit_sample(take_from, size - sample_len));
460 }
460 }
461
461
462 pub fn take_full_sample(
462 pub fn take_full_sample(
463 &mut self,
463 &mut self,
464 size: usize,
464 size: usize,
465 ) -> Result<Vec<Revision>, GraphError> {
465 ) -> Result<Vec<Revision>, GraphError> {
466 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
466 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
467 let size = if self.respect_size {
467 let size = if self.respect_size {
468 size
468 size
469 } else {
469 } else {
470 prescribed_size
470 prescribed_size
471 };
471 };
472 let mut sample =
472 let mut sample =
473 self.limit_sample(sample_set.into_iter().collect(), size);
473 self.limit_sample(sample_set.into_iter().collect(), size);
474 self.random_complete_sample(&mut sample, size);
474 self.random_complete_sample(&mut sample, size);
475 Ok(sample)
475 Ok(sample)
476 }
476 }
477 }
477 }
478
478
479 #[cfg(test)]
479 #[cfg(test)]
480 mod tests {
480 mod tests {
481 use super::*;
481 use super::*;
482 use crate::testing::SampleGraph;
482 use crate::testing::SampleGraph;
483
483
484 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
484 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
485 ///
485 ///
486 /// To avoid actual randomness in these tests, we give it a fixed
486 /// To avoid actual randomness in these tests, we give it a fixed
487 /// random seed, but by default we'll test the random version.
487 /// random seed, but by default we'll test the random version.
488 fn full_disco() -> PartialDiscovery<SampleGraph> {
488 fn full_disco() -> PartialDiscovery<SampleGraph> {
489 PartialDiscovery::new_with_seed(
489 PartialDiscovery::new_with_seed(
490 SampleGraph,
490 SampleGraph,
491 vec![10, 11, 12, 13],
491 vec![10, 11, 12, 13],
492 [0; 16],
492 [0; 16],
493 true,
493 true,
494 true,
494 true,
495 )
495 )
496 }
496 }
497
497
498 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
498 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
499 ///
499 ///
500 /// To avoid actual randomness in tests, we give it a fixed random seed.
500 /// To avoid actual randomness in tests, we give it a fixed random seed.
501 fn disco12() -> PartialDiscovery<SampleGraph> {
501 fn disco12() -> PartialDiscovery<SampleGraph> {
502 PartialDiscovery::new_with_seed(
502 PartialDiscovery::new_with_seed(
503 SampleGraph,
503 SampleGraph,
504 vec![12],
504 vec![12],
505 [0; 16],
505 [0; 16],
506 true,
506 true,
507 true,
507 true,
508 )
508 )
509 }
509 }
510
510
511 fn sorted_undecided(
511 fn sorted_undecided(
512 disco: &PartialDiscovery<SampleGraph>,
512 disco: &PartialDiscovery<SampleGraph>,
513 ) -> Vec<Revision> {
513 ) -> Vec<Revision> {
514 let mut as_vec: Vec<Revision> =
514 let mut as_vec: Vec<Revision> =
515 disco.undecided.as_ref().unwrap().iter().cloned().collect();
515 disco.undecided.as_ref().unwrap().iter().cloned().collect();
516 as_vec.sort();
516 as_vec.sort();
517 as_vec
517 as_vec
518 }
518 }
519
519
520 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
520 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
521 let mut as_vec: Vec<Revision> =
521 let mut as_vec: Vec<Revision> =
522 disco.missing.iter().cloned().collect();
522 disco.missing.iter().cloned().collect();
523 as_vec.sort();
523 as_vec.sort();
524 as_vec
524 as_vec
525 }
525 }
526
526
527 fn sorted_common_heads(
527 fn sorted_common_heads(
528 disco: &PartialDiscovery<SampleGraph>,
528 disco: &PartialDiscovery<SampleGraph>,
529 ) -> Result<Vec<Revision>, GraphError> {
529 ) -> Result<Vec<Revision>, GraphError> {
530 let mut as_vec: Vec<Revision> =
530 let mut as_vec: Vec<Revision> =
531 disco.common_heads()?.iter().cloned().collect();
531 disco.common_heads()?.iter().cloned().collect();
532 as_vec.sort();
532 as_vec.sort();
533 Ok(as_vec)
533 Ok(as_vec)
534 }
534 }
535
535
536 #[test]
536 #[test]
537 fn test_add_common_get_undecided() -> Result<(), GraphError> {
537 fn test_add_common_get_undecided() -> Result<(), GraphError> {
538 let mut disco = full_disco();
538 let mut disco = full_disco();
539 assert_eq!(disco.undecided, None);
539 assert_eq!(disco.undecided, None);
540 assert!(!disco.has_info());
540 assert!(!disco.has_info());
541 assert_eq!(disco.stats().undecided, None);
541 assert_eq!(disco.stats().undecided, None);
542
542
543 disco.add_common_revisions(vec![11, 12])?;
543 disco.add_common_revisions(vec![11, 12])?;
544 assert!(disco.has_info());
544 assert!(disco.has_info());
545 assert!(!disco.is_complete());
545 assert!(!disco.is_complete());
546 assert!(disco.missing.is_empty());
546 assert!(disco.missing.is_empty());
547
547
548 // add_common_revisions did not trigger a premature computation
548 // add_common_revisions did not trigger a premature computation
549 // of `undecided`, let's check that and ask for them
549 // of `undecided`, let's check that and ask for them
550 assert_eq!(disco.undecided, None);
550 assert_eq!(disco.undecided, None);
551 disco.ensure_undecided()?;
551 disco.ensure_undecided()?;
552 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
552 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
553 assert_eq!(disco.stats().undecided, Some(4));
553 assert_eq!(disco.stats().undecided, Some(4));
554 Ok(())
554 Ok(())
555 }
555 }
556
556
557 /// in this test, we pretend that our peer misses exactly (8+10)::
557 /// in this test, we pretend that our peer misses exactly (8+10)::
558 /// and we're comparing all our repo to it (as in a bare push)
558 /// and we're comparing all our repo to it (as in a bare push)
559 #[test]
559 #[test]
560 fn test_discovery() -> Result<(), GraphError> {
560 fn test_discovery() -> Result<(), GraphError> {
561 let mut disco = full_disco();
561 let mut disco = full_disco();
562 disco.add_common_revisions(vec![11, 12])?;
562 disco.add_common_revisions(vec![11, 12])?;
563 disco.add_missing_revisions(vec![8, 10])?;
563 disco.add_missing_revisions(vec![8, 10])?;
564 assert_eq!(sorted_undecided(&disco), vec![5]);
564 assert_eq!(sorted_undecided(&disco), vec![5]);
565 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
565 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
566 assert!(!disco.is_complete());
566 assert!(!disco.is_complete());
567
567
568 disco.add_common_revisions(vec![5])?;
568 disco.add_common_revisions(vec![5])?;
569 assert_eq!(sorted_undecided(&disco), vec![]);
569 assert_eq!(sorted_undecided(&disco), vec![]);
570 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
570 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
571 assert!(disco.is_complete());
571 assert!(disco.is_complete());
572 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
572 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
573 Ok(())
573 Ok(())
574 }
574 }
575
575
576 #[test]
576 #[test]
577 fn test_add_missing_early_continue() -> Result<(), GraphError> {
577 fn test_add_missing_early_continue() -> Result<(), GraphError> {
578 eprintln!("test_add_missing_early_stop");
578 eprintln!("test_add_missing_early_stop");
579 let mut disco = full_disco();
579 let mut disco = full_disco();
580 disco.add_common_revisions(vec![13, 3, 4])?;
580 disco.add_common_revisions(vec![13, 3, 4])?;
581 disco.ensure_children_cache()?;
581 disco.ensure_children_cache()?;
582 // 12 is grand-child of 6 through 9
582 // 12 is grand-child of 6 through 9
583 // passing them in this order maximizes the chances of the
583 // passing them in this order maximizes the chances of the
584 // early continue to do the wrong thing
584 // early continue to do the wrong thing
585 disco.add_missing_revisions(vec![6, 9, 12])?;
585 disco.add_missing_revisions(vec![6, 9, 12])?;
586 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
586 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
587 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
587 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
588 assert!(!disco.is_complete());
588 assert!(!disco.is_complete());
589 Ok(())
589 Ok(())
590 }
590 }
591
591
592 #[test]
592 #[test]
593 fn test_limit_sample_no_need_to() {
593 fn test_limit_sample_no_need_to() {
594 let sample = vec![1, 2, 3, 4];
594 let sample = vec![1, 2, 3, 4];
595 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
595 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
596 }
596 }
597
597
598 #[test]
598 #[test]
599 fn test_limit_sample_less_than_half() {
599 fn test_limit_sample_less_than_half() {
600 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![2, 5]);
600 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![2, 5]);
601 }
601 }
602
602
603 #[test]
603 #[test]
604 fn test_limit_sample_more_than_half() {
604 fn test_limit_sample_more_than_half() {
605 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![1, 2]);
605 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![1, 2]);
606 }
606 }
607
607
608 #[test]
608 #[test]
609 fn test_limit_sample_no_random() {
609 fn test_limit_sample_no_random() {
610 let mut disco = full_disco();
610 let mut disco = full_disco();
611 disco.randomize = false;
611 disco.randomize = false;
612 assert_eq!(
612 assert_eq!(
613 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
613 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
614 vec![1, 3, 5, 7]
614 vec![1, 3, 5, 7]
615 );
615 );
616 }
616 }
617
617
618 #[test]
618 #[test]
619 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
619 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
620 let mut disco = full_disco();
620 let mut disco = full_disco();
621 disco.undecided = Some((1..=13).collect());
621 disco.undecided = Some((1..=13).collect());
622
622
623 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
623 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
624 sample_vec.sort();
624 sample_vec.sort();
625 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
625 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
626 Ok(())
626 Ok(())
627 }
627 }
628
628
629 #[test]
629 #[test]
630 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
630 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
631 let mut disco = disco12();
631 let mut disco = disco12();
632 disco.ensure_undecided()?;
632 disco.ensure_undecided()?;
633
633
634 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
634 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
635 sample_vec.sort();
635 sample_vec.sort();
636 // r12's only parent is r9, whose unique grand-parent through the
636 // r12's only parent is r9, whose unique grand-parent through the
637 // diamond shape is r4. This ends there because the distance from r4
637 // diamond shape is r4. This ends there because the distance from r4
638 // to the root is only 3.
638 // to the root is only 3.
639 assert_eq!(sample_vec, vec![4, 9, 12]);
639 assert_eq!(sample_vec, vec![4, 9, 12]);
640 Ok(())
640 Ok(())
641 }
641 }
642
642
643 #[test]
643 #[test]
644 fn test_children_cache() -> Result<(), GraphError> {
644 fn test_children_cache() -> Result<(), GraphError> {
645 let mut disco = full_disco();
645 let mut disco = full_disco();
646 disco.ensure_children_cache()?;
646 disco.ensure_children_cache()?;
647
647
648 let cache = disco.children_cache.unwrap();
648 let cache = disco.children_cache.unwrap();
649 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
649 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
650 assert_eq!(cache.get(&10).cloned(), None);
650 assert_eq!(cache.get(&10).cloned(), None);
651
651
652 let mut children_4 = cache.get(&4).cloned().unwrap();
652 let mut children_4 = cache.get(&4).cloned().unwrap();
653 children_4.sort();
653 children_4.sort();
654 assert_eq!(children_4, vec![5, 6, 7]);
654 assert_eq!(children_4, vec![5, 6, 7]);
655
655
656 let mut children_7 = cache.get(&7).cloned().unwrap();
656 let mut children_7 = cache.get(&7).cloned().unwrap();
657 children_7.sort();
657 children_7.sort();
658 assert_eq!(children_7, vec![9, 11]);
658 assert_eq!(children_7, vec![9, 11]);
659
659
660 Ok(())
660 Ok(())
661 }
661 }
662
662
663 #[test]
663 #[test]
664 fn test_complete_sample() {
664 fn test_complete_sample() {
665 let mut disco = full_disco();
665 let mut disco = full_disco();
666 let undecided: HashSet<Revision> =
666 let undecided: HashSet<Revision> =
667 [4, 7, 9, 2, 3].iter().cloned().collect();
667 [4, 7, 9, 2, 3].iter().cloned().collect();
668 disco.undecided = Some(undecided);
668 disco.undecided = Some(undecided);
669
669
670 let mut sample = vec![0];
670 let mut sample = vec![0];
671 disco.random_complete_sample(&mut sample, 3);
671 disco.random_complete_sample(&mut sample, 3);
672 assert_eq!(sample.len(), 3);
672 assert_eq!(sample.len(), 3);
673
673
674 let mut sample = vec![2, 4, 7];
674 let mut sample = vec![2, 4, 7];
675 disco.random_complete_sample(&mut sample, 1);
675 disco.random_complete_sample(&mut sample, 1);
676 assert_eq!(sample.len(), 3);
676 assert_eq!(sample.len(), 3);
677 }
677 }
678
678
679 #[test]
679 #[test]
680 fn test_bidirectional_sample() -> Result<(), GraphError> {
680 fn test_bidirectional_sample() -> Result<(), GraphError> {
681 let mut disco = full_disco();
681 let mut disco = full_disco();
682 disco.undecided = Some((0..=13).into_iter().collect());
682 disco.undecided = Some((0..=13).into_iter().collect());
683
683
684 let (sample_set, size) = disco.bidirectional_sample(7)?;
684 let (sample_set, size) = disco.bidirectional_sample(7)?;
685 assert_eq!(size, 7);
685 assert_eq!(size, 7);
686 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
686 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
687 sample.sort();
687 sample.sort();
688 // our DAG is a bit too small for the results to be really interesting
688 // our DAG is a bit too small for the results to be really interesting
689 // at least it shows that
689 // at least it shows that
690 // - we went both ways
690 // - we went both ways
691 // - we didn't take all Revisions (6 is not in the sample)
691 // - we didn't take all Revisions (6 is not in the sample)
692 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
692 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
693 Ok(())
693 Ok(())
694 }
694 }
695 }
695 }
@@ -1,669 +1,670
1 // filepatterns.rs
1 // filepatterns.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Handling of Mercurial-specific patterns.
8 //! Handling of Mercurial-specific patterns.
9
9
10 use crate::{
10 use crate::{
11 utils::{
11 utils::{
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 SliceExt,
14 SliceExt,
15 },
15 },
16 FastHashMap, PatternError,
16 FastHashMap, PatternError,
17 };
17 };
18 use lazy_static::lazy_static;
18 use lazy_static::lazy_static;
19 use regex::bytes::{NoExpand, Regex};
19 use regex::bytes::{NoExpand, Regex};
20 use std::fs::File;
20 use std::fs::File;
21 use std::io::Read;
21 use std::io::Read;
22 use std::ops::Deref;
22 use std::ops::Deref;
23 use std::path::{Path, PathBuf};
23 use std::path::{Path, PathBuf};
24 use std::vec::Vec;
24 use std::vec::Vec;
25
25
26 lazy_static! {
26 lazy_static! {
27 static ref RE_ESCAPE: Vec<Vec<u8>> = {
27 static ref RE_ESCAPE: Vec<Vec<u8>> = {
28 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
28 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
29 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
29 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
30 for byte in to_escape {
30 for byte in to_escape {
31 v[*byte as usize].insert(0, b'\\');
31 v[*byte as usize].insert(0, b'\\');
32 }
32 }
33 v
33 v
34 };
34 };
35 }
35 }
36
36
37 /// These are matched in order
37 /// These are matched in order
38 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
38 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
39 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
39 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
40
40
41 /// Appended to the regexp of globs
41 /// Appended to the regexp of globs
42 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
42 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
43
43
44 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
44 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
45 pub enum PatternSyntax {
45 pub enum PatternSyntax {
46 /// A regular expression
46 /// A regular expression
47 Regexp,
47 Regexp,
48 /// Glob that matches at the front of the path
48 /// Glob that matches at the front of the path
49 RootGlob,
49 RootGlob,
50 /// Glob that matches at any suffix of the path (still anchored at
50 /// Glob that matches at any suffix of the path (still anchored at
51 /// slashes)
51 /// slashes)
52 Glob,
52 Glob,
53 /// a path relative to repository root, which is matched recursively
53 /// a path relative to repository root, which is matched recursively
54 Path,
54 Path,
55 /// A path relative to cwd
55 /// A path relative to cwd
56 RelPath,
56 RelPath,
57 /// an unrooted glob (*.rs matches Rust files in all dirs)
57 /// an unrooted glob (*.rs matches Rust files in all dirs)
58 RelGlob,
58 RelGlob,
59 /// A regexp that needn't match the start of a name
59 /// A regexp that needn't match the start of a name
60 RelRegexp,
60 RelRegexp,
61 /// A path relative to repository root, which is matched non-recursively
61 /// A path relative to repository root, which is matched non-recursively
62 /// (will not match subdirectories)
62 /// (will not match subdirectories)
63 RootFiles,
63 RootFiles,
64 /// A file of patterns to read and include
64 /// A file of patterns to read and include
65 Include,
65 Include,
66 /// A file of patterns to match against files under the same directory
66 /// A file of patterns to match against files under the same directory
67 SubInclude,
67 SubInclude,
68 }
68 }
69
69
70 /// Transforms a glob pattern into a regex
70 /// Transforms a glob pattern into a regex
71 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
71 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
72 let mut input = pat;
72 let mut input = pat;
73 let mut res: Vec<u8> = vec![];
73 let mut res: Vec<u8> = vec![];
74 let mut group_depth = 0;
74 let mut group_depth = 0;
75
75
76 while let Some((c, rest)) = input.split_first() {
76 while let Some((c, rest)) = input.split_first() {
77 input = rest;
77 input = rest;
78
78
79 match c {
79 match c {
80 b'*' => {
80 b'*' => {
81 for (source, repl) in GLOB_REPLACEMENTS {
81 for (source, repl) in GLOB_REPLACEMENTS {
82 if let Some(rest) = input.drop_prefix(source) {
82 if let Some(rest) = input.drop_prefix(source) {
83 input = rest;
83 input = rest;
84 res.extend(*repl);
84 res.extend(*repl);
85 break;
85 break;
86 }
86 }
87 }
87 }
88 }
88 }
89 b'?' => res.extend(b"."),
89 b'?' => res.extend(b"."),
90 b'[' => {
90 b'[' => {
91 match input.iter().skip(1).position(|b| *b == b']') {
91 match input.iter().skip(1).position(|b| *b == b']') {
92 None => res.extend(b"\\["),
92 None => res.extend(b"\\["),
93 Some(end) => {
93 Some(end) => {
94 // Account for the one we skipped
94 // Account for the one we skipped
95 let end = end + 1;
95 let end = end + 1;
96
96
97 res.extend(b"[");
97 res.extend(b"[");
98
98
99 for (i, b) in input[..end].iter().enumerate() {
99 for (i, b) in input[..end].iter().enumerate() {
100 if *b == b'!' && i == 0 {
100 if *b == b'!' && i == 0 {
101 res.extend(b"^")
101 res.extend(b"^")
102 } else if *b == b'^' && i == 0 {
102 } else if *b == b'^' && i == 0 {
103 res.extend(b"\\^")
103 res.extend(b"\\^")
104 } else if *b == b'\\' {
104 } else if *b == b'\\' {
105 res.extend(b"\\\\")
105 res.extend(b"\\\\")
106 } else {
106 } else {
107 res.push(*b)
107 res.push(*b)
108 }
108 }
109 }
109 }
110 res.extend(b"]");
110 res.extend(b"]");
111 input = &input[end + 1..];
111 input = &input[end + 1..];
112 }
112 }
113 }
113 }
114 }
114 }
115 b'{' => {
115 b'{' => {
116 group_depth += 1;
116 group_depth += 1;
117 res.extend(b"(?:")
117 res.extend(b"(?:")
118 }
118 }
119 b'}' if group_depth > 0 => {
119 b'}' if group_depth > 0 => {
120 group_depth -= 1;
120 group_depth -= 1;
121 res.extend(b")");
121 res.extend(b")");
122 }
122 }
123 b',' if group_depth > 0 => res.extend(b"|"),
123 b',' if group_depth > 0 => res.extend(b"|"),
124 b'\\' => {
124 b'\\' => {
125 let c = {
125 let c = {
126 if let Some((c, rest)) = input.split_first() {
126 if let Some((c, rest)) = input.split_first() {
127 input = rest;
127 input = rest;
128 c
128 c
129 } else {
129 } else {
130 c
130 c
131 }
131 }
132 };
132 };
133 res.extend(&RE_ESCAPE[*c as usize])
133 res.extend(&RE_ESCAPE[*c as usize])
134 }
134 }
135 _ => res.extend(&RE_ESCAPE[*c as usize]),
135 _ => res.extend(&RE_ESCAPE[*c as usize]),
136 }
136 }
137 }
137 }
138 res
138 res
139 }
139 }
140
140
141 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
141 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
142 pattern
142 pattern
143 .iter()
143 .iter()
144 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
144 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
145 .collect()
145 .collect()
146 }
146 }
147
147
148 pub fn parse_pattern_syntax(
148 pub fn parse_pattern_syntax(
149 kind: &[u8],
149 kind: &[u8],
150 ) -> Result<PatternSyntax, PatternError> {
150 ) -> Result<PatternSyntax, PatternError> {
151 match kind {
151 match kind {
152 b"re:" => Ok(PatternSyntax::Regexp),
152 b"re:" => Ok(PatternSyntax::Regexp),
153 b"path:" => Ok(PatternSyntax::Path),
153 b"path:" => Ok(PatternSyntax::Path),
154 b"relpath:" => Ok(PatternSyntax::RelPath),
154 b"relpath:" => Ok(PatternSyntax::RelPath),
155 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
155 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
156 b"relglob:" => Ok(PatternSyntax::RelGlob),
156 b"relglob:" => Ok(PatternSyntax::RelGlob),
157 b"relre:" => Ok(PatternSyntax::RelRegexp),
157 b"relre:" => Ok(PatternSyntax::RelRegexp),
158 b"glob:" => Ok(PatternSyntax::Glob),
158 b"glob:" => Ok(PatternSyntax::Glob),
159 b"rootglob:" => Ok(PatternSyntax::RootGlob),
159 b"rootglob:" => Ok(PatternSyntax::RootGlob),
160 b"include:" => Ok(PatternSyntax::Include),
160 b"include:" => Ok(PatternSyntax::Include),
161 b"subinclude:" => Ok(PatternSyntax::SubInclude),
161 b"subinclude:" => Ok(PatternSyntax::SubInclude),
162 _ => Err(PatternError::UnsupportedSyntax(
162 _ => Err(PatternError::UnsupportedSyntax(
163 String::from_utf8_lossy(kind).to_string(),
163 String::from_utf8_lossy(kind).to_string(),
164 )),
164 )),
165 }
165 }
166 }
166 }
167
167
168 /// Builds the regex that corresponds to the given pattern.
168 /// Builds the regex that corresponds to the given pattern.
169 /// If within a `syntax: regexp` context, returns the pattern,
169 /// If within a `syntax: regexp` context, returns the pattern,
170 /// otherwise, returns the corresponding regex.
170 /// otherwise, returns the corresponding regex.
171 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
171 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
172 let IgnorePattern {
172 let IgnorePattern {
173 syntax, pattern, ..
173 syntax, pattern, ..
174 } = entry;
174 } = entry;
175 if pattern.is_empty() {
175 if pattern.is_empty() {
176 return vec![];
176 return vec![];
177 }
177 }
178 match syntax {
178 match syntax {
179 PatternSyntax::Regexp => pattern.to_owned(),
179 PatternSyntax::Regexp => pattern.to_owned(),
180 PatternSyntax::RelRegexp => {
180 PatternSyntax::RelRegexp => {
181 // The `regex` crate accepts `**` while `re2` and Python's `re`
181 // The `regex` crate accepts `**` while `re2` and Python's `re`
182 // do not. Checking for `*` correctly triggers the same error all
182 // do not. Checking for `*` correctly triggers the same error all
183 // engines.
183 // engines.
184 if pattern[0] == b'^'
184 if pattern[0] == b'^'
185 || pattern[0] == b'*'
185 || pattern[0] == b'*'
186 || pattern.starts_with(b".*")
186 || pattern.starts_with(b".*")
187 {
187 {
188 return pattern.to_owned();
188 return pattern.to_owned();
189 }
189 }
190 [&b".*"[..], pattern].concat()
190 [&b".*"[..], pattern].concat()
191 }
191 }
192 PatternSyntax::Path | PatternSyntax::RelPath => {
192 PatternSyntax::Path | PatternSyntax::RelPath => {
193 if pattern == b"." {
193 if pattern == b"." {
194 return vec![];
194 return vec![];
195 }
195 }
196 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
196 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
197 }
197 }
198 PatternSyntax::RootFiles => {
198 PatternSyntax::RootFiles => {
199 let mut res = if pattern == b"." {
199 let mut res = if pattern == b"." {
200 vec![]
200 vec![]
201 } else {
201 } else {
202 // Pattern is a directory name.
202 // Pattern is a directory name.
203 [escape_pattern(pattern).as_slice(), b"/"].concat()
203 [escape_pattern(pattern).as_slice(), b"/"].concat()
204 };
204 };
205
205
206 // Anything after the pattern must be a non-directory.
206 // Anything after the pattern must be a non-directory.
207 res.extend(b"[^/]+$");
207 res.extend(b"[^/]+$");
208 res
208 res
209 }
209 }
210 PatternSyntax::RelGlob => {
210 PatternSyntax::RelGlob => {
211 let glob_re = glob_to_re(pattern);
211 let glob_re = glob_to_re(pattern);
212 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
212 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
213 [b".*", rest, GLOB_SUFFIX].concat()
213 [b".*", rest, GLOB_SUFFIX].concat()
214 } else {
214 } else {
215 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
215 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
216 }
216 }
217 }
217 }
218 PatternSyntax::Glob | PatternSyntax::RootGlob => {
218 PatternSyntax::Glob | PatternSyntax::RootGlob => {
219 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
219 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
220 }
220 }
221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
222 }
222 }
223 }
223 }
224
224
225 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
225 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
226 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
226 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
227
227
228 /// TODO support other platforms
228 /// TODO support other platforms
229 #[cfg(unix)]
229 #[cfg(unix)]
230 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
230 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
231 if bytes.is_empty() {
231 if bytes.is_empty() {
232 return b".".to_vec();
232 return b".".to_vec();
233 }
233 }
234 let sep = b'/';
234 let sep = b'/';
235
235
236 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
236 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
237 if initial_slashes > 2 {
237 if initial_slashes > 2 {
238 // POSIX allows one or two initial slashes, but treats three or more
238 // POSIX allows one or two initial slashes, but treats three or more
239 // as single slash.
239 // as single slash.
240 initial_slashes = 1;
240 initial_slashes = 1;
241 }
241 }
242 let components = bytes
242 let components = bytes
243 .split(|b| *b == sep)
243 .split(|b| *b == sep)
244 .filter(|c| !(c.is_empty() || c == b"."))
244 .filter(|c| !(c.is_empty() || c == b"."))
245 .fold(vec![], |mut acc, component| {
245 .fold(vec![], |mut acc, component| {
246 if component != b".."
246 if component != b".."
247 || (initial_slashes == 0 && acc.is_empty())
247 || (initial_slashes == 0 && acc.is_empty())
248 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
248 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
249 {
249 {
250 acc.push(component)
250 acc.push(component)
251 } else if !acc.is_empty() {
251 } else if !acc.is_empty() {
252 acc.pop();
252 acc.pop();
253 }
253 }
254 acc
254 acc
255 });
255 });
256 let mut new_bytes = components.join(&sep);
256 let mut new_bytes = components.join(&sep);
257
257
258 if initial_slashes > 0 {
258 if initial_slashes > 0 {
259 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
259 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
260 buf.extend(new_bytes);
260 buf.extend(new_bytes);
261 new_bytes = buf;
261 new_bytes = buf;
262 }
262 }
263 if new_bytes.is_empty() {
263 if new_bytes.is_empty() {
264 b".".to_vec()
264 b".".to_vec()
265 } else {
265 } else {
266 new_bytes
266 new_bytes
267 }
267 }
268 }
268 }
269
269
270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
271 /// that don't need to be transformed into a regex.
271 /// that don't need to be transformed into a regex.
272 pub fn build_single_regex(
272 pub fn build_single_regex(
273 entry: &IgnorePattern,
273 entry: &IgnorePattern,
274 ) -> Result<Option<Vec<u8>>, PatternError> {
274 ) -> Result<Option<Vec<u8>>, PatternError> {
275 let IgnorePattern {
275 let IgnorePattern {
276 pattern, syntax, ..
276 pattern, syntax, ..
277 } = entry;
277 } = entry;
278 let pattern = match syntax {
278 let pattern = match syntax {
279 PatternSyntax::RootGlob
279 PatternSyntax::RootGlob
280 | PatternSyntax::Path
280 | PatternSyntax::Path
281 | PatternSyntax::RelGlob
281 | PatternSyntax::RelGlob
282 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
282 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
283 PatternSyntax::Include | PatternSyntax::SubInclude => {
283 PatternSyntax::Include | PatternSyntax::SubInclude => {
284 return Err(PatternError::NonRegexPattern(entry.clone()))
284 return Err(PatternError::NonRegexPattern(entry.clone()))
285 }
285 }
286 _ => pattern.to_owned(),
286 _ => pattern.to_owned(),
287 };
287 };
288 if *syntax == PatternSyntax::RootGlob
288 if *syntax == PatternSyntax::RootGlob
289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
290 {
290 {
291 Ok(None)
291 Ok(None)
292 } else {
292 } else {
293 let mut entry = entry.clone();
293 let mut entry = entry.clone();
294 entry.pattern = pattern;
294 entry.pattern = pattern;
295 Ok(Some(_build_single_regex(&entry)))
295 Ok(Some(_build_single_regex(&entry)))
296 }
296 }
297 }
297 }
298
298
299 lazy_static! {
299 lazy_static! {
300 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
300 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
301 let mut m = FastHashMap::default();
301 let mut m = FastHashMap::default();
302
302
303 m.insert(b"re".as_ref(), b"relre:".as_ref());
303 m.insert(b"re".as_ref(), b"relre:".as_ref());
304 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
304 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
305 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
305 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
306 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
306 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
307 m.insert(b"include".as_ref(), b"include:".as_ref());
307 m.insert(b"include".as_ref(), b"include:".as_ref());
308 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
308 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
309 m
309 m
310 };
310 };
311 }
311 }
312
312
313 #[derive(Debug)]
313 #[derive(Debug)]
314 pub enum PatternFileWarning {
314 pub enum PatternFileWarning {
315 /// (file path, syntax bytes)
315 /// (file path, syntax bytes)
316 InvalidSyntax(PathBuf, Vec<u8>),
316 InvalidSyntax(PathBuf, Vec<u8>),
317 /// File path
317 /// File path
318 NoSuchFile(PathBuf),
318 NoSuchFile(PathBuf),
319 }
319 }
320
320
321 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
321 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
322 lines: &[u8],
322 lines: &[u8],
323 file_path: P,
323 file_path: P,
324 warn: bool,
324 warn: bool,
325 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
325 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
326 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
326 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
327
328 #[allow(clippy::trivial_regex)]
327 let comment_escape_regex = Regex::new(r"\\#").unwrap();
329 let comment_escape_regex = Regex::new(r"\\#").unwrap();
328 let mut inputs: Vec<IgnorePattern> = vec![];
330 let mut inputs: Vec<IgnorePattern> = vec![];
329 let mut warnings: Vec<PatternFileWarning> = vec![];
331 let mut warnings: Vec<PatternFileWarning> = vec![];
330
332
331 let mut current_syntax = b"relre:".as_ref();
333 let mut current_syntax = b"relre:".as_ref();
332
334
333 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
335 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
334 let line_number = line_number + 1;
336 let line_number = line_number + 1;
335
337
336 let line_buf;
338 let line_buf;
337 if line.contains(&b'#') {
339 if line.contains(&b'#') {
338 if let Some(cap) = comment_regex.captures(line) {
340 if let Some(cap) = comment_regex.captures(line) {
339 line = &line[..cap.get(1).unwrap().end()]
341 line = &line[..cap.get(1).unwrap().end()]
340 }
342 }
341 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
343 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
342 line = &line_buf;
344 line = &line_buf;
343 }
345 }
344
346
345 let mut line = line.trim_end();
347 let mut line = line.trim_end();
346
348
347 if line.is_empty() {
349 if line.is_empty() {
348 continue;
350 continue;
349 }
351 }
350
352
351 if let Some(syntax) = line.drop_prefix(b"syntax:") {
353 if let Some(syntax) = line.drop_prefix(b"syntax:") {
352 let syntax = syntax.trim();
354 let syntax = syntax.trim();
353
355
354 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
356 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
355 current_syntax = rel_syntax;
357 current_syntax = rel_syntax;
356 } else if warn {
358 } else if warn {
357 warnings.push(PatternFileWarning::InvalidSyntax(
359 warnings.push(PatternFileWarning::InvalidSyntax(
358 file_path.as_ref().to_owned(),
360 file_path.as_ref().to_owned(),
359 syntax.to_owned(),
361 syntax.to_owned(),
360 ));
362 ));
361 }
363 }
362 continue;
364 continue;
363 }
365 }
364
366
365 let mut line_syntax: &[u8] = &current_syntax;
367 let mut line_syntax: &[u8] = &current_syntax;
366
368
367 for (s, rels) in SYNTAXES.iter() {
369 for (s, rels) in SYNTAXES.iter() {
368 if let Some(rest) = line.drop_prefix(rels) {
370 if let Some(rest) = line.drop_prefix(rels) {
369 line_syntax = rels;
371 line_syntax = rels;
370 line = rest;
372 line = rest;
371 break;
373 break;
372 }
374 }
373 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
375 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
374 line_syntax = rels;
376 line_syntax = rels;
375 line = rest;
377 line = rest;
376 break;
378 break;
377 }
379 }
378 }
380 }
379
381
380 inputs.push(IgnorePattern::new(
382 inputs.push(IgnorePattern::new(
381 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
383 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
382 PatternError::UnsupportedSyntax(syntax) => {
384 PatternError::UnsupportedSyntax(syntax) => {
383 PatternError::UnsupportedSyntaxInFile(
385 PatternError::UnsupportedSyntaxInFile(
384 syntax,
386 syntax,
385 file_path.as_ref().to_string_lossy().into(),
387 file_path.as_ref().to_string_lossy().into(),
386 line_number,
388 line_number,
387 )
389 )
388 }
390 }
389 _ => e,
391 _ => e,
390 })?,
392 })?,
391 &line,
393 &line,
392 &file_path,
394 &file_path,
393 ));
395 ));
394 }
396 }
395 Ok((inputs, warnings))
397 Ok((inputs, warnings))
396 }
398 }
397
399
398 pub fn read_pattern_file<P: AsRef<Path>>(
400 pub fn read_pattern_file<P: AsRef<Path>>(
399 file_path: P,
401 file_path: P,
400 warn: bool,
402 warn: bool,
401 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
403 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
402 let mut f = match File::open(file_path.as_ref()) {
404 let mut f = match File::open(file_path.as_ref()) {
403 Ok(f) => Ok(f),
405 Ok(f) => Ok(f),
404 Err(e) => match e.kind() {
406 Err(e) => match e.kind() {
405 std::io::ErrorKind::NotFound => {
407 std::io::ErrorKind::NotFound => {
406 return Ok((
408 return Ok((
407 vec![],
409 vec![],
408 vec![PatternFileWarning::NoSuchFile(
410 vec![PatternFileWarning::NoSuchFile(
409 file_path.as_ref().to_owned(),
411 file_path.as_ref().to_owned(),
410 )],
412 )],
411 ))
413 ))
412 }
414 }
413 _ => Err(e),
415 _ => Err(e),
414 },
416 },
415 }?;
417 }?;
416 let mut contents = Vec::new();
418 let mut contents = Vec::new();
417
419
418 f.read_to_end(&mut contents)?;
420 f.read_to_end(&mut contents)?;
419
421
420 Ok(parse_pattern_file_contents(&contents, file_path, warn)?)
422 Ok(parse_pattern_file_contents(&contents, file_path, warn)?)
421 }
423 }
422
424
423 /// Represents an entry in an "ignore" file.
425 /// Represents an entry in an "ignore" file.
424 #[derive(Debug, Eq, PartialEq, Clone)]
426 #[derive(Debug, Eq, PartialEq, Clone)]
425 pub struct IgnorePattern {
427 pub struct IgnorePattern {
426 pub syntax: PatternSyntax,
428 pub syntax: PatternSyntax,
427 pub pattern: Vec<u8>,
429 pub pattern: Vec<u8>,
428 pub source: PathBuf,
430 pub source: PathBuf,
429 }
431 }
430
432
431 impl IgnorePattern {
433 impl IgnorePattern {
432 pub fn new(
434 pub fn new(
433 syntax: PatternSyntax,
435 syntax: PatternSyntax,
434 pattern: &[u8],
436 pattern: &[u8],
435 source: impl AsRef<Path>,
437 source: impl AsRef<Path>,
436 ) -> Self {
438 ) -> Self {
437 Self {
439 Self {
438 syntax,
440 syntax,
439 pattern: pattern.to_owned(),
441 pattern: pattern.to_owned(),
440 source: source.as_ref().to_owned(),
442 source: source.as_ref().to_owned(),
441 }
443 }
442 }
444 }
443 }
445 }
444
446
445 pub type PatternResult<T> = Result<T, PatternError>;
447 pub type PatternResult<T> = Result<T, PatternError>;
446
448
447 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
449 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
448 /// patterns.
450 /// patterns.
449 ///
451 ///
450 /// `subinclude:` is not treated as a special pattern here: unraveling them
452 /// `subinclude:` is not treated as a special pattern here: unraveling them
451 /// needs to occur in the "ignore" phase.
453 /// needs to occur in the "ignore" phase.
452 pub fn get_patterns_from_file(
454 pub fn get_patterns_from_file(
453 pattern_file: impl AsRef<Path>,
455 pattern_file: impl AsRef<Path>,
454 root_dir: impl AsRef<Path>,
456 root_dir: impl AsRef<Path>,
455 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
457 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
456 let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?;
458 let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?;
457 let patterns = patterns
459 let patterns = patterns
458 .into_iter()
460 .into_iter()
459 .flat_map(|entry| -> PatternResult<_> {
461 .flat_map(|entry| -> PatternResult<_> {
460 let IgnorePattern {
462 let IgnorePattern {
461 syntax,
463 syntax, pattern, ..
462 pattern,
463 source: _,
464 } = &entry;
464 } = &entry;
465 Ok(match syntax {
465 Ok(match syntax {
466 PatternSyntax::Include => {
466 PatternSyntax::Include => {
467 let inner_include =
467 let inner_include =
468 root_dir.as_ref().join(get_path_from_bytes(&pattern));
468 root_dir.as_ref().join(get_path_from_bytes(&pattern));
469 let (inner_pats, inner_warnings) = get_patterns_from_file(
469 let (inner_pats, inner_warnings) = get_patterns_from_file(
470 &inner_include,
470 &inner_include,
471 root_dir.as_ref(),
471 root_dir.as_ref(),
472 )?;
472 )?;
473 warnings.extend(inner_warnings);
473 warnings.extend(inner_warnings);
474 inner_pats
474 inner_pats
475 }
475 }
476 _ => vec![entry],
476 _ => vec![entry],
477 })
477 })
478 })
478 })
479 .flatten()
479 .flatten()
480 .collect();
480 .collect();
481
481
482 Ok((patterns, warnings))
482 Ok((patterns, warnings))
483 }
483 }
484
484
485 /// Holds all the information needed to handle a `subinclude:` pattern.
485 /// Holds all the information needed to handle a `subinclude:` pattern.
486 pub struct SubInclude {
486 pub struct SubInclude {
487 /// Will be used for repository (hg) paths that start with this prefix.
487 /// Will be used for repository (hg) paths that start with this prefix.
488 /// It is relative to the current working directory, so comparing against
488 /// It is relative to the current working directory, so comparing against
489 /// repository paths is painless.
489 /// repository paths is painless.
490 pub prefix: HgPathBuf,
490 pub prefix: HgPathBuf,
491 /// The file itself, containing the patterns
491 /// The file itself, containing the patterns
492 pub path: PathBuf,
492 pub path: PathBuf,
493 /// Folder in the filesystem where this it applies
493 /// Folder in the filesystem where this it applies
494 pub root: PathBuf,
494 pub root: PathBuf,
495 }
495 }
496
496
497 impl SubInclude {
497 impl SubInclude {
498 pub fn new(
498 pub fn new(
499 root_dir: impl AsRef<Path>,
499 root_dir: impl AsRef<Path>,
500 pattern: &[u8],
500 pattern: &[u8],
501 source: impl AsRef<Path>,
501 source: impl AsRef<Path>,
502 ) -> Result<SubInclude, HgPathError> {
502 ) -> Result<SubInclude, HgPathError> {
503 let normalized_source =
503 let normalized_source =
504 normalize_path_bytes(&get_bytes_from_path(source));
504 normalize_path_bytes(&get_bytes_from_path(source));
505
505
506 let source_root = get_path_from_bytes(&normalized_source);
506 let source_root = get_path_from_bytes(&normalized_source);
507 let source_root = source_root.parent().unwrap_or(source_root.deref());
507 let source_root =
508 source_root.parent().unwrap_or_else(|| source_root.deref());
508
509
509 let path = source_root.join(get_path_from_bytes(pattern));
510 let path = source_root.join(get_path_from_bytes(pattern));
510 let new_root = path.parent().unwrap_or(path.deref());
511 let new_root = path.parent().unwrap_or_else(|| path.deref());
511
512
512 let prefix = canonical_path(&root_dir, &root_dir, new_root)?;
513 let prefix = canonical_path(&root_dir, &root_dir, new_root)?;
513
514
514 Ok(Self {
515 Ok(Self {
515 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
516 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
516 if !p.is_empty() {
517 if !p.is_empty() {
517 p.push(b'/');
518 p.push(b'/');
518 }
519 }
519 Ok(p)
520 Ok(p)
520 })?,
521 })?,
521 path: path.to_owned(),
522 path: path.to_owned(),
522 root: new_root.to_owned(),
523 root: new_root.to_owned(),
523 })
524 })
524 }
525 }
525 }
526 }
526
527
527 /// Separate and pre-process subincludes from other patterns for the "ignore"
528 /// Separate and pre-process subincludes from other patterns for the "ignore"
528 /// phase.
529 /// phase.
529 pub fn filter_subincludes(
530 pub fn filter_subincludes(
530 ignore_patterns: &[IgnorePattern],
531 ignore_patterns: &[IgnorePattern],
531 root_dir: impl AsRef<Path>,
532 root_dir: impl AsRef<Path>,
532 ) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> {
533 ) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> {
533 let mut subincludes = vec![];
534 let mut subincludes = vec![];
534 let mut others = vec![];
535 let mut others = vec![];
535
536
536 for ignore_pattern in ignore_patterns.iter() {
537 for ignore_pattern in ignore_patterns.iter() {
537 let IgnorePattern {
538 let IgnorePattern {
538 syntax,
539 syntax,
539 pattern,
540 pattern,
540 source,
541 source,
541 } = ignore_pattern;
542 } = ignore_pattern;
542 if *syntax == PatternSyntax::SubInclude {
543 if *syntax == PatternSyntax::SubInclude {
543 subincludes.push(SubInclude::new(&root_dir, pattern, &source)?);
544 subincludes.push(SubInclude::new(&root_dir, pattern, &source)?);
544 } else {
545 } else {
545 others.push(ignore_pattern)
546 others.push(ignore_pattern)
546 }
547 }
547 }
548 }
548 Ok((subincludes, others))
549 Ok((subincludes, others))
549 }
550 }
550
551
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    #[test]
    fn escape_pattern_test() {
        // Characters with no special regex meaning pass through untouched.
        let untouched =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());
        // All escape codes
        assert_eq!(
            escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
            br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
                .to_vec()
        );
    }

    #[test]
    fn glob_test() {
        assert_eq!(glob_to_re(br#"?"#), br#"."#);
        assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
        assert_eq!(glob_to_re(br#"**"#), br#".*"#);
        assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
        assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
        assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
        assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
        assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
    }

    #[test]
    fn test_parse_pattern_file_contents() {
        let lines = b"syntax: glob\n*.elc";

        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.elc",
                Path::new("file_path")
            )],
        );

        // `syntax: include` is unsupported inline; both lines are dropped.
        let lines = b"syntax: include\nsyntax: glob";

        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![]
        );
        let lines = b"glob:**.o";
        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"**.o",
                Path::new("file_path")
            )]
        );
    }

    #[test]
    fn test_build_single_regex() {
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"rust/target/",
                Path::new("")
            ))
            .unwrap(),
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::Regexp,
                br"rust/target/\d+",
                Path::new("")
            ))
            .unwrap(),
            Some(br"rust/target/\d+".to_vec()),
        );
    }

    #[test]
    fn test_build_single_regex_shortcut() {
        // Rootiness globs that need no regex at all return `None`.
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"",
                Path::new("")
            ))
            .unwrap(),
            None,
        );
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"whatever",
                Path::new("")
            ))
            .unwrap(),
            None,
        );
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"*.o",
                Path::new("")
            ))
            .unwrap(),
            Some(br"[^/]*\.o(?:/|$)".to_vec()),
        );
    }
}
@@ -1,937 +1,937
1 // matchers.rs
1 // matchers.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Structs and types for matching files and directories.
8 //! Structs and types for matching files and directories.
9
9
10 use crate::{
10 use crate::{
11 dirstate::dirs_multiset::DirsChildrenMultiset,
11 dirstate::dirs_multiset::DirsChildrenMultiset,
12 filepatterns::{
12 filepatterns::{
13 build_single_regex, filter_subincludes, get_patterns_from_file,
13 build_single_regex, filter_subincludes, get_patterns_from_file,
14 PatternFileWarning, PatternResult, SubInclude,
14 PatternFileWarning, PatternResult, SubInclude,
15 },
15 },
16 utils::{
16 utils::{
17 files::find_dirs,
17 files::find_dirs,
18 hg_path::{HgPath, HgPathBuf},
18 hg_path::{HgPath, HgPathBuf},
19 Escaped,
19 Escaped,
20 },
20 },
21 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
21 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
22 PatternSyntax,
22 PatternSyntax,
23 };
23 };
24
24
25 use crate::filepatterns::normalize_path_bytes;
25 use crate::filepatterns::normalize_path_bytes;
26 use std::borrow::ToOwned;
26 use std::borrow::ToOwned;
27 use std::collections::HashSet;
27 use std::collections::HashSet;
28 use std::fmt::{Display, Error, Formatter};
28 use std::fmt::{Display, Error, Formatter};
29 use std::iter::FromIterator;
29 use std::iter::FromIterator;
30 use std::ops::Deref;
30 use std::ops::Deref;
31 use std::path::{Path, PathBuf};
31 use std::path::{Path, PathBuf};
32
32
33 use micro_timer::timed;
33 use micro_timer::timed;
34
34
35 #[derive(Debug, PartialEq)]
35 #[derive(Debug, PartialEq)]
36 pub enum VisitChildrenSet<'a> {
36 pub enum VisitChildrenSet<'a> {
37 /// Don't visit anything
37 /// Don't visit anything
38 Empty,
38 Empty,
39 /// Only visit this directory
39 /// Only visit this directory
40 This,
40 This,
41 /// Visit this directory and these subdirectories
41 /// Visit this directory and these subdirectories
42 /// TODO Should we implement a `NonEmptyHashSet`?
42 /// TODO Should we implement a `NonEmptyHashSet`?
43 Set(HashSet<&'a HgPath>),
43 Set(HashSet<&'a HgPath>),
44 /// Visit this directory and all subdirectories
44 /// Visit this directory and all subdirectories
45 Recursive,
45 Recursive,
46 }
46 }
47
47
48 pub trait Matcher {
48 pub trait Matcher {
49 /// Explicitly listed files
49 /// Explicitly listed files
50 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
50 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
51 /// Returns whether `filename` is in `file_set`
51 /// Returns whether `filename` is in `file_set`
52 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
52 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
53 /// Returns whether `filename` is matched by this matcher
53 /// Returns whether `filename` is matched by this matcher
54 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
54 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
55 /// Decides whether a directory should be visited based on whether it
55 /// Decides whether a directory should be visited based on whether it
56 /// has potential matches in it or one of its subdirectories, and
56 /// has potential matches in it or one of its subdirectories, and
57 /// potentially lists which subdirectories of that directory should be
57 /// potentially lists which subdirectories of that directory should be
58 /// visited. This is based on the match's primary, included, and excluded
58 /// visited. This is based on the match's primary, included, and excluded
59 /// patterns.
59 /// patterns.
60 ///
60 ///
61 /// # Example
61 /// # Example
62 ///
62 ///
63 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
63 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
64 /// return the following values (assuming the implementation of
64 /// return the following values (assuming the implementation of
65 /// visit_children_set is capable of recognizing this; some implementations
65 /// visit_children_set is capable of recognizing this; some implementations
66 /// are not).
66 /// are not).
67 ///
67 ///
68 /// ```text
68 /// ```text
69 /// ```ignore
69 /// ```ignore
70 /// '' -> {'foo', 'qux'}
70 /// '' -> {'foo', 'qux'}
71 /// 'baz' -> set()
71 /// 'baz' -> set()
72 /// 'foo' -> {'bar'}
72 /// 'foo' -> {'bar'}
73 /// // Ideally this would be `Recursive`, but since the prefix nature of
73 /// // Ideally this would be `Recursive`, but since the prefix nature of
74 /// // matchers is applied to the entire matcher, we have to downgrade this
74 /// // matchers is applied to the entire matcher, we have to downgrade this
75 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
75 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
76 /// // `RootFilesIn'-kind matcher being mixed in.
76 /// // `RootFilesIn'-kind matcher being mixed in.
77 /// 'foo/bar' -> 'this'
77 /// 'foo/bar' -> 'this'
78 /// 'qux' -> 'this'
78 /// 'qux' -> 'this'
79 /// ```
79 /// ```
80 /// # Important
80 /// # Important
81 ///
81 ///
82 /// Most matchers do not know if they're representing files or
82 /// Most matchers do not know if they're representing files or
83 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
83 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
84 /// file or a directory, so `visit_children_set('dir')` for most matchers
84 /// file or a directory, so `visit_children_set('dir')` for most matchers
85 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
85 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
86 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
86 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
87 /// it may return `VisitChildrenSet::This`.
87 /// it may return `VisitChildrenSet::This`.
88 /// Do not rely on the return being a `HashSet` indicating that there are
88 /// Do not rely on the return being a `HashSet` indicating that there are
89 /// no files in this dir to investigate (or equivalently that if there are
89 /// no files in this dir to investigate (or equivalently that if there are
90 /// files to investigate in 'dir' that it will always return
90 /// files to investigate in 'dir' that it will always return
91 /// `VisitChildrenSet::This`).
91 /// `VisitChildrenSet::This`).
92 fn visit_children_set(
92 fn visit_children_set(
93 &self,
93 &self,
94 directory: impl AsRef<HgPath>,
94 directory: impl AsRef<HgPath>,
95 ) -> VisitChildrenSet;
95 ) -> VisitChildrenSet;
96 /// Matcher will match everything and `files_set()` will be empty:
96 /// Matcher will match everything and `files_set()` will be empty:
97 /// optimization might be possible.
97 /// optimization might be possible.
98 fn matches_everything(&self) -> bool;
98 fn matches_everything(&self) -> bool;
99 /// Matcher will match exactly the files in `files_set()`: optimization
99 /// Matcher will match exactly the files in `files_set()`: optimization
100 /// might be possible.
100 /// might be possible.
101 fn is_exact(&self) -> bool;
101 fn is_exact(&self) -> bool;
102 }
102 }
103
103
104 /// Matches everything.
104 /// Matches everything.
105 ///```
105 ///```
106 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
106 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
107 ///
107 ///
108 /// let matcher = AlwaysMatcher;
108 /// let matcher = AlwaysMatcher;
109 ///
109 ///
110 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
110 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
111 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
111 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
112 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
112 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
113 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
113 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
114 /// ```
114 /// ```
115 #[derive(Debug)]
115 #[derive(Debug)]
116 pub struct AlwaysMatcher;
116 pub struct AlwaysMatcher;
117
117
118 impl Matcher for AlwaysMatcher {
118 impl Matcher for AlwaysMatcher {
119 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
119 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
120 None
120 None
121 }
121 }
122 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
122 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
123 false
123 false
124 }
124 }
125 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
125 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
126 true
126 true
127 }
127 }
128 fn visit_children_set(
128 fn visit_children_set(
129 &self,
129 &self,
130 _directory: impl AsRef<HgPath>,
130 _directory: impl AsRef<HgPath>,
131 ) -> VisitChildrenSet {
131 ) -> VisitChildrenSet {
132 VisitChildrenSet::Recursive
132 VisitChildrenSet::Recursive
133 }
133 }
134 fn matches_everything(&self) -> bool {
134 fn matches_everything(&self) -> bool {
135 true
135 true
136 }
136 }
137 fn is_exact(&self) -> bool {
137 fn is_exact(&self) -> bool {
138 false
138 false
139 }
139 }
140 }
140 }
141
141
142 /// Matches the input files exactly. They are interpreted as paths, not
142 /// Matches the input files exactly. They are interpreted as paths, not
143 /// patterns.
143 /// patterns.
144 ///
144 ///
145 ///```
145 ///```
146 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
146 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
147 ///
147 ///
148 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
148 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
149 /// let matcher = FileMatcher::new(&files).unwrap();
149 /// let matcher = FileMatcher::new(&files).unwrap();
150 ///
150 ///
151 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
151 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
152 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
152 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
153 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
153 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
154 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
154 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
155 /// ```
155 /// ```
156 #[derive(Debug)]
156 #[derive(Debug)]
157 pub struct FileMatcher<'a> {
157 pub struct FileMatcher<'a> {
158 files: HashSet<&'a HgPath>,
158 files: HashSet<&'a HgPath>,
159 dirs: DirsMultiset,
159 dirs: DirsMultiset,
160 }
160 }
161
161
162 impl<'a> FileMatcher<'a> {
162 impl<'a> FileMatcher<'a> {
163 pub fn new(
163 pub fn new(
164 files: &'a [impl AsRef<HgPath>],
164 files: &'a [impl AsRef<HgPath>],
165 ) -> Result<Self, DirstateMapError> {
165 ) -> Result<Self, DirstateMapError> {
166 Ok(Self {
166 Ok(Self {
167 files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
167 files: HashSet::from_iter(files.iter().map(AsRef::as_ref)),
168 dirs: DirsMultiset::from_manifest(files)?,
168 dirs: DirsMultiset::from_manifest(files)?,
169 })
169 })
170 }
170 }
171 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
171 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
172 self.files.contains(filename.as_ref())
172 self.files.contains(filename.as_ref())
173 }
173 }
174 }
174 }
175
175
176 impl<'a> Matcher for FileMatcher<'a> {
176 impl<'a> Matcher for FileMatcher<'a> {
177 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
177 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
178 Some(&self.files)
178 Some(&self.files)
179 }
179 }
180 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
180 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
181 self.inner_matches(filename)
181 self.inner_matches(filename)
182 }
182 }
183 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
183 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
184 self.inner_matches(filename)
184 self.inner_matches(filename)
185 }
185 }
186 fn visit_children_set(
186 fn visit_children_set(
187 &self,
187 &self,
188 directory: impl AsRef<HgPath>,
188 directory: impl AsRef<HgPath>,
189 ) -> VisitChildrenSet {
189 ) -> VisitChildrenSet {
190 if self.files.is_empty() || !self.dirs.contains(&directory) {
190 if self.files.is_empty() || !self.dirs.contains(&directory) {
191 return VisitChildrenSet::Empty;
191 return VisitChildrenSet::Empty;
192 }
192 }
193 let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
193 let dirs_as_set = self.dirs.iter().map(Deref::deref).collect();
194
194
195 let mut candidates: HashSet<&HgPath> =
195 let mut candidates: HashSet<&HgPath> =
196 self.files.union(&dirs_as_set).map(|k| *k).collect();
196 self.files.union(&dirs_as_set).cloned().collect();
197 candidates.remove(HgPath::new(b""));
197 candidates.remove(HgPath::new(b""));
198
198
199 if !directory.as_ref().is_empty() {
199 if !directory.as_ref().is_empty() {
200 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
200 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
201 candidates = candidates
201 candidates = candidates
202 .iter()
202 .iter()
203 .filter_map(|c| {
203 .filter_map(|c| {
204 if c.as_bytes().starts_with(&directory) {
204 if c.as_bytes().starts_with(&directory) {
205 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
205 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
206 } else {
206 } else {
207 None
207 None
208 }
208 }
209 })
209 })
210 .collect();
210 .collect();
211 }
211 }
212
212
213 // `self.dirs` includes all of the directories, recursively, so if
213 // `self.dirs` includes all of the directories, recursively, so if
214 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
214 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
215 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
215 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
216 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
216 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
217 // subdir will be in there without a slash.
217 // subdir will be in there without a slash.
218 VisitChildrenSet::Set(
218 VisitChildrenSet::Set(
219 candidates
219 candidates
220 .iter()
220 .iter()
221 .filter_map(|c| {
221 .filter_map(|c| {
222 if c.bytes().all(|b| *b != b'/') {
222 if c.bytes().all(|b| *b != b'/') {
223 Some(*c)
223 Some(*c)
224 } else {
224 } else {
225 None
225 None
226 }
226 }
227 })
227 })
228 .collect(),
228 .collect(),
229 )
229 )
230 }
230 }
231 fn matches_everything(&self) -> bool {
231 fn matches_everything(&self) -> bool {
232 false
232 false
233 }
233 }
234 fn is_exact(&self) -> bool {
234 fn is_exact(&self) -> bool {
235 true
235 true
236 }
236 }
237 }
237 }
238
238
239 /// Matches files that are included in the ignore rules.
239 /// Matches files that are included in the ignore rules.
240 /// ```
240 /// ```
241 /// use hg::{
241 /// use hg::{
242 /// matchers::{IncludeMatcher, Matcher},
242 /// matchers::{IncludeMatcher, Matcher},
243 /// IgnorePattern,
243 /// IgnorePattern,
244 /// PatternSyntax,
244 /// PatternSyntax,
245 /// utils::hg_path::HgPath
245 /// utils::hg_path::HgPath
246 /// };
246 /// };
247 /// use std::path::Path;
247 /// use std::path::Path;
248 /// ///
248 /// ///
249 /// let ignore_patterns =
249 /// let ignore_patterns =
250 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
250 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
251 /// let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
251 /// let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
252 /// ///
252 /// ///
253 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
253 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
254 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
254 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
255 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
255 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
256 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
256 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
257 /// ```
257 /// ```
258 pub struct IncludeMatcher<'a> {
258 pub struct IncludeMatcher<'a> {
259 patterns: Vec<u8>,
259 patterns: Vec<u8>,
260 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
260 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
261 /// Whether all the patterns match a prefix (i.e. recursively)
261 /// Whether all the patterns match a prefix (i.e. recursively)
262 prefix: bool,
262 prefix: bool,
263 roots: HashSet<HgPathBuf>,
263 roots: HashSet<HgPathBuf>,
264 dirs: HashSet<HgPathBuf>,
264 dirs: HashSet<HgPathBuf>,
265 parents: HashSet<HgPathBuf>,
265 parents: HashSet<HgPathBuf>,
266 }
266 }
267
267
268 impl<'a> Matcher for IncludeMatcher<'a> {
268 impl<'a> Matcher for IncludeMatcher<'a> {
269 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
269 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
270 None
270 None
271 }
271 }
272
272
273 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
273 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
274 false
274 false
275 }
275 }
276
276
277 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
277 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
278 (self.match_fn)(filename.as_ref())
278 (self.match_fn)(filename.as_ref())
279 }
279 }
280
280
281 fn visit_children_set(
281 fn visit_children_set(
282 &self,
282 &self,
283 directory: impl AsRef<HgPath>,
283 directory: impl AsRef<HgPath>,
284 ) -> VisitChildrenSet {
284 ) -> VisitChildrenSet {
285 let dir = directory.as_ref();
285 let dir = directory.as_ref();
286 if self.prefix && self.roots.contains(dir) {
286 if self.prefix && self.roots.contains(dir) {
287 return VisitChildrenSet::Recursive;
287 return VisitChildrenSet::Recursive;
288 }
288 }
289 if self.roots.contains(HgPath::new(b""))
289 if self.roots.contains(HgPath::new(b""))
290 || self.roots.contains(dir)
290 || self.roots.contains(dir)
291 || self.dirs.contains(dir)
291 || self.dirs.contains(dir)
292 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
292 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
293 {
293 {
294 return VisitChildrenSet::This;
294 return VisitChildrenSet::This;
295 }
295 }
296
296
297 if self.parents.contains(directory.as_ref()) {
297 if self.parents.contains(directory.as_ref()) {
298 let multiset = self.get_all_parents_children();
298 let multiset = self.get_all_parents_children();
299 if let Some(children) = multiset.get(dir) {
299 if let Some(children) = multiset.get(dir) {
300 return VisitChildrenSet::Set(children.to_owned());
300 return VisitChildrenSet::Set(children.to_owned());
301 }
301 }
302 }
302 }
303 VisitChildrenSet::Empty
303 VisitChildrenSet::Empty
304 }
304 }
305
305
306 fn matches_everything(&self) -> bool {
306 fn matches_everything(&self) -> bool {
307 false
307 false
308 }
308 }
309
309
310 fn is_exact(&self) -> bool {
310 fn is_exact(&self) -> bool {
311 false
311 false
312 }
312 }
313 }
313 }
314
314
315 /// Returns a function that matches an `HgPath` against the given regex
315 /// Returns a function that matches an `HgPath` against the given regex
316 /// pattern.
316 /// pattern.
317 ///
317 ///
318 /// This can fail when the pattern is invalid or not supported by the
318 /// This can fail when the pattern is invalid or not supported by the
319 /// underlying engine (the `regex` crate), for instance anything with
319 /// underlying engine (the `regex` crate), for instance anything with
320 /// back-references.
320 /// back-references.
321 #[timed]
321 #[timed]
322 fn re_matcher(
322 fn re_matcher(
323 pattern: &[u8],
323 pattern: &[u8],
324 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
324 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
325 use std::io::Write;
325 use std::io::Write;
326
326
327 // The `regex` crate adds `.*` to the start and end of expressions if there
327 // The `regex` crate adds `.*` to the start and end of expressions if there
328 // are no anchors, so add the start anchor.
328 // are no anchors, so add the start anchor.
329 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
329 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
330 for byte in pattern {
330 for byte in pattern {
331 if *byte > 127 {
331 if *byte > 127 {
332 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
332 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
333 } else {
333 } else {
334 escaped_bytes.push(*byte);
334 escaped_bytes.push(*byte);
335 }
335 }
336 }
336 }
337 escaped_bytes.push(b')');
337 escaped_bytes.push(b')');
338
338
339 // Avoid the cost of UTF8 checking
339 // Avoid the cost of UTF8 checking
340 //
340 //
341 // # Safety
341 // # Safety
342 // This is safe because we escaped all non-ASCII bytes.
342 // This is safe because we escaped all non-ASCII bytes.
343 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
343 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
344 let re = regex::bytes::RegexBuilder::new(&pattern_string)
344 let re = regex::bytes::RegexBuilder::new(&pattern_string)
345 .unicode(false)
345 .unicode(false)
346 // Big repos with big `.hgignore` will hit the default limit and
346 // Big repos with big `.hgignore` will hit the default limit and
347 // incur a significant performance hit. One repo's `hg status` hit
347 // incur a significant performance hit. One repo's `hg status` hit
348 // multiple *minutes*.
348 // multiple *minutes*.
349 .dfa_size_limit(50 * (1 << 20))
349 .dfa_size_limit(50 * (1 << 20))
350 .build()
350 .build()
351 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
351 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
352
352
353 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
353 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
354 }
354 }
355
355
356 /// Returns the regex pattern and a function that matches an `HgPath` against
356 /// Returns the regex pattern and a function that matches an `HgPath` against
357 /// said regex formed by the given ignore patterns.
357 /// said regex formed by the given ignore patterns.
358 fn build_regex_match<'a>(
358 fn build_regex_match<'a>(
359 ignore_patterns: &'a [&'a IgnorePattern],
359 ignore_patterns: &'a [&'a IgnorePattern],
360 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
360 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
361 let mut regexps = vec![];
361 let mut regexps = vec![];
362 let mut exact_set = HashSet::new();
362 let mut exact_set = HashSet::new();
363
363
364 for pattern in ignore_patterns {
364 for pattern in ignore_patterns {
365 if let Some(re) = build_single_regex(pattern)? {
365 if let Some(re) = build_single_regex(pattern)? {
366 regexps.push(re);
366 regexps.push(re);
367 } else {
367 } else {
368 let exact = normalize_path_bytes(&pattern.pattern);
368 let exact = normalize_path_bytes(&pattern.pattern);
369 exact_set.insert(HgPathBuf::from_bytes(&exact));
369 exact_set.insert(HgPathBuf::from_bytes(&exact));
370 }
370 }
371 }
371 }
372
372
373 let full_regex = regexps.join(&b'|');
373 let full_regex = regexps.join(&b'|');
374
374
375 // An empty pattern would cause the regex engine to incorrectly match the
375 // An empty pattern would cause the regex engine to incorrectly match the
376 // (empty) root directory
376 // (empty) root directory
377 let func = if !(regexps.is_empty()) {
377 let func = if !(regexps.is_empty()) {
378 let matcher = re_matcher(&full_regex)?;
378 let matcher = re_matcher(&full_regex)?;
379 let func = move |filename: &HgPath| {
379 let func = move |filename: &HgPath| {
380 exact_set.contains(filename) || matcher(filename)
380 exact_set.contains(filename) || matcher(filename)
381 };
381 };
382 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
382 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
383 } else {
383 } else {
384 let func = move |filename: &HgPath| exact_set.contains(filename);
384 let func = move |filename: &HgPath| exact_set.contains(filename);
385 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
385 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
386 };
386 };
387
387
388 Ok((full_regex, func))
388 Ok((full_regex, func))
389 }
389 }
390
390
391 /// Returns roots and directories corresponding to each pattern.
391 /// Returns roots and directories corresponding to each pattern.
392 ///
392 ///
393 /// This calculates the roots and directories exactly matching the patterns and
393 /// This calculates the roots and directories exactly matching the patterns and
394 /// returns a tuple of (roots, dirs). It does not return other directories
394 /// returns a tuple of (roots, dirs). It does not return other directories
395 /// which may also need to be considered, like the parent directories.
395 /// which may also need to be considered, like the parent directories.
396 fn roots_and_dirs(
396 fn roots_and_dirs(
397 ignore_patterns: &[IgnorePattern],
397 ignore_patterns: &[IgnorePattern],
398 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
398 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
399 let mut roots = Vec::new();
399 let mut roots = Vec::new();
400 let mut dirs = Vec::new();
400 let mut dirs = Vec::new();
401
401
402 for ignore_pattern in ignore_patterns {
402 for ignore_pattern in ignore_patterns {
403 let IgnorePattern {
403 let IgnorePattern {
404 syntax, pattern, ..
404 syntax, pattern, ..
405 } = ignore_pattern;
405 } = ignore_pattern;
406 match syntax {
406 match syntax {
407 PatternSyntax::RootGlob | PatternSyntax::Glob => {
407 PatternSyntax::RootGlob | PatternSyntax::Glob => {
408 let mut root = vec![];
408 let mut root = vec![];
409
409
410 for p in pattern.split(|c| *c == b'/') {
410 for p in pattern.split(|c| *c == b'/') {
411 if p.iter().any(|c| match *c {
411 if p.iter().any(|c| match *c {
412 b'[' | b'{' | b'*' | b'?' => true,
412 b'[' | b'{' | b'*' | b'?' => true,
413 _ => false,
413 _ => false,
414 }) {
414 }) {
415 break;
415 break;
416 }
416 }
417 root.push(HgPathBuf::from_bytes(p));
417 root.push(HgPathBuf::from_bytes(p));
418 }
418 }
419 let buf =
419 let buf =
420 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
420 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
421 roots.push(buf);
421 roots.push(buf);
422 }
422 }
423 PatternSyntax::Path | PatternSyntax::RelPath => {
423 PatternSyntax::Path | PatternSyntax::RelPath => {
424 let pat = HgPath::new(if pattern == b"." {
424 let pat = HgPath::new(if pattern == b"." {
425 &[] as &[u8]
425 &[] as &[u8]
426 } else {
426 } else {
427 pattern
427 pattern
428 });
428 });
429 roots.push(pat.to_owned());
429 roots.push(pat.to_owned());
430 }
430 }
431 PatternSyntax::RootFiles => {
431 PatternSyntax::RootFiles => {
432 let pat = if pattern == b"." {
432 let pat = if pattern == b"." {
433 &[] as &[u8]
433 &[] as &[u8]
434 } else {
434 } else {
435 pattern
435 pattern
436 };
436 };
437 dirs.push(HgPathBuf::from_bytes(pat));
437 dirs.push(HgPathBuf::from_bytes(pat));
438 }
438 }
439 _ => {
439 _ => {
440 roots.push(HgPathBuf::new());
440 roots.push(HgPathBuf::new());
441 }
441 }
442 }
442 }
443 }
443 }
444 (roots, dirs)
444 (roots, dirs)
445 }
445 }
446
446
447 /// Paths extracted from patterns
447 /// Paths extracted from patterns
448 #[derive(Debug, PartialEq)]
448 #[derive(Debug, PartialEq)]
449 struct RootsDirsAndParents {
449 struct RootsDirsAndParents {
450 /// Directories to match recursively
450 /// Directories to match recursively
451 pub roots: HashSet<HgPathBuf>,
451 pub roots: HashSet<HgPathBuf>,
452 /// Directories to match non-recursively
452 /// Directories to match non-recursively
453 pub dirs: HashSet<HgPathBuf>,
453 pub dirs: HashSet<HgPathBuf>,
454 /// Implicitly required directories to go to items in either roots or dirs
454 /// Implicitly required directories to go to items in either roots or dirs
455 pub parents: HashSet<HgPathBuf>,
455 pub parents: HashSet<HgPathBuf>,
456 }
456 }
457
457
458 /// Extract roots, dirs and parents from patterns.
458 /// Extract roots, dirs and parents from patterns.
459 fn roots_dirs_and_parents(
459 fn roots_dirs_and_parents(
460 ignore_patterns: &[IgnorePattern],
460 ignore_patterns: &[IgnorePattern],
461 ) -> PatternResult<RootsDirsAndParents> {
461 ) -> PatternResult<RootsDirsAndParents> {
462 let (roots, dirs) = roots_and_dirs(ignore_patterns);
462 let (roots, dirs) = roots_and_dirs(ignore_patterns);
463
463
464 let mut parents = HashSet::new();
464 let mut parents = HashSet::new();
465
465
466 parents.extend(
466 parents.extend(
467 DirsMultiset::from_manifest(&dirs)
467 DirsMultiset::from_manifest(&dirs)
468 .map_err(|e| match e {
468 .map_err(|e| match e {
469 DirstateMapError::InvalidPath(e) => e,
469 DirstateMapError::InvalidPath(e) => e,
470 _ => unreachable!(),
470 _ => unreachable!(),
471 })?
471 })?
472 .iter()
472 .iter()
473 .map(|k| k.to_owned()),
473 .map(ToOwned::to_owned),
474 );
474 );
475 parents.extend(
475 parents.extend(
476 DirsMultiset::from_manifest(&roots)
476 DirsMultiset::from_manifest(&roots)
477 .map_err(|e| match e {
477 .map_err(|e| match e {
478 DirstateMapError::InvalidPath(e) => e,
478 DirstateMapError::InvalidPath(e) => e,
479 _ => unreachable!(),
479 _ => unreachable!(),
480 })?
480 })?
481 .iter()
481 .iter()
482 .map(|k| k.to_owned()),
482 .map(ToOwned::to_owned),
483 );
483 );
484
484
485 Ok(RootsDirsAndParents {
485 Ok(RootsDirsAndParents {
486 roots: HashSet::from_iter(roots),
486 roots: HashSet::from_iter(roots),
487 dirs: HashSet::from_iter(dirs),
487 dirs: HashSet::from_iter(dirs),
488 parents,
488 parents,
489 })
489 })
490 }
490 }
491
491
492 /// Returns a function that checks whether a given file (in the general sense)
492 /// Returns a function that checks whether a given file (in the general sense)
493 /// should be matched.
493 /// should be matched.
494 fn build_match<'a, 'b>(
494 fn build_match<'a, 'b>(
495 ignore_patterns: &'a [IgnorePattern],
495 ignore_patterns: &'a [IgnorePattern],
496 root_dir: impl AsRef<Path>,
496 root_dir: impl AsRef<Path>,
497 ) -> PatternResult<(
497 ) -> PatternResult<(
498 Vec<u8>,
498 Vec<u8>,
499 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
499 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
500 Vec<PatternFileWarning>,
500 Vec<PatternFileWarning>,
501 )> {
501 )> {
502 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
502 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
503 // For debugging and printing
503 // For debugging and printing
504 let mut patterns = vec![];
504 let mut patterns = vec![];
505 let mut all_warnings = vec![];
505 let mut all_warnings = vec![];
506
506
507 let (subincludes, ignore_patterns) =
507 let (subincludes, ignore_patterns) =
508 filter_subincludes(ignore_patterns, root_dir)?;
508 filter_subincludes(ignore_patterns, root_dir)?;
509
509
510 if !subincludes.is_empty() {
510 if !subincludes.is_empty() {
511 // Build prefix-based matcher functions for subincludes
511 // Build prefix-based matcher functions for subincludes
512 let mut submatchers = FastHashMap::default();
512 let mut submatchers = FastHashMap::default();
513 let mut prefixes = vec![];
513 let mut prefixes = vec![];
514
514
515 for SubInclude { prefix, root, path } in subincludes.into_iter() {
515 for SubInclude { prefix, root, path } in subincludes.into_iter() {
516 let (match_fn, warnings) =
516 let (match_fn, warnings) =
517 get_ignore_function(vec![path.to_path_buf()], root)?;
517 get_ignore_function(vec![path.to_path_buf()], root)?;
518 all_warnings.extend(warnings);
518 all_warnings.extend(warnings);
519 prefixes.push(prefix.to_owned());
519 prefixes.push(prefix.to_owned());
520 submatchers.insert(prefix.to_owned(), match_fn);
520 submatchers.insert(prefix.to_owned(), match_fn);
521 }
521 }
522
522
523 let match_subinclude = move |filename: &HgPath| {
523 let match_subinclude = move |filename: &HgPath| {
524 for prefix in prefixes.iter() {
524 for prefix in prefixes.iter() {
525 if let Some(rel) = filename.relative_to(prefix) {
525 if let Some(rel) = filename.relative_to(prefix) {
526 if (submatchers.get(prefix).unwrap())(rel) {
526 if (submatchers[prefix])(rel) {
527 return true;
527 return true;
528 }
528 }
529 }
529 }
530 }
530 }
531 false
531 false
532 };
532 };
533
533
534 match_funcs.push(Box::new(match_subinclude));
534 match_funcs.push(Box::new(match_subinclude));
535 }
535 }
536
536
537 if !ignore_patterns.is_empty() {
537 if !ignore_patterns.is_empty() {
538 // Either do dumb matching if all patterns are rootfiles, or match
538 // Either do dumb matching if all patterns are rootfiles, or match
539 // with a regex.
539 // with a regex.
540 if ignore_patterns
540 if ignore_patterns
541 .iter()
541 .iter()
542 .all(|k| k.syntax == PatternSyntax::RootFiles)
542 .all(|k| k.syntax == PatternSyntax::RootFiles)
543 {
543 {
544 let dirs: HashSet<_> = ignore_patterns
544 let dirs: HashSet<_> = ignore_patterns
545 .iter()
545 .iter()
546 .map(|k| k.pattern.to_owned())
546 .map(|k| k.pattern.to_owned())
547 .collect();
547 .collect();
548 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
548 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
549
549
550 let match_func = move |path: &HgPath| -> bool {
550 let match_func = move |path: &HgPath| -> bool {
551 let path = path.as_bytes();
551 let path = path.as_bytes();
552 let i = path.iter().rfind(|a| **a == b'/');
552 let i = path.iter().rfind(|a| **a == b'/');
553 let dir = if let Some(i) = i {
553 let dir = if let Some(i) = i {
554 &path[..*i as usize]
554 &path[..*i as usize]
555 } else {
555 } else {
556 b"."
556 b"."
557 };
557 };
558 dirs.contains(dir.deref())
558 dirs.contains(dir.deref())
559 };
559 };
560 match_funcs.push(Box::new(match_func));
560 match_funcs.push(Box::new(match_func));
561
561
562 patterns.extend(b"rootfilesin: ");
562 patterns.extend(b"rootfilesin: ");
563 dirs_vec.sort();
563 dirs_vec.sort();
564 patterns.extend(dirs_vec.escaped_bytes());
564 patterns.extend(dirs_vec.escaped_bytes());
565 } else {
565 } else {
566 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
566 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
567 patterns = new_re;
567 patterns = new_re;
568 match_funcs.push(match_func)
568 match_funcs.push(match_func)
569 }
569 }
570 }
570 }
571
571
572 Ok(if match_funcs.len() == 1 {
572 Ok(if match_funcs.len() == 1 {
573 (patterns, match_funcs.remove(0), all_warnings)
573 (patterns, match_funcs.remove(0), all_warnings)
574 } else {
574 } else {
575 (
575 (
576 patterns,
576 patterns,
577 Box::new(move |f: &HgPath| -> bool {
577 Box::new(move |f: &HgPath| -> bool {
578 match_funcs.iter().any(|match_func| match_func(f))
578 match_funcs.iter().any(|match_func| match_func(f))
579 }),
579 }),
580 all_warnings,
580 all_warnings,
581 )
581 )
582 })
582 })
583 }
583 }
584
584
585 /// Parses all "ignore" files with their recursive includes and returns a
585 /// Parses all "ignore" files with their recursive includes and returns a
586 /// function that checks whether a given file (in the general sense) should be
586 /// function that checks whether a given file (in the general sense) should be
587 /// ignored.
587 /// ignored.
588 pub fn get_ignore_function<'a>(
588 pub fn get_ignore_function<'a>(
589 all_pattern_files: Vec<PathBuf>,
589 all_pattern_files: Vec<PathBuf>,
590 root_dir: impl AsRef<Path>,
590 root_dir: impl AsRef<Path>,
591 ) -> PatternResult<(
591 ) -> PatternResult<(
592 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
592 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
593 Vec<PatternFileWarning>,
593 Vec<PatternFileWarning>,
594 )> {
594 )> {
595 let mut all_patterns = vec![];
595 let mut all_patterns = vec![];
596 let mut all_warnings = vec![];
596 let mut all_warnings = vec![];
597
597
598 for pattern_file in all_pattern_files.into_iter() {
598 for pattern_file in all_pattern_files.into_iter() {
599 let (patterns, warnings) =
599 let (patterns, warnings) =
600 get_patterns_from_file(pattern_file, &root_dir)?;
600 get_patterns_from_file(pattern_file, &root_dir)?;
601
601
602 all_patterns.extend(patterns.to_owned());
602 all_patterns.extend(patterns.to_owned());
603 all_warnings.extend(warnings);
603 all_warnings.extend(warnings);
604 }
604 }
605 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
605 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
606 all_warnings.extend(warnings);
606 all_warnings.extend(warnings);
607 Ok((
607 Ok((
608 Box::new(move |path: &HgPath| matcher.matches(path)),
608 Box::new(move |path: &HgPath| matcher.matches(path)),
609 all_warnings,
609 all_warnings,
610 ))
610 ))
611 }
611 }
612
612
613 impl<'a> IncludeMatcher<'a> {
613 impl<'a> IncludeMatcher<'a> {
614 pub fn new(
614 pub fn new(
615 ignore_patterns: Vec<IgnorePattern>,
615 ignore_patterns: Vec<IgnorePattern>,
616 root_dir: impl AsRef<Path>,
616 root_dir: impl AsRef<Path>,
617 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
617 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
618 let (patterns, match_fn, warnings) =
618 let (patterns, match_fn, warnings) =
619 build_match(&ignore_patterns, root_dir)?;
619 build_match(&ignore_patterns, root_dir)?;
620 let RootsDirsAndParents {
620 let RootsDirsAndParents {
621 roots,
621 roots,
622 dirs,
622 dirs,
623 parents,
623 parents,
624 } = roots_dirs_and_parents(&ignore_patterns)?;
624 } = roots_dirs_and_parents(&ignore_patterns)?;
625
625
626 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
626 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
627 PatternSyntax::Path | PatternSyntax::RelPath => true,
627 PatternSyntax::Path | PatternSyntax::RelPath => true,
628 _ => false,
628 _ => false,
629 });
629 });
630
630
631 Ok((
631 Ok((
632 Self {
632 Self {
633 patterns,
633 patterns,
634 match_fn,
634 match_fn,
635 prefix,
635 prefix,
636 roots,
636 roots,
637 dirs,
637 dirs,
638 parents,
638 parents,
639 },
639 },
640 warnings,
640 warnings,
641 ))
641 ))
642 }
642 }
643
643
644 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
644 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
645 // TODO cache
645 // TODO cache
646 let thing = self
646 let thing = self
647 .dirs
647 .dirs
648 .iter()
648 .iter()
649 .chain(self.roots.iter())
649 .chain(self.roots.iter())
650 .chain(self.parents.iter());
650 .chain(self.parents.iter());
651 DirsChildrenMultiset::new(thing, Some(&self.parents))
651 DirsChildrenMultiset::new(thing, Some(&self.parents))
652 }
652 }
653 }
653 }
654
654
655 impl<'a> Display for IncludeMatcher<'a> {
655 impl<'a> Display for IncludeMatcher<'a> {
656 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
656 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
657 // XXX What about exact matches?
657 // XXX What about exact matches?
658 // I'm not sure it's worth it to clone the HashSet and keep it
658 // I'm not sure it's worth it to clone the HashSet and keep it
659 // around just in case someone wants to display the matcher, plus
659 // around just in case someone wants to display the matcher, plus
660 // it's going to be unreadable after a few entries, but we need to
660 // it's going to be unreadable after a few entries, but we need to
661 // inform in this display that exact matches are being used and are
661 // inform in this display that exact matches are being used and are
662 // (on purpose) missing from the `includes`.
662 // (on purpose) missing from the `includes`.
663 write!(
663 write!(
664 f,
664 f,
665 "IncludeMatcher(includes='{}')",
665 "IncludeMatcher(includes='{}')",
666 String::from_utf8_lossy(&self.patterns.escaped_bytes())
666 String::from_utf8_lossy(&self.patterns.escaped_bytes())
667 )
667 )
668 }
668 }
669 }
669 }
670
670
671 #[cfg(test)]
671 #[cfg(test)]
672 mod tests {
672 mod tests {
673 use super::*;
673 use super::*;
674 use pretty_assertions::assert_eq;
674 use pretty_assertions::assert_eq;
675 use std::path::Path;
675 use std::path::Path;
676
676
677 #[test]
677 #[test]
678 fn test_roots_and_dirs() {
678 fn test_roots_and_dirs() {
679 let pats = vec![
679 let pats = vec![
680 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
680 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
681 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
681 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
682 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
682 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
683 ];
683 ];
684 let (roots, dirs) = roots_and_dirs(&pats);
684 let (roots, dirs) = roots_and_dirs(&pats);
685
685
686 assert_eq!(
686 assert_eq!(
687 roots,
687 roots,
688 vec!(
688 vec!(
689 HgPathBuf::from_bytes(b"g/h"),
689 HgPathBuf::from_bytes(b"g/h"),
690 HgPathBuf::from_bytes(b"g/h"),
690 HgPathBuf::from_bytes(b"g/h"),
691 HgPathBuf::new()
691 HgPathBuf::new()
692 ),
692 ),
693 );
693 );
694 assert_eq!(dirs, vec!());
694 assert_eq!(dirs, vec!());
695 }
695 }
696
696
697 #[test]
697 #[test]
698 fn test_roots_dirs_and_parents() {
698 fn test_roots_dirs_and_parents() {
699 let pats = vec![
699 let pats = vec![
700 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
700 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
701 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
701 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
702 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
702 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
703 ];
703 ];
704
704
705 let mut roots = HashSet::new();
705 let mut roots = HashSet::new();
706 roots.insert(HgPathBuf::from_bytes(b"g/h"));
706 roots.insert(HgPathBuf::from_bytes(b"g/h"));
707 roots.insert(HgPathBuf::new());
707 roots.insert(HgPathBuf::new());
708
708
709 let dirs = HashSet::new();
709 let dirs = HashSet::new();
710
710
711 let mut parents = HashSet::new();
711 let mut parents = HashSet::new();
712 parents.insert(HgPathBuf::new());
712 parents.insert(HgPathBuf::new());
713 parents.insert(HgPathBuf::from_bytes(b"g"));
713 parents.insert(HgPathBuf::from_bytes(b"g"));
714
714
715 assert_eq!(
715 assert_eq!(
716 roots_dirs_and_parents(&pats).unwrap(),
716 roots_dirs_and_parents(&pats).unwrap(),
717 RootsDirsAndParents {
717 RootsDirsAndParents {
718 roots,
718 roots,
719 dirs,
719 dirs,
720 parents
720 parents
721 }
721 }
722 );
722 );
723 }
723 }
724
724
725 #[test]
725 #[test]
726 fn test_filematcher_visit_children_set() {
726 fn test_filematcher_visit_children_set() {
727 // Visitchildrenset
727 // Visitchildrenset
728 let files = vec![HgPath::new(b"dir/subdir/foo.txt")];
728 let files = vec![HgPath::new(b"dir/subdir/foo.txt")];
729 let matcher = FileMatcher::new(&files).unwrap();
729 let matcher = FileMatcher::new(&files).unwrap();
730
730
731 let mut set = HashSet::new();
731 let mut set = HashSet::new();
732 set.insert(HgPath::new(b"dir"));
732 set.insert(HgPath::new(b"dir"));
733 assert_eq!(
733 assert_eq!(
734 matcher.visit_children_set(HgPath::new(b"")),
734 matcher.visit_children_set(HgPath::new(b"")),
735 VisitChildrenSet::Set(set)
735 VisitChildrenSet::Set(set)
736 );
736 );
737
737
738 let mut set = HashSet::new();
738 let mut set = HashSet::new();
739 set.insert(HgPath::new(b"subdir"));
739 set.insert(HgPath::new(b"subdir"));
740 assert_eq!(
740 assert_eq!(
741 matcher.visit_children_set(HgPath::new(b"dir")),
741 matcher.visit_children_set(HgPath::new(b"dir")),
742 VisitChildrenSet::Set(set)
742 VisitChildrenSet::Set(set)
743 );
743 );
744
744
745 let mut set = HashSet::new();
745 let mut set = HashSet::new();
746 set.insert(HgPath::new(b"foo.txt"));
746 set.insert(HgPath::new(b"foo.txt"));
747 assert_eq!(
747 assert_eq!(
748 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
748 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
749 VisitChildrenSet::Set(set)
749 VisitChildrenSet::Set(set)
750 );
750 );
751
751
752 assert_eq!(
752 assert_eq!(
753 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
753 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
754 VisitChildrenSet::Empty
754 VisitChildrenSet::Empty
755 );
755 );
756 assert_eq!(
756 assert_eq!(
757 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
757 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
758 VisitChildrenSet::Empty
758 VisitChildrenSet::Empty
759 );
759 );
760 assert_eq!(
760 assert_eq!(
761 matcher.visit_children_set(HgPath::new(b"folder")),
761 matcher.visit_children_set(HgPath::new(b"folder")),
762 VisitChildrenSet::Empty
762 VisitChildrenSet::Empty
763 );
763 );
764 }
764 }
765
765
766 #[test]
766 #[test]
767 fn test_filematcher_visit_children_set_files_and_dirs() {
767 fn test_filematcher_visit_children_set_files_and_dirs() {
768 let files = vec![
768 let files = vec![
769 HgPath::new(b"rootfile.txt"),
769 HgPath::new(b"rootfile.txt"),
770 HgPath::new(b"a/file1.txt"),
770 HgPath::new(b"a/file1.txt"),
771 HgPath::new(b"a/b/file2.txt"),
771 HgPath::new(b"a/b/file2.txt"),
772 // No file in a/b/c
772 // No file in a/b/c
773 HgPath::new(b"a/b/c/d/file4.txt"),
773 HgPath::new(b"a/b/c/d/file4.txt"),
774 ];
774 ];
775 let matcher = FileMatcher::new(&files).unwrap();
775 let matcher = FileMatcher::new(&files).unwrap();
776
776
777 let mut set = HashSet::new();
777 let mut set = HashSet::new();
778 set.insert(HgPath::new(b"a"));
778 set.insert(HgPath::new(b"a"));
779 set.insert(HgPath::new(b"rootfile.txt"));
779 set.insert(HgPath::new(b"rootfile.txt"));
780 assert_eq!(
780 assert_eq!(
781 matcher.visit_children_set(HgPath::new(b"")),
781 matcher.visit_children_set(HgPath::new(b"")),
782 VisitChildrenSet::Set(set)
782 VisitChildrenSet::Set(set)
783 );
783 );
784
784
785 let mut set = HashSet::new();
785 let mut set = HashSet::new();
786 set.insert(HgPath::new(b"b"));
786 set.insert(HgPath::new(b"b"));
787 set.insert(HgPath::new(b"file1.txt"));
787 set.insert(HgPath::new(b"file1.txt"));
788 assert_eq!(
788 assert_eq!(
789 matcher.visit_children_set(HgPath::new(b"a")),
789 matcher.visit_children_set(HgPath::new(b"a")),
790 VisitChildrenSet::Set(set)
790 VisitChildrenSet::Set(set)
791 );
791 );
792
792
793 let mut set = HashSet::new();
793 let mut set = HashSet::new();
794 set.insert(HgPath::new(b"c"));
794 set.insert(HgPath::new(b"c"));
795 set.insert(HgPath::new(b"file2.txt"));
795 set.insert(HgPath::new(b"file2.txt"));
796 assert_eq!(
796 assert_eq!(
797 matcher.visit_children_set(HgPath::new(b"a/b")),
797 matcher.visit_children_set(HgPath::new(b"a/b")),
798 VisitChildrenSet::Set(set)
798 VisitChildrenSet::Set(set)
799 );
799 );
800
800
801 let mut set = HashSet::new();
801 let mut set = HashSet::new();
802 set.insert(HgPath::new(b"d"));
802 set.insert(HgPath::new(b"d"));
803 assert_eq!(
803 assert_eq!(
804 matcher.visit_children_set(HgPath::new(b"a/b/c")),
804 matcher.visit_children_set(HgPath::new(b"a/b/c")),
805 VisitChildrenSet::Set(set)
805 VisitChildrenSet::Set(set)
806 );
806 );
807 let mut set = HashSet::new();
807 let mut set = HashSet::new();
808 set.insert(HgPath::new(b"file4.txt"));
808 set.insert(HgPath::new(b"file4.txt"));
809 assert_eq!(
809 assert_eq!(
810 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
810 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
811 VisitChildrenSet::Set(set)
811 VisitChildrenSet::Set(set)
812 );
812 );
813
813
814 assert_eq!(
814 assert_eq!(
815 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
815 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
816 VisitChildrenSet::Empty
816 VisitChildrenSet::Empty
817 );
817 );
818 assert_eq!(
818 assert_eq!(
819 matcher.visit_children_set(HgPath::new(b"folder")),
819 matcher.visit_children_set(HgPath::new(b"folder")),
820 VisitChildrenSet::Empty
820 VisitChildrenSet::Empty
821 );
821 );
822 }
822 }
823
823
824 #[test]
824 #[test]
825 fn test_includematcher() {
825 fn test_includematcher() {
826 // VisitchildrensetPrefix
826 // VisitchildrensetPrefix
827 let (matcher, _) = IncludeMatcher::new(
827 let (matcher, _) = IncludeMatcher::new(
828 vec![IgnorePattern::new(
828 vec![IgnorePattern::new(
829 PatternSyntax::RelPath,
829 PatternSyntax::RelPath,
830 b"dir/subdir",
830 b"dir/subdir",
831 Path::new(""),
831 Path::new(""),
832 )],
832 )],
833 "",
833 "",
834 )
834 )
835 .unwrap();
835 .unwrap();
836
836
837 let mut set = HashSet::new();
837 let mut set = HashSet::new();
838 set.insert(HgPath::new(b"dir"));
838 set.insert(HgPath::new(b"dir"));
839 assert_eq!(
839 assert_eq!(
840 matcher.visit_children_set(HgPath::new(b"")),
840 matcher.visit_children_set(HgPath::new(b"")),
841 VisitChildrenSet::Set(set)
841 VisitChildrenSet::Set(set)
842 );
842 );
843
843
844 let mut set = HashSet::new();
844 let mut set = HashSet::new();
845 set.insert(HgPath::new(b"subdir"));
845 set.insert(HgPath::new(b"subdir"));
846 assert_eq!(
846 assert_eq!(
847 matcher.visit_children_set(HgPath::new(b"dir")),
847 matcher.visit_children_set(HgPath::new(b"dir")),
848 VisitChildrenSet::Set(set)
848 VisitChildrenSet::Set(set)
849 );
849 );
850 assert_eq!(
850 assert_eq!(
851 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
851 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
852 VisitChildrenSet::Recursive
852 VisitChildrenSet::Recursive
853 );
853 );
854 // OPT: This should probably be 'all' if its parent is?
854 // OPT: This should probably be 'all' if its parent is?
855 assert_eq!(
855 assert_eq!(
856 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
856 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
857 VisitChildrenSet::This
857 VisitChildrenSet::This
858 );
858 );
859 assert_eq!(
859 assert_eq!(
860 matcher.visit_children_set(HgPath::new(b"folder")),
860 matcher.visit_children_set(HgPath::new(b"folder")),
861 VisitChildrenSet::Empty
861 VisitChildrenSet::Empty
862 );
862 );
863
863
864 // VisitchildrensetRootfilesin
864 // VisitchildrensetRootfilesin
865 let (matcher, _) = IncludeMatcher::new(
865 let (matcher, _) = IncludeMatcher::new(
866 vec![IgnorePattern::new(
866 vec![IgnorePattern::new(
867 PatternSyntax::RootFiles,
867 PatternSyntax::RootFiles,
868 b"dir/subdir",
868 b"dir/subdir",
869 Path::new(""),
869 Path::new(""),
870 )],
870 )],
871 "",
871 "",
872 )
872 )
873 .unwrap();
873 .unwrap();
874
874
875 let mut set = HashSet::new();
875 let mut set = HashSet::new();
876 set.insert(HgPath::new(b"dir"));
876 set.insert(HgPath::new(b"dir"));
877 assert_eq!(
877 assert_eq!(
878 matcher.visit_children_set(HgPath::new(b"")),
878 matcher.visit_children_set(HgPath::new(b"")),
879 VisitChildrenSet::Set(set)
879 VisitChildrenSet::Set(set)
880 );
880 );
881
881
882 let mut set = HashSet::new();
882 let mut set = HashSet::new();
883 set.insert(HgPath::new(b"subdir"));
883 set.insert(HgPath::new(b"subdir"));
884 assert_eq!(
884 assert_eq!(
885 matcher.visit_children_set(HgPath::new(b"dir")),
885 matcher.visit_children_set(HgPath::new(b"dir")),
886 VisitChildrenSet::Set(set)
886 VisitChildrenSet::Set(set)
887 );
887 );
888
888
889 assert_eq!(
889 assert_eq!(
890 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
890 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
891 VisitChildrenSet::This
891 VisitChildrenSet::This
892 );
892 );
893 assert_eq!(
893 assert_eq!(
894 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
894 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
895 VisitChildrenSet::Empty
895 VisitChildrenSet::Empty
896 );
896 );
897 assert_eq!(
897 assert_eq!(
898 matcher.visit_children_set(HgPath::new(b"folder")),
898 matcher.visit_children_set(HgPath::new(b"folder")),
899 VisitChildrenSet::Empty
899 VisitChildrenSet::Empty
900 );
900 );
901
901
902 // VisitchildrensetGlob
902 // VisitchildrensetGlob
903 let (matcher, _) = IncludeMatcher::new(
903 let (matcher, _) = IncludeMatcher::new(
904 vec![IgnorePattern::new(
904 vec![IgnorePattern::new(
905 PatternSyntax::Glob,
905 PatternSyntax::Glob,
906 b"dir/z*",
906 b"dir/z*",
907 Path::new(""),
907 Path::new(""),
908 )],
908 )],
909 "",
909 "",
910 )
910 )
911 .unwrap();
911 .unwrap();
912
912
913 let mut set = HashSet::new();
913 let mut set = HashSet::new();
914 set.insert(HgPath::new(b"dir"));
914 set.insert(HgPath::new(b"dir"));
915 assert_eq!(
915 assert_eq!(
916 matcher.visit_children_set(HgPath::new(b"")),
916 matcher.visit_children_set(HgPath::new(b"")),
917 VisitChildrenSet::Set(set)
917 VisitChildrenSet::Set(set)
918 );
918 );
919 assert_eq!(
919 assert_eq!(
920 matcher.visit_children_set(HgPath::new(b"folder")),
920 matcher.visit_children_set(HgPath::new(b"folder")),
921 VisitChildrenSet::Empty
921 VisitChildrenSet::Empty
922 );
922 );
923 assert_eq!(
923 assert_eq!(
924 matcher.visit_children_set(HgPath::new(b"dir")),
924 matcher.visit_children_set(HgPath::new(b"dir")),
925 VisitChildrenSet::This
925 VisitChildrenSet::This
926 );
926 );
927 // OPT: these should probably be set().
927 // OPT: these should probably be set().
928 assert_eq!(
928 assert_eq!(
929 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
929 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
930 VisitChildrenSet::This
930 VisitChildrenSet::This
931 );
931 );
932 assert_eq!(
932 assert_eq!(
933 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
933 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
934 VisitChildrenSet::This
934 VisitChildrenSet::This
935 );
935 );
936 }
936 }
937 }
937 }
@@ -1,56 +1,61
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Mercurial concepts for handling revision history
6 //! Mercurial concepts for handling revision history
7
7
8 pub mod node;
8 pub mod node;
9 pub mod nodemap;
9 pub mod nodemap;
10 pub use node::{Node, NodeError, NodePrefix, NodePrefixRef};
10 pub use node::{Node, NodeError, NodePrefix, NodePrefixRef};
11
11
12 /// Mercurial revision numbers
12 /// Mercurial revision numbers
13 ///
13 ///
14 /// As noted in revlog.c, revision numbers are actually encoded in
14 /// As noted in revlog.c, revision numbers are actually encoded in
15 /// 4 bytes, and are liberally converted to ints, whence the i32
15 /// 4 bytes, and are liberally converted to ints, whence the i32
16 pub type Revision = i32;
16 pub type Revision = i32;
17
17
18 /// Marker expressing the absence of a parent
18 /// Marker expressing the absence of a parent
19 ///
19 ///
20 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
20 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
21 /// to be smaller than all existing revisions.
21 /// to be smaller than all existing revisions.
22 pub const NULL_REVISION: Revision = -1;
22 pub const NULL_REVISION: Revision = -1;
23
23
24 /// Same as `mercurial.node.wdirrev`
24 /// Same as `mercurial.node.wdirrev`
25 ///
25 ///
26 /// This is also equal to `i32::max_value()`, but it's better to spell
26 /// This is also equal to `i32::max_value()`, but it's better to spell
27 /// it out explicitely, same as in `mercurial.node`
27 /// it out explicitely, same as in `mercurial.node`
28 #[allow(clippy::unreadable_literal)]
28 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
29 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
29
30
30 /// The simplest expression of what we need of Mercurial DAGs.
31 /// The simplest expression of what we need of Mercurial DAGs.
31 pub trait Graph {
32 pub trait Graph {
32 /// Return the two parents of the given `Revision`.
33 /// Return the two parents of the given `Revision`.
33 ///
34 ///
34 /// Each of the parents can be independently `NULL_REVISION`
35 /// Each of the parents can be independently `NULL_REVISION`
35 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
36 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
36 }
37 }
37
38
38 #[derive(Clone, Debug, PartialEq)]
39 #[derive(Clone, Debug, PartialEq)]
39 pub enum GraphError {
40 pub enum GraphError {
40 ParentOutOfRange(Revision),
41 ParentOutOfRange(Revision),
41 WorkingDirectoryUnsupported,
42 WorkingDirectoryUnsupported,
42 }
43 }
43
44
44 /// The Mercurial Revlog Index
45 /// The Mercurial Revlog Index
45 ///
46 ///
46 /// This is currently limited to the minimal interface that is needed for
47 /// This is currently limited to the minimal interface that is needed for
47 /// the [`nodemap`](nodemap/index.html) module
48 /// the [`nodemap`](nodemap/index.html) module
48 pub trait RevlogIndex {
49 pub trait RevlogIndex {
49 /// Total number of Revisions referenced in this index
50 /// Total number of Revisions referenced in this index
50 fn len(&self) -> usize;
51 fn len(&self) -> usize;
51
52
53 fn is_empty(&self) -> bool {
54 self.len() == 0
55 }
56
52 /// Return a reference to the Node or `None` if rev is out of bounds
57 /// Return a reference to the Node or `None` if rev is out of bounds
53 ///
58 ///
54 /// `NULL_REVISION` is not considered to be out of bounds.
59 /// `NULL_REVISION` is not considered to be out of bounds.
55 fn node(&self, rev: Revision) -> Option<&Node>;
60 fn node(&self, rev: Revision) -> Option<&Node>;
56 }
61 }
@@ -1,429 +1,433
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 //
2 //
3 // This software may be used and distributed according to the terms of the
3 // This software may be used and distributed according to the terms of the
4 // GNU General Public License version 2 or any later version.
4 // GNU General Public License version 2 or any later version.
5
5
6 //! Definitions and utilities for Revision nodes
6 //! Definitions and utilities for Revision nodes
7 //!
7 //!
8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 //! of a revision.
9 //! of a revision.
10
10
11 use hex::{self, FromHex, FromHexError};
11 use hex::{self, FromHex, FromHexError};
12
12
13 /// The length in bytes of a `Node`
13 /// The length in bytes of a `Node`
14 ///
14 ///
15 /// This constant is meant to ease refactors of this module, and
15 /// This constant is meant to ease refactors of this module, and
16 /// are private so that calling code does not expect all nodes have
16 /// are private so that calling code does not expect all nodes have
17 /// the same size, should we support several formats concurrently in
17 /// the same size, should we support several formats concurrently in
18 /// the future.
18 /// the future.
19 const NODE_BYTES_LENGTH: usize = 20;
19 const NODE_BYTES_LENGTH: usize = 20;
20
20
21 /// The length in bytes of a `Node`
21 /// The length in bytes of a `Node`
22 ///
22 ///
23 /// see also `NODES_BYTES_LENGTH` about it being private.
23 /// see also `NODES_BYTES_LENGTH` about it being private.
24 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
24 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
25
25
26 /// Private alias for readability and to ease future change
26 /// Private alias for readability and to ease future change
27 type NodeData = [u8; NODE_BYTES_LENGTH];
27 type NodeData = [u8; NODE_BYTES_LENGTH];
28
28
29 /// Binary revision SHA
29 /// Binary revision SHA
30 ///
30 ///
31 /// ## Future changes of hash size
31 /// ## Future changes of hash size
32 ///
32 ///
33 /// To accomodate future changes of hash size, Rust callers
33 /// To accomodate future changes of hash size, Rust callers
34 /// should use the conversion methods at the boundaries (FFI, actual
34 /// should use the conversion methods at the boundaries (FFI, actual
35 /// computation of hashes and I/O) only, and only if required.
35 /// computation of hashes and I/O) only, and only if required.
36 ///
36 ///
37 /// All other callers outside of unit tests should just handle `Node` values
37 /// All other callers outside of unit tests should just handle `Node` values
38 /// and never make any assumption on the actual length, using [`nybbles_len`]
38 /// and never make any assumption on the actual length, using [`nybbles_len`]
39 /// if they need a loop boundary.
39 /// if they need a loop boundary.
40 ///
40 ///
41 /// All methods that create a `Node` either take a type that enforces
41 /// All methods that create a `Node` either take a type that enforces
42 /// the size or fail immediately at runtime with [`ExactLengthRequired`].
42 /// the size or fail immediately at runtime with [`ExactLengthRequired`].
43 ///
43 ///
44 /// [`nybbles_len`]: #method.nybbles_len
44 /// [`nybbles_len`]: #method.nybbles_len
45 /// [`ExactLengthRequired`]: struct.NodeError#variant.ExactLengthRequired
45 /// [`ExactLengthRequired`]: struct.NodeError#variant.ExactLengthRequired
46 #[derive(Clone, Debug, PartialEq)]
46 #[derive(Clone, Debug, PartialEq)]
47 #[repr(transparent)]
47 #[repr(transparent)]
48 pub struct Node {
48 pub struct Node {
49 data: NodeData,
49 data: NodeData,
50 }
50 }
51
51
52 /// The node value for NULL_REVISION
52 /// The node value for NULL_REVISION
53 pub const NULL_NODE: Node = Node {
53 pub const NULL_NODE: Node = Node {
54 data: [0; NODE_BYTES_LENGTH],
54 data: [0; NODE_BYTES_LENGTH],
55 };
55 };
56
56
57 impl From<NodeData> for Node {
57 impl From<NodeData> for Node {
58 fn from(data: NodeData) -> Node {
58 fn from(data: NodeData) -> Node {
59 Node { data }
59 Node { data }
60 }
60 }
61 }
61 }
62
62
63 #[derive(Debug, PartialEq)]
63 #[derive(Debug, PartialEq)]
64 pub enum NodeError {
64 pub enum NodeError {
65 ExactLengthRequired(usize, String),
65 ExactLengthRequired(usize, String),
66 PrefixTooLong(String),
66 PrefixTooLong(String),
67 HexError(FromHexError, String),
67 HexError(FromHexError, String),
68 }
68 }
69
69
70 /// Low level utility function, also for prefixes
70 /// Low level utility function, also for prefixes
71 fn get_nybble(s: &[u8], i: usize) -> u8 {
71 fn get_nybble(s: &[u8], i: usize) -> u8 {
72 if i % 2 == 0 {
72 if i % 2 == 0 {
73 s[i / 2] >> 4
73 s[i / 2] >> 4
74 } else {
74 } else {
75 s[i / 2] & 0x0f
75 s[i / 2] & 0x0f
76 }
76 }
77 }
77 }
78
78
79 impl Node {
79 impl Node {
80 /// Retrieve the `i`th half-byte of the binary data.
80 /// Retrieve the `i`th half-byte of the binary data.
81 ///
81 ///
82 /// This is also the `i`th hexadecimal digit in numeric form,
82 /// This is also the `i`th hexadecimal digit in numeric form,
83 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
83 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
84 pub fn get_nybble(&self, i: usize) -> u8 {
84 pub fn get_nybble(&self, i: usize) -> u8 {
85 get_nybble(&self.data, i)
85 get_nybble(&self.data, i)
86 }
86 }
87
87
88 /// Length of the data, in nybbles
88 /// Length of the data, in nybbles
89 pub fn nybbles_len(&self) -> usize {
89 pub fn nybbles_len(&self) -> usize {
90 // public exposure as an instance method only, so that we can
90 // public exposure as an instance method only, so that we can
91 // easily support several sizes of hashes if needed in the future.
91 // easily support several sizes of hashes if needed in the future.
92 NODE_NYBBLES_LENGTH
92 NODE_NYBBLES_LENGTH
93 }
93 }
94
94
95 /// Convert from hexadecimal string representation
95 /// Convert from hexadecimal string representation
96 ///
96 ///
97 /// Exact length is required.
97 /// Exact length is required.
98 ///
98 ///
99 /// To be used in FFI and I/O only, in order to facilitate future
99 /// To be used in FFI and I/O only, in order to facilitate future
100 /// changes of hash format.
100 /// changes of hash format.
101 pub fn from_hex(hex: &str) -> Result<Node, NodeError> {
101 pub fn from_hex(hex: &str) -> Result<Node, NodeError> {
102 Ok(NodeData::from_hex(hex)
102 Ok(NodeData::from_hex(hex)
103 .map_err(|e| NodeError::from((e, hex)))?
103 .map_err(|e| NodeError::from((e, hex)))?
104 .into())
104 .into())
105 }
105 }
106
106
107 /// Convert to hexadecimal string representation
107 /// Convert to hexadecimal string representation
108 ///
108 ///
109 /// To be used in FFI and I/O only, in order to facilitate future
109 /// To be used in FFI and I/O only, in order to facilitate future
110 /// changes of hash format.
110 /// changes of hash format.
111 pub fn encode_hex(&self) -> String {
111 pub fn encode_hex(&self) -> String {
112 hex::encode(self.data)
112 hex::encode(self.data)
113 }
113 }
114
114
115 /// Provide access to binary data
115 /// Provide access to binary data
116 ///
116 ///
117 /// This is needed by FFI layers, for instance to return expected
117 /// This is needed by FFI layers, for instance to return expected
118 /// binary values to Python.
118 /// binary values to Python.
119 pub fn as_bytes(&self) -> &[u8] {
119 pub fn as_bytes(&self) -> &[u8] {
120 &self.data
120 &self.data
121 }
121 }
122 }
122 }
123
123
124 impl<T: AsRef<str>> From<(FromHexError, T)> for NodeError {
124 impl<T: AsRef<str>> From<(FromHexError, T)> for NodeError {
125 fn from(err_offender: (FromHexError, T)) -> Self {
125 fn from(err_offender: (FromHexError, T)) -> Self {
126 let (err, offender) = err_offender;
126 let (err, offender) = err_offender;
127 match err {
127 match err {
128 FromHexError::InvalidStringLength => {
128 FromHexError::InvalidStringLength => {
129 NodeError::ExactLengthRequired(
129 NodeError::ExactLengthRequired(
130 NODE_NYBBLES_LENGTH,
130 NODE_NYBBLES_LENGTH,
131 offender.as_ref().to_owned(),
131 offender.as_ref().to_owned(),
132 )
132 )
133 }
133 }
134 _ => NodeError::HexError(err, offender.as_ref().to_owned()),
134 _ => NodeError::HexError(err, offender.as_ref().to_owned()),
135 }
135 }
136 }
136 }
137 }
137 }
138
138
139 /// The beginning of a binary revision SHA.
139 /// The beginning of a binary revision SHA.
140 ///
140 ///
141 /// Since it can potentially come from an hexadecimal representation with
141 /// Since it can potentially come from an hexadecimal representation with
142 /// odd length, it needs to carry around whether the last 4 bits are relevant
142 /// odd length, it needs to carry around whether the last 4 bits are relevant
143 /// or not.
143 /// or not.
144 #[derive(Debug, PartialEq)]
144 #[derive(Debug, PartialEq)]
145 pub struct NodePrefix {
145 pub struct NodePrefix {
146 buf: Vec<u8>,
146 buf: Vec<u8>,
147 is_odd: bool,
147 is_odd: bool,
148 }
148 }
149
149
150 impl NodePrefix {
150 impl NodePrefix {
151 /// Convert from hexadecimal string representation
151 /// Convert from hexadecimal string representation
152 ///
152 ///
153 /// Similarly to `hex::decode`, can be used with Unicode string types
153 /// Similarly to `hex::decode`, can be used with Unicode string types
154 /// (`String`, `&str`) as well as bytes.
154 /// (`String`, `&str`) as well as bytes.
155 ///
155 ///
156 /// To be used in FFI and I/O only, in order to facilitate future
156 /// To be used in FFI and I/O only, in order to facilitate future
157 /// changes of hash format.
157 /// changes of hash format.
158 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, NodeError> {
158 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, NodeError> {
159 let hex = hex.as_ref();
159 let hex = hex.as_ref();
160 let len = hex.len();
160 let len = hex.len();
161 if len > NODE_NYBBLES_LENGTH {
161 if len > NODE_NYBBLES_LENGTH {
162 return Err(NodeError::PrefixTooLong(
162 return Err(NodeError::PrefixTooLong(
163 String::from_utf8_lossy(hex).to_owned().to_string(),
163 String::from_utf8_lossy(hex).to_owned().to_string(),
164 ));
164 ));
165 }
165 }
166
166
167 let is_odd = len % 2 == 1;
167 let is_odd = len % 2 == 1;
168 let even_part = if is_odd { &hex[..len - 1] } else { hex };
168 let even_part = if is_odd { &hex[..len - 1] } else { hex };
169 let mut buf: Vec<u8> = Vec::from_hex(&even_part)
169 let mut buf: Vec<u8> = Vec::from_hex(&even_part)
170 .map_err(|e| (e, String::from_utf8_lossy(hex)))?;
170 .map_err(|e| (e, String::from_utf8_lossy(hex)))?;
171
171
172 if is_odd {
172 if is_odd {
173 let latest_char = char::from(hex[len - 1]);
173 let latest_char = char::from(hex[len - 1]);
174 let latest_nybble = latest_char.to_digit(16).ok_or_else(|| {
174 let latest_nybble = latest_char.to_digit(16).ok_or_else(|| {
175 (
175 (
176 FromHexError::InvalidHexCharacter {
176 FromHexError::InvalidHexCharacter {
177 c: latest_char,
177 c: latest_char,
178 index: len - 1,
178 index: len - 1,
179 },
179 },
180 String::from_utf8_lossy(hex),
180 String::from_utf8_lossy(hex),
181 )
181 )
182 })? as u8;
182 })? as u8;
183 buf.push(latest_nybble << 4);
183 buf.push(latest_nybble << 4);
184 }
184 }
185 Ok(NodePrefix { buf, is_odd })
185 Ok(NodePrefix { buf, is_odd })
186 }
186 }
187
187
188 pub fn borrow(&self) -> NodePrefixRef {
188 pub fn borrow(&self) -> NodePrefixRef {
189 NodePrefixRef {
189 NodePrefixRef {
190 buf: &self.buf,
190 buf: &self.buf,
191 is_odd: self.is_odd,
191 is_odd: self.is_odd,
192 }
192 }
193 }
193 }
194 }
194 }
195
195
196 #[derive(Clone, Debug, PartialEq)]
196 #[derive(Clone, Debug, PartialEq)]
197 pub struct NodePrefixRef<'a> {
197 pub struct NodePrefixRef<'a> {
198 buf: &'a [u8],
198 buf: &'a [u8],
199 is_odd: bool,
199 is_odd: bool,
200 }
200 }
201
201
202 impl<'a> NodePrefixRef<'a> {
202 impl<'a> NodePrefixRef<'a> {
203 pub fn len(&self) -> usize {
203 pub fn len(&self) -> usize {
204 if self.is_odd {
204 if self.is_odd {
205 self.buf.len() * 2 - 1
205 self.buf.len() * 2 - 1
206 } else {
206 } else {
207 self.buf.len() * 2
207 self.buf.len() * 2
208 }
208 }
209 }
209 }
210
210
211 pub fn is_empty(&self) -> bool {
212 self.len() == 0
213 }
214
211 pub fn is_prefix_of(&self, node: &Node) -> bool {
215 pub fn is_prefix_of(&self, node: &Node) -> bool {
212 if self.is_odd {
216 if self.is_odd {
213 let buf = self.buf;
217 let buf = self.buf;
214 let last_pos = buf.len() - 1;
218 let last_pos = buf.len() - 1;
215 node.data.starts_with(buf.split_at(last_pos).0)
219 node.data.starts_with(buf.split_at(last_pos).0)
216 && node.data[last_pos] >> 4 == buf[last_pos] >> 4
220 && node.data[last_pos] >> 4 == buf[last_pos] >> 4
217 } else {
221 } else {
218 node.data.starts_with(self.buf)
222 node.data.starts_with(self.buf)
219 }
223 }
220 }
224 }
221
225
222 /// Retrieve the `i`th half-byte from the prefix.
226 /// Retrieve the `i`th half-byte from the prefix.
223 ///
227 ///
224 /// This is also the `i`th hexadecimal digit in numeric form,
228 /// This is also the `i`th hexadecimal digit in numeric form,
225 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
229 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
226 pub fn get_nybble(&self, i: usize) -> u8 {
230 pub fn get_nybble(&self, i: usize) -> u8 {
227 assert!(i < self.len());
231 assert!(i < self.len());
228 get_nybble(self.buf, i)
232 get_nybble(self.buf, i)
229 }
233 }
230
234
231 /// Return the index first nybble that's different from `node`
235 /// Return the index first nybble that's different from `node`
232 ///
236 ///
233 /// If the return value is `None` that means that `self` is
237 /// If the return value is `None` that means that `self` is
234 /// a prefix of `node`, but the current method is a bit slower
238 /// a prefix of `node`, but the current method is a bit slower
235 /// than `is_prefix_of`.
239 /// than `is_prefix_of`.
236 ///
240 ///
237 /// Returned index is as in `get_nybble`, i.e., starting at 0.
241 /// Returned index is as in `get_nybble`, i.e., starting at 0.
238 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
242 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
239 let buf = self.buf;
243 let buf = self.buf;
240 let until = if self.is_odd {
244 let until = if self.is_odd {
241 buf.len() - 1
245 buf.len() - 1
242 } else {
246 } else {
243 buf.len()
247 buf.len()
244 };
248 };
245 for i in 0..until {
249 for (i, item) in buf.iter().enumerate().take(until) {
246 if buf[i] != node.data[i] {
250 if *item != node.data[i] {
247 if buf[i] & 0xf0 == node.data[i] & 0xf0 {
251 return if *item & 0xf0 == node.data[i] & 0xf0 {
248 return Some(2 * i + 1);
252 Some(2 * i + 1)
249 } else {
253 } else {
250 return Some(2 * i);
254 Some(2 * i)
251 }
255 };
252 }
256 }
253 }
257 }
254 if self.is_odd && buf[until] & 0xf0 != node.data[until] & 0xf0 {
258 if self.is_odd && buf[until] & 0xf0 != node.data[until] & 0xf0 {
255 Some(until * 2)
259 Some(until * 2)
256 } else {
260 } else {
257 None
261 None
258 }
262 }
259 }
263 }
260 }
264 }
261
265
262 /// A shortcut for full `Node` references
266 /// A shortcut for full `Node` references
263 impl<'a> From<&'a Node> for NodePrefixRef<'a> {
267 impl<'a> From<&'a Node> for NodePrefixRef<'a> {
264 fn from(node: &'a Node) -> Self {
268 fn from(node: &'a Node) -> Self {
265 NodePrefixRef {
269 NodePrefixRef {
266 buf: &node.data,
270 buf: &node.data,
267 is_odd: false,
271 is_odd: false,
268 }
272 }
269 }
273 }
270 }
274 }
271
275
272 #[cfg(test)]
276 #[cfg(test)]
273 mod tests {
277 mod tests {
274 use super::*;
278 use super::*;
275
279
276 fn sample_node() -> Node {
280 fn sample_node() -> Node {
277 let mut data = [0; NODE_BYTES_LENGTH];
281 let mut data = [0; NODE_BYTES_LENGTH];
278 data.copy_from_slice(&[
282 data.copy_from_slice(&[
279 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
283 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
280 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
284 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
281 ]);
285 ]);
282 data.into()
286 data.into()
283 }
287 }
284
288
285 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
289 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
286 ///
290 ///
287 /// The padding is made with zeros
291 /// The padding is made with zeros
288 pub fn hex_pad_right(hex: &str) -> String {
292 pub fn hex_pad_right(hex: &str) -> String {
289 let mut res = hex.to_string();
293 let mut res = hex.to_string();
290 while res.len() < NODE_NYBBLES_LENGTH {
294 while res.len() < NODE_NYBBLES_LENGTH {
291 res.push('0');
295 res.push('0');
292 }
296 }
293 res
297 res
294 }
298 }
295
299
296 fn sample_node_hex() -> String {
300 fn sample_node_hex() -> String {
297 hex_pad_right("0123456789abcdeffedcba9876543210deadbeef")
301 hex_pad_right("0123456789abcdeffedcba9876543210deadbeef")
298 }
302 }
299
303
300 #[test]
304 #[test]
301 fn test_node_from_hex() {
305 fn test_node_from_hex() {
302 assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node()));
306 assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node()));
303
307
304 let mut short = hex_pad_right("0123");
308 let mut short = hex_pad_right("0123");
305 short.pop();
309 short.pop();
306 short.pop();
310 short.pop();
307 assert_eq!(
311 assert_eq!(
308 Node::from_hex(&short),
312 Node::from_hex(&short),
309 Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)),
313 Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)),
310 );
314 );
311
315
312 let not_hex = hex_pad_right("012... oops");
316 let not_hex = hex_pad_right("012... oops");
313 assert_eq!(
317 assert_eq!(
314 Node::from_hex(&not_hex),
318 Node::from_hex(&not_hex),
315 Err(NodeError::HexError(
319 Err(NodeError::HexError(
316 FromHexError::InvalidHexCharacter { c: '.', index: 3 },
320 FromHexError::InvalidHexCharacter { c: '.', index: 3 },
317 not_hex,
321 not_hex,
318 )),
322 )),
319 );
323 );
320 }
324 }
321
325
322 #[test]
326 #[test]
323 fn test_node_encode_hex() {
327 fn test_node_encode_hex() {
324 assert_eq!(sample_node().encode_hex(), sample_node_hex());
328 assert_eq!(sample_node().encode_hex(), sample_node_hex());
325 }
329 }
326
330
327 #[test]
331 #[test]
328 fn test_prefix_from_hex() -> Result<(), NodeError> {
332 fn test_prefix_from_hex() -> Result<(), NodeError> {
329 assert_eq!(
333 assert_eq!(
330 NodePrefix::from_hex("0e1")?,
334 NodePrefix::from_hex("0e1")?,
331 NodePrefix {
335 NodePrefix {
332 buf: vec![14, 16],
336 buf: vec![14, 16],
333 is_odd: true
337 is_odd: true
334 }
338 }
335 );
339 );
336 assert_eq!(
340 assert_eq!(
337 NodePrefix::from_hex("0e1a")?,
341 NodePrefix::from_hex("0e1a")?,
338 NodePrefix {
342 NodePrefix {
339 buf: vec![14, 26],
343 buf: vec![14, 26],
340 is_odd: false
344 is_odd: false
341 }
345 }
342 );
346 );
343
347
344 // checking limit case
348 // checking limit case
345 let node_as_vec = sample_node().data.iter().cloned().collect();
349 let node_as_vec = sample_node().data.iter().cloned().collect();
346 assert_eq!(
350 assert_eq!(
347 NodePrefix::from_hex(sample_node_hex())?,
351 NodePrefix::from_hex(sample_node_hex())?,
348 NodePrefix {
352 NodePrefix {
349 buf: node_as_vec,
353 buf: node_as_vec,
350 is_odd: false
354 is_odd: false
351 }
355 }
352 );
356 );
353
357
354 Ok(())
358 Ok(())
355 }
359 }
356
360
357 #[test]
361 #[test]
358 fn test_prefix_from_hex_errors() {
362 fn test_prefix_from_hex_errors() {
359 assert_eq!(
363 assert_eq!(
360 NodePrefix::from_hex("testgr"),
364 NodePrefix::from_hex("testgr"),
361 Err(NodeError::HexError(
365 Err(NodeError::HexError(
362 FromHexError::InvalidHexCharacter { c: 't', index: 0 },
366 FromHexError::InvalidHexCharacter { c: 't', index: 0 },
363 "testgr".to_string()
367 "testgr".to_string()
364 ))
368 ))
365 );
369 );
366 let mut long = NULL_NODE.encode_hex();
370 let mut long = NULL_NODE.encode_hex();
367 long.push('c');
371 long.push('c');
368 match NodePrefix::from_hex(&long)
372 match NodePrefix::from_hex(&long)
369 .expect_err("should be refused as too long")
373 .expect_err("should be refused as too long")
370 {
374 {
371 NodeError::PrefixTooLong(s) => assert_eq!(s, long),
375 NodeError::PrefixTooLong(s) => assert_eq!(s, long),
372 err => panic!(format!("Should have been TooLong, got {:?}", err)),
376 err => panic!(format!("Should have been TooLong, got {:?}", err)),
373 }
377 }
374 }
378 }
375
379
376 #[test]
380 #[test]
377 fn test_is_prefix_of() -> Result<(), NodeError> {
381 fn test_is_prefix_of() -> Result<(), NodeError> {
378 let mut node_data = [0; NODE_BYTES_LENGTH];
382 let mut node_data = [0; NODE_BYTES_LENGTH];
379 node_data[0] = 0x12;
383 node_data[0] = 0x12;
380 node_data[1] = 0xca;
384 node_data[1] = 0xca;
381 let node = Node::from(node_data);
385 let node = Node::from(node_data);
382 assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node));
386 assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node));
383 assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node));
387 assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node));
384 assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node));
388 assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node));
385 assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node));
389 assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node));
386 Ok(())
390 Ok(())
387 }
391 }
388
392
389 #[test]
393 #[test]
390 fn test_get_nybble() -> Result<(), NodeError> {
394 fn test_get_nybble() -> Result<(), NodeError> {
391 let prefix = NodePrefix::from_hex("dead6789cafe")?;
395 let prefix = NodePrefix::from_hex("dead6789cafe")?;
392 assert_eq!(prefix.borrow().get_nybble(0), 13);
396 assert_eq!(prefix.borrow().get_nybble(0), 13);
393 assert_eq!(prefix.borrow().get_nybble(7), 9);
397 assert_eq!(prefix.borrow().get_nybble(7), 9);
394 Ok(())
398 Ok(())
395 }
399 }
396
400
397 #[test]
401 #[test]
398 fn test_first_different_nybble_even_prefix() {
402 fn test_first_different_nybble_even_prefix() {
399 let prefix = NodePrefix::from_hex("12ca").unwrap();
403 let prefix = NodePrefix::from_hex("12ca").unwrap();
400 let prefref = prefix.borrow();
404 let prefref = prefix.borrow();
401 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
405 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
402 assert_eq!(prefref.first_different_nybble(&node), Some(0));
406 assert_eq!(prefref.first_different_nybble(&node), Some(0));
403 node.data[0] = 0x13;
407 node.data[0] = 0x13;
404 assert_eq!(prefref.first_different_nybble(&node), Some(1));
408 assert_eq!(prefref.first_different_nybble(&node), Some(1));
405 node.data[0] = 0x12;
409 node.data[0] = 0x12;
406 assert_eq!(prefref.first_different_nybble(&node), Some(2));
410 assert_eq!(prefref.first_different_nybble(&node), Some(2));
407 node.data[1] = 0xca;
411 node.data[1] = 0xca;
408 // now it is a prefix
412 // now it is a prefix
409 assert_eq!(prefref.first_different_nybble(&node), None);
413 assert_eq!(prefref.first_different_nybble(&node), None);
410 }
414 }
411
415
412 #[test]
416 #[test]
413 fn test_first_different_nybble_odd_prefix() {
417 fn test_first_different_nybble_odd_prefix() {
414 let prefix = NodePrefix::from_hex("12c").unwrap();
418 let prefix = NodePrefix::from_hex("12c").unwrap();
415 let prefref = prefix.borrow();
419 let prefref = prefix.borrow();
416 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
420 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
417 assert_eq!(prefref.first_different_nybble(&node), Some(0));
421 assert_eq!(prefref.first_different_nybble(&node), Some(0));
418 node.data[0] = 0x13;
422 node.data[0] = 0x13;
419 assert_eq!(prefref.first_different_nybble(&node), Some(1));
423 assert_eq!(prefref.first_different_nybble(&node), Some(1));
420 node.data[0] = 0x12;
424 node.data[0] = 0x12;
421 assert_eq!(prefref.first_different_nybble(&node), Some(2));
425 assert_eq!(prefref.first_different_nybble(&node), Some(2));
422 node.data[1] = 0xca;
426 node.data[1] = 0xca;
423 // now it is a prefix
427 // now it is a prefix
424 assert_eq!(prefref.first_different_nybble(&node), None);
428 assert_eq!(prefref.first_different_nybble(&node), None);
425 }
429 }
426 }
430 }
427
431
428 #[cfg(test)]
432 #[cfg(test)]
429 pub use tests::hex_pad_right;
433 pub use tests::hex_pad_right;
@@ -1,1122 +1,1118
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 //!
7 //!
8 //! This provides a variation on the 16-ary radix tree that is
8 //! This provides a variation on the 16-ary radix tree that is
9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 //! on disk.
10 //! on disk.
11 //!
11 //!
12 //! Following existing implicit conventions, the "nodemap" terminology
12 //! Following existing implicit conventions, the "nodemap" terminology
13 //! is used in a more abstract context.
13 //! is used in a more abstract context.
14
14
15 use super::{
15 use super::{
16 node::NULL_NODE, Node, NodeError, NodePrefix, NodePrefixRef, Revision,
16 node::NULL_NODE, Node, NodeError, NodePrefix, NodePrefixRef, Revision,
17 RevlogIndex, NULL_REVISION,
17 RevlogIndex, NULL_REVISION,
18 };
18 };
19
19
20 use std::cmp::max;
20 use std::cmp::max;
21 use std::fmt;
21 use std::fmt;
22 use std::mem;
22 use std::mem;
23 use std::ops::Deref;
23 use std::ops::Deref;
24 use std::ops::Index;
24 use std::ops::Index;
25 use std::slice;
25 use std::slice;
26
26
#[derive(Debug, PartialEq)]
pub enum NodeMapError {
    /// Several distinct revisions match the queried node prefix:
    /// the lookup is ambiguous.
    MultipleResults,
    /// The given input could not be interpreted as a node prefix
    /// (e.g. invalid hexadecimal, see `NodePrefix::from_hex`).
    InvalidNodePrefix(NodeError),
    /// A `Revision` stored in the nodemap could not be found in the index
    RevisionNotInIndex(Revision),
}

impl From<NodeError> for NodeMapError {
    /// Wrap node-prefix parsing errors, so that `?` can be used on
    /// `NodePrefix::from_hex` results (see `find_hex` and
    /// `unique_prefix_len_hex`).
    fn from(err: NodeError) -> Self {
        NodeMapError::InvalidNodePrefix(err)
    }
}
40
40
/// Mapping system from Mercurial nodes to revision numbers.
///
/// ## `RevlogIndex` and `NodeMap`
///
/// One way to think about their relationship is that
/// the `NodeMap` is a prefix-oriented reverse index of the `Node` information
/// carried by a [`RevlogIndex`].
///
/// Many of the methods in this trait take a `RevlogIndex` argument
/// which is used for validation of their results. This index must naturally
/// be the one the `NodeMap` is about, and it must be consistent.
///
/// Notably, the `NodeMap` must not store
/// information about more `Revision` values than there are in the index.
/// In these methods, if an encountered `Revision` is not in the index, a
/// [`RevisionNotInIndex`] error is returned.
///
/// In insert operations, the rule is thus that the `RevlogIndex` must always
/// be updated first, and the `NodeMap` second.
///
/// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex
/// [`RevlogIndex`]: ../trait.RevlogIndex.html
pub trait NodeMap {
    /// Find the unique `Revision` having the given `Node`
    ///
    /// If no Revision matches the given `Node`, `Ok(None)` is returned.
    fn find_node(
        &self,
        index: &impl RevlogIndex,
        node: &Node,
    ) -> Result<Option<Revision>, NodeMapError> {
        self.find_bin(index, node.into())
    }

    /// Find the unique Revision whose `Node` starts with a given binary prefix
    ///
    /// If no Revision matches the given prefix, `Ok(None)` is returned.
    ///
    /// If several Revisions match the given prefix, a [`MultipleResults`]
    /// error is returned.
    fn find_bin<'a>(
        &self,
        idx: &impl RevlogIndex,
        prefix: NodePrefixRef<'a>,
    ) -> Result<Option<Revision>, NodeMapError>;

    /// Find the unique Revision whose `Node` hexadecimal string representation
    /// starts with a given prefix
    ///
    /// If no Revision matches the given prefix, `Ok(None)` is returned.
    ///
    /// If several Revisions match the given prefix, a [`MultipleResults`]
    /// error is returned.
    fn find_hex(
        &self,
        idx: &impl RevlogIndex,
        prefix: &str,
    ) -> Result<Option<Revision>, NodeMapError> {
        self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
    }

    /// Give the size of the shortest node prefix that determines
    /// the revision uniquely.
    ///
    /// From a binary node prefix, if it is matched in the node map, this
    /// returns the number of hexadecimal digits that would had sufficed
    /// to find the revision uniquely.
    ///
    /// Returns `None` if no `Revision` could be found for the prefix.
    ///
    /// If several Revisions match the given prefix, a [`MultipleResults`]
    /// error is returned.
    fn unique_prefix_len_bin<'a>(
        &self,
        idx: &impl RevlogIndex,
        node_prefix: NodePrefixRef<'a>,
    ) -> Result<Option<usize>, NodeMapError>;

    /// Same as `unique_prefix_len_bin`, with the hexadecimal representation
    /// of the prefix as input.
    fn unique_prefix_len_hex(
        &self,
        idx: &impl RevlogIndex,
        prefix: &str,
    ) -> Result<Option<usize>, NodeMapError> {
        self.unique_prefix_len_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
    }

    /// Same as `unique_prefix_len_bin`, with a full `Node` as input
    fn unique_prefix_len_node(
        &self,
        idx: &impl RevlogIndex,
        node: &Node,
    ) -> Result<Option<usize>, NodeMapError> {
        self.unique_prefix_len_bin(idx, node.into())
    }
}
139
139
/// A [`NodeMap`] that can also be updated in memory.
///
/// [`NodeMap`]: trait.NodeMap.html
pub trait MutableNodeMap: NodeMap {
    /// Record the `Node` for the given `Revision` of `index`.
    ///
    /// Per the consistency rule documented on [`NodeMap`], `index` must
    /// already contain `rev` when this is called.
    fn insert<I: RevlogIndex>(
        &mut self,
        index: &I,
        node: &Node,
        rev: Revision,
    ) -> Result<(), NodeMapError>;
}
148
148
149 /// Low level NodeTree [`Blocks`] elements
149 /// Low level NodeTree [`Blocks`] elements
150 ///
150 ///
151 /// These are exactly as for instance on persistent storage.
151 /// These are exactly as for instance on persistent storage.
152 type RawElement = i32;
152 type RawElement = i32;
153
153
154 /// High level representation of values in NodeTree
154 /// High level representation of values in NodeTree
155 /// [`Blocks`](struct.Block.html)
155 /// [`Blocks`](struct.Block.html)
156 ///
156 ///
157 /// This is the high level representation that most algorithms should
157 /// This is the high level representation that most algorithms should
158 /// use.
158 /// use.
159 #[derive(Clone, Debug, Eq, PartialEq)]
159 #[derive(Clone, Debug, Eq, PartialEq)]
160 enum Element {
160 enum Element {
161 Rev(Revision),
161 Rev(Revision),
162 Block(usize),
162 Block(usize),
163 None,
163 None,
164 }
164 }
165
165
166 impl From<RawElement> for Element {
166 impl From<RawElement> for Element {
167 /// Conversion from low level representation, after endianness conversion.
167 /// Conversion from low level representation, after endianness conversion.
168 ///
168 ///
169 /// See [`Block`](struct.Block.html) for explanation about the encoding.
169 /// See [`Block`](struct.Block.html) for explanation about the encoding.
170 fn from(raw: RawElement) -> Element {
170 fn from(raw: RawElement) -> Element {
171 if raw >= 0 {
171 if raw >= 0 {
172 Element::Block(raw as usize)
172 Element::Block(raw as usize)
173 } else if raw == -1 {
173 } else if raw == -1 {
174 Element::None
174 Element::None
175 } else {
175 } else {
176 Element::Rev(-raw - 2)
176 Element::Rev(-raw - 2)
177 }
177 }
178 }
178 }
179 }
179 }
180
180
181 impl From<Element> for RawElement {
181 impl From<Element> for RawElement {
182 fn from(element: Element) -> RawElement {
182 fn from(element: Element) -> RawElement {
183 match element {
183 match element {
184 Element::None => 0,
184 Element::None => 0,
185 Element::Block(i) => i as RawElement,
185 Element::Block(i) => i as RawElement,
186 Element::Rev(rev) => -rev - 2,
186 Element::Rev(rev) => -rev - 2,
187 }
187 }
188 }
188 }
189 }
189 }
190
190
191 /// A logical block of the `NodeTree`, packed with a fixed size.
191 /// A logical block of the `NodeTree`, packed with a fixed size.
192 ///
192 ///
193 /// These are always used in container types implementing `Index<Block>`,
193 /// These are always used in container types implementing `Index<Block>`,
194 /// such as `&Block`
194 /// such as `&Block`
195 ///
195 ///
196 /// As an array of integers, its ith element encodes that the
196 /// As an array of integers, its ith element encodes that the
197 /// ith potential edge from the block, representing the ith hexadecimal digit
197 /// ith potential edge from the block, representing the ith hexadecimal digit
198 /// (nybble) `i` is either:
198 /// (nybble) `i` is either:
199 ///
199 ///
200 /// - absent (value -1)
200 /// - absent (value -1)
201 /// - another `Block` in the same indexable container (value ≥ 0)
201 /// - another `Block` in the same indexable container (value ≥ 0)
202 /// - a `Revision` leaf (value ≤ -2)
202 /// - a `Revision` leaf (value ≤ -2)
203 ///
203 ///
204 /// Endianness has to be fixed for consistency on shared storage across
204 /// Endianness has to be fixed for consistency on shared storage across
205 /// different architectures.
205 /// different architectures.
206 ///
206 ///
207 /// A key difference with the C `nodetree` is that we need to be
207 /// A key difference with the C `nodetree` is that we need to be
208 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
208 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
209 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
209 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
210 ///
210 ///
211 /// Another related difference is that `NULL_REVISION` (-1) is not
211 /// Another related difference is that `NULL_REVISION` (-1) is not
212 /// represented at all, because we want an immutable empty nodetree
212 /// represented at all, because we want an immutable empty nodetree
213 /// to be valid.
213 /// to be valid.
214
214
#[derive(Copy, Clone)]
pub struct Block([u8; BLOCK_SIZE]);

/// Not derivable for arrays of length >32 until const generics are stable
impl PartialEq for Block {
    fn eq(&self, other: &Self) -> bool {
        // Element-wise comparison of the two fixed-size byte arrays.
        self.0.iter().eq(other.0.iter())
    }
}

pub const BLOCK_SIZE: usize = 64;
226
226
227 impl Block {
227 impl Block {
228 fn new() -> Self {
228 fn new() -> Self {
229 // -1 in 2's complement to create an absent node
229 // -1 in 2's complement to create an absent node
230 let byte: u8 = 255;
230 let byte: u8 = 255;
231 Block([byte; BLOCK_SIZE])
231 Block([byte; BLOCK_SIZE])
232 }
232 }
233
233
234 fn get(&self, nybble: u8) -> Element {
234 fn get(&self, nybble: u8) -> Element {
235 let index = nybble as usize * mem::size_of::<RawElement>();
235 let index = nybble as usize * mem::size_of::<RawElement>();
236 Element::from(RawElement::from_be_bytes([
236 Element::from(RawElement::from_be_bytes([
237 self.0[index],
237 self.0[index],
238 self.0[index + 1],
238 self.0[index + 1],
239 self.0[index + 2],
239 self.0[index + 2],
240 self.0[index + 3],
240 self.0[index + 3],
241 ]))
241 ]))
242 }
242 }
243
243
244 fn set(&mut self, nybble: u8, element: Element) {
244 fn set(&mut self, nybble: u8, element: Element) {
245 let values = RawElement::to_be_bytes(element.into());
245 let values = RawElement::to_be_bytes(element.into());
246 let index = nybble as usize * mem::size_of::<RawElement>();
246 let index = nybble as usize * mem::size_of::<RawElement>();
247 self.0[index] = values[0];
247 self.0[index] = values[0];
248 self.0[index + 1] = values[1];
248 self.0[index + 1] = values[1];
249 self.0[index + 2] = values[2];
249 self.0[index + 2] = values[2];
250 self.0[index + 3] = values[3];
250 self.0[index + 3] = values[3];
251 }
251 }
252 }
252 }
253
253
254 impl fmt::Debug for Block {
254 impl fmt::Debug for Block {
255 /// sparse representation for testing and debugging purposes
255 /// sparse representation for testing and debugging purposes
256 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
256 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
257 f.debug_map()
257 f.debug_map()
258 .entries((0..16).filter_map(|i| match self.get(i) {
258 .entries((0..16).filter_map(|i| match self.get(i) {
259 Element::None => None,
259 Element::None => None,
260 element => Some((i, element)),
260 element => Some((i, element)),
261 }))
261 }))
262 .finish()
262 .finish()
263 }
263 }
264 }
264 }
265
265
/// A mutable 16-radix tree with the root block logically at the end
///
/// Because of the append only nature of our node trees, we need to
/// keep the original untouched and store new blocks separately.
///
/// The mutable root `Block` is kept apart so that we don't have to rebump
/// it on each insertion.
pub struct NodeTree {
    // Immutable, already-persisted blocks (may be backed by e.g. an mmap
    // or a PyBuffer, see `load_bytes`).
    readonly: Box<dyn Deref<Target = [Block]> + Send>,
    // Mutable blocks appended since instantiation, including copies of
    // readonly blocks that had to be modified (see `mutable_block`).
    growable: Vec<Block>,
    // The always-mutable root block, logically located right after
    // `growable` (see the `Index` impl).
    root: Block,
    // Count of `readonly` blocks that have been superseded by a mutable
    // copy in `growable`.
    masked_inner_blocks: usize,
}
279
279
280 impl Index<usize> for NodeTree {
280 impl Index<usize> for NodeTree {
281 type Output = Block;
281 type Output = Block;
282
282
283 fn index(&self, i: usize) -> &Block {
283 fn index(&self, i: usize) -> &Block {
284 let ro_len = self.readonly.len();
284 let ro_len = self.readonly.len();
285 if i < ro_len {
285 if i < ro_len {
286 &self.readonly[i]
286 &self.readonly[i]
287 } else if i == ro_len + self.growable.len() {
287 } else if i == ro_len + self.growable.len() {
288 &self.root
288 &self.root
289 } else {
289 } else {
290 &self.growable[i - ro_len]
290 &self.growable[i - ro_len]
291 }
291 }
292 }
292 }
293 }
293 }
294
294
295 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
295 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
296 fn has_prefix_or_none(
296 fn has_prefix_or_none(
297 idx: &impl RevlogIndex,
297 idx: &impl RevlogIndex,
298 prefix: NodePrefixRef,
298 prefix: NodePrefixRef,
299 rev: Revision,
299 rev: Revision,
300 ) -> Result<Option<Revision>, NodeMapError> {
300 ) -> Result<Option<Revision>, NodeMapError> {
301 idx.node(rev)
301 idx.node(rev)
302 .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev))
302 .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev))
303 .map(|node| {
303 .map(|node| {
304 if prefix.is_prefix_of(node) {
304 if prefix.is_prefix_of(node) {
305 Some(rev)
305 Some(rev)
306 } else {
306 } else {
307 None
307 None
308 }
308 }
309 })
309 })
310 }
310 }
311
311
/// validate that the candidate's node starts indeed with given prefix,
/// and treat ambiguities related to `NULL_REVISION`.
///
/// From the data in the NodeTree, one can only conclude that some
/// revision is the only one for a *subprefix* of the one being looked up.
fn validate_candidate(
    idx: &impl RevlogIndex,
    prefix: NodePrefixRef,
    candidate: (Option<Revision>, usize),
) -> Result<(Option<Revision>, usize), NodeMapError> {
    let (rev, steps) = candidate;
    if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
        // The prefix has at least one non-zero nybble (at index
        // `nz_nybble`), so it cannot match NULL_NODE beyond that point:
        // at least `nz_nybble + 1` nybbles are needed to disambiguate
        // from NULL_REVISION, hence the `max` below.
        rev.map_or(Ok((None, steps)), |r| {
            has_prefix_or_none(idx, prefix, r)
                .map(|opt| (opt, max(steps, nz_nybble + 1)))
        })
    } else {
        // the prefix is only made of zeros; NULL_REVISION always matches it
        // and any other *valid* result is an ambiguity
        match rev {
            None => Ok((Some(NULL_REVISION), steps + 1)),
            // The candidate does not actually carry the prefix: only
            // NULL_REVISION matches after all.
            Some(r) => match has_prefix_or_none(idx, prefix, r)? {
                None => Ok((Some(NULL_REVISION), steps + 1)),
                _ => Err(NodeMapError::MultipleResults),
            },
        }
    }
}
340
340
341 impl NodeTree {
341 impl NodeTree {
342 /// Initiate a NodeTree from an immutable slice-like of `Block`
342 /// Initiate a NodeTree from an immutable slice-like of `Block`
343 ///
343 ///
344 /// We keep `readonly` and clone its root block if it isn't empty.
344 /// We keep `readonly` and clone its root block if it isn't empty.
345 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
345 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
346 let root = readonly
346 let root = readonly.last().cloned().unwrap_or_else(Block::new);
347 .last()
348 .map(|b| b.clone())
349 .unwrap_or_else(|| Block::new());
350 NodeTree {
347 NodeTree {
351 readonly: readonly,
348 readonly,
352 growable: Vec::new(),
349 growable: Vec::new(),
353 root: root,
350 root,
354 masked_inner_blocks: 0,
351 masked_inner_blocks: 0,
355 }
352 }
356 }
353 }
357
354
358 /// Create from an opaque bunch of bytes
355 /// Create from an opaque bunch of bytes
359 ///
356 ///
360 /// The created `NodeTreeBytes` from `buffer`,
357 /// The created `NodeTreeBytes` from `buffer`,
361 /// of which exactly `amount` bytes are used.
358 /// of which exactly `amount` bytes are used.
362 ///
359 ///
363 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
360 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
364 /// - `offset` allows for the final file format to include fixed data
361 /// - `offset` allows for the final file format to include fixed data
365 /// (generation number, behavioural flags)
362 /// (generation number, behavioural flags)
366 /// - `amount` is expressed in bytes, and is not automatically derived from
363 /// - `amount` is expressed in bytes, and is not automatically derived from
367 /// `bytes`, so that a caller that manages them atomically can perform
364 /// `bytes`, so that a caller that manages them atomically can perform
368 /// temporary disk serializations and still rollback easily if needed.
365 /// temporary disk serializations and still rollback easily if needed.
369 /// First use-case for this would be to support Mercurial shell hooks.
366 /// First use-case for this would be to support Mercurial shell hooks.
370 ///
367 ///
371 /// panics if `buffer` is smaller than `amount`
368 /// panics if `buffer` is smaller than `amount`
372 pub fn load_bytes(
369 pub fn load_bytes(
373 bytes: Box<dyn Deref<Target = [u8]> + Send>,
370 bytes: Box<dyn Deref<Target = [u8]> + Send>,
374 amount: usize,
371 amount: usize,
375 ) -> Self {
372 ) -> Self {
376 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
373 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
377 }
374 }
378
375
379 /// Retrieve added `Block` and the original immutable data
376 /// Retrieve added `Block` and the original immutable data
380 pub fn into_readonly_and_added(
377 pub fn into_readonly_and_added(
381 self,
378 self,
382 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
379 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
383 let mut vec = self.growable;
380 let mut vec = self.growable;
384 let readonly = self.readonly;
381 let readonly = self.readonly;
385 if readonly.last() != Some(&self.root) {
382 if readonly.last() != Some(&self.root) {
386 vec.push(self.root);
383 vec.push(self.root);
387 }
384 }
388 (readonly, vec)
385 (readonly, vec)
389 }
386 }
390
387
    /// Retrieve added `Blocks` as bytes, ready to be written to persistent
    /// storage
    pub fn into_readonly_and_added_bytes(
        self,
    ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
        let (readonly, vec) = self.into_readonly_and_added();
        // Prevent running `v`'s destructor so we are in complete control
        // of the allocation.
        let vec = mem::ManuallyDrop::new(vec);

        // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
        // bytes, so this is perfectly safe.
        let bytes = unsafe {
            // SAFETY: `Block` is a plain `[u8; BLOCK_SIZE]` wrapper
            // (no padding, alignment 1), so reinterpreting the buffer as
            // bytes is sound; `ManuallyDrop` above guarantees the original
            // allocation is not freed twice.
            // Assert that `Block` hasn't been changed and has no padding
            let _: [u8; 4 * BLOCK_SIZE] =
                std::mem::transmute([Block::new(); 4]);

            // /!\ Any use of `vec` after this is use-after-free.
            // TODO: use `into_raw_parts` once stabilized
            Vec::from_raw_parts(
                vec.as_ptr() as *mut u8,
                vec.len() * BLOCK_SIZE,
                vec.capacity() * BLOCK_SIZE,
            )
        };
        (readonly, bytes)
    }
418
415
    /// Total number of blocks
    ///
    /// Counts `readonly` and `growable` blocks, plus 1 for the
    /// always-present root.
    fn len(&self) -> usize {
        self.readonly.len() + self.growable.len() + 1
    }
423
420
    /// Implemented for completeness
    ///
    /// A `NodeTree` always has at least the mutable root block, hence
    /// this is constant (see also `len`, which is always >= 1).
    #[allow(dead_code)]
    fn is_empty(&self) -> bool {
        false
    }
431
428
432 /// Main working method for `NodeTree` searches
429 /// Main working method for `NodeTree` searches
433 ///
430 ///
434 /// The first returned value is the result of analysing `NodeTree` data
431 /// The first returned value is the result of analysing `NodeTree` data
435 /// *alone*: whereas `None` guarantees that the given prefix is absent
432 /// *alone*: whereas `None` guarantees that the given prefix is absent
436 /// from the `NodeTree` data (but still could match `NULL_NODE`), with
433 /// from the `NodeTree` data (but still could match `NULL_NODE`), with
437 /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision`
434 /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision`
438 /// that could match the prefix. Actually, all that can be inferred from
435 /// that could match the prefix. Actually, all that can be inferred from
439 /// the `NodeTree` data is that `rev` is the revision with the longest
436 /// the `NodeTree` data is that `rev` is the revision with the longest
440 /// common node prefix with the given prefix.
437 /// common node prefix with the given prefix.
441 ///
438 ///
442 /// The second returned value is the size of the smallest subprefix
439 /// The second returned value is the size of the smallest subprefix
443 /// of `prefix` that would give the same result, i.e. not the
440 /// of `prefix` that would give the same result, i.e. not the
444 /// `MultipleResults` error variant (again, using only the data of the
441 /// `MultipleResults` error variant (again, using only the data of the
445 /// `NodeTree`).
442 /// `NodeTree`).
446 fn lookup(
443 fn lookup(
447 &self,
444 &self,
448 prefix: NodePrefixRef,
445 prefix: NodePrefixRef,
449 ) -> Result<(Option<Revision>, usize), NodeMapError> {
446 ) -> Result<(Option<Revision>, usize), NodeMapError> {
450 for (i, visit_item) in self.visit(prefix).enumerate() {
447 for (i, visit_item) in self.visit(prefix).enumerate() {
451 if let Some(opt) = visit_item.final_revision() {
448 if let Some(opt) = visit_item.final_revision() {
452 return Ok((opt, i + 1));
449 return Ok((opt, i + 1));
453 }
450 }
454 }
451 }
455 Err(NodeMapError::MultipleResults)
452 Err(NodeMapError::MultipleResults)
456 }
453 }
457
454
    /// Create an iterator walking the tree along `prefix`, one nybble
    /// (hence one block) at a time, starting from the root block.
    fn visit<'n, 'p>(
        &'n self,
        prefix: NodePrefixRef<'p>,
    ) -> NodeTreeVisitor<'n, 'p> {
        NodeTreeVisitor {
            nt: self,
            prefix,
            // The root block is logically the last one (see the `Index`
            // impl), so the walk begins there.
            visit: self.len() - 1,
            nybble_idx: 0,
            done: false,
        }
    }
    /// Return a mutable reference for `Block` at index `idx`.
    ///
    /// If `idx` lies in the immutable area, then the reference is to
    /// a newly appended copy.
    ///
    /// Returns (new_idx, glen, mut_ref) where
    ///
    /// - `new_idx` is the index of the mutable `Block`
    /// - `mut_ref` is a mutable reference to the mutable Block.
    /// - `glen` is the new length of `self.growable`
    ///
    /// Note: the caller wouldn't be allowed to query `self.growable.len()`
    /// itself because of the mutable borrow taken with the returned `Block`
    fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
        let ro_blocks = &self.readonly;
        let ro_len = ro_blocks.len();
        let glen = self.growable.len();
        if idx < ro_len {
            // Immutable area: append a copy at the end of `growable`
            // (index `glen`, i.e. overall index `glen + ro_len`) and
            // account for the now-superseded readonly block.
            self.masked_inner_blocks += 1;
            self.growable.push(ro_blocks[idx]);
            (glen + ro_len, &mut self.growable[glen], glen + 1)
        } else if glen + ro_len == idx {
            // `idx` designates the root block, which is already mutable.
            (idx, &mut self.root, glen)
        } else {
            // Already in the growable (mutable) area.
            (idx, &mut self.growable[idx - ro_len], glen)
        }
    }
498
494
499 /// Main insertion method
495 /// Main insertion method
500 ///
496 ///
501 /// This will dive in the node tree to find the deepest `Block` for
497 /// This will dive in the node tree to find the deepest `Block` for
502 /// `node`, split it as much as needed and record `node` in there.
498 /// `node`, split it as much as needed and record `node` in there.
503 /// The method then backtracks, updating references in all the visited
499 /// The method then backtracks, updating references in all the visited
504 /// blocks from the root.
500 /// blocks from the root.
505 ///
501 ///
506 /// All the mutated `Block` are copied first to the growable part if
502 /// All the mutated `Block` are copied first to the growable part if
507 /// needed. That happens for those in the immutable part except the root.
503 /// needed. That happens for those in the immutable part except the root.
508 pub fn insert<I: RevlogIndex>(
504 pub fn insert<I: RevlogIndex>(
509 &mut self,
505 &mut self,
510 index: &I,
506 index: &I,
511 node: &Node,
507 node: &Node,
512 rev: Revision,
508 rev: Revision,
513 ) -> Result<(), NodeMapError> {
509 ) -> Result<(), NodeMapError> {
514 let ro_len = &self.readonly.len();
510 let ro_len = &self.readonly.len();
515
511
516 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
512 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
517 let read_nybbles = visit_steps.len();
513 let read_nybbles = visit_steps.len();
518 // visit_steps cannot be empty, since we always visit the root block
514 // visit_steps cannot be empty, since we always visit the root block
519 let deepest = visit_steps.pop().unwrap();
515 let deepest = visit_steps.pop().unwrap();
520
516
521 let (mut block_idx, mut block, mut glen) =
517 let (mut block_idx, mut block, mut glen) =
522 self.mutable_block(deepest.block_idx);
518 self.mutable_block(deepest.block_idx);
523
519
524 if let Element::Rev(old_rev) = deepest.element {
520 if let Element::Rev(old_rev) = deepest.element {
525 let old_node = index
521 let old_node = index
526 .node(old_rev)
522 .node(old_rev)
527 .ok_or_else(|| NodeMapError::RevisionNotInIndex(old_rev))?;
523 .ok_or_else(|| NodeMapError::RevisionNotInIndex(old_rev))?;
528 if old_node == node {
524 if old_node == node {
529 return Ok(()); // avoid creating lots of useless blocks
525 return Ok(()); // avoid creating lots of useless blocks
530 }
526 }
531
527
532 // Looping over the tail of nybbles in both nodes, creating
528 // Looping over the tail of nybbles in both nodes, creating
533 // new blocks until we find the difference
529 // new blocks until we find the difference
534 let mut new_block_idx = ro_len + glen;
530 let mut new_block_idx = ro_len + glen;
535 let mut nybble = deepest.nybble;
531 let mut nybble = deepest.nybble;
536 for nybble_pos in read_nybbles..node.nybbles_len() {
532 for nybble_pos in read_nybbles..node.nybbles_len() {
537 block.set(nybble, Element::Block(new_block_idx));
533 block.set(nybble, Element::Block(new_block_idx));
538
534
539 let new_nybble = node.get_nybble(nybble_pos);
535 let new_nybble = node.get_nybble(nybble_pos);
540 let old_nybble = old_node.get_nybble(nybble_pos);
536 let old_nybble = old_node.get_nybble(nybble_pos);
541
537
542 if old_nybble == new_nybble {
538 if old_nybble == new_nybble {
543 self.growable.push(Block::new());
539 self.growable.push(Block::new());
544 block = &mut self.growable[glen];
540 block = &mut self.growable[glen];
545 glen += 1;
541 glen += 1;
546 new_block_idx += 1;
542 new_block_idx += 1;
547 nybble = new_nybble;
543 nybble = new_nybble;
548 } else {
544 } else {
549 let mut new_block = Block::new();
545 let mut new_block = Block::new();
550 new_block.set(old_nybble, Element::Rev(old_rev));
546 new_block.set(old_nybble, Element::Rev(old_rev));
551 new_block.set(new_nybble, Element::Rev(rev));
547 new_block.set(new_nybble, Element::Rev(rev));
552 self.growable.push(new_block);
548 self.growable.push(new_block);
553 break;
549 break;
554 }
550 }
555 }
551 }
556 } else {
552 } else {
557 // Free slot in the deepest block: no splitting has to be done
553 // Free slot in the deepest block: no splitting has to be done
558 block.set(deepest.nybble, Element::Rev(rev));
554 block.set(deepest.nybble, Element::Rev(rev));
559 }
555 }
560
556
561 // Backtrack over visit steps to update references
557 // Backtrack over visit steps to update references
562 while let Some(visited) = visit_steps.pop() {
558 while let Some(visited) = visit_steps.pop() {
563 let to_write = Element::Block(block_idx);
559 let to_write = Element::Block(block_idx);
564 if visit_steps.is_empty() {
560 if visit_steps.is_empty() {
565 self.root.set(visited.nybble, to_write);
561 self.root.set(visited.nybble, to_write);
566 break;
562 break;
567 }
563 }
568 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
564 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
569 if block.get(visited.nybble) == to_write {
565 if block.get(visited.nybble) == to_write {
570 break;
566 break;
571 }
567 }
572 block.set(visited.nybble, to_write);
568 block.set(visited.nybble, to_write);
573 block_idx = new_idx;
569 block_idx = new_idx;
574 }
570 }
575 Ok(())
571 Ok(())
576 }
572 }
577
573
578 /// Make the whole `NodeTree` logically empty, without touching the
574 /// Make the whole `NodeTree` logically empty, without touching the
579 /// immutable part.
575 /// immutable part.
580 pub fn invalidate_all(&mut self) {
576 pub fn invalidate_all(&mut self) {
581 self.root = Block::new();
577 self.root = Block::new();
582 self.growable = Vec::new();
578 self.growable = Vec::new();
583 self.masked_inner_blocks = self.readonly.len();
579 self.masked_inner_blocks = self.readonly.len();
584 }
580 }
585
581
586 /// Return the number of blocks in the readonly part that are currently
582 /// Return the number of blocks in the readonly part that are currently
587 /// masked in the mutable part.
583 /// masked in the mutable part.
588 ///
584 ///
589 /// The `NodeTree` structure has no efficient way to know how many blocks
585 /// The `NodeTree` structure has no efficient way to know how many blocks
590 /// are already unreachable in the readonly part.
586 /// are already unreachable in the readonly part.
591 ///
587 ///
592 /// After a call to `invalidate_all()`, the returned number can be actually
588 /// After a call to `invalidate_all()`, the returned number can be actually
593 /// bigger than the whole readonly part, a conventional way to mean that
589 /// bigger than the whole readonly part, a conventional way to mean that
594 /// all the readonly blocks have been masked. This is what is really
590 /// all the readonly blocks have been masked. This is what is really
595 /// useful to the caller and does not require to know how many were
591 /// useful to the caller and does not require to know how many were
596 /// actually unreachable to begin with.
592 /// actually unreachable to begin with.
597 pub fn masked_readonly_blocks(&self) -> usize {
593 pub fn masked_readonly_blocks(&self) -> usize {
598 if let Some(readonly_root) = self.readonly.last() {
594 if let Some(readonly_root) = self.readonly.last() {
599 if readonly_root == &self.root {
595 if readonly_root == &self.root {
600 return 0;
596 return 0;
601 }
597 }
602 } else {
598 } else {
603 return 0;
599 return 0;
604 }
600 }
605 self.masked_inner_blocks + 1
601 self.masked_inner_blocks + 1
606 }
602 }
607 }
603 }
608
604
609 pub struct NodeTreeBytes {
605 pub struct NodeTreeBytes {
610 buffer: Box<dyn Deref<Target = [u8]> + Send>,
606 buffer: Box<dyn Deref<Target = [u8]> + Send>,
611 len_in_blocks: usize,
607 len_in_blocks: usize,
612 }
608 }
613
609
614 impl NodeTreeBytes {
610 impl NodeTreeBytes {
615 fn new(
611 fn new(
616 buffer: Box<dyn Deref<Target = [u8]> + Send>,
612 buffer: Box<dyn Deref<Target = [u8]> + Send>,
617 amount: usize,
613 amount: usize,
618 ) -> Self {
614 ) -> Self {
619 assert!(buffer.len() >= amount);
615 assert!(buffer.len() >= amount);
620 let len_in_blocks = amount / BLOCK_SIZE;
616 let len_in_blocks = amount / BLOCK_SIZE;
621 NodeTreeBytes {
617 NodeTreeBytes {
622 buffer,
618 buffer,
623 len_in_blocks,
619 len_in_blocks,
624 }
620 }
625 }
621 }
626 }
622 }
627
623
628 impl Deref for NodeTreeBytes {
624 impl Deref for NodeTreeBytes {
629 type Target = [Block];
625 type Target = [Block];
630
626
631 fn deref(&self) -> &[Block] {
627 fn deref(&self) -> &[Block] {
632 unsafe {
628 unsafe {
633 slice::from_raw_parts(
629 slice::from_raw_parts(
634 (&self.buffer).as_ptr() as *const Block,
630 (&self.buffer).as_ptr() as *const Block,
635 self.len_in_blocks,
631 self.len_in_blocks,
636 )
632 )
637 }
633 }
638 }
634 }
639 }
635 }
640
636
641 struct NodeTreeVisitor<'n, 'p> {
637 struct NodeTreeVisitor<'n, 'p> {
642 nt: &'n NodeTree,
638 nt: &'n NodeTree,
643 prefix: NodePrefixRef<'p>,
639 prefix: NodePrefixRef<'p>,
644 visit: usize,
640 visit: usize,
645 nybble_idx: usize,
641 nybble_idx: usize,
646 done: bool,
642 done: bool,
647 }
643 }
648
644
649 #[derive(Debug, PartialEq, Clone)]
645 #[derive(Debug, PartialEq, Clone)]
650 struct NodeTreeVisitItem {
646 struct NodeTreeVisitItem {
651 block_idx: usize,
647 block_idx: usize,
652 nybble: u8,
648 nybble: u8,
653 element: Element,
649 element: Element,
654 }
650 }
655
651
656 impl<'n, 'p> Iterator for NodeTreeVisitor<'n, 'p> {
652 impl<'n, 'p> Iterator for NodeTreeVisitor<'n, 'p> {
657 type Item = NodeTreeVisitItem;
653 type Item = NodeTreeVisitItem;
658
654
659 fn next(&mut self) -> Option<Self::Item> {
655 fn next(&mut self) -> Option<Self::Item> {
660 if self.done || self.nybble_idx >= self.prefix.len() {
656 if self.done || self.nybble_idx >= self.prefix.len() {
661 return None;
657 return None;
662 }
658 }
663
659
664 let nybble = self.prefix.get_nybble(self.nybble_idx);
660 let nybble = self.prefix.get_nybble(self.nybble_idx);
665 self.nybble_idx += 1;
661 self.nybble_idx += 1;
666
662
667 let visit = self.visit;
663 let visit = self.visit;
668 let element = self.nt[visit].get(nybble);
664 let element = self.nt[visit].get(nybble);
669 if let Element::Block(idx) = element {
665 if let Element::Block(idx) = element {
670 self.visit = idx;
666 self.visit = idx;
671 } else {
667 } else {
672 self.done = true;
668 self.done = true;
673 }
669 }
674
670
675 Some(NodeTreeVisitItem {
671 Some(NodeTreeVisitItem {
676 block_idx: visit,
672 block_idx: visit,
677 nybble: nybble,
673 nybble,
678 element: element,
674 element,
679 })
675 })
680 }
676 }
681 }
677 }
682
678
683 impl NodeTreeVisitItem {
679 impl NodeTreeVisitItem {
684 // Return `Some(opt)` if this item is final, with `opt` being the
680 // Return `Some(opt)` if this item is final, with `opt` being the
685 // `Revision` that it may represent.
681 // `Revision` that it may represent.
686 //
682 //
687 // If the item is not terminal, return `None`
683 // If the item is not terminal, return `None`
688 fn final_revision(&self) -> Option<Option<Revision>> {
684 fn final_revision(&self) -> Option<Option<Revision>> {
689 match self.element {
685 match self.element {
690 Element::Block(_) => None,
686 Element::Block(_) => None,
691 Element::Rev(r) => Some(Some(r)),
687 Element::Rev(r) => Some(Some(r)),
692 Element::None => Some(None),
688 Element::None => Some(None),
693 }
689 }
694 }
690 }
695 }
691 }
696
692
697 impl From<Vec<Block>> for NodeTree {
693 impl From<Vec<Block>> for NodeTree {
698 fn from(vec: Vec<Block>) -> Self {
694 fn from(vec: Vec<Block>) -> Self {
699 Self::new(Box::new(vec))
695 Self::new(Box::new(vec))
700 }
696 }
701 }
697 }
702
698
703 impl fmt::Debug for NodeTree {
699 impl fmt::Debug for NodeTree {
704 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
700 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
705 let readonly: &[Block] = &*self.readonly;
701 let readonly: &[Block] = &*self.readonly;
706 write!(
702 write!(
707 f,
703 f,
708 "readonly: {:?}, growable: {:?}, root: {:?}",
704 "readonly: {:?}, growable: {:?}, root: {:?}",
709 readonly, self.growable, self.root
705 readonly, self.growable, self.root
710 )
706 )
711 }
707 }
712 }
708 }
713
709
714 impl Default for NodeTree {
710 impl Default for NodeTree {
715 /// Create a fully mutable empty NodeTree
711 /// Create a fully mutable empty NodeTree
716 fn default() -> Self {
712 fn default() -> Self {
717 NodeTree::new(Box::new(Vec::new()))
713 NodeTree::new(Box::new(Vec::new()))
718 }
714 }
719 }
715 }
720
716
721 impl NodeMap for NodeTree {
717 impl NodeMap for NodeTree {
722 fn find_bin<'a>(
718 fn find_bin<'a>(
723 &self,
719 &self,
724 idx: &impl RevlogIndex,
720 idx: &impl RevlogIndex,
725 prefix: NodePrefixRef<'a>,
721 prefix: NodePrefixRef<'a>,
726 ) -> Result<Option<Revision>, NodeMapError> {
722 ) -> Result<Option<Revision>, NodeMapError> {
727 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
723 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
728 .map(|(opt, _shortest)| opt)
724 .map(|(opt, _shortest)| opt)
729 }
725 }
730
726
731 fn unique_prefix_len_bin<'a>(
727 fn unique_prefix_len_bin<'a>(
732 &self,
728 &self,
733 idx: &impl RevlogIndex,
729 idx: &impl RevlogIndex,
734 prefix: NodePrefixRef<'a>,
730 prefix: NodePrefixRef<'a>,
735 ) -> Result<Option<usize>, NodeMapError> {
731 ) -> Result<Option<usize>, NodeMapError> {
736 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
732 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
737 .map(|(opt, shortest)| opt.map(|_rev| shortest))
733 .map(|(opt, shortest)| opt.map(|_rev| shortest))
738 }
734 }
739 }
735 }
740
736
741 #[cfg(test)]
737 #[cfg(test)]
742 mod tests {
738 mod tests {
743 use super::NodeMapError::*;
739 use super::NodeMapError::*;
744 use super::*;
740 use super::*;
745 use crate::revlog::node::{hex_pad_right, Node};
741 use crate::revlog::node::{hex_pad_right, Node};
746 use std::collections::HashMap;
742 use std::collections::HashMap;
747
743
748 /// Creates a `Block` using a syntax close to the `Debug` output
744 /// Creates a `Block` using a syntax close to the `Debug` output
749 macro_rules! block {
745 macro_rules! block {
750 {$($nybble:tt : $variant:ident($val:tt)),*} => (
746 {$($nybble:tt : $variant:ident($val:tt)),*} => (
751 {
747 {
752 let mut block = Block::new();
748 let mut block = Block::new();
753 $(block.set($nybble, Element::$variant($val)));*;
749 $(block.set($nybble, Element::$variant($val)));*;
754 block
750 block
755 }
751 }
756 )
752 )
757 }
753 }
758
754
759 #[test]
755 #[test]
760 fn test_block_debug() {
756 fn test_block_debug() {
761 let mut block = Block::new();
757 let mut block = Block::new();
762 block.set(1, Element::Rev(3));
758 block.set(1, Element::Rev(3));
763 block.set(10, Element::Block(0));
759 block.set(10, Element::Block(0));
764 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
760 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
765 }
761 }
766
762
767 #[test]
763 #[test]
768 fn test_block_macro() {
764 fn test_block_macro() {
769 let block = block! {5: Block(2)};
765 let block = block! {5: Block(2)};
770 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
766 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
771
767
772 let block = block! {13: Rev(15), 5: Block(2)};
768 let block = block! {13: Rev(15), 5: Block(2)};
773 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
769 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
774 }
770 }
775
771
776 #[test]
772 #[test]
777 fn test_raw_block() {
773 fn test_raw_block() {
778 let mut raw = [255u8; 64];
774 let mut raw = [255u8; 64];
779
775
780 let mut counter = 0;
776 let mut counter = 0;
781 for val in [0, 15, -2, -1, -3].iter() {
777 for val in [0, 15, -2, -1, -3].iter() {
782 for byte in RawElement::to_be_bytes(*val).iter() {
778 for byte in RawElement::to_be_bytes(*val).iter() {
783 raw[counter] = *byte;
779 raw[counter] = *byte;
784 counter += 1;
780 counter += 1;
785 }
781 }
786 }
782 }
787 let block = Block(raw);
783 let block = Block(raw);
788 assert_eq!(block.get(0), Element::Block(0));
784 assert_eq!(block.get(0), Element::Block(0));
789 assert_eq!(block.get(1), Element::Block(15));
785 assert_eq!(block.get(1), Element::Block(15));
790 assert_eq!(block.get(3), Element::None);
786 assert_eq!(block.get(3), Element::None);
791 assert_eq!(block.get(2), Element::Rev(0));
787 assert_eq!(block.get(2), Element::Rev(0));
792 assert_eq!(block.get(4), Element::Rev(1));
788 assert_eq!(block.get(4), Element::Rev(1));
793 }
789 }
794
790
795 type TestIndex = HashMap<Revision, Node>;
791 type TestIndex = HashMap<Revision, Node>;
796
792
797 impl RevlogIndex for TestIndex {
793 impl RevlogIndex for TestIndex {
798 fn node(&self, rev: Revision) -> Option<&Node> {
794 fn node(&self, rev: Revision) -> Option<&Node> {
799 self.get(&rev)
795 self.get(&rev)
800 }
796 }
801
797
802 fn len(&self) -> usize {
798 fn len(&self) -> usize {
803 self.len()
799 self.len()
804 }
800 }
805 }
801 }
806
802
807 /// Pad hexadecimal Node prefix with zeros on the right
803 /// Pad hexadecimal Node prefix with zeros on the right
808 ///
804 ///
809 /// This avoids having to repeatedly write very long hexadecimal
805 /// This avoids having to repeatedly write very long hexadecimal
810 /// strings for test data, and brings actual hash size independency.
806 /// strings for test data, and brings actual hash size independency.
811 #[cfg(test)]
807 #[cfg(test)]
812 fn pad_node(hex: &str) -> Node {
808 fn pad_node(hex: &str) -> Node {
813 Node::from_hex(&hex_pad_right(hex)).unwrap()
809 Node::from_hex(&hex_pad_right(hex)).unwrap()
814 }
810 }
815
811
816 /// Pad hexadecimal Node prefix with zeros on the right, then insert
812 /// Pad hexadecimal Node prefix with zeros on the right, then insert
817 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
813 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
818 idx.insert(rev, pad_node(hex));
814 idx.insert(rev, pad_node(hex));
819 }
815 }
820
816
821 fn sample_nodetree() -> NodeTree {
817 fn sample_nodetree() -> NodeTree {
822 NodeTree::from(vec![
818 NodeTree::from(vec![
823 block![0: Rev(9)],
819 block![0: Rev(9)],
824 block![0: Rev(0), 1: Rev(9)],
820 block![0: Rev(0), 1: Rev(9)],
825 block![0: Block(1), 1:Rev(1)],
821 block![0: Block(1), 1:Rev(1)],
826 ])
822 ])
827 }
823 }
828
824
829 #[test]
825 #[test]
830 fn test_nt_debug() {
826 fn test_nt_debug() {
831 let nt = sample_nodetree();
827 let nt = sample_nodetree();
832 assert_eq!(
828 assert_eq!(
833 format!("{:?}", nt),
829 format!("{:?}", nt),
834 "readonly: \
830 "readonly: \
835 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
831 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
836 growable: [], \
832 growable: [], \
837 root: {0: Block(1), 1: Rev(1)}",
833 root: {0: Block(1), 1: Rev(1)}",
838 );
834 );
839 }
835 }
840
836
841 #[test]
837 #[test]
842 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
838 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
843 let mut idx: TestIndex = HashMap::new();
839 let mut idx: TestIndex = HashMap::new();
844 pad_insert(&mut idx, 1, "1234deadcafe");
840 pad_insert(&mut idx, 1, "1234deadcafe");
845
841
846 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
842 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
847 assert_eq!(nt.find_hex(&idx, "1")?, Some(1));
843 assert_eq!(nt.find_hex(&idx, "1")?, Some(1));
848 assert_eq!(nt.find_hex(&idx, "12")?, Some(1));
844 assert_eq!(nt.find_hex(&idx, "12")?, Some(1));
849 assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1));
845 assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1));
850 assert_eq!(nt.find_hex(&idx, "1a")?, None);
846 assert_eq!(nt.find_hex(&idx, "1a")?, None);
851 assert_eq!(nt.find_hex(&idx, "ab")?, None);
847 assert_eq!(nt.find_hex(&idx, "ab")?, None);
852
848
853 // and with full binary Nodes
849 // and with full binary Nodes
854 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
850 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
855 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
851 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
856 assert_eq!(nt.find_node(&idx, &unknown)?, None);
852 assert_eq!(nt.find_node(&idx, &unknown)?, None);
857 Ok(())
853 Ok(())
858 }
854 }
859
855
860 #[test]
856 #[test]
861 fn test_immutable_find_one_jump() {
857 fn test_immutable_find_one_jump() {
862 let mut idx = TestIndex::new();
858 let mut idx = TestIndex::new();
863 pad_insert(&mut idx, 9, "012");
859 pad_insert(&mut idx, 9, "012");
864 pad_insert(&mut idx, 0, "00a");
860 pad_insert(&mut idx, 0, "00a");
865
861
866 let nt = sample_nodetree();
862 let nt = sample_nodetree();
867
863
868 assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults));
864 assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults));
869 assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
865 assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
870 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
866 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
871 assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
867 assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
872 assert_eq!(nt.unique_prefix_len_hex(&idx, "00a"), Ok(Some(3)));
868 assert_eq!(nt.unique_prefix_len_hex(&idx, "00a"), Ok(Some(3)));
873 assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
869 assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
874 }
870 }
875
871
876 #[test]
872 #[test]
877 fn test_mutated_find() -> Result<(), NodeMapError> {
873 fn test_mutated_find() -> Result<(), NodeMapError> {
878 let mut idx = TestIndex::new();
874 let mut idx = TestIndex::new();
879 pad_insert(&mut idx, 9, "012");
875 pad_insert(&mut idx, 9, "012");
880 pad_insert(&mut idx, 0, "00a");
876 pad_insert(&mut idx, 0, "00a");
881 pad_insert(&mut idx, 2, "cafe");
877 pad_insert(&mut idx, 2, "cafe");
882 pad_insert(&mut idx, 3, "15");
878 pad_insert(&mut idx, 3, "15");
883 pad_insert(&mut idx, 1, "10");
879 pad_insert(&mut idx, 1, "10");
884
880
885 let nt = NodeTree {
881 let nt = NodeTree {
886 readonly: sample_nodetree().readonly,
882 readonly: sample_nodetree().readonly,
887 growable: vec![block![0: Rev(1), 5: Rev(3)]],
883 growable: vec![block![0: Rev(1), 5: Rev(3)]],
888 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
884 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
889 masked_inner_blocks: 1,
885 masked_inner_blocks: 1,
890 };
886 };
891 assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
887 assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
892 assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
888 assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
893 assert_eq!(nt.unique_prefix_len_hex(&idx, "c")?, Some(1));
889 assert_eq!(nt.unique_prefix_len_hex(&idx, "c")?, Some(1));
894 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
890 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
895 assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
891 assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
896 assert_eq!(nt.unique_prefix_len_hex(&idx, "000")?, Some(3));
892 assert_eq!(nt.unique_prefix_len_hex(&idx, "000")?, Some(3));
897 assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
893 assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
898 assert_eq!(nt.masked_readonly_blocks(), 2);
894 assert_eq!(nt.masked_readonly_blocks(), 2);
899 Ok(())
895 Ok(())
900 }
896 }
901
897
902 struct TestNtIndex {
898 struct TestNtIndex {
903 index: TestIndex,
899 index: TestIndex,
904 nt: NodeTree,
900 nt: NodeTree,
905 }
901 }
906
902
907 impl TestNtIndex {
903 impl TestNtIndex {
908 fn new() -> Self {
904 fn new() -> Self {
909 TestNtIndex {
905 TestNtIndex {
910 index: HashMap::new(),
906 index: HashMap::new(),
911 nt: NodeTree::default(),
907 nt: NodeTree::default(),
912 }
908 }
913 }
909 }
914
910
915 fn insert(
911 fn insert(
916 &mut self,
912 &mut self,
917 rev: Revision,
913 rev: Revision,
918 hex: &str,
914 hex: &str,
919 ) -> Result<(), NodeMapError> {
915 ) -> Result<(), NodeMapError> {
920 let node = pad_node(hex);
916 let node = pad_node(hex);
921 self.index.insert(rev, node.clone());
917 self.index.insert(rev, node.clone());
922 self.nt.insert(&self.index, &node, rev)?;
918 self.nt.insert(&self.index, &node, rev)?;
923 Ok(())
919 Ok(())
924 }
920 }
925
921
926 fn find_hex(
922 fn find_hex(
927 &self,
923 &self,
928 prefix: &str,
924 prefix: &str,
929 ) -> Result<Option<Revision>, NodeMapError> {
925 ) -> Result<Option<Revision>, NodeMapError> {
930 self.nt.find_hex(&self.index, prefix)
926 self.nt.find_hex(&self.index, prefix)
931 }
927 }
932
928
933 fn unique_prefix_len_hex(
929 fn unique_prefix_len_hex(
934 &self,
930 &self,
935 prefix: &str,
931 prefix: &str,
936 ) -> Result<Option<usize>, NodeMapError> {
932 ) -> Result<Option<usize>, NodeMapError> {
937 self.nt.unique_prefix_len_hex(&self.index, prefix)
933 self.nt.unique_prefix_len_hex(&self.index, prefix)
938 }
934 }
939
935
940 /// Drain `added` and restart a new one
936 /// Drain `added` and restart a new one
941 fn commit(self) -> Self {
937 fn commit(self) -> Self {
942 let mut as_vec: Vec<Block> =
938 let mut as_vec: Vec<Block> =
943 self.nt.readonly.iter().map(|block| block.clone()).collect();
939 self.nt.readonly.iter().map(|block| block.clone()).collect();
944 as_vec.extend(self.nt.growable);
940 as_vec.extend(self.nt.growable);
945 as_vec.push(self.nt.root);
941 as_vec.push(self.nt.root);
946
942
947 Self {
943 Self {
948 index: self.index,
944 index: self.index,
949 nt: NodeTree::from(as_vec).into(),
945 nt: NodeTree::from(as_vec).into(),
950 }
946 }
951 }
947 }
952 }
948 }
953
949
954 #[test]
950 #[test]
955 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
951 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
956 let mut idx = TestNtIndex::new();
952 let mut idx = TestNtIndex::new();
957 idx.insert(0, "1234")?;
953 idx.insert(0, "1234")?;
958 assert_eq!(idx.find_hex("1")?, Some(0));
954 assert_eq!(idx.find_hex("1")?, Some(0));
959 assert_eq!(idx.find_hex("12")?, Some(0));
955 assert_eq!(idx.find_hex("12")?, Some(0));
960
956
961 // let's trigger a simple split
957 // let's trigger a simple split
962 idx.insert(1, "1a34")?;
958 idx.insert(1, "1a34")?;
963 assert_eq!(idx.nt.growable.len(), 1);
959 assert_eq!(idx.nt.growable.len(), 1);
964 assert_eq!(idx.find_hex("12")?, Some(0));
960 assert_eq!(idx.find_hex("12")?, Some(0));
965 assert_eq!(idx.find_hex("1a")?, Some(1));
961 assert_eq!(idx.find_hex("1a")?, Some(1));
966
962
967 // reinserting is a no_op
963 // reinserting is a no_op
968 idx.insert(1, "1a34")?;
964 idx.insert(1, "1a34")?;
969 assert_eq!(idx.nt.growable.len(), 1);
965 assert_eq!(idx.nt.growable.len(), 1);
970 assert_eq!(idx.find_hex("12")?, Some(0));
966 assert_eq!(idx.find_hex("12")?, Some(0));
971 assert_eq!(idx.find_hex("1a")?, Some(1));
967 assert_eq!(idx.find_hex("1a")?, Some(1));
972
968
973 idx.insert(2, "1a01")?;
969 idx.insert(2, "1a01")?;
974 assert_eq!(idx.nt.growable.len(), 2);
970 assert_eq!(idx.nt.growable.len(), 2);
975 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
971 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
976 assert_eq!(idx.find_hex("12")?, Some(0));
972 assert_eq!(idx.find_hex("12")?, Some(0));
977 assert_eq!(idx.find_hex("1a3")?, Some(1));
973 assert_eq!(idx.find_hex("1a3")?, Some(1));
978 assert_eq!(idx.find_hex("1a0")?, Some(2));
974 assert_eq!(idx.find_hex("1a0")?, Some(2));
979 assert_eq!(idx.find_hex("1a12")?, None);
975 assert_eq!(idx.find_hex("1a12")?, None);
980
976
981 // now let's make it split and create more than one additional block
977 // now let's make it split and create more than one additional block
982 idx.insert(3, "1a345")?;
978 idx.insert(3, "1a345")?;
983 assert_eq!(idx.nt.growable.len(), 4);
979 assert_eq!(idx.nt.growable.len(), 4);
984 assert_eq!(idx.find_hex("1a340")?, Some(1));
980 assert_eq!(idx.find_hex("1a340")?, Some(1));
985 assert_eq!(idx.find_hex("1a345")?, Some(3));
981 assert_eq!(idx.find_hex("1a345")?, Some(3));
986 assert_eq!(idx.find_hex("1a341")?, None);
982 assert_eq!(idx.find_hex("1a341")?, None);
987
983
988 // there's no readonly block to mask
984 // there's no readonly block to mask
989 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
985 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
990 Ok(())
986 Ok(())
991 }
987 }
992
988
993 #[test]
989 #[test]
994 fn test_unique_prefix_len_zero_prefix() {
990 fn test_unique_prefix_len_zero_prefix() {
995 let mut idx = TestNtIndex::new();
991 let mut idx = TestNtIndex::new();
996 idx.insert(0, "00000abcd").unwrap();
992 idx.insert(0, "00000abcd").unwrap();
997
993
998 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
994 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
999 // in the nodetree proper, this will be found at the first nybble
995 // in the nodetree proper, this will be found at the first nybble
1000 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
996 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
1001 // but the first difference with `NULL_NODE`
997 // but the first difference with `NULL_NODE`
1002 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
998 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
1003 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
999 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
1004
1000
1005 // same with odd result
1001 // same with odd result
1006 idx.insert(1, "00123").unwrap();
1002 idx.insert(1, "00123").unwrap();
1007 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
1003 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
1008 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
1004 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
1009
1005
1010 // these are unchanged of course
1006 // these are unchanged of course
1011 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
1007 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
1012 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
1008 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
1013 }
1009 }
1014
1010
1015 #[test]
1011 #[test]
1016 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
1012 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
1017 // check that the splitting loop is long enough
1013 // check that the splitting loop is long enough
1018 let mut nt_idx = TestNtIndex::new();
1014 let mut nt_idx = TestNtIndex::new();
1019 let nt = &mut nt_idx.nt;
1015 let nt = &mut nt_idx.nt;
1020 let idx = &mut nt_idx.index;
1016 let idx = &mut nt_idx.index;
1021
1017
1022 let node0_hex = hex_pad_right("444444");
1018 let node0_hex = hex_pad_right("444444");
1023 let mut node1_hex = hex_pad_right("444444").clone();
1019 let mut node1_hex = hex_pad_right("444444").clone();
1024 node1_hex.pop();
1020 node1_hex.pop();
1025 node1_hex.push('5');
1021 node1_hex.push('5');
1026 let node0 = Node::from_hex(&node0_hex).unwrap();
1022 let node0 = Node::from_hex(&node0_hex).unwrap();
1027 let node1 = Node::from_hex(&node1_hex).unwrap();
1023 let node1 = Node::from_hex(&node1_hex).unwrap();
1028
1024
1029 idx.insert(0, node0.clone());
1025 idx.insert(0, node0.clone());
1030 nt.insert(idx, &node0, 0)?;
1026 nt.insert(idx, &node0, 0)?;
1031 idx.insert(1, node1.clone());
1027 idx.insert(1, node1.clone());
1032 nt.insert(idx, &node1, 1)?;
1028 nt.insert(idx, &node1, 1)?;
1033
1029
1034 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
1030 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
1035 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
1031 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
1036 Ok(())
1032 Ok(())
1037 }
1033 }
1038
1034
1039 #[test]
1035 #[test]
1040 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
1036 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
1041 let mut idx = TestNtIndex::new();
1037 let mut idx = TestNtIndex::new();
1042 idx.insert(0, "1234")?;
1038 idx.insert(0, "1234")?;
1043 idx.insert(1, "1235")?;
1039 idx.insert(1, "1235")?;
1044 idx.insert(2, "131")?;
1040 idx.insert(2, "131")?;
1045 idx.insert(3, "cafe")?;
1041 idx.insert(3, "cafe")?;
1046 let mut idx = idx.commit();
1042 let mut idx = idx.commit();
1047 assert_eq!(idx.find_hex("1234")?, Some(0));
1043 assert_eq!(idx.find_hex("1234")?, Some(0));
1048 assert_eq!(idx.find_hex("1235")?, Some(1));
1044 assert_eq!(idx.find_hex("1235")?, Some(1));
1049 assert_eq!(idx.find_hex("131")?, Some(2));
1045 assert_eq!(idx.find_hex("131")?, Some(2));
1050 assert_eq!(idx.find_hex("cafe")?, Some(3));
1046 assert_eq!(idx.find_hex("cafe")?, Some(3));
1051 // we did not add anything since init from readonly
1047 // we did not add anything since init from readonly
1052 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1048 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1053
1049
1054 idx.insert(4, "123A")?;
1050 idx.insert(4, "123A")?;
1055 assert_eq!(idx.find_hex("1234")?, Some(0));
1051 assert_eq!(idx.find_hex("1234")?, Some(0));
1056 assert_eq!(idx.find_hex("1235")?, Some(1));
1052 assert_eq!(idx.find_hex("1235")?, Some(1));
1057 assert_eq!(idx.find_hex("131")?, Some(2));
1053 assert_eq!(idx.find_hex("131")?, Some(2));
1058 assert_eq!(idx.find_hex("cafe")?, Some(3));
1054 assert_eq!(idx.find_hex("cafe")?, Some(3));
1059 assert_eq!(idx.find_hex("123A")?, Some(4));
1055 assert_eq!(idx.find_hex("123A")?, Some(4));
1060 // we masked blocks for all prefixes of "123", including the root
1056 // we masked blocks for all prefixes of "123", including the root
1061 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1057 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1062
1058
1063 eprintln!("{:?}", idx.nt);
1059 eprintln!("{:?}", idx.nt);
1064 idx.insert(5, "c0")?;
1060 idx.insert(5, "c0")?;
1065 assert_eq!(idx.find_hex("cafe")?, Some(3));
1061 assert_eq!(idx.find_hex("cafe")?, Some(3));
1066 assert_eq!(idx.find_hex("c0")?, Some(5));
1062 assert_eq!(idx.find_hex("c0")?, Some(5));
1067 assert_eq!(idx.find_hex("c1")?, None);
1063 assert_eq!(idx.find_hex("c1")?, None);
1068 assert_eq!(idx.find_hex("1234")?, Some(0));
1064 assert_eq!(idx.find_hex("1234")?, Some(0));
1069 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1065 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1070 // it doesn't mask anything
1066 // it doesn't mask anything
1071 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1067 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1072
1068
1073 Ok(())
1069 Ok(())
1074 }
1070 }
1075
1071
1076 #[test]
1072 #[test]
1077 fn test_invalidate_all() -> Result<(), NodeMapError> {
1073 fn test_invalidate_all() -> Result<(), NodeMapError> {
1078 let mut idx = TestNtIndex::new();
1074 let mut idx = TestNtIndex::new();
1079 idx.insert(0, "1234")?;
1075 idx.insert(0, "1234")?;
1080 idx.insert(1, "1235")?;
1076 idx.insert(1, "1235")?;
1081 idx.insert(2, "131")?;
1077 idx.insert(2, "131")?;
1082 idx.insert(3, "cafe")?;
1078 idx.insert(3, "cafe")?;
1083 let mut idx = idx.commit();
1079 let mut idx = idx.commit();
1084
1080
1085 idx.nt.invalidate_all();
1081 idx.nt.invalidate_all();
1086
1082
1087 assert_eq!(idx.find_hex("1234")?, None);
1083 assert_eq!(idx.find_hex("1234")?, None);
1088 assert_eq!(idx.find_hex("1235")?, None);
1084 assert_eq!(idx.find_hex("1235")?, None);
1089 assert_eq!(idx.find_hex("131")?, None);
1085 assert_eq!(idx.find_hex("131")?, None);
1090 assert_eq!(idx.find_hex("cafe")?, None);
1086 assert_eq!(idx.find_hex("cafe")?, None);
1091 // all the readonly blocks have been masked, this is the
1087 // all the readonly blocks have been masked, this is the
1092 // conventional expected response
1088 // conventional expected response
1093 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1089 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1094 Ok(())
1090 Ok(())
1095 }
1091 }
1096
1092
1097 #[test]
1093 #[test]
1098 fn test_into_added_empty() {
1094 fn test_into_added_empty() {
1099 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1095 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1100 assert!(sample_nodetree()
1096 assert!(sample_nodetree()
1101 .into_readonly_and_added_bytes()
1097 .into_readonly_and_added_bytes()
1102 .1
1098 .1
1103 .is_empty());
1099 .is_empty());
1104 }
1100 }
1105
1101
1106 #[test]
1102 #[test]
1107 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1103 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1108 let mut idx = TestNtIndex::new();
1104 let mut idx = TestNtIndex::new();
1109 idx.insert(0, "1234")?;
1105 idx.insert(0, "1234")?;
1110 let mut idx = idx.commit();
1106 let mut idx = idx.commit();
1111 idx.insert(4, "cafe")?;
1107 idx.insert(4, "cafe")?;
1112 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1108 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1113
1109
1114 // only the root block has been changed
1110 // only the root block has been changed
1115 assert_eq!(bytes.len(), BLOCK_SIZE);
1111 assert_eq!(bytes.len(), BLOCK_SIZE);
1116 // big endian for -2
1112 // big endian for -2
1117 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1113 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1118 // big endian for -6
1114 // big endian for -6
1119 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1115 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1120 Ok(())
1116 Ok(())
1121 }
1117 }
1122 }
1118 }
@@ -1,168 +1,169
1 // utils module
1 // utils module
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Contains useful functions, traits, structs, etc. for use in core.
8 //! Contains useful functions, traits, structs, etc. for use in core.
9
9
10 use crate::utils::hg_path::HgPath;
10 use crate::utils::hg_path::HgPath;
11 use std::{io::Write, ops::Deref};
11 use std::{io::Write, ops::Deref};
12
12
13 pub mod files;
13 pub mod files;
14 pub mod hg_path;
14 pub mod hg_path;
15 pub mod path_auditor;
15 pub mod path_auditor;
16
16
17 /// Useful until rust/issues/56345 is stable
17 /// Useful until rust/issues/56345 is stable
18 ///
18 ///
19 /// # Examples
19 /// # Examples
20 ///
20 ///
21 /// ```
21 /// ```
22 /// use crate::hg::utils::find_slice_in_slice;
22 /// use crate::hg::utils::find_slice_in_slice;
23 ///
23 ///
24 /// let haystack = b"This is the haystack".to_vec();
24 /// let haystack = b"This is the haystack".to_vec();
25 /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
25 /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
26 /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
26 /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
27 /// ```
27 /// ```
28 pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
28 pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
29 where
29 where
30 for<'a> &'a [T]: PartialEq,
30 for<'a> &'a [T]: PartialEq,
31 {
31 {
32 slice
32 slice
33 .windows(needle.len())
33 .windows(needle.len())
34 .position(|window| window == needle)
34 .position(|window| window == needle)
35 }
35 }
36
36
37 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
37 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
38 ///
38 ///
39 /// # Examples
39 /// # Examples
40 ///
40 ///
41 /// ```
41 /// ```
42 /// use crate::hg::utils::replace_slice;
42 /// use crate::hg::utils::replace_slice;
43 /// let mut line = b"I hate writing tests!".to_vec();
43 /// let mut line = b"I hate writing tests!".to_vec();
44 /// replace_slice(&mut line, b"hate", b"love");
44 /// replace_slice(&mut line, b"hate", b"love");
45 /// assert_eq!(
45 /// assert_eq!(
46 /// line,
46 /// line,
47 /// b"I love writing tests!".to_vec()
47 /// b"I love writing tests!".to_vec()
48 /// );
48 /// );
49 /// ```
49 /// ```
50 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
50 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
51 where
51 where
52 T: Clone + PartialEq,
52 T: Clone + PartialEq,
53 {
53 {
54 if buf.len() < from.len() || from.len() != to.len() {
54 if buf.len() < from.len() || from.len() != to.len() {
55 return;
55 return;
56 }
56 }
57 for i in 0..=buf.len() - from.len() {
57 for i in 0..=buf.len() - from.len() {
58 if buf[i..].starts_with(from) {
58 if buf[i..].starts_with(from) {
59 buf[i..(i + from.len())].clone_from_slice(to);
59 buf[i..(i + from.len())].clone_from_slice(to);
60 }
60 }
61 }
61 }
62 }
62 }
63
63
64 pub trait SliceExt {
64 pub trait SliceExt {
65 fn trim_end(&self) -> &Self;
65 fn trim_end(&self) -> &Self;
66 fn trim_start(&self) -> &Self;
66 fn trim_start(&self) -> &Self;
67 fn trim(&self) -> &Self;
67 fn trim(&self) -> &Self;
68 fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
68 fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
69 }
69 }
70
70
71 #[allow(clippy::trivially_copy_pass_by_ref)]
71 fn is_not_whitespace(c: &u8) -> bool {
72 fn is_not_whitespace(c: &u8) -> bool {
72 !(*c as char).is_whitespace()
73 !(*c as char).is_whitespace()
73 }
74 }
74
75
75 impl SliceExt for [u8] {
76 impl SliceExt for [u8] {
76 fn trim_end(&self) -> &[u8] {
77 fn trim_end(&self) -> &[u8] {
77 if let Some(last) = self.iter().rposition(is_not_whitespace) {
78 if let Some(last) = self.iter().rposition(is_not_whitespace) {
78 &self[..last + 1]
79 &self[..=last]
79 } else {
80 } else {
80 &[]
81 &[]
81 }
82 }
82 }
83 }
83 fn trim_start(&self) -> &[u8] {
84 fn trim_start(&self) -> &[u8] {
84 if let Some(first) = self.iter().position(is_not_whitespace) {
85 if let Some(first) = self.iter().position(is_not_whitespace) {
85 &self[first..]
86 &self[first..]
86 } else {
87 } else {
87 &[]
88 &[]
88 }
89 }
89 }
90 }
90
91
91 /// ```
92 /// ```
92 /// use hg::utils::SliceExt;
93 /// use hg::utils::SliceExt;
93 /// assert_eq!(
94 /// assert_eq!(
94 /// b" to trim ".trim(),
95 /// b" to trim ".trim(),
95 /// b"to trim"
96 /// b"to trim"
96 /// );
97 /// );
97 /// assert_eq!(
98 /// assert_eq!(
98 /// b"to trim ".trim(),
99 /// b"to trim ".trim(),
99 /// b"to trim"
100 /// b"to trim"
100 /// );
101 /// );
101 /// assert_eq!(
102 /// assert_eq!(
102 /// b" to trim".trim(),
103 /// b" to trim".trim(),
103 /// b"to trim"
104 /// b"to trim"
104 /// );
105 /// );
105 /// ```
106 /// ```
106 fn trim(&self) -> &[u8] {
107 fn trim(&self) -> &[u8] {
107 self.trim_start().trim_end()
108 self.trim_start().trim_end()
108 }
109 }
109
110
110 fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
111 fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
111 if self.starts_with(needle) {
112 if self.starts_with(needle) {
112 Some(&self[needle.len()..])
113 Some(&self[needle.len()..])
113 } else {
114 } else {
114 None
115 None
115 }
116 }
116 }
117 }
117 }
118 }
118
119
119 pub trait Escaped {
120 pub trait Escaped {
120 /// Return bytes escaped for display to the user
121 /// Return bytes escaped for display to the user
121 fn escaped_bytes(&self) -> Vec<u8>;
122 fn escaped_bytes(&self) -> Vec<u8>;
122 }
123 }
123
124
124 impl Escaped for u8 {
125 impl Escaped for u8 {
125 fn escaped_bytes(&self) -> Vec<u8> {
126 fn escaped_bytes(&self) -> Vec<u8> {
126 let mut acc = vec![];
127 let mut acc = vec![];
127 match self {
128 match self {
128 c @ b'\'' | c @ b'\\' => {
129 c @ b'\'' | c @ b'\\' => {
129 acc.push(b'\\');
130 acc.push(b'\\');
130 acc.push(*c);
131 acc.push(*c);
131 }
132 }
132 b'\t' => {
133 b'\t' => {
133 acc.extend(br"\\t");
134 acc.extend(br"\\t");
134 }
135 }
135 b'\n' => {
136 b'\n' => {
136 acc.extend(br"\\n");
137 acc.extend(br"\\n");
137 }
138 }
138 b'\r' => {
139 b'\r' => {
139 acc.extend(br"\\r");
140 acc.extend(br"\\r");
140 }
141 }
141 c if (*c < b' ' || *c >= 127) => {
142 c if (*c < b' ' || *c >= 127) => {
142 write!(acc, "\\x{:x}", self).unwrap();
143 write!(acc, "\\x{:x}", self).unwrap();
143 }
144 }
144 c => {
145 c => {
145 acc.push(*c);
146 acc.push(*c);
146 }
147 }
147 }
148 }
148 acc
149 acc
149 }
150 }
150 }
151 }
151
152
152 impl<'a, T: Escaped> Escaped for &'a [T] {
153 impl<'a, T: Escaped> Escaped for &'a [T] {
153 fn escaped_bytes(&self) -> Vec<u8> {
154 fn escaped_bytes(&self) -> Vec<u8> {
154 self.iter().flat_map(|item| item.escaped_bytes()).collect()
155 self.iter().flat_map(Escaped::escaped_bytes).collect()
155 }
156 }
156 }
157 }
157
158
158 impl<T: Escaped> Escaped for Vec<T> {
159 impl<T: Escaped> Escaped for Vec<T> {
159 fn escaped_bytes(&self) -> Vec<u8> {
160 fn escaped_bytes(&self) -> Vec<u8> {
160 self.deref().escaped_bytes()
161 self.deref().escaped_bytes()
161 }
162 }
162 }
163 }
163
164
164 impl<'a> Escaped for &'a HgPath {
165 impl<'a> Escaped for &'a HgPath {
165 fn escaped_bytes(&self) -> Vec<u8> {
166 fn escaped_bytes(&self) -> Vec<u8> {
166 self.as_bytes().escaped_bytes()
167 self.as_bytes().escaped_bytes()
167 }
168 }
168 }
169 }
@@ -1,384 +1,382
1 // files.rs
1 // files.rs
2 //
2 //
3 // Copyright 2019
3 // Copyright 2019
4 // Raphaël Gomès <rgomes@octobus.net>,
4 // Raphaël Gomès <rgomes@octobus.net>,
5 // Yuya Nishihara <yuya@tcha.org>
5 // Yuya Nishihara <yuya@tcha.org>
6 //
6 //
7 // This software may be used and distributed according to the terms of the
7 // This software may be used and distributed according to the terms of the
8 // GNU General Public License version 2 or any later version.
8 // GNU General Public License version 2 or any later version.
9
9
10 //! Functions for fiddling with files.
10 //! Functions for fiddling with files.
11
11
12 use crate::utils::{
12 use crate::utils::{
13 hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
13 hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
14 path_auditor::PathAuditor,
14 path_auditor::PathAuditor,
15 replace_slice,
15 replace_slice,
16 };
16 };
17 use lazy_static::lazy_static;
17 use lazy_static::lazy_static;
18 use same_file::is_same_file;
18 use same_file::is_same_file;
19 use std::borrow::ToOwned;
19 use std::borrow::ToOwned;
20 use std::fs::Metadata;
20 use std::fs::Metadata;
21 use std::iter::FusedIterator;
21 use std::iter::FusedIterator;
22 use std::ops::Deref;
22 use std::ops::Deref;
23 use std::path::{Path, PathBuf};
23 use std::path::{Path, PathBuf};
24
24
25 pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
25 pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
26 let os_str;
26 let os_str;
27 #[cfg(unix)]
27 #[cfg(unix)]
28 {
28 {
29 use std::os::unix::ffi::OsStrExt;
29 use std::os::unix::ffi::OsStrExt;
30 os_str = std::ffi::OsStr::from_bytes(bytes);
30 os_str = std::ffi::OsStr::from_bytes(bytes);
31 }
31 }
32 // TODO Handle other platforms
32 // TODO Handle other platforms
33 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
33 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
34 // Perhaps, the return type would have to be Result<PathBuf>.
34 // Perhaps, the return type would have to be Result<PathBuf>.
35
35
36 Path::new(os_str)
36 Path::new(os_str)
37 }
37 }
38
38
39 // TODO: need to convert from WTF8 to MBCS bytes on Windows.
39 // TODO: need to convert from WTF8 to MBCS bytes on Windows.
40 // that's why Vec<u8> is returned.
40 // that's why Vec<u8> is returned.
41 #[cfg(unix)]
41 #[cfg(unix)]
42 pub fn get_bytes_from_path(path: impl AsRef<Path>) -> Vec<u8> {
42 pub fn get_bytes_from_path(path: impl AsRef<Path>) -> Vec<u8> {
43 use std::os::unix::ffi::OsStrExt;
43 use std::os::unix::ffi::OsStrExt;
44 path.as_ref().as_os_str().as_bytes().to_vec()
44 path.as_ref().as_os_str().as_bytes().to_vec()
45 }
45 }
46
46
47 /// An iterator over repository path yielding itself and its ancestors.
47 /// An iterator over repository path yielding itself and its ancestors.
48 #[derive(Copy, Clone, Debug)]
48 #[derive(Copy, Clone, Debug)]
49 pub struct Ancestors<'a> {
49 pub struct Ancestors<'a> {
50 next: Option<&'a HgPath>,
50 next: Option<&'a HgPath>,
51 }
51 }
52
52
53 impl<'a> Iterator for Ancestors<'a> {
53 impl<'a> Iterator for Ancestors<'a> {
54 type Item = &'a HgPath;
54 type Item = &'a HgPath;
55
55
56 fn next(&mut self) -> Option<Self::Item> {
56 fn next(&mut self) -> Option<Self::Item> {
57 let next = self.next;
57 let next = self.next;
58 self.next = match self.next {
58 self.next = match self.next {
59 Some(s) if s.is_empty() => None,
59 Some(s) if s.is_empty() => None,
60 Some(s) => {
60 Some(s) => {
61 let p = s.bytes().rposition(|c| *c == b'/').unwrap_or(0);
61 let p = s.bytes().rposition(|c| *c == b'/').unwrap_or(0);
62 Some(HgPath::new(&s.as_bytes()[..p]))
62 Some(HgPath::new(&s.as_bytes()[..p]))
63 }
63 }
64 None => None,
64 None => None,
65 };
65 };
66 next
66 next
67 }
67 }
68 }
68 }
69
69
70 impl<'a> FusedIterator for Ancestors<'a> {}
70 impl<'a> FusedIterator for Ancestors<'a> {}
71
71
72 /// An iterator over repository path yielding itself and its ancestors.
72 /// An iterator over repository path yielding itself and its ancestors.
73 #[derive(Copy, Clone, Debug)]
73 #[derive(Copy, Clone, Debug)]
74 pub(crate) struct AncestorsWithBase<'a> {
74 pub(crate) struct AncestorsWithBase<'a> {
75 next: Option<(&'a HgPath, &'a HgPath)>,
75 next: Option<(&'a HgPath, &'a HgPath)>,
76 }
76 }
77
77
78 impl<'a> Iterator for AncestorsWithBase<'a> {
78 impl<'a> Iterator for AncestorsWithBase<'a> {
79 type Item = (&'a HgPath, &'a HgPath);
79 type Item = (&'a HgPath, &'a HgPath);
80
80
81 fn next(&mut self) -> Option<Self::Item> {
81 fn next(&mut self) -> Option<Self::Item> {
82 let next = self.next;
82 let next = self.next;
83 self.next = match self.next {
83 self.next = match self.next {
84 Some((s, _)) if s.is_empty() => None,
84 Some((s, _)) if s.is_empty() => None,
85 Some((s, _)) => Some(s.split_filename()),
85 Some((s, _)) => Some(s.split_filename()),
86 None => None,
86 None => None,
87 };
87 };
88 next
88 next
89 }
89 }
90 }
90 }
91
91
92 impl<'a> FusedIterator for AncestorsWithBase<'a> {}
92 impl<'a> FusedIterator for AncestorsWithBase<'a> {}
93
93
94 /// Returns an iterator yielding ancestor directories of the given repository
94 /// Returns an iterator yielding ancestor directories of the given repository
95 /// path.
95 /// path.
96 ///
96 ///
97 /// The path is separated by '/', and must not start with '/'.
97 /// The path is separated by '/', and must not start with '/'.
98 ///
98 ///
99 /// The path itself isn't included unless it is b"" (meaning the root
99 /// The path itself isn't included unless it is b"" (meaning the root
100 /// directory.)
100 /// directory.)
101 pub fn find_dirs<'a>(path: &'a HgPath) -> Ancestors<'a> {
101 pub fn find_dirs(path: &HgPath) -> Ancestors {
102 let mut dirs = Ancestors { next: Some(path) };
102 let mut dirs = Ancestors { next: Some(path) };
103 if !path.is_empty() {
103 if !path.is_empty() {
104 dirs.next(); // skip itself
104 dirs.next(); // skip itself
105 }
105 }
106 dirs
106 dirs
107 }
107 }
108
108
109 /// Returns an iterator yielding ancestor directories of the given repository
109 /// Returns an iterator yielding ancestor directories of the given repository
110 /// path.
110 /// path.
111 ///
111 ///
112 /// The path is separated by '/', and must not start with '/'.
112 /// The path is separated by '/', and must not start with '/'.
113 ///
113 ///
114 /// The path itself isn't included unless it is b"" (meaning the root
114 /// The path itself isn't included unless it is b"" (meaning the root
115 /// directory.)
115 /// directory.)
116 pub(crate) fn find_dirs_with_base<'a>(
116 pub(crate) fn find_dirs_with_base(path: &HgPath) -> AncestorsWithBase {
117 path: &'a HgPath,
118 ) -> AncestorsWithBase<'a> {
119 let mut dirs = AncestorsWithBase {
117 let mut dirs = AncestorsWithBase {
120 next: Some((path, HgPath::new(b""))),
118 next: Some((path, HgPath::new(b""))),
121 };
119 };
122 if !path.is_empty() {
120 if !path.is_empty() {
123 dirs.next(); // skip itself
121 dirs.next(); // skip itself
124 }
122 }
125 dirs
123 dirs
126 }
124 }
127
125
128 /// TODO more than ASCII?
126 /// TODO more than ASCII?
129 pub fn normalize_case(path: &HgPath) -> HgPathBuf {
127 pub fn normalize_case(path: &HgPath) -> HgPathBuf {
130 #[cfg(windows)] // NTFS compares via upper()
128 #[cfg(windows)] // NTFS compares via upper()
131 return path.to_ascii_uppercase();
129 return path.to_ascii_uppercase();
132 #[cfg(unix)]
130 #[cfg(unix)]
133 path.to_ascii_lowercase()
131 path.to_ascii_lowercase()
134 }
132 }
135
133
136 lazy_static! {
134 lazy_static! {
137 static ref IGNORED_CHARS: Vec<Vec<u8>> = {
135 static ref IGNORED_CHARS: Vec<Vec<u8>> = {
138 [
136 [
139 0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d,
137 0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d,
140 0x202e, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
138 0x202e, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
141 ]
139 ]
142 .iter()
140 .iter()
143 .map(|code| {
141 .map(|code| {
144 std::char::from_u32(*code)
142 std::char::from_u32(*code)
145 .unwrap()
143 .unwrap()
146 .encode_utf8(&mut [0; 3])
144 .encode_utf8(&mut [0; 3])
147 .bytes()
145 .bytes()
148 .collect()
146 .collect()
149 })
147 })
150 .collect()
148 .collect()
151 };
149 };
152 }
150 }
153
151
154 fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> {
152 fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> {
155 let mut buf = bytes.to_owned();
153 let mut buf = bytes.to_owned();
156 let needs_escaping = bytes.iter().any(|b| *b == b'\xe2' || *b == b'\xef');
154 let needs_escaping = bytes.iter().any(|b| *b == b'\xe2' || *b == b'\xef');
157 if needs_escaping {
155 if needs_escaping {
158 for forbidden in IGNORED_CHARS.iter() {
156 for forbidden in IGNORED_CHARS.iter() {
159 replace_slice(&mut buf, forbidden, &[])
157 replace_slice(&mut buf, forbidden, &[])
160 }
158 }
161 buf
159 buf
162 } else {
160 } else {
163 buf
161 buf
164 }
162 }
165 }
163 }
166
164
167 pub fn lower_clean(bytes: &[u8]) -> Vec<u8> {
165 pub fn lower_clean(bytes: &[u8]) -> Vec<u8> {
168 hfs_ignore_clean(&bytes.to_ascii_lowercase())
166 hfs_ignore_clean(&bytes.to_ascii_lowercase())
169 }
167 }
170
168
171 #[derive(Eq, PartialEq, Ord, PartialOrd, Copy, Clone)]
169 #[derive(Eq, PartialEq, Ord, PartialOrd, Copy, Clone)]
172 pub struct HgMetadata {
170 pub struct HgMetadata {
173 pub st_dev: u64,
171 pub st_dev: u64,
174 pub st_mode: u32,
172 pub st_mode: u32,
175 pub st_nlink: u64,
173 pub st_nlink: u64,
176 pub st_size: u64,
174 pub st_size: u64,
177 pub st_mtime: i64,
175 pub st_mtime: i64,
178 pub st_ctime: i64,
176 pub st_ctime: i64,
179 }
177 }
180
178
181 // TODO support other plaforms
179 // TODO support other plaforms
182 #[cfg(unix)]
180 #[cfg(unix)]
183 impl HgMetadata {
181 impl HgMetadata {
184 pub fn from_metadata(metadata: Metadata) -> Self {
182 pub fn from_metadata(metadata: Metadata) -> Self {
185 use std::os::unix::fs::MetadataExt;
183 use std::os::unix::fs::MetadataExt;
186 Self {
184 Self {
187 st_dev: metadata.dev(),
185 st_dev: metadata.dev(),
188 st_mode: metadata.mode(),
186 st_mode: metadata.mode(),
189 st_nlink: metadata.nlink(),
187 st_nlink: metadata.nlink(),
190 st_size: metadata.size(),
188 st_size: metadata.size(),
191 st_mtime: metadata.mtime(),
189 st_mtime: metadata.mtime(),
192 st_ctime: metadata.ctime(),
190 st_ctime: metadata.ctime(),
193 }
191 }
194 }
192 }
195 }
193 }
196
194
197 /// Returns the canonical path of `name`, given `cwd` and `root`
195 /// Returns the canonical path of `name`, given `cwd` and `root`
198 pub fn canonical_path(
196 pub fn canonical_path(
199 root: impl AsRef<Path>,
197 root: impl AsRef<Path>,
200 cwd: impl AsRef<Path>,
198 cwd: impl AsRef<Path>,
201 name: impl AsRef<Path>,
199 name: impl AsRef<Path>,
202 ) -> Result<PathBuf, HgPathError> {
200 ) -> Result<PathBuf, HgPathError> {
203 // TODO add missing normalization for other platforms
201 // TODO add missing normalization for other platforms
204 let root = root.as_ref();
202 let root = root.as_ref();
205 let cwd = cwd.as_ref();
203 let cwd = cwd.as_ref();
206 let name = name.as_ref();
204 let name = name.as_ref();
207
205
208 let name = if !name.is_absolute() {
206 let name = if !name.is_absolute() {
209 root.join(&cwd).join(&name)
207 root.join(&cwd).join(&name)
210 } else {
208 } else {
211 name.to_owned()
209 name.to_owned()
212 };
210 };
213 let auditor = PathAuditor::new(&root);
211 let auditor = PathAuditor::new(&root);
214 if name != root && name.starts_with(&root) {
212 if name != root && name.starts_with(&root) {
215 let name = name.strip_prefix(&root).unwrap();
213 let name = name.strip_prefix(&root).unwrap();
216 auditor.audit_path(path_to_hg_path_buf(name)?)?;
214 auditor.audit_path(path_to_hg_path_buf(name)?)?;
217 return Ok(name.to_owned());
215 Ok(name.to_owned())
218 } else if name == root {
216 } else if name == root {
219 return Ok("".into());
217 Ok("".into())
220 } else {
218 } else {
221 // Determine whether `name' is in the hierarchy at or beneath `root',
219 // Determine whether `name' is in the hierarchy at or beneath `root',
222 // by iterating name=name.parent() until it returns `None` (can't
220 // by iterating name=name.parent() until it returns `None` (can't
223 // check name == '/', because that doesn't work on windows).
221 // check name == '/', because that doesn't work on windows).
224 let mut name = name.deref();
222 let mut name = name.deref();
225 let original_name = name.to_owned();
223 let original_name = name.to_owned();
226 loop {
224 loop {
227 let same = is_same_file(&name, &root).unwrap_or(false);
225 let same = is_same_file(&name, &root).unwrap_or(false);
228 if same {
226 if same {
229 if name == original_name {
227 if name == original_name {
230 // `name` was actually the same as root (maybe a symlink)
228 // `name` was actually the same as root (maybe a symlink)
231 return Ok("".into());
229 return Ok("".into());
232 }
230 }
233 // `name` is a symlink to root, so `original_name` is under
231 // `name` is a symlink to root, so `original_name` is under
234 // root
232 // root
235 let rel_path = original_name.strip_prefix(&name).unwrap();
233 let rel_path = original_name.strip_prefix(&name).unwrap();
236 auditor.audit_path(path_to_hg_path_buf(&rel_path)?)?;
234 auditor.audit_path(path_to_hg_path_buf(&rel_path)?)?;
237 return Ok(rel_path.to_owned());
235 return Ok(rel_path.to_owned());
238 }
236 }
239 name = match name.parent() {
237 name = match name.parent() {
240 None => break,
238 None => break,
241 Some(p) => p,
239 Some(p) => p,
242 };
240 };
243 }
241 }
244 // TODO hint to the user about using --cwd
242 // TODO hint to the user about using --cwd
245 // Bubble up the responsibility to Python for now
243 // Bubble up the responsibility to Python for now
246 Err(HgPathError::NotUnderRoot {
244 Err(HgPathError::NotUnderRoot {
247 path: original_name.to_owned(),
245 path: original_name.to_owned(),
248 root: root.to_owned(),
246 root: root.to_owned(),
249 })
247 })
250 }
248 }
251 }
249 }
252
250
253 #[cfg(test)]
251 #[cfg(test)]
254 mod tests {
252 mod tests {
255 use super::*;
253 use super::*;
256 use pretty_assertions::assert_eq;
254 use pretty_assertions::assert_eq;
257
255
258 #[test]
256 #[test]
259 fn find_dirs_some() {
257 fn find_dirs_some() {
260 let mut dirs = super::find_dirs(HgPath::new(b"foo/bar/baz"));
258 let mut dirs = super::find_dirs(HgPath::new(b"foo/bar/baz"));
261 assert_eq!(dirs.next(), Some(HgPath::new(b"foo/bar")));
259 assert_eq!(dirs.next(), Some(HgPath::new(b"foo/bar")));
262 assert_eq!(dirs.next(), Some(HgPath::new(b"foo")));
260 assert_eq!(dirs.next(), Some(HgPath::new(b"foo")));
263 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
261 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
264 assert_eq!(dirs.next(), None);
262 assert_eq!(dirs.next(), None);
265 assert_eq!(dirs.next(), None);
263 assert_eq!(dirs.next(), None);
266 }
264 }
267
265
268 #[test]
266 #[test]
269 fn find_dirs_empty() {
267 fn find_dirs_empty() {
270 // looks weird, but mercurial.pathutil.finddirs(b"") yields b""
268 // looks weird, but mercurial.pathutil.finddirs(b"") yields b""
271 let mut dirs = super::find_dirs(HgPath::new(b""));
269 let mut dirs = super::find_dirs(HgPath::new(b""));
272 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
270 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
273 assert_eq!(dirs.next(), None);
271 assert_eq!(dirs.next(), None);
274 assert_eq!(dirs.next(), None);
272 assert_eq!(dirs.next(), None);
275 }
273 }
276
274
277 #[test]
275 #[test]
278 fn test_find_dirs_with_base_some() {
276 fn test_find_dirs_with_base_some() {
279 let mut dirs = super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
277 let mut dirs = super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
280 assert_eq!(
278 assert_eq!(
281 dirs.next(),
279 dirs.next(),
282 Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz")))
280 Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz")))
283 );
281 );
284 assert_eq!(
282 assert_eq!(
285 dirs.next(),
283 dirs.next(),
286 Some((HgPath::new(b"foo"), HgPath::new(b"bar")))
284 Some((HgPath::new(b"foo"), HgPath::new(b"bar")))
287 );
285 );
288 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"foo"))));
286 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"foo"))));
289 assert_eq!(dirs.next(), None);
287 assert_eq!(dirs.next(), None);
290 assert_eq!(dirs.next(), None);
288 assert_eq!(dirs.next(), None);
291 }
289 }
292
290
293 #[test]
291 #[test]
294 fn test_find_dirs_with_base_empty() {
292 fn test_find_dirs_with_base_empty() {
295 let mut dirs = super::find_dirs_with_base(HgPath::new(b""));
293 let mut dirs = super::find_dirs_with_base(HgPath::new(b""));
296 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b""))));
294 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b""))));
297 assert_eq!(dirs.next(), None);
295 assert_eq!(dirs.next(), None);
298 assert_eq!(dirs.next(), None);
296 assert_eq!(dirs.next(), None);
299 }
297 }
300
298
301 #[test]
299 #[test]
302 fn test_canonical_path() {
300 fn test_canonical_path() {
303 let root = Path::new("/repo");
301 let root = Path::new("/repo");
304 let cwd = Path::new("/dir");
302 let cwd = Path::new("/dir");
305 let name = Path::new("filename");
303 let name = Path::new("filename");
306 assert_eq!(
304 assert_eq!(
307 canonical_path(root, cwd, name),
305 canonical_path(root, cwd, name),
308 Err(HgPathError::NotUnderRoot {
306 Err(HgPathError::NotUnderRoot {
309 path: PathBuf::from("/dir/filename"),
307 path: PathBuf::from("/dir/filename"),
310 root: root.to_path_buf()
308 root: root.to_path_buf()
311 })
309 })
312 );
310 );
313
311
314 let root = Path::new("/repo");
312 let root = Path::new("/repo");
315 let cwd = Path::new("/");
313 let cwd = Path::new("/");
316 let name = Path::new("filename");
314 let name = Path::new("filename");
317 assert_eq!(
315 assert_eq!(
318 canonical_path(root, cwd, name),
316 canonical_path(root, cwd, name),
319 Err(HgPathError::NotUnderRoot {
317 Err(HgPathError::NotUnderRoot {
320 path: PathBuf::from("/filename"),
318 path: PathBuf::from("/filename"),
321 root: root.to_path_buf()
319 root: root.to_path_buf()
322 })
320 })
323 );
321 );
324
322
325 let root = Path::new("/repo");
323 let root = Path::new("/repo");
326 let cwd = Path::new("/");
324 let cwd = Path::new("/");
327 let name = Path::new("repo/filename");
325 let name = Path::new("repo/filename");
328 assert_eq!(
326 assert_eq!(
329 canonical_path(root, cwd, name),
327 canonical_path(root, cwd, name),
330 Ok(PathBuf::from("filename"))
328 Ok(PathBuf::from("filename"))
331 );
329 );
332
330
333 let root = Path::new("/repo");
331 let root = Path::new("/repo");
334 let cwd = Path::new("/repo");
332 let cwd = Path::new("/repo");
335 let name = Path::new("filename");
333 let name = Path::new("filename");
336 assert_eq!(
334 assert_eq!(
337 canonical_path(root, cwd, name),
335 canonical_path(root, cwd, name),
338 Ok(PathBuf::from("filename"))
336 Ok(PathBuf::from("filename"))
339 );
337 );
340
338
341 let root = Path::new("/repo");
339 let root = Path::new("/repo");
342 let cwd = Path::new("/repo/subdir");
340 let cwd = Path::new("/repo/subdir");
343 let name = Path::new("filename");
341 let name = Path::new("filename");
344 assert_eq!(
342 assert_eq!(
345 canonical_path(root, cwd, name),
343 canonical_path(root, cwd, name),
346 Ok(PathBuf::from("subdir/filename"))
344 Ok(PathBuf::from("subdir/filename"))
347 );
345 );
348 }
346 }
349
347
350 #[test]
348 #[test]
351 fn test_canonical_path_not_rooted() {
349 fn test_canonical_path_not_rooted() {
352 use std::fs::create_dir;
350 use std::fs::create_dir;
353 use tempfile::tempdir;
351 use tempfile::tempdir;
354
352
355 let base_dir = tempdir().unwrap();
353 let base_dir = tempdir().unwrap();
356 let base_dir_path = base_dir.path();
354 let base_dir_path = base_dir.path();
357 let beneath_repo = base_dir_path.join("a");
355 let beneath_repo = base_dir_path.join("a");
358 let root = base_dir_path.join("a/b");
356 let root = base_dir_path.join("a/b");
359 let out_of_repo = base_dir_path.join("c");
357 let out_of_repo = base_dir_path.join("c");
360 let under_repo_symlink = out_of_repo.join("d");
358 let under_repo_symlink = out_of_repo.join("d");
361
359
362 create_dir(&beneath_repo).unwrap();
360 create_dir(&beneath_repo).unwrap();
363 create_dir(&root).unwrap();
361 create_dir(&root).unwrap();
364
362
365 // TODO make portable
363 // TODO make portable
366 std::os::unix::fs::symlink(&root, &out_of_repo).unwrap();
364 std::os::unix::fs::symlink(&root, &out_of_repo).unwrap();
367
365
368 assert_eq!(
366 assert_eq!(
369 canonical_path(&root, Path::new(""), out_of_repo),
367 canonical_path(&root, Path::new(""), out_of_repo),
370 Ok(PathBuf::from(""))
368 Ok(PathBuf::from(""))
371 );
369 );
372 assert_eq!(
370 assert_eq!(
373 canonical_path(&root, Path::new(""), &beneath_repo),
371 canonical_path(&root, Path::new(""), &beneath_repo),
374 Err(HgPathError::NotUnderRoot {
372 Err(HgPathError::NotUnderRoot {
375 path: beneath_repo.to_owned(),
373 path: beneath_repo.to_owned(),
376 root: root.to_owned()
374 root: root.to_owned()
377 })
375 })
378 );
376 );
379 assert_eq!(
377 assert_eq!(
380 canonical_path(&root, Path::new(""), &under_repo_symlink),
378 canonical_path(&root, Path::new(""), &under_repo_symlink),
381 Ok(PathBuf::from("d"))
379 Ok(PathBuf::from("d"))
382 );
380 );
383 }
381 }
384 }
382 }
@@ -1,768 +1,765
1 // hg_path.rs
1 // hg_path.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use std::borrow::Borrow;
8 use std::borrow::Borrow;
9 use std::ffi::{OsStr, OsString};
9 use std::ffi::{OsStr, OsString};
10 use std::fmt;
10 use std::fmt;
11 use std::ops::Deref;
11 use std::ops::Deref;
12 use std::path::{Path, PathBuf};
12 use std::path::{Path, PathBuf};
13
13
14 #[derive(Debug, Eq, PartialEq)]
14 #[derive(Debug, Eq, PartialEq)]
15 pub enum HgPathError {
15 pub enum HgPathError {
16 /// Bytes from the invalid `HgPath`
16 /// Bytes from the invalid `HgPath`
17 LeadingSlash(Vec<u8>),
17 LeadingSlash(Vec<u8>),
18 ConsecutiveSlashes {
18 ConsecutiveSlashes {
19 bytes: Vec<u8>,
19 bytes: Vec<u8>,
20 second_slash_index: usize,
20 second_slash_index: usize,
21 },
21 },
22 ContainsNullByte {
22 ContainsNullByte {
23 bytes: Vec<u8>,
23 bytes: Vec<u8>,
24 null_byte_index: usize,
24 null_byte_index: usize,
25 },
25 },
26 /// Bytes
26 /// Bytes
27 DecodeError(Vec<u8>),
27 DecodeError(Vec<u8>),
28 /// The rest come from audit errors
28 /// The rest come from audit errors
29 EndsWithSlash(HgPathBuf),
29 EndsWithSlash(HgPathBuf),
30 ContainsIllegalComponent(HgPathBuf),
30 ContainsIllegalComponent(HgPathBuf),
31 /// Path is inside the `.hg` folder
31 /// Path is inside the `.hg` folder
32 InsideDotHg(HgPathBuf),
32 InsideDotHg(HgPathBuf),
33 IsInsideNestedRepo {
33 IsInsideNestedRepo {
34 path: HgPathBuf,
34 path: HgPathBuf,
35 nested_repo: HgPathBuf,
35 nested_repo: HgPathBuf,
36 },
36 },
37 TraversesSymbolicLink {
37 TraversesSymbolicLink {
38 path: HgPathBuf,
38 path: HgPathBuf,
39 symlink: HgPathBuf,
39 symlink: HgPathBuf,
40 },
40 },
41 NotFsCompliant(HgPathBuf),
41 NotFsCompliant(HgPathBuf),
42 /// `path` is the smallest invalid path
42 /// `path` is the smallest invalid path
43 NotUnderRoot {
43 NotUnderRoot {
44 path: PathBuf,
44 path: PathBuf,
45 root: PathBuf,
45 root: PathBuf,
46 },
46 },
47 }
47 }
48
48
49 impl ToString for HgPathError {
49 impl ToString for HgPathError {
50 fn to_string(&self) -> String {
50 fn to_string(&self) -> String {
51 match self {
51 match self {
52 HgPathError::LeadingSlash(bytes) => {
52 HgPathError::LeadingSlash(bytes) => {
53 format!("Invalid HgPath '{:?}': has a leading slash.", bytes)
53 format!("Invalid HgPath '{:?}': has a leading slash.", bytes)
54 }
54 }
55 HgPathError::ConsecutiveSlashes {
55 HgPathError::ConsecutiveSlashes {
56 bytes,
56 bytes,
57 second_slash_index: pos,
57 second_slash_index: pos,
58 } => format!(
58 } => format!(
59 "Invalid HgPath '{:?}': consecutive slashes at pos {}.",
59 "Invalid HgPath '{:?}': consecutive slashes at pos {}.",
60 bytes, pos
60 bytes, pos
61 ),
61 ),
62 HgPathError::ContainsNullByte {
62 HgPathError::ContainsNullByte {
63 bytes,
63 bytes,
64 null_byte_index: pos,
64 null_byte_index: pos,
65 } => format!(
65 } => format!(
66 "Invalid HgPath '{:?}': contains null byte at pos {}.",
66 "Invalid HgPath '{:?}': contains null byte at pos {}.",
67 bytes, pos
67 bytes, pos
68 ),
68 ),
69 HgPathError::DecodeError(bytes) => {
69 HgPathError::DecodeError(bytes) => {
70 format!("Invalid HgPath '{:?}': could not be decoded.", bytes)
70 format!("Invalid HgPath '{:?}': could not be decoded.", bytes)
71 }
71 }
72 HgPathError::EndsWithSlash(path) => {
72 HgPathError::EndsWithSlash(path) => {
73 format!("Audit failed for '{}': ends with a slash.", path)
73 format!("Audit failed for '{}': ends with a slash.", path)
74 }
74 }
75 HgPathError::ContainsIllegalComponent(path) => format!(
75 HgPathError::ContainsIllegalComponent(path) => format!(
76 "Audit failed for '{}': contains an illegal component.",
76 "Audit failed for '{}': contains an illegal component.",
77 path
77 path
78 ),
78 ),
79 HgPathError::InsideDotHg(path) => format!(
79 HgPathError::InsideDotHg(path) => format!(
80 "Audit failed for '{}': is inside the '.hg' folder.",
80 "Audit failed for '{}': is inside the '.hg' folder.",
81 path
81 path
82 ),
82 ),
83 HgPathError::IsInsideNestedRepo {
83 HgPathError::IsInsideNestedRepo {
84 path,
84 path,
85 nested_repo: nested,
85 nested_repo: nested,
86 } => format!(
86 } => format!(
87 "Audit failed for '{}': is inside a nested repository '{}'.",
87 "Audit failed for '{}': is inside a nested repository '{}'.",
88 path, nested
88 path, nested
89 ),
89 ),
90 HgPathError::TraversesSymbolicLink { path, symlink } => format!(
90 HgPathError::TraversesSymbolicLink { path, symlink } => format!(
91 "Audit failed for '{}': traverses symbolic link '{}'.",
91 "Audit failed for '{}': traverses symbolic link '{}'.",
92 path, symlink
92 path, symlink
93 ),
93 ),
94 HgPathError::NotFsCompliant(path) => format!(
94 HgPathError::NotFsCompliant(path) => format!(
95 "Audit failed for '{}': cannot be turned into a \
95 "Audit failed for '{}': cannot be turned into a \
96 filesystem path.",
96 filesystem path.",
97 path
97 path
98 ),
98 ),
99 HgPathError::NotUnderRoot { path, root } => format!(
99 HgPathError::NotUnderRoot { path, root } => format!(
100 "Audit failed for '{}': not under root {}.",
100 "Audit failed for '{}': not under root {}.",
101 path.display(),
101 path.display(),
102 root.display()
102 root.display()
103 ),
103 ),
104 }
104 }
105 }
105 }
106 }
106 }
107
107
108 impl From<HgPathError> for std::io::Error {
108 impl From<HgPathError> for std::io::Error {
109 fn from(e: HgPathError) -> Self {
109 fn from(e: HgPathError) -> Self {
110 std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
110 std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
111 }
111 }
112 }
112 }
113
113
114 /// This is a repository-relative path (or canonical path):
114 /// This is a repository-relative path (or canonical path):
115 /// - no null characters
115 /// - no null characters
116 /// - `/` separates directories
116 /// - `/` separates directories
117 /// - no consecutive slashes
117 /// - no consecutive slashes
118 /// - no leading slash,
118 /// - no leading slash,
119 /// - no `.` nor `..` of special meaning
119 /// - no `.` nor `..` of special meaning
120 /// - stored in repository and shared across platforms
120 /// - stored in repository and shared across platforms
121 ///
121 ///
122 /// Note: there is no guarantee of any `HgPath` being well-formed at any point
122 /// Note: there is no guarantee of any `HgPath` being well-formed at any point
123 /// in its lifetime for performance reasons and to ease ergonomics. It is
123 /// in its lifetime for performance reasons and to ease ergonomics. It is
124 /// however checked using the `check_state` method before any file-system
124 /// however checked using the `check_state` method before any file-system
125 /// operation.
125 /// operation.
126 ///
126 ///
127 /// This allows us to be encoding-transparent as much as possible, until really
127 /// This allows us to be encoding-transparent as much as possible, until really
128 /// needed; `HgPath` can be transformed into a platform-specific path (`OsStr`
128 /// needed; `HgPath` can be transformed into a platform-specific path (`OsStr`
129 /// or `Path`) whenever more complex operations are needed:
129 /// or `Path`) whenever more complex operations are needed:
130 /// On Unix, it's just byte-to-byte conversion. On Windows, it has to be
130 /// On Unix, it's just byte-to-byte conversion. On Windows, it has to be
131 /// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
131 /// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
132 /// character encoding will be determined on a per-repository basis.
132 /// character encoding will be determined on a per-repository basis.
133 //
133 //
134 // FIXME: (adapted from a comment in the stdlib)
134 // FIXME: (adapted from a comment in the stdlib)
135 // `HgPath::new()` current implementation relies on `Slice` being
135 // `HgPath::new()` current implementation relies on `Slice` being
136 // layout-compatible with `[u8]`.
136 // layout-compatible with `[u8]`.
137 // When attribute privacy is implemented, `Slice` should be annotated as
137 // When attribute privacy is implemented, `Slice` should be annotated as
138 // `#[repr(transparent)]`.
138 // `#[repr(transparent)]`.
139 // Anyway, `Slice` representation and layout are considered implementation
139 // Anyway, `Slice` representation and layout are considered implementation
140 // detail, are not documented and must not be relied upon.
140 // detail, are not documented and must not be relied upon.
141 #[derive(Eq, Ord, PartialEq, PartialOrd, Hash)]
141 #[derive(Eq, Ord, PartialEq, PartialOrd, Hash)]
142 pub struct HgPath {
142 pub struct HgPath {
143 inner: [u8],
143 inner: [u8],
144 }
144 }
145
145
146 impl HgPath {
146 impl HgPath {
147 pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
147 pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
148 unsafe { &*(s.as_ref() as *const [u8] as *const Self) }
148 unsafe { &*(s.as_ref() as *const [u8] as *const Self) }
149 }
149 }
150 pub fn is_empty(&self) -> bool {
150 pub fn is_empty(&self) -> bool {
151 self.inner.is_empty()
151 self.inner.is_empty()
152 }
152 }
153 pub fn len(&self) -> usize {
153 pub fn len(&self) -> usize {
154 self.inner.len()
154 self.inner.len()
155 }
155 }
156 fn to_hg_path_buf(&self) -> HgPathBuf {
156 fn to_hg_path_buf(&self) -> HgPathBuf {
157 HgPathBuf {
157 HgPathBuf {
158 inner: self.inner.to_owned(),
158 inner: self.inner.to_owned(),
159 }
159 }
160 }
160 }
161 pub fn bytes(&self) -> std::slice::Iter<u8> {
161 pub fn bytes(&self) -> std::slice::Iter<u8> {
162 self.inner.iter()
162 self.inner.iter()
163 }
163 }
164 pub fn to_ascii_uppercase(&self) -> HgPathBuf {
164 pub fn to_ascii_uppercase(&self) -> HgPathBuf {
165 HgPathBuf::from(self.inner.to_ascii_uppercase())
165 HgPathBuf::from(self.inner.to_ascii_uppercase())
166 }
166 }
167 pub fn to_ascii_lowercase(&self) -> HgPathBuf {
167 pub fn to_ascii_lowercase(&self) -> HgPathBuf {
168 HgPathBuf::from(self.inner.to_ascii_lowercase())
168 HgPathBuf::from(self.inner.to_ascii_lowercase())
169 }
169 }
170 pub fn as_bytes(&self) -> &[u8] {
170 pub fn as_bytes(&self) -> &[u8] {
171 &self.inner
171 &self.inner
172 }
172 }
173 pub fn contains(&self, other: u8) -> bool {
173 pub fn contains(&self, other: u8) -> bool {
174 self.inner.contains(&other)
174 self.inner.contains(&other)
175 }
175 }
176 pub fn starts_with(&self, needle: impl AsRef<Self>) -> bool {
176 pub fn starts_with(&self, needle: impl AsRef<Self>) -> bool {
177 self.inner.starts_with(needle.as_ref().as_bytes())
177 self.inner.starts_with(needle.as_ref().as_bytes())
178 }
178 }
179 pub fn trim_trailing_slash(&self) -> &Self {
179 pub fn trim_trailing_slash(&self) -> &Self {
180 Self::new(if self.inner.last() == Some(&b'/') {
180 Self::new(if self.inner.last() == Some(&b'/') {
181 &self.inner[..self.inner.len() - 1]
181 &self.inner[..self.inner.len() - 1]
182 } else {
182 } else {
183 &self.inner[..]
183 &self.inner[..]
184 })
184 })
185 }
185 }
186 /// Returns a tuple of slices `(base, filename)` resulting from the split
186 /// Returns a tuple of slices `(base, filename)` resulting from the split
187 /// at the rightmost `/`, if any.
187 /// at the rightmost `/`, if any.
188 ///
188 ///
189 /// # Examples:
189 /// # Examples:
190 ///
190 ///
191 /// ```
191 /// ```
192 /// use hg::utils::hg_path::HgPath;
192 /// use hg::utils::hg_path::HgPath;
193 ///
193 ///
194 /// let path = HgPath::new(b"cool/hg/path").split_filename();
194 /// let path = HgPath::new(b"cool/hg/path").split_filename();
195 /// assert_eq!(path, (HgPath::new(b"cool/hg"), HgPath::new(b"path")));
195 /// assert_eq!(path, (HgPath::new(b"cool/hg"), HgPath::new(b"path")));
196 ///
196 ///
197 /// let path = HgPath::new(b"pathwithoutsep").split_filename();
197 /// let path = HgPath::new(b"pathwithoutsep").split_filename();
198 /// assert_eq!(path, (HgPath::new(b""), HgPath::new(b"pathwithoutsep")));
198 /// assert_eq!(path, (HgPath::new(b""), HgPath::new(b"pathwithoutsep")));
199 /// ```
199 /// ```
200 pub fn split_filename(&self) -> (&Self, &Self) {
200 pub fn split_filename(&self) -> (&Self, &Self) {
201 match &self.inner.iter().rposition(|c| *c == b'/') {
201 match &self.inner.iter().rposition(|c| *c == b'/') {
202 None => (HgPath::new(""), &self),
202 None => (HgPath::new(""), &self),
203 Some(size) => (
203 Some(size) => (
204 HgPath::new(&self.inner[..*size]),
204 HgPath::new(&self.inner[..*size]),
205 HgPath::new(&self.inner[*size + 1..]),
205 HgPath::new(&self.inner[*size + 1..]),
206 ),
206 ),
207 }
207 }
208 }
208 }
209 pub fn join<T: ?Sized + AsRef<Self>>(&self, other: &T) -> HgPathBuf {
209 pub fn join<T: ?Sized + AsRef<Self>>(&self, other: &T) -> HgPathBuf {
210 let mut inner = self.inner.to_owned();
210 let mut inner = self.inner.to_owned();
211 if inner.len() != 0 && inner.last() != Some(&b'/') {
211 if !inner.is_empty() && inner.last() != Some(&b'/') {
212 inner.push(b'/');
212 inner.push(b'/');
213 }
213 }
214 inner.extend(other.as_ref().bytes());
214 inner.extend(other.as_ref().bytes());
215 HgPathBuf::from_bytes(&inner)
215 HgPathBuf::from_bytes(&inner)
216 }
216 }
217 pub fn parent(&self) -> &Self {
217 pub fn parent(&self) -> &Self {
218 let inner = self.as_bytes();
218 let inner = self.as_bytes();
219 HgPath::new(match inner.iter().rposition(|b| *b == b'/') {
219 HgPath::new(match inner.iter().rposition(|b| *b == b'/') {
220 Some(pos) => &inner[..pos],
220 Some(pos) => &inner[..pos],
221 None => &[],
221 None => &[],
222 })
222 })
223 }
223 }
224 /// Given a base directory, returns the slice of `self` relative to the
224 /// Given a base directory, returns the slice of `self` relative to the
225 /// base directory. If `base` is not a directory (does not end with a
225 /// base directory. If `base` is not a directory (does not end with a
226 /// `b'/'`), returns `None`.
226 /// `b'/'`), returns `None`.
227 pub fn relative_to(&self, base: impl AsRef<Self>) -> Option<&Self> {
227 pub fn relative_to(&self, base: impl AsRef<Self>) -> Option<&Self> {
228 let base = base.as_ref();
228 let base = base.as_ref();
229 if base.is_empty() {
229 if base.is_empty() {
230 return Some(self);
230 return Some(self);
231 }
231 }
232 let is_dir = base.as_bytes().ends_with(b"/");
232 let is_dir = base.as_bytes().ends_with(b"/");
233 if is_dir && self.starts_with(base) {
233 if is_dir && self.starts_with(base) {
234 Some(Self::new(&self.inner[base.len()..]))
234 Some(Self::new(&self.inner[base.len()..]))
235 } else {
235 } else {
236 None
236 None
237 }
237 }
238 }
238 }
239
239
240 #[cfg(windows)]
240 #[cfg(windows)]
241 /// Copied from the Python stdlib's `os.path.splitdrive` implementation.
241 /// Copied from the Python stdlib's `os.path.splitdrive` implementation.
242 ///
242 ///
243 /// Split a pathname into drive/UNC sharepoint and relative path
243 /// Split a pathname into drive/UNC sharepoint and relative path
244 /// specifiers. Returns a 2-tuple (drive_or_unc, path); either part may
244 /// specifiers. Returns a 2-tuple (drive_or_unc, path); either part may
245 /// be empty.
245 /// be empty.
246 ///
246 ///
247 /// If you assign
247 /// If you assign
248 /// result = split_drive(p)
248 /// result = split_drive(p)
249 /// It is always true that:
249 /// It is always true that:
250 /// result[0] + result[1] == p
250 /// result[0] + result[1] == p
251 ///
251 ///
252 /// If the path contained a drive letter, drive_or_unc will contain
252 /// If the path contained a drive letter, drive_or_unc will contain
253 /// everything up to and including the colon.
253 /// everything up to and including the colon.
254 /// e.g. split_drive("c:/dir") returns ("c:", "/dir")
254 /// e.g. split_drive("c:/dir") returns ("c:", "/dir")
255 ///
255 ///
256 /// If the path contained a UNC path, the drive_or_unc will contain the
256 /// If the path contained a UNC path, the drive_or_unc will contain the
257 /// host name and share up to but not including the fourth directory
257 /// host name and share up to but not including the fourth directory
258 /// separator character.
258 /// separator character.
259 /// e.g. split_drive("//host/computer/dir") returns ("//host/computer",
259 /// e.g. split_drive("//host/computer/dir") returns ("//host/computer",
260 /// "/dir")
260 /// "/dir")
261 ///
261 ///
262 /// Paths cannot contain both a drive letter and a UNC path.
262 /// Paths cannot contain both a drive letter and a UNC path.
263 pub fn split_drive<'a>(&self) -> (&HgPath, &HgPath) {
263 pub fn split_drive<'a>(&self) -> (&HgPath, &HgPath) {
264 let bytes = self.as_bytes();
264 let bytes = self.as_bytes();
265 let is_sep = |b| std::path::is_separator(b as char);
265 let is_sep = |b| std::path::is_separator(b as char);
266
266
267 if self.len() < 2 {
267 if self.len() < 2 {
268 (HgPath::new(b""), &self)
268 (HgPath::new(b""), &self)
269 } else if is_sep(bytes[0])
269 } else if is_sep(bytes[0])
270 && is_sep(bytes[1])
270 && is_sep(bytes[1])
271 && (self.len() == 2 || !is_sep(bytes[2]))
271 && (self.len() == 2 || !is_sep(bytes[2]))
272 {
272 {
273 // Is a UNC path:
273 // Is a UNC path:
274 // vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
274 // vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
275 // \\machine\mountpoint\directory\etc\...
275 // \\machine\mountpoint\directory\etc\...
276 // directory ^^^^^^^^^^^^^^^
276 // directory ^^^^^^^^^^^^^^^
277
277
278 let machine_end_index = bytes[2..].iter().position(|b| is_sep(*b));
278 let machine_end_index = bytes[2..].iter().position(|b| is_sep(*b));
279 let mountpoint_start_index = if let Some(i) = machine_end_index {
279 let mountpoint_start_index = if let Some(i) = machine_end_index {
280 i + 2
280 i + 2
281 } else {
281 } else {
282 return (HgPath::new(b""), &self);
282 return (HgPath::new(b""), &self);
283 };
283 };
284
284
285 match bytes[mountpoint_start_index + 1..]
285 match bytes[mountpoint_start_index + 1..]
286 .iter()
286 .iter()
287 .position(|b| is_sep(*b))
287 .position(|b| is_sep(*b))
288 {
288 {
289 // A UNC path can't have two slashes in a row
289 // A UNC path can't have two slashes in a row
290 // (after the initial two)
290 // (after the initial two)
291 Some(0) => (HgPath::new(b""), &self),
291 Some(0) => (HgPath::new(b""), &self),
292 Some(i) => {
292 Some(i) => {
293 let (a, b) =
293 let (a, b) =
294 bytes.split_at(mountpoint_start_index + 1 + i);
294 bytes.split_at(mountpoint_start_index + 1 + i);
295 (HgPath::new(a), HgPath::new(b))
295 (HgPath::new(a), HgPath::new(b))
296 }
296 }
297 None => (&self, HgPath::new(b"")),
297 None => (&self, HgPath::new(b"")),
298 }
298 }
299 } else if bytes[1] == b':' {
299 } else if bytes[1] == b':' {
300 // Drive path c:\directory
300 // Drive path c:\directory
301 let (a, b) = bytes.split_at(2);
301 let (a, b) = bytes.split_at(2);
302 (HgPath::new(a), HgPath::new(b))
302 (HgPath::new(a), HgPath::new(b))
303 } else {
303 } else {
304 (HgPath::new(b""), &self)
304 (HgPath::new(b""), &self)
305 }
305 }
306 }
306 }
307
307
308 #[cfg(unix)]
308 #[cfg(unix)]
309 /// Split a pathname into drive and path. On Posix, drive is always empty.
309 /// Split a pathname into drive and path. On Posix, drive is always empty.
310 pub fn split_drive(&self) -> (&HgPath, &HgPath) {
310 pub fn split_drive(&self) -> (&HgPath, &HgPath) {
311 (HgPath::new(b""), &self)
311 (HgPath::new(b""), &self)
312 }
312 }
313
313
314 /// Checks for errors in the path, short-circuiting at the first one.
314 /// Checks for errors in the path, short-circuiting at the first one.
315 /// This generates fine-grained errors useful for debugging.
315 /// This generates fine-grained errors useful for debugging.
316 /// To simply check if the path is valid during tests, use `is_valid`.
316 /// To simply check if the path is valid during tests, use `is_valid`.
317 pub fn check_state(&self) -> Result<(), HgPathError> {
317 pub fn check_state(&self) -> Result<(), HgPathError> {
318 if self.len() == 0 {
318 if self.is_empty() {
319 return Ok(());
319 return Ok(());
320 }
320 }
321 let bytes = self.as_bytes();
321 let bytes = self.as_bytes();
322 let mut previous_byte = None;
322 let mut previous_byte = None;
323
323
324 if bytes[0] == b'/' {
324 if bytes[0] == b'/' {
325 return Err(HgPathError::LeadingSlash(bytes.to_vec()));
325 return Err(HgPathError::LeadingSlash(bytes.to_vec()));
326 }
326 }
327 for (index, byte) in bytes.iter().enumerate() {
327 for (index, byte) in bytes.iter().enumerate() {
328 match byte {
328 match byte {
329 0 => {
329 0 => {
330 return Err(HgPathError::ContainsNullByte {
330 return Err(HgPathError::ContainsNullByte {
331 bytes: bytes.to_vec(),
331 bytes: bytes.to_vec(),
332 null_byte_index: index,
332 null_byte_index: index,
333 })
333 })
334 }
334 }
335 b'/' => {
335 b'/' => {
336 if previous_byte.is_some() && previous_byte == Some(b'/') {
336 if previous_byte.is_some() && previous_byte == Some(b'/') {
337 return Err(HgPathError::ConsecutiveSlashes {
337 return Err(HgPathError::ConsecutiveSlashes {
338 bytes: bytes.to_vec(),
338 bytes: bytes.to_vec(),
339 second_slash_index: index,
339 second_slash_index: index,
340 });
340 });
341 }
341 }
342 }
342 }
343 _ => (),
343 _ => (),
344 };
344 };
345 previous_byte = Some(*byte);
345 previous_byte = Some(*byte);
346 }
346 }
347 Ok(())
347 Ok(())
348 }
348 }
349
349
350 #[cfg(test)]
350 #[cfg(test)]
351 /// Only usable during tests to force developers to handle invalid states
351 /// Only usable during tests to force developers to handle invalid states
352 fn is_valid(&self) -> bool {
352 fn is_valid(&self) -> bool {
353 self.check_state().is_ok()
353 self.check_state().is_ok()
354 }
354 }
355 }
355 }
356
356
357 impl fmt::Debug for HgPath {
357 impl fmt::Debug for HgPath {
358 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
358 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
359 write!(f, "HgPath({:?})", String::from_utf8_lossy(&self.inner))
359 write!(f, "HgPath({:?})", String::from_utf8_lossy(&self.inner))
360 }
360 }
361 }
361 }
362
362
363 impl fmt::Display for HgPath {
363 impl fmt::Display for HgPath {
364 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
364 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
365 write!(f, "{}", String::from_utf8_lossy(&self.inner))
365 write!(f, "{}", String::from_utf8_lossy(&self.inner))
366 }
366 }
367 }
367 }
368
368
369 #[derive(Eq, Ord, Clone, PartialEq, PartialOrd, Hash)]
369 #[derive(Default, Eq, Ord, Clone, PartialEq, PartialOrd, Hash)]
370 pub struct HgPathBuf {
370 pub struct HgPathBuf {
371 inner: Vec<u8>,
371 inner: Vec<u8>,
372 }
372 }
373
373
374 impl HgPathBuf {
374 impl HgPathBuf {
375 pub fn new() -> Self {
375 pub fn new() -> Self {
376 Self { inner: Vec::new() }
376 Default::default()
377 }
377 }
378 pub fn push(&mut self, byte: u8) {
378 pub fn push(&mut self, byte: u8) {
379 self.inner.push(byte);
379 self.inner.push(byte);
380 }
380 }
381 pub fn from_bytes(s: &[u8]) -> HgPathBuf {
381 pub fn from_bytes(s: &[u8]) -> HgPathBuf {
382 HgPath::new(s).to_owned()
382 HgPath::new(s).to_owned()
383 }
383 }
384 pub fn into_vec(self) -> Vec<u8> {
384 pub fn into_vec(self) -> Vec<u8> {
385 self.inner
385 self.inner
386 }
386 }
387 pub fn as_ref(&self) -> &[u8] {
388 self.inner.as_ref()
389 }
390 }
387 }
391
388
392 impl fmt::Debug for HgPathBuf {
389 impl fmt::Debug for HgPathBuf {
393 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
390 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
394 write!(f, "HgPathBuf({:?})", String::from_utf8_lossy(&self.inner))
391 write!(f, "HgPathBuf({:?})", String::from_utf8_lossy(&self.inner))
395 }
392 }
396 }
393 }
397
394
398 impl fmt::Display for HgPathBuf {
395 impl fmt::Display for HgPathBuf {
399 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
396 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
400 write!(f, "{}", String::from_utf8_lossy(&self.inner))
397 write!(f, "{}", String::from_utf8_lossy(&self.inner))
401 }
398 }
402 }
399 }
403
400
404 impl Deref for HgPathBuf {
401 impl Deref for HgPathBuf {
405 type Target = HgPath;
402 type Target = HgPath;
406
403
407 #[inline]
404 #[inline]
408 fn deref(&self) -> &HgPath {
405 fn deref(&self) -> &HgPath {
409 &HgPath::new(&self.inner)
406 &HgPath::new(&self.inner)
410 }
407 }
411 }
408 }
412
409
413 impl From<Vec<u8>> for HgPathBuf {
410 impl From<Vec<u8>> for HgPathBuf {
414 fn from(vec: Vec<u8>) -> Self {
411 fn from(vec: Vec<u8>) -> Self {
415 Self { inner: vec }
412 Self { inner: vec }
416 }
413 }
417 }
414 }
418
415
419 impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf {
416 impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf {
420 fn from(s: &T) -> HgPathBuf {
417 fn from(s: &T) -> HgPathBuf {
421 s.as_ref().to_owned()
418 s.as_ref().to_owned()
422 }
419 }
423 }
420 }
424
421
425 impl Into<Vec<u8>> for HgPathBuf {
422 impl Into<Vec<u8>> for HgPathBuf {
426 fn into(self) -> Vec<u8> {
423 fn into(self) -> Vec<u8> {
427 self.inner
424 self.inner
428 }
425 }
429 }
426 }
430
427
431 impl Borrow<HgPath> for HgPathBuf {
428 impl Borrow<HgPath> for HgPathBuf {
432 fn borrow(&self) -> &HgPath {
429 fn borrow(&self) -> &HgPath {
433 &HgPath::new(self.as_bytes())
430 &HgPath::new(self.as_bytes())
434 }
431 }
435 }
432 }
436
433
437 impl ToOwned for HgPath {
434 impl ToOwned for HgPath {
438 type Owned = HgPathBuf;
435 type Owned = HgPathBuf;
439
436
440 fn to_owned(&self) -> HgPathBuf {
437 fn to_owned(&self) -> HgPathBuf {
441 self.to_hg_path_buf()
438 self.to_hg_path_buf()
442 }
439 }
443 }
440 }
444
441
445 impl AsRef<HgPath> for HgPath {
442 impl AsRef<HgPath> for HgPath {
446 fn as_ref(&self) -> &HgPath {
443 fn as_ref(&self) -> &HgPath {
447 self
444 self
448 }
445 }
449 }
446 }
450
447
451 impl AsRef<HgPath> for HgPathBuf {
448 impl AsRef<HgPath> for HgPathBuf {
452 fn as_ref(&self) -> &HgPath {
449 fn as_ref(&self) -> &HgPath {
453 self
450 self
454 }
451 }
455 }
452 }
456
453
457 impl Extend<u8> for HgPathBuf {
454 impl Extend<u8> for HgPathBuf {
458 fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
455 fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
459 self.inner.extend(iter);
456 self.inner.extend(iter);
460 }
457 }
461 }
458 }
462
459
463 /// TODO: Once https://www.mercurial-scm.org/wiki/WindowsUTF8Plan is
460 /// TODO: Once https://www.mercurial-scm.org/wiki/WindowsUTF8Plan is
464 /// implemented, these conversion utils will have to work differently depending
461 /// implemented, these conversion utils will have to work differently depending
465 /// on the repository encoding: either `UTF-8` or `MBCS`.
462 /// on the repository encoding: either `UTF-8` or `MBCS`.
466
463
467 pub fn hg_path_to_os_string<P: AsRef<HgPath>>(
464 pub fn hg_path_to_os_string<P: AsRef<HgPath>>(
468 hg_path: P,
465 hg_path: P,
469 ) -> Result<OsString, HgPathError> {
466 ) -> Result<OsString, HgPathError> {
470 hg_path.as_ref().check_state()?;
467 hg_path.as_ref().check_state()?;
471 let os_str;
468 let os_str;
472 #[cfg(unix)]
469 #[cfg(unix)]
473 {
470 {
474 use std::os::unix::ffi::OsStrExt;
471 use std::os::unix::ffi::OsStrExt;
475 os_str = std::ffi::OsStr::from_bytes(&hg_path.as_ref().as_bytes());
472 os_str = std::ffi::OsStr::from_bytes(&hg_path.as_ref().as_bytes());
476 }
473 }
477 // TODO Handle other platforms
474 // TODO Handle other platforms
478 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
475 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
479 Ok(os_str.to_os_string())
476 Ok(os_str.to_os_string())
480 }
477 }
481
478
482 pub fn hg_path_to_path_buf<P: AsRef<HgPath>>(
479 pub fn hg_path_to_path_buf<P: AsRef<HgPath>>(
483 hg_path: P,
480 hg_path: P,
484 ) -> Result<PathBuf, HgPathError> {
481 ) -> Result<PathBuf, HgPathError> {
485 Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf())
482 Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf())
486 }
483 }
487
484
488 pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>(
485 pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>(
489 os_string: S,
486 os_string: S,
490 ) -> Result<HgPathBuf, HgPathError> {
487 ) -> Result<HgPathBuf, HgPathError> {
491 let buf;
488 let buf;
492 #[cfg(unix)]
489 #[cfg(unix)]
493 {
490 {
494 use std::os::unix::ffi::OsStrExt;
491 use std::os::unix::ffi::OsStrExt;
495 buf = HgPathBuf::from_bytes(&os_string.as_ref().as_bytes());
492 buf = HgPathBuf::from_bytes(&os_string.as_ref().as_bytes());
496 }
493 }
497 // TODO Handle other platforms
494 // TODO Handle other platforms
498 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
495 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
499
496
500 buf.check_state()?;
497 buf.check_state()?;
501 Ok(buf)
498 Ok(buf)
502 }
499 }
503
500
504 pub fn path_to_hg_path_buf<P: AsRef<Path>>(
501 pub fn path_to_hg_path_buf<P: AsRef<Path>>(
505 path: P,
502 path: P,
506 ) -> Result<HgPathBuf, HgPathError> {
503 ) -> Result<HgPathBuf, HgPathError> {
507 let buf;
504 let buf;
508 let os_str = path.as_ref().as_os_str();
505 let os_str = path.as_ref().as_os_str();
509 #[cfg(unix)]
506 #[cfg(unix)]
510 {
507 {
511 use std::os::unix::ffi::OsStrExt;
508 use std::os::unix::ffi::OsStrExt;
512 buf = HgPathBuf::from_bytes(&os_str.as_bytes());
509 buf = HgPathBuf::from_bytes(&os_str.as_bytes());
513 }
510 }
514 // TODO Handle other platforms
511 // TODO Handle other platforms
515 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
512 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
516
513
517 buf.check_state()?;
514 buf.check_state()?;
518 Ok(buf)
515 Ok(buf)
519 }
516 }
520
517
521 #[cfg(test)]
518 #[cfg(test)]
522 mod tests {
519 mod tests {
523 use super::*;
520 use super::*;
524 use pretty_assertions::assert_eq;
521 use pretty_assertions::assert_eq;
525
522
526 #[test]
523 #[test]
527 fn test_path_states() {
524 fn test_path_states() {
528 assert_eq!(
525 assert_eq!(
529 Err(HgPathError::LeadingSlash(b"/".to_vec())),
526 Err(HgPathError::LeadingSlash(b"/".to_vec())),
530 HgPath::new(b"/").check_state()
527 HgPath::new(b"/").check_state()
531 );
528 );
532 assert_eq!(
529 assert_eq!(
533 Err(HgPathError::ConsecutiveSlashes {
530 Err(HgPathError::ConsecutiveSlashes {
534 bytes: b"a/b//c".to_vec(),
531 bytes: b"a/b//c".to_vec(),
535 second_slash_index: 4
532 second_slash_index: 4
536 }),
533 }),
537 HgPath::new(b"a/b//c").check_state()
534 HgPath::new(b"a/b//c").check_state()
538 );
535 );
539 assert_eq!(
536 assert_eq!(
540 Err(HgPathError::ContainsNullByte {
537 Err(HgPathError::ContainsNullByte {
541 bytes: b"a/b/\0c".to_vec(),
538 bytes: b"a/b/\0c".to_vec(),
542 null_byte_index: 4
539 null_byte_index: 4
543 }),
540 }),
544 HgPath::new(b"a/b/\0c").check_state()
541 HgPath::new(b"a/b/\0c").check_state()
545 );
542 );
546 // TODO test HgPathError::DecodeError for the Windows implementation.
543 // TODO test HgPathError::DecodeError for the Windows implementation.
547 assert_eq!(true, HgPath::new(b"").is_valid());
544 assert_eq!(true, HgPath::new(b"").is_valid());
548 assert_eq!(true, HgPath::new(b"a/b/c").is_valid());
545 assert_eq!(true, HgPath::new(b"a/b/c").is_valid());
549 // Backslashes in paths are not significant, but allowed
546 // Backslashes in paths are not significant, but allowed
550 assert_eq!(true, HgPath::new(br"a\b/c").is_valid());
547 assert_eq!(true, HgPath::new(br"a\b/c").is_valid());
551 // Dots in paths are not significant, but allowed
548 // Dots in paths are not significant, but allowed
552 assert_eq!(true, HgPath::new(b"a/b/../c/").is_valid());
549 assert_eq!(true, HgPath::new(b"a/b/../c/").is_valid());
553 assert_eq!(true, HgPath::new(b"./a/b/../c/").is_valid());
550 assert_eq!(true, HgPath::new(b"./a/b/../c/").is_valid());
554 }
551 }
555
552
556 #[test]
553 #[test]
557 fn test_iter() {
554 fn test_iter() {
558 let path = HgPath::new(b"a");
555 let path = HgPath::new(b"a");
559 let mut iter = path.bytes();
556 let mut iter = path.bytes();
560 assert_eq!(Some(&b'a'), iter.next());
557 assert_eq!(Some(&b'a'), iter.next());
561 assert_eq!(None, iter.next_back());
558 assert_eq!(None, iter.next_back());
562 assert_eq!(None, iter.next());
559 assert_eq!(None, iter.next());
563
560
564 let path = HgPath::new(b"a");
561 let path = HgPath::new(b"a");
565 let mut iter = path.bytes();
562 let mut iter = path.bytes();
566 assert_eq!(Some(&b'a'), iter.next_back());
563 assert_eq!(Some(&b'a'), iter.next_back());
567 assert_eq!(None, iter.next_back());
564 assert_eq!(None, iter.next_back());
568 assert_eq!(None, iter.next());
565 assert_eq!(None, iter.next());
569
566
570 let path = HgPath::new(b"abc");
567 let path = HgPath::new(b"abc");
571 let mut iter = path.bytes();
568 let mut iter = path.bytes();
572 assert_eq!(Some(&b'a'), iter.next());
569 assert_eq!(Some(&b'a'), iter.next());
573 assert_eq!(Some(&b'c'), iter.next_back());
570 assert_eq!(Some(&b'c'), iter.next_back());
574 assert_eq!(Some(&b'b'), iter.next_back());
571 assert_eq!(Some(&b'b'), iter.next_back());
575 assert_eq!(None, iter.next_back());
572 assert_eq!(None, iter.next_back());
576 assert_eq!(None, iter.next());
573 assert_eq!(None, iter.next());
577
574
578 let path = HgPath::new(b"abc");
575 let path = HgPath::new(b"abc");
579 let mut iter = path.bytes();
576 let mut iter = path.bytes();
580 assert_eq!(Some(&b'a'), iter.next());
577 assert_eq!(Some(&b'a'), iter.next());
581 assert_eq!(Some(&b'b'), iter.next());
578 assert_eq!(Some(&b'b'), iter.next());
582 assert_eq!(Some(&b'c'), iter.next());
579 assert_eq!(Some(&b'c'), iter.next());
583 assert_eq!(None, iter.next_back());
580 assert_eq!(None, iter.next_back());
584 assert_eq!(None, iter.next());
581 assert_eq!(None, iter.next());
585
582
586 let path = HgPath::new(b"abc");
583 let path = HgPath::new(b"abc");
587 let iter = path.bytes();
584 let iter = path.bytes();
588 let mut vec = Vec::new();
585 let mut vec = Vec::new();
589 vec.extend(iter);
586 vec.extend(iter);
590 assert_eq!(vec![b'a', b'b', b'c'], vec);
587 assert_eq!(vec![b'a', b'b', b'c'], vec);
591
588
592 let path = HgPath::new(b"abc");
589 let path = HgPath::new(b"abc");
593 let mut iter = path.bytes();
590 let mut iter = path.bytes();
594 assert_eq!(Some(2), iter.rposition(|c| *c == b'c'));
591 assert_eq!(Some(2), iter.rposition(|c| *c == b'c'));
595
592
596 let path = HgPath::new(b"abc");
593 let path = HgPath::new(b"abc");
597 let mut iter = path.bytes();
594 let mut iter = path.bytes();
598 assert_eq!(None, iter.rposition(|c| *c == b'd'));
595 assert_eq!(None, iter.rposition(|c| *c == b'd'));
599 }
596 }
600
597
601 #[test]
598 #[test]
602 fn test_join() {
599 fn test_join() {
603 let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"b"));
600 let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"b"));
604 assert_eq!(b"a/b", path.as_bytes());
601 assert_eq!(b"a/b", path.as_bytes());
605
602
606 let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"b/c"));
603 let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"b/c"));
607 assert_eq!(b"a/b/c", path.as_bytes());
604 assert_eq!(b"a/b/c", path.as_bytes());
608
605
609 // No leading slash if empty before join
606 // No leading slash if empty before join
610 let path = HgPathBuf::new().join(HgPath::new(b"b/c"));
607 let path = HgPathBuf::new().join(HgPath::new(b"b/c"));
611 assert_eq!(b"b/c", path.as_bytes());
608 assert_eq!(b"b/c", path.as_bytes());
612
609
613 // The leading slash is an invalid representation of an `HgPath`, but
610 // The leading slash is an invalid representation of an `HgPath`, but
614 // it can happen. This creates another invalid representation of
611 // it can happen. This creates another invalid representation of
615 // consecutive bytes.
612 // consecutive bytes.
616 // TODO What should be done in this case? Should we silently remove
613 // TODO What should be done in this case? Should we silently remove
617 // the extra slash? Should we change the signature to a problematic
614 // the extra slash? Should we change the signature to a problematic
618 // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and
615 // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and
619 // let the error happen upon filesystem interaction?
616 // let the error happen upon filesystem interaction?
620 let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"/b"));
617 let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"/b"));
621 assert_eq!(b"a//b", path.as_bytes());
618 assert_eq!(b"a//b", path.as_bytes());
622 let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"/b"));
619 let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"/b"));
623 assert_eq!(b"a//b", path.as_bytes());
620 assert_eq!(b"a//b", path.as_bytes());
624 }
621 }
625
622
626 #[test]
623 #[test]
627 fn test_relative_to() {
624 fn test_relative_to() {
628 let path = HgPath::new(b"");
625 let path = HgPath::new(b"");
629 let base = HgPath::new(b"");
626 let base = HgPath::new(b"");
630 assert_eq!(Some(path), path.relative_to(base));
627 assert_eq!(Some(path), path.relative_to(base));
631
628
632 let path = HgPath::new(b"path");
629 let path = HgPath::new(b"path");
633 let base = HgPath::new(b"");
630 let base = HgPath::new(b"");
634 assert_eq!(Some(path), path.relative_to(base));
631 assert_eq!(Some(path), path.relative_to(base));
635
632
636 let path = HgPath::new(b"a");
633 let path = HgPath::new(b"a");
637 let base = HgPath::new(b"b");
634 let base = HgPath::new(b"b");
638 assert_eq!(None, path.relative_to(base));
635 assert_eq!(None, path.relative_to(base));
639
636
640 let path = HgPath::new(b"a/b");
637 let path = HgPath::new(b"a/b");
641 let base = HgPath::new(b"a");
638 let base = HgPath::new(b"a");
642 assert_eq!(None, path.relative_to(base));
639 assert_eq!(None, path.relative_to(base));
643
640
644 let path = HgPath::new(b"a/b");
641 let path = HgPath::new(b"a/b");
645 let base = HgPath::new(b"a/");
642 let base = HgPath::new(b"a/");
646 assert_eq!(Some(HgPath::new(b"b")), path.relative_to(base));
643 assert_eq!(Some(HgPath::new(b"b")), path.relative_to(base));
647
644
648 let path = HgPath::new(b"nested/path/to/b");
645 let path = HgPath::new(b"nested/path/to/b");
649 let base = HgPath::new(b"nested/path/");
646 let base = HgPath::new(b"nested/path/");
650 assert_eq!(Some(HgPath::new(b"to/b")), path.relative_to(base));
647 assert_eq!(Some(HgPath::new(b"to/b")), path.relative_to(base));
651
648
652 let path = HgPath::new(b"ends/with/dir/");
649 let path = HgPath::new(b"ends/with/dir/");
653 let base = HgPath::new(b"ends/");
650 let base = HgPath::new(b"ends/");
654 assert_eq!(Some(HgPath::new(b"with/dir/")), path.relative_to(base));
651 assert_eq!(Some(HgPath::new(b"with/dir/")), path.relative_to(base));
655 }
652 }
656
653
657 #[test]
654 #[test]
658 #[cfg(unix)]
655 #[cfg(unix)]
659 fn test_split_drive() {
656 fn test_split_drive() {
660 // Taken from the Python stdlib's tests
657 // Taken from the Python stdlib's tests
661 assert_eq!(
658 assert_eq!(
662 HgPath::new(br"/foo/bar").split_drive(),
659 HgPath::new(br"/foo/bar").split_drive(),
663 (HgPath::new(b""), HgPath::new(br"/foo/bar"))
660 (HgPath::new(b""), HgPath::new(br"/foo/bar"))
664 );
661 );
665 assert_eq!(
662 assert_eq!(
666 HgPath::new(br"foo:bar").split_drive(),
663 HgPath::new(br"foo:bar").split_drive(),
667 (HgPath::new(b""), HgPath::new(br"foo:bar"))
664 (HgPath::new(b""), HgPath::new(br"foo:bar"))
668 );
665 );
669 assert_eq!(
666 assert_eq!(
670 HgPath::new(br":foo:bar").split_drive(),
667 HgPath::new(br":foo:bar").split_drive(),
671 (HgPath::new(b""), HgPath::new(br":foo:bar"))
668 (HgPath::new(b""), HgPath::new(br":foo:bar"))
672 );
669 );
673 // Also try NT paths; should not split them
670 // Also try NT paths; should not split them
674 assert_eq!(
671 assert_eq!(
675 HgPath::new(br"c:\foo\bar").split_drive(),
672 HgPath::new(br"c:\foo\bar").split_drive(),
676 (HgPath::new(b""), HgPath::new(br"c:\foo\bar"))
673 (HgPath::new(b""), HgPath::new(br"c:\foo\bar"))
677 );
674 );
678 assert_eq!(
675 assert_eq!(
679 HgPath::new(b"c:/foo/bar").split_drive(),
676 HgPath::new(b"c:/foo/bar").split_drive(),
680 (HgPath::new(b""), HgPath::new(br"c:/foo/bar"))
677 (HgPath::new(b""), HgPath::new(br"c:/foo/bar"))
681 );
678 );
682 assert_eq!(
679 assert_eq!(
683 HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(),
680 HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(),
684 (
681 (
685 HgPath::new(b""),
682 HgPath::new(b""),
686 HgPath::new(br"\\conky\mountpoint\foo\bar")
683 HgPath::new(br"\\conky\mountpoint\foo\bar")
687 )
684 )
688 );
685 );
689 }
686 }
690
687
691 #[test]
688 #[test]
692 #[cfg(windows)]
689 #[cfg(windows)]
693 fn test_split_drive() {
690 fn test_split_drive() {
694 assert_eq!(
691 assert_eq!(
695 HgPath::new(br"c:\foo\bar").split_drive(),
692 HgPath::new(br"c:\foo\bar").split_drive(),
696 (HgPath::new(br"c:"), HgPath::new(br"\foo\bar"))
693 (HgPath::new(br"c:"), HgPath::new(br"\foo\bar"))
697 );
694 );
698 assert_eq!(
695 assert_eq!(
699 HgPath::new(b"c:/foo/bar").split_drive(),
696 HgPath::new(b"c:/foo/bar").split_drive(),
700 (HgPath::new(br"c:"), HgPath::new(br"/foo/bar"))
697 (HgPath::new(br"c:"), HgPath::new(br"/foo/bar"))
701 );
698 );
702 assert_eq!(
699 assert_eq!(
703 HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(),
700 HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(),
704 (
701 (
705 HgPath::new(br"\\conky\mountpoint"),
702 HgPath::new(br"\\conky\mountpoint"),
706 HgPath::new(br"\foo\bar")
703 HgPath::new(br"\foo\bar")
707 )
704 )
708 );
705 );
709 assert_eq!(
706 assert_eq!(
710 HgPath::new(br"//conky/mountpoint/foo/bar").split_drive(),
707 HgPath::new(br"//conky/mountpoint/foo/bar").split_drive(),
711 (
708 (
712 HgPath::new(br"//conky/mountpoint"),
709 HgPath::new(br"//conky/mountpoint"),
713 HgPath::new(br"/foo/bar")
710 HgPath::new(br"/foo/bar")
714 )
711 )
715 );
712 );
716 assert_eq!(
713 assert_eq!(
717 HgPath::new(br"\\\conky\mountpoint\foo\bar").split_drive(),
714 HgPath::new(br"\\\conky\mountpoint\foo\bar").split_drive(),
718 (
715 (
719 HgPath::new(br""),
716 HgPath::new(br""),
720 HgPath::new(br"\\\conky\mountpoint\foo\bar")
717 HgPath::new(br"\\\conky\mountpoint\foo\bar")
721 )
718 )
722 );
719 );
723 assert_eq!(
720 assert_eq!(
724 HgPath::new(br"///conky/mountpoint/foo/bar").split_drive(),
721 HgPath::new(br"///conky/mountpoint/foo/bar").split_drive(),
725 (
722 (
726 HgPath::new(br""),
723 HgPath::new(br""),
727 HgPath::new(br"///conky/mountpoint/foo/bar")
724 HgPath::new(br"///conky/mountpoint/foo/bar")
728 )
725 )
729 );
726 );
730 assert_eq!(
727 assert_eq!(
731 HgPath::new(br"\\conky\\mountpoint\foo\bar").split_drive(),
728 HgPath::new(br"\\conky\\mountpoint\foo\bar").split_drive(),
732 (
729 (
733 HgPath::new(br""),
730 HgPath::new(br""),
734 HgPath::new(br"\\conky\\mountpoint\foo\bar")
731 HgPath::new(br"\\conky\\mountpoint\foo\bar")
735 )
732 )
736 );
733 );
737 assert_eq!(
734 assert_eq!(
738 HgPath::new(br"//conky//mountpoint/foo/bar").split_drive(),
735 HgPath::new(br"//conky//mountpoint/foo/bar").split_drive(),
739 (
736 (
740 HgPath::new(br""),
737 HgPath::new(br""),
741 HgPath::new(br"//conky//mountpoint/foo/bar")
738 HgPath::new(br"//conky//mountpoint/foo/bar")
742 )
739 )
743 );
740 );
744 // UNC part containing U+0130
741 // UNC part containing U+0130
745 assert_eq!(
742 assert_eq!(
746 HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT/foo/bar").split_drive(),
743 HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT/foo/bar").split_drive(),
747 (
744 (
748 HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT"),
745 HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT"),
749 HgPath::new(br"/foo/bar")
746 HgPath::new(br"/foo/bar")
750 )
747 )
751 );
748 );
752 }
749 }
753
750
754 #[test]
751 #[test]
755 fn test_parent() {
752 fn test_parent() {
756 let path = HgPath::new(b"");
753 let path = HgPath::new(b"");
757 assert_eq!(path.parent(), path);
754 assert_eq!(path.parent(), path);
758
755
759 let path = HgPath::new(b"a");
756 let path = HgPath::new(b"a");
760 assert_eq!(path.parent(), HgPath::new(b""));
757 assert_eq!(path.parent(), HgPath::new(b""));
761
758
762 let path = HgPath::new(b"a/b");
759 let path = HgPath::new(b"a/b");
763 assert_eq!(path.parent(), HgPath::new(b"a"));
760 assert_eq!(path.parent(), HgPath::new(b"a"));
764
761
765 let path = HgPath::new(b"a/other/b");
762 let path = HgPath::new(b"a/other/b");
766 assert_eq!(path.parent(), HgPath::new(b"a/other"));
763 assert_eq!(path.parent(), HgPath::new(b"a/other"));
767 }
764 }
768 }
765 }
@@ -1,232 +1,232
1 // path_auditor.rs
1 // path_auditor.rs
2 //
2 //
3 // Copyright 2020
3 // Copyright 2020
4 // Raphaël Gomès <rgomes@octobus.net>,
4 // Raphaël Gomès <rgomes@octobus.net>,
5 //
5 //
6 // This software may be used and distributed according to the terms of the
6 // This software may be used and distributed according to the terms of the
7 // GNU General Public License version 2 or any later version.
7 // GNU General Public License version 2 or any later version.
8
8
9 use crate::utils::{
9 use crate::utils::{
10 files::lower_clean,
10 files::lower_clean,
11 find_slice_in_slice,
11 find_slice_in_slice,
12 hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError},
12 hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError},
13 };
13 };
14 use std::collections::HashSet;
14 use std::collections::HashSet;
15 use std::path::{Path, PathBuf};
15 use std::path::{Path, PathBuf};
16 use std::sync::{Mutex, RwLock};
16 use std::sync::{Mutex, RwLock};
17
17
18 /// Ensures that a path is valid for use in the repository i.e. does not use
18 /// Ensures that a path is valid for use in the repository i.e. does not use
19 /// any banned components, does not traverse a symlink, etc.
19 /// any banned components, does not traverse a symlink, etc.
20 #[derive(Debug, Default)]
20 #[derive(Debug, Default)]
21 pub struct PathAuditor {
21 pub struct PathAuditor {
22 audited: Mutex<HashSet<HgPathBuf>>,
22 audited: Mutex<HashSet<HgPathBuf>>,
23 audited_dirs: RwLock<HashSet<HgPathBuf>>,
23 audited_dirs: RwLock<HashSet<HgPathBuf>>,
24 root: PathBuf,
24 root: PathBuf,
25 }
25 }
26
26
27 impl PathAuditor {
27 impl PathAuditor {
28 pub fn new(root: impl AsRef<Path>) -> Self {
28 pub fn new(root: impl AsRef<Path>) -> Self {
29 Self {
29 Self {
30 root: root.as_ref().to_owned(),
30 root: root.as_ref().to_owned(),
31 ..Default::default()
31 ..Default::default()
32 }
32 }
33 }
33 }
34 pub fn audit_path(
34 pub fn audit_path(
35 &self,
35 &self,
36 path: impl AsRef<HgPath>,
36 path: impl AsRef<HgPath>,
37 ) -> Result<(), HgPathError> {
37 ) -> Result<(), HgPathError> {
38 // TODO windows "localpath" normalization
38 // TODO windows "localpath" normalization
39 let path = path.as_ref();
39 let path = path.as_ref();
40 if path.is_empty() {
40 if path.is_empty() {
41 return Ok(());
41 return Ok(());
42 }
42 }
43 // TODO case normalization
43 // TODO case normalization
44 if self.audited.lock().unwrap().contains(path) {
44 if self.audited.lock().unwrap().contains(path) {
45 return Ok(());
45 return Ok(());
46 }
46 }
47 // AIX ignores "/" at end of path, others raise EISDIR.
47 // AIX ignores "/" at end of path, others raise EISDIR.
48 let last_byte = path.as_bytes()[path.len() - 1];
48 let last_byte = path.as_bytes()[path.len() - 1];
49 if last_byte == b'/' || last_byte == b'\\' {
49 if last_byte == b'/' || last_byte == b'\\' {
50 return Err(HgPathError::EndsWithSlash(path.to_owned()));
50 return Err(HgPathError::EndsWithSlash(path.to_owned()));
51 }
51 }
52 let parts: Vec<_> = path
52 let parts: Vec<_> = path
53 .as_bytes()
53 .as_bytes()
54 .split(|b| std::path::is_separator(*b as char))
54 .split(|b| std::path::is_separator(*b as char))
55 .collect();
55 .collect();
56
56
57 let first_component = lower_clean(parts[0]);
57 let first_component = lower_clean(parts[0]);
58 let first_component = first_component.as_slice();
58 let first_component = first_component.as_slice();
59 if !path.split_drive().0.is_empty()
59 if !path.split_drive().0.is_empty()
60 || (first_component == b".hg"
60 || (first_component == b".hg"
61 || first_component == b".hg."
61 || first_component == b".hg."
62 || first_component == b"")
62 || first_component == b"")
63 || parts.iter().any(|c| c == b"..")
63 || parts.iter().any(|c| c == b"..")
64 {
64 {
65 return Err(HgPathError::InsideDotHg(path.to_owned()));
65 return Err(HgPathError::InsideDotHg(path.to_owned()));
66 }
66 }
67
67
68 // Windows shortname aliases
68 // Windows shortname aliases
69 for part in parts.iter() {
69 for part in parts.iter() {
70 if part.contains(&b'~') {
70 if part.contains(&b'~') {
71 let mut split = part.splitn(2, |b| *b == b'~');
71 let mut split = part.splitn(2, |b| *b == b'~');
72 let first =
72 let first =
73 split.next().unwrap().to_owned().to_ascii_uppercase();
73 split.next().unwrap().to_owned().to_ascii_uppercase();
74 let last = split.next().unwrap();
74 let last = split.next().unwrap();
75 if last.iter().all(u8::is_ascii_digit)
75 if last.iter().all(u8::is_ascii_digit)
76 && (first == b"HG" || first == b"HG8B6C")
76 && (first == b"HG" || first == b"HG8B6C")
77 {
77 {
78 return Err(HgPathError::ContainsIllegalComponent(
78 return Err(HgPathError::ContainsIllegalComponent(
79 path.to_owned(),
79 path.to_owned(),
80 ));
80 ));
81 }
81 }
82 }
82 }
83 }
83 }
84 let lower_path = lower_clean(path.as_bytes());
84 let lower_path = lower_clean(path.as_bytes());
85 if find_slice_in_slice(&lower_path, b".hg").is_some() {
85 if find_slice_in_slice(&lower_path, b".hg").is_some() {
86 let lower_parts: Vec<_> = path
86 let lower_parts: Vec<_> = path
87 .as_bytes()
87 .as_bytes()
88 .split(|b| std::path::is_separator(*b as char))
88 .split(|b| std::path::is_separator(*b as char))
89 .collect();
89 .collect();
90 for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() {
90 for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() {
91 if let Some(pos) = lower_parts[1..]
91 if let Some(pos) = lower_parts[1..]
92 .iter()
92 .iter()
93 .position(|part| part == &pattern.as_slice())
93 .position(|part| part == &pattern.as_slice())
94 {
94 {
95 let base = lower_parts[..=pos]
95 let base = lower_parts[..=pos]
96 .iter()
96 .iter()
97 .fold(HgPathBuf::new(), |acc, p| {
97 .fold(HgPathBuf::new(), |acc, p| {
98 acc.join(HgPath::new(p))
98 acc.join(HgPath::new(p))
99 });
99 });
100 return Err(HgPathError::IsInsideNestedRepo {
100 return Err(HgPathError::IsInsideNestedRepo {
101 path: path.to_owned(),
101 path: path.to_owned(),
102 nested_repo: base,
102 nested_repo: base,
103 });
103 });
104 }
104 }
105 }
105 }
106 }
106 }
107
107
108 let parts = &parts[..parts.len().saturating_sub(1)];
108 let parts = &parts[..parts.len().saturating_sub(1)];
109
109
110 // We don't want to add "foo/bar/baz" to `audited_dirs` before checking
110 // We don't want to add "foo/bar/baz" to `audited_dirs` before checking
111 // if there's a "foo/.hg" directory. This also means we won't
111 // if there's a "foo/.hg" directory. This also means we won't
112 // accidentally traverse a symlink into some other filesystem (which
112 // accidentally traverse a symlink into some other filesystem (which
113 // is potentially expensive to access).
113 // is potentially expensive to access).
114 for index in 0..parts.len() {
114 for index in 0..parts.len() {
115 let prefix = &parts[..index + 1].join(&b'/');
115 let prefix = &parts[..=index].join(&b'/');
116 let prefix = HgPath::new(prefix);
116 let prefix = HgPath::new(prefix);
117 if self.audited_dirs.read().unwrap().contains(prefix) {
117 if self.audited_dirs.read().unwrap().contains(prefix) {
118 continue;
118 continue;
119 }
119 }
120 self.check_filesystem(&prefix, &path)?;
120 self.check_filesystem(&prefix, &path)?;
121 self.audited_dirs.write().unwrap().insert(prefix.to_owned());
121 self.audited_dirs.write().unwrap().insert(prefix.to_owned());
122 }
122 }
123
123
124 self.audited.lock().unwrap().insert(path.to_owned());
124 self.audited.lock().unwrap().insert(path.to_owned());
125
125
126 Ok(())
126 Ok(())
127 }
127 }
128
128
129 pub fn check_filesystem(
129 pub fn check_filesystem(
130 &self,
130 &self,
131 prefix: impl AsRef<HgPath>,
131 prefix: impl AsRef<HgPath>,
132 path: impl AsRef<HgPath>,
132 path: impl AsRef<HgPath>,
133 ) -> Result<(), HgPathError> {
133 ) -> Result<(), HgPathError> {
134 let prefix = prefix.as_ref();
134 let prefix = prefix.as_ref();
135 let path = path.as_ref();
135 let path = path.as_ref();
136 let current_path = self.root.join(
136 let current_path = self.root.join(
137 hg_path_to_path_buf(prefix)
137 hg_path_to_path_buf(prefix)
138 .map_err(|_| HgPathError::NotFsCompliant(path.to_owned()))?,
138 .map_err(|_| HgPathError::NotFsCompliant(path.to_owned()))?,
139 );
139 );
140 match std::fs::symlink_metadata(&current_path) {
140 match std::fs::symlink_metadata(&current_path) {
141 Err(e) => {
141 Err(e) => {
142 // EINVAL can be raised as invalid path syntax under win32.
142 // EINVAL can be raised as invalid path syntax under win32.
143 if e.kind() != std::io::ErrorKind::NotFound
143 if e.kind() != std::io::ErrorKind::NotFound
144 && e.kind() != std::io::ErrorKind::InvalidInput
144 && e.kind() != std::io::ErrorKind::InvalidInput
145 && e.raw_os_error() != Some(20)
145 && e.raw_os_error() != Some(20)
146 {
146 {
147 // Rust does not yet have an `ErrorKind` for
147 // Rust does not yet have an `ErrorKind` for
148 // `NotADirectory` (errno 20)
148 // `NotADirectory` (errno 20)
149 // It happens if the dirstate contains `foo/bar` and
149 // It happens if the dirstate contains `foo/bar` and
150 // foo is not a directory
150 // foo is not a directory
151 return Err(HgPathError::NotFsCompliant(path.to_owned()));
151 return Err(HgPathError::NotFsCompliant(path.to_owned()));
152 }
152 }
153 }
153 }
154 Ok(meta) => {
154 Ok(meta) => {
155 if meta.file_type().is_symlink() {
155 if meta.file_type().is_symlink() {
156 return Err(HgPathError::TraversesSymbolicLink {
156 return Err(HgPathError::TraversesSymbolicLink {
157 path: path.to_owned(),
157 path: path.to_owned(),
158 symlink: prefix.to_owned(),
158 symlink: prefix.to_owned(),
159 });
159 });
160 }
160 }
161 if meta.file_type().is_dir()
161 if meta.file_type().is_dir()
162 && current_path.join(".hg").is_dir()
162 && current_path.join(".hg").is_dir()
163 {
163 {
164 return Err(HgPathError::IsInsideNestedRepo {
164 return Err(HgPathError::IsInsideNestedRepo {
165 path: path.to_owned(),
165 path: path.to_owned(),
166 nested_repo: prefix.to_owned(),
166 nested_repo: prefix.to_owned(),
167 });
167 });
168 }
168 }
169 }
169 }
170 };
170 };
171
171
172 Ok(())
172 Ok(())
173 }
173 }
174
174
175 pub fn check(&self, path: impl AsRef<HgPath>) -> bool {
175 pub fn check(&self, path: impl AsRef<HgPath>) -> bool {
176 self.audit_path(path).is_ok()
176 self.audit_path(path).is_ok()
177 }
177 }
178 }
178 }
179
179
180 #[cfg(test)]
180 #[cfg(test)]
181 mod tests {
181 mod tests {
182 use super::*;
182 use super::*;
183 use crate::utils::files::get_path_from_bytes;
183 use crate::utils::files::get_path_from_bytes;
184 use crate::utils::hg_path::path_to_hg_path_buf;
184 use crate::utils::hg_path::path_to_hg_path_buf;
185
185
186 #[test]
186 #[test]
187 fn test_path_auditor() {
187 fn test_path_auditor() {
188 let auditor = PathAuditor::new(get_path_from_bytes(b"/tmp"));
188 let auditor = PathAuditor::new(get_path_from_bytes(b"/tmp"));
189
189
190 let path = HgPath::new(b".hg/00changelog.i");
190 let path = HgPath::new(b".hg/00changelog.i");
191 assert_eq!(
191 assert_eq!(
192 auditor.audit_path(path),
192 auditor.audit_path(path),
193 Err(HgPathError::InsideDotHg(path.to_owned()))
193 Err(HgPathError::InsideDotHg(path.to_owned()))
194 );
194 );
195 let path = HgPath::new(b"this/is/nested/.hg/thing.txt");
195 let path = HgPath::new(b"this/is/nested/.hg/thing.txt");
196 assert_eq!(
196 assert_eq!(
197 auditor.audit_path(path),
197 auditor.audit_path(path),
198 Err(HgPathError::IsInsideNestedRepo {
198 Err(HgPathError::IsInsideNestedRepo {
199 path: path.to_owned(),
199 path: path.to_owned(),
200 nested_repo: HgPathBuf::from_bytes(b"this/is/nested")
200 nested_repo: HgPathBuf::from_bytes(b"this/is/nested")
201 })
201 })
202 );
202 );
203
203
204 use std::fs::{create_dir, File};
204 use std::fs::{create_dir, File};
205 use tempfile::tempdir;
205 use tempfile::tempdir;
206
206
207 let base_dir = tempdir().unwrap();
207 let base_dir = tempdir().unwrap();
208 let base_dir_path = base_dir.path();
208 let base_dir_path = base_dir.path();
209 let a = base_dir_path.join("a");
209 let a = base_dir_path.join("a");
210 let b = base_dir_path.join("b");
210 let b = base_dir_path.join("b");
211 create_dir(&a).unwrap();
211 create_dir(&a).unwrap();
212 let in_a_path = a.join("in_a");
212 let in_a_path = a.join("in_a");
213 File::create(in_a_path).unwrap();
213 File::create(in_a_path).unwrap();
214
214
215 // TODO make portable
215 // TODO make portable
216 std::os::unix::fs::symlink(&a, &b).unwrap();
216 std::os::unix::fs::symlink(&a, &b).unwrap();
217
217
218 let buf = b.join("in_a").components().skip(2).collect::<PathBuf>();
218 let buf = b.join("in_a").components().skip(2).collect::<PathBuf>();
219 eprintln!("buf: {}", buf.display());
219 eprintln!("buf: {}", buf.display());
220 let path = path_to_hg_path_buf(buf).unwrap();
220 let path = path_to_hg_path_buf(buf).unwrap();
221 assert_eq!(
221 assert_eq!(
222 auditor.audit_path(&path),
222 auditor.audit_path(&path),
223 Err(HgPathError::TraversesSymbolicLink {
223 Err(HgPathError::TraversesSymbolicLink {
224 path: path,
224 path: path,
225 symlink: path_to_hg_path_buf(
225 symlink: path_to_hg_path_buf(
226 b.components().skip(2).collect::<PathBuf>()
226 b.components().skip(2).collect::<PathBuf>()
227 )
227 )
228 .unwrap()
228 .unwrap()
229 })
229 })
230 );
230 );
231 }
231 }
232 }
232 }
@@ -1,179 +1,176
1 // cindex.rs
1 // cindex.rs
2 //
2 //
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Bindings to use the Index defined by the parsers C extension
8 //! Bindings to use the Index defined by the parsers C extension
9 //!
9 //!
10 //! Ideally, we should use an Index entirely implemented in Rust,
10 //! Ideally, we should use an Index entirely implemented in Rust,
11 //! but this will take some time to get there.
11 //! but this will take some time to get there.
12
12
13 use cpython::{
13 use cpython::{
14 exc::ImportError, ObjectProtocol, PyClone, PyErr, PyObject, PyResult,
14 exc::ImportError, ObjectProtocol, PyClone, PyErr, PyObject, PyResult,
15 PyTuple, Python, PythonObject,
15 PyTuple, Python, PythonObject,
16 };
16 };
17 use hg::revlog::{Node, RevlogIndex};
17 use hg::revlog::{Node, RevlogIndex};
18 use hg::{Graph, GraphError, Revision, WORKING_DIRECTORY_REVISION};
18 use hg::{Graph, GraphError, Revision, WORKING_DIRECTORY_REVISION};
19 use libc::c_int;
19 use libc::c_int;
20
20
21 const REVLOG_CABI_VERSION: c_int = 2;
21 const REVLOG_CABI_VERSION: c_int = 2;
22
22
23 #[repr(C)]
23 #[repr(C)]
24 pub struct Revlog_CAPI {
24 pub struct Revlog_CAPI {
25 abi_version: c_int,
25 abi_version: c_int,
26 index_length:
26 index_length:
27 unsafe extern "C" fn(index: *mut revlog_capi::RawPyObject) -> c_int,
27 unsafe extern "C" fn(index: *mut revlog_capi::RawPyObject) -> c_int,
28 index_node: unsafe extern "C" fn(
28 index_node: unsafe extern "C" fn(
29 index: *mut revlog_capi::RawPyObject,
29 index: *mut revlog_capi::RawPyObject,
30 rev: c_int,
30 rev: c_int,
31 ) -> *const Node,
31 ) -> *const Node,
32 index_parents: unsafe extern "C" fn(
32 index_parents: unsafe extern "C" fn(
33 index: *mut revlog_capi::RawPyObject,
33 index: *mut revlog_capi::RawPyObject,
34 rev: c_int,
34 rev: c_int,
35 ps: *mut [c_int; 2],
35 ps: *mut [c_int; 2],
36 ) -> c_int,
36 ) -> c_int,
37 }
37 }
38
38
39 py_capsule!(
39 py_capsule!(
40 from mercurial.cext.parsers import revlog_CAPI
40 from mercurial.cext.parsers import revlog_CAPI
41 as revlog_capi for Revlog_CAPI);
41 as revlog_capi for Revlog_CAPI);
42
42
43 /// A `Graph` backed up by objects and functions from revlog.c
43 /// A `Graph` backed up by objects and functions from revlog.c
44 ///
44 ///
45 /// This implementation of the `Graph` trait, relies on (pointers to)
45 /// This implementation of the `Graph` trait, relies on (pointers to)
46 /// - the C index object (`index` member)
46 /// - the C index object (`index` member)
47 /// - the `index_get_parents()` function (`parents` member)
47 /// - the `index_get_parents()` function (`parents` member)
48 ///
48 ///
49 /// # Safety
49 /// # Safety
50 ///
50 ///
51 /// The C index itself is mutable, and this Rust exposition is **not
51 /// The C index itself is mutable, and this Rust exposition is **not
52 /// protected by the GIL**, meaning that this construct isn't safe with respect
52 /// protected by the GIL**, meaning that this construct isn't safe with respect
53 /// to Python threads.
53 /// to Python threads.
54 ///
54 ///
55 /// All callers of this `Index` must acquire the GIL and must not release it
55 /// All callers of this `Index` must acquire the GIL and must not release it
56 /// while working.
56 /// while working.
57 ///
57 ///
58 /// # TODO find a solution to make it GIL safe again.
58 /// # TODO find a solution to make it GIL safe again.
59 ///
59 ///
60 /// This is non trivial, and can wait until we have a clearer picture with
60 /// This is non trivial, and can wait until we have a clearer picture with
61 /// more Rust Mercurial constructs.
61 /// more Rust Mercurial constructs.
62 ///
62 ///
63 /// One possibility would be to a `GILProtectedIndex` wrapper enclosing
63 /// One possibility would be to a `GILProtectedIndex` wrapper enclosing
64 /// a `Python<'p>` marker and have it be the one implementing the
64 /// a `Python<'p>` marker and have it be the one implementing the
65 /// `Graph` trait, but this would mean the `Graph` implementor would become
65 /// `Graph` trait, but this would mean the `Graph` implementor would become
66 /// likely to change between subsequent method invocations of the `hg-core`
66 /// likely to change between subsequent method invocations of the `hg-core`
67 /// objects (a serious change of the `hg-core` API):
67 /// objects (a serious change of the `hg-core` API):
68 /// either exposing ways to mutate the `Graph`, or making it a non persistent
68 /// either exposing ways to mutate the `Graph`, or making it a non persistent
69 /// parameter in the relevant methods that need one.
69 /// parameter in the relevant methods that need one.
70 ///
70 ///
71 /// Another possibility would be to introduce an abstract lock handle into
71 /// Another possibility would be to introduce an abstract lock handle into
72 /// the core API, that would be tied to `GILGuard` / `Python<'p>`
72 /// the core API, that would be tied to `GILGuard` / `Python<'p>`
73 /// in the case of the `cpython` crate bindings yet could leave room for other
73 /// in the case of the `cpython` crate bindings yet could leave room for other
74 /// mechanisms in other contexts.
74 /// mechanisms in other contexts.
75 pub struct Index {
75 pub struct Index {
76 index: PyObject,
76 index: PyObject,
77 capi: &'static Revlog_CAPI,
77 capi: &'static Revlog_CAPI,
78 }
78 }
79
79
80 impl Index {
80 impl Index {
81 pub fn new(py: Python, index: PyObject) -> PyResult<Self> {
81 pub fn new(py: Python, index: PyObject) -> PyResult<Self> {
82 let capi = unsafe { revlog_capi::retrieve(py)? };
82 let capi = unsafe { revlog_capi::retrieve(py)? };
83 if capi.abi_version != REVLOG_CABI_VERSION {
83 if capi.abi_version != REVLOG_CABI_VERSION {
84 return Err(PyErr::new::<ImportError, _>(
84 return Err(PyErr::new::<ImportError, _>(
85 py,
85 py,
86 format!(
86 format!(
87 "ABI version mismatch: the C ABI revlog version {} \
87 "ABI version mismatch: the C ABI revlog version {} \
88 does not match the {} expected by Rust hg-cpython",
88 does not match the {} expected by Rust hg-cpython",
89 capi.abi_version, REVLOG_CABI_VERSION
89 capi.abi_version, REVLOG_CABI_VERSION
90 ),
90 ),
91 ));
91 ));
92 }
92 }
93 Ok(Index {
93 Ok(Index { index, capi })
94 index: index,
95 capi: capi,
96 })
97 }
94 }
98
95
99 /// return a reference to the CPython Index object in this Struct
96 /// return a reference to the CPython Index object in this Struct
100 pub fn inner(&self) -> &PyObject {
97 pub fn inner(&self) -> &PyObject {
101 &self.index
98 &self.index
102 }
99 }
103
100
104 pub fn append(&mut self, py: Python, tup: PyTuple) -> PyResult<PyObject> {
101 pub fn append(&mut self, py: Python, tup: PyTuple) -> PyResult<PyObject> {
105 self.index.call_method(
102 self.index.call_method(
106 py,
103 py,
107 "append",
104 "append",
108 PyTuple::new(py, &[tup.into_object()]),
105 PyTuple::new(py, &[tup.into_object()]),
109 None,
106 None,
110 )
107 )
111 }
108 }
112 }
109 }
113
110
114 impl Clone for Index {
111 impl Clone for Index {
115 fn clone(&self) -> Self {
112 fn clone(&self) -> Self {
116 let guard = Python::acquire_gil();
113 let guard = Python::acquire_gil();
117 Index {
114 Index {
118 index: self.index.clone_ref(guard.python()),
115 index: self.index.clone_ref(guard.python()),
119 capi: self.capi,
116 capi: self.capi,
120 }
117 }
121 }
118 }
122 }
119 }
123
120
124 impl PyClone for Index {
121 impl PyClone for Index {
125 fn clone_ref(&self, py: Python) -> Self {
122 fn clone_ref(&self, py: Python) -> Self {
126 Index {
123 Index {
127 index: self.index.clone_ref(py),
124 index: self.index.clone_ref(py),
128 capi: self.capi,
125 capi: self.capi,
129 }
126 }
130 }
127 }
131 }
128 }
132
129
133 impl Graph for Index {
130 impl Graph for Index {
134 /// wrap a call to the C extern parents function
131 /// wrap a call to the C extern parents function
135 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
132 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
136 if rev == WORKING_DIRECTORY_REVISION {
133 if rev == WORKING_DIRECTORY_REVISION {
137 return Err(GraphError::WorkingDirectoryUnsupported);
134 return Err(GraphError::WorkingDirectoryUnsupported);
138 }
135 }
139 let mut res: [c_int; 2] = [0; 2];
136 let mut res: [c_int; 2] = [0; 2];
140 let code = unsafe {
137 let code = unsafe {
141 (self.capi.index_parents)(
138 (self.capi.index_parents)(
142 self.index.as_ptr(),
139 self.index.as_ptr(),
143 rev as c_int,
140 rev as c_int,
144 &mut res as *mut [c_int; 2],
141 &mut res as *mut [c_int; 2],
145 )
142 )
146 };
143 };
147 match code {
144 match code {
148 0 => Ok(res),
145 0 => Ok(res),
149 _ => Err(GraphError::ParentOutOfRange(rev)),
146 _ => Err(GraphError::ParentOutOfRange(rev)),
150 }
147 }
151 }
148 }
152 }
149 }
153
150
154 impl RevlogIndex for Index {
151 impl RevlogIndex for Index {
155 /// Note C return type is Py_ssize_t (hence signed), but we shall
152 /// Note C return type is Py_ssize_t (hence signed), but we shall
156 /// force it to unsigned, because it's a length
153 /// force it to unsigned, because it's a length
157 fn len(&self) -> usize {
154 fn len(&self) -> usize {
158 unsafe { (self.capi.index_length)(self.index.as_ptr()) as usize }
155 unsafe { (self.capi.index_length)(self.index.as_ptr()) as usize }
159 }
156 }
160
157
161 fn node<'a>(&'a self, rev: Revision) -> Option<&'a Node> {
158 fn node(&self, rev: Revision) -> Option<&Node> {
162 let raw = unsafe {
159 let raw = unsafe {
163 (self.capi.index_node)(self.index.as_ptr(), rev as c_int)
160 (self.capi.index_node)(self.index.as_ptr(), rev as c_int)
164 };
161 };
165 if raw.is_null() {
162 if raw.is_null() {
166 None
163 None
167 } else {
164 } else {
168 // TODO it would be much better for the C layer to give us
165 // TODO it would be much better for the C layer to give us
169 // a length, since the hash length will change in the near
166 // a length, since the hash length will change in the near
170 // future, but that's probably out of scope for the nodemap
167 // future, but that's probably out of scope for the nodemap
171 // patch series.
168 // patch series.
172 //
169 //
173 // The root of that unsafety relies in the signature of
170 // The root of that unsafety relies in the signature of
174 // `capi.index_node()` itself: returning a `Node` pointer
171 // `capi.index_node()` itself: returning a `Node` pointer
175 // whereas it's a `char *` in the C counterpart.
172 // whereas it's a `char *` in the C counterpart.
176 Some(unsafe { &*raw })
173 Some(unsafe { &*raw })
177 }
174 }
178 }
175 }
179 }
176 }
@@ -1,118 +1,118
1 // copymap.rs
1 // copymap.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Bindings for `hg::dirstate::dirstate_map::CopyMap` provided by the
8 //! Bindings for `hg::dirstate::dirstate_map::CopyMap` provided by the
9 //! `hg-core` package.
9 //! `hg-core` package.
10
10
11 use cpython::{
11 use cpython::{
12 PyBytes, PyClone, PyDict, PyObject, PyResult, Python, UnsafePyLeaked,
12 PyBytes, PyClone, PyDict, PyObject, PyResult, Python, UnsafePyLeaked,
13 };
13 };
14 use std::cell::RefCell;
14 use std::cell::RefCell;
15
15
16 use crate::dirstate::dirstate_map::DirstateMap;
16 use crate::dirstate::dirstate_map::DirstateMap;
17 use hg::{utils::hg_path::HgPathBuf, CopyMapIter};
17 use hg::{utils::hg_path::HgPathBuf, CopyMapIter};
18
18
19 py_class!(pub class CopyMap |py| {
19 py_class!(pub class CopyMap |py| {
20 data dirstate_map: DirstateMap;
20 data dirstate_map: DirstateMap;
21
21
22 def __getitem__(&self, key: PyObject) -> PyResult<PyBytes> {
22 def __getitem__(&self, key: PyObject) -> PyResult<PyBytes> {
23 (*self.dirstate_map(py)).copymapgetitem(py, key)
23 (*self.dirstate_map(py)).copymapgetitem(py, key)
24 }
24 }
25
25
26 def __len__(&self) -> PyResult<usize> {
26 def __len__(&self) -> PyResult<usize> {
27 self.dirstate_map(py).copymaplen(py)
27 self.dirstate_map(py).copymaplen(py)
28 }
28 }
29
29
30 def __contains__(&self, key: PyObject) -> PyResult<bool> {
30 def __contains__(&self, key: PyObject) -> PyResult<bool> {
31 self.dirstate_map(py).copymapcontains(py, key)
31 self.dirstate_map(py).copymapcontains(py, key)
32 }
32 }
33
33
34 def get(
34 def get(
35 &self,
35 &self,
36 key: PyObject,
36 key: PyObject,
37 default: Option<PyObject> = None
37 default: Option<PyObject> = None
38 ) -> PyResult<Option<PyObject>> {
38 ) -> PyResult<Option<PyObject>> {
39 self.dirstate_map(py).copymapget(py, key, default)
39 self.dirstate_map(py).copymapget(py, key, default)
40 }
40 }
41
41
42 def pop(
42 def pop(
43 &self,
43 &self,
44 key: PyObject,
44 key: PyObject,
45 default: Option<PyObject> = None
45 default: Option<PyObject> = None
46 ) -> PyResult<Option<PyObject>> {
46 ) -> PyResult<Option<PyObject>> {
47 self.dirstate_map(py).copymappop(py, key, default)
47 self.dirstate_map(py).copymappop(py, key, default)
48 }
48 }
49
49
50 def __iter__(&self) -> PyResult<CopyMapKeysIterator> {
50 def __iter__(&self) -> PyResult<CopyMapKeysIterator> {
51 self.dirstate_map(py).copymapiter(py)
51 self.dirstate_map(py).copymapiter(py)
52 }
52 }
53
53
54 // Python's `dict()` builtin works with either a subclass of dict
54 // Python's `dict()` builtin works with either a subclass of dict
55 // or an abstract mapping. Said mapping needs to implement `__getitem__`
55 // or an abstract mapping. Said mapping needs to implement `__getitem__`
56 // and `keys`.
56 // and `keys`.
57 def keys(&self) -> PyResult<CopyMapKeysIterator> {
57 def keys(&self) -> PyResult<CopyMapKeysIterator> {
58 self.dirstate_map(py).copymapiter(py)
58 self.dirstate_map(py).copymapiter(py)
59 }
59 }
60
60
61 def items(&self) -> PyResult<CopyMapItemsIterator> {
61 def items(&self) -> PyResult<CopyMapItemsIterator> {
62 self.dirstate_map(py).copymapitemsiter(py)
62 self.dirstate_map(py).copymapitemsiter(py)
63 }
63 }
64
64
65 def iteritems(&self) -> PyResult<CopyMapItemsIterator> {
65 def iteritems(&self) -> PyResult<CopyMapItemsIterator> {
66 self.dirstate_map(py).copymapitemsiter(py)
66 self.dirstate_map(py).copymapitemsiter(py)
67 }
67 }
68
68
69 def __setitem__(
69 def __setitem__(
70 &self,
70 &self,
71 key: PyObject,
71 key: PyObject,
72 item: PyObject
72 item: PyObject
73 ) -> PyResult<()> {
73 ) -> PyResult<()> {
74 self.dirstate_map(py).copymapsetitem(py, key, item)?;
74 self.dirstate_map(py).copymapsetitem(py, key, item)?;
75 Ok(())
75 Ok(())
76 }
76 }
77
77
78 def copy(&self) -> PyResult<PyDict> {
78 def copy(&self) -> PyResult<PyDict> {
79 self.dirstate_map(py).copymapcopy(py)
79 self.dirstate_map(py).copymapcopy(py)
80 }
80 }
81
81
82 });
82 });
83
83
84 impl CopyMap {
84 impl CopyMap {
85 pub fn from_inner(py: Python, dm: DirstateMap) -> PyResult<Self> {
85 pub fn from_inner(py: Python, dm: DirstateMap) -> PyResult<Self> {
86 Self::create_instance(py, dm)
86 Self::create_instance(py, dm)
87 }
87 }
88 fn translate_key(
88 fn translate_key(
89 py: Python,
89 py: Python,
90 res: (&HgPathBuf, &HgPathBuf),
90 res: (&HgPathBuf, &HgPathBuf),
91 ) -> PyResult<Option<PyBytes>> {
91 ) -> PyResult<Option<PyBytes>> {
92 Ok(Some(PyBytes::new(py, res.0.as_ref())))
92 Ok(Some(PyBytes::new(py, res.0.as_bytes())))
93 }
93 }
94 fn translate_key_value(
94 fn translate_key_value(
95 py: Python,
95 py: Python,
96 res: (&HgPathBuf, &HgPathBuf),
96 res: (&HgPathBuf, &HgPathBuf),
97 ) -> PyResult<Option<(PyBytes, PyBytes)>> {
97 ) -> PyResult<Option<(PyBytes, PyBytes)>> {
98 let (k, v) = res;
98 let (k, v) = res;
99 Ok(Some((
99 Ok(Some((
100 PyBytes::new(py, k.as_ref()),
100 PyBytes::new(py, k.as_bytes()),
101 PyBytes::new(py, v.as_ref()),
101 PyBytes::new(py, v.as_bytes()),
102 )))
102 )))
103 }
103 }
104 }
104 }
105
105
106 py_shared_iterator!(
106 py_shared_iterator!(
107 CopyMapKeysIterator,
107 CopyMapKeysIterator,
108 UnsafePyLeaked<CopyMapIter<'static>>,
108 UnsafePyLeaked<CopyMapIter<'static>>,
109 CopyMap::translate_key,
109 CopyMap::translate_key,
110 Option<PyBytes>
110 Option<PyBytes>
111 );
111 );
112
112
113 py_shared_iterator!(
113 py_shared_iterator!(
114 CopyMapItemsIterator,
114 CopyMapItemsIterator,
115 UnsafePyLeaked<CopyMapIter<'static>>,
115 UnsafePyLeaked<CopyMapIter<'static>>,
116 CopyMap::translate_key_value,
116 CopyMap::translate_key_value,
117 Option<(PyBytes, PyBytes)>
117 Option<(PyBytes, PyBytes)>
118 );
118 );
@@ -1,140 +1,140
1 // dirs_multiset.rs
1 // dirs_multiset.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Bindings for the `hg::dirstate::dirs_multiset` file provided by the
8 //! Bindings for the `hg::dirstate::dirs_multiset` file provided by the
9 //! `hg-core` package.
9 //! `hg-core` package.
10
10
11 use std::cell::RefCell;
11 use std::cell::RefCell;
12 use std::convert::TryInto;
12 use std::convert::TryInto;
13
13
14 use cpython::{
14 use cpython::{
15 exc, ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyObject, PyResult,
15 exc, ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyObject, PyResult,
16 Python, UnsafePyLeaked,
16 Python, UnsafePyLeaked,
17 };
17 };
18
18
19 use crate::dirstate::extract_dirstate;
19 use crate::dirstate::extract_dirstate;
20 use hg::{
20 use hg::{
21 utils::hg_path::{HgPath, HgPathBuf},
21 utils::hg_path::{HgPath, HgPathBuf},
22 DirsMultiset, DirsMultisetIter, DirstateMapError, DirstateParseError,
22 DirsMultiset, DirsMultisetIter, DirstateMapError, DirstateParseError,
23 EntryState,
23 EntryState,
24 };
24 };
25
25
26 py_class!(pub class Dirs |py| {
26 py_class!(pub class Dirs |py| {
27 @shared data inner: DirsMultiset;
27 @shared data inner: DirsMultiset;
28
28
29 // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes
29 // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes
30 // a `list`)
30 // a `list`)
31 def __new__(
31 def __new__(
32 _cls,
32 _cls,
33 map: PyObject,
33 map: PyObject,
34 skip: Option<PyObject> = None
34 skip: Option<PyObject> = None
35 ) -> PyResult<Self> {
35 ) -> PyResult<Self> {
36 let mut skip_state: Option<EntryState> = None;
36 let mut skip_state: Option<EntryState> = None;
37 if let Some(skip) = skip {
37 if let Some(skip) = skip {
38 skip_state = Some(
38 skip_state = Some(
39 skip.extract::<PyBytes>(py)?.data(py)[0]
39 skip.extract::<PyBytes>(py)?.data(py)[0]
40 .try_into()
40 .try_into()
41 .map_err(|e: DirstateParseError| {
41 .map_err(|e: DirstateParseError| {
42 PyErr::new::<exc::ValueError, _>(py, e.to_string())
42 PyErr::new::<exc::ValueError, _>(py, e.to_string())
43 })?,
43 })?,
44 );
44 );
45 }
45 }
46 let inner = if let Ok(map) = map.cast_as::<PyDict>(py) {
46 let inner = if let Ok(map) = map.cast_as::<PyDict>(py) {
47 let dirstate = extract_dirstate(py, &map)?;
47 let dirstate = extract_dirstate(py, &map)?;
48 DirsMultiset::from_dirstate(&dirstate, skip_state)
48 DirsMultiset::from_dirstate(&dirstate, skip_state)
49 .map_err(|e| {
49 .map_err(|e| {
50 PyErr::new::<exc::ValueError, _>(py, e.to_string())
50 PyErr::new::<exc::ValueError, _>(py, e.to_string())
51 })?
51 })?
52 } else {
52 } else {
53 let map: Result<Vec<HgPathBuf>, PyErr> = map
53 let map: Result<Vec<HgPathBuf>, PyErr> = map
54 .iter(py)?
54 .iter(py)?
55 .map(|o| {
55 .map(|o| {
56 Ok(HgPathBuf::from_bytes(
56 Ok(HgPathBuf::from_bytes(
57 o?.extract::<PyBytes>(py)?.data(py),
57 o?.extract::<PyBytes>(py)?.data(py),
58 ))
58 ))
59 })
59 })
60 .collect();
60 .collect();
61 DirsMultiset::from_manifest(&map?)
61 DirsMultiset::from_manifest(&map?)
62 .map_err(|e| {
62 .map_err(|e| {
63 PyErr::new::<exc::ValueError, _>(py, e.to_string())
63 PyErr::new::<exc::ValueError, _>(py, e.to_string())
64 })?
64 })?
65 };
65 };
66
66
67 Self::create_instance(py, inner)
67 Self::create_instance(py, inner)
68 }
68 }
69
69
70 def addpath(&self, path: PyObject) -> PyResult<PyObject> {
70 def addpath(&self, path: PyObject) -> PyResult<PyObject> {
71 self.inner(py).borrow_mut().add_path(
71 self.inner(py).borrow_mut().add_path(
72 HgPath::new(path.extract::<PyBytes>(py)?.data(py)),
72 HgPath::new(path.extract::<PyBytes>(py)?.data(py)),
73 ).and(Ok(py.None())).or_else(|e| {
73 ).and(Ok(py.None())).or_else(|e| {
74 match e {
74 match e {
75 DirstateMapError::EmptyPath => {
75 DirstateMapError::EmptyPath => {
76 Ok(py.None())
76 Ok(py.None())
77 },
77 },
78 e => {
78 e => {
79 Err(PyErr::new::<exc::ValueError, _>(
79 Err(PyErr::new::<exc::ValueError, _>(
80 py,
80 py,
81 e.to_string(),
81 e.to_string(),
82 ))
82 ))
83 }
83 }
84 }
84 }
85 })
85 })
86 }
86 }
87
87
88 def delpath(&self, path: PyObject) -> PyResult<PyObject> {
88 def delpath(&self, path: PyObject) -> PyResult<PyObject> {
89 self.inner(py).borrow_mut().delete_path(
89 self.inner(py).borrow_mut().delete_path(
90 HgPath::new(path.extract::<PyBytes>(py)?.data(py)),
90 HgPath::new(path.extract::<PyBytes>(py)?.data(py)),
91 )
91 )
92 .and(Ok(py.None()))
92 .and(Ok(py.None()))
93 .or_else(|e| {
93 .or_else(|e| {
94 match e {
94 match e {
95 DirstateMapError::EmptyPath => {
95 DirstateMapError::EmptyPath => {
96 Ok(py.None())
96 Ok(py.None())
97 },
97 },
98 e => {
98 e => {
99 Err(PyErr::new::<exc::ValueError, _>(
99 Err(PyErr::new::<exc::ValueError, _>(
100 py,
100 py,
101 e.to_string(),
101 e.to_string(),
102 ))
102 ))
103 }
103 }
104 }
104 }
105 })
105 })
106 }
106 }
107 def __iter__(&self) -> PyResult<DirsMultisetKeysIterator> {
107 def __iter__(&self) -> PyResult<DirsMultisetKeysIterator> {
108 let leaked_ref = self.inner(py).leak_immutable();
108 let leaked_ref = self.inner(py).leak_immutable();
109 DirsMultisetKeysIterator::from_inner(
109 DirsMultisetKeysIterator::from_inner(
110 py,
110 py,
111 unsafe { leaked_ref.map(py, |o| o.iter()) },
111 unsafe { leaked_ref.map(py, |o| o.iter()) },
112 )
112 )
113 }
113 }
114
114
115 def __contains__(&self, item: PyObject) -> PyResult<bool> {
115 def __contains__(&self, item: PyObject) -> PyResult<bool> {
116 Ok(self.inner(py).borrow().contains(HgPath::new(
116 Ok(self.inner(py).borrow().contains(HgPath::new(
117 item.extract::<PyBytes>(py)?.data(py).as_ref(),
117 item.extract::<PyBytes>(py)?.data(py).as_ref(),
118 )))
118 )))
119 }
119 }
120 });
120 });
121
121
122 impl Dirs {
122 impl Dirs {
123 pub fn from_inner(py: Python, d: DirsMultiset) -> PyResult<Self> {
123 pub fn from_inner(py: Python, d: DirsMultiset) -> PyResult<Self> {
124 Self::create_instance(py, d)
124 Self::create_instance(py, d)
125 }
125 }
126
126
127 fn translate_key(
127 fn translate_key(
128 py: Python,
128 py: Python,
129 res: &HgPathBuf,
129 res: &HgPathBuf,
130 ) -> PyResult<Option<PyBytes>> {
130 ) -> PyResult<Option<PyBytes>> {
131 Ok(Some(PyBytes::new(py, res.as_ref())))
131 Ok(Some(PyBytes::new(py, res.as_bytes())))
132 }
132 }
133 }
133 }
134
134
135 py_shared_iterator!(
135 py_shared_iterator!(
136 DirsMultisetKeysIterator,
136 DirsMultisetKeysIterator,
137 UnsafePyLeaked<DirsMultisetIter<'static>>,
137 UnsafePyLeaked<DirsMultisetIter<'static>>,
138 Dirs::translate_key,
138 Dirs::translate_key,
139 Option<PyBytes>
139 Option<PyBytes>
140 );
140 );
@@ -1,586 +1,590
1 // dirstate_map.rs
1 // dirstate_map.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Bindings for the `hg::dirstate::dirstate_map` file provided by the
8 //! Bindings for the `hg::dirstate::dirstate_map` file provided by the
9 //! `hg-core` package.
9 //! `hg-core` package.
10
10
11 use std::cell::{Ref, RefCell};
11 use std::cell::{Ref, RefCell};
12 use std::convert::TryInto;
12 use std::convert::TryInto;
13 use std::time::Duration;
13 use std::time::Duration;
14
14
15 use cpython::{
15 use cpython::{
16 exc, ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyList,
16 exc, ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyList,
17 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
17 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
18 UnsafePyLeaked,
18 UnsafePyLeaked,
19 };
19 };
20
20
21 use crate::{
21 use crate::{
22 dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
22 dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
23 dirstate::non_normal_entries::{
23 dirstate::non_normal_entries::{
24 NonNormalEntries, NonNormalEntriesIterator,
24 NonNormalEntries, NonNormalEntriesIterator,
25 },
25 },
26 dirstate::{dirs_multiset::Dirs, make_dirstate_tuple},
26 dirstate::{dirs_multiset::Dirs, make_dirstate_tuple},
27 };
27 };
28 use hg::{
28 use hg::{
29 utils::hg_path::{HgPath, HgPathBuf},
29 utils::hg_path::{HgPath, HgPathBuf},
30 DirsMultiset, DirstateEntry, DirstateMap as RustDirstateMap,
30 DirsMultiset, DirstateEntry, DirstateMap as RustDirstateMap,
31 DirstateMapError, DirstateParents, DirstateParseError, EntryState,
31 DirstateMapError, DirstateParents, DirstateParseError, EntryState,
32 StateMapIter, PARENT_SIZE,
32 StateMapIter, PARENT_SIZE,
33 };
33 };
34
34
35 // TODO
35 // TODO
36 // This object needs to share references to multiple members of its Rust
36 // This object needs to share references to multiple members of its Rust
37 // inner struct, namely `copy_map`, `dirs` and `all_dirs`.
37 // inner struct, namely `copy_map`, `dirs` and `all_dirs`.
38 // Right now `CopyMap` is done, but it needs to have an explicit reference
38 // Right now `CopyMap` is done, but it needs to have an explicit reference
39 // to `RustDirstateMap` which itself needs to have an encapsulation for
39 // to `RustDirstateMap` which itself needs to have an encapsulation for
40 // every method in `CopyMap` (copymapcopy, etc.).
40 // every method in `CopyMap` (copymapcopy, etc.).
41 // This is ugly and hard to maintain.
41 // This is ugly and hard to maintain.
42 // The same logic applies to `dirs` and `all_dirs`, however the `Dirs`
42 // The same logic applies to `dirs` and `all_dirs`, however the `Dirs`
43 // `py_class!` is already implemented and does not mention
43 // `py_class!` is already implemented and does not mention
44 // `RustDirstateMap`, rightfully so.
44 // `RustDirstateMap`, rightfully so.
45 // All attributes also have to have a separate refcount data attribute for
45 // All attributes also have to have a separate refcount data attribute for
46 // leaks, with all methods that go along for reference sharing.
46 // leaks, with all methods that go along for reference sharing.
47 py_class!(pub class DirstateMap |py| {
47 py_class!(pub class DirstateMap |py| {
48 @shared data inner: RustDirstateMap;
48 @shared data inner: RustDirstateMap;
49
49
50 def __new__(_cls, _root: PyObject) -> PyResult<Self> {
50 def __new__(_cls, _root: PyObject) -> PyResult<Self> {
51 let inner = RustDirstateMap::default();
51 let inner = RustDirstateMap::default();
52 Self::create_instance(py, inner)
52 Self::create_instance(py, inner)
53 }
53 }
54
54
55 def clear(&self) -> PyResult<PyObject> {
55 def clear(&self) -> PyResult<PyObject> {
56 self.inner(py).borrow_mut().clear();
56 self.inner(py).borrow_mut().clear();
57 Ok(py.None())
57 Ok(py.None())
58 }
58 }
59
59
60 def get(
60 def get(
61 &self,
61 &self,
62 key: PyObject,
62 key: PyObject,
63 default: Option<PyObject> = None
63 default: Option<PyObject> = None
64 ) -> PyResult<Option<PyObject>> {
64 ) -> PyResult<Option<PyObject>> {
65 let key = key.extract::<PyBytes>(py)?;
65 let key = key.extract::<PyBytes>(py)?;
66 match self.inner(py).borrow().get(HgPath::new(key.data(py))) {
66 match self.inner(py).borrow().get(HgPath::new(key.data(py))) {
67 Some(entry) => {
67 Some(entry) => {
68 Ok(Some(make_dirstate_tuple(py, entry)?))
68 Ok(Some(make_dirstate_tuple(py, entry)?))
69 },
69 },
70 None => Ok(default)
70 None => Ok(default)
71 }
71 }
72 }
72 }
73
73
74 def addfile(
74 def addfile(
75 &self,
75 &self,
76 f: PyObject,
76 f: PyObject,
77 oldstate: PyObject,
77 oldstate: PyObject,
78 state: PyObject,
78 state: PyObject,
79 mode: PyObject,
79 mode: PyObject,
80 size: PyObject,
80 size: PyObject,
81 mtime: PyObject
81 mtime: PyObject
82 ) -> PyResult<PyObject> {
82 ) -> PyResult<PyObject> {
83 self.inner(py).borrow_mut().add_file(
83 self.inner(py).borrow_mut().add_file(
84 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
84 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
85 oldstate.extract::<PyBytes>(py)?.data(py)[0]
85 oldstate.extract::<PyBytes>(py)?.data(py)[0]
86 .try_into()
86 .try_into()
87 .map_err(|e: DirstateParseError| {
87 .map_err(|e: DirstateParseError| {
88 PyErr::new::<exc::ValueError, _>(py, e.to_string())
88 PyErr::new::<exc::ValueError, _>(py, e.to_string())
89 })?,
89 })?,
90 DirstateEntry {
90 DirstateEntry {
91 state: state.extract::<PyBytes>(py)?.data(py)[0]
91 state: state.extract::<PyBytes>(py)?.data(py)[0]
92 .try_into()
92 .try_into()
93 .map_err(|e: DirstateParseError| {
93 .map_err(|e: DirstateParseError| {
94 PyErr::new::<exc::ValueError, _>(py, e.to_string())
94 PyErr::new::<exc::ValueError, _>(py, e.to_string())
95 })?,
95 })?,
96 mode: mode.extract(py)?,
96 mode: mode.extract(py)?,
97 size: size.extract(py)?,
97 size: size.extract(py)?,
98 mtime: mtime.extract(py)?,
98 mtime: mtime.extract(py)?,
99 },
99 },
100 ).and(Ok(py.None())).or_else(|e: DirstateMapError| {
100 ).and(Ok(py.None())).or_else(|e: DirstateMapError| {
101 Err(PyErr::new::<exc::ValueError, _>(py, e.to_string()))
101 Err(PyErr::new::<exc::ValueError, _>(py, e.to_string()))
102 })
102 })
103 }
103 }
104
104
105 def removefile(
105 def removefile(
106 &self,
106 &self,
107 f: PyObject,
107 f: PyObject,
108 oldstate: PyObject,
108 oldstate: PyObject,
109 size: PyObject
109 size: PyObject
110 ) -> PyResult<PyObject> {
110 ) -> PyResult<PyObject> {
111 self.inner(py).borrow_mut()
111 self.inner(py).borrow_mut()
112 .remove_file(
112 .remove_file(
113 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
113 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
114 oldstate.extract::<PyBytes>(py)?.data(py)[0]
114 oldstate.extract::<PyBytes>(py)?.data(py)[0]
115 .try_into()
115 .try_into()
116 .map_err(|e: DirstateParseError| {
116 .map_err(|e: DirstateParseError| {
117 PyErr::new::<exc::ValueError, _>(py, e.to_string())
117 PyErr::new::<exc::ValueError, _>(py, e.to_string())
118 })?,
118 })?,
119 size.extract(py)?,
119 size.extract(py)?,
120 )
120 )
121 .or_else(|_| {
121 .or_else(|_| {
122 Err(PyErr::new::<exc::OSError, _>(
122 Err(PyErr::new::<exc::OSError, _>(
123 py,
123 py,
124 "Dirstate error".to_string(),
124 "Dirstate error".to_string(),
125 ))
125 ))
126 })?;
126 })?;
127 Ok(py.None())
127 Ok(py.None())
128 }
128 }
129
129
130 def dropfile(
130 def dropfile(
131 &self,
131 &self,
132 f: PyObject,
132 f: PyObject,
133 oldstate: PyObject
133 oldstate: PyObject
134 ) -> PyResult<PyBool> {
134 ) -> PyResult<PyBool> {
135 self.inner(py).borrow_mut()
135 self.inner(py).borrow_mut()
136 .drop_file(
136 .drop_file(
137 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
137 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
138 oldstate.extract::<PyBytes>(py)?.data(py)[0]
138 oldstate.extract::<PyBytes>(py)?.data(py)[0]
139 .try_into()
139 .try_into()
140 .map_err(|e: DirstateParseError| {
140 .map_err(|e: DirstateParseError| {
141 PyErr::new::<exc::ValueError, _>(py, e.to_string())
141 PyErr::new::<exc::ValueError, _>(py, e.to_string())
142 })?,
142 })?,
143 )
143 )
144 .and_then(|b| Ok(b.to_py_object(py)))
144 .and_then(|b| Ok(b.to_py_object(py)))
145 .or_else(|_| {
145 .or_else(|_| {
146 Err(PyErr::new::<exc::OSError, _>(
146 Err(PyErr::new::<exc::OSError, _>(
147 py,
147 py,
148 "Dirstate error".to_string(),
148 "Dirstate error".to_string(),
149 ))
149 ))
150 })
150 })
151 }
151 }
152
152
153 def clearambiguoustimes(
153 def clearambiguoustimes(
154 &self,
154 &self,
155 files: PyObject,
155 files: PyObject,
156 now: PyObject
156 now: PyObject
157 ) -> PyResult<PyObject> {
157 ) -> PyResult<PyObject> {
158 let files: PyResult<Vec<HgPathBuf>> = files
158 let files: PyResult<Vec<HgPathBuf>> = files
159 .iter(py)?
159 .iter(py)?
160 .map(|filename| {
160 .map(|filename| {
161 Ok(HgPathBuf::from_bytes(
161 Ok(HgPathBuf::from_bytes(
162 filename?.extract::<PyBytes>(py)?.data(py),
162 filename?.extract::<PyBytes>(py)?.data(py),
163 ))
163 ))
164 })
164 })
165 .collect();
165 .collect();
166 self.inner(py).borrow_mut()
166 self.inner(py).borrow_mut()
167 .clear_ambiguous_times(files?, now.extract(py)?);
167 .clear_ambiguous_times(files?, now.extract(py)?);
168 Ok(py.None())
168 Ok(py.None())
169 }
169 }
170
170
171 def other_parent_entries(&self) -> PyResult<PyObject> {
171 def other_parent_entries(&self) -> PyResult<PyObject> {
172 let mut inner_shared = self.inner(py).borrow_mut();
172 let mut inner_shared = self.inner(py).borrow_mut();
173 let (_, other_parent) =
173 let (_, other_parent) =
174 inner_shared.get_non_normal_other_parent_entries();
174 inner_shared.get_non_normal_other_parent_entries();
175
175
176 let locals = PyDict::new(py);
176 let locals = PyDict::new(py);
177 locals.set_item(
177 locals.set_item(
178 py,
178 py,
179 "other_parent",
179 "other_parent",
180 other_parent
180 other_parent
181 .iter()
181 .iter()
182 .map(|v| PyBytes::new(py, v.as_ref()))
182 .map(|v| PyBytes::new(py, v.as_bytes()))
183 .collect::<Vec<PyBytes>>()
183 .collect::<Vec<PyBytes>>()
184 .to_py_object(py),
184 .to_py_object(py),
185 )?;
185 )?;
186
186
187 py.eval("set(other_parent)", None, Some(&locals))
187 py.eval("set(other_parent)", None, Some(&locals))
188 }
188 }
189
189
190 def non_normal_entries(&self) -> PyResult<NonNormalEntries> {
190 def non_normal_entries(&self) -> PyResult<NonNormalEntries> {
191 NonNormalEntries::from_inner(py, self.clone_ref(py))
191 NonNormalEntries::from_inner(py, self.clone_ref(py))
192 }
192 }
193
193
194 def non_normal_entries_contains(&self, key: PyObject) -> PyResult<bool> {
194 def non_normal_entries_contains(&self, key: PyObject) -> PyResult<bool> {
195 let key = key.extract::<PyBytes>(py)?;
195 let key = key.extract::<PyBytes>(py)?;
196 Ok(self
196 Ok(self
197 .inner(py)
197 .inner(py)
198 .borrow_mut()
198 .borrow_mut()
199 .get_non_normal_other_parent_entries().0
199 .get_non_normal_other_parent_entries().0
200 .contains(HgPath::new(key.data(py))))
200 .contains(HgPath::new(key.data(py))))
201 }
201 }
202
202
203 def non_normal_entries_display(&self) -> PyResult<PyString> {
203 def non_normal_entries_display(&self) -> PyResult<PyString> {
204 Ok(
204 Ok(
205 PyString::new(
205 PyString::new(
206 py,
206 py,
207 &format!(
207 &format!(
208 "NonNormalEntries: {:?}",
208 "NonNormalEntries: {:?}",
209 self
209 self
210 .inner(py)
210 .inner(py)
211 .borrow_mut()
211 .borrow_mut()
212 .get_non_normal_other_parent_entries().0
212 .get_non_normal_other_parent_entries().0
213 .iter().map(|o| o))
213 .iter().map(|o| o))
214 )
214 )
215 )
215 )
216 }
216 }
217
217
218 def non_normal_entries_remove(&self, key: PyObject) -> PyResult<PyObject> {
218 def non_normal_entries_remove(&self, key: PyObject) -> PyResult<PyObject> {
219 let key = key.extract::<PyBytes>(py)?;
219 let key = key.extract::<PyBytes>(py)?;
220 self
220 self
221 .inner(py)
221 .inner(py)
222 .borrow_mut()
222 .borrow_mut()
223 .non_normal_entries_remove(HgPath::new(key.data(py)));
223 .non_normal_entries_remove(HgPath::new(key.data(py)));
224 Ok(py.None())
224 Ok(py.None())
225 }
225 }
226
226
227 def non_normal_entries_union(&self, other: PyObject) -> PyResult<PyList> {
227 def non_normal_entries_union(&self, other: PyObject) -> PyResult<PyList> {
228 let other: PyResult<_> = other.iter(py)?
228 let other: PyResult<_> = other.iter(py)?
229 .map(|f| {
229 .map(|f| {
230 Ok(HgPathBuf::from_bytes(
230 Ok(HgPathBuf::from_bytes(
231 f?.extract::<PyBytes>(py)?.data(py),
231 f?.extract::<PyBytes>(py)?.data(py),
232 ))
232 ))
233 })
233 })
234 .collect();
234 .collect();
235
235
236 let res = self
236 let res = self
237 .inner(py)
237 .inner(py)
238 .borrow_mut()
238 .borrow_mut()
239 .non_normal_entries_union(other?);
239 .non_normal_entries_union(other?);
240
240
241 let ret = PyList::new(py, &[]);
241 let ret = PyList::new(py, &[]);
242 for filename in res.iter() {
242 for filename in res.iter() {
243 let as_pystring = PyBytes::new(py, filename.as_bytes());
243 let as_pystring = PyBytes::new(py, filename.as_bytes());
244 ret.append(py, as_pystring.into_object());
244 ret.append(py, as_pystring.into_object());
245 }
245 }
246 Ok(ret)
246 Ok(ret)
247 }
247 }
248
248
249 def non_normal_entries_iter(&self) -> PyResult<NonNormalEntriesIterator> {
249 def non_normal_entries_iter(&self) -> PyResult<NonNormalEntriesIterator> {
250 // Make sure the sets are defined before we no longer have a mutable
250 // Make sure the sets are defined before we no longer have a mutable
251 // reference to the dmap.
251 // reference to the dmap.
252 self.inner(py)
252 self.inner(py)
253 .borrow_mut()
253 .borrow_mut()
254 .set_non_normal_other_parent_entries(false);
254 .set_non_normal_other_parent_entries(false);
255
255
256 let leaked_ref = self.inner(py).leak_immutable();
256 let leaked_ref = self.inner(py).leak_immutable();
257
257
258 NonNormalEntriesIterator::from_inner(py, unsafe {
258 NonNormalEntriesIterator::from_inner(py, unsafe {
259 leaked_ref.map(py, |o| {
259 leaked_ref.map(py, |o| {
260 o.get_non_normal_other_parent_entries_panic().0.iter()
260 o.get_non_normal_other_parent_entries_panic().0.iter()
261 })
261 })
262 })
262 })
263 }
263 }
264
264
265 def hastrackeddir(&self, d: PyObject) -> PyResult<PyBool> {
265 def hastrackeddir(&self, d: PyObject) -> PyResult<PyBool> {
266 let d = d.extract::<PyBytes>(py)?;
266 let d = d.extract::<PyBytes>(py)?;
267 Ok(self.inner(py).borrow_mut()
267 Ok(self.inner(py).borrow_mut()
268 .has_tracked_dir(HgPath::new(d.data(py)))
268 .has_tracked_dir(HgPath::new(d.data(py)))
269 .map_err(|e| {
269 .map_err(|e| {
270 PyErr::new::<exc::ValueError, _>(py, e.to_string())
270 PyErr::new::<exc::ValueError, _>(py, e.to_string())
271 })?
271 })?
272 .to_py_object(py))
272 .to_py_object(py))
273 }
273 }
274
274
275 def hasdir(&self, d: PyObject) -> PyResult<PyBool> {
275 def hasdir(&self, d: PyObject) -> PyResult<PyBool> {
276 let d = d.extract::<PyBytes>(py)?;
276 let d = d.extract::<PyBytes>(py)?;
277 Ok(self.inner(py).borrow_mut()
277 Ok(self.inner(py).borrow_mut()
278 .has_dir(HgPath::new(d.data(py)))
278 .has_dir(HgPath::new(d.data(py)))
279 .map_err(|e| {
279 .map_err(|e| {
280 PyErr::new::<exc::ValueError, _>(py, e.to_string())
280 PyErr::new::<exc::ValueError, _>(py, e.to_string())
281 })?
281 })?
282 .to_py_object(py))
282 .to_py_object(py))
283 }
283 }
284
284
285 def parents(&self, st: PyObject) -> PyResult<PyTuple> {
285 def parents(&self, st: PyObject) -> PyResult<PyTuple> {
286 self.inner(py).borrow_mut()
286 self.inner(py).borrow_mut()
287 .parents(st.extract::<PyBytes>(py)?.data(py))
287 .parents(st.extract::<PyBytes>(py)?.data(py))
288 .and_then(|d| {
288 .and_then(|d| {
289 Ok((PyBytes::new(py, &d.p1), PyBytes::new(py, &d.p2))
289 Ok((PyBytes::new(py, &d.p1), PyBytes::new(py, &d.p2))
290 .to_py_object(py))
290 .to_py_object(py))
291 })
291 })
292 .or_else(|_| {
292 .or_else(|_| {
293 Err(PyErr::new::<exc::OSError, _>(
293 Err(PyErr::new::<exc::OSError, _>(
294 py,
294 py,
295 "Dirstate error".to_string(),
295 "Dirstate error".to_string(),
296 ))
296 ))
297 })
297 })
298 }
298 }
299
299
300 def setparents(&self, p1: PyObject, p2: PyObject) -> PyResult<PyObject> {
300 def setparents(&self, p1: PyObject, p2: PyObject) -> PyResult<PyObject> {
301 let p1 = extract_node_id(py, &p1)?;
301 let p1 = extract_node_id(py, &p1)?;
302 let p2 = extract_node_id(py, &p2)?;
302 let p2 = extract_node_id(py, &p2)?;
303
303
304 self.inner(py).borrow_mut()
304 self.inner(py).borrow_mut()
305 .set_parents(&DirstateParents { p1, p2 });
305 .set_parents(&DirstateParents { p1, p2 });
306 Ok(py.None())
306 Ok(py.None())
307 }
307 }
308
308
309 def read(&self, st: PyObject) -> PyResult<Option<PyObject>> {
309 def read(&self, st: PyObject) -> PyResult<Option<PyObject>> {
310 match self.inner(py).borrow_mut()
310 match self.inner(py).borrow_mut()
311 .read(st.extract::<PyBytes>(py)?.data(py))
311 .read(st.extract::<PyBytes>(py)?.data(py))
312 {
312 {
313 Ok(Some(parents)) => Ok(Some(
313 Ok(Some(parents)) => Ok(Some(
314 (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
314 (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
315 .to_py_object(py)
315 .to_py_object(py)
316 .into_object(),
316 .into_object(),
317 )),
317 )),
318 Ok(None) => Ok(Some(py.None())),
318 Ok(None) => Ok(Some(py.None())),
319 Err(_) => Err(PyErr::new::<exc::OSError, _>(
319 Err(_) => Err(PyErr::new::<exc::OSError, _>(
320 py,
320 py,
321 "Dirstate error".to_string(),
321 "Dirstate error".to_string(),
322 )),
322 )),
323 }
323 }
324 }
324 }
325 def write(
325 def write(
326 &self,
326 &self,
327 p1: PyObject,
327 p1: PyObject,
328 p2: PyObject,
328 p2: PyObject,
329 now: PyObject
329 now: PyObject
330 ) -> PyResult<PyBytes> {
330 ) -> PyResult<PyBytes> {
331 let now = Duration::new(now.extract(py)?, 0);
331 let now = Duration::new(now.extract(py)?, 0);
332 let parents = DirstateParents {
332 let parents = DirstateParents {
333 p1: extract_node_id(py, &p1)?,
333 p1: extract_node_id(py, &p1)?,
334 p2: extract_node_id(py, &p2)?,
334 p2: extract_node_id(py, &p2)?,
335 };
335 };
336
336
337 match self.inner(py).borrow_mut().pack(parents, now) {
337 match self.inner(py).borrow_mut().pack(parents, now) {
338 Ok(packed) => Ok(PyBytes::new(py, &packed)),
338 Ok(packed) => Ok(PyBytes::new(py, &packed)),
339 Err(_) => Err(PyErr::new::<exc::OSError, _>(
339 Err(_) => Err(PyErr::new::<exc::OSError, _>(
340 py,
340 py,
341 "Dirstate error".to_string(),
341 "Dirstate error".to_string(),
342 )),
342 )),
343 }
343 }
344 }
344 }
345
345
346 def filefoldmapasdict(&self) -> PyResult<PyDict> {
346 def filefoldmapasdict(&self) -> PyResult<PyDict> {
347 let dict = PyDict::new(py);
347 let dict = PyDict::new(py);
348 for (key, value) in
348 for (key, value) in
349 self.inner(py).borrow_mut().build_file_fold_map().iter()
349 self.inner(py).borrow_mut().build_file_fold_map().iter()
350 {
350 {
351 dict.set_item(py, key.as_ref().to_vec(), value.as_ref().to_vec())?;
351 dict.set_item(
352 py,
353 key.as_bytes().to_vec(),
354 value.as_bytes().to_vec(),
355 )?;
352 }
356 }
353 Ok(dict)
357 Ok(dict)
354 }
358 }
355
359
356 def __len__(&self) -> PyResult<usize> {
360 def __len__(&self) -> PyResult<usize> {
357 Ok(self.inner(py).borrow().len())
361 Ok(self.inner(py).borrow().len())
358 }
362 }
359
363
360 def __contains__(&self, key: PyObject) -> PyResult<bool> {
364 def __contains__(&self, key: PyObject) -> PyResult<bool> {
361 let key = key.extract::<PyBytes>(py)?;
365 let key = key.extract::<PyBytes>(py)?;
362 Ok(self.inner(py).borrow().contains_key(HgPath::new(key.data(py))))
366 Ok(self.inner(py).borrow().contains_key(HgPath::new(key.data(py))))
363 }
367 }
364
368
365 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
369 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
366 let key = key.extract::<PyBytes>(py)?;
370 let key = key.extract::<PyBytes>(py)?;
367 let key = HgPath::new(key.data(py));
371 let key = HgPath::new(key.data(py));
368 match self.inner(py).borrow().get(key) {
372 match self.inner(py).borrow().get(key) {
369 Some(entry) => {
373 Some(entry) => {
370 Ok(make_dirstate_tuple(py, entry)?)
374 Ok(make_dirstate_tuple(py, entry)?)
371 },
375 },
372 None => Err(PyErr::new::<exc::KeyError, _>(
376 None => Err(PyErr::new::<exc::KeyError, _>(
373 py,
377 py,
374 String::from_utf8_lossy(key.as_bytes()),
378 String::from_utf8_lossy(key.as_bytes()),
375 )),
379 )),
376 }
380 }
377 }
381 }
378
382
379 def keys(&self) -> PyResult<DirstateMapKeysIterator> {
383 def keys(&self) -> PyResult<DirstateMapKeysIterator> {
380 let leaked_ref = self.inner(py).leak_immutable();
384 let leaked_ref = self.inner(py).leak_immutable();
381 DirstateMapKeysIterator::from_inner(
385 DirstateMapKeysIterator::from_inner(
382 py,
386 py,
383 unsafe { leaked_ref.map(py, |o| o.iter()) },
387 unsafe { leaked_ref.map(py, |o| o.iter()) },
384 )
388 )
385 }
389 }
386
390
387 def items(&self) -> PyResult<DirstateMapItemsIterator> {
391 def items(&self) -> PyResult<DirstateMapItemsIterator> {
388 let leaked_ref = self.inner(py).leak_immutable();
392 let leaked_ref = self.inner(py).leak_immutable();
389 DirstateMapItemsIterator::from_inner(
393 DirstateMapItemsIterator::from_inner(
390 py,
394 py,
391 unsafe { leaked_ref.map(py, |o| o.iter()) },
395 unsafe { leaked_ref.map(py, |o| o.iter()) },
392 )
396 )
393 }
397 }
394
398
395 def __iter__(&self) -> PyResult<DirstateMapKeysIterator> {
399 def __iter__(&self) -> PyResult<DirstateMapKeysIterator> {
396 let leaked_ref = self.inner(py).leak_immutable();
400 let leaked_ref = self.inner(py).leak_immutable();
397 DirstateMapKeysIterator::from_inner(
401 DirstateMapKeysIterator::from_inner(
398 py,
402 py,
399 unsafe { leaked_ref.map(py, |o| o.iter()) },
403 unsafe { leaked_ref.map(py, |o| o.iter()) },
400 )
404 )
401 }
405 }
402
406
403 def getdirs(&self) -> PyResult<Dirs> {
407 def getdirs(&self) -> PyResult<Dirs> {
404 // TODO don't copy, share the reference
408 // TODO don't copy, share the reference
405 self.inner(py).borrow_mut().set_dirs()
409 self.inner(py).borrow_mut().set_dirs()
406 .map_err(|e| {
410 .map_err(|e| {
407 PyErr::new::<exc::ValueError, _>(py, e.to_string())
411 PyErr::new::<exc::ValueError, _>(py, e.to_string())
408 })?;
412 })?;
409 Dirs::from_inner(
413 Dirs::from_inner(
410 py,
414 py,
411 DirsMultiset::from_dirstate(
415 DirsMultiset::from_dirstate(
412 &self.inner(py).borrow(),
416 &self.inner(py).borrow(),
413 Some(EntryState::Removed),
417 Some(EntryState::Removed),
414 )
418 )
415 .map_err(|e| {
419 .map_err(|e| {
416 PyErr::new::<exc::ValueError, _>(py, e.to_string())
420 PyErr::new::<exc::ValueError, _>(py, e.to_string())
417 })?,
421 })?,
418 )
422 )
419 }
423 }
420 def getalldirs(&self) -> PyResult<Dirs> {
424 def getalldirs(&self) -> PyResult<Dirs> {
421 // TODO don't copy, share the reference
425 // TODO don't copy, share the reference
422 self.inner(py).borrow_mut().set_all_dirs()
426 self.inner(py).borrow_mut().set_all_dirs()
423 .map_err(|e| {
427 .map_err(|e| {
424 PyErr::new::<exc::ValueError, _>(py, e.to_string())
428 PyErr::new::<exc::ValueError, _>(py, e.to_string())
425 })?;
429 })?;
426 Dirs::from_inner(
430 Dirs::from_inner(
427 py,
431 py,
428 DirsMultiset::from_dirstate(
432 DirsMultiset::from_dirstate(
429 &self.inner(py).borrow(),
433 &self.inner(py).borrow(),
430 None,
434 None,
431 ).map_err(|e| {
435 ).map_err(|e| {
432 PyErr::new::<exc::ValueError, _>(py, e.to_string())
436 PyErr::new::<exc::ValueError, _>(py, e.to_string())
433 })?,
437 })?,
434 )
438 )
435 }
439 }
436
440
437 // TODO all copymap* methods, see docstring above
441 // TODO all copymap* methods, see docstring above
438 def copymapcopy(&self) -> PyResult<PyDict> {
442 def copymapcopy(&self) -> PyResult<PyDict> {
439 let dict = PyDict::new(py);
443 let dict = PyDict::new(py);
440 for (key, value) in self.inner(py).borrow().copy_map.iter() {
444 for (key, value) in self.inner(py).borrow().copy_map.iter() {
441 dict.set_item(
445 dict.set_item(
442 py,
446 py,
443 PyBytes::new(py, key.as_ref()),
447 PyBytes::new(py, key.as_bytes()),
444 PyBytes::new(py, value.as_ref()),
448 PyBytes::new(py, value.as_bytes()),
445 )?;
449 )?;
446 }
450 }
447 Ok(dict)
451 Ok(dict)
448 }
452 }
449
453
450 def copymapgetitem(&self, key: PyObject) -> PyResult<PyBytes> {
454 def copymapgetitem(&self, key: PyObject) -> PyResult<PyBytes> {
451 let key = key.extract::<PyBytes>(py)?;
455 let key = key.extract::<PyBytes>(py)?;
452 match self.inner(py).borrow().copy_map.get(HgPath::new(key.data(py))) {
456 match self.inner(py).borrow().copy_map.get(HgPath::new(key.data(py))) {
453 Some(copy) => Ok(PyBytes::new(py, copy.as_ref())),
457 Some(copy) => Ok(PyBytes::new(py, copy.as_bytes())),
454 None => Err(PyErr::new::<exc::KeyError, _>(
458 None => Err(PyErr::new::<exc::KeyError, _>(
455 py,
459 py,
456 String::from_utf8_lossy(key.data(py)),
460 String::from_utf8_lossy(key.data(py)),
457 )),
461 )),
458 }
462 }
459 }
463 }
460 def copymap(&self) -> PyResult<CopyMap> {
464 def copymap(&self) -> PyResult<CopyMap> {
461 CopyMap::from_inner(py, self.clone_ref(py))
465 CopyMap::from_inner(py, self.clone_ref(py))
462 }
466 }
463
467
464 def copymaplen(&self) -> PyResult<usize> {
468 def copymaplen(&self) -> PyResult<usize> {
465 Ok(self.inner(py).borrow().copy_map.len())
469 Ok(self.inner(py).borrow().copy_map.len())
466 }
470 }
467 def copymapcontains(&self, key: PyObject) -> PyResult<bool> {
471 def copymapcontains(&self, key: PyObject) -> PyResult<bool> {
468 let key = key.extract::<PyBytes>(py)?;
472 let key = key.extract::<PyBytes>(py)?;
469 Ok(self
473 Ok(self
470 .inner(py)
474 .inner(py)
471 .borrow()
475 .borrow()
472 .copy_map
476 .copy_map
473 .contains_key(HgPath::new(key.data(py))))
477 .contains_key(HgPath::new(key.data(py))))
474 }
478 }
475 def copymapget(
479 def copymapget(
476 &self,
480 &self,
477 key: PyObject,
481 key: PyObject,
478 default: Option<PyObject>
482 default: Option<PyObject>
479 ) -> PyResult<Option<PyObject>> {
483 ) -> PyResult<Option<PyObject>> {
480 let key = key.extract::<PyBytes>(py)?;
484 let key = key.extract::<PyBytes>(py)?;
481 match self
485 match self
482 .inner(py)
486 .inner(py)
483 .borrow()
487 .borrow()
484 .copy_map
488 .copy_map
485 .get(HgPath::new(key.data(py)))
489 .get(HgPath::new(key.data(py)))
486 {
490 {
487 Some(copy) => Ok(Some(
491 Some(copy) => Ok(Some(
488 PyBytes::new(py, copy.as_ref()).into_object(),
492 PyBytes::new(py, copy.as_bytes()).into_object(),
489 )),
493 )),
490 None => Ok(default),
494 None => Ok(default),
491 }
495 }
492 }
496 }
493 def copymapsetitem(
497 def copymapsetitem(
494 &self,
498 &self,
495 key: PyObject,
499 key: PyObject,
496 value: PyObject
500 value: PyObject
497 ) -> PyResult<PyObject> {
501 ) -> PyResult<PyObject> {
498 let key = key.extract::<PyBytes>(py)?;
502 let key = key.extract::<PyBytes>(py)?;
499 let value = value.extract::<PyBytes>(py)?;
503 let value = value.extract::<PyBytes>(py)?;
500 self.inner(py).borrow_mut().copy_map.insert(
504 self.inner(py).borrow_mut().copy_map.insert(
501 HgPathBuf::from_bytes(key.data(py)),
505 HgPathBuf::from_bytes(key.data(py)),
502 HgPathBuf::from_bytes(value.data(py)),
506 HgPathBuf::from_bytes(value.data(py)),
503 );
507 );
504 Ok(py.None())
508 Ok(py.None())
505 }
509 }
506 def copymappop(
510 def copymappop(
507 &self,
511 &self,
508 key: PyObject,
512 key: PyObject,
509 default: Option<PyObject>
513 default: Option<PyObject>
510 ) -> PyResult<Option<PyObject>> {
514 ) -> PyResult<Option<PyObject>> {
511 let key = key.extract::<PyBytes>(py)?;
515 let key = key.extract::<PyBytes>(py)?;
512 match self
516 match self
513 .inner(py)
517 .inner(py)
514 .borrow_mut()
518 .borrow_mut()
515 .copy_map
519 .copy_map
516 .remove(HgPath::new(key.data(py)))
520 .remove(HgPath::new(key.data(py)))
517 {
521 {
518 Some(_) => Ok(None),
522 Some(_) => Ok(None),
519 None => Ok(default),
523 None => Ok(default),
520 }
524 }
521 }
525 }
522
526
523 def copymapiter(&self) -> PyResult<CopyMapKeysIterator> {
527 def copymapiter(&self) -> PyResult<CopyMapKeysIterator> {
524 let leaked_ref = self.inner(py).leak_immutable();
528 let leaked_ref = self.inner(py).leak_immutable();
525 CopyMapKeysIterator::from_inner(
529 CopyMapKeysIterator::from_inner(
526 py,
530 py,
527 unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) },
531 unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) },
528 )
532 )
529 }
533 }
530
534
531 def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> {
535 def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> {
532 let leaked_ref = self.inner(py).leak_immutable();
536 let leaked_ref = self.inner(py).leak_immutable();
533 CopyMapItemsIterator::from_inner(
537 CopyMapItemsIterator::from_inner(
534 py,
538 py,
535 unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) },
539 unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) },
536 )
540 )
537 }
541 }
538
542
539 });
543 });
540
544
541 impl DirstateMap {
545 impl DirstateMap {
542 pub fn get_inner<'a>(
546 pub fn get_inner<'a>(
543 &'a self,
547 &'a self,
544 py: Python<'a>,
548 py: Python<'a>,
545 ) -> Ref<'a, RustDirstateMap> {
549 ) -> Ref<'a, RustDirstateMap> {
546 self.inner(py).borrow()
550 self.inner(py).borrow()
547 }
551 }
548 fn translate_key(
552 fn translate_key(
549 py: Python,
553 py: Python,
550 res: (&HgPathBuf, &DirstateEntry),
554 res: (&HgPathBuf, &DirstateEntry),
551 ) -> PyResult<Option<PyBytes>> {
555 ) -> PyResult<Option<PyBytes>> {
552 Ok(Some(PyBytes::new(py, res.0.as_ref())))
556 Ok(Some(PyBytes::new(py, res.0.as_bytes())))
553 }
557 }
554 fn translate_key_value(
558 fn translate_key_value(
555 py: Python,
559 py: Python,
556 res: (&HgPathBuf, &DirstateEntry),
560 res: (&HgPathBuf, &DirstateEntry),
557 ) -> PyResult<Option<(PyBytes, PyObject)>> {
561 ) -> PyResult<Option<(PyBytes, PyObject)>> {
558 let (f, entry) = res;
562 let (f, entry) = res;
559 Ok(Some((
563 Ok(Some((
560 PyBytes::new(py, f.as_ref()),
564 PyBytes::new(py, f.as_bytes()),
561 make_dirstate_tuple(py, entry)?,
565 make_dirstate_tuple(py, entry)?,
562 )))
566 )))
563 }
567 }
564 }
568 }
565
569
566 py_shared_iterator!(
570 py_shared_iterator!(
567 DirstateMapKeysIterator,
571 DirstateMapKeysIterator,
568 UnsafePyLeaked<StateMapIter<'static>>,
572 UnsafePyLeaked<StateMapIter<'static>>,
569 DirstateMap::translate_key,
573 DirstateMap::translate_key,
570 Option<PyBytes>
574 Option<PyBytes>
571 );
575 );
572
576
573 py_shared_iterator!(
577 py_shared_iterator!(
574 DirstateMapItemsIterator,
578 DirstateMapItemsIterator,
575 UnsafePyLeaked<StateMapIter<'static>>,
579 UnsafePyLeaked<StateMapIter<'static>>,
576 DirstateMap::translate_key_value,
580 DirstateMap::translate_key_value,
577 Option<(PyBytes, PyObject)>
581 Option<(PyBytes, PyObject)>
578 );
582 );
579
583
580 fn extract_node_id(py: Python, obj: &PyObject) -> PyResult<[u8; PARENT_SIZE]> {
584 fn extract_node_id(py: Python, obj: &PyObject) -> PyResult<[u8; PARENT_SIZE]> {
581 let bytes = obj.extract::<PyBytes>(py)?;
585 let bytes = obj.extract::<PyBytes>(py)?;
582 match bytes.data(py).try_into() {
586 match bytes.data(py).try_into() {
583 Ok(s) => Ok(s),
587 Ok(s) => Ok(s),
584 Err(e) => Err(PyErr::new::<exc::ValueError, _>(py, e.to_string())),
588 Err(e) => Err(PyErr::new::<exc::ValueError, _>(py, e.to_string())),
585 }
589 }
586 }
590 }
@@ -1,76 +1,76
1 // non_normal_other_parent_entries.rs
1 // non_normal_other_parent_entries.rs
2 //
2 //
3 // Copyright 2020 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2020 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use cpython::{
8 use cpython::{
9 exc::NotImplementedError, CompareOp, ObjectProtocol, PyBytes, PyClone,
9 exc::NotImplementedError, CompareOp, ObjectProtocol, PyBytes, PyClone,
10 PyErr, PyList, PyObject, PyResult, PyString, Python, PythonObject,
10 PyErr, PyList, PyObject, PyResult, PyString, Python, PythonObject,
11 ToPyObject, UnsafePyLeaked,
11 ToPyObject, UnsafePyLeaked,
12 };
12 };
13
13
14 use crate::dirstate::DirstateMap;
14 use crate::dirstate::DirstateMap;
15 use hg::utils::hg_path::HgPathBuf;
15 use hg::utils::hg_path::HgPathBuf;
16 use std::cell::RefCell;
16 use std::cell::RefCell;
17 use std::collections::hash_set;
17 use std::collections::hash_set;
18
18
19 py_class!(pub class NonNormalEntries |py| {
19 py_class!(pub class NonNormalEntries |py| {
20 data dmap: DirstateMap;
20 data dmap: DirstateMap;
21
21
22 def __contains__(&self, key: PyObject) -> PyResult<bool> {
22 def __contains__(&self, key: PyObject) -> PyResult<bool> {
23 self.dmap(py).non_normal_entries_contains(py, key)
23 self.dmap(py).non_normal_entries_contains(py, key)
24 }
24 }
25 def remove(&self, key: PyObject) -> PyResult<PyObject> {
25 def remove(&self, key: PyObject) -> PyResult<PyObject> {
26 self.dmap(py).non_normal_entries_remove(py, key)
26 self.dmap(py).non_normal_entries_remove(py, key)
27 }
27 }
28 def union(&self, other: PyObject) -> PyResult<PyList> {
28 def union(&self, other: PyObject) -> PyResult<PyList> {
29 self.dmap(py).non_normal_entries_union(py, other)
29 self.dmap(py).non_normal_entries_union(py, other)
30 }
30 }
31 def __richcmp__(&self, other: PyObject, op: CompareOp) -> PyResult<bool> {
31 def __richcmp__(&self, other: PyObject, op: CompareOp) -> PyResult<bool> {
32 match op {
32 match op {
33 CompareOp::Eq => self.is_equal_to(py, other),
33 CompareOp::Eq => self.is_equal_to(py, other),
34 CompareOp::Ne => Ok(!self.is_equal_to(py, other)?),
34 CompareOp::Ne => Ok(!self.is_equal_to(py, other)?),
35 _ => Err(PyErr::new::<NotImplementedError, _>(py, ""))
35 _ => Err(PyErr::new::<NotImplementedError, _>(py, ""))
36 }
36 }
37 }
37 }
38 def __repr__(&self) -> PyResult<PyString> {
38 def __repr__(&self) -> PyResult<PyString> {
39 self.dmap(py).non_normal_entries_display(py)
39 self.dmap(py).non_normal_entries_display(py)
40 }
40 }
41
41
42 def __iter__(&self) -> PyResult<NonNormalEntriesIterator> {
42 def __iter__(&self) -> PyResult<NonNormalEntriesIterator> {
43 self.dmap(py).non_normal_entries_iter(py)
43 self.dmap(py).non_normal_entries_iter(py)
44 }
44 }
45 });
45 });
46
46
47 impl NonNormalEntries {
47 impl NonNormalEntries {
48 pub fn from_inner(py: Python, dm: DirstateMap) -> PyResult<Self> {
48 pub fn from_inner(py: Python, dm: DirstateMap) -> PyResult<Self> {
49 Self::create_instance(py, dm)
49 Self::create_instance(py, dm)
50 }
50 }
51
51
52 fn is_equal_to(&self, py: Python, other: PyObject) -> PyResult<bool> {
52 fn is_equal_to(&self, py: Python, other: PyObject) -> PyResult<bool> {
53 for item in other.iter(py)? {
53 for item in other.iter(py)? {
54 if !self.dmap(py).non_normal_entries_contains(py, item?)? {
54 if !self.dmap(py).non_normal_entries_contains(py, item?)? {
55 return Ok(false);
55 return Ok(false);
56 }
56 }
57 }
57 }
58 Ok(true)
58 Ok(true)
59 }
59 }
60
60
61 fn translate_key(
61 fn translate_key(
62 py: Python,
62 py: Python,
63 key: &HgPathBuf,
63 key: &HgPathBuf,
64 ) -> PyResult<Option<PyBytes>> {
64 ) -> PyResult<Option<PyBytes>> {
65 Ok(Some(PyBytes::new(py, key.as_ref())))
65 Ok(Some(PyBytes::new(py, key.as_bytes())))
66 }
66 }
67 }
67 }
68
68
69 type NonNormalEntriesIter<'a> = hash_set::Iter<'a, HgPathBuf>;
69 type NonNormalEntriesIter<'a> = hash_set::Iter<'a, HgPathBuf>;
70
70
71 py_shared_iterator!(
71 py_shared_iterator!(
72 NonNormalEntriesIterator,
72 NonNormalEntriesIterator,
73 UnsafePyLeaked<NonNormalEntriesIter<'static>>,
73 UnsafePyLeaked<NonNormalEntriesIter<'static>>,
74 NonNormalEntries::translate_key,
74 NonNormalEntries::translate_key,
75 Option<PyBytes>
75 Option<PyBytes>
76 );
76 );
@@ -1,303 +1,301
1 // status.rs
1 // status.rs
2 //
2 //
3 // Copyright 2019, Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019, Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Bindings for the `hg::status` module provided by the
8 //! Bindings for the `hg::status` module provided by the
9 //! `hg-core` crate. From Python, this will be seen as
9 //! `hg-core` crate. From Python, this will be seen as
10 //! `rustext.dirstate.status`.
10 //! `rustext.dirstate.status`.
11
11
12 use crate::{dirstate::DirstateMap, exceptions::FallbackError};
12 use crate::{dirstate::DirstateMap, exceptions::FallbackError};
13 use cpython::{
13 use cpython::{
14 exc::ValueError, ObjectProtocol, PyBytes, PyErr, PyList, PyObject,
14 exc::ValueError, ObjectProtocol, PyBytes, PyErr, PyList, PyObject,
15 PyResult, PyTuple, Python, PythonObject, ToPyObject,
15 PyResult, PyTuple, Python, PythonObject, ToPyObject,
16 };
16 };
17 use hg::{
17 use hg::{
18 matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher},
18 matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher},
19 parse_pattern_syntax, status,
19 parse_pattern_syntax, status,
20 utils::{
20 utils::{
21 files::{get_bytes_from_path, get_path_from_bytes},
21 files::{get_bytes_from_path, get_path_from_bytes},
22 hg_path::{HgPath, HgPathBuf},
22 hg_path::{HgPath, HgPathBuf},
23 },
23 },
24 BadMatch, DirstateStatus, IgnorePattern, PatternFileWarning, StatusError,
24 BadMatch, DirstateStatus, IgnorePattern, PatternFileWarning, StatusError,
25 StatusOptions,
25 StatusOptions,
26 };
26 };
27 use std::borrow::{Borrow, Cow};
27 use std::borrow::{Borrow, Cow};
28
28
29 /// This will be useless once trait impls for collection are added to `PyBytes`
29 /// This will be useless once trait impls for collection are added to `PyBytes`
30 /// upstream.
30 /// upstream.
31 fn collect_pybytes_list(
31 fn collect_pybytes_list(
32 py: Python,
32 py: Python,
33 collection: &[impl AsRef<HgPath>],
33 collection: &[impl AsRef<HgPath>],
34 ) -> PyList {
34 ) -> PyList {
35 let list = PyList::new(py, &[]);
35 let list = PyList::new(py, &[]);
36
36
37 for path in collection.iter() {
37 for path in collection.iter() {
38 list.append(
38 list.append(
39 py,
39 py,
40 PyBytes::new(py, path.as_ref().as_bytes()).into_object(),
40 PyBytes::new(py, path.as_ref().as_bytes()).into_object(),
41 )
41 )
42 }
42 }
43
43
44 list
44 list
45 }
45 }
46
46
47 fn collect_bad_matches(
47 fn collect_bad_matches(
48 py: Python,
48 py: Python,
49 collection: &[(impl AsRef<HgPath>, BadMatch)],
49 collection: &[(impl AsRef<HgPath>, BadMatch)],
50 ) -> PyResult<PyList> {
50 ) -> PyResult<PyList> {
51 let list = PyList::new(py, &[]);
51 let list = PyList::new(py, &[]);
52
52
53 let os = py.import("os")?;
53 let os = py.import("os")?;
54 let get_error_message = |code: i32| -> PyResult<_> {
54 let get_error_message = |code: i32| -> PyResult<_> {
55 os.call(
55 os.call(
56 py,
56 py,
57 "strerror",
57 "strerror",
58 PyTuple::new(py, &[code.to_py_object(py).into_object()]),
58 PyTuple::new(py, &[code.to_py_object(py).into_object()]),
59 None,
59 None,
60 )
60 )
61 };
61 };
62
62
63 for (path, bad_match) in collection.iter() {
63 for (path, bad_match) in collection.iter() {
64 let message = match bad_match {
64 let message = match bad_match {
65 BadMatch::OsError(code) => get_error_message(*code)?,
65 BadMatch::OsError(code) => get_error_message(*code)?,
66 BadMatch::BadType(bad_type) => format!(
66 BadMatch::BadType(bad_type) => format!(
67 "unsupported file type (type is {})",
67 "unsupported file type (type is {})",
68 bad_type.to_string()
68 bad_type.to_string()
69 )
69 )
70 .to_py_object(py)
70 .to_py_object(py)
71 .into_object(),
71 .into_object(),
72 };
72 };
73 list.append(
73 list.append(
74 py,
74 py,
75 (PyBytes::new(py, path.as_ref().as_bytes()), message)
75 (PyBytes::new(py, path.as_ref().as_bytes()), message)
76 .to_py_object(py)
76 .to_py_object(py)
77 .into_object(),
77 .into_object(),
78 )
78 )
79 }
79 }
80
80
81 Ok(list)
81 Ok(list)
82 }
82 }
83
83
84 fn handle_fallback(py: Python, err: StatusError) -> PyErr {
84 fn handle_fallback(py: Python, err: StatusError) -> PyErr {
85 match err {
85 match err {
86 StatusError::Pattern(e) => {
86 StatusError::Pattern(e) => {
87 let as_string = e.to_string();
87 let as_string = e.to_string();
88 log::trace!("Rust status fallback: `{}`", &as_string);
88 log::trace!("Rust status fallback: `{}`", &as_string);
89
89
90 PyErr::new::<FallbackError, _>(py, &as_string)
90 PyErr::new::<FallbackError, _>(py, &as_string)
91 }
91 }
92 e => PyErr::new::<ValueError, _>(py, e.to_string()),
92 e => PyErr::new::<ValueError, _>(py, e.to_string()),
93 }
93 }
94 }
94 }
95
95
96 pub fn status_wrapper(
96 pub fn status_wrapper(
97 py: Python,
97 py: Python,
98 dmap: DirstateMap,
98 dmap: DirstateMap,
99 matcher: PyObject,
99 matcher: PyObject,
100 root_dir: PyObject,
100 root_dir: PyObject,
101 ignore_files: PyList,
101 ignore_files: PyList,
102 check_exec: bool,
102 check_exec: bool,
103 last_normal_time: i64,
103 last_normal_time: i64,
104 list_clean: bool,
104 list_clean: bool,
105 list_ignored: bool,
105 list_ignored: bool,
106 list_unknown: bool,
106 list_unknown: bool,
107 collect_traversed_dirs: bool,
107 collect_traversed_dirs: bool,
108 ) -> PyResult<PyTuple> {
108 ) -> PyResult<PyTuple> {
109 let bytes = root_dir.extract::<PyBytes>(py)?;
109 let bytes = root_dir.extract::<PyBytes>(py)?;
110 let root_dir = get_path_from_bytes(bytes.data(py));
110 let root_dir = get_path_from_bytes(bytes.data(py));
111
111
112 let dmap: DirstateMap = dmap.to_py_object(py);
112 let dmap: DirstateMap = dmap.to_py_object(py);
113 let dmap = dmap.get_inner(py);
113 let dmap = dmap.get_inner(py);
114
114
115 let ignore_files: PyResult<Vec<_>> = ignore_files
115 let ignore_files: PyResult<Vec<_>> = ignore_files
116 .iter(py)
116 .iter(py)
117 .map(|b| {
117 .map(|b| {
118 let file = b.extract::<PyBytes>(py)?;
118 let file = b.extract::<PyBytes>(py)?;
119 Ok(get_path_from_bytes(file.data(py)).to_owned())
119 Ok(get_path_from_bytes(file.data(py)).to_owned())
120 })
120 })
121 .collect();
121 .collect();
122 let ignore_files = ignore_files?;
122 let ignore_files = ignore_files?;
123
123
124 match matcher.get_type(py).name(py).borrow() {
124 match matcher.get_type(py).name(py).borrow() {
125 "alwaysmatcher" => {
125 "alwaysmatcher" => {
126 let matcher = AlwaysMatcher;
126 let matcher = AlwaysMatcher;
127 let ((lookup, status_res), warnings) = status(
127 let ((lookup, status_res), warnings) = status(
128 &dmap,
128 &dmap,
129 &matcher,
129 &matcher,
130 &root_dir,
130 &root_dir,
131 ignore_files,
131 ignore_files,
132 StatusOptions {
132 StatusOptions {
133 check_exec,
133 check_exec,
134 last_normal_time,
134 last_normal_time,
135 list_clean,
135 list_clean,
136 list_ignored,
136 list_ignored,
137 list_unknown,
137 list_unknown,
138 collect_traversed_dirs,
138 collect_traversed_dirs,
139 },
139 },
140 )
140 )
141 .map_err(|e| handle_fallback(py, e))?;
141 .map_err(|e| handle_fallback(py, e))?;
142 build_response(py, lookup, status_res, warnings)
142 build_response(py, lookup, status_res, warnings)
143 }
143 }
144 "exactmatcher" => {
144 "exactmatcher" => {
145 let files = matcher.call_method(
145 let files = matcher.call_method(
146 py,
146 py,
147 "files",
147 "files",
148 PyTuple::new(py, &[]),
148 PyTuple::new(py, &[]),
149 None,
149 None,
150 )?;
150 )?;
151 let files: PyList = files.cast_into(py)?;
151 let files: PyList = files.cast_into(py)?;
152 let files: PyResult<Vec<HgPathBuf>> = files
152 let files: PyResult<Vec<HgPathBuf>> = files
153 .iter(py)
153 .iter(py)
154 .map(|f| {
154 .map(|f| {
155 Ok(HgPathBuf::from_bytes(
155 Ok(HgPathBuf::from_bytes(
156 f.extract::<PyBytes>(py)?.data(py),
156 f.extract::<PyBytes>(py)?.data(py),
157 ))
157 ))
158 })
158 })
159 .collect();
159 .collect();
160
160
161 let files = files?;
161 let files = files?;
162 let matcher = FileMatcher::new(&files)
162 let matcher = FileMatcher::new(&files)
163 .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?;
163 .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?;
164 let ((lookup, status_res), warnings) = status(
164 let ((lookup, status_res), warnings) = status(
165 &dmap,
165 &dmap,
166 &matcher,
166 &matcher,
167 &root_dir,
167 &root_dir,
168 ignore_files,
168 ignore_files,
169 StatusOptions {
169 StatusOptions {
170 check_exec,
170 check_exec,
171 last_normal_time,
171 last_normal_time,
172 list_clean,
172 list_clean,
173 list_ignored,
173 list_ignored,
174 list_unknown,
174 list_unknown,
175 collect_traversed_dirs,
175 collect_traversed_dirs,
176 },
176 },
177 )
177 )
178 .map_err(|e| handle_fallback(py, e))?;
178 .map_err(|e| handle_fallback(py, e))?;
179 build_response(py, lookup, status_res, warnings)
179 build_response(py, lookup, status_res, warnings)
180 }
180 }
181 "includematcher" => {
181 "includematcher" => {
182 // Get the patterns from Python even though most of them are
182 // Get the patterns from Python even though most of them are
183 // redundant with those we will parse later on, as they include
183 // redundant with those we will parse later on, as they include
184 // those passed from the command line.
184 // those passed from the command line.
185 let ignore_patterns: PyResult<Vec<_>> = matcher
185 let ignore_patterns: PyResult<Vec<_>> = matcher
186 .getattr(py, "_kindpats")?
186 .getattr(py, "_kindpats")?
187 .iter(py)?
187 .iter(py)?
188 .map(|k| {
188 .map(|k| {
189 let k = k?;
189 let k = k?;
190 let syntax = parse_pattern_syntax(
190 let syntax = parse_pattern_syntax(
191 &[
191 &[
192 k.get_item(py, 0)?
192 k.get_item(py, 0)?
193 .extract::<PyBytes>(py)?
193 .extract::<PyBytes>(py)?
194 .data(py),
194 .data(py),
195 &b":"[..],
195 &b":"[..],
196 ]
196 ]
197 .concat(),
197 .concat(),
198 )
198 )
199 .map_err(|e| {
199 .map_err(|e| {
200 handle_fallback(py, StatusError::Pattern(e))
200 handle_fallback(py, StatusError::Pattern(e))
201 })?;
201 })?;
202 let pattern = k.get_item(py, 1)?.extract::<PyBytes>(py)?;
202 let pattern = k.get_item(py, 1)?.extract::<PyBytes>(py)?;
203 let pattern = pattern.data(py);
203 let pattern = pattern.data(py);
204 let source = k.get_item(py, 2)?.extract::<PyBytes>(py)?;
204 let source = k.get_item(py, 2)?.extract::<PyBytes>(py)?;
205 let source = get_path_from_bytes(source.data(py));
205 let source = get_path_from_bytes(source.data(py));
206 let new = IgnorePattern::new(syntax, pattern, source);
206 let new = IgnorePattern::new(syntax, pattern, source);
207 Ok(new)
207 Ok(new)
208 })
208 })
209 .collect();
209 .collect();
210
210
211 let ignore_patterns = ignore_patterns?;
211 let ignore_patterns = ignore_patterns?;
212 let mut all_warnings = vec![];
212 let mut all_warnings = vec![];
213
213
214 let (matcher, warnings) =
214 let (matcher, warnings) =
215 IncludeMatcher::new(ignore_patterns, &root_dir)
215 IncludeMatcher::new(ignore_patterns, &root_dir)
216 .map_err(|e| handle_fallback(py, e.into()))?;
216 .map_err(|e| handle_fallback(py, e.into()))?;
217 all_warnings.extend(warnings);
217 all_warnings.extend(warnings);
218
218
219 let ((lookup, status_res), warnings) = status(
219 let ((lookup, status_res), warnings) = status(
220 &dmap,
220 &dmap,
221 &matcher,
221 &matcher,
222 &root_dir,
222 &root_dir,
223 ignore_files,
223 ignore_files,
224 StatusOptions {
224 StatusOptions {
225 check_exec,
225 check_exec,
226 last_normal_time,
226 last_normal_time,
227 list_clean,
227 list_clean,
228 list_ignored,
228 list_ignored,
229 list_unknown,
229 list_unknown,
230 collect_traversed_dirs,
230 collect_traversed_dirs,
231 },
231 },
232 )
232 )
233 .map_err(|e| handle_fallback(py, e))?;
233 .map_err(|e| handle_fallback(py, e))?;
234
234
235 all_warnings.extend(warnings);
235 all_warnings.extend(warnings);
236
236
237 build_response(py, lookup, status_res, all_warnings)
237 build_response(py, lookup, status_res, all_warnings)
238 }
238 }
239 e => {
239 e => Err(PyErr::new::<ValueError, _>(
240 return Err(PyErr::new::<ValueError, _>(
240 py,
241 py,
241 format!("Unsupported matcher {}", e),
242 format!("Unsupported matcher {}", e),
242 )),
243 ));
244 }
245 }
243 }
246 }
244 }
247
245
248 fn build_response(
246 fn build_response(
249 py: Python,
247 py: Python,
250 lookup: Vec<Cow<HgPath>>,
248 lookup: Vec<Cow<HgPath>>,
251 status_res: DirstateStatus,
249 status_res: DirstateStatus,
252 warnings: Vec<PatternFileWarning>,
250 warnings: Vec<PatternFileWarning>,
253 ) -> PyResult<PyTuple> {
251 ) -> PyResult<PyTuple> {
254 let modified = collect_pybytes_list(py, status_res.modified.as_ref());
252 let modified = collect_pybytes_list(py, status_res.modified.as_ref());
255 let added = collect_pybytes_list(py, status_res.added.as_ref());
253 let added = collect_pybytes_list(py, status_res.added.as_ref());
256 let removed = collect_pybytes_list(py, status_res.removed.as_ref());
254 let removed = collect_pybytes_list(py, status_res.removed.as_ref());
257 let deleted = collect_pybytes_list(py, status_res.deleted.as_ref());
255 let deleted = collect_pybytes_list(py, status_res.deleted.as_ref());
258 let clean = collect_pybytes_list(py, status_res.clean.as_ref());
256 let clean = collect_pybytes_list(py, status_res.clean.as_ref());
259 let ignored = collect_pybytes_list(py, status_res.ignored.as_ref());
257 let ignored = collect_pybytes_list(py, status_res.ignored.as_ref());
260 let unknown = collect_pybytes_list(py, status_res.unknown.as_ref());
258 let unknown = collect_pybytes_list(py, status_res.unknown.as_ref());
261 let lookup = collect_pybytes_list(py, lookup.as_ref());
259 let lookup = collect_pybytes_list(py, lookup.as_ref());
262 let bad = collect_bad_matches(py, status_res.bad.as_ref())?;
260 let bad = collect_bad_matches(py, status_res.bad.as_ref())?;
263 let traversed = collect_pybytes_list(py, status_res.traversed.as_ref());
261 let traversed = collect_pybytes_list(py, status_res.traversed.as_ref());
264 let py_warnings = PyList::new(py, &[]);
262 let py_warnings = PyList::new(py, &[]);
265 for warning in warnings.iter() {
263 for warning in warnings.iter() {
266 // We use duck-typing on the Python side for dispatch, good enough for
264 // We use duck-typing on the Python side for dispatch, good enough for
267 // now.
265 // now.
268 match warning {
266 match warning {
269 PatternFileWarning::InvalidSyntax(file, syn) => {
267 PatternFileWarning::InvalidSyntax(file, syn) => {
270 py_warnings.append(
268 py_warnings.append(
271 py,
269 py,
272 (
270 (
273 PyBytes::new(py, &get_bytes_from_path(&file)),
271 PyBytes::new(py, &get_bytes_from_path(&file)),
274 PyBytes::new(py, syn),
272 PyBytes::new(py, syn),
275 )
273 )
276 .to_py_object(py)
274 .to_py_object(py)
277 .into_object(),
275 .into_object(),
278 );
276 );
279 }
277 }
280 PatternFileWarning::NoSuchFile(file) => py_warnings.append(
278 PatternFileWarning::NoSuchFile(file) => py_warnings.append(
281 py,
279 py,
282 PyBytes::new(py, &get_bytes_from_path(&file)).into_object(),
280 PyBytes::new(py, &get_bytes_from_path(&file)).into_object(),
283 ),
281 ),
284 }
282 }
285 }
283 }
286
284
287 Ok(PyTuple::new(
285 Ok(PyTuple::new(
288 py,
286 py,
289 &[
287 &[
290 lookup.into_object(),
288 lookup.into_object(),
291 modified.into_object(),
289 modified.into_object(),
292 added.into_object(),
290 added.into_object(),
293 removed.into_object(),
291 removed.into_object(),
294 deleted.into_object(),
292 deleted.into_object(),
295 clean.into_object(),
293 clean.into_object(),
296 ignored.into_object(),
294 ignored.into_object(),
297 unknown.into_object(),
295 unknown.into_object(),
298 py_warnings.into_object(),
296 py_warnings.into_object(),
299 bad.into_object(),
297 bad.into_object(),
300 traversed.into_object(),
298 traversed.into_object(),
301 ][..],
299 ][..],
302 ))
300 ))
303 }
301 }
@@ -1,175 +1,175
1 // parsers.rs
1 // parsers.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Bindings for the `hg::dirstate::parsers` module provided by the
8 //! Bindings for the `hg::dirstate::parsers` module provided by the
9 //! `hg-core` package.
9 //! `hg-core` package.
10 //!
10 //!
11 //! From Python, this will be seen as `mercurial.rustext.parsers`
11 //! From Python, this will be seen as `mercurial.rustext.parsers`
12 use cpython::{
12 use cpython::{
13 exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyResult, PyTuple, Python,
13 exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyResult, PyTuple, Python,
14 PythonObject, ToPyObject,
14 PythonObject, ToPyObject,
15 };
15 };
16 use hg::{
16 use hg::{
17 pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf,
17 pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf,
18 DirstatePackError, DirstateParents, DirstateParseError, FastHashMap,
18 DirstatePackError, DirstateParents, DirstateParseError, FastHashMap,
19 PARENT_SIZE,
19 PARENT_SIZE,
20 };
20 };
21 use std::convert::TryInto;
21 use std::convert::TryInto;
22
22
23 use crate::dirstate::{extract_dirstate, make_dirstate_tuple};
23 use crate::dirstate::{extract_dirstate, make_dirstate_tuple};
24 use std::time::Duration;
24 use std::time::Duration;
25
25
26 fn parse_dirstate_wrapper(
26 fn parse_dirstate_wrapper(
27 py: Python,
27 py: Python,
28 dmap: PyDict,
28 dmap: PyDict,
29 copymap: PyDict,
29 copymap: PyDict,
30 st: PyBytes,
30 st: PyBytes,
31 ) -> PyResult<PyTuple> {
31 ) -> PyResult<PyTuple> {
32 let mut dirstate_map = FastHashMap::default();
32 let mut dirstate_map = FastHashMap::default();
33 let mut copies = FastHashMap::default();
33 let mut copies = FastHashMap::default();
34
34
35 match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) {
35 match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) {
36 Ok(parents) => {
36 Ok(parents) => {
37 for (filename, entry) in &dirstate_map {
37 for (filename, entry) in &dirstate_map {
38 dmap.set_item(
38 dmap.set_item(
39 py,
39 py,
40 PyBytes::new(py, filename.as_ref()),
40 PyBytes::new(py, filename.as_bytes()),
41 make_dirstate_tuple(py, entry)?,
41 make_dirstate_tuple(py, entry)?,
42 )?;
42 )?;
43 }
43 }
44 for (path, copy_path) in copies {
44 for (path, copy_path) in copies {
45 copymap.set_item(
45 copymap.set_item(
46 py,
46 py,
47 PyBytes::new(py, path.as_ref()),
47 PyBytes::new(py, path.as_bytes()),
48 PyBytes::new(py, copy_path.as_ref()),
48 PyBytes::new(py, copy_path.as_bytes()),
49 )?;
49 )?;
50 }
50 }
51 Ok(
51 Ok(
52 (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
52 (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
53 .to_py_object(py),
53 .to_py_object(py),
54 )
54 )
55 }
55 }
56 Err(e) => Err(PyErr::new::<exc::ValueError, _>(
56 Err(e) => Err(PyErr::new::<exc::ValueError, _>(
57 py,
57 py,
58 match e {
58 match e {
59 DirstateParseError::TooLittleData => {
59 DirstateParseError::TooLittleData => {
60 "too little data for parents".to_string()
60 "too little data for parents".to_string()
61 }
61 }
62 DirstateParseError::Overflow => {
62 DirstateParseError::Overflow => {
63 "overflow in dirstate".to_string()
63 "overflow in dirstate".to_string()
64 }
64 }
65 DirstateParseError::CorruptedEntry(e) => e,
65 DirstateParseError::CorruptedEntry(e) => e,
66 DirstateParseError::Damaged => {
66 DirstateParseError::Damaged => {
67 "dirstate appears to be damaged".to_string()
67 "dirstate appears to be damaged".to_string()
68 }
68 }
69 },
69 },
70 )),
70 )),
71 }
71 }
72 }
72 }
73
73
74 fn pack_dirstate_wrapper(
74 fn pack_dirstate_wrapper(
75 py: Python,
75 py: Python,
76 dmap: PyDict,
76 dmap: PyDict,
77 copymap: PyDict,
77 copymap: PyDict,
78 pl: PyTuple,
78 pl: PyTuple,
79 now: PyInt,
79 now: PyInt,
80 ) -> PyResult<PyBytes> {
80 ) -> PyResult<PyBytes> {
81 let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
81 let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
82 let p1: &[u8] = p1.data(py);
82 let p1: &[u8] = p1.data(py);
83 let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
83 let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
84 let p2: &[u8] = p2.data(py);
84 let p2: &[u8] = p2.data(py);
85
85
86 let mut dirstate_map = extract_dirstate(py, &dmap)?;
86 let mut dirstate_map = extract_dirstate(py, &dmap)?;
87
87
88 let copies: Result<FastHashMap<HgPathBuf, HgPathBuf>, PyErr> = copymap
88 let copies: Result<FastHashMap<HgPathBuf, HgPathBuf>, PyErr> = copymap
89 .items(py)
89 .items(py)
90 .iter()
90 .iter()
91 .map(|(key, value)| {
91 .map(|(key, value)| {
92 Ok((
92 Ok((
93 HgPathBuf::from_bytes(key.extract::<PyBytes>(py)?.data(py)),
93 HgPathBuf::from_bytes(key.extract::<PyBytes>(py)?.data(py)),
94 HgPathBuf::from_bytes(value.extract::<PyBytes>(py)?.data(py)),
94 HgPathBuf::from_bytes(value.extract::<PyBytes>(py)?.data(py)),
95 ))
95 ))
96 })
96 })
97 .collect();
97 .collect();
98
98
99 if p1.len() != PARENT_SIZE || p2.len() != PARENT_SIZE {
99 if p1.len() != PARENT_SIZE || p2.len() != PARENT_SIZE {
100 return Err(PyErr::new::<exc::ValueError, _>(
100 return Err(PyErr::new::<exc::ValueError, _>(
101 py,
101 py,
102 "expected a 20-byte hash".to_string(),
102 "expected a 20-byte hash".to_string(),
103 ));
103 ));
104 }
104 }
105
105
106 match pack_dirstate(
106 match pack_dirstate(
107 &mut dirstate_map,
107 &mut dirstate_map,
108 &copies?,
108 &copies?,
109 DirstateParents {
109 DirstateParents {
110 p1: p1.try_into().unwrap(),
110 p1: p1.try_into().unwrap(),
111 p2: p2.try_into().unwrap(),
111 p2: p2.try_into().unwrap(),
112 },
112 },
113 Duration::from_secs(now.as_object().extract::<u64>(py)?),
113 Duration::from_secs(now.as_object().extract::<u64>(py)?),
114 ) {
114 ) {
115 Ok(packed) => {
115 Ok(packed) => {
116 for (filename, entry) in &dirstate_map {
116 for (filename, entry) in &dirstate_map {
117 dmap.set_item(
117 dmap.set_item(
118 py,
118 py,
119 PyBytes::new(py, filename.as_ref()),
119 PyBytes::new(py, filename.as_bytes()),
120 make_dirstate_tuple(py, entry)?,
120 make_dirstate_tuple(py, entry)?,
121 )?;
121 )?;
122 }
122 }
123 Ok(PyBytes::new(py, &packed))
123 Ok(PyBytes::new(py, &packed))
124 }
124 }
125 Err(error) => Err(PyErr::new::<exc::ValueError, _>(
125 Err(error) => Err(PyErr::new::<exc::ValueError, _>(
126 py,
126 py,
127 match error {
127 match error {
128 DirstatePackError::CorruptedParent => {
128 DirstatePackError::CorruptedParent => {
129 "expected a 20-byte hash".to_string()
129 "expected a 20-byte hash".to_string()
130 }
130 }
131 DirstatePackError::CorruptedEntry(e) => e,
131 DirstatePackError::CorruptedEntry(e) => e,
132 DirstatePackError::BadSize(expected, actual) => {
132 DirstatePackError::BadSize(expected, actual) => {
133 format!("bad dirstate size: {} != {}", actual, expected)
133 format!("bad dirstate size: {} != {}", actual, expected)
134 }
134 }
135 },
135 },
136 )),
136 )),
137 }
137 }
138 }
138 }
139
139
140 /// Create the module, with `__package__` given from parent
140 /// Create the module, with `__package__` given from parent
141 pub fn init_parsers_module(py: Python, package: &str) -> PyResult<PyModule> {
141 pub fn init_parsers_module(py: Python, package: &str) -> PyResult<PyModule> {
142 let dotted_name = &format!("{}.parsers", package);
142 let dotted_name = &format!("{}.parsers", package);
143 let m = PyModule::new(py, dotted_name)?;
143 let m = PyModule::new(py, dotted_name)?;
144
144
145 m.add(py, "__package__", package)?;
145 m.add(py, "__package__", package)?;
146 m.add(py, "__doc__", "Parsers - Rust implementation")?;
146 m.add(py, "__doc__", "Parsers - Rust implementation")?;
147
147
148 m.add(
148 m.add(
149 py,
149 py,
150 "parse_dirstate",
150 "parse_dirstate",
151 py_fn!(
151 py_fn!(
152 py,
152 py,
153 parse_dirstate_wrapper(dmap: PyDict, copymap: PyDict, st: PyBytes)
153 parse_dirstate_wrapper(dmap: PyDict, copymap: PyDict, st: PyBytes)
154 ),
154 ),
155 )?;
155 )?;
156 m.add(
156 m.add(
157 py,
157 py,
158 "pack_dirstate",
158 "pack_dirstate",
159 py_fn!(
159 py_fn!(
160 py,
160 py,
161 pack_dirstate_wrapper(
161 pack_dirstate_wrapper(
162 dmap: PyDict,
162 dmap: PyDict,
163 copymap: PyDict,
163 copymap: PyDict,
164 pl: PyTuple,
164 pl: PyTuple,
165 now: PyInt
165 now: PyInt
166 )
166 )
167 ),
167 ),
168 )?;
168 )?;
169
169
170 let sys = PyModule::import(py, "sys")?;
170 let sys = PyModule::import(py, "sys")?;
171 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
171 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
172 sys_modules.set_item(py, dotted_name, &m)?;
172 sys_modules.set_item(py, dotted_name, &m)?;
173
173
174 Ok(m)
174 Ok(m)
175 }
175 }
@@ -1,47 +1,44
1 use cpython::exc::ValueError;
1 use cpython::exc::ValueError;
2 use cpython::{PyBytes, PyDict, PyErr, PyObject, PyResult, PyTuple, Python};
2 use cpython::{PyBytes, PyDict, PyErr, PyObject, PyResult, PyTuple, Python};
3 use hg::revlog::Node;
3 use hg::revlog::Node;
4 use std::convert::TryFrom;
4 use std::convert::TryFrom;
5
5
6 #[allow(unused)]
6 #[allow(unused)]
7 pub fn print_python_trace(py: Python) -> PyResult<PyObject> {
7 pub fn print_python_trace(py: Python) -> PyResult<PyObject> {
8 eprintln!("===============================");
8 eprintln!("===============================");
9 eprintln!("Printing Python stack from Rust");
9 eprintln!("Printing Python stack from Rust");
10 eprintln!("===============================");
10 eprintln!("===============================");
11 let traceback = py.import("traceback")?;
11 let traceback = py.import("traceback")?;
12 let sys = py.import("sys")?;
12 let sys = py.import("sys")?;
13 let kwargs = PyDict::new(py);
13 let kwargs = PyDict::new(py);
14 kwargs.set_item(py, "file", sys.get(py, "stderr")?)?;
14 kwargs.set_item(py, "file", sys.get(py, "stderr")?)?;
15 traceback.call(py, "print_stack", PyTuple::new(py, &[]), Some(&kwargs))
15 traceback.call(py, "print_stack", PyTuple::new(py, &[]), Some(&kwargs))
16 }
16 }
17
17
18 // Necessary evil for the time being, could maybe be moved to
18 // Necessary evil for the time being, could maybe be moved to
19 // a TryFrom in Node itself
19 // a TryFrom in Node itself
20 const NODE_BYTES_LENGTH: usize = 20;
20 const NODE_BYTES_LENGTH: usize = 20;
21 type NodeData = [u8; NODE_BYTES_LENGTH];
21 type NodeData = [u8; NODE_BYTES_LENGTH];
22
22
23 /// Copy incoming Python bytes given as `PyObject` into `Node`,
23 /// Copy incoming Python bytes given as `PyObject` into `Node`,
24 /// doing the necessary checks
24 /// doing the necessary checks
25 pub fn node_from_py_object<'a>(
25 pub fn node_from_py_object<'a>(
26 py: Python,
26 py: Python,
27 bytes: &'a PyObject,
27 bytes: &'a PyObject,
28 ) -> PyResult<Node> {
28 ) -> PyResult<Node> {
29 let as_py_bytes: &'a PyBytes = bytes.extract(py)?;
29 let as_py_bytes: &'a PyBytes = bytes.extract(py)?;
30 node_from_py_bytes(py, as_py_bytes)
30 node_from_py_bytes(py, as_py_bytes)
31 }
31 }
32
32
33 /// Clone incoming Python bytes given as `PyBytes` as a `Node`,
33 /// Clone incoming Python bytes given as `PyBytes` as a `Node`,
34 /// doing the necessary checks.
34 /// doing the necessary checks.
35 pub fn node_from_py_bytes<'a>(
35 pub fn node_from_py_bytes(py: Python, bytes: &PyBytes) -> PyResult<Node> {
36 py: Python,
37 bytes: &'a PyBytes,
38 ) -> PyResult<Node> {
39 <NodeData>::try_from(bytes.data(py))
36 <NodeData>::try_from(bytes.data(py))
40 .map_err(|_| {
37 .map_err(|_| {
41 PyErr::new::<ValueError, _>(
38 PyErr::new::<ValueError, _>(
42 py,
39 py,
43 format!("{}-byte hash required", NODE_BYTES_LENGTH),
40 format!("{}-byte hash required", NODE_BYTES_LENGTH),
44 )
41 )
45 })
42 })
46 .map(|n| n.into())
43 .map(Into::into)
47 }
44 }
General Comments 0
You need to be logged in to leave comments. Login now