##// END OF EJS Templates
rust-clippy: fix most warnings in `hg-core`...
Raphaël Gomès -
r50825:e98fd81b default
parent child Browse files
Show More

The requested changes are too big and content was truncated. Show full diff

@@ -1,701 +1,702 b''
1 // ancestors.rs
1 // ancestors.rs
2 //
2 //
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Rust versions of generic DAG ancestors algorithms for Mercurial
8 //! Rust versions of generic DAG ancestors algorithms for Mercurial
9
9
10 use super::{Graph, GraphError, Revision, NULL_REVISION};
10 use super::{Graph, GraphError, Revision, NULL_REVISION};
11 use crate::dagops;
11 use crate::dagops;
12 use std::cmp::max;
12 use std::cmp::max;
13 use std::collections::{BinaryHeap, HashSet};
13 use std::collections::{BinaryHeap, HashSet};
14
14
15 /// Iterator over the ancestors of a given list of revisions
15 /// Iterator over the ancestors of a given list of revisions
16 /// This is a generic type, defined and implemented for any Graph, so that
16 /// This is a generic type, defined and implemented for any Graph, so that
17 /// it's easy to
17 /// it's easy to
18 ///
18 ///
19 /// - unit test in pure Rust
19 /// - unit test in pure Rust
20 /// - bind to main Mercurial code, potentially in several ways and have these
20 /// - bind to main Mercurial code, potentially in several ways and have these
21 /// bindings evolve over time
21 /// bindings evolve over time
22 pub struct AncestorsIterator<G: Graph> {
22 pub struct AncestorsIterator<G: Graph> {
23 graph: G,
23 graph: G,
24 visit: BinaryHeap<Revision>,
24 visit: BinaryHeap<Revision>,
25 seen: HashSet<Revision>,
25 seen: HashSet<Revision>,
26 stoprev: Revision,
26 stoprev: Revision,
27 }
27 }
28
28
29 pub struct MissingAncestors<G: Graph> {
29 pub struct MissingAncestors<G: Graph> {
30 graph: G,
30 graph: G,
31 bases: HashSet<Revision>,
31 bases: HashSet<Revision>,
32 max_base: Revision,
32 max_base: Revision,
33 }
33 }
34
34
35 impl<G: Graph> AncestorsIterator<G> {
35 impl<G: Graph> AncestorsIterator<G> {
36 /// Constructor.
36 /// Constructor.
37 ///
37 ///
38 /// if `inclusive` is true, then the init revisions are emitted in
38 /// if `inclusive` is true, then the init revisions are emitted in
39 /// particular, otherwise iteration starts from their parents.
39 /// particular, otherwise iteration starts from their parents.
40 pub fn new(
40 pub fn new(
41 graph: G,
41 graph: G,
42 initrevs: impl IntoIterator<Item = Revision>,
42 initrevs: impl IntoIterator<Item = Revision>,
43 stoprev: Revision,
43 stoprev: Revision,
44 inclusive: bool,
44 inclusive: bool,
45 ) -> Result<Self, GraphError> {
45 ) -> Result<Self, GraphError> {
46 let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
46 let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
47 if inclusive {
47 if inclusive {
48 let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
48 let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
49 let seen = visit.iter().cloned().collect();
49 let seen = visit.iter().cloned().collect();
50 return Ok(AncestorsIterator {
50 return Ok(AncestorsIterator {
51 visit,
51 visit,
52 seen,
52 seen,
53 stoprev,
53 stoprev,
54 graph,
54 graph,
55 });
55 });
56 }
56 }
57 let mut this = AncestorsIterator {
57 let mut this = AncestorsIterator {
58 visit: BinaryHeap::new(),
58 visit: BinaryHeap::new(),
59 seen: HashSet::new(),
59 seen: HashSet::new(),
60 stoprev,
60 stoprev,
61 graph,
61 graph,
62 };
62 };
63 this.seen.insert(NULL_REVISION);
63 this.seen.insert(NULL_REVISION);
64 for rev in filtered_initrevs {
64 for rev in filtered_initrevs {
65 for parent in this.graph.parents(rev)?.iter().cloned() {
65 for parent in this.graph.parents(rev)?.iter().cloned() {
66 this.conditionally_push_rev(parent);
66 this.conditionally_push_rev(parent);
67 }
67 }
68 }
68 }
69 Ok(this)
69 Ok(this)
70 }
70 }
71
71
72 #[inline]
72 #[inline]
73 fn conditionally_push_rev(&mut self, rev: Revision) {
73 fn conditionally_push_rev(&mut self, rev: Revision) {
74 if self.stoprev <= rev && self.seen.insert(rev) {
74 if self.stoprev <= rev && self.seen.insert(rev) {
75 self.visit.push(rev);
75 self.visit.push(rev);
76 }
76 }
77 }
77 }
78
78
79 /// Consumes partially the iterator to tell if the given target
79 /// Consumes partially the iterator to tell if the given target
80 /// revision
80 /// revision
81 /// is in the ancestors it emits.
81 /// is in the ancestors it emits.
82 /// This is meant for iterators actually dedicated to that kind of
82 /// This is meant for iterators actually dedicated to that kind of
83 /// purpose
83 /// purpose
84 pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
84 pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
85 if self.seen.contains(&target) && target != NULL_REVISION {
85 if self.seen.contains(&target) && target != NULL_REVISION {
86 return Ok(true);
86 return Ok(true);
87 }
87 }
88 for item in self {
88 for item in self {
89 let rev = item?;
89 let rev = item?;
90 if rev == target {
90 if rev == target {
91 return Ok(true);
91 return Ok(true);
92 }
92 }
93 if rev < target {
93 if rev < target {
94 return Ok(false);
94 return Ok(false);
95 }
95 }
96 }
96 }
97 Ok(false)
97 Ok(false)
98 }
98 }
99
99
100 pub fn peek(&self) -> Option<Revision> {
100 pub fn peek(&self) -> Option<Revision> {
101 self.visit.peek().cloned()
101 self.visit.peek().cloned()
102 }
102 }
103
103
104 /// Tell if the iterator is about an empty set
104 /// Tell if the iterator is about an empty set
105 ///
105 ///
106 /// The result does not depend whether the iterator has been consumed
106 /// The result does not depend whether the iterator has been consumed
107 /// or not.
107 /// or not.
108 /// This is mostly meant for iterators backing a lazy ancestors set
108 /// This is mostly meant for iterators backing a lazy ancestors set
109 pub fn is_empty(&self) -> bool {
109 pub fn is_empty(&self) -> bool {
110 if self.visit.len() > 0 {
110 if self.visit.len() > 0 {
111 return false;
111 return false;
112 }
112 }
113 if self.seen.len() > 1 {
113 if self.seen.len() > 1 {
114 return false;
114 return false;
115 }
115 }
116 // at this point, the seen set is at most a singleton.
116 // at this point, the seen set is at most a singleton.
117 // If not `self.inclusive`, it's still possible that it has only
117 // If not `self.inclusive`, it's still possible that it has only
118 // the null revision
118 // the null revision
119 self.seen.is_empty() || self.seen.contains(&NULL_REVISION)
119 self.seen.is_empty() || self.seen.contains(&NULL_REVISION)
120 }
120 }
121 }
121 }
122
122
123 /// Main implementation for the iterator
123 /// Main implementation for the iterator
124 ///
124 ///
125 /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py`
125 /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py`
126 /// with a few non crucial differences:
126 /// with a few non crucial differences:
127 ///
127 ///
128 /// - there's no filtering of invalid parent revisions. Actually, it should be
128 /// - there's no filtering of invalid parent revisions. Actually, it should be
129 /// consistent and more efficient to filter them from the end caller.
129 /// consistent and more efficient to filter them from the end caller.
130 /// - we don't have the optimization for adjacent revisions (i.e., the case
130 /// - we don't have the optimization for adjacent revisions (i.e., the case
131 /// where `p1 == rev - 1`), because it amounts to update the first element of
131 /// where `p1 == rev - 1`), because it amounts to update the first element of
132 /// the heap without sifting, which Rust's BinaryHeap doesn't let us do.
132 /// the heap without sifting, which Rust's BinaryHeap doesn't let us do.
133 /// - we save a few pushes by comparing with `stoprev` before pushing
133 /// - we save a few pushes by comparing with `stoprev` before pushing
134 impl<G: Graph> Iterator for AncestorsIterator<G> {
134 impl<G: Graph> Iterator for AncestorsIterator<G> {
135 type Item = Result<Revision, GraphError>;
135 type Item = Result<Revision, GraphError>;
136
136
137 fn next(&mut self) -> Option<Self::Item> {
137 fn next(&mut self) -> Option<Self::Item> {
138 let current = match self.visit.peek() {
138 let current = match self.visit.peek() {
139 None => {
139 None => {
140 return None;
140 return None;
141 }
141 }
142 Some(c) => *c,
142 Some(c) => *c,
143 };
143 };
144 let [p1, p2] = match self.graph.parents(current) {
144 let [p1, p2] = match self.graph.parents(current) {
145 Ok(ps) => ps,
145 Ok(ps) => ps,
146 Err(e) => return Some(Err(e)),
146 Err(e) => return Some(Err(e)),
147 };
147 };
148 if p1 < self.stoprev || !self.seen.insert(p1) {
148 if p1 < self.stoprev || !self.seen.insert(p1) {
149 self.visit.pop();
149 self.visit.pop();
150 } else {
150 } else {
151 *(self.visit.peek_mut().unwrap()) = p1;
151 *(self.visit.peek_mut().unwrap()) = p1;
152 };
152 };
153
153
154 self.conditionally_push_rev(p2);
154 self.conditionally_push_rev(p2);
155 Some(Ok(current))
155 Some(Ok(current))
156 }
156 }
157 }
157 }
158
158
159 impl<G: Graph> MissingAncestors<G> {
159 impl<G: Graph> MissingAncestors<G> {
160 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
160 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
161 let mut created = MissingAncestors {
161 let mut created = MissingAncestors {
162 graph,
162 graph,
163 bases: HashSet::new(),
163 bases: HashSet::new(),
164 max_base: NULL_REVISION,
164 max_base: NULL_REVISION,
165 };
165 };
166 created.add_bases(bases);
166 created.add_bases(bases);
167 created
167 created
168 }
168 }
169
169
170 pub fn has_bases(&self) -> bool {
170 pub fn has_bases(&self) -> bool {
171 !self.bases.is_empty()
171 !self.bases.is_empty()
172 }
172 }
173
173
174 /// Return a reference to current bases.
174 /// Return a reference to current bases.
175 ///
175 ///
176 /// This is useful in unit tests, but also setdiscovery.py does
176 /// This is useful in unit tests, but also setdiscovery.py does
177 /// read the bases attribute of a ancestor.missingancestors instance.
177 /// read the bases attribute of a ancestor.missingancestors instance.
178 pub fn get_bases<'a>(&'a self) -> &'a HashSet<Revision> {
178 pub fn get_bases(&self) -> &HashSet<Revision> {
179 &self.bases
179 &self.bases
180 }
180 }
181
181
182 /// Computes the relative heads of current bases.
182 /// Computes the relative heads of current bases.
183 ///
183 ///
184 /// The object is still usable after this.
184 /// The object is still usable after this.
185 pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
185 pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
186 dagops::heads(&self.graph, self.bases.iter())
186 dagops::heads(&self.graph, self.bases.iter())
187 }
187 }
188
188
189 /// Consumes the object and returns the relative heads of its bases.
189 /// Consumes the object and returns the relative heads of its bases.
190 pub fn into_bases_heads(
190 pub fn into_bases_heads(
191 mut self,
191 mut self,
192 ) -> Result<HashSet<Revision>, GraphError> {
192 ) -> Result<HashSet<Revision>, GraphError> {
193 dagops::retain_heads(&self.graph, &mut self.bases)?;
193 dagops::retain_heads(&self.graph, &mut self.bases)?;
194 Ok(self.bases)
194 Ok(self.bases)
195 }
195 }
196
196
197 /// Add some revisions to `self.bases`
197 /// Add some revisions to `self.bases`
198 ///
198 ///
199 /// Takes care of keeping `self.max_base` up to date.
199 /// Takes care of keeping `self.max_base` up to date.
200 pub fn add_bases(
200 pub fn add_bases(
201 &mut self,
201 &mut self,
202 new_bases: impl IntoIterator<Item = Revision>,
202 new_bases: impl IntoIterator<Item = Revision>,
203 ) {
203 ) {
204 let mut max_base = self.max_base;
204 let mut max_base = self.max_base;
205 self.bases.extend(
205 self.bases.extend(
206 new_bases
206 new_bases
207 .into_iter()
207 .into_iter()
208 .filter(|&rev| rev != NULL_REVISION)
208 .filter(|&rev| rev != NULL_REVISION)
209 .map(|r| {
209 .map(|r| {
210 if r > max_base {
210 if r > max_base {
211 max_base = r;
211 max_base = r;
212 }
212 }
213 r
213 r
214 }),
214 }),
215 );
215 );
216 self.max_base = max_base;
216 self.max_base = max_base;
217 }
217 }
218
218
219 /// Remove all ancestors of self.bases from the revs set (in place)
219 /// Remove all ancestors of self.bases from the revs set (in place)
220 pub fn remove_ancestors_from(
220 pub fn remove_ancestors_from(
221 &mut self,
221 &mut self,
222 revs: &mut HashSet<Revision>,
222 revs: &mut HashSet<Revision>,
223 ) -> Result<(), GraphError> {
223 ) -> Result<(), GraphError> {
224 revs.retain(|r| !self.bases.contains(r));
224 revs.retain(|r| !self.bases.contains(r));
225 // the null revision is always an ancestor. Logically speaking
225 // the null revision is always an ancestor. Logically speaking
226 // it's debatable in case bases is empty, but the Python
226 // it's debatable in case bases is empty, but the Python
227 // implementation always adds NULL_REVISION to bases, making it
227 // implementation always adds NULL_REVISION to bases, making it
228 // unconditionnally true.
228 // unconditionnally true.
229 revs.remove(&NULL_REVISION);
229 revs.remove(&NULL_REVISION);
230 if revs.is_empty() {
230 if revs.is_empty() {
231 return Ok(());
231 return Ok(());
232 }
232 }
233 // anything in revs > start is definitely not an ancestor of bases
233 // anything in revs > start is definitely not an ancestor of bases
234 // revs <= start need to be investigated
234 // revs <= start need to be investigated
235 if self.max_base == NULL_REVISION {
235 if self.max_base == NULL_REVISION {
236 return Ok(());
236 return Ok(());
237 }
237 }
238
238
239 // whatever happens, we'll keep at least keepcount of them
239 // whatever happens, we'll keep at least keepcount of them
240 // knowing this gives us a earlier stop condition than
240 // knowing this gives us a earlier stop condition than
241 // going all the way to the root
241 // going all the way to the root
242 let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
242 let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
243
243
244 let mut curr = self.max_base;
244 let mut curr = self.max_base;
245 while curr != NULL_REVISION && revs.len() > keepcount {
245 while curr != NULL_REVISION && revs.len() > keepcount {
246 if self.bases.contains(&curr) {
246 if self.bases.contains(&curr) {
247 revs.remove(&curr);
247 revs.remove(&curr);
248 self.add_parents(curr)?;
248 self.add_parents(curr)?;
249 }
249 }
250 curr -= 1;
250 curr -= 1;
251 }
251 }
252 Ok(())
252 Ok(())
253 }
253 }
254
254
255 /// Add the parents of `rev` to `self.bases`
255 /// Add the parents of `rev` to `self.bases`
256 ///
256 ///
257 /// This has no effect on `self.max_base`
257 /// This has no effect on `self.max_base`
258 #[inline]
258 #[inline]
259 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
259 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
260 if rev == NULL_REVISION {
260 if rev == NULL_REVISION {
261 return Ok(());
261 return Ok(());
262 }
262 }
263 for p in self.graph.parents(rev)?.iter().cloned() {
263 for p in self.graph.parents(rev)?.iter().cloned() {
264 // No need to bother the set with inserting NULL_REVISION over and
264 // No need to bother the set with inserting NULL_REVISION over and
265 // over
265 // over
266 if p != NULL_REVISION {
266 if p != NULL_REVISION {
267 self.bases.insert(p);
267 self.bases.insert(p);
268 }
268 }
269 }
269 }
270 Ok(())
270 Ok(())
271 }
271 }
272
272
273 /// Return all the ancestors of revs that are not ancestors of self.bases
273 /// Return all the ancestors of revs that are not ancestors of self.bases
274 ///
274 ///
275 /// This may include elements from revs.
275 /// This may include elements from revs.
276 ///
276 ///
277 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
277 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
278 /// revision number order, which is a topological order.
278 /// revision number order, which is a topological order.
279 pub fn missing_ancestors(
279 pub fn missing_ancestors(
280 &mut self,
280 &mut self,
281 revs: impl IntoIterator<Item = Revision>,
281 revs: impl IntoIterator<Item = Revision>,
282 ) -> Result<Vec<Revision>, GraphError> {
282 ) -> Result<Vec<Revision>, GraphError> {
283 // just for convenience and comparison with Python version
283 // just for convenience and comparison with Python version
284 let bases_visit = &mut self.bases;
284 let bases_visit = &mut self.bases;
285 let mut revs: HashSet<Revision> = revs
285 let mut revs: HashSet<Revision> = revs
286 .into_iter()
286 .into_iter()
287 .filter(|r| !bases_visit.contains(r))
287 .filter(|r| !bases_visit.contains(r))
288 .collect();
288 .collect();
289 let revs_visit = &mut revs;
289 let revs_visit = &mut revs;
290 let mut both_visit: HashSet<Revision> =
290 let mut both_visit: HashSet<Revision> =
291 revs_visit.intersection(&bases_visit).cloned().collect();
291 revs_visit.intersection(bases_visit).cloned().collect();
292 if revs_visit.is_empty() {
292 if revs_visit.is_empty() {
293 return Ok(Vec::new());
293 return Ok(Vec::new());
294 }
294 }
295 let max_revs = revs_visit.iter().cloned().max().unwrap();
295 let max_revs = revs_visit.iter().cloned().max().unwrap();
296 let start = max(self.max_base, max_revs);
296 let start = max(self.max_base, max_revs);
297
297
298 // TODO heuristics for with_capacity()?
298 // TODO heuristics for with_capacity()?
299 let mut missing: Vec<Revision> = Vec::new();
299 let mut missing: Vec<Revision> = Vec::new();
300 for curr in (0..=start).rev() {
300 for curr in (0..=start).rev() {
301 if revs_visit.is_empty() {
301 if revs_visit.is_empty() {
302 break;
302 break;
303 }
303 }
304 if both_visit.remove(&curr) {
304 if both_visit.remove(&curr) {
305 // curr's parents might have made it into revs_visit through
305 // curr's parents might have made it into revs_visit through
306 // another path
306 // another path
307 for p in self.graph.parents(curr)?.iter().cloned() {
307 for p in self.graph.parents(curr)?.iter().cloned() {
308 if p == NULL_REVISION {
308 if p == NULL_REVISION {
309 continue;
309 continue;
310 }
310 }
311 revs_visit.remove(&p);
311 revs_visit.remove(&p);
312 bases_visit.insert(p);
312 bases_visit.insert(p);
313 both_visit.insert(p);
313 both_visit.insert(p);
314 }
314 }
315 } else if revs_visit.remove(&curr) {
315 } else if revs_visit.remove(&curr) {
316 missing.push(curr);
316 missing.push(curr);
317 for p in self.graph.parents(curr)?.iter().cloned() {
317 for p in self.graph.parents(curr)?.iter().cloned() {
318 if p == NULL_REVISION {
318 if p == NULL_REVISION {
319 continue;
319 continue;
320 }
320 }
321 if bases_visit.contains(&p) {
321 if bases_visit.contains(&p) {
322 // p is already known to be an ancestor of revs_visit
322 // p is already known to be an ancestor of revs_visit
323 revs_visit.remove(&p);
323 revs_visit.remove(&p);
324 both_visit.insert(p);
324 both_visit.insert(p);
325 } else if both_visit.contains(&p) {
325 } else if both_visit.contains(&p) {
326 // p should have been in bases_visit
326 // p should have been in bases_visit
327 revs_visit.remove(&p);
327 revs_visit.remove(&p);
328 bases_visit.insert(p);
328 bases_visit.insert(p);
329 } else {
329 } else {
330 // visit later
330 // visit later
331 revs_visit.insert(p);
331 revs_visit.insert(p);
332 }
332 }
333 }
333 }
334 } else if bases_visit.contains(&curr) {
334 } else if bases_visit.contains(&curr) {
335 for p in self.graph.parents(curr)?.iter().cloned() {
335 for p in self.graph.parents(curr)?.iter().cloned() {
336 if p == NULL_REVISION {
336 if p == NULL_REVISION {
337 continue;
337 continue;
338 }
338 }
339 if revs_visit.remove(&p) || both_visit.contains(&p) {
339 if revs_visit.remove(&p) || both_visit.contains(&p) {
340 // p is an ancestor of bases_visit, and is implicitly
340 // p is an ancestor of bases_visit, and is implicitly
341 // in revs_visit, which means p is ::revs & ::bases.
341 // in revs_visit, which means p is ::revs & ::bases.
342 bases_visit.insert(p);
342 bases_visit.insert(p);
343 both_visit.insert(p);
343 both_visit.insert(p);
344 } else {
344 } else {
345 bases_visit.insert(p);
345 bases_visit.insert(p);
346 }
346 }
347 }
347 }
348 }
348 }
349 }
349 }
350 missing.reverse();
350 missing.reverse();
351 Ok(missing)
351 Ok(missing)
352 }
352 }
353 }
353 }
354
354
355 #[cfg(test)]
355 #[cfg(test)]
356 mod tests {
356 mod tests {
357
357
358 use super::*;
358 use super::*;
359 use crate::testing::{SampleGraph, VecGraph};
359 use crate::testing::{SampleGraph, VecGraph};
360
360
361 fn list_ancestors<G: Graph>(
361 fn list_ancestors<G: Graph>(
362 graph: G,
362 graph: G,
363 initrevs: Vec<Revision>,
363 initrevs: Vec<Revision>,
364 stoprev: Revision,
364 stoprev: Revision,
365 inclusive: bool,
365 inclusive: bool,
366 ) -> Vec<Revision> {
366 ) -> Vec<Revision> {
367 AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
367 AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
368 .unwrap()
368 .unwrap()
369 .map(|res| res.unwrap())
369 .map(|res| res.unwrap())
370 .collect()
370 .collect()
371 }
371 }
372
372
#[test]
/// Same tests as test-ancestor.py, without membership
/// (see also test-ancestor.py.out)
fn test_list_ancestor() {
    // empty input yields nothing
    assert_eq!(list_ancestors(SampleGraph, vec![], 0, false), vec![]);
    // exclusive iteration from two heads
    assert_eq!(
        list_ancestors(SampleGraph, vec![11, 13], 0, false),
        vec![8, 7, 4, 3, 2, 1, 0]
    );
    assert_eq!(
        list_ancestors(SampleGraph, vec![1, 3], 0, false),
        vec![1, 0]
    );
    // inclusive iteration also emits the initial revisions
    assert_eq!(
        list_ancestors(SampleGraph, vec![11, 13], 0, true),
        vec![13, 11, 8, 7, 4, 3, 2, 1, 0]
    );
    // stoprev cuts the iteration short
    assert_eq!(
        list_ancestors(SampleGraph, vec![11, 13], 6, false),
        vec![8, 7]
    );
    assert_eq!(
        list_ancestors(SampleGraph, vec![11, 13], 6, true),
        vec![13, 11, 8, 7]
    );
    assert_eq!(
        list_ancestors(SampleGraph, vec![11, 13], 11, true),
        vec![13, 11]
    );
    assert_eq!(
        list_ancestors(SampleGraph, vec![11, 13], 12, true),
        vec![13]
    );
    assert_eq!(
        list_ancestors(SampleGraph, vec![10, 1], 0, true),
        vec![10, 5, 4, 2, 1, 0]
    );
}
411
411
#[test]
/// Corner case that's not directly in test-ancestors.py, but
/// that happens quite often, as demonstrated by running the whole
/// suite.
/// For instance, run tests/test-obsolete-checkheads.t
fn test_nullrev_input() {
    // iterating from the null revision alone must yield nothing
    let mut iter =
        AncestorsIterator::new(SampleGraph, vec![-1], 0, false).unwrap();
    assert_eq!(iter.next(), None)
}
422
422
#[test]
fn test_contains() {
    // 1 is among the (inclusive) ancestors of {10, 1}, 3 is not
    let mut lazy =
        AncestorsIterator::new(SampleGraph, vec![10, 1], 0, true).unwrap();
    assert!(lazy.contains(1).unwrap());
    assert!(!lazy.contains(3).unwrap());

    // the null revision is never reported as contained
    let mut lazy =
        AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
    assert!(!lazy.contains(NULL_REVISION).unwrap());
}
434
434
#[test]
fn test_peek() {
    let mut iter =
        AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
    // peek() gives us the next value
    assert_eq!(iter.peek(), Some(10));
    // but it's not been consumed
    assert_eq!(iter.next(), Some(Ok(10)));
    // and iteration resumes normally
    assert_eq!(iter.next(), Some(Ok(5)));

    // let's drain the iterator to test peek() at the end
    while iter.next().is_some() {}
    assert_eq!(iter.peek(), None);
}
450
450
#[test]
fn test_empty() {
    // a non-empty set stays "non-empty" even after full consumption
    let mut iter =
        AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
    assert!(!iter.is_empty());
    while iter.next().is_some() {}
    assert!(!iter.is_empty());

    // no initial revisions: trivially empty
    let iter =
        AncestorsIterator::new(SampleGraph, vec![], 0, true).unwrap();
    assert!(iter.is_empty());

    // case where iter.seen == {NULL_REVISION}
    let iter =
        AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
    assert!(iter.is_empty());
}
468
468
469 /// A corrupted Graph, supporting error handling tests
469 /// A corrupted Graph, supporting error handling tests
470 #[derive(Clone, Debug)]
470 #[derive(Clone, Debug)]
471 struct Corrupted;
471 struct Corrupted;
472
472
473 impl Graph for Corrupted {
473 impl Graph for Corrupted {
474 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
474 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
475 match rev {
475 match rev {
476 1 => Ok([0, -1]),
476 1 => Ok([0, -1]),
477 r => Err(GraphError::ParentOutOfRange(r)),
477 r => Err(GraphError::ParentOutOfRange(r)),
478 }
478 }
479 }
479 }
480 }
480 }
481
481
#[test]
fn test_initrev_out_of_range() {
    // inclusive=false looks up initrev's parents right away
    match AncestorsIterator::new(SampleGraph, vec![25], 0, false) {
        Ok(_) => panic!("Should have been ParentOutOfRange"),
        Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25)),
    }
}
490
490
#[test]
fn test_next_out_of_range() {
    // inclusive=false looks up initrev's parents right away; the error on
    // revision 0 only surfaces when iteration reaches it
    let mut iter =
        AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
    assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0))));
}
498
498
#[test]
/// Test constructor, add/get bases and heads
fn test_missing_bases() -> Result<(), GraphError> {
    // duplicates in the input are collapsed by the bases set
    let mut missing_ancestors =
        MissingAncestors::new(SampleGraph, [5, 3, 1, 3].iter().cloned());
    let mut as_vec: Vec<Revision> =
        missing_ancestors.get_bases().iter().cloned().collect();
    as_vec.sort_unstable();
    assert_eq!(as_vec, [1, 3, 5]);
    assert_eq!(missing_ancestors.max_base, 5);

    // adding bases updates both the set and max_base
    missing_ancestors.add_bases([3, 7, 8].iter().cloned());
    as_vec = missing_ancestors.get_bases().iter().cloned().collect();
    as_vec.sort_unstable();
    assert_eq!(as_vec, [1, 3, 5, 7, 8]);
    assert_eq!(missing_ancestors.max_base, 8);

    as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
    as_vec.sort_unstable();
    assert_eq!(as_vec, [3, 5, 7, 8]);
    Ok(())
}
521
521
522 fn assert_missing_remove(
522 fn assert_missing_remove(
523 bases: &[Revision],
523 bases: &[Revision],
524 revs: &[Revision],
524 revs: &[Revision],
525 expected: &[Revision],
525 expected: &[Revision],
526 ) {
526 ) {
527 let mut missing_ancestors =
527 let mut missing_ancestors =
528 MissingAncestors::new(SampleGraph, bases.iter().cloned());
528 MissingAncestors::new(SampleGraph, bases.iter().cloned());
529 let mut revset: HashSet<Revision> = revs.iter().cloned().collect();
529 let mut revset: HashSet<Revision> = revs.iter().cloned().collect();
530 missing_ancestors
530 missing_ancestors
531 .remove_ancestors_from(&mut revset)
531 .remove_ancestors_from(&mut revset)
532 .unwrap();
532 .unwrap();
533 let mut as_vec: Vec<Revision> = revset.into_iter().collect();
533 let mut as_vec: Vec<Revision> = revset.into_iter().collect();
534 as_vec.sort();
534 as_vec.sort_unstable();
535 assert_eq!(as_vec.as_slice(), expected);
535 assert_eq!(as_vec.as_slice(), expected);
536 }
536 }
537
537
#[test]
fn test_missing_remove() {
    assert_missing_remove(
        &[1, 2, 3, 4, 7],
        Vec::from_iter(1..10).as_slice(),
        &[5, 6, 8, 9],
    );
    assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
    assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
}
548
548
549 fn assert_missing_ancestors(
549 fn assert_missing_ancestors(
550 bases: &[Revision],
550 bases: &[Revision],
551 revs: &[Revision],
551 revs: &[Revision],
552 expected: &[Revision],
552 expected: &[Revision],
553 ) {
553 ) {
554 let mut missing_ancestors =
554 let mut missing_ancestors =
555 MissingAncestors::new(SampleGraph, bases.iter().cloned());
555 MissingAncestors::new(SampleGraph, bases.iter().cloned());
556 let missing = missing_ancestors
556 let missing = missing_ancestors
557 .missing_ancestors(revs.iter().cloned())
557 .missing_ancestors(revs.iter().cloned())
558 .unwrap();
558 .unwrap();
559 assert_eq!(missing.as_slice(), expected);
559 assert_eq!(missing.as_slice(), expected);
560 }
560 }
561
561
562 #[test]
562 #[test]
563 fn test_missing_ancestors() {
563 fn test_missing_ancestors() {
564 // examples taken from test-ancestors.py by having it run
564 // examples taken from test-ancestors.py by having it run
565 // on the same graph (both naive and fast Python algs)
565 // on the same graph (both naive and fast Python algs)
566 assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
566 assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
567 assert_missing_ancestors(&[11], &[10], &[5, 10]);
567 assert_missing_ancestors(&[11], &[10], &[5, 10]);
568 assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
568 assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
569 }
569 }
570
570
571 /// An interesting case found by a random generator similar to
571 /// An interesting case found by a random generator similar to
572 /// the one in test-ancestor.py. An early version of Rust MissingAncestors
572 /// the one in test-ancestor.py. An early version of Rust MissingAncestors
573 /// failed this, yet none of the integration tests of the whole suite
573 /// failed this, yet none of the integration tests of the whole suite
574 /// catched it.
574 /// catched it.
575 #[allow(clippy::unnecessary_cast)]
575 #[test]
576 #[test]
576 fn test_remove_ancestors_from_case1() {
577 fn test_remove_ancestors_from_case1() {
577 let graph: VecGraph = vec![
578 let graph: VecGraph = vec![
578 [NULL_REVISION, NULL_REVISION],
579 [NULL_REVISION, NULL_REVISION],
579 [0, NULL_REVISION],
580 [0, NULL_REVISION],
580 [1, 0],
581 [1, 0],
581 [2, 1],
582 [2, 1],
582 [3, NULL_REVISION],
583 [3, NULL_REVISION],
583 [4, NULL_REVISION],
584 [4, NULL_REVISION],
584 [5, 1],
585 [5, 1],
585 [2, NULL_REVISION],
586 [2, NULL_REVISION],
586 [7, NULL_REVISION],
587 [7, NULL_REVISION],
587 [8, NULL_REVISION],
588 [8, NULL_REVISION],
588 [9, NULL_REVISION],
589 [9, NULL_REVISION],
589 [10, 1],
590 [10, 1],
590 [3, NULL_REVISION],
591 [3, NULL_REVISION],
591 [12, NULL_REVISION],
592 [12, NULL_REVISION],
592 [13, NULL_REVISION],
593 [13, NULL_REVISION],
593 [14, NULL_REVISION],
594 [14, NULL_REVISION],
594 [4, NULL_REVISION],
595 [4, NULL_REVISION],
595 [16, NULL_REVISION],
596 [16, NULL_REVISION],
596 [17, NULL_REVISION],
597 [17, NULL_REVISION],
597 [18, NULL_REVISION],
598 [18, NULL_REVISION],
598 [19, 11],
599 [19, 11],
599 [20, NULL_REVISION],
600 [20, NULL_REVISION],
600 [21, NULL_REVISION],
601 [21, NULL_REVISION],
601 [22, NULL_REVISION],
602 [22, NULL_REVISION],
602 [23, NULL_REVISION],
603 [23, NULL_REVISION],
603 [2, NULL_REVISION],
604 [2, NULL_REVISION],
604 [3, NULL_REVISION],
605 [3, NULL_REVISION],
605 [26, 24],
606 [26, 24],
606 [27, NULL_REVISION],
607 [27, NULL_REVISION],
607 [28, NULL_REVISION],
608 [28, NULL_REVISION],
608 [12, NULL_REVISION],
609 [12, NULL_REVISION],
609 [1, NULL_REVISION],
610 [1, NULL_REVISION],
610 [1, 9],
611 [1, 9],
611 [32, NULL_REVISION],
612 [32, NULL_REVISION],
612 [33, NULL_REVISION],
613 [33, NULL_REVISION],
613 [34, 31],
614 [34, 31],
614 [35, NULL_REVISION],
615 [35, NULL_REVISION],
615 [36, 26],
616 [36, 26],
616 [37, NULL_REVISION],
617 [37, NULL_REVISION],
617 [38, NULL_REVISION],
618 [38, NULL_REVISION],
618 [39, NULL_REVISION],
619 [39, NULL_REVISION],
619 [40, NULL_REVISION],
620 [40, NULL_REVISION],
620 [41, NULL_REVISION],
621 [41, NULL_REVISION],
621 [42, 26],
622 [42, 26],
622 [0, NULL_REVISION],
623 [0, NULL_REVISION],
623 [44, NULL_REVISION],
624 [44, NULL_REVISION],
624 [45, 4],
625 [45, 4],
625 [40, NULL_REVISION],
626 [40, NULL_REVISION],
626 [47, NULL_REVISION],
627 [47, NULL_REVISION],
627 [36, 0],
628 [36, 0],
628 [49, NULL_REVISION],
629 [49, NULL_REVISION],
629 [NULL_REVISION, NULL_REVISION],
630 [NULL_REVISION, NULL_REVISION],
630 [51, NULL_REVISION],
631 [51, NULL_REVISION],
631 [52, NULL_REVISION],
632 [52, NULL_REVISION],
632 [53, NULL_REVISION],
633 [53, NULL_REVISION],
633 [14, NULL_REVISION],
634 [14, NULL_REVISION],
634 [55, NULL_REVISION],
635 [55, NULL_REVISION],
635 [15, NULL_REVISION],
636 [15, NULL_REVISION],
636 [23, NULL_REVISION],
637 [23, NULL_REVISION],
637 [58, NULL_REVISION],
638 [58, NULL_REVISION],
638 [59, NULL_REVISION],
639 [59, NULL_REVISION],
639 [2, NULL_REVISION],
640 [2, NULL_REVISION],
640 [61, 59],
641 [61, 59],
641 [62, NULL_REVISION],
642 [62, NULL_REVISION],
642 [63, NULL_REVISION],
643 [63, NULL_REVISION],
643 [NULL_REVISION, NULL_REVISION],
644 [NULL_REVISION, NULL_REVISION],
644 [65, NULL_REVISION],
645 [65, NULL_REVISION],
645 [66, NULL_REVISION],
646 [66, NULL_REVISION],
646 [67, NULL_REVISION],
647 [67, NULL_REVISION],
647 [68, NULL_REVISION],
648 [68, NULL_REVISION],
648 [37, 28],
649 [37, 28],
649 [69, 25],
650 [69, 25],
650 [71, NULL_REVISION],
651 [71, NULL_REVISION],
651 [72, NULL_REVISION],
652 [72, NULL_REVISION],
652 [50, 2],
653 [50, 2],
653 [74, NULL_REVISION],
654 [74, NULL_REVISION],
654 [12, NULL_REVISION],
655 [12, NULL_REVISION],
655 [18, NULL_REVISION],
656 [18, NULL_REVISION],
656 [77, NULL_REVISION],
657 [77, NULL_REVISION],
657 [78, NULL_REVISION],
658 [78, NULL_REVISION],
658 [79, NULL_REVISION],
659 [79, NULL_REVISION],
659 [43, 33],
660 [43, 33],
660 [81, NULL_REVISION],
661 [81, NULL_REVISION],
661 [82, NULL_REVISION],
662 [82, NULL_REVISION],
662 [83, NULL_REVISION],
663 [83, NULL_REVISION],
663 [84, 45],
664 [84, 45],
664 [85, NULL_REVISION],
665 [85, NULL_REVISION],
665 [86, NULL_REVISION],
666 [86, NULL_REVISION],
666 [NULL_REVISION, NULL_REVISION],
667 [NULL_REVISION, NULL_REVISION],
667 [88, NULL_REVISION],
668 [88, NULL_REVISION],
668 [NULL_REVISION, NULL_REVISION],
669 [NULL_REVISION, NULL_REVISION],
669 [76, 83],
670 [76, 83],
670 [44, NULL_REVISION],
671 [44, NULL_REVISION],
671 [92, NULL_REVISION],
672 [92, NULL_REVISION],
672 [93, NULL_REVISION],
673 [93, NULL_REVISION],
673 [9, NULL_REVISION],
674 [9, NULL_REVISION],
674 [95, 67],
675 [95, 67],
675 [96, NULL_REVISION],
676 [96, NULL_REVISION],
676 [97, NULL_REVISION],
677 [97, NULL_REVISION],
677 [NULL_REVISION, NULL_REVISION],
678 [NULL_REVISION, NULL_REVISION],
678 ];
679 ];
679 let problem_rev = 28 as Revision;
680 let problem_rev = 28 as Revision;
680 let problem_base = 70 as Revision;
681 let problem_base = 70 as Revision;
681 // making the problem obvious: problem_rev is a parent of problem_base
682 // making the problem obvious: problem_rev is a parent of problem_base
682 assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
683 assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
683
684
684 let mut missing_ancestors: MissingAncestors<VecGraph> =
685 let mut missing_ancestors: MissingAncestors<VecGraph> =
685 MissingAncestors::new(
686 MissingAncestors::new(
686 graph,
687 graph,
687 [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
688 [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
688 .iter()
689 .iter()
689 .cloned(),
690 .cloned(),
690 );
691 );
691 assert!(missing_ancestors.bases.contains(&problem_base));
692 assert!(missing_ancestors.bases.contains(&problem_base));
692
693
693 let mut revs: HashSet<Revision> =
694 let mut revs: HashSet<Revision> =
694 [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
695 [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
695 .iter()
696 .iter()
696 .cloned()
697 .cloned()
697 .collect();
698 .collect();
698 missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
699 missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
699 assert!(!revs.contains(&problem_rev));
700 assert!(!revs.contains(&problem_rev));
700 }
701 }
701 }
702 }
@@ -1,654 +1,654 b''
1 // config.rs
1 // config.rs
2 //
2 //
3 // Copyright 2020
3 // Copyright 2020
4 // Valentin Gatien-Baron,
4 // Valentin Gatien-Baron,
5 // Raphaël Gomès <rgomes@octobus.net>
5 // Raphaël Gomès <rgomes@octobus.net>
6 //
6 //
7 // This software may be used and distributed according to the terms of the
7 // This software may be used and distributed according to the terms of the
8 // GNU General Public License version 2 or any later version.
8 // GNU General Public License version 2 or any later version.
9
9
10 use super::layer;
10 use super::layer;
11 use super::values;
11 use super::values;
12 use crate::config::layer::{
12 use crate::config::layer::{
13 ConfigError, ConfigLayer, ConfigOrigin, ConfigValue,
13 ConfigError, ConfigLayer, ConfigOrigin, ConfigValue,
14 };
14 };
15 use crate::config::plain_info::PlainInfo;
15 use crate::config::plain_info::PlainInfo;
16 use crate::utils::files::get_bytes_from_os_str;
16 use crate::utils::files::get_bytes_from_os_str;
17 use format_bytes::{write_bytes, DisplayBytes};
17 use format_bytes::{write_bytes, DisplayBytes};
18 use std::collections::HashSet;
18 use std::collections::HashSet;
19 use std::env;
19 use std::env;
20 use std::fmt;
20 use std::fmt;
21 use std::path::{Path, PathBuf};
21 use std::path::{Path, PathBuf};
22 use std::str;
22 use std::str;
23
23
24 use crate::errors::{HgResultExt, IoResultExt};
24 use crate::errors::{HgResultExt, IoResultExt};
25
25
26 /// Holds the config values for the current repository
26 /// Holds the config values for the current repository
27 /// TODO update this docstring once we support more sources
27 /// TODO update this docstring once we support more sources
28 #[derive(Clone)]
28 #[derive(Clone)]
29 pub struct Config {
29 pub struct Config {
30 layers: Vec<layer::ConfigLayer>,
30 layers: Vec<layer::ConfigLayer>,
31 plain: PlainInfo,
31 plain: PlainInfo,
32 }
32 }
33
33
34 impl DisplayBytes for Config {
34 impl DisplayBytes for Config {
35 fn display_bytes(
35 fn display_bytes(
36 &self,
36 &self,
37 out: &mut dyn std::io::Write,
37 out: &mut dyn std::io::Write,
38 ) -> std::io::Result<()> {
38 ) -> std::io::Result<()> {
39 for (index, layer) in self.layers.iter().rev().enumerate() {
39 for (index, layer) in self.layers.iter().rev().enumerate() {
40 write_bytes!(
40 write_bytes!(
41 out,
41 out,
42 b"==== Layer {} (trusted: {}) ====\n{}",
42 b"==== Layer {} (trusted: {}) ====\n{}",
43 index,
43 index,
44 if layer.trusted {
44 if layer.trusted {
45 &b"yes"[..]
45 &b"yes"[..]
46 } else {
46 } else {
47 &b"no"[..]
47 &b"no"[..]
48 },
48 },
49 layer
49 layer
50 )?;
50 )?;
51 }
51 }
52 Ok(())
52 Ok(())
53 }
53 }
54 }
54 }
55
55
56 pub enum ConfigSource {
56 pub enum ConfigSource {
57 /// Absolute path to a config file
57 /// Absolute path to a config file
58 AbsPath(PathBuf),
58 AbsPath(PathBuf),
59 /// Already parsed (from the CLI, env, Python resources, etc.)
59 /// Already parsed (from the CLI, env, Python resources, etc.)
60 Parsed(layer::ConfigLayer),
60 Parsed(layer::ConfigLayer),
61 }
61 }
62
62
63 #[derive(Debug)]
63 #[derive(Debug)]
64 pub struct ConfigValueParseError {
64 pub struct ConfigValueParseError {
65 pub origin: ConfigOrigin,
65 pub origin: ConfigOrigin,
66 pub line: Option<usize>,
66 pub line: Option<usize>,
67 pub section: Vec<u8>,
67 pub section: Vec<u8>,
68 pub item: Vec<u8>,
68 pub item: Vec<u8>,
69 pub value: Vec<u8>,
69 pub value: Vec<u8>,
70 pub expected_type: &'static str,
70 pub expected_type: &'static str,
71 }
71 }
72
72
73 impl fmt::Display for ConfigValueParseError {
73 impl fmt::Display for ConfigValueParseError {
74 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
74 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
75 // TODO: add origin and line number information, here and in
75 // TODO: add origin and line number information, here and in
76 // corresponding python code
76 // corresponding python code
77 write!(
77 write!(
78 f,
78 f,
79 "config error: {}.{} is not a {} ('{}')",
79 "config error: {}.{} is not a {} ('{}')",
80 String::from_utf8_lossy(&self.section),
80 String::from_utf8_lossy(&self.section),
81 String::from_utf8_lossy(&self.item),
81 String::from_utf8_lossy(&self.item),
82 self.expected_type,
82 self.expected_type,
83 String::from_utf8_lossy(&self.value)
83 String::from_utf8_lossy(&self.value)
84 )
84 )
85 }
85 }
86 }
86 }
87
87
88 /// Returns true if the config item is disabled by PLAIN or PLAINEXCEPT
88 /// Returns true if the config item is disabled by PLAIN or PLAINEXCEPT
89 fn should_ignore(plain: &PlainInfo, section: &[u8], item: &[u8]) -> bool {
89 fn should_ignore(plain: &PlainInfo, section: &[u8], item: &[u8]) -> bool {
90 // duplication with [_applyconfig] in [ui.py],
90 // duplication with [_applyconfig] in [ui.py],
91 if !plain.is_plain() {
91 if !plain.is_plain() {
92 return false;
92 return false;
93 }
93 }
94 if section == b"alias" {
94 if section == b"alias" {
95 return plain.plainalias();
95 return plain.plainalias();
96 }
96 }
97 if section == b"revsetalias" {
97 if section == b"revsetalias" {
98 return plain.plainrevsetalias();
98 return plain.plainrevsetalias();
99 }
99 }
100 if section == b"templatealias" {
100 if section == b"templatealias" {
101 return plain.plaintemplatealias();
101 return plain.plaintemplatealias();
102 }
102 }
103 if section == b"ui" {
103 if section == b"ui" {
104 let to_delete: &[&[u8]] = &[
104 let to_delete: &[&[u8]] = &[
105 b"debug",
105 b"debug",
106 b"fallbackencoding",
106 b"fallbackencoding",
107 b"quiet",
107 b"quiet",
108 b"slash",
108 b"slash",
109 b"logtemplate",
109 b"logtemplate",
110 b"message-output",
110 b"message-output",
111 b"statuscopies",
111 b"statuscopies",
112 b"style",
112 b"style",
113 b"traceback",
113 b"traceback",
114 b"verbose",
114 b"verbose",
115 ];
115 ];
116 return to_delete.contains(&item);
116 return to_delete.contains(&item);
117 }
117 }
118 let sections_to_delete: &[&[u8]] =
118 let sections_to_delete: &[&[u8]] =
119 &[b"defaults", b"commands", b"command-templates"];
119 &[b"defaults", b"commands", b"command-templates"];
120 return sections_to_delete.contains(&section);
120 sections_to_delete.contains(&section)
121 }
121 }
122
122
123 impl Config {
123 impl Config {
124 /// The configuration to use when printing configuration-loading errors
124 /// The configuration to use when printing configuration-loading errors
125 pub fn empty() -> Self {
125 pub fn empty() -> Self {
126 Self {
126 Self {
127 layers: Vec::new(),
127 layers: Vec::new(),
128 plain: PlainInfo::empty(),
128 plain: PlainInfo::empty(),
129 }
129 }
130 }
130 }
131
131
132 /// Load system and user configuration from various files.
132 /// Load system and user configuration from various files.
133 ///
133 ///
134 /// This is also affected by some environment variables.
134 /// This is also affected by some environment variables.
135 pub fn load_non_repo() -> Result<Self, ConfigError> {
135 pub fn load_non_repo() -> Result<Self, ConfigError> {
136 let mut config = Self::empty();
136 let mut config = Self::empty();
137 let opt_rc_path = env::var_os("HGRCPATH");
137 let opt_rc_path = env::var_os("HGRCPATH");
138 // HGRCPATH replaces system config
138 // HGRCPATH replaces system config
139 if opt_rc_path.is_none() {
139 if opt_rc_path.is_none() {
140 config.add_system_config()?
140 config.add_system_config()?
141 }
141 }
142
142
143 config.add_for_environment_variable("EDITOR", b"ui", b"editor");
143 config.add_for_environment_variable("EDITOR", b"ui", b"editor");
144 config.add_for_environment_variable("VISUAL", b"ui", b"editor");
144 config.add_for_environment_variable("VISUAL", b"ui", b"editor");
145 config.add_for_environment_variable("PAGER", b"pager", b"pager");
145 config.add_for_environment_variable("PAGER", b"pager", b"pager");
146
146
147 // These are set by `run-tests.py --rhg` to enable fallback for the
147 // These are set by `run-tests.py --rhg` to enable fallback for the
148 // entire test suite. Alternatives would be setting configuration
148 // entire test suite. Alternatives would be setting configuration
149 // through `$HGRCPATH` but some tests override that, or changing the
149 // through `$HGRCPATH` but some tests override that, or changing the
150 // `hg` shell alias to include `--config` but that disrupts tests that
150 // `hg` shell alias to include `--config` but that disrupts tests that
151 // print command lines and check expected output.
151 // print command lines and check expected output.
152 config.add_for_environment_variable(
152 config.add_for_environment_variable(
153 "RHG_ON_UNSUPPORTED",
153 "RHG_ON_UNSUPPORTED",
154 b"rhg",
154 b"rhg",
155 b"on-unsupported",
155 b"on-unsupported",
156 );
156 );
157 config.add_for_environment_variable(
157 config.add_for_environment_variable(
158 "RHG_FALLBACK_EXECUTABLE",
158 "RHG_FALLBACK_EXECUTABLE",
159 b"rhg",
159 b"rhg",
160 b"fallback-executable",
160 b"fallback-executable",
161 );
161 );
162
162
163 // HGRCPATH replaces user config
163 // HGRCPATH replaces user config
164 if opt_rc_path.is_none() {
164 if opt_rc_path.is_none() {
165 config.add_user_config()?
165 config.add_user_config()?
166 }
166 }
167 if let Some(rc_path) = &opt_rc_path {
167 if let Some(rc_path) = &opt_rc_path {
168 for path in env::split_paths(rc_path) {
168 for path in env::split_paths(rc_path) {
169 if !path.as_os_str().is_empty() {
169 if !path.as_os_str().is_empty() {
170 if path.is_dir() {
170 if path.is_dir() {
171 config.add_trusted_dir(&path)?
171 config.add_trusted_dir(&path)?
172 } else {
172 } else {
173 config.add_trusted_file(&path)?
173 config.add_trusted_file(&path)?
174 }
174 }
175 }
175 }
176 }
176 }
177 }
177 }
178 Ok(config)
178 Ok(config)
179 }
179 }
180
180
181 pub fn load_cli_args(
181 pub fn load_cli_args(
182 &mut self,
182 &mut self,
183 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
183 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
184 color_arg: Option<Vec<u8>>,
184 color_arg: Option<Vec<u8>>,
185 ) -> Result<(), ConfigError> {
185 ) -> Result<(), ConfigError> {
186 if let Some(layer) = ConfigLayer::parse_cli_args(cli_config_args)? {
186 if let Some(layer) = ConfigLayer::parse_cli_args(cli_config_args)? {
187 self.layers.push(layer)
187 self.layers.push(layer)
188 }
188 }
189 if let Some(arg) = color_arg {
189 if let Some(arg) = color_arg {
190 let mut layer = ConfigLayer::new(ConfigOrigin::CommandLineColor);
190 let mut layer = ConfigLayer::new(ConfigOrigin::CommandLineColor);
191 layer.add(b"ui"[..].into(), b"color"[..].into(), arg, None);
191 layer.add(b"ui"[..].into(), b"color"[..].into(), arg, None);
192 self.layers.push(layer)
192 self.layers.push(layer)
193 }
193 }
194 Ok(())
194 Ok(())
195 }
195 }
196
196
197 fn add_trusted_dir(&mut self, path: &Path) -> Result<(), ConfigError> {
197 fn add_trusted_dir(&mut self, path: &Path) -> Result<(), ConfigError> {
198 if let Some(entries) = std::fs::read_dir(path)
198 if let Some(entries) = std::fs::read_dir(path)
199 .when_reading_file(path)
199 .when_reading_file(path)
200 .io_not_found_as_none()?
200 .io_not_found_as_none()?
201 {
201 {
202 let mut file_paths = entries
202 let mut file_paths = entries
203 .map(|result| {
203 .map(|result| {
204 result.when_reading_file(path).map(|entry| entry.path())
204 result.when_reading_file(path).map(|entry| entry.path())
205 })
205 })
206 .collect::<Result<Vec<_>, _>>()?;
206 .collect::<Result<Vec<_>, _>>()?;
207 file_paths.sort();
207 file_paths.sort();
208 for file_path in &file_paths {
208 for file_path in &file_paths {
209 if file_path.extension() == Some(std::ffi::OsStr::new("rc")) {
209 if file_path.extension() == Some(std::ffi::OsStr::new("rc")) {
210 self.add_trusted_file(&file_path)?
210 self.add_trusted_file(file_path)?
211 }
211 }
212 }
212 }
213 }
213 }
214 Ok(())
214 Ok(())
215 }
215 }
216
216
217 fn add_trusted_file(&mut self, path: &Path) -> Result<(), ConfigError> {
217 fn add_trusted_file(&mut self, path: &Path) -> Result<(), ConfigError> {
218 if let Some(data) = std::fs::read(path)
218 if let Some(data) = std::fs::read(path)
219 .when_reading_file(path)
219 .when_reading_file(path)
220 .io_not_found_as_none()?
220 .io_not_found_as_none()?
221 {
221 {
222 self.layers.extend(ConfigLayer::parse(path, &data)?)
222 self.layers.extend(ConfigLayer::parse(path, &data)?)
223 }
223 }
224 Ok(())
224 Ok(())
225 }
225 }
226
226
227 fn add_for_environment_variable(
227 fn add_for_environment_variable(
228 &mut self,
228 &mut self,
229 var: &str,
229 var: &str,
230 section: &[u8],
230 section: &[u8],
231 key: &[u8],
231 key: &[u8],
232 ) {
232 ) {
233 if let Some(value) = env::var_os(var) {
233 if let Some(value) = env::var_os(var) {
234 let origin = layer::ConfigOrigin::Environment(var.into());
234 let origin = layer::ConfigOrigin::Environment(var.into());
235 let mut layer = ConfigLayer::new(origin);
235 let mut layer = ConfigLayer::new(origin);
236 layer.add(
236 layer.add(
237 section.to_owned(),
237 section.to_owned(),
238 key.to_owned(),
238 key.to_owned(),
239 get_bytes_from_os_str(value),
239 get_bytes_from_os_str(value),
240 None,
240 None,
241 );
241 );
242 self.layers.push(layer)
242 self.layers.push(layer)
243 }
243 }
244 }
244 }
245
245
246 #[cfg(unix)] // TODO: other platforms
246 #[cfg(unix)] // TODO: other platforms
247 fn add_system_config(&mut self) -> Result<(), ConfigError> {
247 fn add_system_config(&mut self) -> Result<(), ConfigError> {
248 let mut add_for_prefix = |prefix: &Path| -> Result<(), ConfigError> {
248 let mut add_for_prefix = |prefix: &Path| -> Result<(), ConfigError> {
249 let etc = prefix.join("etc").join("mercurial");
249 let etc = prefix.join("etc").join("mercurial");
250 self.add_trusted_file(&etc.join("hgrc"))?;
250 self.add_trusted_file(&etc.join("hgrc"))?;
251 self.add_trusted_dir(&etc.join("hgrc.d"))
251 self.add_trusted_dir(&etc.join("hgrc.d"))
252 };
252 };
253 let root = Path::new("/");
253 let root = Path::new("/");
254 // TODO: use `std::env::args_os().next().unwrap()` a.k.a. argv[0]
254 // TODO: use `std::env::args_os().next().unwrap()` a.k.a. argv[0]
255 // instead? TODO: can this be a relative path?
255 // instead? TODO: can this be a relative path?
256 let hg = crate::utils::current_exe()?;
256 let hg = crate::utils::current_exe()?;
257 // TODO: this order (per-installation then per-system) matches
257 // TODO: this order (per-installation then per-system) matches
258 // `systemrcpath()` in `mercurial/scmposix.py`, but
258 // `systemrcpath()` in `mercurial/scmposix.py`, but
259 // `mercurial/helptext/config.txt` suggests it should be reversed
259 // `mercurial/helptext/config.txt` suggests it should be reversed
260 if let Some(installation_prefix) = hg.parent().and_then(Path::parent) {
260 if let Some(installation_prefix) = hg.parent().and_then(Path::parent) {
261 if installation_prefix != root {
261 if installation_prefix != root {
262 add_for_prefix(&installation_prefix)?
262 add_for_prefix(installation_prefix)?
263 }
263 }
264 }
264 }
265 add_for_prefix(root)?;
265 add_for_prefix(root)?;
266 Ok(())
266 Ok(())
267 }
267 }
268
268
269 #[cfg(unix)] // TODO: other plateforms
269 #[cfg(unix)] // TODO: other plateforms
270 fn add_user_config(&mut self) -> Result<(), ConfigError> {
270 fn add_user_config(&mut self) -> Result<(), ConfigError> {
271 let opt_home = home::home_dir();
271 let opt_home = home::home_dir();
272 if let Some(home) = &opt_home {
272 if let Some(home) = &opt_home {
273 self.add_trusted_file(&home.join(".hgrc"))?
273 self.add_trusted_file(&home.join(".hgrc"))?
274 }
274 }
275 let darwin = cfg!(any(target_os = "macos", target_os = "ios"));
275 let darwin = cfg!(any(target_os = "macos", target_os = "ios"));
276 if !darwin {
276 if !darwin {
277 if let Some(config_home) = env::var_os("XDG_CONFIG_HOME")
277 if let Some(config_home) = env::var_os("XDG_CONFIG_HOME")
278 .map(PathBuf::from)
278 .map(PathBuf::from)
279 .or_else(|| opt_home.map(|home| home.join(".config")))
279 .or_else(|| opt_home.map(|home| home.join(".config")))
280 {
280 {
281 self.add_trusted_file(&config_home.join("hg").join("hgrc"))?
281 self.add_trusted_file(&config_home.join("hg").join("hgrc"))?
282 }
282 }
283 }
283 }
284 Ok(())
284 Ok(())
285 }
285 }
286
286
287 /// Loads in order, which means that the precedence is the same
287 /// Loads in order, which means that the precedence is the same
288 /// as the order of `sources`.
288 /// as the order of `sources`.
289 pub fn load_from_explicit_sources(
289 pub fn load_from_explicit_sources(
290 sources: Vec<ConfigSource>,
290 sources: Vec<ConfigSource>,
291 ) -> Result<Self, ConfigError> {
291 ) -> Result<Self, ConfigError> {
292 let mut layers = vec![];
292 let mut layers = vec![];
293
293
294 for source in sources.into_iter() {
294 for source in sources.into_iter() {
295 match source {
295 match source {
296 ConfigSource::Parsed(c) => layers.push(c),
296 ConfigSource::Parsed(c) => layers.push(c),
297 ConfigSource::AbsPath(c) => {
297 ConfigSource::AbsPath(c) => {
298 // TODO check if it should be trusted
298 // TODO check if it should be trusted
299 // mercurial/ui.py:427
299 // mercurial/ui.py:427
300 let data = match std::fs::read(&c) {
300 let data = match std::fs::read(&c) {
301 Err(_) => continue, // same as the python code
301 Err(_) => continue, // same as the python code
302 Ok(data) => data,
302 Ok(data) => data,
303 };
303 };
304 layers.extend(ConfigLayer::parse(&c, &data)?)
304 layers.extend(ConfigLayer::parse(&c, &data)?)
305 }
305 }
306 }
306 }
307 }
307 }
308
308
309 Ok(Config {
309 Ok(Config {
310 layers,
310 layers,
311 plain: PlainInfo::empty(),
311 plain: PlainInfo::empty(),
312 })
312 })
313 }
313 }
314
314
315 /// Loads the per-repository config into a new `Config` which is combined
315 /// Loads the per-repository config into a new `Config` which is combined
316 /// with `self`.
316 /// with `self`.
317 pub(crate) fn combine_with_repo(
317 pub(crate) fn combine_with_repo(
318 &self,
318 &self,
319 repo_config_files: &[PathBuf],
319 repo_config_files: &[PathBuf],
320 ) -> Result<Self, ConfigError> {
320 ) -> Result<Self, ConfigError> {
321 let (cli_layers, other_layers) = self
321 let (cli_layers, other_layers) = self
322 .layers
322 .layers
323 .iter()
323 .iter()
324 .cloned()
324 .cloned()
325 .partition(ConfigLayer::is_from_command_line);
325 .partition(ConfigLayer::is_from_command_line);
326
326
327 let mut repo_config = Self {
327 let mut repo_config = Self {
328 layers: other_layers,
328 layers: other_layers,
329 plain: PlainInfo::empty(),
329 plain: PlainInfo::empty(),
330 };
330 };
331 for path in repo_config_files {
331 for path in repo_config_files {
332 // TODO: check if this file should be trusted:
332 // TODO: check if this file should be trusted:
333 // `mercurial/ui.py:427`
333 // `mercurial/ui.py:427`
334 repo_config.add_trusted_file(path)?;
334 repo_config.add_trusted_file(path)?;
335 }
335 }
336 repo_config.layers.extend(cli_layers);
336 repo_config.layers.extend(cli_layers);
337 Ok(repo_config)
337 Ok(repo_config)
338 }
338 }
339
339
340 pub fn apply_plain(&mut self, plain: PlainInfo) {
340 pub fn apply_plain(&mut self, plain: PlainInfo) {
341 self.plain = plain;
341 self.plain = plain;
342 }
342 }
343
343
344 fn get_parse<'config, T: 'config>(
344 fn get_parse<'config, T: 'config>(
345 &'config self,
345 &'config self,
346 section: &[u8],
346 section: &[u8],
347 item: &[u8],
347 item: &[u8],
348 expected_type: &'static str,
348 expected_type: &'static str,
349 parse: impl Fn(&'config [u8]) -> Option<T>,
349 parse: impl Fn(&'config [u8]) -> Option<T>,
350 ) -> Result<Option<T>, ConfigValueParseError> {
350 ) -> Result<Option<T>, ConfigValueParseError> {
351 match self.get_inner(&section, &item) {
351 match self.get_inner(section, item) {
352 Some((layer, v)) => match parse(&v.bytes) {
352 Some((layer, v)) => match parse(&v.bytes) {
353 Some(b) => Ok(Some(b)),
353 Some(b) => Ok(Some(b)),
354 None => Err(ConfigValueParseError {
354 None => Err(ConfigValueParseError {
355 origin: layer.origin.to_owned(),
355 origin: layer.origin.to_owned(),
356 line: v.line,
356 line: v.line,
357 value: v.bytes.to_owned(),
357 value: v.bytes.to_owned(),
358 section: section.to_owned(),
358 section: section.to_owned(),
359 item: item.to_owned(),
359 item: item.to_owned(),
360 expected_type,
360 expected_type,
361 }),
361 }),
362 },
362 },
363 None => Ok(None),
363 None => Ok(None),
364 }
364 }
365 }
365 }
366
366
367 /// Returns an `Err` if the first value found is not a valid UTF-8 string.
367 /// Returns an `Err` if the first value found is not a valid UTF-8 string.
368 /// Otherwise, returns an `Ok(value)` if found, or `None`.
368 /// Otherwise, returns an `Ok(value)` if found, or `None`.
369 pub fn get_str(
369 pub fn get_str(
370 &self,
370 &self,
371 section: &[u8],
371 section: &[u8],
372 item: &[u8],
372 item: &[u8],
373 ) -> Result<Option<&str>, ConfigValueParseError> {
373 ) -> Result<Option<&str>, ConfigValueParseError> {
374 self.get_parse(section, item, "ASCII or UTF-8 string", |value| {
374 self.get_parse(section, item, "ASCII or UTF-8 string", |value| {
375 str::from_utf8(value).ok()
375 str::from_utf8(value).ok()
376 })
376 })
377 }
377 }
378
378
379 /// Returns an `Err` if the first value found is not a valid unsigned
379 /// Returns an `Err` if the first value found is not a valid unsigned
380 /// integer. Otherwise, returns an `Ok(value)` if found, or `None`.
380 /// integer. Otherwise, returns an `Ok(value)` if found, or `None`.
381 pub fn get_u32(
381 pub fn get_u32(
382 &self,
382 &self,
383 section: &[u8],
383 section: &[u8],
384 item: &[u8],
384 item: &[u8],
385 ) -> Result<Option<u32>, ConfigValueParseError> {
385 ) -> Result<Option<u32>, ConfigValueParseError> {
386 self.get_parse(section, item, "valid integer", |value| {
386 self.get_parse(section, item, "valid integer", |value| {
387 str::from_utf8(value).ok()?.parse().ok()
387 str::from_utf8(value).ok()?.parse().ok()
388 })
388 })
389 }
389 }
390
390
391 /// Returns an `Err` if the first value found is not a valid file size
391 /// Returns an `Err` if the first value found is not a valid file size
392 /// value such as `30` (default unit is bytes), `7 MB`, or `42.5 kb`.
392 /// value such as `30` (default unit is bytes), `7 MB`, or `42.5 kb`.
393 /// Otherwise, returns an `Ok(value_in_bytes)` if found, or `None`.
393 /// Otherwise, returns an `Ok(value_in_bytes)` if found, or `None`.
394 pub fn get_byte_size(
394 pub fn get_byte_size(
395 &self,
395 &self,
396 section: &[u8],
396 section: &[u8],
397 item: &[u8],
397 item: &[u8],
398 ) -> Result<Option<u64>, ConfigValueParseError> {
398 ) -> Result<Option<u64>, ConfigValueParseError> {
399 self.get_parse(section, item, "byte quantity", values::parse_byte_size)
399 self.get_parse(section, item, "byte quantity", values::parse_byte_size)
400 }
400 }
401
401
402 /// Returns an `Err` if the first value found is not a valid boolean.
402 /// Returns an `Err` if the first value found is not a valid boolean.
403 /// Otherwise, returns an `Ok(option)`, where `option` is the boolean if
403 /// Otherwise, returns an `Ok(option)`, where `option` is the boolean if
404 /// found, or `None`.
404 /// found, or `None`.
405 pub fn get_option(
405 pub fn get_option(
406 &self,
406 &self,
407 section: &[u8],
407 section: &[u8],
408 item: &[u8],
408 item: &[u8],
409 ) -> Result<Option<bool>, ConfigValueParseError> {
409 ) -> Result<Option<bool>, ConfigValueParseError> {
410 self.get_parse(section, item, "boolean", values::parse_bool)
410 self.get_parse(section, item, "boolean", values::parse_bool)
411 }
411 }
412
412
413 /// Returns the corresponding boolean in the config. Returns `Ok(false)`
413 /// Returns the corresponding boolean in the config. Returns `Ok(false)`
414 /// if the value is not found, an `Err` if it's not a valid boolean.
414 /// if the value is not found, an `Err` if it's not a valid boolean.
415 pub fn get_bool(
415 pub fn get_bool(
416 &self,
416 &self,
417 section: &[u8],
417 section: &[u8],
418 item: &[u8],
418 item: &[u8],
419 ) -> Result<bool, ConfigValueParseError> {
419 ) -> Result<bool, ConfigValueParseError> {
420 Ok(self.get_option(section, item)?.unwrap_or(false))
420 Ok(self.get_option(section, item)?.unwrap_or(false))
421 }
421 }
422
422
423 /// Returns `true` if the extension is enabled, `false` otherwise
423 /// Returns `true` if the extension is enabled, `false` otherwise
424 pub fn is_extension_enabled(&self, extension: &[u8]) -> bool {
424 pub fn is_extension_enabled(&self, extension: &[u8]) -> bool {
425 let value = self.get(b"extensions", extension);
425 let value = self.get(b"extensions", extension);
426 match value {
426 match value {
427 Some(c) => !c.starts_with(b"!"),
427 Some(c) => !c.starts_with(b"!"),
428 None => false,
428 None => false,
429 }
429 }
430 }
430 }
431
431
432 /// If there is an `item` value in `section`, parse and return a list of
432 /// If there is an `item` value in `section`, parse and return a list of
433 /// byte strings.
433 /// byte strings.
434 pub fn get_list(
434 pub fn get_list(
435 &self,
435 &self,
436 section: &[u8],
436 section: &[u8],
437 item: &[u8],
437 item: &[u8],
438 ) -> Option<Vec<Vec<u8>>> {
438 ) -> Option<Vec<Vec<u8>>> {
439 self.get(section, item).map(values::parse_list)
439 self.get(section, item).map(values::parse_list)
440 }
440 }
441
441
442 /// Returns the raw value bytes of the first one found, or `None`.
442 /// Returns the raw value bytes of the first one found, or `None`.
443 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> {
443 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> {
444 self.get_inner(section, item)
444 self.get_inner(section, item)
445 .map(|(_, value)| value.bytes.as_ref())
445 .map(|(_, value)| value.bytes.as_ref())
446 }
446 }
447
447
448 /// Returns the raw value bytes of the first one found, or `None`.
448 /// Returns the raw value bytes of the first one found, or `None`.
449 pub fn get_with_origin(
449 pub fn get_with_origin(
450 &self,
450 &self,
451 section: &[u8],
451 section: &[u8],
452 item: &[u8],
452 item: &[u8],
453 ) -> Option<(&[u8], &ConfigOrigin)> {
453 ) -> Option<(&[u8], &ConfigOrigin)> {
454 self.get_inner(section, item)
454 self.get_inner(section, item)
455 .map(|(layer, value)| (value.bytes.as_ref(), &layer.origin))
455 .map(|(layer, value)| (value.bytes.as_ref(), &layer.origin))
456 }
456 }
457
457
458 /// Returns the layer and the value of the first one found, or `None`.
458 /// Returns the layer and the value of the first one found, or `None`.
459 fn get_inner(
459 fn get_inner(
460 &self,
460 &self,
461 section: &[u8],
461 section: &[u8],
462 item: &[u8],
462 item: &[u8],
463 ) -> Option<(&ConfigLayer, &ConfigValue)> {
463 ) -> Option<(&ConfigLayer, &ConfigValue)> {
464 // Filter out the config items that are hidden by [PLAIN].
464 // Filter out the config items that are hidden by [PLAIN].
465 // This differs from python hg where we delete them from the config.
465 // This differs from python hg where we delete them from the config.
466 let should_ignore = should_ignore(&self.plain, &section, &item);
466 let should_ignore = should_ignore(&self.plain, section, item);
467 for layer in self.layers.iter().rev() {
467 for layer in self.layers.iter().rev() {
468 if !layer.trusted {
468 if !layer.trusted {
469 continue;
469 continue;
470 }
470 }
471 //The [PLAIN] config should not affect the defaults.
471 //The [PLAIN] config should not affect the defaults.
472 //
472 //
473 // However, PLAIN should also affect the "tweaked" defaults (unless
473 // However, PLAIN should also affect the "tweaked" defaults (unless
474 // "tweakdefault" is part of "HGPLAINEXCEPT").
474 // "tweakdefault" is part of "HGPLAINEXCEPT").
475 //
475 //
476 // In practice the tweak-default layer is only added when it is
476 // In practice the tweak-default layer is only added when it is
477 // relevant, so we can safely always take it into
477 // relevant, so we can safely always take it into
478 // account here.
478 // account here.
479 if should_ignore && !(layer.origin == ConfigOrigin::Tweakdefaults)
479 if should_ignore && !(layer.origin == ConfigOrigin::Tweakdefaults)
480 {
480 {
481 continue;
481 continue;
482 }
482 }
483 if let Some(v) = layer.get(&section, &item) {
483 if let Some(v) = layer.get(section, item) {
484 return Some((&layer, v));
484 return Some((layer, v));
485 }
485 }
486 }
486 }
487 None
487 None
488 }
488 }
489
489
490 /// Return all keys defined for the given section
490 /// Return all keys defined for the given section
491 pub fn get_section_keys(&self, section: &[u8]) -> HashSet<&[u8]> {
491 pub fn get_section_keys(&self, section: &[u8]) -> HashSet<&[u8]> {
492 self.layers
492 self.layers
493 .iter()
493 .iter()
494 .flat_map(|layer| layer.iter_keys(section))
494 .flat_map(|layer| layer.iter_keys(section))
495 .collect()
495 .collect()
496 }
496 }
497
497
498 /// Returns whether any key is defined in the given section
498 /// Returns whether any key is defined in the given section
499 pub fn has_non_empty_section(&self, section: &[u8]) -> bool {
499 pub fn has_non_empty_section(&self, section: &[u8]) -> bool {
500 self.layers
500 self.layers
501 .iter()
501 .iter()
502 .any(|layer| layer.has_non_empty_section(section))
502 .any(|layer| layer.has_non_empty_section(section))
503 }
503 }
504
504
505 /// Yields (key, value) pairs for everything in the given section
505 /// Yields (key, value) pairs for everything in the given section
506 pub fn iter_section<'a>(
506 pub fn iter_section<'a>(
507 &'a self,
507 &'a self,
508 section: &'a [u8],
508 section: &'a [u8],
509 ) -> impl Iterator<Item = (&[u8], &[u8])> + 'a {
509 ) -> impl Iterator<Item = (&[u8], &[u8])> + 'a {
510 // TODO: Use `Iterator`’s `.peekable()` when its `peek_mut` is
510 // TODO: Use `Iterator`’s `.peekable()` when its `peek_mut` is
511 // available:
511 // available:
512 // https://doc.rust-lang.org/nightly/std/iter/struct.Peekable.html#method.peek_mut
512 // https://doc.rust-lang.org/nightly/std/iter/struct.Peekable.html#method.peek_mut
513 struct Peekable<I: Iterator> {
513 struct Peekable<I: Iterator> {
514 iter: I,
514 iter: I,
515 /// Remember a peeked value, even if it was None.
515 /// Remember a peeked value, even if it was None.
516 peeked: Option<Option<I::Item>>,
516 peeked: Option<Option<I::Item>>,
517 }
517 }
518
518
519 impl<I: Iterator> Peekable<I> {
519 impl<I: Iterator> Peekable<I> {
520 fn new(iter: I) -> Self {
520 fn new(iter: I) -> Self {
521 Self { iter, peeked: None }
521 Self { iter, peeked: None }
522 }
522 }
523
523
524 fn next(&mut self) {
524 fn next(&mut self) {
525 self.peeked = None
525 self.peeked = None
526 }
526 }
527
527
528 fn peek_mut(&mut self) -> Option<&mut I::Item> {
528 fn peek_mut(&mut self) -> Option<&mut I::Item> {
529 let iter = &mut self.iter;
529 let iter = &mut self.iter;
530 self.peeked.get_or_insert_with(|| iter.next()).as_mut()
530 self.peeked.get_or_insert_with(|| iter.next()).as_mut()
531 }
531 }
532 }
532 }
533
533
534 // Deduplicate keys redefined in multiple layers
534 // Deduplicate keys redefined in multiple layers
535 let mut keys_already_seen = HashSet::new();
535 let mut keys_already_seen = HashSet::new();
536 let mut key_is_new =
536 let mut key_is_new =
537 move |&(key, _value): &(&'a [u8], &'a [u8])| -> bool {
537 move |&(key, _value): &(&'a [u8], &'a [u8])| -> bool {
538 keys_already_seen.insert(key)
538 keys_already_seen.insert(key)
539 };
539 };
540 // This is similar to `flat_map` + `filter_map`, except with a single
540 // This is similar to `flat_map` + `filter_map`, except with a single
541 // closure that owns `key_is_new` (and therefore the
541 // closure that owns `key_is_new` (and therefore the
542 // `keys_already_seen` set):
542 // `keys_already_seen` set):
543 let mut layer_iters = Peekable::new(
543 let mut layer_iters = Peekable::new(
544 self.layers
544 self.layers
545 .iter()
545 .iter()
546 .rev()
546 .rev()
547 .map(move |layer| layer.iter_section(section)),
547 .map(move |layer| layer.iter_section(section)),
548 );
548 );
549 std::iter::from_fn(move || loop {
549 std::iter::from_fn(move || loop {
550 if let Some(pair) = layer_iters.peek_mut()?.find(&mut key_is_new) {
550 if let Some(pair) = layer_iters.peek_mut()?.find(&mut key_is_new) {
551 return Some(pair);
551 return Some(pair);
552 } else {
552 } else {
553 layer_iters.next();
553 layer_iters.next();
554 }
554 }
555 })
555 })
556 }
556 }
557
557
558 /// Get raw values bytes from all layers (even untrusted ones) in order
558 /// Get raw values bytes from all layers (even untrusted ones) in order
559 /// of precedence.
559 /// of precedence.
560 #[cfg(test)]
560 #[cfg(test)]
561 fn get_all(&self, section: &[u8], item: &[u8]) -> Vec<&[u8]> {
561 fn get_all(&self, section: &[u8], item: &[u8]) -> Vec<&[u8]> {
562 let mut res = vec![];
562 let mut res = vec![];
563 for layer in self.layers.iter().rev() {
563 for layer in self.layers.iter().rev() {
564 if let Some(v) = layer.get(&section, &item) {
564 if let Some(v) = layer.get(section, item) {
565 res.push(v.bytes.as_ref());
565 res.push(v.bytes.as_ref());
566 }
566 }
567 }
567 }
568 res
568 res
569 }
569 }
570
570
571 // a config layer that's introduced by ui.tweakdefaults
571 // a config layer that's introduced by ui.tweakdefaults
572 fn tweakdefaults_layer() -> ConfigLayer {
572 fn tweakdefaults_layer() -> ConfigLayer {
573 let mut layer = ConfigLayer::new(ConfigOrigin::Tweakdefaults);
573 let mut layer = ConfigLayer::new(ConfigOrigin::Tweakdefaults);
574
574
575 let mut add = |section: &[u8], item: &[u8], value: &[u8]| {
575 let mut add = |section: &[u8], item: &[u8], value: &[u8]| {
576 layer.add(
576 layer.add(
577 section[..].into(),
577 section[..].into(),
578 item[..].into(),
578 item[..].into(),
579 value[..].into(),
579 value[..].into(),
580 None,
580 None,
581 );
581 );
582 };
582 };
583 // duplication of [tweakrc] from [ui.py]
583 // duplication of [tweakrc] from [ui.py]
584 add(b"ui", b"rollback", b"False");
584 add(b"ui", b"rollback", b"False");
585 add(b"ui", b"statuscopies", b"yes");
585 add(b"ui", b"statuscopies", b"yes");
586 add(b"ui", b"interface", b"curses");
586 add(b"ui", b"interface", b"curses");
587 add(b"ui", b"relative-paths", b"yes");
587 add(b"ui", b"relative-paths", b"yes");
588 add(b"commands", b"grep.all-files", b"True");
588 add(b"commands", b"grep.all-files", b"True");
589 add(b"commands", b"update.check", b"noconflict");
589 add(b"commands", b"update.check", b"noconflict");
590 add(b"commands", b"status.verbose", b"True");
590 add(b"commands", b"status.verbose", b"True");
591 add(b"commands", b"resolve.explicit-re-merge", b"True");
591 add(b"commands", b"resolve.explicit-re-merge", b"True");
592 add(b"git", b"git", b"1");
592 add(b"git", b"git", b"1");
593 add(b"git", b"showfunc", b"1");
593 add(b"git", b"showfunc", b"1");
594 add(b"git", b"word-diff", b"1");
594 add(b"git", b"word-diff", b"1");
595 return layer;
595 layer
596 }
596 }
597
597
598 // introduce the tweaked defaults as implied by ui.tweakdefaults
598 // introduce the tweaked defaults as implied by ui.tweakdefaults
599 pub fn tweakdefaults<'a>(&mut self) -> () {
599 pub fn tweakdefaults(&mut self) {
600 self.layers.insert(0, Config::tweakdefaults_layer());
600 self.layers.insert(0, Config::tweakdefaults_layer());
601 }
601 }
602 }
602 }
603
603
604 #[cfg(test)]
604 #[cfg(test)]
605 mod tests {
605 mod tests {
606 use super::*;
606 use super::*;
607 use pretty_assertions::assert_eq;
607 use pretty_assertions::assert_eq;
608 use std::fs::File;
608 use std::fs::File;
609 use std::io::Write;
609 use std::io::Write;
610
610
611 #[test]
611 #[test]
612 fn test_include_layer_ordering() {
612 fn test_include_layer_ordering() {
613 let tmpdir = tempfile::tempdir().unwrap();
613 let tmpdir = tempfile::tempdir().unwrap();
614 let tmpdir_path = tmpdir.path();
614 let tmpdir_path = tmpdir.path();
615 let mut included_file =
615 let mut included_file =
616 File::create(&tmpdir_path.join("included.rc")).unwrap();
616 File::create(&tmpdir_path.join("included.rc")).unwrap();
617
617
618 included_file.write_all(b"[section]\nitem=value1").unwrap();
618 included_file.write_all(b"[section]\nitem=value1").unwrap();
619 let base_config_path = tmpdir_path.join("base.rc");
619 let base_config_path = tmpdir_path.join("base.rc");
620 let mut config_file = File::create(&base_config_path).unwrap();
620 let mut config_file = File::create(&base_config_path).unwrap();
621 let data =
621 let data =
622 b"[section]\nitem=value0\n%include included.rc\nitem=value2\n\
622 b"[section]\nitem=value0\n%include included.rc\nitem=value2\n\
623 [section2]\ncount = 4\nsize = 1.5 KB\nnot-count = 1.5\nnot-size = 1 ub";
623 [section2]\ncount = 4\nsize = 1.5 KB\nnot-count = 1.5\nnot-size = 1 ub";
624 config_file.write_all(data).unwrap();
624 config_file.write_all(data).unwrap();
625
625
626 let sources = vec![ConfigSource::AbsPath(base_config_path)];
626 let sources = vec![ConfigSource::AbsPath(base_config_path)];
627 let config = Config::load_from_explicit_sources(sources)
627 let config = Config::load_from_explicit_sources(sources)
628 .expect("expected valid config");
628 .expect("expected valid config");
629
629
630 let (_, value) = config.get_inner(b"section", b"item").unwrap();
630 let (_, value) = config.get_inner(b"section", b"item").unwrap();
631 assert_eq!(
631 assert_eq!(
632 value,
632 value,
633 &ConfigValue {
633 &ConfigValue {
634 bytes: b"value2".to_vec(),
634 bytes: b"value2".to_vec(),
635 line: Some(4)
635 line: Some(4)
636 }
636 }
637 );
637 );
638
638
639 let value = config.get(b"section", b"item").unwrap();
639 let value = config.get(b"section", b"item").unwrap();
640 assert_eq!(value, b"value2",);
640 assert_eq!(value, b"value2",);
641 assert_eq!(
641 assert_eq!(
642 config.get_all(b"section", b"item"),
642 config.get_all(b"section", b"item"),
643 [b"value2", b"value1", b"value0"]
643 [b"value2", b"value1", b"value0"]
644 );
644 );
645
645
646 assert_eq!(config.get_u32(b"section2", b"count").unwrap(), Some(4));
646 assert_eq!(config.get_u32(b"section2", b"count").unwrap(), Some(4));
647 assert_eq!(
647 assert_eq!(
648 config.get_byte_size(b"section2", b"size").unwrap(),
648 config.get_byte_size(b"section2", b"size").unwrap(),
649 Some(1024 + 512)
649 Some(1024 + 512)
650 );
650 );
651 assert!(config.get_u32(b"section2", b"not-count").is_err());
651 assert!(config.get_u32(b"section2", b"not-count").is_err());
652 assert!(config.get_byte_size(b"section2", b"not-size").is_err());
652 assert!(config.get_byte_size(b"section2", b"not-size").is_err());
653 }
653 }
654 }
654 }
@@ -1,349 +1,345 b''
1 // layer.rs
1 // layer.rs
2 //
2 //
3 // Copyright 2020
3 // Copyright 2020
4 // Valentin Gatien-Baron,
4 // Valentin Gatien-Baron,
5 // Raphaël Gomès <rgomes@octobus.net>
5 // Raphaël Gomès <rgomes@octobus.net>
6 //
6 //
7 // This software may be used and distributed according to the terms of the
7 // This software may be used and distributed according to the terms of the
8 // GNU General Public License version 2 or any later version.
8 // GNU General Public License version 2 or any later version.
9
9
10 use crate::errors::HgError;
10 use crate::errors::HgError;
11 use crate::exit_codes::CONFIG_PARSE_ERROR_ABORT;
11 use crate::exit_codes::CONFIG_PARSE_ERROR_ABORT;
12 use crate::utils::files::{get_bytes_from_path, get_path_from_bytes};
12 use crate::utils::files::{get_bytes_from_path, get_path_from_bytes};
13 use format_bytes::{format_bytes, write_bytes, DisplayBytes};
13 use format_bytes::{format_bytes, write_bytes, DisplayBytes};
14 use lazy_static::lazy_static;
14 use lazy_static::lazy_static;
15 use regex::bytes::Regex;
15 use regex::bytes::Regex;
16 use std::collections::HashMap;
16 use std::collections::HashMap;
17 use std::path::{Path, PathBuf};
17 use std::path::{Path, PathBuf};
18
18
19 lazy_static! {
19 lazy_static! {
20 static ref SECTION_RE: Regex = make_regex(r"^\[([^\[]+)\]");
20 static ref SECTION_RE: Regex = make_regex(r"^\[([^\[]+)\]");
21 static ref ITEM_RE: Regex = make_regex(r"^([^=\s][^=]*?)\s*=\s*((.*\S)?)");
21 static ref ITEM_RE: Regex = make_regex(r"^([^=\s][^=]*?)\s*=\s*((.*\S)?)");
22 /// Continuation whitespace
22 /// Continuation whitespace
23 static ref CONT_RE: Regex = make_regex(r"^\s+(\S|\S.*\S)\s*$");
23 static ref CONT_RE: Regex = make_regex(r"^\s+(\S|\S.*\S)\s*$");
24 static ref EMPTY_RE: Regex = make_regex(r"^(;|#|\s*$)");
24 static ref EMPTY_RE: Regex = make_regex(r"^(;|#|\s*$)");
25 static ref COMMENT_RE: Regex = make_regex(r"^(;|#)");
25 static ref COMMENT_RE: Regex = make_regex(r"^(;|#)");
26 /// A directive that allows for removing previous entries
26 /// A directive that allows for removing previous entries
27 static ref UNSET_RE: Regex = make_regex(r"^%unset\s+(\S+)");
27 static ref UNSET_RE: Regex = make_regex(r"^%unset\s+(\S+)");
28 /// A directive that allows for including other config files
28 /// A directive that allows for including other config files
29 static ref INCLUDE_RE: Regex = make_regex(r"^%include\s+(\S|\S.*\S)\s*$");
29 static ref INCLUDE_RE: Regex = make_regex(r"^%include\s+(\S|\S.*\S)\s*$");
30 }
30 }
31
31
32 /// All config values separated by layers of precedence.
32 /// All config values separated by layers of precedence.
33 /// Each config source may be split in multiple layers if `%include` directives
33 /// Each config source may be split in multiple layers if `%include` directives
34 /// are used.
34 /// are used.
35 /// TODO detail the general precedence
35 /// TODO detail the general precedence
36 #[derive(Clone)]
36 #[derive(Clone)]
37 pub struct ConfigLayer {
37 pub struct ConfigLayer {
38 /// Mapping of the sections to their items
38 /// Mapping of the sections to their items
39 sections: HashMap<Vec<u8>, ConfigItem>,
39 sections: HashMap<Vec<u8>, ConfigItem>,
40 /// All sections (and their items/values) in a layer share the same origin
40 /// All sections (and their items/values) in a layer share the same origin
41 pub origin: ConfigOrigin,
41 pub origin: ConfigOrigin,
42 /// Whether this layer comes from a trusted user or group
42 /// Whether this layer comes from a trusted user or group
43 pub trusted: bool,
43 pub trusted: bool,
44 }
44 }
45
45
46 impl ConfigLayer {
46 impl ConfigLayer {
47 pub fn new(origin: ConfigOrigin) -> Self {
47 pub fn new(origin: ConfigOrigin) -> Self {
48 ConfigLayer {
48 ConfigLayer {
49 sections: HashMap::new(),
49 sections: HashMap::new(),
50 trusted: true, // TODO check
50 trusted: true, // TODO check
51 origin,
51 origin,
52 }
52 }
53 }
53 }
54
54
55 /// Parse `--config` CLI arguments and return a layer if there’s any
55 /// Parse `--config` CLI arguments and return a layer if there’s any
56 pub(crate) fn parse_cli_args(
56 pub(crate) fn parse_cli_args(
57 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
57 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
58 ) -> Result<Option<Self>, ConfigError> {
58 ) -> Result<Option<Self>, ConfigError> {
59 fn parse_one(arg: &[u8]) -> Option<(Vec<u8>, Vec<u8>, Vec<u8>)> {
59 fn parse_one(arg: &[u8]) -> Option<(Vec<u8>, Vec<u8>, Vec<u8>)> {
60 use crate::utils::SliceExt;
60 use crate::utils::SliceExt;
61
61
62 let (section_and_item, value) = arg.split_2(b'=')?;
62 let (section_and_item, value) = arg.split_2(b'=')?;
63 let (section, item) = section_and_item.trim().split_2(b'.')?;
63 let (section, item) = section_and_item.trim().split_2(b'.')?;
64 Some((
64 Some((
65 section.to_owned(),
65 section.to_owned(),
66 item.to_owned(),
66 item.to_owned(),
67 value.trim().to_owned(),
67 value.trim().to_owned(),
68 ))
68 ))
69 }
69 }
70
70
71 let mut layer = Self::new(ConfigOrigin::CommandLine);
71 let mut layer = Self::new(ConfigOrigin::CommandLine);
72 for arg in cli_config_args {
72 for arg in cli_config_args {
73 let arg = arg.as_ref();
73 let arg = arg.as_ref();
74 if let Some((section, item, value)) = parse_one(arg) {
74 if let Some((section, item, value)) = parse_one(arg) {
75 layer.add(section, item, value, None);
75 layer.add(section, item, value, None);
76 } else {
76 } else {
77 Err(HgError::abort(
77 Err(HgError::abort(
78 format!(
78 format!(
79 "abort: malformed --config option: '{}' \
79 "abort: malformed --config option: '{}' \
80 (use --config section.name=value)",
80 (use --config section.name=value)",
81 String::from_utf8_lossy(arg),
81 String::from_utf8_lossy(arg),
82 ),
82 ),
83 CONFIG_PARSE_ERROR_ABORT,
83 CONFIG_PARSE_ERROR_ABORT,
84 None,
84 None,
85 ))?
85 ))?
86 }
86 }
87 }
87 }
88 if layer.sections.is_empty() {
88 if layer.sections.is_empty() {
89 Ok(None)
89 Ok(None)
90 } else {
90 } else {
91 Ok(Some(layer))
91 Ok(Some(layer))
92 }
92 }
93 }
93 }
94
94
95 /// Returns whether this layer comes from `--config` CLI arguments
95 /// Returns whether this layer comes from `--config` CLI arguments
96 pub(crate) fn is_from_command_line(&self) -> bool {
96 pub(crate) fn is_from_command_line(&self) -> bool {
97 if let ConfigOrigin::CommandLine = self.origin {
97 matches!(self.origin, ConfigOrigin::CommandLine)
98 true
99 } else {
100 false
101 }
102 }
98 }
103
99
104 /// Add an entry to the config, overwriting the old one if already present.
100 /// Add an entry to the config, overwriting the old one if already present.
105 pub fn add(
101 pub fn add(
106 &mut self,
102 &mut self,
107 section: Vec<u8>,
103 section: Vec<u8>,
108 item: Vec<u8>,
104 item: Vec<u8>,
109 value: Vec<u8>,
105 value: Vec<u8>,
110 line: Option<usize>,
106 line: Option<usize>,
111 ) {
107 ) {
112 self.sections
108 self.sections
113 .entry(section)
109 .entry(section)
114 .or_insert_with(|| HashMap::new())
110 .or_insert_with(HashMap::new)
115 .insert(item, ConfigValue { bytes: value, line });
111 .insert(item, ConfigValue { bytes: value, line });
116 }
112 }
117
113
118 /// Returns the config value in `<section>.<item>` if it exists
114 /// Returns the config value in `<section>.<item>` if it exists
119 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&ConfigValue> {
115 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&ConfigValue> {
120 Some(self.sections.get(section)?.get(item)?)
116 self.sections.get(section)?.get(item)
121 }
117 }
122
118
123 /// Returns the keys defined in the given section
119 /// Returns the keys defined in the given section
124 pub fn iter_keys(&self, section: &[u8]) -> impl Iterator<Item = &[u8]> {
120 pub fn iter_keys(&self, section: &[u8]) -> impl Iterator<Item = &[u8]> {
125 self.sections
121 self.sections
126 .get(section)
122 .get(section)
127 .into_iter()
123 .into_iter()
128 .flat_map(|section| section.keys().map(|vec| &**vec))
124 .flat_map(|section| section.keys().map(|vec| &**vec))
129 }
125 }
130
126
131 /// Returns the (key, value) pairs defined in the given section
127 /// Returns the (key, value) pairs defined in the given section
132 pub fn iter_section<'layer>(
128 pub fn iter_section<'layer>(
133 &'layer self,
129 &'layer self,
134 section: &[u8],
130 section: &[u8],
135 ) -> impl Iterator<Item = (&'layer [u8], &'layer [u8])> {
131 ) -> impl Iterator<Item = (&'layer [u8], &'layer [u8])> {
136 self.sections
132 self.sections
137 .get(section)
133 .get(section)
138 .into_iter()
134 .into_iter()
139 .flat_map(|section| section.iter().map(|(k, v)| (&**k, &*v.bytes)))
135 .flat_map(|section| section.iter().map(|(k, v)| (&**k, &*v.bytes)))
140 }
136 }
141
137
142 /// Returns whether any key is defined in the given section
138 /// Returns whether any key is defined in the given section
143 pub fn has_non_empty_section(&self, section: &[u8]) -> bool {
139 pub fn has_non_empty_section(&self, section: &[u8]) -> bool {
144 self.sections
140 self.sections
145 .get(section)
141 .get(section)
146 .map_or(false, |section| !section.is_empty())
142 .map_or(false, |section| !section.is_empty())
147 }
143 }
148
144
149 pub fn is_empty(&self) -> bool {
145 pub fn is_empty(&self) -> bool {
150 self.sections.is_empty()
146 self.sections.is_empty()
151 }
147 }
152
148
153 /// Returns a `Vec` of layers in order of precedence (so, in read order),
149 /// Returns a `Vec` of layers in order of precedence (so, in read order),
154 /// recursively parsing the `%include` directives if any.
150 /// recursively parsing the `%include` directives if any.
155 pub fn parse(src: &Path, data: &[u8]) -> Result<Vec<Self>, ConfigError> {
151 pub fn parse(src: &Path, data: &[u8]) -> Result<Vec<Self>, ConfigError> {
156 let mut layers = vec![];
152 let mut layers = vec![];
157
153
158 // Discard byte order mark if any
154 // Discard byte order mark if any
159 let data = if data.starts_with(b"\xef\xbb\xbf") {
155 let data = if data.starts_with(b"\xef\xbb\xbf") {
160 &data[3..]
156 &data[3..]
161 } else {
157 } else {
162 data
158 data
163 };
159 };
164
160
165 // TODO check if it's trusted
161 // TODO check if it's trusted
166 let mut current_layer = Self::new(ConfigOrigin::File(src.to_owned()));
162 let mut current_layer = Self::new(ConfigOrigin::File(src.to_owned()));
167
163
168 let mut lines_iter =
164 let mut lines_iter =
169 data.split(|b| *b == b'\n').enumerate().peekable();
165 data.split(|b| *b == b'\n').enumerate().peekable();
170 let mut section = b"".to_vec();
166 let mut section = b"".to_vec();
171
167
172 while let Some((index, bytes)) = lines_iter.next() {
168 while let Some((index, bytes)) = lines_iter.next() {
173 let line = Some(index + 1);
169 let line = Some(index + 1);
174 if let Some(m) = INCLUDE_RE.captures(&bytes) {
170 if let Some(m) = INCLUDE_RE.captures(bytes) {
175 let filename_bytes = &m[1];
171 let filename_bytes = &m[1];
176 let filename_bytes = crate::utils::expand_vars(filename_bytes);
172 let filename_bytes = crate::utils::expand_vars(filename_bytes);
177 // `Path::parent` only fails for the root directory,
173 // `Path::parent` only fails for the root directory,
178 // which `src` can’t be since we’ve managed to open it as a
174 // which `src` can’t be since we’ve managed to open it as a
179 // file.
175 // file.
180 let dir = src
176 let dir = src
181 .parent()
177 .parent()
182 .expect("Path::parent fail on a file we’ve read");
178 .expect("Path::parent fail on a file we’ve read");
183 // `Path::join` with an absolute argument correctly ignores the
179 // `Path::join` with an absolute argument correctly ignores the
184 // base path
180 // base path
185 let filename = dir.join(&get_path_from_bytes(&filename_bytes));
181 let filename = dir.join(&get_path_from_bytes(&filename_bytes));
186 match std::fs::read(&filename) {
182 match std::fs::read(&filename) {
187 Ok(data) => {
183 Ok(data) => {
188 layers.push(current_layer);
184 layers.push(current_layer);
189 layers.extend(Self::parse(&filename, &data)?);
185 layers.extend(Self::parse(&filename, &data)?);
190 current_layer =
186 current_layer =
191 Self::new(ConfigOrigin::File(src.to_owned()));
187 Self::new(ConfigOrigin::File(src.to_owned()));
192 }
188 }
193 Err(error) => {
189 Err(error) => {
194 if error.kind() != std::io::ErrorKind::NotFound {
190 if error.kind() != std::io::ErrorKind::NotFound {
195 return Err(ConfigParseError {
191 return Err(ConfigParseError {
196 origin: ConfigOrigin::File(src.to_owned()),
192 origin: ConfigOrigin::File(src.to_owned()),
197 line,
193 line,
198 message: format_bytes!(
194 message: format_bytes!(
199 b"cannot include {} ({})",
195 b"cannot include {} ({})",
200 filename_bytes,
196 filename_bytes,
201 format_bytes::Utf8(error)
197 format_bytes::Utf8(error)
202 ),
198 ),
203 }
199 }
204 .into());
200 .into());
205 }
201 }
206 }
202 }
207 }
203 }
208 } else if let Some(_) = EMPTY_RE.captures(&bytes) {
204 } else if EMPTY_RE.captures(bytes).is_some() {
209 } else if let Some(m) = SECTION_RE.captures(&bytes) {
205 } else if let Some(m) = SECTION_RE.captures(bytes) {
210 section = m[1].to_vec();
206 section = m[1].to_vec();
211 } else if let Some(m) = ITEM_RE.captures(&bytes) {
207 } else if let Some(m) = ITEM_RE.captures(bytes) {
212 let item = m[1].to_vec();
208 let item = m[1].to_vec();
213 let mut value = m[2].to_vec();
209 let mut value = m[2].to_vec();
214 loop {
210 loop {
215 match lines_iter.peek() {
211 match lines_iter.peek() {
216 None => break,
212 None => break,
217 Some((_, v)) => {
213 Some((_, v)) => {
218 if let Some(_) = COMMENT_RE.captures(&v) {
214 if COMMENT_RE.captures(v).is_some() {
219 } else if let Some(_) = CONT_RE.captures(&v) {
215 } else if CONT_RE.captures(v).is_some() {
220 value.extend(b"\n");
216 value.extend(b"\n");
221 value.extend(&m[1]);
217 value.extend(&m[1]);
222 } else {
218 } else {
223 break;
219 break;
224 }
220 }
225 }
221 }
226 };
222 };
227 lines_iter.next();
223 lines_iter.next();
228 }
224 }
229 current_layer.add(section.clone(), item, value, line);
225 current_layer.add(section.clone(), item, value, line);
230 } else if let Some(m) = UNSET_RE.captures(&bytes) {
226 } else if let Some(m) = UNSET_RE.captures(bytes) {
231 if let Some(map) = current_layer.sections.get_mut(&section) {
227 if let Some(map) = current_layer.sections.get_mut(&section) {
232 map.remove(&m[1]);
228 map.remove(&m[1]);
233 }
229 }
234 } else {
230 } else {
235 let message = if bytes.starts_with(b" ") {
231 let message = if bytes.starts_with(b" ") {
236 format_bytes!(b"unexpected leading whitespace: {}", bytes)
232 format_bytes!(b"unexpected leading whitespace: {}", bytes)
237 } else {
233 } else {
238 bytes.to_owned()
234 bytes.to_owned()
239 };
235 };
240 return Err(ConfigParseError {
236 return Err(ConfigParseError {
241 origin: ConfigOrigin::File(src.to_owned()),
237 origin: ConfigOrigin::File(src.to_owned()),
242 line,
238 line,
243 message,
239 message,
244 }
240 }
245 .into());
241 .into());
246 }
242 }
247 }
243 }
248 if !current_layer.is_empty() {
244 if !current_layer.is_empty() {
249 layers.push(current_layer);
245 layers.push(current_layer);
250 }
246 }
251 Ok(layers)
247 Ok(layers)
252 }
248 }
253 }
249 }
254
250
255 impl DisplayBytes for ConfigLayer {
251 impl DisplayBytes for ConfigLayer {
256 fn display_bytes(
252 fn display_bytes(
257 &self,
253 &self,
258 out: &mut dyn std::io::Write,
254 out: &mut dyn std::io::Write,
259 ) -> std::io::Result<()> {
255 ) -> std::io::Result<()> {
260 let mut sections: Vec<_> = self.sections.iter().collect();
256 let mut sections: Vec<_> = self.sections.iter().collect();
261 sections.sort_by(|e0, e1| e0.0.cmp(e1.0));
257 sections.sort_by(|e0, e1| e0.0.cmp(e1.0));
262
258
263 for (section, items) in sections.into_iter() {
259 for (section, items) in sections.into_iter() {
264 let mut items: Vec<_> = items.into_iter().collect();
260 let mut items: Vec<_> = items.iter().collect();
265 items.sort_by(|e0, e1| e0.0.cmp(e1.0));
261 items.sort_by(|e0, e1| e0.0.cmp(e1.0));
266
262
267 for (item, config_entry) in items {
263 for (item, config_entry) in items {
268 write_bytes!(
264 write_bytes!(
269 out,
265 out,
270 b"{}.{}={} # {}\n",
266 b"{}.{}={} # {}\n",
271 section,
267 section,
272 item,
268 item,
273 &config_entry.bytes,
269 &config_entry.bytes,
274 &self.origin,
270 &self.origin,
275 )?
271 )?
276 }
272 }
277 }
273 }
278 Ok(())
274 Ok(())
279 }
275 }
280 }
276 }
281
277
282 /// Mapping of section item to value.
278 /// Mapping of section item to value.
283 /// In the following:
279 /// In the following:
284 /// ```text
280 /// ```text
285 /// [ui]
281 /// [ui]
286 /// paginate=no
282 /// paginate=no
287 /// ```
283 /// ```
288 /// "paginate" is the section item and "no" the value.
284 /// "paginate" is the section item and "no" the value.
289 pub type ConfigItem = HashMap<Vec<u8>, ConfigValue>;
285 pub type ConfigItem = HashMap<Vec<u8>, ConfigValue>;
290
286
291 #[derive(Clone, Debug, PartialEq)]
287 #[derive(Clone, Debug, PartialEq)]
292 pub struct ConfigValue {
288 pub struct ConfigValue {
293 /// The raw bytes of the value (be it from the CLI, env or from a file)
289 /// The raw bytes of the value (be it from the CLI, env or from a file)
294 pub bytes: Vec<u8>,
290 pub bytes: Vec<u8>,
295 /// Only present if the value comes from a file, 1-indexed.
291 /// Only present if the value comes from a file, 1-indexed.
296 pub line: Option<usize>,
292 pub line: Option<usize>,
297 }
293 }
298
294
299 #[derive(Clone, Debug, PartialEq, Eq)]
295 #[derive(Clone, Debug, PartialEq, Eq)]
300 pub enum ConfigOrigin {
296 pub enum ConfigOrigin {
301 /// From a configuration file
297 /// From a configuration file
302 File(PathBuf),
298 File(PathBuf),
303 /// From [ui.tweakdefaults]
299 /// From [ui.tweakdefaults]
304 Tweakdefaults,
300 Tweakdefaults,
305 /// From a `--config` CLI argument
301 /// From a `--config` CLI argument
306 CommandLine,
302 CommandLine,
307 /// From a `--color` CLI argument
303 /// From a `--color` CLI argument
308 CommandLineColor,
304 CommandLineColor,
309 /// From environment variables like `$PAGER` or `$EDITOR`
305 /// From environment variables like `$PAGER` or `$EDITOR`
310 Environment(Vec<u8>),
306 Environment(Vec<u8>),
311 /* TODO defaults (configitems.py)
307 /* TODO defaults (configitems.py)
312 * TODO extensions
308 * TODO extensions
313 * TODO Python resources?
309 * TODO Python resources?
314 * Others? */
310 * Others? */
315 }
311 }
316
312
317 impl DisplayBytes for ConfigOrigin {
313 impl DisplayBytes for ConfigOrigin {
318 fn display_bytes(
314 fn display_bytes(
319 &self,
315 &self,
320 out: &mut dyn std::io::Write,
316 out: &mut dyn std::io::Write,
321 ) -> std::io::Result<()> {
317 ) -> std::io::Result<()> {
322 match self {
318 match self {
323 ConfigOrigin::File(p) => out.write_all(&get_bytes_from_path(p)),
319 ConfigOrigin::File(p) => out.write_all(&get_bytes_from_path(p)),
324 ConfigOrigin::CommandLine => out.write_all(b"--config"),
320 ConfigOrigin::CommandLine => out.write_all(b"--config"),
325 ConfigOrigin::CommandLineColor => out.write_all(b"--color"),
321 ConfigOrigin::CommandLineColor => out.write_all(b"--color"),
326 ConfigOrigin::Environment(e) => write_bytes!(out, b"${}", e),
322 ConfigOrigin::Environment(e) => write_bytes!(out, b"${}", e),
327 ConfigOrigin::Tweakdefaults => {
323 ConfigOrigin::Tweakdefaults => {
328 write_bytes!(out, b"ui.tweakdefaults")
324 write_bytes!(out, b"ui.tweakdefaults")
329 }
325 }
330 }
326 }
331 }
327 }
332 }
328 }
333
329
334 #[derive(Debug)]
330 #[derive(Debug)]
335 pub struct ConfigParseError {
331 pub struct ConfigParseError {
336 pub origin: ConfigOrigin,
332 pub origin: ConfigOrigin,
337 pub line: Option<usize>,
333 pub line: Option<usize>,
338 pub message: Vec<u8>,
334 pub message: Vec<u8>,
339 }
335 }
340
336
341 #[derive(Debug, derive_more::From)]
337 #[derive(Debug, derive_more::From)]
342 pub enum ConfigError {
338 pub enum ConfigError {
343 Parse(ConfigParseError),
339 Parse(ConfigParseError),
344 Other(HgError),
340 Other(HgError),
345 }
341 }
346
342
347 fn make_regex(pattern: &'static str) -> Regex {
343 fn make_regex(pattern: &'static str) -> Regex {
348 Regex::new(pattern).expect("expected a valid regex")
344 Regex::new(pattern).expect("expected a valid regex")
349 }
345 }
@@ -1,271 +1,267 b''
1 //! Parsing functions for various type of configuration values.
1 //! Parsing functions for various type of configuration values.
2 //!
2 //!
3 //! Returning `None` indicates a syntax error. Using a `Result` would be more
3 //! Returning `None` indicates a syntax error. Using a `Result` would be more
4 //! correct but would take more boilerplate for converting between error types,
4 //! correct but would take more boilerplate for converting between error types,
5 //! compared to using `.ok()` on inner results of various error types to
5 //! compared to using `.ok()` on inner results of various error types to
6 //! convert them all to options. The `Config::get_parse` method later converts
6 //! convert them all to options. The `Config::get_parse` method later converts
7 //! those options to results with `ConfigValueParseError`, which contains
7 //! those options to results with `ConfigValueParseError`, which contains
8 //! details about where the value came from (but omits details of what’s
8 //! details about where the value came from (but omits details of what’s
9 //! invalid inside the value).
9 //! invalid inside the value).
10
10
11 use crate::utils::SliceExt;
11 use crate::utils::SliceExt;
12
12
13 pub(super) fn parse_bool(v: &[u8]) -> Option<bool> {
13 pub(super) fn parse_bool(v: &[u8]) -> Option<bool> {
14 match v.to_ascii_lowercase().as_slice() {
14 match v.to_ascii_lowercase().as_slice() {
15 b"1" | b"yes" | b"true" | b"on" | b"always" => Some(true),
15 b"1" | b"yes" | b"true" | b"on" | b"always" => Some(true),
16 b"0" | b"no" | b"false" | b"off" | b"never" => Some(false),
16 b"0" | b"no" | b"false" | b"off" | b"never" => Some(false),
17 _ => None,
17 _ => None,
18 }
18 }
19 }
19 }
20
20
21 pub(super) fn parse_byte_size(value: &[u8]) -> Option<u64> {
21 pub(super) fn parse_byte_size(value: &[u8]) -> Option<u64> {
22 let value = std::str::from_utf8(value).ok()?.to_ascii_lowercase();
22 let value = std::str::from_utf8(value).ok()?.to_ascii_lowercase();
23 const UNITS: &[(&str, u64)] = &[
23 const UNITS: &[(&str, u64)] = &[
24 ("g", 1 << 30),
24 ("g", 1 << 30),
25 ("gb", 1 << 30),
25 ("gb", 1 << 30),
26 ("m", 1 << 20),
26 ("m", 1 << 20),
27 ("mb", 1 << 20),
27 ("mb", 1 << 20),
28 ("k", 1 << 10),
28 ("k", 1 << 10),
29 ("kb", 1 << 10),
29 ("kb", 1 << 10),
30 ("b", 1 << 0), // Needs to be last
30 ("b", 1 << 0), // Needs to be last
31 ];
31 ];
32 for &(unit, multiplier) in UNITS {
32 for &(unit, multiplier) in UNITS {
33 if let Some(value) = value.strip_suffix(unit) {
33 if let Some(value) = value.strip_suffix(unit) {
34 let float: f64 = value.trim().parse().ok()?;
34 let float: f64 = value.trim().parse().ok()?;
35 if float >= 0.0 {
35 if float >= 0.0 {
36 return Some((float * multiplier as f64).round() as u64);
36 return Some((float * multiplier as f64).round() as u64);
37 } else {
37 } else {
38 return None;
38 return None;
39 }
39 }
40 }
40 }
41 }
41 }
42 value.parse().ok()
42 value.parse().ok()
43 }
43 }
44
44
45 /// Parse a config value as a list of sub-values.
45 /// Parse a config value as a list of sub-values.
46 ///
46 ///
47 /// Ported from `parselist` in `mercurial/utils/stringutil.py`
47 /// Ported from `parselist` in `mercurial/utils/stringutil.py`
48
48
49 // Note: keep behavior in sync with the Python one.
49 // Note: keep behavior in sync with the Python one.
50
50
51 // Note: this could return `Vec<Cow<[u8]>>` instead and borrow `input` when
51 // Note: this could return `Vec<Cow<[u8]>>` instead and borrow `input` when
52 // possible (when there’s no backslash-escapes) but this is probably not worth
52 // possible (when there’s no backslash-escapes) but this is probably not worth
53 // the complexity as config is presumably not accessed inside
53 // the complexity as config is presumably not accessed inside
54 // preformance-sensitive loops.
54 // preformance-sensitive loops.
55 pub(super) fn parse_list(input: &[u8]) -> Vec<Vec<u8>> {
55 pub(super) fn parse_list(input: &[u8]) -> Vec<Vec<u8>> {
56 // Port of Python’s `value.lstrip(b' ,\n')`
56 // Port of Python’s `value.lstrip(b' ,\n')`
57 // TODO: is this really what we want?
57 // TODO: is this really what we want?
58 let input =
58 let input =
59 input.trim_start_matches(|b| b == b' ' || b == b',' || b == b'\n');
59 input.trim_start_matches(|b| b == b' ' || b == b',' || b == b'\n');
60 parse_list_without_trim_start(input)
60 parse_list_without_trim_start(input)
61 }
61 }
62
62
63 fn parse_list_without_trim_start(input: &[u8]) -> Vec<Vec<u8>> {
63 fn parse_list_without_trim_start(input: &[u8]) -> Vec<Vec<u8>> {
64 // Start of port of Python’s `_configlist`
64 // Start of port of Python’s `_configlist`
65 let input = input.trim_end_matches(|b| b == b' ' || b == b',');
65 let input = input.trim_end_matches(|b| b == b' ' || b == b',');
66 if input.is_empty() {
66 if input.is_empty() {
67 return Vec::new();
67 return Vec::new();
68 }
68 }
69
69
70 // Just to make “a string” less confusable with “a list of strings”.
70 // Just to make “a string” less confusable with “a list of strings”.
71 type ByteString = Vec<u8>;
71 type ByteString = Vec<u8>;
72
72
73 // These correspond to Python’s…
73 // These correspond to Python’s…
74 let mut mode = ParserMode::Plain; // `parser`
74 let mut mode = ParserMode::Plain; // `parser`
75 let mut values = Vec::new(); // `parts[:-1]`
75 let mut values = Vec::new(); // `parts[:-1]`
76 let mut next_value = ByteString::new(); // `parts[-1]`
76 let mut next_value = ByteString::new(); // `parts[-1]`
77 let mut offset = 0; // `offset`
77 let mut offset = 0; // `offset`
78
78
79 // Setting `parser` to `None` is instead handled by returning immediately
79 // Setting `parser` to `None` is instead handled by returning immediately
80 enum ParserMode {
80 enum ParserMode {
81 Plain,
81 Plain,
82 Quoted,
82 Quoted,
83 }
83 }
84
84
85 loop {
85 loop {
86 match mode {
86 match mode {
87 ParserMode::Plain => {
87 ParserMode::Plain => {
88 // Start of port of Python’s `_parse_plain`
88 // Start of port of Python’s `_parse_plain`
89 let mut whitespace = false;
89 let mut whitespace = false;
90 while let Some(&byte) = input.get(offset) {
90 while let Some(&byte) = input.get(offset) {
91 if is_space(byte) || byte == b',' {
91 if is_space(byte) || byte == b',' {
92 whitespace = true;
92 whitespace = true;
93 offset += 1;
93 offset += 1;
94 } else {
94 } else {
95 break;
95 break;
96 }
96 }
97 }
97 }
98 if let Some(&byte) = input.get(offset) {
98 if let Some(&byte) = input.get(offset) {
99 if whitespace {
99 if whitespace {
100 values.push(std::mem::take(&mut next_value))
100 values.push(std::mem::take(&mut next_value))
101 }
101 }
102 if byte == b'"' && next_value.is_empty() {
102 if byte == b'"' && next_value.is_empty() {
103 mode = ParserMode::Quoted;
103 mode = ParserMode::Quoted;
104 } else {
104 } else {
105 if byte == b'"' && next_value.ends_with(b"\\") {
105 if byte == b'"' && next_value.ends_with(b"\\") {
106 next_value.pop();
106 next_value.pop();
107 }
107 }
108 next_value.push(byte);
108 next_value.push(byte);
109 }
109 }
110 offset += 1;
110 offset += 1;
111 } else {
111 } else {
112 values.push(next_value);
112 values.push(next_value);
113 return values;
113 return values;
114 }
114 }
115 }
115 }
116 ParserMode::Quoted => {
116 ParserMode::Quoted => {
117 // Start of port of Python’s `_parse_quote`
117 // Start of port of Python’s `_parse_quote`
118 if let Some(&byte) = input.get(offset) {
118 if let Some(&byte) = input.get(offset) {
119 if byte == b'"' {
119 if byte == b'"' {
120 // The input contains a quoted zero-length value `""`
120 // The input contains a quoted zero-length value `""`
121 debug_assert_eq!(next_value, b"");
121 debug_assert_eq!(next_value, b"");
122 values.push(std::mem::take(&mut next_value));
122 values.push(std::mem::take(&mut next_value));
123 offset += 1;
123 offset += 1;
124 while let Some(&byte) = input.get(offset) {
124 while let Some(&byte) = input.get(offset) {
125 if is_space(byte) || byte == b',' {
125 if is_space(byte) || byte == b',' {
126 offset += 1;
126 offset += 1;
127 } else {
127 } else {
128 break;
128 break;
129 }
129 }
130 }
130 }
131 mode = ParserMode::Plain;
131 mode = ParserMode::Plain;
132 continue;
132 continue;
133 }
133 }
134 }
134 }
135
135
136 while let Some(&byte) = input.get(offset) {
136 while let Some(&byte) = input.get(offset) {
137 if byte == b'"' {
137 if byte == b'"' {
138 break;
138 break;
139 }
139 }
140 if byte == b'\\' && input.get(offset + 1) == Some(&b'"') {
140 if byte == b'\\' && input.get(offset + 1) == Some(&b'"') {
141 next_value.push(b'"');
141 next_value.push(b'"');
142 offset += 2;
142 offset += 2;
143 } else {
143 } else {
144 next_value.push(byte);
144 next_value.push(byte);
145 offset += 1;
145 offset += 1;
146 }
146 }
147 }
147 }
148
148
149 if offset >= input.len() {
149 if offset >= input.len() {
150 // We didn’t find a closing double-quote,
150 // We didn’t find a closing double-quote,
151 // so treat the opening one as part of an unquoted value
151 // so treat the opening one as part of an unquoted value
152 // instead of delimiting the start of a quoted value.
152 // instead of delimiting the start of a quoted value.
153
153
154 // `next_value` may have had some backslash-escapes
154 // `next_value` may have had some backslash-escapes
155 // unescaped. TODO: shouldn’t we use a slice of `input`
155 // unescaped. TODO: shouldn’t we use a slice of `input`
156 // instead?
156 // instead?
157 let mut real_values =
157 let mut real_values =
158 parse_list_without_trim_start(&next_value);
158 parse_list_without_trim_start(&next_value);
159
159
160 if let Some(first) = real_values.first_mut() {
160 if let Some(first) = real_values.first_mut() {
161 first.insert(0, b'"');
161 first.insert(0, b'"');
162 // Drop `next_value`
162 // Drop `next_value`
163 values.extend(real_values)
163 values.extend(real_values)
164 } else {
164 } else {
165 next_value.push(b'"');
165 next_value.push(b'"');
166 values.push(next_value);
166 values.push(next_value);
167 }
167 }
168 return values;
168 return values;
169 }
169 }
170
170
171 // We’re not at the end of the input, which means the `while`
171 // We’re not at the end of the input, which means the `while`
172 // loop above ended at at double quote. Skip
172 // loop above ended at at double quote. Skip
173 // over that.
173 // over that.
174 offset += 1;
174 offset += 1;
175
175
176 while let Some(&byte) = input.get(offset) {
176 while let Some(&byte) = input.get(offset) {
177 if byte == b' ' || byte == b',' {
177 if byte == b' ' || byte == b',' {
178 offset += 1;
178 offset += 1;
179 } else {
179 } else {
180 break;
180 break;
181 }
181 }
182 }
182 }
183
183
184 if offset >= input.len() {
184 if offset >= input.len() {
185 values.push(next_value);
185 values.push(next_value);
186 return values;
186 return values;
187 }
187 }
188
188
189 if offset + 1 == input.len() && input[offset] == b'"' {
189 if offset + 1 == input.len() && input[offset] == b'"' {
190 next_value.push(b'"');
190 next_value.push(b'"');
191 offset += 1;
191 offset += 1;
192 } else {
192 } else {
193 values.push(std::mem::take(&mut next_value));
193 values.push(std::mem::take(&mut next_value));
194 }
194 }
195
195
196 mode = ParserMode::Plain;
196 mode = ParserMode::Plain;
197 }
197 }
198 }
198 }
199 }
199 }
200
200
201 // https://docs.python.org/3/library/stdtypes.html?#bytes.isspace
201 // https://docs.python.org/3/library/stdtypes.html?#bytes.isspace
202 fn is_space(byte: u8) -> bool {
202 fn is_space(byte: u8) -> bool {
203 if let b' ' | b'\t' | b'\n' | b'\r' | b'\x0b' | b'\x0c' = byte {
203 matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | b'\x0b' | b'\x0c')
204 true
205 } else {
206 false
207 }
208 }
204 }
209 }
205 }
210
206
211 #[test]
207 #[test]
212 fn test_parse_list() {
208 fn test_parse_list() {
213 // Make `assert_eq` error messages nicer
209 // Make `assert_eq` error messages nicer
214 fn as_strings(values: &[Vec<u8>]) -> Vec<String> {
210 fn as_strings(values: &[Vec<u8>]) -> Vec<String> {
215 values
211 values
216 .iter()
212 .iter()
217 .map(|v| std::str::from_utf8(v.as_ref()).unwrap().to_owned())
213 .map(|v| std::str::from_utf8(v.as_ref()).unwrap().to_owned())
218 .collect()
214 .collect()
219 }
215 }
220 macro_rules! assert_parse_list {
216 macro_rules! assert_parse_list {
221 ( $input: expr => [ $( $output: expr ),* ] ) => {
217 ( $input: expr => [ $( $output: expr ),* ] ) => {
222 assert_eq!(
218 assert_eq!(
223 as_strings(&parse_list($input)),
219 as_strings(&parse_list($input)),
224 as_strings(&[ $( Vec::from(&$output[..]) ),* ]),
220 as_strings(&[ $( Vec::from(&$output[..]) ),* ]),
225 );
221 );
226 }
222 }
227 }
223 }
228
224
229 // Keep these Rust tests in sync with the Python ones in
225 // Keep these Rust tests in sync with the Python ones in
230 // `tests/test-config-parselist.py`
226 // `tests/test-config-parselist.py`
231 assert_parse_list!(b"" => []);
227 assert_parse_list!(b"" => []);
232 assert_parse_list!(b"," => []);
228 assert_parse_list!(b"," => []);
233 assert_parse_list!(b"A" => [b"A"]);
229 assert_parse_list!(b"A" => [b"A"]);
234 assert_parse_list!(b"B,B" => [b"B", b"B"]);
230 assert_parse_list!(b"B,B" => [b"B", b"B"]);
235 assert_parse_list!(b", C, ,C," => [b"C", b"C"]);
231 assert_parse_list!(b", C, ,C," => [b"C", b"C"]);
236 assert_parse_list!(b"\"" => [b"\""]);
232 assert_parse_list!(b"\"" => [b"\""]);
237 assert_parse_list!(b"\"\"" => [b"", b""]);
233 assert_parse_list!(b"\"\"" => [b"", b""]);
238 assert_parse_list!(b"D,\"" => [b"D", b"\""]);
234 assert_parse_list!(b"D,\"" => [b"D", b"\""]);
239 assert_parse_list!(b"E,\"\"" => [b"E", b"", b""]);
235 assert_parse_list!(b"E,\"\"" => [b"E", b"", b""]);
240 assert_parse_list!(b"\"F,F\"" => [b"F,F"]);
236 assert_parse_list!(b"\"F,F\"" => [b"F,F"]);
241 assert_parse_list!(b"\"G,G" => [b"\"G", b"G"]);
237 assert_parse_list!(b"\"G,G" => [b"\"G", b"G"]);
242 assert_parse_list!(b"\"H \\\",\\\"H" => [b"\"H", b",", b"H"]);
238 assert_parse_list!(b"\"H \\\",\\\"H" => [b"\"H", b",", b"H"]);
243 assert_parse_list!(b"I,I\"" => [b"I", b"I\""]);
239 assert_parse_list!(b"I,I\"" => [b"I", b"I\""]);
244 assert_parse_list!(b"J,\"J" => [b"J", b"\"J"]);
240 assert_parse_list!(b"J,\"J" => [b"J", b"\"J"]);
245 assert_parse_list!(b"K K" => [b"K", b"K"]);
241 assert_parse_list!(b"K K" => [b"K", b"K"]);
246 assert_parse_list!(b"\"K\" K" => [b"K", b"K"]);
242 assert_parse_list!(b"\"K\" K" => [b"K", b"K"]);
247 assert_parse_list!(b"L\tL" => [b"L", b"L"]);
243 assert_parse_list!(b"L\tL" => [b"L", b"L"]);
248 assert_parse_list!(b"\"L\"\tL" => [b"L", b"", b"L"]);
244 assert_parse_list!(b"\"L\"\tL" => [b"L", b"", b"L"]);
249 assert_parse_list!(b"M\x0bM" => [b"M", b"M"]);
245 assert_parse_list!(b"M\x0bM" => [b"M", b"M"]);
250 assert_parse_list!(b"\"M\"\x0bM" => [b"M", b"", b"M"]);
246 assert_parse_list!(b"\"M\"\x0bM" => [b"M", b"", b"M"]);
251 assert_parse_list!(b"\"N\" , ,\"" => [b"N\""]);
247 assert_parse_list!(b"\"N\" , ,\"" => [b"N\""]);
252 assert_parse_list!(b"\" ,O, " => [b"\"", b"O"]);
248 assert_parse_list!(b"\" ,O, " => [b"\"", b"O"]);
253 }
249 }
254
250
255 #[test]
251 #[test]
256 fn test_parse_byte_size() {
252 fn test_parse_byte_size() {
257 assert_eq!(parse_byte_size(b""), None);
253 assert_eq!(parse_byte_size(b""), None);
258 assert_eq!(parse_byte_size(b"b"), None);
254 assert_eq!(parse_byte_size(b"b"), None);
259
255
260 assert_eq!(parse_byte_size(b"12"), Some(12));
256 assert_eq!(parse_byte_size(b"12"), Some(12));
261 assert_eq!(parse_byte_size(b"12b"), Some(12));
257 assert_eq!(parse_byte_size(b"12b"), Some(12));
262 assert_eq!(parse_byte_size(b"12 b"), Some(12));
258 assert_eq!(parse_byte_size(b"12 b"), Some(12));
263 assert_eq!(parse_byte_size(b"12.1 b"), Some(12));
259 assert_eq!(parse_byte_size(b"12.1 b"), Some(12));
264 assert_eq!(parse_byte_size(b"1.1 K"), Some(1126));
260 assert_eq!(parse_byte_size(b"1.1 K"), Some(1126));
265 assert_eq!(parse_byte_size(b"1.1 kB"), Some(1126));
261 assert_eq!(parse_byte_size(b"1.1 kB"), Some(1126));
266
262
267 assert_eq!(parse_byte_size(b"-12 b"), None);
263 assert_eq!(parse_byte_size(b"-12 b"), None);
268 assert_eq!(parse_byte_size(b"-0.1 b"), None);
264 assert_eq!(parse_byte_size(b"-0.1 b"), None);
269 assert_eq!(parse_byte_size(b"0.1 b"), Some(0));
265 assert_eq!(parse_byte_size(b"0.1 b"), Some(0));
270 assert_eq!(parse_byte_size(b"12.1 b"), Some(12));
266 assert_eq!(parse_byte_size(b"12.1 b"), Some(12));
271 }
267 }
@@ -1,680 +1,677 b''
1 #[cfg(test)]
1 #[cfg(test)]
2 #[macro_use]
2 #[macro_use]
3 mod tests_support;
3 mod tests_support;
4
4
5 #[cfg(test)]
5 #[cfg(test)]
6 mod tests;
6 mod tests;
7
7
8 use crate::utils::hg_path::HgPath;
8 use crate::utils::hg_path::HgPath;
9 use crate::utils::hg_path::HgPathBuf;
9 use crate::utils::hg_path::HgPathBuf;
10 use crate::Revision;
10 use crate::Revision;
11 use crate::NULL_REVISION;
11 use crate::NULL_REVISION;
12
12
13 use bytes_cast::{unaligned, BytesCast};
13 use bytes_cast::{unaligned, BytesCast};
14 use im_rc::ordmap::Entry;
14 use im_rc::ordmap::Entry;
15 use im_rc::ordmap::OrdMap;
15 use im_rc::ordmap::OrdMap;
16 use im_rc::OrdSet;
16 use im_rc::OrdSet;
17
17
18 use std::cmp::Ordering;
18 use std::cmp::Ordering;
19 use std::collections::HashMap;
19 use std::collections::HashMap;
20
20
21 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
21 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
22
22
23 type PathToken = usize;
23 type PathToken = usize;
24
24
25 #[derive(Clone, Debug)]
25 #[derive(Clone, Debug)]
26 struct CopySource {
26 struct CopySource {
27 /// revision at which the copy information was added
27 /// revision at which the copy information was added
28 rev: Revision,
28 rev: Revision,
29 /// the copy source, (Set to None in case of deletion of the associated
29 /// the copy source, (Set to None in case of deletion of the associated
30 /// key)
30 /// key)
31 path: Option<PathToken>,
31 path: Option<PathToken>,
32 /// a set of previous `CopySource.rev` value directly or indirectly
32 /// a set of previous `CopySource.rev` value directly or indirectly
33 /// overwritten by this one.
33 /// overwritten by this one.
34 overwritten: OrdSet<Revision>,
34 overwritten: OrdSet<Revision>,
35 }
35 }
36
36
37 impl CopySource {
37 impl CopySource {
38 /// create a new CopySource
38 /// create a new CopySource
39 ///
39 ///
40 /// Use this when no previous copy source existed.
40 /// Use this when no previous copy source existed.
41 fn new(rev: Revision, path: Option<PathToken>) -> Self {
41 fn new(rev: Revision, path: Option<PathToken>) -> Self {
42 Self {
42 Self {
43 rev,
43 rev,
44 path,
44 path,
45 overwritten: OrdSet::new(),
45 overwritten: OrdSet::new(),
46 }
46 }
47 }
47 }
48
48
49 /// create a new CopySource from merging two others
49 /// create a new CopySource from merging two others
50 ///
50 ///
51 /// Use this when merging two InternalPathCopies requires active merging of
51 /// Use this when merging two InternalPathCopies requires active merging of
52 /// some entries.
52 /// some entries.
53 fn new_from_merge(rev: Revision, winner: &Self, loser: &Self) -> Self {
53 fn new_from_merge(rev: Revision, winner: &Self, loser: &Self) -> Self {
54 let mut overwritten = OrdSet::new();
54 let mut overwritten = OrdSet::new();
55 overwritten.extend(winner.overwritten.iter().copied());
55 overwritten.extend(winner.overwritten.iter().copied());
56 overwritten.extend(loser.overwritten.iter().copied());
56 overwritten.extend(loser.overwritten.iter().copied());
57 overwritten.insert(winner.rev);
57 overwritten.insert(winner.rev);
58 overwritten.insert(loser.rev);
58 overwritten.insert(loser.rev);
59 Self {
59 Self {
60 rev,
60 rev,
61 path: winner.path,
61 path: winner.path,
62 overwritten: overwritten,
62 overwritten,
63 }
63 }
64 }
64 }
65
65
66 /// Update the value of a pre-existing CopySource
66 /// Update the value of a pre-existing CopySource
67 ///
67 ///
68 /// Use this when recording copy information from parent → child edges
68 /// Use this when recording copy information from parent → child edges
69 fn overwrite(&mut self, rev: Revision, path: Option<PathToken>) {
69 fn overwrite(&mut self, rev: Revision, path: Option<PathToken>) {
70 self.overwritten.insert(self.rev);
70 self.overwritten.insert(self.rev);
71 self.rev = rev;
71 self.rev = rev;
72 self.path = path;
72 self.path = path;
73 }
73 }
74
74
75 /// Mark pre-existing copy information as "dropped" by a file deletion
75 /// Mark pre-existing copy information as "dropped" by a file deletion
76 ///
76 ///
77 /// Use this when recording copy information from parent → child edges
77 /// Use this when recording copy information from parent → child edges
78 fn mark_delete(&mut self, rev: Revision) {
78 fn mark_delete(&mut self, rev: Revision) {
79 self.overwritten.insert(self.rev);
79 self.overwritten.insert(self.rev);
80 self.rev = rev;
80 self.rev = rev;
81 self.path = None;
81 self.path = None;
82 }
82 }
83
83
84 /// Mark pre-existing copy information as "dropped" by a file deletion
84 /// Mark pre-existing copy information as "dropped" by a file deletion
85 ///
85 ///
86 /// Use this when recording copy information from parent → child edges
86 /// Use this when recording copy information from parent → child edges
87 fn mark_delete_with_pair(&mut self, rev: Revision, other: &Self) {
87 fn mark_delete_with_pair(&mut self, rev: Revision, other: &Self) {
88 self.overwritten.insert(self.rev);
88 self.overwritten.insert(self.rev);
89 if other.rev != rev {
89 if other.rev != rev {
90 self.overwritten.insert(other.rev);
90 self.overwritten.insert(other.rev);
91 }
91 }
92 self.overwritten.extend(other.overwritten.iter().copied());
92 self.overwritten.extend(other.overwritten.iter().copied());
93 self.rev = rev;
93 self.rev = rev;
94 self.path = None;
94 self.path = None;
95 }
95 }
96
96
97 fn is_overwritten_by(&self, other: &Self) -> bool {
97 fn is_overwritten_by(&self, other: &Self) -> bool {
98 other.overwritten.contains(&self.rev)
98 other.overwritten.contains(&self.rev)
99 }
99 }
100 }
100 }
101
101
102 // For the same "dest", content generated for a given revision will always be
102 // For the same "dest", content generated for a given revision will always be
103 // the same.
103 // the same.
104 impl PartialEq for CopySource {
104 impl PartialEq for CopySource {
105 fn eq(&self, other: &Self) -> bool {
105 fn eq(&self, other: &Self) -> bool {
106 #[cfg(debug_assertions)]
106 #[cfg(debug_assertions)]
107 {
107 {
108 if self.rev == other.rev {
108 if self.rev == other.rev {
109 debug_assert!(self.path == other.path);
109 debug_assert!(self.path == other.path);
110 debug_assert!(self.overwritten == other.overwritten);
110 debug_assert!(self.overwritten == other.overwritten);
111 }
111 }
112 }
112 }
113 self.rev == other.rev
113 self.rev == other.rev
114 }
114 }
115 }
115 }
116
116
117 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
117 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
118 type InternalPathCopies = OrdMap<PathToken, CopySource>;
118 type InternalPathCopies = OrdMap<PathToken, CopySource>;
119
119
120 /// Represent active changes that affect the copy tracing.
120 /// Represent active changes that affect the copy tracing.
121 enum Action<'a> {
121 enum Action<'a> {
122 /// The parent ? children edge is removing a file
122 /// The parent ? children edge is removing a file
123 ///
123 ///
124 /// (actually, this could be the edge from the other parent, but it does
124 /// (actually, this could be the edge from the other parent, but it does
125 /// not matters)
125 /// not matters)
126 Removed(&'a HgPath),
126 Removed(&'a HgPath),
127 /// The parent ? children edge introduce copy information between (dest,
127 /// The parent ? children edge introduce copy information between (dest,
128 /// source)
128 /// source)
129 CopiedFromP1(&'a HgPath, &'a HgPath),
129 CopiedFromP1(&'a HgPath, &'a HgPath),
130 CopiedFromP2(&'a HgPath, &'a HgPath),
130 CopiedFromP2(&'a HgPath, &'a HgPath),
131 }
131 }
132
132
133 /// This express the possible "special" case we can get in a merge
133 /// This express the possible "special" case we can get in a merge
134 ///
134 ///
135 /// See mercurial/metadata.py for details on these values.
135 /// See mercurial/metadata.py for details on these values.
136 #[derive(PartialEq)]
136 #[derive(PartialEq)]
137 enum MergeCase {
137 enum MergeCase {
138 /// Merged: file had history on both side that needed to be merged
138 /// Merged: file had history on both side that needed to be merged
139 Merged,
139 Merged,
140 /// Salvaged: file was candidate for deletion, but survived the merge
140 /// Salvaged: file was candidate for deletion, but survived the merge
141 Salvaged,
141 Salvaged,
142 /// Normal: Not one of the two cases above
142 /// Normal: Not one of the two cases above
143 Normal,
143 Normal,
144 }
144 }
145
145
146 const COPY_MASK: u8 = 3;
146 const COPY_MASK: u8 = 3;
147 const P1_COPY: u8 = 2;
147 const P1_COPY: u8 = 2;
148 const P2_COPY: u8 = 3;
148 const P2_COPY: u8 = 3;
149 const ACTION_MASK: u8 = 28;
149 const ACTION_MASK: u8 = 28;
150 const REMOVED: u8 = 12;
150 const REMOVED: u8 = 12;
151 const MERGED: u8 = 8;
151 const MERGED: u8 = 8;
152 const SALVAGED: u8 = 16;
152 const SALVAGED: u8 = 16;
153
153
154 #[derive(BytesCast)]
154 #[derive(BytesCast)]
155 #[repr(C)]
155 #[repr(C)]
156 struct ChangedFilesIndexEntry {
156 struct ChangedFilesIndexEntry {
157 flags: u8,
157 flags: u8,
158
158
159 /// Only the end position is stored. The start is at the end of the
159 /// Only the end position is stored. The start is at the end of the
160 /// previous entry.
160 /// previous entry.
161 destination_path_end_position: unaligned::U32Be,
161 destination_path_end_position: unaligned::U32Be,
162
162
163 source_index_entry_position: unaligned::U32Be,
163 source_index_entry_position: unaligned::U32Be,
164 }
164 }
165
165
166 fn _static_assert_size_of() {
166 fn _static_assert_size_of() {
167 let _ = std::mem::transmute::<ChangedFilesIndexEntry, [u8; 9]>;
167 let _ = std::mem::transmute::<ChangedFilesIndexEntry, [u8; 9]>;
168 }
168 }
169
169
170 /// Represents the files affected by a changeset.
170 /// Represents the files affected by a changeset.
171 ///
171 ///
172 /// This holds a subset of `mercurial.metadata.ChangingFiles` as we do not need
172 /// This holds a subset of `mercurial.metadata.ChangingFiles` as we do not need
173 /// all the data categories tracked by it.
173 /// all the data categories tracked by it.
174 pub struct ChangedFiles<'a> {
174 pub struct ChangedFiles<'a> {
175 index: &'a [ChangedFilesIndexEntry],
175 index: &'a [ChangedFilesIndexEntry],
176 paths: &'a [u8],
176 paths: &'a [u8],
177 }
177 }
178
178
179 impl<'a> ChangedFiles<'a> {
179 impl<'a> ChangedFiles<'a> {
180 pub fn new(data: &'a [u8]) -> Self {
180 pub fn new(data: &'a [u8]) -> Self {
181 let (header, rest) = unaligned::U32Be::from_bytes(data).unwrap();
181 let (header, rest) = unaligned::U32Be::from_bytes(data).unwrap();
182 let nb_index_entries = header.get() as usize;
182 let nb_index_entries = header.get() as usize;
183 let (index, paths) =
183 let (index, paths) =
184 ChangedFilesIndexEntry::slice_from_bytes(rest, nb_index_entries)
184 ChangedFilesIndexEntry::slice_from_bytes(rest, nb_index_entries)
185 .unwrap();
185 .unwrap();
186 Self { index, paths }
186 Self { index, paths }
187 }
187 }
188
188
189 pub fn new_empty() -> Self {
189 pub fn new_empty() -> Self {
190 ChangedFiles {
190 ChangedFiles {
191 index: &[],
191 index: &[],
192 paths: &[],
192 paths: &[],
193 }
193 }
194 }
194 }
195
195
196 /// Internal function to return the filename of the entry at a given index
196 /// Internal function to return the filename of the entry at a given index
197 fn path(&self, idx: usize) -> &HgPath {
197 fn path(&self, idx: usize) -> &HgPath {
198 let start = if idx == 0 {
198 let start = if idx == 0 {
199 0
199 0
200 } else {
200 } else {
201 self.index[idx - 1].destination_path_end_position.get() as usize
201 self.index[idx - 1].destination_path_end_position.get() as usize
202 };
202 };
203 let end = self.index[idx].destination_path_end_position.get() as usize;
203 let end = self.index[idx].destination_path_end_position.get() as usize;
204 HgPath::new(&self.paths[start..end])
204 HgPath::new(&self.paths[start..end])
205 }
205 }
206
206
207 /// Return an iterator over all the `Action` in this instance.
207 /// Return an iterator over all the `Action` in this instance.
208 fn iter_actions(&self) -> impl Iterator<Item = Action> {
208 fn iter_actions(&self) -> impl Iterator<Item = Action> {
209 self.index.iter().enumerate().flat_map(move |(idx, entry)| {
209 self.index.iter().enumerate().flat_map(move |(idx, entry)| {
210 let path = self.path(idx);
210 let path = self.path(idx);
211 if (entry.flags & ACTION_MASK) == REMOVED {
211 if (entry.flags & ACTION_MASK) == REMOVED {
212 Some(Action::Removed(path))
212 Some(Action::Removed(path))
213 } else if (entry.flags & COPY_MASK) == P1_COPY {
213 } else if (entry.flags & COPY_MASK) == P1_COPY {
214 let source_idx =
214 let source_idx =
215 entry.source_index_entry_position.get() as usize;
215 entry.source_index_entry_position.get() as usize;
216 Some(Action::CopiedFromP1(path, self.path(source_idx)))
216 Some(Action::CopiedFromP1(path, self.path(source_idx)))
217 } else if (entry.flags & COPY_MASK) == P2_COPY {
217 } else if (entry.flags & COPY_MASK) == P2_COPY {
218 let source_idx =
218 let source_idx =
219 entry.source_index_entry_position.get() as usize;
219 entry.source_index_entry_position.get() as usize;
220 Some(Action::CopiedFromP2(path, self.path(source_idx)))
220 Some(Action::CopiedFromP2(path, self.path(source_idx)))
221 } else {
221 } else {
222 None
222 None
223 }
223 }
224 })
224 })
225 }
225 }
226
226
227 /// return the MergeCase value associated with a filename
227 /// return the MergeCase value associated with a filename
228 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
228 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
229 if self.index.is_empty() {
229 if self.index.is_empty() {
230 return MergeCase::Normal;
230 return MergeCase::Normal;
231 }
231 }
232 let mut low_part = 0;
232 let mut low_part = 0;
233 let mut high_part = self.index.len();
233 let mut high_part = self.index.len();
234
234
235 while low_part < high_part {
235 while low_part < high_part {
236 let cursor = (low_part + high_part - 1) / 2;
236 let cursor = (low_part + high_part - 1) / 2;
237 match path.cmp(self.path(cursor)) {
237 match path.cmp(self.path(cursor)) {
238 Ordering::Less => low_part = cursor + 1,
238 Ordering::Less => low_part = cursor + 1,
239 Ordering::Greater => high_part = cursor,
239 Ordering::Greater => high_part = cursor,
240 Ordering::Equal => {
240 Ordering::Equal => {
241 return match self.index[cursor].flags & ACTION_MASK {
241 return match self.index[cursor].flags & ACTION_MASK {
242 MERGED => MergeCase::Merged,
242 MERGED => MergeCase::Merged,
243 SALVAGED => MergeCase::Salvaged,
243 SALVAGED => MergeCase::Salvaged,
244 _ => MergeCase::Normal,
244 _ => MergeCase::Normal,
245 };
245 };
246 }
246 }
247 }
247 }
248 }
248 }
249 MergeCase::Normal
249 MergeCase::Normal
250 }
250 }
251 }
251 }
252
252
253 /// A small "tokenizer" responsible of turning full HgPath into lighter
253 /// A small "tokenizer" responsible of turning full HgPath into lighter
254 /// PathToken
254 /// PathToken
255 ///
255 ///
256 /// Dealing with small object, like integer is much faster, so HgPath input are
256 /// Dealing with small object, like integer is much faster, so HgPath input are
257 /// turned into integer "PathToken" and converted back in the end.
257 /// turned into integer "PathToken" and converted back in the end.
258 #[derive(Clone, Debug, Default)]
258 #[derive(Clone, Debug, Default)]
259 struct TwoWayPathMap {
259 struct TwoWayPathMap {
260 token: HashMap<HgPathBuf, PathToken>,
260 token: HashMap<HgPathBuf, PathToken>,
261 path: Vec<HgPathBuf>,
261 path: Vec<HgPathBuf>,
262 }
262 }
263
263
264 impl TwoWayPathMap {
264 impl TwoWayPathMap {
265 fn tokenize(&mut self, path: &HgPath) -> PathToken {
265 fn tokenize(&mut self, path: &HgPath) -> PathToken {
266 match self.token.get(path) {
266 match self.token.get(path) {
267 Some(a) => *a,
267 Some(a) => *a,
268 None => {
268 None => {
269 let a = self.token.len();
269 let a = self.token.len();
270 let buf = path.to_owned();
270 let buf = path.to_owned();
271 self.path.push(buf.clone());
271 self.path.push(buf.clone());
272 self.token.insert(buf, a);
272 self.token.insert(buf, a);
273 a
273 a
274 }
274 }
275 }
275 }
276 }
276 }
277
277
278 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
278 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
279 assert!(token < self.path.len(), "Unknown token: {}", token);
279 assert!(token < self.path.len(), "Unknown token: {}", token);
280 &self.path[token]
280 &self.path[token]
281 }
281 }
282 }
282 }
283
283
284 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
284 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
285 pub struct CombineChangesetCopies {
285 pub struct CombineChangesetCopies {
286 all_copies: HashMap<Revision, InternalPathCopies>,
286 all_copies: HashMap<Revision, InternalPathCopies>,
287 path_map: TwoWayPathMap,
287 path_map: TwoWayPathMap,
288 children_count: HashMap<Revision, usize>,
288 children_count: HashMap<Revision, usize>,
289 }
289 }
290
290
291 impl CombineChangesetCopies {
291 impl CombineChangesetCopies {
292 pub fn new(children_count: HashMap<Revision, usize>) -> Self {
292 pub fn new(children_count: HashMap<Revision, usize>) -> Self {
293 Self {
293 Self {
294 all_copies: HashMap::new(),
294 all_copies: HashMap::new(),
295 path_map: TwoWayPathMap::default(),
295 path_map: TwoWayPathMap::default(),
296 children_count,
296 children_count,
297 }
297 }
298 }
298 }
299
299
300 /// Combined the given `changes` data specific to `rev` with the data
300 /// Combined the given `changes` data specific to `rev` with the data
301 /// previously given for its parents (and transitively, its ancestors).
301 /// previously given for its parents (and transitively, its ancestors).
302 pub fn add_revision(
302 pub fn add_revision(
303 &mut self,
303 &mut self,
304 rev: Revision,
304 rev: Revision,
305 p1: Revision,
305 p1: Revision,
306 p2: Revision,
306 p2: Revision,
307 changes: ChangedFiles<'_>,
307 changes: ChangedFiles<'_>,
308 ) {
308 ) {
309 self.add_revision_inner(rev, p1, p2, changes.iter_actions(), |path| {
309 self.add_revision_inner(rev, p1, p2, changes.iter_actions(), |path| {
310 changes.get_merge_case(path)
310 changes.get_merge_case(path)
311 })
311 })
312 }
312 }
313
313
314 /// Separated out from `add_revsion` so that unit tests can call this
314 /// Separated out from `add_revsion` so that unit tests can call this
315 /// without synthetizing a `ChangedFiles` in binary format.
315 /// without synthetizing a `ChangedFiles` in binary format.
316 fn add_revision_inner<'a>(
316 fn add_revision_inner<'a>(
317 &mut self,
317 &mut self,
318 rev: Revision,
318 rev: Revision,
319 p1: Revision,
319 p1: Revision,
320 p2: Revision,
320 p2: Revision,
321 copy_actions: impl Iterator<Item = Action<'a>>,
321 copy_actions: impl Iterator<Item = Action<'a>>,
322 get_merge_case: impl Fn(&HgPath) -> MergeCase + Copy,
322 get_merge_case: impl Fn(&HgPath) -> MergeCase + Copy,
323 ) {
323 ) {
324 // Retrieve data computed in a previous iteration
324 // Retrieve data computed in a previous iteration
325 let p1_copies = match p1 {
325 let p1_copies = match p1 {
326 NULL_REVISION => None,
326 NULL_REVISION => None,
327 _ => get_and_clean_parent_copies(
327 _ => get_and_clean_parent_copies(
328 &mut self.all_copies,
328 &mut self.all_copies,
329 &mut self.children_count,
329 &mut self.children_count,
330 p1,
330 p1,
331 ), // will be None if the vertex is not to be traversed
331 ), // will be None if the vertex is not to be traversed
332 };
332 };
333 let p2_copies = match p2 {
333 let p2_copies = match p2 {
334 NULL_REVISION => None,
334 NULL_REVISION => None,
335 _ => get_and_clean_parent_copies(
335 _ => get_and_clean_parent_copies(
336 &mut self.all_copies,
336 &mut self.all_copies,
337 &mut self.children_count,
337 &mut self.children_count,
338 p2,
338 p2,
339 ), // will be None if the vertex is not to be traversed
339 ), // will be None if the vertex is not to be traversed
340 };
340 };
341 // combine it with data for that revision
341 // combine it with data for that revision
342 let (p1_copies, p2_copies) = chain_changes(
342 let (p1_copies, p2_copies) = chain_changes(
343 &mut self.path_map,
343 &mut self.path_map,
344 p1_copies,
344 p1_copies,
345 p2_copies,
345 p2_copies,
346 copy_actions,
346 copy_actions,
347 rev,
347 rev,
348 );
348 );
349 let copies = match (p1_copies, p2_copies) {
349 let copies = match (p1_copies, p2_copies) {
350 (None, None) => None,
350 (None, None) => None,
351 (c, None) => c,
351 (c, None) => c,
352 (None, c) => c,
352 (None, c) => c,
353 (Some(p1_copies), Some(p2_copies)) => Some(merge_copies_dict(
353 (Some(p1_copies), Some(p2_copies)) => Some(merge_copies_dict(
354 &self.path_map,
354 &self.path_map,
355 rev,
355 rev,
356 p2_copies,
356 p2_copies,
357 p1_copies,
357 p1_copies,
358 get_merge_case,
358 get_merge_case,
359 )),
359 )),
360 };
360 };
361 if let Some(c) = copies {
361 if let Some(c) = copies {
362 self.all_copies.insert(rev, c);
362 self.all_copies.insert(rev, c);
363 }
363 }
364 }
364 }
365
365
366 /// Drop intermediate data (such as which revision a copy was from) and
366 /// Drop intermediate data (such as which revision a copy was from) and
367 /// return the final mapping.
367 /// return the final mapping.
368 pub fn finish(mut self, target_rev: Revision) -> PathCopies {
368 pub fn finish(mut self, target_rev: Revision) -> PathCopies {
369 let tt_result = self
369 let tt_result = self
370 .all_copies
370 .all_copies
371 .remove(&target_rev)
371 .remove(&target_rev)
372 .expect("target revision was not processed");
372 .expect("target revision was not processed");
373 let mut result = PathCopies::default();
373 let mut result = PathCopies::default();
374 for (dest, tt_source) in tt_result {
374 for (dest, tt_source) in tt_result {
375 if let Some(path) = tt_source.path {
375 if let Some(path) = tt_source.path {
376 let path_dest = self.path_map.untokenize(dest).to_owned();
376 let path_dest = self.path_map.untokenize(dest).to_owned();
377 let path_path = self.path_map.untokenize(path).to_owned();
377 let path_path = self.path_map.untokenize(path).to_owned();
378 result.insert(path_dest, path_path);
378 result.insert(path_dest, path_path);
379 }
379 }
380 }
380 }
381 result
381 result
382 }
382 }
383 }
383 }
384
384
385 /// fetch previous computed information
385 /// fetch previous computed information
386 ///
386 ///
387 /// If no other children are expected to need this information, we drop it from
387 /// If no other children are expected to need this information, we drop it from
388 /// the cache.
388 /// the cache.
389 ///
389 ///
390 /// If parent is not part of the set we are expected to walk, return None.
390 /// If parent is not part of the set we are expected to walk, return None.
391 fn get_and_clean_parent_copies(
391 fn get_and_clean_parent_copies(
392 all_copies: &mut HashMap<Revision, InternalPathCopies>,
392 all_copies: &mut HashMap<Revision, InternalPathCopies>,
393 children_count: &mut HashMap<Revision, usize>,
393 children_count: &mut HashMap<Revision, usize>,
394 parent_rev: Revision,
394 parent_rev: Revision,
395 ) -> Option<InternalPathCopies> {
395 ) -> Option<InternalPathCopies> {
396 let count = children_count.get_mut(&parent_rev)?;
396 let count = children_count.get_mut(&parent_rev)?;
397 *count -= 1;
397 *count -= 1;
398 if *count == 0 {
398 if *count == 0 {
399 match all_copies.remove(&parent_rev) {
399 match all_copies.remove(&parent_rev) {
400 Some(c) => Some(c),
400 Some(c) => Some(c),
401 None => Some(InternalPathCopies::default()),
401 None => Some(InternalPathCopies::default()),
402 }
402 }
403 } else {
403 } else {
404 match all_copies.get(&parent_rev) {
404 match all_copies.get(&parent_rev) {
405 Some(c) => Some(c.clone()),
405 Some(c) => Some(c.clone()),
406 None => Some(InternalPathCopies::default()),
406 None => Some(InternalPathCopies::default()),
407 }
407 }
408 }
408 }
409 }
409 }
410
410
411 /// Combine ChangedFiles with some existing PathCopies information and return
411 /// Combine ChangedFiles with some existing PathCopies information and return
412 /// the result
412 /// the result
413 fn chain_changes<'a>(
413 fn chain_changes<'a>(
414 path_map: &mut TwoWayPathMap,
414 path_map: &mut TwoWayPathMap,
415 base_p1_copies: Option<InternalPathCopies>,
415 base_p1_copies: Option<InternalPathCopies>,
416 base_p2_copies: Option<InternalPathCopies>,
416 base_p2_copies: Option<InternalPathCopies>,
417 copy_actions: impl Iterator<Item = Action<'a>>,
417 copy_actions: impl Iterator<Item = Action<'a>>,
418 current_rev: Revision,
418 current_rev: Revision,
419 ) -> (Option<InternalPathCopies>, Option<InternalPathCopies>) {
419 ) -> (Option<InternalPathCopies>, Option<InternalPathCopies>) {
420 // Fast path the "nothing to do" case.
420 // Fast path the "nothing to do" case.
421 if let (None, None) = (&base_p1_copies, &base_p2_copies) {
421 if let (None, None) = (&base_p1_copies, &base_p2_copies) {
422 return (None, None);
422 return (None, None);
423 }
423 }
424
424
425 let mut p1_copies = base_p1_copies.clone();
425 let mut p1_copies = base_p1_copies.clone();
426 let mut p2_copies = base_p2_copies.clone();
426 let mut p2_copies = base_p2_copies.clone();
427 for action in copy_actions {
427 for action in copy_actions {
428 match action {
428 match action {
429 Action::CopiedFromP1(path_dest, path_source) => {
429 Action::CopiedFromP1(path_dest, path_source) => {
430 match &mut p1_copies {
430 match &mut p1_copies {
431 None => (), // This is not a vertex we should proceed.
431 None => (), // This is not a vertex we should proceed.
432 Some(copies) => add_one_copy(
432 Some(copies) => add_one_copy(
433 current_rev,
433 current_rev,
434 path_map,
434 path_map,
435 copies,
435 copies,
436 base_p1_copies.as_ref().unwrap(),
436 base_p1_copies.as_ref().unwrap(),
437 path_dest,
437 path_dest,
438 path_source,
438 path_source,
439 ),
439 ),
440 }
440 }
441 }
441 }
442 Action::CopiedFromP2(path_dest, path_source) => {
442 Action::CopiedFromP2(path_dest, path_source) => {
443 match &mut p2_copies {
443 match &mut p2_copies {
444 None => (), // This is not a vertex we should proceed.
444 None => (), // This is not a vertex we should proceed.
445 Some(copies) => add_one_copy(
445 Some(copies) => add_one_copy(
446 current_rev,
446 current_rev,
447 path_map,
447 path_map,
448 copies,
448 copies,
449 base_p2_copies.as_ref().unwrap(),
449 base_p2_copies.as_ref().unwrap(),
450 path_dest,
450 path_dest,
451 path_source,
451 path_source,
452 ),
452 ),
453 }
453 }
454 }
454 }
455 Action::Removed(deleted_path) => {
455 Action::Removed(deleted_path) => {
456 // We must drop copy information for removed file.
456 // We must drop copy information for removed file.
457 //
457 //
458 // We need to explicitly record them as dropped to
458 // We need to explicitly record them as dropped to
459 // propagate this information when merging two
459 // propagate this information when merging two
460 // InternalPathCopies object.
460 // InternalPathCopies object.
461 let deleted = path_map.tokenize(deleted_path);
461 let deleted = path_map.tokenize(deleted_path);
462
462
463 let p1_entry = match &mut p1_copies {
463 let p1_entry = match &mut p1_copies {
464 None => None,
464 None => None,
465 Some(copies) => match copies.entry(deleted) {
465 Some(copies) => match copies.entry(deleted) {
466 Entry::Occupied(e) => Some(e),
466 Entry::Occupied(e) => Some(e),
467 Entry::Vacant(_) => None,
467 Entry::Vacant(_) => None,
468 },
468 },
469 };
469 };
470 let p2_entry = match &mut p2_copies {
470 let p2_entry = match &mut p2_copies {
471 None => None,
471 None => None,
472 Some(copies) => match copies.entry(deleted) {
472 Some(copies) => match copies.entry(deleted) {
473 Entry::Occupied(e) => Some(e),
473 Entry::Occupied(e) => Some(e),
474 Entry::Vacant(_) => None,
474 Entry::Vacant(_) => None,
475 },
475 },
476 };
476 };
477
477
478 match (p1_entry, p2_entry) {
478 match (p1_entry, p2_entry) {
479 (None, None) => (),
479 (None, None) => (),
480 (Some(mut e), None) => {
480 (Some(mut e), None) => {
481 e.get_mut().mark_delete(current_rev)
481 e.get_mut().mark_delete(current_rev)
482 }
482 }
483 (None, Some(mut e)) => {
483 (None, Some(mut e)) => {
484 e.get_mut().mark_delete(current_rev)
484 e.get_mut().mark_delete(current_rev)
485 }
485 }
486 (Some(mut e1), Some(mut e2)) => {
486 (Some(mut e1), Some(mut e2)) => {
487 let cs1 = e1.get_mut();
487 let cs1 = e1.get_mut();
488 let cs2 = e2.get();
488 let cs2 = e2.get();
489 if cs1 == cs2 {
489 if cs1 == cs2 {
490 cs1.mark_delete(current_rev);
490 cs1.mark_delete(current_rev);
491 } else {
491 } else {
492 cs1.mark_delete_with_pair(current_rev, &cs2);
492 cs1.mark_delete_with_pair(current_rev, cs2);
493 }
493 }
494 e2.insert(cs1.clone());
494 e2.insert(cs1.clone());
495 }
495 }
496 }
496 }
497 }
497 }
498 }
498 }
499 }
499 }
500 (p1_copies, p2_copies)
500 (p1_copies, p2_copies)
501 }
501 }
502
502
503 // insert one new copy information in an InternalPathCopies
503 // insert one new copy information in an InternalPathCopies
504 //
504 //
505 // This deal with chaining and overwrite.
505 // This deal with chaining and overwrite.
506 fn add_one_copy(
506 fn add_one_copy(
507 current_rev: Revision,
507 current_rev: Revision,
508 path_map: &mut TwoWayPathMap,
508 path_map: &mut TwoWayPathMap,
509 copies: &mut InternalPathCopies,
509 copies: &mut InternalPathCopies,
510 base_copies: &InternalPathCopies,
510 base_copies: &InternalPathCopies,
511 path_dest: &HgPath,
511 path_dest: &HgPath,
512 path_source: &HgPath,
512 path_source: &HgPath,
513 ) {
513 ) {
514 let dest = path_map.tokenize(path_dest);
514 let dest = path_map.tokenize(path_dest);
515 let source = path_map.tokenize(path_source);
515 let source = path_map.tokenize(path_source);
516 let entry;
516 let entry = if let Some(v) = base_copies.get(&source) {
517 if let Some(v) = base_copies.get(&source) {
517 match &v.path {
518 entry = match &v.path {
519 Some(path) => Some((*(path)).to_owned()),
518 Some(path) => Some((*(path)).to_owned()),
520 None => Some(source.to_owned()),
519 None => Some(source.to_owned()),
521 }
520 }
522 } else {
521 } else {
523 entry = Some(source.to_owned());
522 Some(source.to_owned())
524 }
523 };
525 // Each new entry is introduced by the children, we
524 // Each new entry is introduced by the children, we
526 // record this information as we will need it to take
525 // record this information as we will need it to take
527 // the right decision when merging conflicting copy
526 // the right decision when merging conflicting copy
528 // information. See merge_copies_dict for details.
527 // information. See merge_copies_dict for details.
529 match copies.entry(dest) {
528 match copies.entry(dest) {
530 Entry::Vacant(slot) => {
529 Entry::Vacant(slot) => {
531 let ttpc = CopySource::new(current_rev, entry);
530 let ttpc = CopySource::new(current_rev, entry);
532 slot.insert(ttpc);
531 slot.insert(ttpc);
533 }
532 }
534 Entry::Occupied(mut slot) => {
533 Entry::Occupied(mut slot) => {
535 let ttpc = slot.get_mut();
534 let ttpc = slot.get_mut();
536 ttpc.overwrite(current_rev, entry);
535 ttpc.overwrite(current_rev, entry);
537 }
536 }
538 }
537 }
539 }
538 }
540
539
541 /// merge two copies-mapping together, minor and major
540 /// merge two copies-mapping together, minor and major
542 ///
541 ///
543 /// In case of conflict, value from "major" will be picked, unless in some
542 /// In case of conflict, value from "major" will be picked, unless in some
544 /// cases. See inline documentation for details.
543 /// cases. See inline documentation for details.
545 fn merge_copies_dict(
544 fn merge_copies_dict(
546 path_map: &TwoWayPathMap,
545 path_map: &TwoWayPathMap,
547 current_merge: Revision,
546 current_merge: Revision,
548 minor: InternalPathCopies,
547 minor: InternalPathCopies,
549 major: InternalPathCopies,
548 major: InternalPathCopies,
550 get_merge_case: impl Fn(&HgPath) -> MergeCase + Copy,
549 get_merge_case: impl Fn(&HgPath) -> MergeCase + Copy,
551 ) -> InternalPathCopies {
550 ) -> InternalPathCopies {
552 use crate::utils::{ordmap_union_with_merge, MergeResult};
551 use crate::utils::{ordmap_union_with_merge, MergeResult};
553
552
554 ordmap_union_with_merge(minor, major, |&dest, src_minor, src_major| {
553 ordmap_union_with_merge(minor, major, |&dest, src_minor, src_major| {
555 let (pick, overwrite) = compare_value(
554 let (pick, overwrite) = compare_value(
556 current_merge,
555 current_merge,
557 || get_merge_case(path_map.untokenize(dest)),
556 || get_merge_case(path_map.untokenize(dest)),
558 src_minor,
557 src_minor,
559 src_major,
558 src_major,
560 );
559 );
561 if overwrite {
560 if overwrite {
562 let (winner, loser) = match pick {
561 let (winner, loser) = match pick {
563 MergePick::Major | MergePick::Any => (src_major, src_minor),
562 MergePick::Major | MergePick::Any => (src_major, src_minor),
564 MergePick::Minor => (src_minor, src_major),
563 MergePick::Minor => (src_minor, src_major),
565 };
564 };
566 MergeResult::UseNewValue(CopySource::new_from_merge(
565 MergeResult::NewValue(CopySource::new_from_merge(
567 current_merge,
566 current_merge,
568 winner,
567 winner,
569 loser,
568 loser,
570 ))
569 ))
571 } else {
570 } else {
572 match pick {
571 match pick {
573 MergePick::Any | MergePick::Major => {
572 MergePick::Any | MergePick::Major => MergeResult::RightValue,
574 MergeResult::UseRightValue
573 MergePick::Minor => MergeResult::LeftValue,
575 }
576 MergePick::Minor => MergeResult::UseLeftValue,
577 }
574 }
578 }
575 }
579 })
576 })
580 }
577 }
581
578
582 /// represent the side that should prevail when merging two
579 /// represent the side that should prevail when merging two
583 /// InternalPathCopies
580 /// InternalPathCopies
584 #[derive(Debug, PartialEq)]
581 #[derive(Debug, PartialEq)]
585 enum MergePick {
582 enum MergePick {
586 /// The "major" (p1) side prevails
583 /// The "major" (p1) side prevails
587 Major,
584 Major,
588 /// The "minor" (p2) side prevails
585 /// The "minor" (p2) side prevails
589 Minor,
586 Minor,
590 /// Any side could be used (because they are the same)
587 /// Any side could be used (because they are the same)
591 Any,
588 Any,
592 }
589 }
593
590
594 /// decide which side prevails in case of conflicting values
591 /// decide which side prevails in case of conflicting values
595 #[allow(clippy::if_same_then_else)]
592 #[allow(clippy::if_same_then_else)]
596 fn compare_value(
593 fn compare_value(
597 current_merge: Revision,
594 current_merge: Revision,
598 merge_case_for_dest: impl Fn() -> MergeCase,
595 merge_case_for_dest: impl Fn() -> MergeCase,
599 src_minor: &CopySource,
596 src_minor: &CopySource,
600 src_major: &CopySource,
597 src_major: &CopySource,
601 ) -> (MergePick, bool) {
598 ) -> (MergePick, bool) {
602 if src_major == src_minor {
599 if src_major == src_minor {
603 (MergePick::Any, false)
600 (MergePick::Any, false)
604 } else if src_major.rev == current_merge {
601 } else if src_major.rev == current_merge {
605 // minor is different according to per minor == major check earlier
602 // minor is different according to per minor == major check earlier
606 debug_assert!(src_minor.rev != current_merge);
603 debug_assert!(src_minor.rev != current_merge);
607
604
608 // The last value comes the current merge, this value -will- win
605 // The last value comes the current merge, this value -will- win
609 // eventually.
606 // eventually.
610 (MergePick::Major, true)
607 (MergePick::Major, true)
611 } else if src_minor.rev == current_merge {
608 } else if src_minor.rev == current_merge {
612 // The last value comes the current merge, this value -will- win
609 // The last value comes the current merge, this value -will- win
613 // eventually.
610 // eventually.
614 (MergePick::Minor, true)
611 (MergePick::Minor, true)
615 } else if src_major.path == src_minor.path {
612 } else if src_major.path == src_minor.path {
616 debug_assert!(src_major.rev != src_minor.rev);
613 debug_assert!(src_major.rev != src_minor.rev);
617 // we have the same value, but from other source;
614 // we have the same value, but from other source;
618 if src_major.is_overwritten_by(src_minor) {
615 if src_major.is_overwritten_by(src_minor) {
619 (MergePick::Minor, false)
616 (MergePick::Minor, false)
620 } else if src_minor.is_overwritten_by(src_major) {
617 } else if src_minor.is_overwritten_by(src_major) {
621 (MergePick::Major, false)
618 (MergePick::Major, false)
622 } else {
619 } else {
623 (MergePick::Any, true)
620 (MergePick::Any, true)
624 }
621 }
625 } else {
622 } else {
626 debug_assert!(src_major.rev != src_minor.rev);
623 debug_assert!(src_major.rev != src_minor.rev);
627 let action = merge_case_for_dest();
624 let action = merge_case_for_dest();
628 if src_minor.path.is_some()
625 if src_minor.path.is_some()
629 && src_major.path.is_none()
626 && src_major.path.is_none()
630 && action == MergeCase::Salvaged
627 && action == MergeCase::Salvaged
631 {
628 {
632 // If the file is "deleted" in the major side but was
629 // If the file is "deleted" in the major side but was
633 // salvaged by the merge, we keep the minor side alive
630 // salvaged by the merge, we keep the minor side alive
634 (MergePick::Minor, true)
631 (MergePick::Minor, true)
635 } else if src_major.path.is_some()
632 } else if src_major.path.is_some()
636 && src_minor.path.is_none()
633 && src_minor.path.is_none()
637 && action == MergeCase::Salvaged
634 && action == MergeCase::Salvaged
638 {
635 {
639 // If the file is "deleted" in the minor side but was
636 // If the file is "deleted" in the minor side but was
640 // salvaged by the merge, unconditionnaly preserve the
637 // salvaged by the merge, unconditionnaly preserve the
641 // major side.
638 // major side.
642 (MergePick::Major, true)
639 (MergePick::Major, true)
643 } else if src_minor.is_overwritten_by(src_major) {
640 } else if src_minor.is_overwritten_by(src_major) {
644 // The information from the minor version are strictly older than
641 // The information from the minor version are strictly older than
645 // the major version
642 // the major version
646 if action == MergeCase::Merged {
643 if action == MergeCase::Merged {
647 // If the file was actively merged, its means some non-copy
644 // If the file was actively merged, its means some non-copy
648 // activity happened on the other branch. It
645 // activity happened on the other branch. It
649 // mean the older copy information are still relevant.
646 // mean the older copy information are still relevant.
650 //
647 //
651 // The major side wins such conflict.
648 // The major side wins such conflict.
652 (MergePick::Major, true)
649 (MergePick::Major, true)
653 } else {
650 } else {
654 // No activity on the minor branch, pick the newer one.
651 // No activity on the minor branch, pick the newer one.
655 (MergePick::Major, false)
652 (MergePick::Major, false)
656 }
653 }
657 } else if src_major.is_overwritten_by(src_minor) {
654 } else if src_major.is_overwritten_by(src_minor) {
658 if action == MergeCase::Merged {
655 if action == MergeCase::Merged {
659 // If the file was actively merged, its means some non-copy
656 // If the file was actively merged, its means some non-copy
660 // activity happened on the other branch. It
657 // activity happened on the other branch. It
661 // mean the older copy information are still relevant.
658 // mean the older copy information are still relevant.
662 //
659 //
663 // The major side wins such conflict.
660 // The major side wins such conflict.
664 (MergePick::Major, true)
661 (MergePick::Major, true)
665 } else {
662 } else {
666 // No activity on the minor branch, pick the newer one.
663 // No activity on the minor branch, pick the newer one.
667 (MergePick::Minor, false)
664 (MergePick::Minor, false)
668 }
665 }
669 } else if src_minor.path.is_none() {
666 } else if src_minor.path.is_none() {
670 // the minor side has no relevant information, pick the alive one
667 // the minor side has no relevant information, pick the alive one
671 (MergePick::Major, true)
668 (MergePick::Major, true)
672 } else if src_major.path.is_none() {
669 } else if src_major.path.is_none() {
673 // the major side has no relevant information, pick the alive one
670 // the major side has no relevant information, pick the alive one
674 (MergePick::Minor, true)
671 (MergePick::Minor, true)
675 } else {
672 } else {
676 // by default the major side wins
673 // by default the major side wins
677 (MergePick::Major, true)
674 (MergePick::Major, true)
678 }
675 }
679 }
676 }
680 }
677 }
@@ -1,276 +1,276 b''
1 // dagops.rs
1 // dagops.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Miscellaneous DAG operations
8 //! Miscellaneous DAG operations
9 //!
9 //!
10 //! # Terminology
10 //! # Terminology
11 //! - By *relative heads* of a collection of revision numbers (`Revision`), we
11 //! - By *relative heads* of a collection of revision numbers (`Revision`), we
12 //! mean those revisions that have no children among the collection.
12 //! mean those revisions that have no children among the collection.
13 //! - Similarly *relative roots* of a collection of `Revision`, we mean those
13 //! - Similarly *relative roots* of a collection of `Revision`, we mean those
14 //! whose parents, if any, don't belong to the collection.
14 //! whose parents, if any, don't belong to the collection.
15 use super::{Graph, GraphError, Revision, NULL_REVISION};
15 use super::{Graph, GraphError, Revision, NULL_REVISION};
16 use crate::ancestors::AncestorsIterator;
16 use crate::ancestors::AncestorsIterator;
17 use std::collections::{BTreeSet, HashSet};
17 use std::collections::{BTreeSet, HashSet};
18
18
19 fn remove_parents<S: std::hash::BuildHasher>(
19 fn remove_parents<S: std::hash::BuildHasher>(
20 graph: &impl Graph,
20 graph: &impl Graph,
21 rev: Revision,
21 rev: Revision,
22 set: &mut HashSet<Revision, S>,
22 set: &mut HashSet<Revision, S>,
23 ) -> Result<(), GraphError> {
23 ) -> Result<(), GraphError> {
24 for parent in graph.parents(rev)?.iter() {
24 for parent in graph.parents(rev)?.iter() {
25 if *parent != NULL_REVISION {
25 if *parent != NULL_REVISION {
26 set.remove(parent);
26 set.remove(parent);
27 }
27 }
28 }
28 }
29 Ok(())
29 Ok(())
30 }
30 }
31
31
32 /// Relative heads out of some revisions, passed as an iterator.
32 /// Relative heads out of some revisions, passed as an iterator.
33 ///
33 ///
34 /// These heads are defined as those revisions that have no children
34 /// These heads are defined as those revisions that have no children
35 /// among those emitted by the iterator.
35 /// among those emitted by the iterator.
36 ///
36 ///
37 /// # Performance notes
37 /// # Performance notes
38 /// Internally, this clones the iterator, and builds a `HashSet` out of it.
38 /// Internally, this clones the iterator, and builds a `HashSet` out of it.
39 ///
39 ///
40 /// This function takes an `Iterator` instead of `impl IntoIterator` to
40 /// This function takes an `Iterator` instead of `impl IntoIterator` to
41 /// guarantee that cloning the iterator doesn't result in cloning the full
41 /// guarantee that cloning the iterator doesn't result in cloning the full
42 /// construct it comes from.
42 /// construct it comes from.
43 pub fn heads<'a>(
43 pub fn heads<'a>(
44 graph: &impl Graph,
44 graph: &impl Graph,
45 iter_revs: impl Clone + Iterator<Item = &'a Revision>,
45 iter_revs: impl Clone + Iterator<Item = &'a Revision>,
46 ) -> Result<HashSet<Revision>, GraphError> {
46 ) -> Result<HashSet<Revision>, GraphError> {
47 let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
47 let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
48 heads.remove(&NULL_REVISION);
48 heads.remove(&NULL_REVISION);
49 for rev in iter_revs {
49 for rev in iter_revs {
50 if *rev != NULL_REVISION {
50 if *rev != NULL_REVISION {
51 remove_parents(graph, *rev, &mut heads)?;
51 remove_parents(graph, *rev, &mut heads)?;
52 }
52 }
53 }
53 }
54 Ok(heads)
54 Ok(heads)
55 }
55 }
56
56
57 /// Retain in `revs` only its relative heads.
57 /// Retain in `revs` only its relative heads.
58 ///
58 ///
59 /// This is an in-place operation, so that control of the incoming
59 /// This is an in-place operation, so that control of the incoming
60 /// set is left to the caller.
60 /// set is left to the caller.
61 /// - a direct Python binding would probably need to build its own `HashSet`
61 /// - a direct Python binding would probably need to build its own `HashSet`
62 /// from an incoming iterable, even if its sole purpose is to extract the
62 /// from an incoming iterable, even if its sole purpose is to extract the
63 /// heads.
63 /// heads.
64 /// - a Rust caller can decide whether cloning beforehand is appropriate
64 /// - a Rust caller can decide whether cloning beforehand is appropriate
65 ///
65 ///
66 /// # Performance notes
66 /// # Performance notes
67 /// Internally, this function will store a full copy of `revs` in a `Vec`.
67 /// Internally, this function will store a full copy of `revs` in a `Vec`.
68 pub fn retain_heads<S: std::hash::BuildHasher>(
68 pub fn retain_heads<S: std::hash::BuildHasher>(
69 graph: &impl Graph,
69 graph: &impl Graph,
70 revs: &mut HashSet<Revision, S>,
70 revs: &mut HashSet<Revision, S>,
71 ) -> Result<(), GraphError> {
71 ) -> Result<(), GraphError> {
72 revs.remove(&NULL_REVISION);
72 revs.remove(&NULL_REVISION);
73 // we need to construct an iterable copy of revs to avoid itering while
73 // we need to construct an iterable copy of revs to avoid itering while
74 // mutating
74 // mutating
75 let as_vec: Vec<Revision> = revs.iter().cloned().collect();
75 let as_vec: Vec<Revision> = revs.iter().cloned().collect();
76 for rev in as_vec {
76 for rev in as_vec {
77 if rev != NULL_REVISION {
77 if rev != NULL_REVISION {
78 remove_parents(graph, rev, revs)?;
78 remove_parents(graph, rev, revs)?;
79 }
79 }
80 }
80 }
81 Ok(())
81 Ok(())
82 }
82 }
83
83
84 /// Roots of `revs`, passed as a `HashSet`
84 /// Roots of `revs`, passed as a `HashSet`
85 ///
85 ///
86 /// They are returned in arbitrary order
86 /// They are returned in arbitrary order
87 pub fn roots<G: Graph, S: std::hash::BuildHasher>(
87 pub fn roots<G: Graph, S: std::hash::BuildHasher>(
88 graph: &G,
88 graph: &G,
89 revs: &HashSet<Revision, S>,
89 revs: &HashSet<Revision, S>,
90 ) -> Result<Vec<Revision>, GraphError> {
90 ) -> Result<Vec<Revision>, GraphError> {
91 let mut roots: Vec<Revision> = Vec::new();
91 let mut roots: Vec<Revision> = Vec::new();
92 for rev in revs {
92 for rev in revs {
93 if graph
93 if graph
94 .parents(*rev)?
94 .parents(*rev)?
95 .iter()
95 .iter()
96 .filter(|p| **p != NULL_REVISION)
96 .filter(|p| **p != NULL_REVISION)
97 .all(|p| !revs.contains(p))
97 .all(|p| !revs.contains(p))
98 {
98 {
99 roots.push(*rev);
99 roots.push(*rev);
100 }
100 }
101 }
101 }
102 Ok(roots)
102 Ok(roots)
103 }
103 }
104
104
105 /// Compute the topological range between two collections of revisions
105 /// Compute the topological range between two collections of revisions
106 ///
106 ///
107 /// This is equivalent to the revset `<roots>::<heads>`.
107 /// This is equivalent to the revset `<roots>::<heads>`.
108 ///
108 ///
109 /// Currently, the given `Graph` has to implement `Clone`, which means
109 /// Currently, the given `Graph` has to implement `Clone`, which means
110 /// actually cloning just a reference-counted Python pointer if
110 /// actually cloning just a reference-counted Python pointer if
111 /// it's passed over through `rust-cpython`. This is due to the internal
111 /// it's passed over through `rust-cpython`. This is due to the internal
112 /// use of `AncestorsIterator`
112 /// use of `AncestorsIterator`
113 ///
113 ///
114 /// # Algorithmic details
114 /// # Algorithmic details
115 ///
115 ///
116 /// This is a two-pass swipe inspired from what `reachableroots2` from
116 /// This is a two-pass swipe inspired from what `reachableroots2` from
117 /// `mercurial.cext.parsers` does to obtain the same results.
117 /// `mercurial.cext.parsers` does to obtain the same results.
118 ///
118 ///
119 /// - first, we climb up the DAG from `heads` in topological order, keeping
119 /// - first, we climb up the DAG from `heads` in topological order, keeping
120 /// them in the vector `heads_ancestors` vector, and adding any element of
120 /// them in the vector `heads_ancestors` vector, and adding any element of
121 /// `roots` we find among them to the resulting range.
121 /// `roots` we find among them to the resulting range.
122 /// - Then, we iterate on that recorded vector so that a revision is always
122 /// - Then, we iterate on that recorded vector so that a revision is always
123 /// emitted after its parents and add all revisions whose parents are already
123 /// emitted after its parents and add all revisions whose parents are already
124 /// in the range to the results.
124 /// in the range to the results.
125 ///
125 ///
126 /// # Performance notes
126 /// # Performance notes
127 ///
127 ///
128 /// The main difference with the C implementation is that
128 /// The main difference with the C implementation is that
129 /// the latter uses a flat array with bit flags, instead of complex structures
129 /// the latter uses a flat array with bit flags, instead of complex structures
130 /// like `HashSet`, making it faster in most scenarios. In theory, it's
130 /// like `HashSet`, making it faster in most scenarios. In theory, it's
131 /// possible that the present implementation could be more memory efficient
131 /// possible that the present implementation could be more memory efficient
132 /// for very large repositories with many branches.
132 /// for very large repositories with many branches.
133 pub fn range(
133 pub fn range(
134 graph: &(impl Graph + Clone),
134 graph: &(impl Graph + Clone),
135 roots: impl IntoIterator<Item = Revision>,
135 roots: impl IntoIterator<Item = Revision>,
136 heads: impl IntoIterator<Item = Revision>,
136 heads: impl IntoIterator<Item = Revision>,
137 ) -> Result<BTreeSet<Revision>, GraphError> {
137 ) -> Result<BTreeSet<Revision>, GraphError> {
138 let mut range = BTreeSet::new();
138 let mut range = BTreeSet::new();
139 let roots: HashSet<Revision> = roots.into_iter().collect();
139 let roots: HashSet<Revision> = roots.into_iter().collect();
140 let min_root: Revision = match roots.iter().cloned().min() {
140 let min_root: Revision = match roots.iter().cloned().min() {
141 None => {
141 None => {
142 return Ok(range);
142 return Ok(range);
143 }
143 }
144 Some(r) => r,
144 Some(r) => r,
145 };
145 };
146
146
147 // Internally, AncestorsIterator currently maintains a `HashSet`
147 // Internally, AncestorsIterator currently maintains a `HashSet`
148 // of all seen revision, which is also what we record, albeit in an ordered
148 // of all seen revision, which is also what we record, albeit in an ordered
149 // way. There's room for improvement on this duplication.
149 // way. There's room for improvement on this duplication.
150 let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
150 let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
151 let mut heads_ancestors: Vec<Revision> = Vec::new();
151 let mut heads_ancestors: Vec<Revision> = Vec::new();
152 for revres in ait {
152 for revres in ait {
153 let rev = revres?;
153 let rev = revres?;
154 if roots.contains(&rev) {
154 if roots.contains(&rev) {
155 range.insert(rev);
155 range.insert(rev);
156 }
156 }
157 heads_ancestors.push(rev);
157 heads_ancestors.push(rev);
158 }
158 }
159
159
160 for rev in heads_ancestors.into_iter().rev() {
160 for rev in heads_ancestors.into_iter().rev() {
161 for parent in graph.parents(rev)?.iter() {
161 for parent in graph.parents(rev)?.iter() {
162 if *parent != NULL_REVISION && range.contains(parent) {
162 if *parent != NULL_REVISION && range.contains(parent) {
163 range.insert(rev);
163 range.insert(rev);
164 }
164 }
165 }
165 }
166 }
166 }
167 Ok(range)
167 Ok(range)
168 }
168 }
169
169
170 #[cfg(test)]
170 #[cfg(test)]
171 mod tests {
171 mod tests {
172
172
173 use super::*;
173 use super::*;
174 use crate::testing::SampleGraph;
174 use crate::testing::SampleGraph;
175
175
176 /// Apply `retain_heads()` to the given slice and return as a sorted `Vec`
176 /// Apply `retain_heads()` to the given slice and return as a sorted `Vec`
177 fn retain_heads_sorted(
177 fn retain_heads_sorted(
178 graph: &impl Graph,
178 graph: &impl Graph,
179 revs: &[Revision],
179 revs: &[Revision],
180 ) -> Result<Vec<Revision>, GraphError> {
180 ) -> Result<Vec<Revision>, GraphError> {
181 let mut revs: HashSet<Revision> = revs.iter().cloned().collect();
181 let mut revs: HashSet<Revision> = revs.iter().cloned().collect();
182 retain_heads(graph, &mut revs)?;
182 retain_heads(graph, &mut revs)?;
183 let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
183 let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
184 as_vec.sort();
184 as_vec.sort_unstable();
185 Ok(as_vec)
185 Ok(as_vec)
186 }
186 }
187
187
188 #[test]
188 #[test]
189 fn test_retain_heads() -> Result<(), GraphError> {
189 fn test_retain_heads() -> Result<(), GraphError> {
190 assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
190 assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
191 assert_eq!(
191 assert_eq!(
192 retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
192 retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
193 vec![1, 6, 12]
193 vec![1, 6, 12]
194 );
194 );
195 assert_eq!(
195 assert_eq!(
196 retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
196 retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
197 vec![3, 5, 8, 9]
197 vec![3, 5, 8, 9]
198 );
198 );
199 Ok(())
199 Ok(())
200 }
200 }
201
201
202 /// Apply `heads()` to the given slice and return as a sorted `Vec`
202 /// Apply `heads()` to the given slice and return as a sorted `Vec`
203 fn heads_sorted(
203 fn heads_sorted(
204 graph: &impl Graph,
204 graph: &impl Graph,
205 revs: &[Revision],
205 revs: &[Revision],
206 ) -> Result<Vec<Revision>, GraphError> {
206 ) -> Result<Vec<Revision>, GraphError> {
207 let heads = heads(graph, revs.iter())?;
207 let heads = heads(graph, revs.iter())?;
208 let mut as_vec: Vec<Revision> = heads.iter().cloned().collect();
208 let mut as_vec: Vec<Revision> = heads.iter().cloned().collect();
209 as_vec.sort();
209 as_vec.sort_unstable();
210 Ok(as_vec)
210 Ok(as_vec)
211 }
211 }
212
212
213 #[test]
213 #[test]
214 fn test_heads() -> Result<(), GraphError> {
214 fn test_heads() -> Result<(), GraphError> {
215 assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
215 assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
216 assert_eq!(
216 assert_eq!(
217 heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
217 heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
218 vec![1, 6, 12]
218 vec![1, 6, 12]
219 );
219 );
220 assert_eq!(
220 assert_eq!(
221 heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
221 heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
222 vec![3, 5, 8, 9]
222 vec![3, 5, 8, 9]
223 );
223 );
224 Ok(())
224 Ok(())
225 }
225 }
226
226
227 /// Apply `roots()` and sort the result for easier comparison
227 /// Apply `roots()` and sort the result for easier comparison
228 fn roots_sorted(
228 fn roots_sorted(
229 graph: &impl Graph,
229 graph: &impl Graph,
230 revs: &[Revision],
230 revs: &[Revision],
231 ) -> Result<Vec<Revision>, GraphError> {
231 ) -> Result<Vec<Revision>, GraphError> {
232 let set: HashSet<_> = revs.iter().cloned().collect();
232 let set: HashSet<_> = revs.iter().cloned().collect();
233 let mut as_vec = roots(graph, &set)?;
233 let mut as_vec = roots(graph, &set)?;
234 as_vec.sort();
234 as_vec.sort_unstable();
235 Ok(as_vec)
235 Ok(as_vec)
236 }
236 }
237
237
238 #[test]
238 #[test]
239 fn test_roots() -> Result<(), GraphError> {
239 fn test_roots() -> Result<(), GraphError> {
240 assert_eq!(roots_sorted(&SampleGraph, &[4, 5, 6])?, vec![4]);
240 assert_eq!(roots_sorted(&SampleGraph, &[4, 5, 6])?, vec![4]);
241 assert_eq!(
241 assert_eq!(
242 roots_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
242 roots_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
243 vec![0, 4, 12]
243 vec![0, 4, 12]
244 );
244 );
245 assert_eq!(
245 assert_eq!(
246 roots_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
246 roots_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
247 vec![1, 8]
247 vec![1, 8]
248 );
248 );
249 Ok(())
249 Ok(())
250 }
250 }
251
251
252 /// Apply `range()` and convert the result into a Vec for easier comparison
252 /// Apply `range()` and convert the result into a Vec for easier comparison
253 fn range_vec(
253 fn range_vec(
254 graph: impl Graph + Clone,
254 graph: impl Graph + Clone,
255 roots: &[Revision],
255 roots: &[Revision],
256 heads: &[Revision],
256 heads: &[Revision],
257 ) -> Result<Vec<Revision>, GraphError> {
257 ) -> Result<Vec<Revision>, GraphError> {
258 range(&graph, roots.iter().cloned(), heads.iter().cloned())
258 range(&graph, roots.iter().cloned(), heads.iter().cloned())
259 .map(|bs| bs.into_iter().collect())
259 .map(|bs| bs.into_iter().collect())
260 }
260 }
261
261
262 #[test]
262 #[test]
263 fn test_range() -> Result<(), GraphError> {
263 fn test_range() -> Result<(), GraphError> {
264 assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
264 assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
265 assert_eq!(range_vec(SampleGraph, &[0], &[8])?, vec![]);
265 assert_eq!(range_vec(SampleGraph, &[0], &[8])?, vec![]);
266 assert_eq!(
266 assert_eq!(
267 range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
267 range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
268 vec![5, 10]
268 vec![5, 10]
269 );
269 );
270 assert_eq!(
270 assert_eq!(
271 range_vec(SampleGraph, &[5, 6], &[10, 12])?,
271 range_vec(SampleGraph, &[5, 6], &[10, 12])?,
272 vec![5, 6, 9, 10, 12]
272 vec![5, 6, 9, 10, 12]
273 );
273 );
274 Ok(())
274 Ok(())
275 }
275 }
276 }
276 }
@@ -1,50 +1,50 b''
1 // dirstate module
1 // dirstate module
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
8 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
9 use crate::revlog::node::NULL_NODE;
9 use crate::revlog::node::NULL_NODE;
10 use crate::revlog::Node;
10 use crate::revlog::Node;
11 use crate::utils::hg_path::HgPath;
11 use crate::utils::hg_path::HgPath;
12 use bytes_cast::BytesCast;
12 use bytes_cast::BytesCast;
13
13
14 pub mod dirs_multiset;
14 pub mod dirs_multiset;
15 pub mod entry;
15 pub mod entry;
16 pub mod parsers;
16 pub mod parsers;
17 pub mod status;
17 pub mod status;
18
18
19 pub use self::entry::*;
19 pub use self::entry::*;
20
20
21 #[derive(Debug, PartialEq, Copy, Clone, BytesCast)]
21 #[derive(Debug, PartialEq, Copy, Clone, BytesCast)]
22 #[repr(C)]
22 #[repr(C)]
23 pub struct DirstateParents {
23 pub struct DirstateParents {
24 pub p1: Node,
24 pub p1: Node,
25 pub p2: Node,
25 pub p2: Node,
26 }
26 }
27
27
28 impl DirstateParents {
28 impl DirstateParents {
29 pub const NULL: Self = Self {
29 pub const NULL: Self = Self {
30 p1: NULL_NODE,
30 p1: NULL_NODE,
31 p2: NULL_NODE,
31 p2: NULL_NODE,
32 };
32 };
33
33
34 pub fn is_merge(&self) -> bool {
34 pub fn is_merge(&self) -> bool {
35 return !(self.p2 == NULL_NODE);
35 !(self.p2 == NULL_NODE)
36 }
36 }
37 }
37 }
38
38
39 pub type StateMapIter<'a> = Box<
39 pub type StateMapIter<'a> = Box<
40 dyn Iterator<
40 dyn Iterator<
41 Item = Result<(&'a HgPath, DirstateEntry), DirstateV2ParseError>,
41 Item = Result<(&'a HgPath, DirstateEntry), DirstateV2ParseError>,
42 > + Send
42 > + Send
43 + 'a,
43 + 'a,
44 >;
44 >;
45
45
46 pub type CopyMapIter<'a> = Box<
46 pub type CopyMapIter<'a> = Box<
47 dyn Iterator<Item = Result<(&'a HgPath, &'a HgPath), DirstateV2ParseError>>
47 dyn Iterator<Item = Result<(&'a HgPath, &'a HgPath), DirstateV2ParseError>>
48 + Send
48 + Send
49 + 'a,
49 + 'a,
50 >;
50 >;
@@ -1,418 +1,418 b''
1 // dirs_multiset.rs
1 // dirs_multiset.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! A multiset of directory names.
8 //! A multiset of directory names.
9 //!
9 //!
10 //! Used to counts the references to directories in a manifest or dirstate.
10 //! Used to counts the references to directories in a manifest or dirstate.
11 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
11 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
12 use crate::{
12 use crate::{
13 utils::{
13 utils::{
14 files,
14 files,
15 hg_path::{HgPath, HgPathBuf, HgPathError},
15 hg_path::{HgPath, HgPathBuf, HgPathError},
16 },
16 },
17 DirstateEntry, DirstateError, DirstateMapError, FastHashMap,
17 DirstateEntry, DirstateError, DirstateMapError, FastHashMap,
18 };
18 };
19 use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet};
19 use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet};
20
20
21 // could be encapsulated if we care API stability more seriously
21 // could be encapsulated if we care API stability more seriously
22 pub type DirsMultisetIter<'a> = hash_map::Keys<'a, HgPathBuf, u32>;
22 pub type DirsMultisetIter<'a> = hash_map::Keys<'a, HgPathBuf, u32>;
23
23
24 #[derive(PartialEq, Debug)]
24 #[derive(PartialEq, Debug)]
25 pub struct DirsMultiset {
25 pub struct DirsMultiset {
26 inner: FastHashMap<HgPathBuf, u32>,
26 inner: FastHashMap<HgPathBuf, u32>,
27 }
27 }
28
28
29 impl DirsMultiset {
29 impl DirsMultiset {
30 /// Initializes the multiset from a dirstate.
30 /// Initializes the multiset from a dirstate.
31 ///
31 ///
32 /// If `skip_state` is provided, skips dirstate entries with equal state.
32 /// If `skip_state` is provided, skips dirstate entries with equal state.
33 pub fn from_dirstate<I, P>(
33 pub fn from_dirstate<I, P>(
34 dirstate: I,
34 dirstate: I,
35 only_tracked: bool,
35 only_tracked: bool,
36 ) -> Result<Self, DirstateError>
36 ) -> Result<Self, DirstateError>
37 where
37 where
38 I: IntoIterator<
38 I: IntoIterator<
39 Item = Result<(P, DirstateEntry), DirstateV2ParseError>,
39 Item = Result<(P, DirstateEntry), DirstateV2ParseError>,
40 >,
40 >,
41 P: AsRef<HgPath>,
41 P: AsRef<HgPath>,
42 {
42 {
43 let mut multiset = DirsMultiset {
43 let mut multiset = DirsMultiset {
44 inner: FastHashMap::default(),
44 inner: FastHashMap::default(),
45 };
45 };
46 for item in dirstate {
46 for item in dirstate {
47 let (filename, entry) = item?;
47 let (filename, entry) = item?;
48 let filename = filename.as_ref();
48 let filename = filename.as_ref();
49 // This `if` is optimized out of the loop
49 // This `if` is optimized out of the loop
50 if only_tracked {
50 if only_tracked {
51 if !entry.removed() {
51 if !entry.removed() {
52 multiset.add_path(filename)?;
52 multiset.add_path(filename)?;
53 }
53 }
54 } else {
54 } else {
55 multiset.add_path(filename)?;
55 multiset.add_path(filename)?;
56 }
56 }
57 }
57 }
58
58
59 Ok(multiset)
59 Ok(multiset)
60 }
60 }
61
61
62 /// Initializes the multiset from a manifest.
62 /// Initializes the multiset from a manifest.
63 pub fn from_manifest(
63 pub fn from_manifest(
64 manifest: &[impl AsRef<HgPath>],
64 manifest: &[impl AsRef<HgPath>],
65 ) -> Result<Self, DirstateMapError> {
65 ) -> Result<Self, DirstateMapError> {
66 let mut multiset = DirsMultiset {
66 let mut multiset = DirsMultiset {
67 inner: FastHashMap::default(),
67 inner: FastHashMap::default(),
68 };
68 };
69
69
70 for filename in manifest {
70 for filename in manifest {
71 multiset.add_path(filename.as_ref())?;
71 multiset.add_path(filename.as_ref())?;
72 }
72 }
73
73
74 Ok(multiset)
74 Ok(multiset)
75 }
75 }
76
76
77 /// Increases the count of deepest directory contained in the path.
77 /// Increases the count of deepest directory contained in the path.
78 ///
78 ///
79 /// If the directory is not yet in the map, adds its parents.
79 /// If the directory is not yet in the map, adds its parents.
80 pub fn add_path(
80 pub fn add_path(
81 &mut self,
81 &mut self,
82 path: impl AsRef<HgPath>,
82 path: impl AsRef<HgPath>,
83 ) -> Result<(), DirstateMapError> {
83 ) -> Result<(), DirstateMapError> {
84 for subpath in files::find_dirs(path.as_ref()) {
84 for subpath in files::find_dirs(path.as_ref()) {
85 if subpath.as_bytes().last() == Some(&b'/') {
85 if subpath.as_bytes().last() == Some(&b'/') {
86 // TODO Remove this once PathAuditor is certified
86 // TODO Remove this once PathAuditor is certified
87 // as the only entrypoint for path data
87 // as the only entrypoint for path data
88 let second_slash_index = subpath.len() - 1;
88 let second_slash_index = subpath.len() - 1;
89
89
90 return Err(DirstateMapError::InvalidPath(
90 return Err(DirstateMapError::InvalidPath(
91 HgPathError::ConsecutiveSlashes {
91 HgPathError::ConsecutiveSlashes {
92 bytes: path.as_ref().as_bytes().to_owned(),
92 bytes: path.as_ref().as_bytes().to_owned(),
93 second_slash_index,
93 second_slash_index,
94 },
94 },
95 ));
95 ));
96 }
96 }
97 if let Some(val) = self.inner.get_mut(subpath) {
97 if let Some(val) = self.inner.get_mut(subpath) {
98 *val += 1;
98 *val += 1;
99 break;
99 break;
100 }
100 }
101 self.inner.insert(subpath.to_owned(), 1);
101 self.inner.insert(subpath.to_owned(), 1);
102 }
102 }
103 Ok(())
103 Ok(())
104 }
104 }
105
105
106 /// Decreases the count of deepest directory contained in the path.
106 /// Decreases the count of deepest directory contained in the path.
107 ///
107 ///
108 /// If it is the only reference, decreases all parents until one is
108 /// If it is the only reference, decreases all parents until one is
109 /// removed.
109 /// removed.
110 /// If the directory is not in the map, something horrible has happened.
110 /// If the directory is not in the map, something horrible has happened.
111 pub fn delete_path(
111 pub fn delete_path(
112 &mut self,
112 &mut self,
113 path: impl AsRef<HgPath>,
113 path: impl AsRef<HgPath>,
114 ) -> Result<(), DirstateMapError> {
114 ) -> Result<(), DirstateMapError> {
115 for subpath in files::find_dirs(path.as_ref()) {
115 for subpath in files::find_dirs(path.as_ref()) {
116 match self.inner.entry(subpath.to_owned()) {
116 match self.inner.entry(subpath.to_owned()) {
117 Entry::Occupied(mut entry) => {
117 Entry::Occupied(mut entry) => {
118 let val = *entry.get();
118 let val = *entry.get();
119 if val > 1 {
119 if val > 1 {
120 entry.insert(val - 1);
120 entry.insert(val - 1);
121 break;
121 break;
122 }
122 }
123 entry.remove();
123 entry.remove();
124 }
124 }
125 Entry::Vacant(_) => {
125 Entry::Vacant(_) => {
126 return Err(DirstateMapError::PathNotFound(
126 return Err(DirstateMapError::PathNotFound(
127 path.as_ref().to_owned(),
127 path.as_ref().to_owned(),
128 ))
128 ))
129 }
129 }
130 };
130 };
131 }
131 }
132
132
133 Ok(())
133 Ok(())
134 }
134 }
135
135
136 pub fn contains(&self, key: impl AsRef<HgPath>) -> bool {
136 pub fn contains(&self, key: impl AsRef<HgPath>) -> bool {
137 self.inner.contains_key(key.as_ref())
137 self.inner.contains_key(key.as_ref())
138 }
138 }
139
139
140 pub fn iter(&self) -> DirsMultisetIter {
140 pub fn iter(&self) -> DirsMultisetIter {
141 self.inner.keys()
141 self.inner.keys()
142 }
142 }
143
143
144 pub fn len(&self) -> usize {
144 pub fn len(&self) -> usize {
145 self.inner.len()
145 self.inner.len()
146 }
146 }
147
147
148 pub fn is_empty(&self) -> bool {
148 pub fn is_empty(&self) -> bool {
149 self.len() == 0
149 self.len() == 0
150 }
150 }
151 }
151 }
152
152
153 /// This is basically a reimplementation of `DirsMultiset` that stores the
153 /// This is basically a reimplementation of `DirsMultiset` that stores the
154 /// children instead of just a count of them, plus a small optional
154 /// children instead of just a count of them, plus a small optional
155 /// optimization to avoid some directories we don't need.
155 /// optimization to avoid some directories we don't need.
156 #[derive(PartialEq, Debug)]
156 #[derive(PartialEq, Debug)]
157 pub struct DirsChildrenMultiset<'a> {
157 pub struct DirsChildrenMultiset<'a> {
158 inner: FastHashMap<&'a HgPath, HashSet<&'a HgPath>>,
158 inner: FastHashMap<&'a HgPath, HashSet<&'a HgPath>>,
159 only_include: Option<HashSet<&'a HgPath>>,
159 only_include: Option<HashSet<&'a HgPath>>,
160 }
160 }
161
161
162 impl<'a> DirsChildrenMultiset<'a> {
162 impl<'a> DirsChildrenMultiset<'a> {
163 pub fn new(
163 pub fn new(
164 paths: impl Iterator<Item = &'a HgPathBuf>,
164 paths: impl Iterator<Item = &'a HgPathBuf>,
165 only_include: Option<&'a HashSet<impl AsRef<HgPath> + 'a>>,
165 only_include: Option<&'a HashSet<impl AsRef<HgPath> + 'a>>,
166 ) -> Self {
166 ) -> Self {
167 let mut new = Self {
167 let mut new = Self {
168 inner: HashMap::default(),
168 inner: HashMap::default(),
169 only_include: only_include
169 only_include: only_include
170 .map(|s| s.iter().map(AsRef::as_ref).collect()),
170 .map(|s| s.iter().map(AsRef::as_ref).collect()),
171 };
171 };
172
172
173 for path in paths {
173 for path in paths {
174 new.add_path(path)
174 new.add_path(path)
175 }
175 }
176
176
177 new
177 new
178 }
178 }
179 fn add_path(&mut self, path: &'a (impl AsRef<HgPath> + 'a)) {
179 fn add_path(&mut self, path: &'a (impl AsRef<HgPath> + 'a)) {
180 if path.as_ref().is_empty() {
180 if path.as_ref().is_empty() {
181 return;
181 return;
182 }
182 }
183 for (directory, basename) in files::find_dirs_with_base(path.as_ref())
183 for (directory, basename) in files::find_dirs_with_base(path.as_ref())
184 {
184 {
185 if !self.is_dir_included(directory) {
185 if !self.is_dir_included(directory) {
186 continue;
186 continue;
187 }
187 }
188 self.inner
188 self.inner
189 .entry(directory)
189 .entry(directory)
190 .and_modify(|e| {
190 .and_modify(|e| {
191 e.insert(basename);
191 e.insert(basename);
192 })
192 })
193 .or_insert_with(|| {
193 .or_insert_with(|| {
194 let mut set = HashSet::new();
194 let mut set = HashSet::new();
195 set.insert(basename);
195 set.insert(basename);
196 set
196 set
197 });
197 });
198 }
198 }
199 }
199 }
200 fn is_dir_included(&self, dir: impl AsRef<HgPath>) -> bool {
200 fn is_dir_included(&self, dir: impl AsRef<HgPath>) -> bool {
201 match &self.only_include {
201 match &self.only_include {
202 None => false,
202 None => false,
203 Some(i) => i.contains(dir.as_ref()),
203 Some(i) => i.contains(dir.as_ref()),
204 }
204 }
205 }
205 }
206
206
207 pub fn get(
207 pub fn get(
208 &self,
208 &self,
209 path: impl AsRef<HgPath>,
209 path: impl AsRef<HgPath>,
210 ) -> Option<&HashSet<&'a HgPath>> {
210 ) -> Option<&HashSet<&'a HgPath>> {
211 self.inner.get(path.as_ref())
211 self.inner.get(path.as_ref())
212 }
212 }
213 }
213 }
214
214
215 #[cfg(test)]
215 #[cfg(test)]
216 mod tests {
216 mod tests {
217 use crate::EntryState;
217 use crate::EntryState;
218
218
219 use super::*;
219 use super::*;
220
220
221 #[test]
221 #[test]
222 fn test_delete_path_path_not_found() {
222 fn test_delete_path_path_not_found() {
223 let manifest: Vec<HgPathBuf> = vec![];
223 let manifest: Vec<HgPathBuf> = vec![];
224 let mut map = DirsMultiset::from_manifest(&manifest).unwrap();
224 let mut map = DirsMultiset::from_manifest(&manifest).unwrap();
225 let path = HgPathBuf::from_bytes(b"doesnotexist/");
225 let path = HgPathBuf::from_bytes(b"doesnotexist/");
226 assert_eq!(
226 assert_eq!(
227 Err(DirstateMapError::PathNotFound(path.to_owned())),
227 Err(DirstateMapError::PathNotFound(path.to_owned())),
228 map.delete_path(&path)
228 map.delete_path(&path)
229 );
229 );
230 }
230 }
231
231
232 #[test]
232 #[test]
233 fn test_delete_path_empty_path() {
233 fn test_delete_path_empty_path() {
234 let mut map =
234 let mut map =
235 DirsMultiset::from_manifest(&vec![HgPathBuf::new()]).unwrap();
235 DirsMultiset::from_manifest(&[HgPathBuf::new()]).unwrap();
236 let path = HgPath::new(b"");
236 let path = HgPath::new(b"");
237 assert_eq!(Ok(()), map.delete_path(path));
237 assert_eq!(Ok(()), map.delete_path(path));
238 assert_eq!(
238 assert_eq!(
239 Err(DirstateMapError::PathNotFound(path.to_owned())),
239 Err(DirstateMapError::PathNotFound(path.to_owned())),
240 map.delete_path(path)
240 map.delete_path(path)
241 );
241 );
242 }
242 }
243
243
244 #[test]
244 #[test]
245 fn test_delete_path_successful() {
245 fn test_delete_path_successful() {
246 let mut map = DirsMultiset {
246 let mut map = DirsMultiset {
247 inner: [("", 5), ("a", 3), ("a/b", 2), ("a/c", 1)]
247 inner: [("", 5), ("a", 3), ("a/b", 2), ("a/c", 1)]
248 .iter()
248 .iter()
249 .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
249 .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
250 .collect(),
250 .collect(),
251 };
251 };
252
252
253 assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/b/")));
253 assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/b/")));
254 eprintln!("{:?}", map);
254 eprintln!("{:?}", map);
255 assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/b/")));
255 assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/b/")));
256 eprintln!("{:?}", map);
256 eprintln!("{:?}", map);
257 assert_eq!(
257 assert_eq!(
258 Err(DirstateMapError::PathNotFound(HgPathBuf::from_bytes(
258 Err(DirstateMapError::PathNotFound(HgPathBuf::from_bytes(
259 b"a/b/"
259 b"a/b/"
260 ))),
260 ))),
261 map.delete_path(HgPath::new(b"a/b/"))
261 map.delete_path(HgPath::new(b"a/b/"))
262 );
262 );
263
263
264 assert_eq!(2, *map.inner.get(HgPath::new(b"a")).unwrap());
264 assert_eq!(2, *map.inner.get(HgPath::new(b"a")).unwrap());
265 assert_eq!(1, *map.inner.get(HgPath::new(b"a/c")).unwrap());
265 assert_eq!(1, *map.inner.get(HgPath::new(b"a/c")).unwrap());
266 eprintln!("{:?}", map);
266 eprintln!("{:?}", map);
267 assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/")));
267 assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/")));
268 eprintln!("{:?}", map);
268 eprintln!("{:?}", map);
269
269
270 assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/c/")));
270 assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/c/")));
271 assert_eq!(
271 assert_eq!(
272 Err(DirstateMapError::PathNotFound(HgPathBuf::from_bytes(
272 Err(DirstateMapError::PathNotFound(HgPathBuf::from_bytes(
273 b"a/c/"
273 b"a/c/"
274 ))),
274 ))),
275 map.delete_path(HgPath::new(b"a/c/"))
275 map.delete_path(HgPath::new(b"a/c/"))
276 );
276 );
277 }
277 }
278
278
279 #[test]
279 #[test]
280 fn test_add_path_empty_path() {
280 fn test_add_path_empty_path() {
281 let manifest: Vec<HgPathBuf> = vec![];
281 let manifest: Vec<HgPathBuf> = vec![];
282 let mut map = DirsMultiset::from_manifest(&manifest).unwrap();
282 let mut map = DirsMultiset::from_manifest(&manifest).unwrap();
283 let path = HgPath::new(b"");
283 let path = HgPath::new(b"");
284 map.add_path(path).unwrap();
284 map.add_path(path).unwrap();
285
285
286 assert_eq!(1, map.len());
286 assert_eq!(1, map.len());
287 }
287 }
288
288
289 #[test]
289 #[test]
290 fn test_add_path_successful() {
290 fn test_add_path_successful() {
291 let manifest: Vec<HgPathBuf> = vec![];
291 let manifest: Vec<HgPathBuf> = vec![];
292 let mut map = DirsMultiset::from_manifest(&manifest).unwrap();
292 let mut map = DirsMultiset::from_manifest(&manifest).unwrap();
293
293
294 map.add_path(HgPath::new(b"a/")).unwrap();
294 map.add_path(HgPath::new(b"a/")).unwrap();
295 assert_eq!(1, *map.inner.get(HgPath::new(b"a")).unwrap());
295 assert_eq!(1, *map.inner.get(HgPath::new(b"a")).unwrap());
296 assert_eq!(1, *map.inner.get(HgPath::new(b"")).unwrap());
296 assert_eq!(1, *map.inner.get(HgPath::new(b"")).unwrap());
297 assert_eq!(2, map.len());
297 assert_eq!(2, map.len());
298
298
299 // Non directory should be ignored
299 // Non directory should be ignored
300 map.add_path(HgPath::new(b"a")).unwrap();
300 map.add_path(HgPath::new(b"a")).unwrap();
301 assert_eq!(1, *map.inner.get(HgPath::new(b"a")).unwrap());
301 assert_eq!(1, *map.inner.get(HgPath::new(b"a")).unwrap());
302 assert_eq!(2, map.len());
302 assert_eq!(2, map.len());
303
303
304 // Non directory will still add its base
304 // Non directory will still add its base
305 map.add_path(HgPath::new(b"a/b")).unwrap();
305 map.add_path(HgPath::new(b"a/b")).unwrap();
306 assert_eq!(2, *map.inner.get(HgPath::new(b"a")).unwrap());
306 assert_eq!(2, *map.inner.get(HgPath::new(b"a")).unwrap());
307 assert_eq!(2, map.len());
307 assert_eq!(2, map.len());
308
308
309 // Duplicate path works
309 // Duplicate path works
310 map.add_path(HgPath::new(b"a/")).unwrap();
310 map.add_path(HgPath::new(b"a/")).unwrap();
311 assert_eq!(3, *map.inner.get(HgPath::new(b"a")).unwrap());
311 assert_eq!(3, *map.inner.get(HgPath::new(b"a")).unwrap());
312
312
313 // Nested dir adds to its base
313 // Nested dir adds to its base
314 map.add_path(HgPath::new(b"a/b/")).unwrap();
314 map.add_path(HgPath::new(b"a/b/")).unwrap();
315 assert_eq!(4, *map.inner.get(HgPath::new(b"a")).unwrap());
315 assert_eq!(4, *map.inner.get(HgPath::new(b"a")).unwrap());
316 assert_eq!(1, *map.inner.get(HgPath::new(b"a/b")).unwrap());
316 assert_eq!(1, *map.inner.get(HgPath::new(b"a/b")).unwrap());
317
317
318 // but not its base's base, because it already existed
318 // but not its base's base, because it already existed
319 map.add_path(HgPath::new(b"a/b/c/")).unwrap();
319 map.add_path(HgPath::new(b"a/b/c/")).unwrap();
320 assert_eq!(4, *map.inner.get(HgPath::new(b"a")).unwrap());
320 assert_eq!(4, *map.inner.get(HgPath::new(b"a")).unwrap());
321 assert_eq!(2, *map.inner.get(HgPath::new(b"a/b")).unwrap());
321 assert_eq!(2, *map.inner.get(HgPath::new(b"a/b")).unwrap());
322
322
323 map.add_path(HgPath::new(b"a/c/")).unwrap();
323 map.add_path(HgPath::new(b"a/c/")).unwrap();
324 assert_eq!(1, *map.inner.get(HgPath::new(b"a/c")).unwrap());
324 assert_eq!(1, *map.inner.get(HgPath::new(b"a/c")).unwrap());
325
325
326 let expected = DirsMultiset {
326 let expected = DirsMultiset {
327 inner: [("", 2), ("a", 5), ("a/b", 2), ("a/b/c", 1), ("a/c", 1)]
327 inner: [("", 2), ("a", 5), ("a/b", 2), ("a/b/c", 1), ("a/c", 1)]
328 .iter()
328 .iter()
329 .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
329 .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
330 .collect(),
330 .collect(),
331 };
331 };
332 assert_eq!(map, expected);
332 assert_eq!(map, expected);
333 }
333 }
334
334
335 #[test]
335 #[test]
336 fn test_dirsmultiset_new_empty() {
336 fn test_dirsmultiset_new_empty() {
337 let manifest: Vec<HgPathBuf> = vec![];
337 let manifest: Vec<HgPathBuf> = vec![];
338 let new = DirsMultiset::from_manifest(&manifest).unwrap();
338 let new = DirsMultiset::from_manifest(&manifest).unwrap();
339 let expected = DirsMultiset {
339 let expected = DirsMultiset {
340 inner: FastHashMap::default(),
340 inner: FastHashMap::default(),
341 };
341 };
342 assert_eq!(expected, new);
342 assert_eq!(expected, new);
343
343
344 let new = DirsMultiset::from_dirstate::<_, HgPathBuf>(
344 let new = DirsMultiset::from_dirstate::<_, HgPathBuf>(
345 std::iter::empty(),
345 std::iter::empty(),
346 false,
346 false,
347 )
347 )
348 .unwrap();
348 .unwrap();
349 let expected = DirsMultiset {
349 let expected = DirsMultiset {
350 inner: FastHashMap::default(),
350 inner: FastHashMap::default(),
351 };
351 };
352 assert_eq!(expected, new);
352 assert_eq!(expected, new);
353 }
353 }
354
354
355 #[test]
355 #[test]
356 fn test_dirsmultiset_new_no_skip() {
356 fn test_dirsmultiset_new_no_skip() {
357 let input_vec: Vec<HgPathBuf> = ["a/", "b/", "a/c", "a/d/"]
357 let input_vec: Vec<HgPathBuf> = ["a/", "b/", "a/c", "a/d/"]
358 .iter()
358 .iter()
359 .map(|e| HgPathBuf::from_bytes(e.as_bytes()))
359 .map(|e| HgPathBuf::from_bytes(e.as_bytes()))
360 .collect();
360 .collect();
361 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
361 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
362 .iter()
362 .iter()
363 .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
363 .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
364 .collect();
364 .collect();
365
365
366 let new = DirsMultiset::from_manifest(&input_vec).unwrap();
366 let new = DirsMultiset::from_manifest(&input_vec).unwrap();
367 let expected = DirsMultiset {
367 let expected = DirsMultiset {
368 inner: expected_inner,
368 inner: expected_inner,
369 };
369 };
370 assert_eq!(expected, new);
370 assert_eq!(expected, new);
371
371
372 let input_map = ["b/x", "a/c", "a/d/x"].iter().map(|f| {
372 let input_map = ["b/x", "a/c", "a/d/x"].iter().map(|f| {
373 Ok((
373 Ok((
374 HgPathBuf::from_bytes(f.as_bytes()),
374 HgPathBuf::from_bytes(f.as_bytes()),
375 DirstateEntry::from_v1_data(EntryState::Normal, 0, 0, 0),
375 DirstateEntry::from_v1_data(EntryState::Normal, 0, 0, 0),
376 ))
376 ))
377 });
377 });
378 let expected_inner = [("", 2), ("a", 2), ("b", 1), ("a/d", 1)]
378 let expected_inner = [("", 2), ("a", 2), ("b", 1), ("a/d", 1)]
379 .iter()
379 .iter()
380 .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
380 .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
381 .collect();
381 .collect();
382
382
383 let new = DirsMultiset::from_dirstate(input_map, false).unwrap();
383 let new = DirsMultiset::from_dirstate(input_map, false).unwrap();
384 let expected = DirsMultiset {
384 let expected = DirsMultiset {
385 inner: expected_inner,
385 inner: expected_inner,
386 };
386 };
387 assert_eq!(expected, new);
387 assert_eq!(expected, new);
388 }
388 }
389
389
390 #[test]
390 #[test]
391 fn test_dirsmultiset_new_skip() {
391 fn test_dirsmultiset_new_skip() {
392 let input_map = [
392 let input_map = [
393 ("a/", EntryState::Normal),
393 ("a/", EntryState::Normal),
394 ("a/b", EntryState::Normal),
394 ("a/b", EntryState::Normal),
395 ("a/c", EntryState::Removed),
395 ("a/c", EntryState::Removed),
396 ("a/d", EntryState::Merged),
396 ("a/d", EntryState::Merged),
397 ]
397 ]
398 .iter()
398 .iter()
399 .map(|(f, state)| {
399 .map(|(f, state)| {
400 Ok((
400 Ok((
401 HgPathBuf::from_bytes(f.as_bytes()),
401 HgPathBuf::from_bytes(f.as_bytes()),
402 DirstateEntry::from_v1_data(*state, 0, 0, 0),
402 DirstateEntry::from_v1_data(*state, 0, 0, 0),
403 ))
403 ))
404 });
404 });
405
405
406 // "a" incremented with "a/c" and "a/d/"
406 // "a" incremented with "a/c" and "a/d/"
407 let expected_inner = [("", 1), ("a", 3)]
407 let expected_inner = [("", 1), ("a", 3)]
408 .iter()
408 .iter()
409 .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
409 .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
410 .collect();
410 .collect();
411
411
412 let new = DirsMultiset::from_dirstate(input_map, true).unwrap();
412 let new = DirsMultiset::from_dirstate(input_map, true).unwrap();
413 let expected = DirsMultiset {
413 let expected = DirsMultiset {
414 inner: expected_inner,
414 inner: expected_inner,
415 };
415 };
416 assert_eq!(expected, new);
416 assert_eq!(expected, new);
417 }
417 }
418 }
418 }
@@ -1,726 +1,722 b''
1 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
1 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
2 use crate::errors::HgError;
2 use crate::errors::HgError;
3 use bitflags::bitflags;
3 use bitflags::bitflags;
4 use std::fs;
4 use std::fs;
5 use std::io;
5 use std::io;
6 use std::time::{SystemTime, UNIX_EPOCH};
6 use std::time::{SystemTime, UNIX_EPOCH};
7
7
8 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
8 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
9 pub enum EntryState {
9 pub enum EntryState {
10 Normal,
10 Normal,
11 Added,
11 Added,
12 Removed,
12 Removed,
13 Merged,
13 Merged,
14 }
14 }
15
15
16 /// `size` and `mtime.seconds` are truncated to 31 bits.
16 /// `size` and `mtime.seconds` are truncated to 31 bits.
17 ///
17 ///
18 /// TODO: double-check status algorithm correctness for files
18 /// TODO: double-check status algorithm correctness for files
19 /// larger than 2 GiB or modified after 2038.
19 /// larger than 2 GiB or modified after 2038.
20 #[derive(Debug, Copy, Clone)]
20 #[derive(Debug, Copy, Clone)]
21 pub struct DirstateEntry {
21 pub struct DirstateEntry {
22 pub(crate) flags: Flags,
22 pub(crate) flags: Flags,
23 mode_size: Option<(u32, u32)>,
23 mode_size: Option<(u32, u32)>,
24 mtime: Option<TruncatedTimestamp>,
24 mtime: Option<TruncatedTimestamp>,
25 }
25 }
26
26
27 bitflags! {
27 bitflags! {
28 pub(crate) struct Flags: u8 {
28 pub(crate) struct Flags: u8 {
29 const WDIR_TRACKED = 1 << 0;
29 const WDIR_TRACKED = 1 << 0;
30 const P1_TRACKED = 1 << 1;
30 const P1_TRACKED = 1 << 1;
31 const P2_INFO = 1 << 2;
31 const P2_INFO = 1 << 2;
32 const HAS_FALLBACK_EXEC = 1 << 3;
32 const HAS_FALLBACK_EXEC = 1 << 3;
33 const FALLBACK_EXEC = 1 << 4;
33 const FALLBACK_EXEC = 1 << 4;
34 const HAS_FALLBACK_SYMLINK = 1 << 5;
34 const HAS_FALLBACK_SYMLINK = 1 << 5;
35 const FALLBACK_SYMLINK = 1 << 6;
35 const FALLBACK_SYMLINK = 1 << 6;
36 }
36 }
37 }
37 }
38
38
39 /// A Unix timestamp with nanoseconds precision
39 /// A Unix timestamp with nanoseconds precision
40 #[derive(Debug, Copy, Clone)]
40 #[derive(Debug, Copy, Clone)]
41 pub struct TruncatedTimestamp {
41 pub struct TruncatedTimestamp {
42 truncated_seconds: u32,
42 truncated_seconds: u32,
43 /// Always in the `0 .. 1_000_000_000` range.
43 /// Always in the `0 .. 1_000_000_000` range.
44 nanoseconds: u32,
44 nanoseconds: u32,
45 /// TODO this should be in DirstateEntry, but the current code needs
45 /// TODO this should be in DirstateEntry, but the current code needs
46 /// refactoring to use DirstateEntry instead of TruncatedTimestamp for
46 /// refactoring to use DirstateEntry instead of TruncatedTimestamp for
47 /// comparison.
47 /// comparison.
48 pub second_ambiguous: bool,
48 pub second_ambiguous: bool,
49 }
49 }
50
50
51 impl TruncatedTimestamp {
51 impl TruncatedTimestamp {
52 /// Constructs from a timestamp potentially outside of the supported range,
52 /// Constructs from a timestamp potentially outside of the supported range,
53 /// and truncate the seconds components to its lower 31 bits.
53 /// and truncate the seconds components to its lower 31 bits.
54 ///
54 ///
55 /// Panics if the nanoseconds components is not in the expected range.
55 /// Panics if the nanoseconds components is not in the expected range.
56 pub fn new_truncate(
56 pub fn new_truncate(
57 seconds: i64,
57 seconds: i64,
58 nanoseconds: u32,
58 nanoseconds: u32,
59 second_ambiguous: bool,
59 second_ambiguous: bool,
60 ) -> Self {
60 ) -> Self {
61 assert!(nanoseconds < NSEC_PER_SEC);
61 assert!(nanoseconds < NSEC_PER_SEC);
62 Self {
62 Self {
63 truncated_seconds: seconds as u32 & RANGE_MASK_31BIT,
63 truncated_seconds: seconds as u32 & RANGE_MASK_31BIT,
64 nanoseconds,
64 nanoseconds,
65 second_ambiguous,
65 second_ambiguous,
66 }
66 }
67 }
67 }
68
68
69 /// Construct from components. Returns an error if they are not in the
69 /// Construct from components. Returns an error if they are not in the
70 /// expcted range.
70 /// expcted range.
71 pub fn from_already_truncated(
71 pub fn from_already_truncated(
72 truncated_seconds: u32,
72 truncated_seconds: u32,
73 nanoseconds: u32,
73 nanoseconds: u32,
74 second_ambiguous: bool,
74 second_ambiguous: bool,
75 ) -> Result<Self, DirstateV2ParseError> {
75 ) -> Result<Self, DirstateV2ParseError> {
76 if truncated_seconds & !RANGE_MASK_31BIT == 0
76 if truncated_seconds & !RANGE_MASK_31BIT == 0
77 && nanoseconds < NSEC_PER_SEC
77 && nanoseconds < NSEC_PER_SEC
78 {
78 {
79 Ok(Self {
79 Ok(Self {
80 truncated_seconds,
80 truncated_seconds,
81 nanoseconds,
81 nanoseconds,
82 second_ambiguous,
82 second_ambiguous,
83 })
83 })
84 } else {
84 } else {
85 Err(DirstateV2ParseError::new("when reading datetime"))
85 Err(DirstateV2ParseError::new("when reading datetime"))
86 }
86 }
87 }
87 }
88
88
89 /// Returns a `TruncatedTimestamp` for the modification time of `metadata`.
89 /// Returns a `TruncatedTimestamp` for the modification time of `metadata`.
90 ///
90 ///
91 /// Propagates errors from `std` on platforms where modification time
91 /// Propagates errors from `std` on platforms where modification time
92 /// is not available at all.
92 /// is not available at all.
93 pub fn for_mtime_of(metadata: &fs::Metadata) -> io::Result<Self> {
93 pub fn for_mtime_of(metadata: &fs::Metadata) -> io::Result<Self> {
94 #[cfg(unix)]
94 #[cfg(unix)]
95 {
95 {
96 use std::os::unix::fs::MetadataExt;
96 use std::os::unix::fs::MetadataExt;
97 let seconds = metadata.mtime();
97 let seconds = metadata.mtime();
98 // i64 -> u32 with value always in the `0 .. NSEC_PER_SEC` range
98 // i64 -> u32 with value always in the `0 .. NSEC_PER_SEC` range
99 let nanoseconds = metadata.mtime_nsec().try_into().unwrap();
99 let nanoseconds = metadata.mtime_nsec().try_into().unwrap();
100 Ok(Self::new_truncate(seconds, nanoseconds, false))
100 Ok(Self::new_truncate(seconds, nanoseconds, false))
101 }
101 }
102 #[cfg(not(unix))]
102 #[cfg(not(unix))]
103 {
103 {
104 metadata.modified().map(Self::from)
104 metadata.modified().map(Self::from)
105 }
105 }
106 }
106 }
107
107
108 /// Like `for_mtime_of`, but may return `None` or a value with
108 /// Like `for_mtime_of`, but may return `None` or a value with
109 /// `second_ambiguous` set if the mtime is not "reliable".
109 /// `second_ambiguous` set if the mtime is not "reliable".
110 ///
110 ///
111 /// A modification time is reliable if it is older than `boundary` (or
111 /// A modification time is reliable if it is older than `boundary` (or
112 /// sufficiently in the future).
112 /// sufficiently in the future).
113 ///
113 ///
114 /// Otherwise a concurrent modification might happens with the same mtime.
114 /// Otherwise a concurrent modification might happens with the same mtime.
115 pub fn for_reliable_mtime_of(
115 pub fn for_reliable_mtime_of(
116 metadata: &fs::Metadata,
116 metadata: &fs::Metadata,
117 boundary: &Self,
117 boundary: &Self,
118 ) -> io::Result<Option<Self>> {
118 ) -> io::Result<Option<Self>> {
119 let mut mtime = Self::for_mtime_of(metadata)?;
119 let mut mtime = Self::for_mtime_of(metadata)?;
120 // If the mtime of the ambiguous file is younger (or equal) to the
120 // If the mtime of the ambiguous file is younger (or equal) to the
121 // starting point of the `status` walk, we cannot garantee that
121 // starting point of the `status` walk, we cannot garantee that
122 // another, racy, write will not happen right after with the same mtime
122 // another, racy, write will not happen right after with the same mtime
123 // and we cannot cache the information.
123 // and we cannot cache the information.
124 //
124 //
125 // However if the mtime is far away in the future, this is likely some
125 // However if the mtime is far away in the future, this is likely some
126 // mismatch between the current clock and previous file system
126 // mismatch between the current clock and previous file system
127 // operation. So mtime more than one days in the future are considered
127 // operation. So mtime more than one days in the future are considered
128 // fine.
128 // fine.
129 let reliable = if mtime.truncated_seconds == boundary.truncated_seconds
129 let reliable = if mtime.truncated_seconds == boundary.truncated_seconds
130 {
130 {
131 mtime.second_ambiguous = true;
131 mtime.second_ambiguous = true;
132 mtime.nanoseconds != 0
132 mtime.nanoseconds != 0
133 && boundary.nanoseconds != 0
133 && boundary.nanoseconds != 0
134 && mtime.nanoseconds < boundary.nanoseconds
134 && mtime.nanoseconds < boundary.nanoseconds
135 } else {
135 } else {
136 // `truncated_seconds` is less than 2**31,
136 // `truncated_seconds` is less than 2**31,
137 // so this does not overflow `u32`:
137 // so this does not overflow `u32`:
138 let one_day_later = boundary.truncated_seconds + 24 * 3600;
138 let one_day_later = boundary.truncated_seconds + 24 * 3600;
139 mtime.truncated_seconds < boundary.truncated_seconds
139 mtime.truncated_seconds < boundary.truncated_seconds
140 || mtime.truncated_seconds > one_day_later
140 || mtime.truncated_seconds > one_day_later
141 };
141 };
142 if reliable {
142 if reliable {
143 Ok(Some(mtime))
143 Ok(Some(mtime))
144 } else {
144 } else {
145 Ok(None)
145 Ok(None)
146 }
146 }
147 }
147 }
148
148
149 /// The lower 31 bits of the number of seconds since the epoch.
149 /// The lower 31 bits of the number of seconds since the epoch.
150 pub fn truncated_seconds(&self) -> u32 {
150 pub fn truncated_seconds(&self) -> u32 {
151 self.truncated_seconds
151 self.truncated_seconds
152 }
152 }
153
153
154 /// The sub-second component of this timestamp, in nanoseconds.
154 /// The sub-second component of this timestamp, in nanoseconds.
155 /// Always in the `0 .. 1_000_000_000` range.
155 /// Always in the `0 .. 1_000_000_000` range.
156 ///
156 ///
157 /// This timestamp is after `(seconds, 0)` by this many nanoseconds.
157 /// This timestamp is after `(seconds, 0)` by this many nanoseconds.
158 pub fn nanoseconds(&self) -> u32 {
158 pub fn nanoseconds(&self) -> u32 {
159 self.nanoseconds
159 self.nanoseconds
160 }
160 }
161
161
162 /// Returns whether two timestamps are equal modulo 2**31 seconds.
162 /// Returns whether two timestamps are equal modulo 2**31 seconds.
163 ///
163 ///
164 /// If this returns `true`, the original values converted from `SystemTime`
164 /// If this returns `true`, the original values converted from `SystemTime`
165 /// or given to `new_truncate` were very likely equal. A false positive is
165 /// or given to `new_truncate` were very likely equal. A false positive is
166 /// possible if they were exactly a multiple of 2**31 seconds apart (around
166 /// possible if they were exactly a multiple of 2**31 seconds apart (around
167 /// 68 years). This is deemed very unlikely to happen by chance, especially
167 /// 68 years). This is deemed very unlikely to happen by chance, especially
168 /// on filesystems that support sub-second precision.
168 /// on filesystems that support sub-second precision.
169 ///
169 ///
170 /// If someone is manipulating the modification times of some files to
170 /// If someone is manipulating the modification times of some files to
171 /// intentionally make `hg status` return incorrect results, not truncating
171 /// intentionally make `hg status` return incorrect results, not truncating
172 /// wouldn’t help much since they can set exactly the expected timestamp.
172 /// wouldn’t help much since they can set exactly the expected timestamp.
173 ///
173 ///
174 /// Sub-second precision is ignored if it is zero in either value.
174 /// Sub-second precision is ignored if it is zero in either value.
175 /// Some APIs simply return zero when more precision is not available.
175 /// Some APIs simply return zero when more precision is not available.
176 /// When comparing values from different sources, if only one is truncated
176 /// When comparing values from different sources, if only one is truncated
177 /// in that way, doing a simple comparison would cause many false
177 /// in that way, doing a simple comparison would cause many false
178 /// negatives.
178 /// negatives.
179 pub fn likely_equal(self, other: Self) -> bool {
179 pub fn likely_equal(self, other: Self) -> bool {
180 if self.truncated_seconds != other.truncated_seconds {
180 if self.truncated_seconds != other.truncated_seconds {
181 false
181 false
182 } else if self.nanoseconds == 0 || other.nanoseconds == 0 {
182 } else if self.nanoseconds == 0 || other.nanoseconds == 0 {
183 if self.second_ambiguous {
183 !self.second_ambiguous
184 false
185 } else {
186 true
187 }
188 } else {
184 } else {
189 self.nanoseconds == other.nanoseconds
185 self.nanoseconds == other.nanoseconds
190 }
186 }
191 }
187 }
192
188
193 pub fn likely_equal_to_mtime_of(
189 pub fn likely_equal_to_mtime_of(
194 self,
190 self,
195 metadata: &fs::Metadata,
191 metadata: &fs::Metadata,
196 ) -> io::Result<bool> {
192 ) -> io::Result<bool> {
197 Ok(self.likely_equal(Self::for_mtime_of(metadata)?))
193 Ok(self.likely_equal(Self::for_mtime_of(metadata)?))
198 }
194 }
199 }
195 }
200
196
201 impl From<SystemTime> for TruncatedTimestamp {
197 impl From<SystemTime> for TruncatedTimestamp {
202 fn from(system_time: SystemTime) -> Self {
198 fn from(system_time: SystemTime) -> Self {
203 // On Unix, `SystemTime` is a wrapper for the `timespec` C struct:
199 // On Unix, `SystemTime` is a wrapper for the `timespec` C struct:
204 // https://www.gnu.org/software/libc/manual/html_node/Time-Types.html#index-struct-timespec
200 // https://www.gnu.org/software/libc/manual/html_node/Time-Types.html#index-struct-timespec
205 // We want to effectively access its fields, but the Rust standard
201 // We want to effectively access its fields, but the Rust standard
206 // library does not expose them. The best we can do is:
202 // library does not expose them. The best we can do is:
207 let seconds;
203 let seconds;
208 let nanoseconds;
204 let nanoseconds;
209 match system_time.duration_since(UNIX_EPOCH) {
205 match system_time.duration_since(UNIX_EPOCH) {
210 Ok(duration) => {
206 Ok(duration) => {
211 seconds = duration.as_secs() as i64;
207 seconds = duration.as_secs() as i64;
212 nanoseconds = duration.subsec_nanos();
208 nanoseconds = duration.subsec_nanos();
213 }
209 }
214 Err(error) => {
210 Err(error) => {
215 // `system_time` is before `UNIX_EPOCH`.
211 // `system_time` is before `UNIX_EPOCH`.
216 // We need to undo this algorithm:
212 // We need to undo this algorithm:
217 // https://github.com/rust-lang/rust/blob/6bed1f0bc3cc50c10aab26d5f94b16a00776b8a5/library/std/src/sys/unix/time.rs#L40-L41
213 // https://github.com/rust-lang/rust/blob/6bed1f0bc3cc50c10aab26d5f94b16a00776b8a5/library/std/src/sys/unix/time.rs#L40-L41
218 let negative = error.duration();
214 let negative = error.duration();
219 let negative_secs = negative.as_secs() as i64;
215 let negative_secs = negative.as_secs() as i64;
220 let negative_nanos = negative.subsec_nanos();
216 let negative_nanos = negative.subsec_nanos();
221 if negative_nanos == 0 {
217 if negative_nanos == 0 {
222 seconds = -negative_secs;
218 seconds = -negative_secs;
223 nanoseconds = 0;
219 nanoseconds = 0;
224 } else {
220 } else {
225 // For example if `system_time` was 4.3 seconds before
221 // For example if `system_time` was 4.3 seconds before
226 // the Unix epoch we get a Duration that represents
222 // the Unix epoch we get a Duration that represents
227 // `(-4, -0.3)` but we want `(-5, +0.7)`:
223 // `(-4, -0.3)` but we want `(-5, +0.7)`:
228 seconds = -1 - negative_secs;
224 seconds = -1 - negative_secs;
229 nanoseconds = NSEC_PER_SEC - negative_nanos;
225 nanoseconds = NSEC_PER_SEC - negative_nanos;
230 }
226 }
231 }
227 }
232 };
228 };
233 Self::new_truncate(seconds, nanoseconds, false)
229 Self::new_truncate(seconds, nanoseconds, false)
234 }
230 }
235 }
231 }
236
232
237 const NSEC_PER_SEC: u32 = 1_000_000_000;
233 const NSEC_PER_SEC: u32 = 1_000_000_000;
238 pub const RANGE_MASK_31BIT: u32 = 0x7FFF_FFFF;
234 pub const RANGE_MASK_31BIT: u32 = 0x7FFF_FFFF;
239
235
240 pub const MTIME_UNSET: i32 = -1;
236 pub const MTIME_UNSET: i32 = -1;
241
237
242 /// A `DirstateEntry` with a size of `-2` means that it was merged from the
238 /// A `DirstateEntry` with a size of `-2` means that it was merged from the
243 /// other parent. This allows revert to pick the right status back during a
239 /// other parent. This allows revert to pick the right status back during a
244 /// merge.
240 /// merge.
245 pub const SIZE_FROM_OTHER_PARENT: i32 = -2;
241 pub const SIZE_FROM_OTHER_PARENT: i32 = -2;
246 /// A special value used for internal representation of special case in
242 /// A special value used for internal representation of special case in
247 /// dirstate v1 format.
243 /// dirstate v1 format.
248 pub const SIZE_NON_NORMAL: i32 = -1;
244 pub const SIZE_NON_NORMAL: i32 = -1;
249
245
250 #[derive(Debug, Default, Copy, Clone)]
246 #[derive(Debug, Default, Copy, Clone)]
251 pub struct DirstateV2Data {
247 pub struct DirstateV2Data {
252 pub wc_tracked: bool,
248 pub wc_tracked: bool,
253 pub p1_tracked: bool,
249 pub p1_tracked: bool,
254 pub p2_info: bool,
250 pub p2_info: bool,
255 pub mode_size: Option<(u32, u32)>,
251 pub mode_size: Option<(u32, u32)>,
256 pub mtime: Option<TruncatedTimestamp>,
252 pub mtime: Option<TruncatedTimestamp>,
257 pub fallback_exec: Option<bool>,
253 pub fallback_exec: Option<bool>,
258 pub fallback_symlink: Option<bool>,
254 pub fallback_symlink: Option<bool>,
259 }
255 }
260
256
261 #[derive(Debug, Default, Copy, Clone)]
257 #[derive(Debug, Default, Copy, Clone)]
262 pub struct ParentFileData {
258 pub struct ParentFileData {
263 pub mode_size: Option<(u32, u32)>,
259 pub mode_size: Option<(u32, u32)>,
264 pub mtime: Option<TruncatedTimestamp>,
260 pub mtime: Option<TruncatedTimestamp>,
265 }
261 }
266
262
267 impl DirstateEntry {
263 impl DirstateEntry {
268 pub fn from_v2_data(v2_data: DirstateV2Data) -> Self {
264 pub fn from_v2_data(v2_data: DirstateV2Data) -> Self {
269 let DirstateV2Data {
265 let DirstateV2Data {
270 wc_tracked,
266 wc_tracked,
271 p1_tracked,
267 p1_tracked,
272 p2_info,
268 p2_info,
273 mode_size,
269 mode_size,
274 mtime,
270 mtime,
275 fallback_exec,
271 fallback_exec,
276 fallback_symlink,
272 fallback_symlink,
277 } = v2_data;
273 } = v2_data;
278 if let Some((mode, size)) = mode_size {
274 if let Some((mode, size)) = mode_size {
279 // TODO: return an error for out of range values?
275 // TODO: return an error for out of range values?
280 assert!(mode & !RANGE_MASK_31BIT == 0);
276 assert!(mode & !RANGE_MASK_31BIT == 0);
281 assert!(size & !RANGE_MASK_31BIT == 0);
277 assert!(size & !RANGE_MASK_31BIT == 0);
282 }
278 }
283 let mut flags = Flags::empty();
279 let mut flags = Flags::empty();
284 flags.set(Flags::WDIR_TRACKED, wc_tracked);
280 flags.set(Flags::WDIR_TRACKED, wc_tracked);
285 flags.set(Flags::P1_TRACKED, p1_tracked);
281 flags.set(Flags::P1_TRACKED, p1_tracked);
286 flags.set(Flags::P2_INFO, p2_info);
282 flags.set(Flags::P2_INFO, p2_info);
287 if let Some(exec) = fallback_exec {
283 if let Some(exec) = fallback_exec {
288 flags.insert(Flags::HAS_FALLBACK_EXEC);
284 flags.insert(Flags::HAS_FALLBACK_EXEC);
289 if exec {
285 if exec {
290 flags.insert(Flags::FALLBACK_EXEC);
286 flags.insert(Flags::FALLBACK_EXEC);
291 }
287 }
292 }
288 }
293 if let Some(exec) = fallback_symlink {
289 if let Some(exec) = fallback_symlink {
294 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
290 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
295 if exec {
291 if exec {
296 flags.insert(Flags::FALLBACK_SYMLINK);
292 flags.insert(Flags::FALLBACK_SYMLINK);
297 }
293 }
298 }
294 }
299 Self {
295 Self {
300 flags,
296 flags,
301 mode_size,
297 mode_size,
302 mtime,
298 mtime,
303 }
299 }
304 }
300 }
305
301
306 pub fn from_v1_data(
302 pub fn from_v1_data(
307 state: EntryState,
303 state: EntryState,
308 mode: i32,
304 mode: i32,
309 size: i32,
305 size: i32,
310 mtime: i32,
306 mtime: i32,
311 ) -> Self {
307 ) -> Self {
312 match state {
308 match state {
313 EntryState::Normal => {
309 EntryState::Normal => {
314 if size == SIZE_FROM_OTHER_PARENT {
310 if size == SIZE_FROM_OTHER_PARENT {
315 Self {
311 Self {
316 // might be missing P1_TRACKED
312 // might be missing P1_TRACKED
317 flags: Flags::WDIR_TRACKED | Flags::P2_INFO,
313 flags: Flags::WDIR_TRACKED | Flags::P2_INFO,
318 mode_size: None,
314 mode_size: None,
319 mtime: None,
315 mtime: None,
320 }
316 }
321 } else if size == SIZE_NON_NORMAL {
317 } else if size == SIZE_NON_NORMAL {
322 Self {
318 Self {
323 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
319 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
324 mode_size: None,
320 mode_size: None,
325 mtime: None,
321 mtime: None,
326 }
322 }
327 } else if mtime == MTIME_UNSET {
323 } else if mtime == MTIME_UNSET {
328 // TODO: return an error for negative values?
324 // TODO: return an error for negative values?
329 let mode = u32::try_from(mode).unwrap();
325 let mode = u32::try_from(mode).unwrap();
330 let size = u32::try_from(size).unwrap();
326 let size = u32::try_from(size).unwrap();
331 Self {
327 Self {
332 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
328 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
333 mode_size: Some((mode, size)),
329 mode_size: Some((mode, size)),
334 mtime: None,
330 mtime: None,
335 }
331 }
336 } else {
332 } else {
337 // TODO: return an error for negative values?
333 // TODO: return an error for negative values?
338 let mode = u32::try_from(mode).unwrap();
334 let mode = u32::try_from(mode).unwrap();
339 let size = u32::try_from(size).unwrap();
335 let size = u32::try_from(size).unwrap();
340 let mtime = u32::try_from(mtime).unwrap();
336 let mtime = u32::try_from(mtime).unwrap();
341 let mtime = TruncatedTimestamp::from_already_truncated(
337 let mtime = TruncatedTimestamp::from_already_truncated(
342 mtime, 0, false,
338 mtime, 0, false,
343 )
339 )
344 .unwrap();
340 .unwrap();
345 Self {
341 Self {
346 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
342 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
347 mode_size: Some((mode, size)),
343 mode_size: Some((mode, size)),
348 mtime: Some(mtime),
344 mtime: Some(mtime),
349 }
345 }
350 }
346 }
351 }
347 }
352 EntryState::Added => Self {
348 EntryState::Added => Self {
353 flags: Flags::WDIR_TRACKED,
349 flags: Flags::WDIR_TRACKED,
354 mode_size: None,
350 mode_size: None,
355 mtime: None,
351 mtime: None,
356 },
352 },
357 EntryState::Removed => Self {
353 EntryState::Removed => Self {
358 flags: if size == SIZE_NON_NORMAL {
354 flags: if size == SIZE_NON_NORMAL {
359 Flags::P1_TRACKED | Flags::P2_INFO
355 Flags::P1_TRACKED | Flags::P2_INFO
360 } else if size == SIZE_FROM_OTHER_PARENT {
356 } else if size == SIZE_FROM_OTHER_PARENT {
361 // We don’t know if P1_TRACKED should be set (file history)
357 // We don’t know if P1_TRACKED should be set (file history)
362 Flags::P2_INFO
358 Flags::P2_INFO
363 } else {
359 } else {
364 Flags::P1_TRACKED
360 Flags::P1_TRACKED
365 },
361 },
366 mode_size: None,
362 mode_size: None,
367 mtime: None,
363 mtime: None,
368 },
364 },
369 EntryState::Merged => Self {
365 EntryState::Merged => Self {
370 flags: Flags::WDIR_TRACKED
366 flags: Flags::WDIR_TRACKED
371 | Flags::P1_TRACKED // might not be true because of rename ?
367 | Flags::P1_TRACKED // might not be true because of rename ?
372 | Flags::P2_INFO, // might not be true because of rename ?
368 | Flags::P2_INFO, // might not be true because of rename ?
373 mode_size: None,
369 mode_size: None,
374 mtime: None,
370 mtime: None,
375 },
371 },
376 }
372 }
377 }
373 }
378
374
379 /// Creates a new entry in "removed" state.
375 /// Creates a new entry in "removed" state.
380 ///
376 ///
381 /// `size` is expected to be zero, `SIZE_NON_NORMAL`, or
377 /// `size` is expected to be zero, `SIZE_NON_NORMAL`, or
382 /// `SIZE_FROM_OTHER_PARENT`
378 /// `SIZE_FROM_OTHER_PARENT`
383 pub fn new_removed(size: i32) -> Self {
379 pub fn new_removed(size: i32) -> Self {
384 Self::from_v1_data(EntryState::Removed, 0, size, 0)
380 Self::from_v1_data(EntryState::Removed, 0, size, 0)
385 }
381 }
386
382
387 pub fn new_tracked() -> Self {
383 pub fn new_tracked() -> Self {
388 let data = DirstateV2Data {
384 let data = DirstateV2Data {
389 wc_tracked: true,
385 wc_tracked: true,
390 ..Default::default()
386 ..Default::default()
391 };
387 };
392 Self::from_v2_data(data)
388 Self::from_v2_data(data)
393 }
389 }
394
390
395 pub fn tracked(&self) -> bool {
391 pub fn tracked(&self) -> bool {
396 self.flags.contains(Flags::WDIR_TRACKED)
392 self.flags.contains(Flags::WDIR_TRACKED)
397 }
393 }
398
394
399 pub fn p1_tracked(&self) -> bool {
395 pub fn p1_tracked(&self) -> bool {
400 self.flags.contains(Flags::P1_TRACKED)
396 self.flags.contains(Flags::P1_TRACKED)
401 }
397 }
402
398
403 fn in_either_parent(&self) -> bool {
399 fn in_either_parent(&self) -> bool {
404 self.flags.intersects(Flags::P1_TRACKED | Flags::P2_INFO)
400 self.flags.intersects(Flags::P1_TRACKED | Flags::P2_INFO)
405 }
401 }
406
402
407 pub fn removed(&self) -> bool {
403 pub fn removed(&self) -> bool {
408 self.in_either_parent() && !self.flags.contains(Flags::WDIR_TRACKED)
404 self.in_either_parent() && !self.flags.contains(Flags::WDIR_TRACKED)
409 }
405 }
410
406
411 pub fn p2_info(&self) -> bool {
407 pub fn p2_info(&self) -> bool {
412 self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO)
408 self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO)
413 }
409 }
414
410
415 pub fn added(&self) -> bool {
411 pub fn added(&self) -> bool {
416 self.flags.contains(Flags::WDIR_TRACKED) && !self.in_either_parent()
412 self.flags.contains(Flags::WDIR_TRACKED) && !self.in_either_parent()
417 }
413 }
418
414
419 pub fn modified(&self) -> bool {
415 pub fn modified(&self) -> bool {
420 self.flags
416 self.flags
421 .contains(Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO)
417 .contains(Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO)
422 }
418 }
423
419
424 pub fn maybe_clean(&self) -> bool {
420 pub fn maybe_clean(&self) -> bool {
425 #[allow(clippy::if_same_then_else)]
421 #[allow(clippy::if_same_then_else)]
426 #[allow(clippy::needless_bool)]
422 #[allow(clippy::needless_bool)]
427 if !self.flags.contains(Flags::WDIR_TRACKED) {
423 if !self.flags.contains(Flags::WDIR_TRACKED) {
428 false
424 false
429 } else if !self.flags.contains(Flags::P1_TRACKED) {
425 } else if !self.flags.contains(Flags::P1_TRACKED) {
430 false
426 false
431 } else if self.flags.contains(Flags::P2_INFO) {
427 } else if self.flags.contains(Flags::P2_INFO) {
432 false
428 false
433 } else {
429 } else {
434 true
430 true
435 }
431 }
436 }
432 }
437
433
438 pub fn any_tracked(&self) -> bool {
434 pub fn any_tracked(&self) -> bool {
439 self.flags.intersects(
435 self.flags.intersects(
440 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
436 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
441 )
437 )
442 }
438 }
443
439
444 pub(crate) fn v2_data(&self) -> DirstateV2Data {
440 pub(crate) fn v2_data(&self) -> DirstateV2Data {
445 if !self.any_tracked() {
441 if !self.any_tracked() {
446 // TODO: return an Option instead?
442 // TODO: return an Option instead?
447 panic!("Accessing v2_data of an untracked DirstateEntry")
443 panic!("Accessing v2_data of an untracked DirstateEntry")
448 }
444 }
449 let wc_tracked = self.flags.contains(Flags::WDIR_TRACKED);
445 let wc_tracked = self.flags.contains(Flags::WDIR_TRACKED);
450 let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
446 let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
451 let p2_info = self.flags.contains(Flags::P2_INFO);
447 let p2_info = self.flags.contains(Flags::P2_INFO);
452 let mode_size = self.mode_size;
448 let mode_size = self.mode_size;
453 let mtime = self.mtime;
449 let mtime = self.mtime;
454 DirstateV2Data {
450 DirstateV2Data {
455 wc_tracked,
451 wc_tracked,
456 p1_tracked,
452 p1_tracked,
457 p2_info,
453 p2_info,
458 mode_size,
454 mode_size,
459 mtime,
455 mtime,
460 fallback_exec: self.get_fallback_exec(),
456 fallback_exec: self.get_fallback_exec(),
461 fallback_symlink: self.get_fallback_symlink(),
457 fallback_symlink: self.get_fallback_symlink(),
462 }
458 }
463 }
459 }
464
460
465 fn v1_state(&self) -> EntryState {
461 fn v1_state(&self) -> EntryState {
466 if !self.any_tracked() {
462 if !self.any_tracked() {
467 // TODO: return an Option instead?
463 // TODO: return an Option instead?
468 panic!("Accessing v1_state of an untracked DirstateEntry")
464 panic!("Accessing v1_state of an untracked DirstateEntry")
469 }
465 }
470 if self.removed() {
466 if self.removed() {
471 EntryState::Removed
467 EntryState::Removed
472 } else if self.modified() {
468 } else if self.modified() {
473 EntryState::Merged
469 EntryState::Merged
474 } else if self.added() {
470 } else if self.added() {
475 EntryState::Added
471 EntryState::Added
476 } else {
472 } else {
477 EntryState::Normal
473 EntryState::Normal
478 }
474 }
479 }
475 }
480
476
481 fn v1_mode(&self) -> i32 {
477 fn v1_mode(&self) -> i32 {
482 if let Some((mode, _size)) = self.mode_size {
478 if let Some((mode, _size)) = self.mode_size {
483 i32::try_from(mode).unwrap()
479 i32::try_from(mode).unwrap()
484 } else {
480 } else {
485 0
481 0
486 }
482 }
487 }
483 }
488
484
489 fn v1_size(&self) -> i32 {
485 fn v1_size(&self) -> i32 {
490 if !self.any_tracked() {
486 if !self.any_tracked() {
491 // TODO: return an Option instead?
487 // TODO: return an Option instead?
492 panic!("Accessing v1_size of an untracked DirstateEntry")
488 panic!("Accessing v1_size of an untracked DirstateEntry")
493 }
489 }
494 if self.removed()
490 if self.removed()
495 && self.flags.contains(Flags::P1_TRACKED | Flags::P2_INFO)
491 && self.flags.contains(Flags::P1_TRACKED | Flags::P2_INFO)
496 {
492 {
497 SIZE_NON_NORMAL
493 SIZE_NON_NORMAL
498 } else if self.flags.contains(Flags::P2_INFO) {
494 } else if self.flags.contains(Flags::P2_INFO) {
499 SIZE_FROM_OTHER_PARENT
495 SIZE_FROM_OTHER_PARENT
500 } else if self.removed() {
496 } else if self.removed() {
501 0
497 0
502 } else if self.added() {
498 } else if self.added() {
503 SIZE_NON_NORMAL
499 SIZE_NON_NORMAL
504 } else if let Some((_mode, size)) = self.mode_size {
500 } else if let Some((_mode, size)) = self.mode_size {
505 i32::try_from(size).unwrap()
501 i32::try_from(size).unwrap()
506 } else {
502 } else {
507 SIZE_NON_NORMAL
503 SIZE_NON_NORMAL
508 }
504 }
509 }
505 }
510
506
511 fn v1_mtime(&self) -> i32 {
507 fn v1_mtime(&self) -> i32 {
512 if !self.any_tracked() {
508 if !self.any_tracked() {
513 // TODO: return an Option instead?
509 // TODO: return an Option instead?
514 panic!("Accessing v1_mtime of an untracked DirstateEntry")
510 panic!("Accessing v1_mtime of an untracked DirstateEntry")
515 }
511 }
516
512
517 #[allow(clippy::if_same_then_else)]
513 #[allow(clippy::if_same_then_else)]
518 if self.removed() {
514 if self.removed() {
519 0
515 0
520 } else if self.flags.contains(Flags::P2_INFO) {
516 } else if self.flags.contains(Flags::P2_INFO) {
521 MTIME_UNSET
517 MTIME_UNSET
522 } else if !self.flags.contains(Flags::P1_TRACKED) {
518 } else if !self.flags.contains(Flags::P1_TRACKED) {
523 MTIME_UNSET
519 MTIME_UNSET
524 } else if let Some(mtime) = self.mtime {
520 } else if let Some(mtime) = self.mtime {
525 if mtime.second_ambiguous {
521 if mtime.second_ambiguous {
526 MTIME_UNSET
522 MTIME_UNSET
527 } else {
523 } else {
528 i32::try_from(mtime.truncated_seconds()).unwrap()
524 i32::try_from(mtime.truncated_seconds()).unwrap()
529 }
525 }
530 } else {
526 } else {
531 MTIME_UNSET
527 MTIME_UNSET
532 }
528 }
533 }
529 }
534
530
535 // TODO: return `Option<EntryState>`? None when `!self.any_tracked`
531 // TODO: return `Option<EntryState>`? None when `!self.any_tracked`
536 pub fn state(&self) -> EntryState {
532 pub fn state(&self) -> EntryState {
537 self.v1_state()
533 self.v1_state()
538 }
534 }
539
535
540 // TODO: return Option?
536 // TODO: return Option?
541 pub fn mode(&self) -> i32 {
537 pub fn mode(&self) -> i32 {
542 self.v1_mode()
538 self.v1_mode()
543 }
539 }
544
540
545 // TODO: return Option?
541 // TODO: return Option?
546 pub fn size(&self) -> i32 {
542 pub fn size(&self) -> i32 {
547 self.v1_size()
543 self.v1_size()
548 }
544 }
549
545
550 // TODO: return Option?
546 // TODO: return Option?
551 pub fn mtime(&self) -> i32 {
547 pub fn mtime(&self) -> i32 {
552 self.v1_mtime()
548 self.v1_mtime()
553 }
549 }
554
550
555 pub fn get_fallback_exec(&self) -> Option<bool> {
551 pub fn get_fallback_exec(&self) -> Option<bool> {
556 if self.flags.contains(Flags::HAS_FALLBACK_EXEC) {
552 if self.flags.contains(Flags::HAS_FALLBACK_EXEC) {
557 Some(self.flags.contains(Flags::FALLBACK_EXEC))
553 Some(self.flags.contains(Flags::FALLBACK_EXEC))
558 } else {
554 } else {
559 None
555 None
560 }
556 }
561 }
557 }
562
558
563 pub fn set_fallback_exec(&mut self, value: Option<bool>) {
559 pub fn set_fallback_exec(&mut self, value: Option<bool>) {
564 match value {
560 match value {
565 None => {
561 None => {
566 self.flags.remove(Flags::HAS_FALLBACK_EXEC);
562 self.flags.remove(Flags::HAS_FALLBACK_EXEC);
567 self.flags.remove(Flags::FALLBACK_EXEC);
563 self.flags.remove(Flags::FALLBACK_EXEC);
568 }
564 }
569 Some(exec) => {
565 Some(exec) => {
570 self.flags.insert(Flags::HAS_FALLBACK_EXEC);
566 self.flags.insert(Flags::HAS_FALLBACK_EXEC);
571 if exec {
567 if exec {
572 self.flags.insert(Flags::FALLBACK_EXEC);
568 self.flags.insert(Flags::FALLBACK_EXEC);
573 }
569 }
574 }
570 }
575 }
571 }
576 }
572 }
577
573
578 pub fn get_fallback_symlink(&self) -> Option<bool> {
574 pub fn get_fallback_symlink(&self) -> Option<bool> {
579 if self.flags.contains(Flags::HAS_FALLBACK_SYMLINK) {
575 if self.flags.contains(Flags::HAS_FALLBACK_SYMLINK) {
580 Some(self.flags.contains(Flags::FALLBACK_SYMLINK))
576 Some(self.flags.contains(Flags::FALLBACK_SYMLINK))
581 } else {
577 } else {
582 None
578 None
583 }
579 }
584 }
580 }
585
581
586 pub fn set_fallback_symlink(&mut self, value: Option<bool>) {
582 pub fn set_fallback_symlink(&mut self, value: Option<bool>) {
587 match value {
583 match value {
588 None => {
584 None => {
589 self.flags.remove(Flags::HAS_FALLBACK_SYMLINK);
585 self.flags.remove(Flags::HAS_FALLBACK_SYMLINK);
590 self.flags.remove(Flags::FALLBACK_SYMLINK);
586 self.flags.remove(Flags::FALLBACK_SYMLINK);
591 }
587 }
592 Some(symlink) => {
588 Some(symlink) => {
593 self.flags.insert(Flags::HAS_FALLBACK_SYMLINK);
589 self.flags.insert(Flags::HAS_FALLBACK_SYMLINK);
594 if symlink {
590 if symlink {
595 self.flags.insert(Flags::FALLBACK_SYMLINK);
591 self.flags.insert(Flags::FALLBACK_SYMLINK);
596 }
592 }
597 }
593 }
598 }
594 }
599 }
595 }
600
596
601 pub fn truncated_mtime(&self) -> Option<TruncatedTimestamp> {
597 pub fn truncated_mtime(&self) -> Option<TruncatedTimestamp> {
602 self.mtime
598 self.mtime
603 }
599 }
604
600
605 pub fn drop_merge_data(&mut self) {
601 pub fn drop_merge_data(&mut self) {
606 if self.flags.contains(Flags::P2_INFO) {
602 if self.flags.contains(Flags::P2_INFO) {
607 self.flags.remove(Flags::P2_INFO);
603 self.flags.remove(Flags::P2_INFO);
608 self.mode_size = None;
604 self.mode_size = None;
609 self.mtime = None;
605 self.mtime = None;
610 }
606 }
611 }
607 }
612
608
613 pub fn set_possibly_dirty(&mut self) {
609 pub fn set_possibly_dirty(&mut self) {
614 self.mtime = None
610 self.mtime = None
615 }
611 }
616
612
617 pub fn set_clean(
613 pub fn set_clean(
618 &mut self,
614 &mut self,
619 mode: u32,
615 mode: u32,
620 size: u32,
616 size: u32,
621 mtime: TruncatedTimestamp,
617 mtime: TruncatedTimestamp,
622 ) {
618 ) {
623 let size = size & RANGE_MASK_31BIT;
619 let size = size & RANGE_MASK_31BIT;
624 self.flags.insert(Flags::WDIR_TRACKED | Flags::P1_TRACKED);
620 self.flags.insert(Flags::WDIR_TRACKED | Flags::P1_TRACKED);
625 self.mode_size = Some((mode, size));
621 self.mode_size = Some((mode, size));
626 self.mtime = Some(mtime);
622 self.mtime = Some(mtime);
627 }
623 }
628
624
629 pub fn set_tracked(&mut self) {
625 pub fn set_tracked(&mut self) {
630 self.flags.insert(Flags::WDIR_TRACKED);
626 self.flags.insert(Flags::WDIR_TRACKED);
631 // `set_tracked` is replacing various `normallookup` call. So we mark
627 // `set_tracked` is replacing various `normallookup` call. So we mark
632 // the files as needing lookup
628 // the files as needing lookup
633 //
629 //
634 // Consider dropping this in the future in favor of something less
630 // Consider dropping this in the future in favor of something less
635 // broad.
631 // broad.
636 self.mtime = None;
632 self.mtime = None;
637 }
633 }
638
634
639 pub fn set_untracked(&mut self) {
635 pub fn set_untracked(&mut self) {
640 self.flags.remove(Flags::WDIR_TRACKED);
636 self.flags.remove(Flags::WDIR_TRACKED);
641 self.mode_size = None;
637 self.mode_size = None;
642 self.mtime = None;
638 self.mtime = None;
643 }
639 }
644
640
645 /// Returns `(state, mode, size, mtime)` for the puprose of serialization
641 /// Returns `(state, mode, size, mtime)` for the puprose of serialization
646 /// in the dirstate-v1 format.
642 /// in the dirstate-v1 format.
647 ///
643 ///
648 /// This includes marker values such as `mtime == -1`. In the future we may
644 /// This includes marker values such as `mtime == -1`. In the future we may
649 /// want to not represent these cases that way in memory, but serialization
645 /// want to not represent these cases that way in memory, but serialization
650 /// will need to keep the same format.
646 /// will need to keep the same format.
651 pub fn v1_data(&self) -> (u8, i32, i32, i32) {
647 pub fn v1_data(&self) -> (u8, i32, i32, i32) {
652 (
648 (
653 self.v1_state().into(),
649 self.v1_state().into(),
654 self.v1_mode(),
650 self.v1_mode(),
655 self.v1_size(),
651 self.v1_size(),
656 self.v1_mtime(),
652 self.v1_mtime(),
657 )
653 )
658 }
654 }
659
655
660 pub(crate) fn is_from_other_parent(&self) -> bool {
656 pub(crate) fn is_from_other_parent(&self) -> bool {
661 self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO)
657 self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO)
662 }
658 }
663
659
664 // TODO: other platforms
660 // TODO: other platforms
665 #[cfg(unix)]
661 #[cfg(unix)]
666 pub fn mode_changed(
662 pub fn mode_changed(
667 &self,
663 &self,
668 filesystem_metadata: &std::fs::Metadata,
664 filesystem_metadata: &std::fs::Metadata,
669 ) -> bool {
665 ) -> bool {
670 let dirstate_exec_bit = (self.mode() as u32 & EXEC_BIT_MASK) != 0;
666 let dirstate_exec_bit = (self.mode() as u32 & EXEC_BIT_MASK) != 0;
671 let fs_exec_bit = has_exec_bit(filesystem_metadata);
667 let fs_exec_bit = has_exec_bit(filesystem_metadata);
672 dirstate_exec_bit != fs_exec_bit
668 dirstate_exec_bit != fs_exec_bit
673 }
669 }
674
670
675 /// Returns a `(state, mode, size, mtime)` tuple as for
671 /// Returns a `(state, mode, size, mtime)` tuple as for
676 /// `DirstateMapMethods::debug_iter`.
672 /// `DirstateMapMethods::debug_iter`.
677 pub fn debug_tuple(&self) -> (u8, i32, i32, i32) {
673 pub fn debug_tuple(&self) -> (u8, i32, i32, i32) {
678 (self.state().into(), self.mode(), self.size(), self.mtime())
674 (self.state().into(), self.mode(), self.size(), self.mtime())
679 }
675 }
680 }
676 }
681
677
682 impl EntryState {
678 impl EntryState {
683 pub fn is_tracked(self) -> bool {
679 pub fn is_tracked(self) -> bool {
684 use EntryState::*;
680 use EntryState::*;
685 match self {
681 match self {
686 Normal | Added | Merged => true,
682 Normal | Added | Merged => true,
687 Removed => false,
683 Removed => false,
688 }
684 }
689 }
685 }
690 }
686 }
691
687
692 impl TryFrom<u8> for EntryState {
688 impl TryFrom<u8> for EntryState {
693 type Error = HgError;
689 type Error = HgError;
694
690
695 fn try_from(value: u8) -> Result<Self, Self::Error> {
691 fn try_from(value: u8) -> Result<Self, Self::Error> {
696 match value {
692 match value {
697 b'n' => Ok(EntryState::Normal),
693 b'n' => Ok(EntryState::Normal),
698 b'a' => Ok(EntryState::Added),
694 b'a' => Ok(EntryState::Added),
699 b'r' => Ok(EntryState::Removed),
695 b'r' => Ok(EntryState::Removed),
700 b'm' => Ok(EntryState::Merged),
696 b'm' => Ok(EntryState::Merged),
701 _ => Err(HgError::CorruptedRepository(format!(
697 _ => Err(HgError::CorruptedRepository(format!(
702 "Incorrect dirstate entry state {}",
698 "Incorrect dirstate entry state {}",
703 value
699 value
704 ))),
700 ))),
705 }
701 }
706 }
702 }
707 }
703 }
708
704
709 impl Into<u8> for EntryState {
705 impl From<EntryState> for u8 {
710 fn into(self) -> u8 {
706 fn from(val: EntryState) -> Self {
711 match self {
707 match val {
712 EntryState::Normal => b'n',
708 EntryState::Normal => b'n',
713 EntryState::Added => b'a',
709 EntryState::Added => b'a',
714 EntryState::Removed => b'r',
710 EntryState::Removed => b'r',
715 EntryState::Merged => b'm',
711 EntryState::Merged => b'm',
716 }
712 }
717 }
713 }
718 }
714 }
719
715
720 const EXEC_BIT_MASK: u32 = 0o100;
716 const EXEC_BIT_MASK: u32 = 0o100;
721
717
722 pub fn has_exec_bit(metadata: &std::fs::Metadata) -> bool {
718 pub fn has_exec_bit(metadata: &std::fs::Metadata) -> bool {
723 // TODO: How to handle executable permissions on Windows?
719 // TODO: How to handle executable permissions on Windows?
724 use std::os::unix::fs::MetadataExt;
720 use std::os::unix::fs::MetadataExt;
725 (metadata.mode() & EXEC_BIT_MASK) != 0
721 (metadata.mode() & EXEC_BIT_MASK) != 0
726 }
722 }
@@ -1,1915 +1,1902 b''
1 use bytes_cast::BytesCast;
1 use bytes_cast::BytesCast;
2 use std::borrow::Cow;
2 use std::borrow::Cow;
3 use std::path::PathBuf;
3 use std::path::PathBuf;
4
4
5 use super::on_disk;
5 use super::on_disk;
6 use super::on_disk::DirstateV2ParseError;
6 use super::on_disk::DirstateV2ParseError;
7 use super::owning::OwningDirstateMap;
7 use super::owning::OwningDirstateMap;
8 use super::path_with_basename::WithBasename;
8 use super::path_with_basename::WithBasename;
9 use crate::dirstate::parsers::pack_entry;
9 use crate::dirstate::parsers::pack_entry;
10 use crate::dirstate::parsers::packed_entry_size;
10 use crate::dirstate::parsers::packed_entry_size;
11 use crate::dirstate::parsers::parse_dirstate_entries;
11 use crate::dirstate::parsers::parse_dirstate_entries;
12 use crate::dirstate::CopyMapIter;
12 use crate::dirstate::CopyMapIter;
13 use crate::dirstate::DirstateV2Data;
13 use crate::dirstate::DirstateV2Data;
14 use crate::dirstate::ParentFileData;
14 use crate::dirstate::ParentFileData;
15 use crate::dirstate::StateMapIter;
15 use crate::dirstate::StateMapIter;
16 use crate::dirstate::TruncatedTimestamp;
16 use crate::dirstate::TruncatedTimestamp;
17 use crate::matchers::Matcher;
17 use crate::matchers::Matcher;
18 use crate::utils::hg_path::{HgPath, HgPathBuf};
18 use crate::utils::hg_path::{HgPath, HgPathBuf};
19 use crate::DirstateEntry;
19 use crate::DirstateEntry;
20 use crate::DirstateError;
20 use crate::DirstateError;
21 use crate::DirstateMapError;
21 use crate::DirstateMapError;
22 use crate::DirstateParents;
22 use crate::DirstateParents;
23 use crate::DirstateStatus;
23 use crate::DirstateStatus;
24 use crate::FastHashbrownMap as FastHashMap;
24 use crate::FastHashbrownMap as FastHashMap;
25 use crate::PatternFileWarning;
25 use crate::PatternFileWarning;
26 use crate::StatusError;
26 use crate::StatusError;
27 use crate::StatusOptions;
27 use crate::StatusOptions;
28
28
29 /// Append to an existing data file if the amount of unreachable data (not used
29 /// Append to an existing data file if the amount of unreachable data (not used
30 /// anymore) is less than this fraction of the total amount of existing data.
30 /// anymore) is less than this fraction of the total amount of existing data.
31 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
31 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
32
32
33 #[derive(Debug, PartialEq, Eq)]
33 #[derive(Debug, PartialEq, Eq)]
34 /// Version of the on-disk format
34 /// Version of the on-disk format
35 pub enum DirstateVersion {
35 pub enum DirstateVersion {
36 V1,
36 V1,
37 V2,
37 V2,
38 }
38 }
39
39
40 #[derive(Debug)]
40 #[derive(Debug)]
41 pub struct DirstateMap<'on_disk> {
41 pub struct DirstateMap<'on_disk> {
42 /// Contents of the `.hg/dirstate` file
42 /// Contents of the `.hg/dirstate` file
43 pub(super) on_disk: &'on_disk [u8],
43 pub(super) on_disk: &'on_disk [u8],
44
44
45 pub(super) root: ChildNodes<'on_disk>,
45 pub(super) root: ChildNodes<'on_disk>,
46
46
47 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
47 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
48 pub(super) nodes_with_entry_count: u32,
48 pub(super) nodes_with_entry_count: u32,
49
49
50 /// Number of nodes anywhere in the tree that have
50 /// Number of nodes anywhere in the tree that have
51 /// `.copy_source.is_some()`.
51 /// `.copy_source.is_some()`.
52 pub(super) nodes_with_copy_source_count: u32,
52 pub(super) nodes_with_copy_source_count: u32,
53
53
54 /// See on_disk::Header
54 /// See on_disk::Header
55 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
55 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
56
56
57 /// How many bytes of `on_disk` are not used anymore
57 /// How many bytes of `on_disk` are not used anymore
58 pub(super) unreachable_bytes: u32,
58 pub(super) unreachable_bytes: u32,
59
59
60 /// Size of the data used to first load this `DirstateMap`. Used in case
60 /// Size of the data used to first load this `DirstateMap`. Used in case
61 /// we need to write some new metadata, but no new data on disk.
61 /// we need to write some new metadata, but no new data on disk.
62 pub(super) old_data_size: usize,
62 pub(super) old_data_size: usize,
63
63
64 pub(super) dirstate_version: DirstateVersion,
64 pub(super) dirstate_version: DirstateVersion,
65 }
65 }
66
66
67 /// Using a plain `HgPathBuf` of the full path from the repository root as a
67 /// Using a plain `HgPathBuf` of the full path from the repository root as a
68 /// map key would also work: all paths in a given map have the same parent
68 /// map key would also work: all paths in a given map have the same parent
69 /// path, so comparing full paths gives the same result as comparing base
69 /// path, so comparing full paths gives the same result as comparing base
70 /// names. However `HashMap` would waste time always re-hashing the same
70 /// names. However `HashMap` would waste time always re-hashing the same
71 /// string prefix.
71 /// string prefix.
72 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
72 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
73
73
74 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
74 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
75 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
75 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
76 #[derive(Debug)]
76 #[derive(Debug)]
77 pub(super) enum BorrowedPath<'tree, 'on_disk> {
77 pub(super) enum BorrowedPath<'tree, 'on_disk> {
78 InMemory(&'tree HgPathBuf),
78 InMemory(&'tree HgPathBuf),
79 OnDisk(&'on_disk HgPath),
79 OnDisk(&'on_disk HgPath),
80 }
80 }
81
81
82 #[derive(Debug)]
82 #[derive(Debug)]
83 pub(super) enum ChildNodes<'on_disk> {
83 pub(super) enum ChildNodes<'on_disk> {
84 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
84 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
85 OnDisk(&'on_disk [on_disk::Node]),
85 OnDisk(&'on_disk [on_disk::Node]),
86 }
86 }
87
87
88 #[derive(Debug)]
88 #[derive(Debug)]
89 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
89 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
90 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
90 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
91 OnDisk(&'on_disk [on_disk::Node]),
91 OnDisk(&'on_disk [on_disk::Node]),
92 }
92 }
93
93
94 #[derive(Debug)]
94 #[derive(Debug)]
95 pub(super) enum NodeRef<'tree, 'on_disk> {
95 pub(super) enum NodeRef<'tree, 'on_disk> {
96 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
96 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
97 OnDisk(&'on_disk on_disk::Node),
97 OnDisk(&'on_disk on_disk::Node),
98 }
98 }
99
99
100 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
100 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
101 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
101 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
102 match *self {
102 match *self {
103 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
103 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
104 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
104 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
105 }
105 }
106 }
106 }
107 }
107 }
108
108
109 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
109 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
110 type Target = HgPath;
110 type Target = HgPath;
111
111
112 fn deref(&self) -> &HgPath {
112 fn deref(&self) -> &HgPath {
113 match *self {
113 match *self {
114 BorrowedPath::InMemory(in_memory) => in_memory,
114 BorrowedPath::InMemory(in_memory) => in_memory,
115 BorrowedPath::OnDisk(on_disk) => on_disk,
115 BorrowedPath::OnDisk(on_disk) => on_disk,
116 }
116 }
117 }
117 }
118 }
118 }
119
119
120 impl Default for ChildNodes<'_> {
120 impl Default for ChildNodes<'_> {
121 fn default() -> Self {
121 fn default() -> Self {
122 ChildNodes::InMemory(Default::default())
122 ChildNodes::InMemory(Default::default())
123 }
123 }
124 }
124 }
125
125
126 impl<'on_disk> ChildNodes<'on_disk> {
126 impl<'on_disk> ChildNodes<'on_disk> {
127 pub(super) fn as_ref<'tree>(
127 pub(super) fn as_ref<'tree>(
128 &'tree self,
128 &'tree self,
129 ) -> ChildNodesRef<'tree, 'on_disk> {
129 ) -> ChildNodesRef<'tree, 'on_disk> {
130 match self {
130 match self {
131 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
131 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
132 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
132 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
133 }
133 }
134 }
134 }
135
135
136 pub(super) fn is_empty(&self) -> bool {
136 pub(super) fn is_empty(&self) -> bool {
137 match self {
137 match self {
138 ChildNodes::InMemory(nodes) => nodes.is_empty(),
138 ChildNodes::InMemory(nodes) => nodes.is_empty(),
139 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
139 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
140 }
140 }
141 }
141 }
142
142
143 fn make_mut(
143 fn make_mut(
144 &mut self,
144 &mut self,
145 on_disk: &'on_disk [u8],
145 on_disk: &'on_disk [u8],
146 unreachable_bytes: &mut u32,
146 unreachable_bytes: &mut u32,
147 ) -> Result<
147 ) -> Result<
148 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
148 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
149 DirstateV2ParseError,
149 DirstateV2ParseError,
150 > {
150 > {
151 match self {
151 match self {
152 ChildNodes::InMemory(nodes) => Ok(nodes),
152 ChildNodes::InMemory(nodes) => Ok(nodes),
153 ChildNodes::OnDisk(nodes) => {
153 ChildNodes::OnDisk(nodes) => {
154 *unreachable_bytes +=
154 *unreachable_bytes +=
155 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
155 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
156 let nodes = nodes
156 let nodes = nodes
157 .iter()
157 .iter()
158 .map(|node| {
158 .map(|node| {
159 Ok((
159 Ok((
160 node.path(on_disk)?,
160 node.path(on_disk)?,
161 node.to_in_memory_node(on_disk)?,
161 node.to_in_memory_node(on_disk)?,
162 ))
162 ))
163 })
163 })
164 .collect::<Result<_, _>>()?;
164 .collect::<Result<_, _>>()?;
165 *self = ChildNodes::InMemory(nodes);
165 *self = ChildNodes::InMemory(nodes);
166 match self {
166 match self {
167 ChildNodes::InMemory(nodes) => Ok(nodes),
167 ChildNodes::InMemory(nodes) => Ok(nodes),
168 ChildNodes::OnDisk(_) => unreachable!(),
168 ChildNodes::OnDisk(_) => unreachable!(),
169 }
169 }
170 }
170 }
171 }
171 }
172 }
172 }
173 }
173 }
174
174
175 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
175 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
176 pub(super) fn get(
176 pub(super) fn get(
177 &self,
177 &self,
178 base_name: &HgPath,
178 base_name: &HgPath,
179 on_disk: &'on_disk [u8],
179 on_disk: &'on_disk [u8],
180 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
180 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
181 match self {
181 match self {
182 ChildNodesRef::InMemory(nodes) => Ok(nodes
182 ChildNodesRef::InMemory(nodes) => Ok(nodes
183 .get_key_value(base_name)
183 .get_key_value(base_name)
184 .map(|(k, v)| NodeRef::InMemory(k, v))),
184 .map(|(k, v)| NodeRef::InMemory(k, v))),
185 ChildNodesRef::OnDisk(nodes) => {
185 ChildNodesRef::OnDisk(nodes) => {
186 let mut parse_result = Ok(());
186 let mut parse_result = Ok(());
187 let search_result = nodes.binary_search_by(|node| {
187 let search_result = nodes.binary_search_by(|node| {
188 match node.base_name(on_disk) {
188 match node.base_name(on_disk) {
189 Ok(node_base_name) => node_base_name.cmp(base_name),
189 Ok(node_base_name) => node_base_name.cmp(base_name),
190 Err(e) => {
190 Err(e) => {
191 parse_result = Err(e);
191 parse_result = Err(e);
192 // Dummy comparison result, `search_result` won’t
192 // Dummy comparison result, `search_result` won’t
193 // be used since `parse_result` is an error
193 // be used since `parse_result` is an error
194 std::cmp::Ordering::Equal
194 std::cmp::Ordering::Equal
195 }
195 }
196 }
196 }
197 });
197 });
198 parse_result.map(|()| {
198 parse_result.map(|()| {
199 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
199 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
200 })
200 })
201 }
201 }
202 }
202 }
203 }
203 }
204
204
205 /// Iterate in undefined order
205 /// Iterate in undefined order
206 pub(super) fn iter(
206 pub(super) fn iter(
207 &self,
207 &self,
208 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
208 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
209 match self {
209 match self {
210 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
210 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
211 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
211 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
212 ),
212 ),
213 ChildNodesRef::OnDisk(nodes) => {
213 ChildNodesRef::OnDisk(nodes) => {
214 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
214 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
215 }
215 }
216 }
216 }
217 }
217 }
218
218
219 /// Iterate in parallel in undefined order
219 /// Iterate in parallel in undefined order
220 pub(super) fn par_iter(
220 pub(super) fn par_iter(
221 &self,
221 &self,
222 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
222 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
223 {
223 {
224 use rayon::prelude::*;
224 use rayon::prelude::*;
225 match self {
225 match self {
226 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
226 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
227 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
227 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
228 ),
228 ),
229 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
229 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
230 nodes.par_iter().map(NodeRef::OnDisk),
230 nodes.par_iter().map(NodeRef::OnDisk),
231 ),
231 ),
232 }
232 }
233 }
233 }
234
234
235 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
235 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
236 match self {
236 match self {
237 ChildNodesRef::InMemory(nodes) => {
237 ChildNodesRef::InMemory(nodes) => {
238 let mut vec: Vec<_> = nodes
238 let mut vec: Vec<_> = nodes
239 .iter()
239 .iter()
240 .map(|(k, v)| NodeRef::InMemory(k, v))
240 .map(|(k, v)| NodeRef::InMemory(k, v))
241 .collect();
241 .collect();
242 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
242 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
243 match node {
243 match node {
244 NodeRef::InMemory(path, _node) => path.base_name(),
244 NodeRef::InMemory(path, _node) => path.base_name(),
245 NodeRef::OnDisk(_) => unreachable!(),
245 NodeRef::OnDisk(_) => unreachable!(),
246 }
246 }
247 }
247 }
248 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
248 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
249 // value: https://github.com/rust-lang/rust/issues/34162
249 // value: https://github.com/rust-lang/rust/issues/34162
250 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
250 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
251 vec
251 vec
252 }
252 }
253 ChildNodesRef::OnDisk(nodes) => {
253 ChildNodesRef::OnDisk(nodes) => {
254 // Nodes on disk are already sorted
254 // Nodes on disk are already sorted
255 nodes.iter().map(NodeRef::OnDisk).collect()
255 nodes.iter().map(NodeRef::OnDisk).collect()
256 }
256 }
257 }
257 }
258 }
258 }
259 }
259 }
260
260
261 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
261 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
262 pub(super) fn full_path(
262 pub(super) fn full_path(
263 &self,
263 &self,
264 on_disk: &'on_disk [u8],
264 on_disk: &'on_disk [u8],
265 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
265 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
266 match self {
266 match self {
267 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
267 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
268 NodeRef::OnDisk(node) => node.full_path(on_disk),
268 NodeRef::OnDisk(node) => node.full_path(on_disk),
269 }
269 }
270 }
270 }
271
271
272 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
272 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
273 /// HgPath>` detached from `'tree`
273 /// HgPath>` detached from `'tree`
274 pub(super) fn full_path_borrowed(
274 pub(super) fn full_path_borrowed(
275 &self,
275 &self,
276 on_disk: &'on_disk [u8],
276 on_disk: &'on_disk [u8],
277 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
277 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
278 match self {
278 match self {
279 NodeRef::InMemory(path, _node) => match path.full_path() {
279 NodeRef::InMemory(path, _node) => match path.full_path() {
280 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
280 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
281 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
281 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
282 },
282 },
283 NodeRef::OnDisk(node) => {
283 NodeRef::OnDisk(node) => {
284 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
284 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
285 }
285 }
286 }
286 }
287 }
287 }
288
288
289 pub(super) fn base_name(
289 pub(super) fn base_name(
290 &self,
290 &self,
291 on_disk: &'on_disk [u8],
291 on_disk: &'on_disk [u8],
292 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
292 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
293 match self {
293 match self {
294 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
294 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
295 NodeRef::OnDisk(node) => node.base_name(on_disk),
295 NodeRef::OnDisk(node) => node.base_name(on_disk),
296 }
296 }
297 }
297 }
298
298
299 pub(super) fn children(
299 pub(super) fn children(
300 &self,
300 &self,
301 on_disk: &'on_disk [u8],
301 on_disk: &'on_disk [u8],
302 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
302 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
303 match self {
303 match self {
304 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
304 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
305 NodeRef::OnDisk(node) => {
305 NodeRef::OnDisk(node) => {
306 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
306 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
307 }
307 }
308 }
308 }
309 }
309 }
310
310
311 pub(super) fn has_copy_source(&self) -> bool {
311 pub(super) fn has_copy_source(&self) -> bool {
312 match self {
312 match self {
313 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
313 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
314 NodeRef::OnDisk(node) => node.has_copy_source(),
314 NodeRef::OnDisk(node) => node.has_copy_source(),
315 }
315 }
316 }
316 }
317
317
318 pub(super) fn copy_source(
318 pub(super) fn copy_source(
319 &self,
319 &self,
320 on_disk: &'on_disk [u8],
320 on_disk: &'on_disk [u8],
321 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
321 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
322 match self {
322 match self {
323 NodeRef::InMemory(_path, node) => {
323 NodeRef::InMemory(_path, node) => Ok(node.copy_source.as_deref()),
324 Ok(node.copy_source.as_ref().map(|s| &**s))
325 }
326 NodeRef::OnDisk(node) => node.copy_source(on_disk),
324 NodeRef::OnDisk(node) => node.copy_source(on_disk),
327 }
325 }
328 }
326 }
329 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
327 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
330 /// HgPath>` detached from `'tree`
328 /// HgPath>` detached from `'tree`
331 pub(super) fn copy_source_borrowed(
329 pub(super) fn copy_source_borrowed(
332 &self,
330 &self,
333 on_disk: &'on_disk [u8],
331 on_disk: &'on_disk [u8],
334 ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
332 ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
335 {
333 {
336 Ok(match self {
334 Ok(match self {
337 NodeRef::InMemory(_path, node) => {
335 NodeRef::InMemory(_path, node) => {
338 node.copy_source.as_ref().map(|source| match source {
336 node.copy_source.as_ref().map(|source| match source {
339 Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
337 Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
340 Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
338 Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
341 })
339 })
342 }
340 }
343 NodeRef::OnDisk(node) => node
341 NodeRef::OnDisk(node) => {
344 .copy_source(on_disk)?
342 node.copy_source(on_disk)?.map(BorrowedPath::OnDisk)
345 .map(|source| BorrowedPath::OnDisk(source)),
343 }
346 })
344 })
347 }
345 }
348
346
349 pub(super) fn entry(
347 pub(super) fn entry(
350 &self,
348 &self,
351 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
349 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
352 match self {
350 match self {
353 NodeRef::InMemory(_path, node) => {
351 NodeRef::InMemory(_path, node) => {
354 Ok(node.data.as_entry().copied())
352 Ok(node.data.as_entry().copied())
355 }
353 }
356 NodeRef::OnDisk(node) => node.entry(),
354 NodeRef::OnDisk(node) => node.entry(),
357 }
355 }
358 }
356 }
359
357
360 pub(super) fn cached_directory_mtime(
358 pub(super) fn cached_directory_mtime(
361 &self,
359 &self,
362 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
360 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
363 match self {
361 match self {
364 NodeRef::InMemory(_path, node) => Ok(match node.data {
362 NodeRef::InMemory(_path, node) => Ok(match node.data {
365 NodeData::CachedDirectory { mtime } => Some(mtime),
363 NodeData::CachedDirectory { mtime } => Some(mtime),
366 _ => None,
364 _ => None,
367 }),
365 }),
368 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
366 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
369 }
367 }
370 }
368 }
371
369
372 pub(super) fn descendants_with_entry_count(&self) -> u32 {
370 pub(super) fn descendants_with_entry_count(&self) -> u32 {
373 match self {
371 match self {
374 NodeRef::InMemory(_path, node) => {
372 NodeRef::InMemory(_path, node) => {
375 node.descendants_with_entry_count
373 node.descendants_with_entry_count
376 }
374 }
377 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
375 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
378 }
376 }
379 }
377 }
380
378
381 pub(super) fn tracked_descendants_count(&self) -> u32 {
379 pub(super) fn tracked_descendants_count(&self) -> u32 {
382 match self {
380 match self {
383 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
381 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
384 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
382 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
385 }
383 }
386 }
384 }
387 }
385 }
388
386
389 /// Represents a file or a directory
387 /// Represents a file or a directory
390 #[derive(Default, Debug)]
388 #[derive(Default, Debug)]
391 pub(super) struct Node<'on_disk> {
389 pub(super) struct Node<'on_disk> {
392 pub(super) data: NodeData,
390 pub(super) data: NodeData,
393
391
394 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
392 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
395
393
396 pub(super) children: ChildNodes<'on_disk>,
394 pub(super) children: ChildNodes<'on_disk>,
397
395
398 /// How many (non-inclusive) descendants of this node have an entry.
396 /// How many (non-inclusive) descendants of this node have an entry.
399 pub(super) descendants_with_entry_count: u32,
397 pub(super) descendants_with_entry_count: u32,
400
398
401 /// How many (non-inclusive) descendants of this node have an entry whose
399 /// How many (non-inclusive) descendants of this node have an entry whose
402 /// state is "tracked".
400 /// state is "tracked".
403 pub(super) tracked_descendants_count: u32,
401 pub(super) tracked_descendants_count: u32,
404 }
402 }
405
403
406 #[derive(Debug)]
404 #[derive(Debug)]
407 pub(super) enum NodeData {
405 pub(super) enum NodeData {
408 Entry(DirstateEntry),
406 Entry(DirstateEntry),
409 CachedDirectory { mtime: TruncatedTimestamp },
407 CachedDirectory { mtime: TruncatedTimestamp },
410 None,
408 None,
411 }
409 }
412
410
413 impl Default for NodeData {
411 impl Default for NodeData {
414 fn default() -> Self {
412 fn default() -> Self {
415 NodeData::None
413 NodeData::None
416 }
414 }
417 }
415 }
418
416
419 impl NodeData {
417 impl NodeData {
420 fn has_entry(&self) -> bool {
418 fn has_entry(&self) -> bool {
421 match self {
419 matches!(self, NodeData::Entry(_))
422 NodeData::Entry(_) => true,
423 _ => false,
424 }
425 }
420 }
426
421
427 fn as_entry(&self) -> Option<&DirstateEntry> {
422 fn as_entry(&self) -> Option<&DirstateEntry> {
428 match self {
423 match self {
429 NodeData::Entry(entry) => Some(entry),
424 NodeData::Entry(entry) => Some(entry),
430 _ => None,
425 _ => None,
431 }
426 }
432 }
427 }
433
428
434 fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> {
429 fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> {
435 match self {
430 match self {
436 NodeData::Entry(entry) => Some(entry),
431 NodeData::Entry(entry) => Some(entry),
437 _ => None,
432 _ => None,
438 }
433 }
439 }
434 }
440 }
435 }
441
436
442 impl<'on_disk> DirstateMap<'on_disk> {
437 impl<'on_disk> DirstateMap<'on_disk> {
443 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
438 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
444 Self {
439 Self {
445 on_disk,
440 on_disk,
446 root: ChildNodes::default(),
441 root: ChildNodes::default(),
447 nodes_with_entry_count: 0,
442 nodes_with_entry_count: 0,
448 nodes_with_copy_source_count: 0,
443 nodes_with_copy_source_count: 0,
449 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
444 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
450 unreachable_bytes: 0,
445 unreachable_bytes: 0,
451 old_data_size: 0,
446 old_data_size: 0,
452 dirstate_version: DirstateVersion::V1,
447 dirstate_version: DirstateVersion::V1,
453 }
448 }
454 }
449 }
455
450
456 #[logging_timer::time("trace")]
451 #[logging_timer::time("trace")]
457 pub fn new_v2(
452 pub fn new_v2(
458 on_disk: &'on_disk [u8],
453 on_disk: &'on_disk [u8],
459 data_size: usize,
454 data_size: usize,
460 metadata: &[u8],
455 metadata: &[u8],
461 ) -> Result<Self, DirstateError> {
456 ) -> Result<Self, DirstateError> {
462 if let Some(data) = on_disk.get(..data_size) {
457 if let Some(data) = on_disk.get(..data_size) {
463 Ok(on_disk::read(data, metadata)?)
458 Ok(on_disk::read(data, metadata)?)
464 } else {
459 } else {
465 Err(DirstateV2ParseError::new("not enough bytes on disk").into())
460 Err(DirstateV2ParseError::new("not enough bytes on disk").into())
466 }
461 }
467 }
462 }
468
463
469 #[logging_timer::time("trace")]
464 #[logging_timer::time("trace")]
470 pub fn new_v1(
465 pub fn new_v1(
471 on_disk: &'on_disk [u8],
466 on_disk: &'on_disk [u8],
472 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
467 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
473 let mut map = Self::empty(on_disk);
468 let mut map = Self::empty(on_disk);
474 if map.on_disk.is_empty() {
469 if map.on_disk.is_empty() {
475 return Ok((map, None));
470 return Ok((map, None));
476 }
471 }
477
472
478 let parents = parse_dirstate_entries(
473 let parents = parse_dirstate_entries(
479 map.on_disk,
474 map.on_disk,
480 |path, entry, copy_source| {
475 |path, entry, copy_source| {
481 let tracked = entry.tracked();
476 let tracked = entry.tracked();
482 let node = Self::get_or_insert_node_inner(
477 let node = Self::get_or_insert_node_inner(
483 map.on_disk,
478 map.on_disk,
484 &mut map.unreachable_bytes,
479 &mut map.unreachable_bytes,
485 &mut map.root,
480 &mut map.root,
486 path,
481 path,
487 WithBasename::to_cow_borrowed,
482 WithBasename::to_cow_borrowed,
488 |ancestor| {
483 |ancestor| {
489 if tracked {
484 if tracked {
490 ancestor.tracked_descendants_count += 1
485 ancestor.tracked_descendants_count += 1
491 }
486 }
492 ancestor.descendants_with_entry_count += 1
487 ancestor.descendants_with_entry_count += 1
493 },
488 },
494 )?;
489 )?;
495 assert!(
490 assert!(
496 !node.data.has_entry(),
491 !node.data.has_entry(),
497 "duplicate dirstate entry in read"
492 "duplicate dirstate entry in read"
498 );
493 );
499 assert!(
494 assert!(
500 node.copy_source.is_none(),
495 node.copy_source.is_none(),
501 "duplicate dirstate entry in read"
496 "duplicate dirstate entry in read"
502 );
497 );
503 node.data = NodeData::Entry(*entry);
498 node.data = NodeData::Entry(*entry);
504 node.copy_source = copy_source.map(Cow::Borrowed);
499 node.copy_source = copy_source.map(Cow::Borrowed);
505 map.nodes_with_entry_count += 1;
500 map.nodes_with_entry_count += 1;
506 if copy_source.is_some() {
501 if copy_source.is_some() {
507 map.nodes_with_copy_source_count += 1
502 map.nodes_with_copy_source_count += 1
508 }
503 }
509 Ok(())
504 Ok(())
510 },
505 },
511 )?;
506 )?;
512 let parents = Some(parents.clone());
507 let parents = Some(*parents);
513
508
514 Ok((map, parents))
509 Ok((map, parents))
515 }
510 }
516
511
517 /// Assuming dirstate-v2 format, returns whether the next write should
512 /// Assuming dirstate-v2 format, returns whether the next write should
518 /// append to the existing data file that contains `self.on_disk` (true),
513 /// append to the existing data file that contains `self.on_disk` (true),
519 /// or create a new data file from scratch (false).
514 /// or create a new data file from scratch (false).
520 pub(super) fn write_should_append(&self) -> bool {
515 pub(super) fn write_should_append(&self) -> bool {
521 let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
516 let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
522 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
517 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
523 }
518 }
524
519
525 fn get_node<'tree>(
520 fn get_node<'tree>(
526 &'tree self,
521 &'tree self,
527 path: &HgPath,
522 path: &HgPath,
528 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
523 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
529 let mut children = self.root.as_ref();
524 let mut children = self.root.as_ref();
530 let mut components = path.components();
525 let mut components = path.components();
531 let mut component =
526 let mut component =
532 components.next().expect("expected at least one components");
527 components.next().expect("expected at least one components");
533 loop {
528 loop {
534 if let Some(child) = children.get(component, self.on_disk)? {
529 if let Some(child) = children.get(component, self.on_disk)? {
535 if let Some(next_component) = components.next() {
530 if let Some(next_component) = components.next() {
536 component = next_component;
531 component = next_component;
537 children = child.children(self.on_disk)?;
532 children = child.children(self.on_disk)?;
538 } else {
533 } else {
539 return Ok(Some(child));
534 return Ok(Some(child));
540 }
535 }
541 } else {
536 } else {
542 return Ok(None);
537 return Ok(None);
543 }
538 }
544 }
539 }
545 }
540 }
546
541
547 /// Returns a mutable reference to the node at `path` if it exists
542 /// Returns a mutable reference to the node at `path` if it exists
548 ///
543 ///
549 /// `each_ancestor` is a callback that is called for each ancestor node
544 /// `each_ancestor` is a callback that is called for each ancestor node
550 /// when descending the tree. It is used to keep the different counters
545 /// when descending the tree. It is used to keep the different counters
551 /// of the `DirstateMap` up-to-date.
546 /// of the `DirstateMap` up-to-date.
552 fn get_node_mut<'tree>(
547 fn get_node_mut<'tree>(
553 &'tree mut self,
548 &'tree mut self,
554 path: &HgPath,
549 path: &HgPath,
555 each_ancestor: impl FnMut(&mut Node),
550 each_ancestor: impl FnMut(&mut Node),
556 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
551 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
557 Self::get_node_mut_inner(
552 Self::get_node_mut_inner(
558 self.on_disk,
553 self.on_disk,
559 &mut self.unreachable_bytes,
554 &mut self.unreachable_bytes,
560 &mut self.root,
555 &mut self.root,
561 path,
556 path,
562 each_ancestor,
557 each_ancestor,
563 )
558 )
564 }
559 }
565
560
566 /// Lower-level version of `get_node_mut`.
561 /// Lower-level version of `get_node_mut`.
567 ///
562 ///
568 /// This takes `root` instead of `&mut self` so that callers can mutate
563 /// This takes `root` instead of `&mut self` so that callers can mutate
569 /// other fields while the returned borrow is still valid.
564 /// other fields while the returned borrow is still valid.
570 ///
565 ///
571 /// `each_ancestor` is a callback that is called for each ancestor node
566 /// `each_ancestor` is a callback that is called for each ancestor node
572 /// when descending the tree. It is used to keep the different counters
567 /// when descending the tree. It is used to keep the different counters
573 /// of the `DirstateMap` up-to-date.
568 /// of the `DirstateMap` up-to-date.
574 fn get_node_mut_inner<'tree>(
569 fn get_node_mut_inner<'tree>(
575 on_disk: &'on_disk [u8],
570 on_disk: &'on_disk [u8],
576 unreachable_bytes: &mut u32,
571 unreachable_bytes: &mut u32,
577 root: &'tree mut ChildNodes<'on_disk>,
572 root: &'tree mut ChildNodes<'on_disk>,
578 path: &HgPath,
573 path: &HgPath,
579 mut each_ancestor: impl FnMut(&mut Node),
574 mut each_ancestor: impl FnMut(&mut Node),
580 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
575 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
581 let mut children = root;
576 let mut children = root;
582 let mut components = path.components();
577 let mut components = path.components();
583 let mut component =
578 let mut component =
584 components.next().expect("expected at least one components");
579 components.next().expect("expected at least one components");
585 loop {
580 loop {
586 if let Some(child) = children
581 if let Some(child) = children
587 .make_mut(on_disk, unreachable_bytes)?
582 .make_mut(on_disk, unreachable_bytes)?
588 .get_mut(component)
583 .get_mut(component)
589 {
584 {
590 if let Some(next_component) = components.next() {
585 if let Some(next_component) = components.next() {
591 each_ancestor(child);
586 each_ancestor(child);
592 component = next_component;
587 component = next_component;
593 children = &mut child.children;
588 children = &mut child.children;
594 } else {
589 } else {
595 return Ok(Some(child));
590 return Ok(Some(child));
596 }
591 }
597 } else {
592 } else {
598 return Ok(None);
593 return Ok(None);
599 }
594 }
600 }
595 }
601 }
596 }
602
597
603 /// Get a mutable reference to the node at `path`, creating it if it does
598 /// Get a mutable reference to the node at `path`, creating it if it does
604 /// not exist.
599 /// not exist.
605 ///
600 ///
606 /// `each_ancestor` is a callback that is called for each ancestor node
601 /// `each_ancestor` is a callback that is called for each ancestor node
607 /// when descending the tree. It is used to keep the different counters
602 /// when descending the tree. It is used to keep the different counters
608 /// of the `DirstateMap` up-to-date.
603 /// of the `DirstateMap` up-to-date.
609 fn get_or_insert_node<'tree, 'path>(
604 fn get_or_insert_node<'tree, 'path>(
610 &'tree mut self,
605 &'tree mut self,
611 path: &'path HgPath,
606 path: &'path HgPath,
612 each_ancestor: impl FnMut(&mut Node),
607 each_ancestor: impl FnMut(&mut Node),
613 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
608 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
614 Self::get_or_insert_node_inner(
609 Self::get_or_insert_node_inner(
615 self.on_disk,
610 self.on_disk,
616 &mut self.unreachable_bytes,
611 &mut self.unreachable_bytes,
617 &mut self.root,
612 &mut self.root,
618 path,
613 path,
619 WithBasename::to_cow_owned,
614 WithBasename::to_cow_owned,
620 each_ancestor,
615 each_ancestor,
621 )
616 )
622 }
617 }
623
618
624 /// Lower-level version of `get_or_insert_node_inner`, which is used when
619 /// Lower-level version of `get_or_insert_node_inner`, which is used when
625 /// parsing disk data to remove allocations for new nodes.
620 /// parsing disk data to remove allocations for new nodes.
626 fn get_or_insert_node_inner<'tree, 'path>(
621 fn get_or_insert_node_inner<'tree, 'path>(
627 on_disk: &'on_disk [u8],
622 on_disk: &'on_disk [u8],
628 unreachable_bytes: &mut u32,
623 unreachable_bytes: &mut u32,
629 root: &'tree mut ChildNodes<'on_disk>,
624 root: &'tree mut ChildNodes<'on_disk>,
630 path: &'path HgPath,
625 path: &'path HgPath,
631 to_cow: impl Fn(
626 to_cow: impl Fn(
632 WithBasename<&'path HgPath>,
627 WithBasename<&'path HgPath>,
633 ) -> WithBasename<Cow<'on_disk, HgPath>>,
628 ) -> WithBasename<Cow<'on_disk, HgPath>>,
634 mut each_ancestor: impl FnMut(&mut Node),
629 mut each_ancestor: impl FnMut(&mut Node),
635 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
630 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
636 let mut child_nodes = root;
631 let mut child_nodes = root;
637 let mut inclusive_ancestor_paths =
632 let mut inclusive_ancestor_paths =
638 WithBasename::inclusive_ancestors_of(path);
633 WithBasename::inclusive_ancestors_of(path);
639 let mut ancestor_path = inclusive_ancestor_paths
634 let mut ancestor_path = inclusive_ancestor_paths
640 .next()
635 .next()
641 .expect("expected at least one inclusive ancestor");
636 .expect("expected at least one inclusive ancestor");
642 loop {
637 loop {
643 let (_, child_node) = child_nodes
638 let (_, child_node) = child_nodes
644 .make_mut(on_disk, unreachable_bytes)?
639 .make_mut(on_disk, unreachable_bytes)?
645 .raw_entry_mut()
640 .raw_entry_mut()
646 .from_key(ancestor_path.base_name())
641 .from_key(ancestor_path.base_name())
647 .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
642 .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
648 if let Some(next) = inclusive_ancestor_paths.next() {
643 if let Some(next) = inclusive_ancestor_paths.next() {
649 each_ancestor(child_node);
644 each_ancestor(child_node);
650 ancestor_path = next;
645 ancestor_path = next;
651 child_nodes = &mut child_node.children;
646 child_nodes = &mut child_node.children;
652 } else {
647 } else {
653 return Ok(child_node);
648 return Ok(child_node);
654 }
649 }
655 }
650 }
656 }
651 }
657
652
658 #[allow(clippy::too_many_arguments)]
653 #[allow(clippy::too_many_arguments)]
659 fn reset_state(
654 fn reset_state(
660 &mut self,
655 &mut self,
661 filename: &HgPath,
656 filename: &HgPath,
662 old_entry_opt: Option<DirstateEntry>,
657 old_entry_opt: Option<DirstateEntry>,
663 wc_tracked: bool,
658 wc_tracked: bool,
664 p1_tracked: bool,
659 p1_tracked: bool,
665 p2_info: bool,
660 p2_info: bool,
666 has_meaningful_mtime: bool,
661 has_meaningful_mtime: bool,
667 parent_file_data_opt: Option<ParentFileData>,
662 parent_file_data_opt: Option<ParentFileData>,
668 ) -> Result<(), DirstateError> {
663 ) -> Result<(), DirstateError> {
669 let (had_entry, was_tracked) = match old_entry_opt {
664 let (had_entry, was_tracked) = match old_entry_opt {
670 Some(old_entry) => (true, old_entry.tracked()),
665 Some(old_entry) => (true, old_entry.tracked()),
671 None => (false, false),
666 None => (false, false),
672 };
667 };
673 let node = self.get_or_insert_node(filename, |ancestor| {
668 let node = self.get_or_insert_node(filename, |ancestor| {
674 if !had_entry {
669 if !had_entry {
675 ancestor.descendants_with_entry_count += 1;
670 ancestor.descendants_with_entry_count += 1;
676 }
671 }
677 if was_tracked {
672 if was_tracked {
678 if !wc_tracked {
673 if !wc_tracked {
679 ancestor.tracked_descendants_count = ancestor
674 ancestor.tracked_descendants_count = ancestor
680 .tracked_descendants_count
675 .tracked_descendants_count
681 .checked_sub(1)
676 .checked_sub(1)
682 .expect("tracked count to be >= 0");
677 .expect("tracked count to be >= 0");
683 }
678 }
684 } else {
679 } else if wc_tracked {
685 if wc_tracked {
680 ancestor.tracked_descendants_count += 1;
686 ancestor.tracked_descendants_count += 1;
687 }
688 }
681 }
689 })?;
682 })?;
690
683
691 let v2_data = if let Some(parent_file_data) = parent_file_data_opt {
684 let v2_data = if let Some(parent_file_data) = parent_file_data_opt {
692 DirstateV2Data {
685 DirstateV2Data {
693 wc_tracked,
686 wc_tracked,
694 p1_tracked,
687 p1_tracked,
695 p2_info,
688 p2_info,
696 mode_size: parent_file_data.mode_size,
689 mode_size: parent_file_data.mode_size,
697 mtime: if has_meaningful_mtime {
690 mtime: if has_meaningful_mtime {
698 parent_file_data.mtime
691 parent_file_data.mtime
699 } else {
692 } else {
700 None
693 None
701 },
694 },
702 ..Default::default()
695 ..Default::default()
703 }
696 }
704 } else {
697 } else {
705 DirstateV2Data {
698 DirstateV2Data {
706 wc_tracked,
699 wc_tracked,
707 p1_tracked,
700 p1_tracked,
708 p2_info,
701 p2_info,
709 ..Default::default()
702 ..Default::default()
710 }
703 }
711 };
704 };
712 node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data));
705 node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data));
713 if !had_entry {
706 if !had_entry {
714 self.nodes_with_entry_count += 1;
707 self.nodes_with_entry_count += 1;
715 }
708 }
716 Ok(())
709 Ok(())
717 }
710 }
718
711
719 fn set_tracked(
712 fn set_tracked(
720 &mut self,
713 &mut self,
721 filename: &HgPath,
714 filename: &HgPath,
722 old_entry_opt: Option<DirstateEntry>,
715 old_entry_opt: Option<DirstateEntry>,
723 ) -> Result<bool, DirstateV2ParseError> {
716 ) -> Result<bool, DirstateV2ParseError> {
724 let was_tracked = old_entry_opt.map_or(false, |e| e.tracked());
717 let was_tracked = old_entry_opt.map_or(false, |e| e.tracked());
725 let had_entry = old_entry_opt.is_some();
718 let had_entry = old_entry_opt.is_some();
726 let tracked_count_increment = if was_tracked { 0 } else { 1 };
719 let tracked_count_increment = if was_tracked { 0 } else { 1 };
727 let mut new = false;
720 let mut new = false;
728
721
729 let node = self.get_or_insert_node(filename, |ancestor| {
722 let node = self.get_or_insert_node(filename, |ancestor| {
730 if !had_entry {
723 if !had_entry {
731 ancestor.descendants_with_entry_count += 1;
724 ancestor.descendants_with_entry_count += 1;
732 }
725 }
733
726
734 ancestor.tracked_descendants_count += tracked_count_increment;
727 ancestor.tracked_descendants_count += tracked_count_increment;
735 })?;
728 })?;
736 if let Some(old_entry) = old_entry_opt {
729 if let Some(old_entry) = old_entry_opt {
737 let mut e = old_entry.clone();
730 let mut e = old_entry;
738 if e.tracked() {
731 if e.tracked() {
739 // XXX
732 // XXX
740 // This is probably overkill for more case, but we need this to
733 // This is probably overkill for more case, but we need this to
741 // fully replace the `normallookup` call with `set_tracked`
734 // fully replace the `normallookup` call with `set_tracked`
742 // one. Consider smoothing this in the future.
735 // one. Consider smoothing this in the future.
743 e.set_possibly_dirty();
736 e.set_possibly_dirty();
744 } else {
737 } else {
745 new = true;
738 new = true;
746 e.set_tracked();
739 e.set_tracked();
747 }
740 }
748 node.data = NodeData::Entry(e)
741 node.data = NodeData::Entry(e)
749 } else {
742 } else {
750 node.data = NodeData::Entry(DirstateEntry::new_tracked());
743 node.data = NodeData::Entry(DirstateEntry::new_tracked());
751 self.nodes_with_entry_count += 1;
744 self.nodes_with_entry_count += 1;
752 new = true;
745 new = true;
753 };
746 };
754 Ok(new)
747 Ok(new)
755 }
748 }
756
749
757 /// Set a node as untracked in the dirstate.
750 /// Set a node as untracked in the dirstate.
758 ///
751 ///
759 /// It is the responsibility of the caller to remove the copy source and/or
752 /// It is the responsibility of the caller to remove the copy source and/or
760 /// the entry itself if appropriate.
753 /// the entry itself if appropriate.
761 ///
754 ///
762 /// # Panics
755 /// # Panics
763 ///
756 ///
764 /// Panics if the node does not exist.
757 /// Panics if the node does not exist.
765 fn set_untracked(
758 fn set_untracked(
766 &mut self,
759 &mut self,
767 filename: &HgPath,
760 filename: &HgPath,
768 old_entry: DirstateEntry,
761 old_entry: DirstateEntry,
769 ) -> Result<(), DirstateV2ParseError> {
762 ) -> Result<(), DirstateV2ParseError> {
770 let node = self
763 let node = self
771 .get_node_mut(filename, |ancestor| {
764 .get_node_mut(filename, |ancestor| {
772 ancestor.tracked_descendants_count = ancestor
765 ancestor.tracked_descendants_count = ancestor
773 .tracked_descendants_count
766 .tracked_descendants_count
774 .checked_sub(1)
767 .checked_sub(1)
775 .expect("tracked_descendants_count should be >= 0");
768 .expect("tracked_descendants_count should be >= 0");
776 })?
769 })?
777 .expect("node should exist");
770 .expect("node should exist");
778 let mut new_entry = old_entry.clone();
771 let mut new_entry = old_entry;
779 new_entry.set_untracked();
772 new_entry.set_untracked();
780 node.data = NodeData::Entry(new_entry);
773 node.data = NodeData::Entry(new_entry);
781 Ok(())
774 Ok(())
782 }
775 }
783
776
784 /// Set a node as clean in the dirstate.
777 /// Set a node as clean in the dirstate.
785 ///
778 ///
786 /// It is the responsibility of the caller to remove the copy source.
779 /// It is the responsibility of the caller to remove the copy source.
787 ///
780 ///
788 /// # Panics
781 /// # Panics
789 ///
782 ///
790 /// Panics if the node does not exist.
783 /// Panics if the node does not exist.
791 fn set_clean(
784 fn set_clean(
792 &mut self,
785 &mut self,
793 filename: &HgPath,
786 filename: &HgPath,
794 old_entry: DirstateEntry,
787 old_entry: DirstateEntry,
795 mode: u32,
788 mode: u32,
796 size: u32,
789 size: u32,
797 mtime: TruncatedTimestamp,
790 mtime: TruncatedTimestamp,
798 ) -> Result<(), DirstateError> {
791 ) -> Result<(), DirstateError> {
799 let node = self
792 let node = self
800 .get_node_mut(filename, |ancestor| {
793 .get_node_mut(filename, |ancestor| {
801 if !old_entry.tracked() {
794 if !old_entry.tracked() {
802 ancestor.tracked_descendants_count += 1;
795 ancestor.tracked_descendants_count += 1;
803 }
796 }
804 })?
797 })?
805 .expect("node should exist");
798 .expect("node should exist");
806 let mut new_entry = old_entry.clone();
799 let mut new_entry = old_entry;
807 new_entry.set_clean(mode, size, mtime);
800 new_entry.set_clean(mode, size, mtime);
808 node.data = NodeData::Entry(new_entry);
801 node.data = NodeData::Entry(new_entry);
809 Ok(())
802 Ok(())
810 }
803 }
811
804
812 /// Set a node as possibly dirty in the dirstate.
805 /// Set a node as possibly dirty in the dirstate.
813 ///
806 ///
814 /// # Panics
807 /// # Panics
815 ///
808 ///
816 /// Panics if the node does not exist.
809 /// Panics if the node does not exist.
817 fn set_possibly_dirty(
810 fn set_possibly_dirty(
818 &mut self,
811 &mut self,
819 filename: &HgPath,
812 filename: &HgPath,
820 ) -> Result<(), DirstateError> {
813 ) -> Result<(), DirstateError> {
821 let node = self
814 let node = self
822 .get_node_mut(filename, |_ancestor| {})?
815 .get_node_mut(filename, |_ancestor| {})?
823 .expect("node should exist");
816 .expect("node should exist");
824 let entry = node.data.as_entry_mut().expect("entry should exist");
817 let entry = node.data.as_entry_mut().expect("entry should exist");
825 entry.set_possibly_dirty();
818 entry.set_possibly_dirty();
826 node.data = NodeData::Entry(*entry);
819 node.data = NodeData::Entry(*entry);
827 Ok(())
820 Ok(())
828 }
821 }
829
822
830 /// Clears the cached mtime for the (potential) folder at `path`.
823 /// Clears the cached mtime for the (potential) folder at `path`.
831 pub(super) fn clear_cached_mtime(
824 pub(super) fn clear_cached_mtime(
832 &mut self,
825 &mut self,
833 path: &HgPath,
826 path: &HgPath,
834 ) -> Result<(), DirstateV2ParseError> {
827 ) -> Result<(), DirstateV2ParseError> {
835 let node = match self.get_node_mut(path, |_ancestor| {})? {
828 let node = match self.get_node_mut(path, |_ancestor| {})? {
836 Some(node) => node,
829 Some(node) => node,
837 None => return Ok(()),
830 None => return Ok(()),
838 };
831 };
839 if let NodeData::CachedDirectory { .. } = &node.data {
832 if let NodeData::CachedDirectory { .. } = &node.data {
840 node.data = NodeData::None
833 node.data = NodeData::None
841 }
834 }
842 Ok(())
835 Ok(())
843 }
836 }
844
837
845 /// Sets the cached mtime for the (potential) folder at `path`.
838 /// Sets the cached mtime for the (potential) folder at `path`.
846 pub(super) fn set_cached_mtime(
839 pub(super) fn set_cached_mtime(
847 &mut self,
840 &mut self,
848 path: &HgPath,
841 path: &HgPath,
849 mtime: TruncatedTimestamp,
842 mtime: TruncatedTimestamp,
850 ) -> Result<(), DirstateV2ParseError> {
843 ) -> Result<(), DirstateV2ParseError> {
851 let node = match self.get_node_mut(path, |_ancestor| {})? {
844 let node = match self.get_node_mut(path, |_ancestor| {})? {
852 Some(node) => node,
845 Some(node) => node,
853 None => return Ok(()),
846 None => return Ok(()),
854 };
847 };
855 match &node.data {
848 match &node.data {
856 NodeData::Entry(_) => {} // Don’t overwrite an entry
849 NodeData::Entry(_) => {} // Don’t overwrite an entry
857 NodeData::CachedDirectory { .. } | NodeData::None => {
850 NodeData::CachedDirectory { .. } | NodeData::None => {
858 node.data = NodeData::CachedDirectory { mtime }
851 node.data = NodeData::CachedDirectory { mtime }
859 }
852 }
860 }
853 }
861 Ok(())
854 Ok(())
862 }
855 }
863
856
864 fn iter_nodes<'tree>(
857 fn iter_nodes<'tree>(
865 &'tree self,
858 &'tree self,
866 ) -> impl Iterator<
859 ) -> impl Iterator<
867 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
860 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
868 > + 'tree {
861 > + 'tree {
869 // Depth first tree traversal.
862 // Depth first tree traversal.
870 //
863 //
871 // If we could afford internal iteration and recursion,
864 // If we could afford internal iteration and recursion,
872 // this would look like:
865 // this would look like:
873 //
866 //
874 // ```
867 // ```
875 // fn traverse_children(
868 // fn traverse_children(
876 // children: &ChildNodes,
869 // children: &ChildNodes,
877 // each: &mut impl FnMut(&Node),
870 // each: &mut impl FnMut(&Node),
878 // ) {
871 // ) {
879 // for child in children.values() {
872 // for child in children.values() {
880 // traverse_children(&child.children, each);
873 // traverse_children(&child.children, each);
881 // each(child);
874 // each(child);
882 // }
875 // }
883 // }
876 // }
884 // ```
877 // ```
885 //
878 //
886 // However we want an external iterator and therefore can’t use the
879 // However we want an external iterator and therefore can’t use the
887 // call stack. Use an explicit stack instead:
880 // call stack. Use an explicit stack instead:
888 let mut stack = Vec::new();
881 let mut stack = Vec::new();
889 let mut iter = self.root.as_ref().iter();
882 let mut iter = self.root.as_ref().iter();
890 std::iter::from_fn(move || {
883 std::iter::from_fn(move || {
891 while let Some(child_node) = iter.next() {
884 while let Some(child_node) = iter.next() {
892 let children = match child_node.children(self.on_disk) {
885 let children = match child_node.children(self.on_disk) {
893 Ok(children) => children,
886 Ok(children) => children,
894 Err(error) => return Some(Err(error)),
887 Err(error) => return Some(Err(error)),
895 };
888 };
896 // Pseudo-recursion
889 // Pseudo-recursion
897 let new_iter = children.iter();
890 let new_iter = children.iter();
898 let old_iter = std::mem::replace(&mut iter, new_iter);
891 let old_iter = std::mem::replace(&mut iter, new_iter);
899 stack.push((child_node, old_iter));
892 stack.push((child_node, old_iter));
900 }
893 }
901 // Found the end of a `children.iter()` iterator.
894 // Found the end of a `children.iter()` iterator.
902 if let Some((child_node, next_iter)) = stack.pop() {
895 if let Some((child_node, next_iter)) = stack.pop() {
903 // "Return" from pseudo-recursion by restoring state from the
896 // "Return" from pseudo-recursion by restoring state from the
904 // explicit stack
897 // explicit stack
905 iter = next_iter;
898 iter = next_iter;
906
899
907 Some(Ok(child_node))
900 Some(Ok(child_node))
908 } else {
901 } else {
909 // Reached the bottom of the stack, we’re done
902 // Reached the bottom of the stack, we’re done
910 None
903 None
911 }
904 }
912 })
905 })
913 }
906 }
914
907
915 fn count_dropped_path(unreachable_bytes: &mut u32, path: Cow<HgPath>) {
908 fn count_dropped_path(unreachable_bytes: &mut u32, path: Cow<HgPath>) {
916 if let Cow::Borrowed(path) = path {
909 if let Cow::Borrowed(path) = path {
917 *unreachable_bytes += path.len() as u32
910 *unreachable_bytes += path.len() as u32
918 }
911 }
919 }
912 }
920 }
913 }
921
914
922 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
915 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
923 ///
916 ///
924 /// The callback is only called for incoming `Ok` values. Errors are passed
917 /// The callback is only called for incoming `Ok` values. Errors are passed
925 /// through as-is. In order to let it use the `?` operator the callback is
918 /// through as-is. In order to let it use the `?` operator the callback is
926 /// expected to return a `Result` of `Option`, instead of an `Option` of
919 /// expected to return a `Result` of `Option`, instead of an `Option` of
927 /// `Result`.
920 /// `Result`.
928 fn filter_map_results<'a, I, F, A, B, E>(
921 fn filter_map_results<'a, I, F, A, B, E>(
929 iter: I,
922 iter: I,
930 f: F,
923 f: F,
931 ) -> impl Iterator<Item = Result<B, E>> + 'a
924 ) -> impl Iterator<Item = Result<B, E>> + 'a
932 where
925 where
933 I: Iterator<Item = Result<A, E>> + 'a,
926 I: Iterator<Item = Result<A, E>> + 'a,
934 F: Fn(A) -> Result<Option<B>, E> + 'a,
927 F: Fn(A) -> Result<Option<B>, E> + 'a,
935 {
928 {
936 iter.filter_map(move |result| match result {
929 iter.filter_map(move |result| match result {
937 Ok(node) => f(node).transpose(),
930 Ok(node) => f(node).transpose(),
938 Err(e) => Some(Err(e)),
931 Err(e) => Some(Err(e)),
939 })
932 })
940 }
933 }
941
934
942 type DebugDirstateTuple<'a> = (&'a HgPath, (u8, i32, i32, i32));
935 type DebugDirstateTuple<'a> = (&'a HgPath, (u8, i32, i32, i32));
943
936
944 impl OwningDirstateMap {
937 impl OwningDirstateMap {
945 pub fn clear(&mut self) {
938 pub fn clear(&mut self) {
946 self.with_dmap_mut(|map| {
939 self.with_dmap_mut(|map| {
947 map.root = Default::default();
940 map.root = Default::default();
948 map.nodes_with_entry_count = 0;
941 map.nodes_with_entry_count = 0;
949 map.nodes_with_copy_source_count = 0;
942 map.nodes_with_copy_source_count = 0;
950 });
943 });
951 }
944 }
952
945
953 pub fn set_tracked(
946 pub fn set_tracked(
954 &mut self,
947 &mut self,
955 filename: &HgPath,
948 filename: &HgPath,
956 ) -> Result<bool, DirstateV2ParseError> {
949 ) -> Result<bool, DirstateV2ParseError> {
957 let old_entry_opt = self.get(filename)?;
950 let old_entry_opt = self.get(filename)?;
958 self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt))
951 self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt))
959 }
952 }
960
953
961 pub fn set_untracked(
954 pub fn set_untracked(
962 &mut self,
955 &mut self,
963 filename: &HgPath,
956 filename: &HgPath,
964 ) -> Result<bool, DirstateError> {
957 ) -> Result<bool, DirstateError> {
965 let old_entry_opt = self.get(filename)?;
958 let old_entry_opt = self.get(filename)?;
966 match old_entry_opt {
959 match old_entry_opt {
967 None => Ok(false),
960 None => Ok(false),
968 Some(old_entry) => {
961 Some(old_entry) => {
969 if !old_entry.tracked() {
962 if !old_entry.tracked() {
970 // `DirstateMap::set_untracked` is not a noop if
963 // `DirstateMap::set_untracked` is not a noop if
971 // already not tracked as it will decrement the
964 // already not tracked as it will decrement the
972 // tracked counters while going down.
965 // tracked counters while going down.
973 return Ok(true);
966 return Ok(true);
974 }
967 }
975 if old_entry.added() {
968 if old_entry.added() {
976 // Untracking an "added" entry will just result in a
969 // Untracking an "added" entry will just result in a
977 // worthless entry (and other parts of the code will
970 // worthless entry (and other parts of the code will
978 // complain about it), just drop it entirely.
971 // complain about it), just drop it entirely.
979 self.drop_entry_and_copy_source(filename)?;
972 self.drop_entry_and_copy_source(filename)?;
980 return Ok(true);
973 return Ok(true);
981 }
974 }
982 if !old_entry.p2_info() {
975 if !old_entry.p2_info() {
983 self.copy_map_remove(filename)?;
976 self.copy_map_remove(filename)?;
984 }
977 }
985
978
986 self.with_dmap_mut(|map| {
979 self.with_dmap_mut(|map| {
987 map.set_untracked(filename, old_entry)?;
980 map.set_untracked(filename, old_entry)?;
988 Ok(true)
981 Ok(true)
989 })
982 })
990 }
983 }
991 }
984 }
992 }
985 }
993
986
994 pub fn set_clean(
987 pub fn set_clean(
995 &mut self,
988 &mut self,
996 filename: &HgPath,
989 filename: &HgPath,
997 mode: u32,
990 mode: u32,
998 size: u32,
991 size: u32,
999 mtime: TruncatedTimestamp,
992 mtime: TruncatedTimestamp,
1000 ) -> Result<(), DirstateError> {
993 ) -> Result<(), DirstateError> {
1001 let old_entry = match self.get(filename)? {
994 let old_entry = match self.get(filename)? {
1002 None => {
995 None => {
1003 return Err(
996 return Err(
1004 DirstateMapError::PathNotFound(filename.into()).into()
997 DirstateMapError::PathNotFound(filename.into()).into()
1005 )
998 )
1006 }
999 }
1007 Some(e) => e,
1000 Some(e) => e,
1008 };
1001 };
1009 self.copy_map_remove(filename)?;
1002 self.copy_map_remove(filename)?;
1010 self.with_dmap_mut(|map| {
1003 self.with_dmap_mut(|map| {
1011 map.set_clean(filename, old_entry, mode, size, mtime)
1004 map.set_clean(filename, old_entry, mode, size, mtime)
1012 })
1005 })
1013 }
1006 }
1014
1007
1015 pub fn set_possibly_dirty(
1008 pub fn set_possibly_dirty(
1016 &mut self,
1009 &mut self,
1017 filename: &HgPath,
1010 filename: &HgPath,
1018 ) -> Result<(), DirstateError> {
1011 ) -> Result<(), DirstateError> {
1019 if self.get(filename)?.is_none() {
1012 if self.get(filename)?.is_none() {
1020 return Err(DirstateMapError::PathNotFound(filename.into()).into());
1013 return Err(DirstateMapError::PathNotFound(filename.into()).into());
1021 }
1014 }
1022 self.with_dmap_mut(|map| map.set_possibly_dirty(filename))
1015 self.with_dmap_mut(|map| map.set_possibly_dirty(filename))
1023 }
1016 }
1024
1017
1025 pub fn reset_state(
1018 pub fn reset_state(
1026 &mut self,
1019 &mut self,
1027 filename: &HgPath,
1020 filename: &HgPath,
1028 wc_tracked: bool,
1021 wc_tracked: bool,
1029 p1_tracked: bool,
1022 p1_tracked: bool,
1030 p2_info: bool,
1023 p2_info: bool,
1031 has_meaningful_mtime: bool,
1024 has_meaningful_mtime: bool,
1032 parent_file_data_opt: Option<ParentFileData>,
1025 parent_file_data_opt: Option<ParentFileData>,
1033 ) -> Result<(), DirstateError> {
1026 ) -> Result<(), DirstateError> {
1034 if !(p1_tracked || p2_info || wc_tracked) {
1027 if !(p1_tracked || p2_info || wc_tracked) {
1035 self.drop_entry_and_copy_source(filename)?;
1028 self.drop_entry_and_copy_source(filename)?;
1036 return Ok(());
1029 return Ok(());
1037 }
1030 }
1038 self.copy_map_remove(filename)?;
1031 self.copy_map_remove(filename)?;
1039 let old_entry_opt = self.get(filename)?;
1032 let old_entry_opt = self.get(filename)?;
1040 self.with_dmap_mut(|map| {
1033 self.with_dmap_mut(|map| {
1041 map.reset_state(
1034 map.reset_state(
1042 filename,
1035 filename,
1043 old_entry_opt,
1036 old_entry_opt,
1044 wc_tracked,
1037 wc_tracked,
1045 p1_tracked,
1038 p1_tracked,
1046 p2_info,
1039 p2_info,
1047 has_meaningful_mtime,
1040 has_meaningful_mtime,
1048 parent_file_data_opt,
1041 parent_file_data_opt,
1049 )
1042 )
1050 })
1043 })
1051 }
1044 }
1052
1045
1053 pub fn drop_entry_and_copy_source(
1046 pub fn drop_entry_and_copy_source(
1054 &mut self,
1047 &mut self,
1055 filename: &HgPath,
1048 filename: &HgPath,
1056 ) -> Result<(), DirstateError> {
1049 ) -> Result<(), DirstateError> {
1057 let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked());
1050 let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked());
1058 struct Dropped {
1051 struct Dropped {
1059 was_tracked: bool,
1052 was_tracked: bool,
1060 had_entry: bool,
1053 had_entry: bool,
1061 had_copy_source: bool,
1054 had_copy_source: bool,
1062 }
1055 }
1063
1056
1064 /// If this returns `Ok(Some((dropped, removed)))`, then
1057 /// If this returns `Ok(Some((dropped, removed)))`, then
1065 ///
1058 ///
1066 /// * `dropped` is about the leaf node that was at `filename`
1059 /// * `dropped` is about the leaf node that was at `filename`
1067 /// * `removed` is whether this particular level of recursion just
1060 /// * `removed` is whether this particular level of recursion just
1068 /// removed a node in `nodes`.
1061 /// removed a node in `nodes`.
1069 fn recur<'on_disk>(
1062 fn recur<'on_disk>(
1070 on_disk: &'on_disk [u8],
1063 on_disk: &'on_disk [u8],
1071 unreachable_bytes: &mut u32,
1064 unreachable_bytes: &mut u32,
1072 nodes: &mut ChildNodes<'on_disk>,
1065 nodes: &mut ChildNodes<'on_disk>,
1073 path: &HgPath,
1066 path: &HgPath,
1074 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
1067 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
1075 let (first_path_component, rest_of_path) =
1068 let (first_path_component, rest_of_path) =
1076 path.split_first_component();
1069 path.split_first_component();
1077 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
1070 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
1078 let node = if let Some(node) = nodes.get_mut(first_path_component)
1071 let node = if let Some(node) = nodes.get_mut(first_path_component)
1079 {
1072 {
1080 node
1073 node
1081 } else {
1074 } else {
1082 return Ok(None);
1075 return Ok(None);
1083 };
1076 };
1084 let dropped;
1077 let dropped;
1085 if let Some(rest) = rest_of_path {
1078 if let Some(rest) = rest_of_path {
1086 if let Some((d, removed)) = recur(
1079 if let Some((d, removed)) = recur(
1087 on_disk,
1080 on_disk,
1088 unreachable_bytes,
1081 unreachable_bytes,
1089 &mut node.children,
1082 &mut node.children,
1090 rest,
1083 rest,
1091 )? {
1084 )? {
1092 dropped = d;
1085 dropped = d;
1093 if dropped.had_entry {
1086 if dropped.had_entry {
1094 node.descendants_with_entry_count = node
1087 node.descendants_with_entry_count = node
1095 .descendants_with_entry_count
1088 .descendants_with_entry_count
1096 .checked_sub(1)
1089 .checked_sub(1)
1097 .expect(
1090 .expect(
1098 "descendants_with_entry_count should be >= 0",
1091 "descendants_with_entry_count should be >= 0",
1099 );
1092 );
1100 }
1093 }
1101 if dropped.was_tracked {
1094 if dropped.was_tracked {
1102 node.tracked_descendants_count = node
1095 node.tracked_descendants_count = node
1103 .tracked_descendants_count
1096 .tracked_descendants_count
1104 .checked_sub(1)
1097 .checked_sub(1)
1105 .expect(
1098 .expect(
1106 "tracked_descendants_count should be >= 0",
1099 "tracked_descendants_count should be >= 0",
1107 );
1100 );
1108 }
1101 }
1109
1102
1110 // Directory caches must be invalidated when removing a
1103 // Directory caches must be invalidated when removing a
1111 // child node
1104 // child node
1112 if removed {
1105 if removed {
1113 if let NodeData::CachedDirectory { .. } = &node.data {
1106 if let NodeData::CachedDirectory { .. } = &node.data {
1114 node.data = NodeData::None
1107 node.data = NodeData::None
1115 }
1108 }
1116 }
1109 }
1117 } else {
1110 } else {
1118 return Ok(None);
1111 return Ok(None);
1119 }
1112 }
1120 } else {
1113 } else {
1121 let entry = node.data.as_entry();
1114 let entry = node.data.as_entry();
1122 let was_tracked = entry.map_or(false, |entry| entry.tracked());
1115 let was_tracked = entry.map_or(false, |entry| entry.tracked());
1123 let had_entry = entry.is_some();
1116 let had_entry = entry.is_some();
1124 if had_entry {
1117 if had_entry {
1125 node.data = NodeData::None
1118 node.data = NodeData::None
1126 }
1119 }
1127 let mut had_copy_source = false;
1120 let mut had_copy_source = false;
1128 if let Some(source) = &node.copy_source {
1121 if let Some(source) = &node.copy_source {
1129 DirstateMap::count_dropped_path(
1122 DirstateMap::count_dropped_path(
1130 unreachable_bytes,
1123 unreachable_bytes,
1131 Cow::Borrowed(source),
1124 Cow::Borrowed(source),
1132 );
1125 );
1133 had_copy_source = true;
1126 had_copy_source = true;
1134 node.copy_source = None
1127 node.copy_source = None
1135 }
1128 }
1136 dropped = Dropped {
1129 dropped = Dropped {
1137 was_tracked,
1130 was_tracked,
1138 had_entry,
1131 had_entry,
1139 had_copy_source,
1132 had_copy_source,
1140 };
1133 };
1141 }
1134 }
1142 // After recursion, for both leaf (rest_of_path is None) nodes and
1135 // After recursion, for both leaf (rest_of_path is None) nodes and
1143 // parent nodes, remove a node if it just became empty.
1136 // parent nodes, remove a node if it just became empty.
1144 let remove = !node.data.has_entry()
1137 let remove = !node.data.has_entry()
1145 && node.copy_source.is_none()
1138 && node.copy_source.is_none()
1146 && node.children.is_empty();
1139 && node.children.is_empty();
1147 if remove {
1140 if remove {
1148 let (key, _) =
1141 let (key, _) =
1149 nodes.remove_entry(first_path_component).unwrap();
1142 nodes.remove_entry(first_path_component).unwrap();
1150 DirstateMap::count_dropped_path(
1143 DirstateMap::count_dropped_path(
1151 unreachable_bytes,
1144 unreachable_bytes,
1152 Cow::Borrowed(key.full_path()),
1145 Cow::Borrowed(key.full_path()),
1153 )
1146 )
1154 }
1147 }
1155 Ok(Some((dropped, remove)))
1148 Ok(Some((dropped, remove)))
1156 }
1149 }
1157
1150
1158 self.with_dmap_mut(|map| {
1151 self.with_dmap_mut(|map| {
1159 if let Some((dropped, _removed)) = recur(
1152 if let Some((dropped, _removed)) = recur(
1160 map.on_disk,
1153 map.on_disk,
1161 &mut map.unreachable_bytes,
1154 &mut map.unreachable_bytes,
1162 &mut map.root,
1155 &mut map.root,
1163 filename,
1156 filename,
1164 )? {
1157 )? {
1165 if dropped.had_entry {
1158 if dropped.had_entry {
1166 map.nodes_with_entry_count = map
1159 map.nodes_with_entry_count = map
1167 .nodes_with_entry_count
1160 .nodes_with_entry_count
1168 .checked_sub(1)
1161 .checked_sub(1)
1169 .expect("nodes_with_entry_count should be >= 0");
1162 .expect("nodes_with_entry_count should be >= 0");
1170 }
1163 }
1171 if dropped.had_copy_source {
1164 if dropped.had_copy_source {
1172 map.nodes_with_copy_source_count = map
1165 map.nodes_with_copy_source_count = map
1173 .nodes_with_copy_source_count
1166 .nodes_with_copy_source_count
1174 .checked_sub(1)
1167 .checked_sub(1)
1175 .expect("nodes_with_copy_source_count should be >= 0");
1168 .expect("nodes_with_copy_source_count should be >= 0");
1176 }
1169 }
1177 } else {
1170 } else {
1178 debug_assert!(!was_tracked);
1171 debug_assert!(!was_tracked);
1179 }
1172 }
1180 Ok(())
1173 Ok(())
1181 })
1174 })
1182 }
1175 }
1183
1176
1184 pub fn has_tracked_dir(
1177 pub fn has_tracked_dir(
1185 &mut self,
1178 &mut self,
1186 directory: &HgPath,
1179 directory: &HgPath,
1187 ) -> Result<bool, DirstateError> {
1180 ) -> Result<bool, DirstateError> {
1188 self.with_dmap_mut(|map| {
1181 self.with_dmap_mut(|map| {
1189 if let Some(node) = map.get_node(directory)? {
1182 if let Some(node) = map.get_node(directory)? {
1190 // A node without a `DirstateEntry` was created to hold child
1183 // A node without a `DirstateEntry` was created to hold child
1191 // nodes, and is therefore a directory.
1184 // nodes, and is therefore a directory.
1192 let is_dir = node.entry()?.is_none();
1185 let is_dir = node.entry()?.is_none();
1193 Ok(is_dir && node.tracked_descendants_count() > 0)
1186 Ok(is_dir && node.tracked_descendants_count() > 0)
1194 } else {
1187 } else {
1195 Ok(false)
1188 Ok(false)
1196 }
1189 }
1197 })
1190 })
1198 }
1191 }
1199
1192
1200 pub fn has_dir(
1193 pub fn has_dir(
1201 &mut self,
1194 &mut self,
1202 directory: &HgPath,
1195 directory: &HgPath,
1203 ) -> Result<bool, DirstateError> {
1196 ) -> Result<bool, DirstateError> {
1204 self.with_dmap_mut(|map| {
1197 self.with_dmap_mut(|map| {
1205 if let Some(node) = map.get_node(directory)? {
1198 if let Some(node) = map.get_node(directory)? {
1206 // A node without a `DirstateEntry` was created to hold child
1199 // A node without a `DirstateEntry` was created to hold child
1207 // nodes, and is therefore a directory.
1200 // nodes, and is therefore a directory.
1208 let is_dir = node.entry()?.is_none();
1201 let is_dir = node.entry()?.is_none();
1209 Ok(is_dir && node.descendants_with_entry_count() > 0)
1202 Ok(is_dir && node.descendants_with_entry_count() > 0)
1210 } else {
1203 } else {
1211 Ok(false)
1204 Ok(false)
1212 }
1205 }
1213 })
1206 })
1214 }
1207 }
1215
1208
1216 #[logging_timer::time("trace")]
1209 #[logging_timer::time("trace")]
1217 pub fn pack_v1(
1210 pub fn pack_v1(
1218 &self,
1211 &self,
1219 parents: DirstateParents,
1212 parents: DirstateParents,
1220 ) -> Result<Vec<u8>, DirstateError> {
1213 ) -> Result<Vec<u8>, DirstateError> {
1221 let map = self.get_map();
1214 let map = self.get_map();
1222 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
1215 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
1223 // reallocations
1216 // reallocations
1224 let mut size = parents.as_bytes().len();
1217 let mut size = parents.as_bytes().len();
1225 for node in map.iter_nodes() {
1218 for node in map.iter_nodes() {
1226 let node = node?;
1219 let node = node?;
1227 if node.entry()?.is_some() {
1220 if node.entry()?.is_some() {
1228 size += packed_entry_size(
1221 size += packed_entry_size(
1229 node.full_path(map.on_disk)?,
1222 node.full_path(map.on_disk)?,
1230 node.copy_source(map.on_disk)?,
1223 node.copy_source(map.on_disk)?,
1231 );
1224 );
1232 }
1225 }
1233 }
1226 }
1234
1227
1235 let mut packed = Vec::with_capacity(size);
1228 let mut packed = Vec::with_capacity(size);
1236 packed.extend(parents.as_bytes());
1229 packed.extend(parents.as_bytes());
1237
1230
1238 for node in map.iter_nodes() {
1231 for node in map.iter_nodes() {
1239 let node = node?;
1232 let node = node?;
1240 if let Some(entry) = node.entry()? {
1233 if let Some(entry) = node.entry()? {
1241 pack_entry(
1234 pack_entry(
1242 node.full_path(map.on_disk)?,
1235 node.full_path(map.on_disk)?,
1243 &entry,
1236 &entry,
1244 node.copy_source(map.on_disk)?,
1237 node.copy_source(map.on_disk)?,
1245 &mut packed,
1238 &mut packed,
1246 );
1239 );
1247 }
1240 }
1248 }
1241 }
1249 Ok(packed)
1242 Ok(packed)
1250 }
1243 }
1251
1244
1252 /// Returns new data and metadata together with whether that data should be
1245 /// Returns new data and metadata together with whether that data should be
1253 /// appended to the existing data file whose content is at
1246 /// appended to the existing data file whose content is at
1254 /// `map.on_disk` (true), instead of written to a new data file
1247 /// `map.on_disk` (true), instead of written to a new data file
1255 /// (false), and the previous size of data on disk.
1248 /// (false), and the previous size of data on disk.
1256 #[logging_timer::time("trace")]
1249 #[logging_timer::time("trace")]
1257 pub fn pack_v2(
1250 pub fn pack_v2(
1258 &self,
1251 &self,
1259 can_append: bool,
1252 can_append: bool,
1260 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
1253 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
1261 {
1254 {
1262 let map = self.get_map();
1255 let map = self.get_map();
1263 on_disk::write(map, can_append)
1256 on_disk::write(map, can_append)
1264 }
1257 }
1265
1258
1266 /// `callback` allows the caller to process and do something with the
1259 /// `callback` allows the caller to process and do something with the
1267 /// results of the status. This is needed to do so efficiently (i.e.
1260 /// results of the status. This is needed to do so efficiently (i.e.
1268 /// without cloning the `DirstateStatus` object with its paths) because
1261 /// without cloning the `DirstateStatus` object with its paths) because
1269 /// we need to borrow from `Self`.
1262 /// we need to borrow from `Self`.
1270 pub fn with_status<R>(
1263 pub fn with_status<R>(
1271 &mut self,
1264 &mut self,
1272 matcher: &(dyn Matcher + Sync),
1265 matcher: &(dyn Matcher + Sync),
1273 root_dir: PathBuf,
1266 root_dir: PathBuf,
1274 ignore_files: Vec<PathBuf>,
1267 ignore_files: Vec<PathBuf>,
1275 options: StatusOptions,
1268 options: StatusOptions,
1276 callback: impl for<'r> FnOnce(
1269 callback: impl for<'r> FnOnce(
1277 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
1270 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
1278 ) -> R,
1271 ) -> R,
1279 ) -> R {
1272 ) -> R {
1280 self.with_dmap_mut(|map| {
1273 self.with_dmap_mut(|map| {
1281 callback(super::status::status(
1274 callback(super::status::status(
1282 map,
1275 map,
1283 matcher,
1276 matcher,
1284 root_dir,
1277 root_dir,
1285 ignore_files,
1278 ignore_files,
1286 options,
1279 options,
1287 ))
1280 ))
1288 })
1281 })
1289 }
1282 }
1290
1283
1291 pub fn copy_map_len(&self) -> usize {
1284 pub fn copy_map_len(&self) -> usize {
1292 let map = self.get_map();
1285 let map = self.get_map();
1293 map.nodes_with_copy_source_count as usize
1286 map.nodes_with_copy_source_count as usize
1294 }
1287 }
1295
1288
1296 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1289 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1297 let map = self.get_map();
1290 let map = self.get_map();
1298 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1291 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1299 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1292 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1300 Some((node.full_path(map.on_disk)?, source))
1293 Some((node.full_path(map.on_disk)?, source))
1301 } else {
1294 } else {
1302 None
1295 None
1303 })
1296 })
1304 }))
1297 }))
1305 }
1298 }
1306
1299
1307 pub fn copy_map_contains_key(
1300 pub fn copy_map_contains_key(
1308 &self,
1301 &self,
1309 key: &HgPath,
1302 key: &HgPath,
1310 ) -> Result<bool, DirstateV2ParseError> {
1303 ) -> Result<bool, DirstateV2ParseError> {
1311 let map = self.get_map();
1304 let map = self.get_map();
1312 Ok(if let Some(node) = map.get_node(key)? {
1305 Ok(if let Some(node) = map.get_node(key)? {
1313 node.has_copy_source()
1306 node.has_copy_source()
1314 } else {
1307 } else {
1315 false
1308 false
1316 })
1309 })
1317 }
1310 }
1318
1311
1319 pub fn copy_map_get(
1312 pub fn copy_map_get(
1320 &self,
1313 &self,
1321 key: &HgPath,
1314 key: &HgPath,
1322 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1315 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1323 let map = self.get_map();
1316 let map = self.get_map();
1324 if let Some(node) = map.get_node(key)? {
1317 if let Some(node) = map.get_node(key)? {
1325 if let Some(source) = node.copy_source(map.on_disk)? {
1318 if let Some(source) = node.copy_source(map.on_disk)? {
1326 return Ok(Some(source));
1319 return Ok(Some(source));
1327 }
1320 }
1328 }
1321 }
1329 Ok(None)
1322 Ok(None)
1330 }
1323 }
1331
1324
1332 pub fn copy_map_remove(
1325 pub fn copy_map_remove(
1333 &mut self,
1326 &mut self,
1334 key: &HgPath,
1327 key: &HgPath,
1335 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1328 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1336 self.with_dmap_mut(|map| {
1329 self.with_dmap_mut(|map| {
1337 let count = &mut map.nodes_with_copy_source_count;
1330 let count = &mut map.nodes_with_copy_source_count;
1338 let unreachable_bytes = &mut map.unreachable_bytes;
1331 let unreachable_bytes = &mut map.unreachable_bytes;
1339 Ok(DirstateMap::get_node_mut_inner(
1332 Ok(DirstateMap::get_node_mut_inner(
1340 map.on_disk,
1333 map.on_disk,
1341 unreachable_bytes,
1334 unreachable_bytes,
1342 &mut map.root,
1335 &mut map.root,
1343 key,
1336 key,
1344 |_ancestor| {},
1337 |_ancestor| {},
1345 )?
1338 )?
1346 .and_then(|node| {
1339 .and_then(|node| {
1347 if let Some(source) = &node.copy_source {
1340 if let Some(source) = &node.copy_source {
1348 *count = count
1341 *count = count
1349 .checked_sub(1)
1342 .checked_sub(1)
1350 .expect("nodes_with_copy_source_count should be >= 0");
1343 .expect("nodes_with_copy_source_count should be >= 0");
1351 DirstateMap::count_dropped_path(
1344 DirstateMap::count_dropped_path(
1352 unreachable_bytes,
1345 unreachable_bytes,
1353 Cow::Borrowed(source),
1346 Cow::Borrowed(source),
1354 );
1347 );
1355 }
1348 }
1356 node.copy_source.take().map(Cow::into_owned)
1349 node.copy_source.take().map(Cow::into_owned)
1357 }))
1350 }))
1358 })
1351 })
1359 }
1352 }
1360
1353
1361 pub fn copy_map_insert(
1354 pub fn copy_map_insert(
1362 &mut self,
1355 &mut self,
1363 key: &HgPath,
1356 key: &HgPath,
1364 value: &HgPath,
1357 value: &HgPath,
1365 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1358 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1366 self.with_dmap_mut(|map| {
1359 self.with_dmap_mut(|map| {
1367 let node = map.get_or_insert_node(&key, |_ancestor| {})?;
1360 let node = map.get_or_insert_node(key, |_ancestor| {})?;
1368 let had_copy_source = node.copy_source.is_none();
1361 let had_copy_source = node.copy_source.is_none();
1369 let old = node
1362 let old = node
1370 .copy_source
1363 .copy_source
1371 .replace(value.to_owned().into())
1364 .replace(value.to_owned().into())
1372 .map(Cow::into_owned);
1365 .map(Cow::into_owned);
1373 if had_copy_source {
1366 if had_copy_source {
1374 map.nodes_with_copy_source_count += 1
1367 map.nodes_with_copy_source_count += 1
1375 }
1368 }
1376 Ok(old)
1369 Ok(old)
1377 })
1370 })
1378 }
1371 }
1379
1372
1380 pub fn len(&self) -> usize {
1373 pub fn len(&self) -> usize {
1381 let map = self.get_map();
1374 let map = self.get_map();
1382 map.nodes_with_entry_count as usize
1375 map.nodes_with_entry_count as usize
1383 }
1376 }
1384
1377
1385 pub fn is_empty(&self) -> bool {
1378 pub fn is_empty(&self) -> bool {
1386 self.len() == 0
1379 self.len() == 0
1387 }
1380 }
1388
1381
1389 pub fn contains_key(
1382 pub fn contains_key(
1390 &self,
1383 &self,
1391 key: &HgPath,
1384 key: &HgPath,
1392 ) -> Result<bool, DirstateV2ParseError> {
1385 ) -> Result<bool, DirstateV2ParseError> {
1393 Ok(self.get(key)?.is_some())
1386 Ok(self.get(key)?.is_some())
1394 }
1387 }
1395
1388
1396 pub fn get(
1389 pub fn get(
1397 &self,
1390 &self,
1398 key: &HgPath,
1391 key: &HgPath,
1399 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1392 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1400 let map = self.get_map();
1393 let map = self.get_map();
1401 Ok(if let Some(node) = map.get_node(key)? {
1394 Ok(if let Some(node) = map.get_node(key)? {
1402 node.entry()?
1395 node.entry()?
1403 } else {
1396 } else {
1404 None
1397 None
1405 })
1398 })
1406 }
1399 }
1407
1400
1408 pub fn iter(&self) -> StateMapIter<'_> {
1401 pub fn iter(&self) -> StateMapIter<'_> {
1409 let map = self.get_map();
1402 let map = self.get_map();
1410 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1403 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1411 Ok(if let Some(entry) = node.entry()? {
1404 Ok(if let Some(entry) = node.entry()? {
1412 Some((node.full_path(map.on_disk)?, entry))
1405 Some((node.full_path(map.on_disk)?, entry))
1413 } else {
1406 } else {
1414 None
1407 None
1415 })
1408 })
1416 }))
1409 }))
1417 }
1410 }
1418
1411
1419 pub fn iter_tracked_dirs(
1412 pub fn iter_tracked_dirs(
1420 &mut self,
1413 &mut self,
1421 ) -> Result<
1414 ) -> Result<
1422 Box<
1415 Box<
1423 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1416 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1424 + Send
1417 + Send
1425 + '_,
1418 + '_,
1426 >,
1419 >,
1427 DirstateError,
1420 DirstateError,
1428 > {
1421 > {
1429 let map = self.get_map();
1422 let map = self.get_map();
1430 let on_disk = map.on_disk;
1423 let on_disk = map.on_disk;
1431 Ok(Box::new(filter_map_results(
1424 Ok(Box::new(filter_map_results(
1432 map.iter_nodes(),
1425 map.iter_nodes(),
1433 move |node| {
1426 move |node| {
1434 Ok(if node.tracked_descendants_count() > 0 {
1427 Ok(if node.tracked_descendants_count() > 0 {
1435 Some(node.full_path(on_disk)?)
1428 Some(node.full_path(on_disk)?)
1436 } else {
1429 } else {
1437 None
1430 None
1438 })
1431 })
1439 },
1432 },
1440 )))
1433 )))
1441 }
1434 }
1442
1435
1443 /// Only public because it needs to be exposed to the Python layer.
1436 /// Only public because it needs to be exposed to the Python layer.
1444 /// It is not the full `setparents` logic, only the parts that mutate the
1437 /// It is not the full `setparents` logic, only the parts that mutate the
1445 /// entries.
1438 /// entries.
1446 pub fn setparents_fixup(
1439 pub fn setparents_fixup(
1447 &mut self,
1440 &mut self,
1448 ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
1441 ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
1449 // XXX
1442 // XXX
1450 // All the copying and re-querying is quite inefficient, but this is
1443 // All the copying and re-querying is quite inefficient, but this is
1451 // still a lot better than doing it from Python.
1444 // still a lot better than doing it from Python.
1452 //
1445 //
1453 // The better solution is to develop a mechanism for `iter_mut`,
1446 // The better solution is to develop a mechanism for `iter_mut`,
1454 // which will be a lot more involved: we're dealing with a lazy,
1447 // which will be a lot more involved: we're dealing with a lazy,
1455 // append-mostly, tree-like data structure. This will do for now.
1448 // append-mostly, tree-like data structure. This will do for now.
1456 let mut copies = vec![];
1449 let mut copies = vec![];
1457 let mut files_with_p2_info = vec![];
1450 let mut files_with_p2_info = vec![];
1458 for res in self.iter() {
1451 for res in self.iter() {
1459 let (path, entry) = res?;
1452 let (path, entry) = res?;
1460 if entry.p2_info() {
1453 if entry.p2_info() {
1461 files_with_p2_info.push(path.to_owned())
1454 files_with_p2_info.push(path.to_owned())
1462 }
1455 }
1463 }
1456 }
1464 self.with_dmap_mut(|map| {
1457 self.with_dmap_mut(|map| {
1465 for path in files_with_p2_info.iter() {
1458 for path in files_with_p2_info.iter() {
1466 let node = map.get_or_insert_node(path, |_| {})?;
1459 let node = map.get_or_insert_node(path, |_| {})?;
1467 let entry =
1460 let entry =
1468 node.data.as_entry_mut().expect("entry should exist");
1461 node.data.as_entry_mut().expect("entry should exist");
1469 entry.drop_merge_data();
1462 entry.drop_merge_data();
1470 if let Some(source) = node.copy_source.take().as_deref() {
1463 if let Some(source) = node.copy_source.take().as_deref() {
1471 copies.push((path.to_owned(), source.to_owned()));
1464 copies.push((path.to_owned(), source.to_owned()));
1472 }
1465 }
1473 }
1466 }
1474 Ok(copies)
1467 Ok(copies)
1475 })
1468 })
1476 }
1469 }
1477
1470
1478 pub fn debug_iter(
1471 pub fn debug_iter(
1479 &self,
1472 &self,
1480 all: bool,
1473 all: bool,
1481 ) -> Box<
1474 ) -> Box<
1482 dyn Iterator<Item = Result<DebugDirstateTuple, DirstateV2ParseError>>
1475 dyn Iterator<Item = Result<DebugDirstateTuple, DirstateV2ParseError>>
1483 + Send
1476 + Send
1484 + '_,
1477 + '_,
1485 > {
1478 > {
1486 let map = self.get_map();
1479 let map = self.get_map();
1487 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1480 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1488 let debug_tuple = if let Some(entry) = node.entry()? {
1481 let debug_tuple = if let Some(entry) = node.entry()? {
1489 entry.debug_tuple()
1482 entry.debug_tuple()
1490 } else if !all {
1483 } else if !all {
1491 return Ok(None);
1484 return Ok(None);
1492 } else if let Some(mtime) = node.cached_directory_mtime()? {
1485 } else if let Some(mtime) = node.cached_directory_mtime()? {
1493 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1486 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1494 } else {
1487 } else {
1495 (b' ', 0, -1, -1)
1488 (b' ', 0, -1, -1)
1496 };
1489 };
1497 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1490 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1498 }))
1491 }))
1499 }
1492 }
1500 }
1493 }
1501 #[cfg(test)]
1494 #[cfg(test)]
1502 mod tests {
1495 mod tests {
1503 use super::*;
1496 use super::*;
1504
1497
1505 /// Shortcut to return tracked descendants of a path.
1498 /// Shortcut to return tracked descendants of a path.
1506 /// Panics if the path does not exist.
1499 /// Panics if the path does not exist.
1507 fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1500 fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1508 let path = dbg!(HgPath::new(path));
1501 let path = dbg!(HgPath::new(path));
1509 let node = map.get_map().get_node(path);
1502 let node = map.get_map().get_node(path);
1510 node.unwrap().unwrap().tracked_descendants_count()
1503 node.unwrap().unwrap().tracked_descendants_count()
1511 }
1504 }
1512
1505
1513 /// Shortcut to return descendants with an entry.
1506 /// Shortcut to return descendants with an entry.
1514 /// Panics if the path does not exist.
1507 /// Panics if the path does not exist.
1515 fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1508 fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1516 let path = dbg!(HgPath::new(path));
1509 let path = dbg!(HgPath::new(path));
1517 let node = map.get_map().get_node(path);
1510 let node = map.get_map().get_node(path);
1518 node.unwrap().unwrap().descendants_with_entry_count()
1511 node.unwrap().unwrap().descendants_with_entry_count()
1519 }
1512 }
1520
1513
1521 fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) {
1514 fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) {
1522 let path = dbg!(HgPath::new(path));
1515 let path = dbg!(HgPath::new(path));
1523 let node = map.get_map().get_node(path);
1516 let node = map.get_map().get_node(path);
1524 assert!(node.unwrap().is_none());
1517 assert!(node.unwrap().is_none());
1525 }
1518 }
1526
1519
1527 /// Shortcut for path creation in tests
1520 /// Shortcut for path creation in tests
1528 fn p(b: &[u8]) -> &HgPath {
1521 fn p(b: &[u8]) -> &HgPath {
1529 HgPath::new(b)
1522 HgPath::new(b)
1530 }
1523 }
1531
1524
1532 /// Test the very simple case a single tracked file
1525 /// Test the very simple case a single tracked file
1533 #[test]
1526 #[test]
1534 fn test_tracked_descendants_simple() -> Result<(), DirstateError> {
1527 fn test_tracked_descendants_simple() -> Result<(), DirstateError> {
1535 let mut map = OwningDirstateMap::new_empty(vec![]);
1528 let mut map = OwningDirstateMap::new_empty(vec![]);
1536 assert_eq!(map.len(), 0);
1529 assert_eq!(map.len(), 0);
1537
1530
1538 map.set_tracked(p(b"some/nested/path"))?;
1531 map.set_tracked(p(b"some/nested/path"))?;
1539
1532
1540 assert_eq!(map.len(), 1);
1533 assert_eq!(map.len(), 1);
1541 assert_eq!(tracked_descendants(&map, b"some"), 1);
1534 assert_eq!(tracked_descendants(&map, b"some"), 1);
1542 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1535 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1543 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1536 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1544
1537
1545 map.set_untracked(p(b"some/nested/path"))?;
1538 map.set_untracked(p(b"some/nested/path"))?;
1546 assert_eq!(map.len(), 0);
1539 assert_eq!(map.len(), 0);
1547 assert!(map.get_map().get_node(p(b"some"))?.is_none());
1540 assert!(map.get_map().get_node(p(b"some"))?.is_none());
1548
1541
1549 Ok(())
1542 Ok(())
1550 }
1543 }
1551
1544
1552 /// Test the simple case of all tracked, but multiple files
1545 /// Test the simple case of all tracked, but multiple files
1553 #[test]
1546 #[test]
1554 fn test_tracked_descendants_multiple() -> Result<(), DirstateError> {
1547 fn test_tracked_descendants_multiple() -> Result<(), DirstateError> {
1555 let mut map = OwningDirstateMap::new_empty(vec![]);
1548 let mut map = OwningDirstateMap::new_empty(vec![]);
1556
1549
1557 map.set_tracked(p(b"some/nested/path"))?;
1550 map.set_tracked(p(b"some/nested/path"))?;
1558 map.set_tracked(p(b"some/nested/file"))?;
1551 map.set_tracked(p(b"some/nested/file"))?;
1559 // one layer without any files to test deletion cascade
1552 // one layer without any files to test deletion cascade
1560 map.set_tracked(p(b"some/other/nested/path"))?;
1553 map.set_tracked(p(b"some/other/nested/path"))?;
1561 map.set_tracked(p(b"root_file"))?;
1554 map.set_tracked(p(b"root_file"))?;
1562 map.set_tracked(p(b"some/file"))?;
1555 map.set_tracked(p(b"some/file"))?;
1563 map.set_tracked(p(b"some/file2"))?;
1556 map.set_tracked(p(b"some/file2"))?;
1564 map.set_tracked(p(b"some/file3"))?;
1557 map.set_tracked(p(b"some/file3"))?;
1565
1558
1566 assert_eq!(map.len(), 7);
1559 assert_eq!(map.len(), 7);
1567 assert_eq!(tracked_descendants(&map, b"some"), 6);
1560 assert_eq!(tracked_descendants(&map, b"some"), 6);
1568 assert_eq!(tracked_descendants(&map, b"some/nested"), 2);
1561 assert_eq!(tracked_descendants(&map, b"some/nested"), 2);
1569 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1562 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1570 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1563 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1571 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1564 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1572
1565
1573 map.set_untracked(p(b"some/nested/path"))?;
1566 map.set_untracked(p(b"some/nested/path"))?;
1574 assert_eq!(map.len(), 6);
1567 assert_eq!(map.len(), 6);
1575 assert_eq!(tracked_descendants(&map, b"some"), 5);
1568 assert_eq!(tracked_descendants(&map, b"some"), 5);
1576 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1569 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1577 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1570 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1578 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1571 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1579
1572
1580 map.set_untracked(p(b"some/nested/file"))?;
1573 map.set_untracked(p(b"some/nested/file"))?;
1581 assert_eq!(map.len(), 5);
1574 assert_eq!(map.len(), 5);
1582 assert_eq!(tracked_descendants(&map, b"some"), 4);
1575 assert_eq!(tracked_descendants(&map, b"some"), 4);
1583 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1576 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1584 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1577 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1585 assert_does_not_exist(&map, b"some_nested");
1578 assert_does_not_exist(&map, b"some_nested");
1586
1579
1587 map.set_untracked(p(b"some/other/nested/path"))?;
1580 map.set_untracked(p(b"some/other/nested/path"))?;
1588 assert_eq!(map.len(), 4);
1581 assert_eq!(map.len(), 4);
1589 assert_eq!(tracked_descendants(&map, b"some"), 3);
1582 assert_eq!(tracked_descendants(&map, b"some"), 3);
1590 assert_does_not_exist(&map, b"some/other");
1583 assert_does_not_exist(&map, b"some/other");
1591
1584
1592 map.set_untracked(p(b"root_file"))?;
1585 map.set_untracked(p(b"root_file"))?;
1593 assert_eq!(map.len(), 3);
1586 assert_eq!(map.len(), 3);
1594 assert_eq!(tracked_descendants(&map, b"some"), 3);
1587 assert_eq!(tracked_descendants(&map, b"some"), 3);
1595 assert_does_not_exist(&map, b"root_file");
1588 assert_does_not_exist(&map, b"root_file");
1596
1589
1597 map.set_untracked(p(b"some/file"))?;
1590 map.set_untracked(p(b"some/file"))?;
1598 assert_eq!(map.len(), 2);
1591 assert_eq!(map.len(), 2);
1599 assert_eq!(tracked_descendants(&map, b"some"), 2);
1592 assert_eq!(tracked_descendants(&map, b"some"), 2);
1600 assert_does_not_exist(&map, b"some/file");
1593 assert_does_not_exist(&map, b"some/file");
1601
1594
1602 map.set_untracked(p(b"some/file2"))?;
1595 map.set_untracked(p(b"some/file2"))?;
1603 assert_eq!(map.len(), 1);
1596 assert_eq!(map.len(), 1);
1604 assert_eq!(tracked_descendants(&map, b"some"), 1);
1597 assert_eq!(tracked_descendants(&map, b"some"), 1);
1605 assert_does_not_exist(&map, b"some/file2");
1598 assert_does_not_exist(&map, b"some/file2");
1606
1599
1607 map.set_untracked(p(b"some/file3"))?;
1600 map.set_untracked(p(b"some/file3"))?;
1608 assert_eq!(map.len(), 0);
1601 assert_eq!(map.len(), 0);
1609 assert_does_not_exist(&map, b"some/file3");
1602 assert_does_not_exist(&map, b"some/file3");
1610
1603
1611 Ok(())
1604 Ok(())
1612 }
1605 }
1613
1606
1614 /// Check with a mix of tracked and non-tracked items
1607 /// Check with a mix of tracked and non-tracked items
1615 #[test]
1608 #[test]
1616 fn test_tracked_descendants_different() -> Result<(), DirstateError> {
1609 fn test_tracked_descendants_different() -> Result<(), DirstateError> {
1617 let mut map = OwningDirstateMap::new_empty(vec![]);
1610 let mut map = OwningDirstateMap::new_empty(vec![]);
1618
1611
1619 // A file that was just added
1612 // A file that was just added
1620 map.set_tracked(p(b"some/nested/path"))?;
1613 map.set_tracked(p(b"some/nested/path"))?;
1621 // This has no information, the dirstate should ignore it
1614 // This has no information, the dirstate should ignore it
1622 map.reset_state(p(b"some/file"), false, false, false, false, None)?;
1615 map.reset_state(p(b"some/file"), false, false, false, false, None)?;
1623 assert_does_not_exist(&map, b"some/file");
1616 assert_does_not_exist(&map, b"some/file");
1624
1617
1625 // A file that was removed
1618 // A file that was removed
1626 map.reset_state(
1619 map.reset_state(
1627 p(b"some/nested/file"),
1620 p(b"some/nested/file"),
1628 false,
1621 false,
1629 true,
1622 true,
1630 false,
1623 false,
1631 false,
1624 false,
1632 None,
1625 None,
1633 )?;
1626 )?;
1634 assert!(!map.get(p(b"some/nested/file"))?.unwrap().tracked());
1627 assert!(!map.get(p(b"some/nested/file"))?.unwrap().tracked());
1635 // Only present in p2
1628 // Only present in p2
1636 map.reset_state(p(b"some/file3"), false, false, true, false, None)?;
1629 map.reset_state(p(b"some/file3"), false, false, true, false, None)?;
1637 assert!(!map.get(p(b"some/file3"))?.unwrap().tracked());
1630 assert!(!map.get(p(b"some/file3"))?.unwrap().tracked());
1638 // A file that was merged
1631 // A file that was merged
1639 map.reset_state(p(b"root_file"), true, true, true, false, None)?;
1632 map.reset_state(p(b"root_file"), true, true, true, false, None)?;
1640 assert!(map.get(p(b"root_file"))?.unwrap().tracked());
1633 assert!(map.get(p(b"root_file"))?.unwrap().tracked());
1641 // A file that is added, with info from p2
1634 // A file that is added, with info from p2
1642 // XXX is that actually possible?
1635 // XXX is that actually possible?
1643 map.reset_state(p(b"some/file2"), true, false, true, false, None)?;
1636 map.reset_state(p(b"some/file2"), true, false, true, false, None)?;
1644 assert!(map.get(p(b"some/file2"))?.unwrap().tracked());
1637 assert!(map.get(p(b"some/file2"))?.unwrap().tracked());
1645 // A clean file
1638 // A clean file
1646 // One layer without any files to test deletion cascade
1639 // One layer without any files to test deletion cascade
1647 map.reset_state(
1640 map.reset_state(
1648 p(b"some/other/nested/path"),
1641 p(b"some/other/nested/path"),
1649 true,
1642 true,
1650 true,
1643 true,
1651 false,
1644 false,
1652 false,
1645 false,
1653 None,
1646 None,
1654 )?;
1647 )?;
1655 assert!(map.get(p(b"some/other/nested/path"))?.unwrap().tracked());
1648 assert!(map.get(p(b"some/other/nested/path"))?.unwrap().tracked());
1656
1649
1657 assert_eq!(map.len(), 6);
1650 assert_eq!(map.len(), 6);
1658 assert_eq!(tracked_descendants(&map, b"some"), 3);
1651 assert_eq!(tracked_descendants(&map, b"some"), 3);
1659 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1652 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1660 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1653 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1661 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1654 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1662 assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0);
1655 assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0);
1663 assert_eq!(
1656 assert_eq!(
1664 descendants_with_an_entry(&map, b"some/other/nested/path"),
1657 descendants_with_an_entry(&map, b"some/other/nested/path"),
1665 0
1658 0
1666 );
1659 );
1667 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1660 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1668 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1661 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1669
1662
1670 // might as well check this
1663 // might as well check this
1671 map.set_untracked(p(b"path/does/not/exist"))?;
1664 map.set_untracked(p(b"path/does/not/exist"))?;
1672 assert_eq!(map.len(), 6);
1665 assert_eq!(map.len(), 6);
1673
1666
1674 map.set_untracked(p(b"some/other/nested/path"))?;
1667 map.set_untracked(p(b"some/other/nested/path"))?;
1675 // It is set untracked but not deleted since it held other information
1668 // It is set untracked but not deleted since it held other information
1676 assert_eq!(map.len(), 6);
1669 assert_eq!(map.len(), 6);
1677 assert_eq!(tracked_descendants(&map, b"some"), 2);
1670 assert_eq!(tracked_descendants(&map, b"some"), 2);
1678 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1671 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1679 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1672 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1680 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1673 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1681 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1674 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1682 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1675 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1683
1676
1684 map.set_untracked(p(b"some/nested/path"))?;
1677 map.set_untracked(p(b"some/nested/path"))?;
1685 // It is set untracked *and* deleted since it was only added
1678 // It is set untracked *and* deleted since it was only added
1686 assert_eq!(map.len(), 5);
1679 assert_eq!(map.len(), 5);
1687 assert_eq!(tracked_descendants(&map, b"some"), 1);
1680 assert_eq!(tracked_descendants(&map, b"some"), 1);
1688 assert_eq!(descendants_with_an_entry(&map, b"some"), 4);
1681 assert_eq!(descendants_with_an_entry(&map, b"some"), 4);
1689 assert_eq!(tracked_descendants(&map, b"some/nested"), 0);
1682 assert_eq!(tracked_descendants(&map, b"some/nested"), 0);
1690 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1);
1683 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1);
1691 assert_does_not_exist(&map, b"some/nested/path");
1684 assert_does_not_exist(&map, b"some/nested/path");
1692
1685
1693 map.set_untracked(p(b"root_file"))?;
1686 map.set_untracked(p(b"root_file"))?;
1694 // Untracked but not deleted
1687 // Untracked but not deleted
1695 assert_eq!(map.len(), 5);
1688 assert_eq!(map.len(), 5);
1696 assert!(map.get(p(b"root_file"))?.is_some());
1689 assert!(map.get(p(b"root_file"))?.is_some());
1697
1690
1698 map.set_untracked(p(b"some/file2"))?;
1691 map.set_untracked(p(b"some/file2"))?;
1699 assert_eq!(map.len(), 5);
1692 assert_eq!(map.len(), 5);
1700 assert_eq!(tracked_descendants(&map, b"some"), 0);
1693 assert_eq!(tracked_descendants(&map, b"some"), 0);
1701 assert!(map.get(p(b"some/file2"))?.is_some());
1694 assert!(map.get(p(b"some/file2"))?.is_some());
1702
1695
1703 map.set_untracked(p(b"some/file3"))?;
1696 map.set_untracked(p(b"some/file3"))?;
1704 assert_eq!(map.len(), 5);
1697 assert_eq!(map.len(), 5);
1705 assert_eq!(tracked_descendants(&map, b"some"), 0);
1698 assert_eq!(tracked_descendants(&map, b"some"), 0);
1706 assert!(map.get(p(b"some/file3"))?.is_some());
1699 assert!(map.get(p(b"some/file3"))?.is_some());
1707
1700
1708 Ok(())
1701 Ok(())
1709 }
1702 }
1710
1703
1711 /// Check that copies counter is correctly updated
1704 /// Check that copies counter is correctly updated
1712 #[test]
1705 #[test]
1713 fn test_copy_source() -> Result<(), DirstateError> {
1706 fn test_copy_source() -> Result<(), DirstateError> {
1714 let mut map = OwningDirstateMap::new_empty(vec![]);
1707 let mut map = OwningDirstateMap::new_empty(vec![]);
1715
1708
1716 // Clean file
1709 // Clean file
1717 map.reset_state(p(b"files/clean"), true, true, false, false, None)?;
1710 map.reset_state(p(b"files/clean"), true, true, false, false, None)?;
1718 // Merged file
1711 // Merged file
1719 map.reset_state(p(b"files/from_p2"), true, true, true, false, None)?;
1712 map.reset_state(p(b"files/from_p2"), true, true, true, false, None)?;
1720 // Removed file
1713 // Removed file
1721 map.reset_state(p(b"removed"), false, true, false, false, None)?;
1714 map.reset_state(p(b"removed"), false, true, false, false, None)?;
1722 // Added file
1715 // Added file
1723 map.reset_state(p(b"files/added"), true, false, false, false, None)?;
1716 map.reset_state(p(b"files/added"), true, false, false, false, None)?;
1724 // Add copy
1717 // Add copy
1725 map.copy_map_insert(p(b"files/clean"), p(b"clean_copy_source"))?;
1718 map.copy_map_insert(p(b"files/clean"), p(b"clean_copy_source"))?;
1726 assert_eq!(map.copy_map_len(), 1);
1719 assert_eq!(map.copy_map_len(), 1);
1727
1720
1728 // Copy override
1721 // Copy override
1729 map.copy_map_insert(p(b"files/clean"), p(b"other_clean_copy_source"))?;
1722 map.copy_map_insert(p(b"files/clean"), p(b"other_clean_copy_source"))?;
1730 assert_eq!(map.copy_map_len(), 1);
1723 assert_eq!(map.copy_map_len(), 1);
1731
1724
1732 // Multiple copies
1725 // Multiple copies
1733 map.copy_map_insert(p(b"removed"), p(b"removed_copy_source"))?;
1726 map.copy_map_insert(p(b"removed"), p(b"removed_copy_source"))?;
1734 assert_eq!(map.copy_map_len(), 2);
1727 assert_eq!(map.copy_map_len(), 2);
1735
1728
1736 map.copy_map_insert(p(b"files/added"), p(b"added_copy_source"))?;
1729 map.copy_map_insert(p(b"files/added"), p(b"added_copy_source"))?;
1737 assert_eq!(map.copy_map_len(), 3);
1730 assert_eq!(map.copy_map_len(), 3);
1738
1731
1739 // Added, so the entry is completely removed
1732 // Added, so the entry is completely removed
1740 map.set_untracked(p(b"files/added"))?;
1733 map.set_untracked(p(b"files/added"))?;
1741 assert_does_not_exist(&map, b"files/added");
1734 assert_does_not_exist(&map, b"files/added");
1742 assert_eq!(map.copy_map_len(), 2);
1735 assert_eq!(map.copy_map_len(), 2);
1743
1736
1744 // Removed, so the entry is kept around, so is its copy
1737 // Removed, so the entry is kept around, so is its copy
1745 map.set_untracked(p(b"removed"))?;
1738 map.set_untracked(p(b"removed"))?;
1746 assert!(map.get(p(b"removed"))?.is_some());
1739 assert!(map.get(p(b"removed"))?.is_some());
1747 assert_eq!(map.copy_map_len(), 2);
1740 assert_eq!(map.copy_map_len(), 2);
1748
1741
1749 // Clean, so the entry is kept around, but not its copy
1742 // Clean, so the entry is kept around, but not its copy
1750 map.set_untracked(p(b"files/clean"))?;
1743 map.set_untracked(p(b"files/clean"))?;
1751 assert!(map.get(p(b"files/clean"))?.is_some());
1744 assert!(map.get(p(b"files/clean"))?.is_some());
1752 assert_eq!(map.copy_map_len(), 1);
1745 assert_eq!(map.copy_map_len(), 1);
1753
1746
1754 map.copy_map_insert(p(b"files/from_p2"), p(b"from_p2_copy_source"))?;
1747 map.copy_map_insert(p(b"files/from_p2"), p(b"from_p2_copy_source"))?;
1755 assert_eq!(map.copy_map_len(), 2);
1748 assert_eq!(map.copy_map_len(), 2);
1756
1749
1757 // Info from p2, so its copy source info is kept around
1750 // Info from p2, so its copy source info is kept around
1758 map.set_untracked(p(b"files/from_p2"))?;
1751 map.set_untracked(p(b"files/from_p2"))?;
1759 assert!(map.get(p(b"files/from_p2"))?.is_some());
1752 assert!(map.get(p(b"files/from_p2"))?.is_some());
1760 assert_eq!(map.copy_map_len(), 2);
1753 assert_eq!(map.copy_map_len(), 2);
1761
1754
1762 Ok(())
1755 Ok(())
1763 }
1756 }
1764
1757
1765 /// Test with "on disk" data. For the sake of this test, the "on disk" data
1758 /// Test with "on disk" data. For the sake of this test, the "on disk" data
1766 /// does not actually come from the disk, but it's opaque to the code being
1759 /// does not actually come from the disk, but it's opaque to the code being
1767 /// tested.
1760 /// tested.
1768 #[test]
1761 #[test]
1769 fn test_on_disk() -> Result<(), DirstateError> {
1762 fn test_on_disk() -> Result<(), DirstateError> {
1770 // First let's create some data to put "on disk"
1763 // First let's create some data to put "on disk"
1771 let mut map = OwningDirstateMap::new_empty(vec![]);
1764 let mut map = OwningDirstateMap::new_empty(vec![]);
1772
1765
1773 // A file that was just added
1766 // A file that was just added
1774 map.set_tracked(p(b"some/nested/added"))?;
1767 map.set_tracked(p(b"some/nested/added"))?;
1775 map.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?;
1768 map.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?;
1776
1769
1777 // A file that was removed
1770 // A file that was removed
1778 map.reset_state(
1771 map.reset_state(
1779 p(b"some/nested/removed"),
1772 p(b"some/nested/removed"),
1780 false,
1773 false,
1781 true,
1774 true,
1782 false,
1775 false,
1783 false,
1776 false,
1784 None,
1777 None,
1785 )?;
1778 )?;
1786 // Only present in p2
1779 // Only present in p2
1787 map.reset_state(
1780 map.reset_state(
1788 p(b"other/p2_info_only"),
1781 p(b"other/p2_info_only"),
1789 false,
1782 false,
1790 false,
1783 false,
1791 true,
1784 true,
1792 false,
1785 false,
1793 None,
1786 None,
1794 )?;
1787 )?;
1795 map.copy_map_insert(
1788 map.copy_map_insert(
1796 p(b"other/p2_info_only"),
1789 p(b"other/p2_info_only"),
1797 p(b"other/p2_info_copy_source"),
1790 p(b"other/p2_info_copy_source"),
1798 )?;
1791 )?;
1799 // A file that was merged
1792 // A file that was merged
1800 map.reset_state(p(b"merged"), true, true, true, false, None)?;
1793 map.reset_state(p(b"merged"), true, true, true, false, None)?;
1801 // A file that is added, with info from p2
1794 // A file that is added, with info from p2
1802 // XXX is that actually possible?
1795 // XXX is that actually possible?
1803 map.reset_state(
1796 map.reset_state(
1804 p(b"other/added_with_p2"),
1797 p(b"other/added_with_p2"),
1805 true,
1798 true,
1806 false,
1799 false,
1807 true,
1800 true,
1808 false,
1801 false,
1809 None,
1802 None,
1810 )?;
1803 )?;
1811 // One layer without any files to test deletion cascade
1804 // One layer without any files to test deletion cascade
1812 // A clean file
1805 // A clean file
1813 map.reset_state(
1806 map.reset_state(
1814 p(b"some/other/nested/clean"),
1807 p(b"some/other/nested/clean"),
1815 true,
1808 true,
1816 true,
1809 true,
1817 false,
1810 false,
1818 false,
1811 false,
1819 None,
1812 None,
1820 )?;
1813 )?;
1821
1814
1822 let (packed, metadata, _should_append, _old_data_size) =
1815 let (packed, metadata, _should_append, _old_data_size) =
1823 map.pack_v2(false)?;
1816 map.pack_v2(false)?;
1824 let packed_len = packed.len();
1817 let packed_len = packed.len();
1825 assert!(packed_len > 0);
1818 assert!(packed_len > 0);
1826
1819
1827 // Recreate "from disk"
1820 // Recreate "from disk"
1828 let mut map = OwningDirstateMap::new_v2(
1821 let mut map = OwningDirstateMap::new_v2(
1829 packed,
1822 packed,
1830 packed_len,
1823 packed_len,
1831 metadata.as_bytes(),
1824 metadata.as_bytes(),
1832 )?;
1825 )?;
1833
1826
1834 // Check that everything is accounted for
1827 // Check that everything is accounted for
1835 assert!(map.contains_key(p(b"some/nested/added"))?);
1828 assert!(map.contains_key(p(b"some/nested/added"))?);
1836 assert!(map.contains_key(p(b"some/nested/removed"))?);
1829 assert!(map.contains_key(p(b"some/nested/removed"))?);
1837 assert!(map.contains_key(p(b"merged"))?);
1830 assert!(map.contains_key(p(b"merged"))?);
1838 assert!(map.contains_key(p(b"other/p2_info_only"))?);
1831 assert!(map.contains_key(p(b"other/p2_info_only"))?);
1839 assert!(map.contains_key(p(b"other/added_with_p2"))?);
1832 assert!(map.contains_key(p(b"other/added_with_p2"))?);
1840 assert!(map.contains_key(p(b"some/other/nested/clean"))?);
1833 assert!(map.contains_key(p(b"some/other/nested/clean"))?);
1841 assert_eq!(
1834 assert_eq!(
1842 map.copy_map_get(p(b"some/nested/added"))?,
1835 map.copy_map_get(p(b"some/nested/added"))?,
1843 Some(p(b"added_copy_source"))
1836 Some(p(b"added_copy_source"))
1844 );
1837 );
1845 assert_eq!(
1838 assert_eq!(
1846 map.copy_map_get(p(b"other/p2_info_only"))?,
1839 map.copy_map_get(p(b"other/p2_info_only"))?,
1847 Some(p(b"other/p2_info_copy_source"))
1840 Some(p(b"other/p2_info_copy_source"))
1848 );
1841 );
1849 assert_eq!(tracked_descendants(&map, b"some"), 2);
1842 assert_eq!(tracked_descendants(&map, b"some"), 2);
1850 assert_eq!(descendants_with_an_entry(&map, b"some"), 3);
1843 assert_eq!(descendants_with_an_entry(&map, b"some"), 3);
1851 assert_eq!(tracked_descendants(&map, b"other"), 1);
1844 assert_eq!(tracked_descendants(&map, b"other"), 1);
1852 assert_eq!(descendants_with_an_entry(&map, b"other"), 2);
1845 assert_eq!(descendants_with_an_entry(&map, b"other"), 2);
1853 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1846 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1854 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1847 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1855 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1848 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1856 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1849 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1857 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1850 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1858 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1851 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1859 assert_eq!(map.len(), 6);
1852 assert_eq!(map.len(), 6);
1860 assert_eq!(map.get_map().unreachable_bytes, 0);
1853 assert_eq!(map.get_map().unreachable_bytes, 0);
1861 assert_eq!(map.copy_map_len(), 2);
1854 assert_eq!(map.copy_map_len(), 2);
1862
1855
1863 // Shouldn't change anything since it's already not tracked
1856 // Shouldn't change anything since it's already not tracked
1864 map.set_untracked(p(b"some/nested/removed"))?;
1857 map.set_untracked(p(b"some/nested/removed"))?;
1865 assert_eq!(map.get_map().unreachable_bytes, 0);
1858 assert_eq!(map.get_map().unreachable_bytes, 0);
1866
1859
1867 match map.get_map().root {
1860 if let ChildNodes::InMemory(_) = map.get_map().root {
1868 ChildNodes::InMemory(_) => {
1861 panic!("root should not have been mutated")
1869 panic!("root should not have been mutated")
1870 }
1871 _ => (),
1872 }
1862 }
1873 // We haven't mutated enough (nothing, actually), we should still be in
1863 // We haven't mutated enough (nothing, actually), we should still be in
1874 // the append strategy
1864 // the append strategy
1875 assert!(map.get_map().write_should_append());
1865 assert!(map.get_map().write_should_append());
1876
1866
1877 // But this mutates the structure, so there should be unreachable_bytes
1867 // But this mutates the structure, so there should be unreachable_bytes
1878 assert!(map.set_untracked(p(b"some/nested/added"))?);
1868 assert!(map.set_untracked(p(b"some/nested/added"))?);
1879 let unreachable_bytes = map.get_map().unreachable_bytes;
1869 let unreachable_bytes = map.get_map().unreachable_bytes;
1880 assert!(unreachable_bytes > 0);
1870 assert!(unreachable_bytes > 0);
1881
1871
1882 match map.get_map().root {
1872 if let ChildNodes::OnDisk(_) = map.get_map().root {
1883 ChildNodes::OnDisk(_) => panic!("root should have been mutated"),
1873 panic!("root should have been mutated")
1884 _ => (),
1885 }
1874 }
1886
1875
1887 // This should not mutate the structure either, since `root` has
1876 // This should not mutate the structure either, since `root` has
1888 // already been mutated along with its direct children.
1877 // already been mutated along with its direct children.
1889 map.set_untracked(p(b"merged"))?;
1878 map.set_untracked(p(b"merged"))?;
1890 assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes);
1879 assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes);
1891
1880
1892 match map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap() {
1881 if let NodeRef::InMemory(_, _) =
1893 NodeRef::InMemory(_, _) => {
1882 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1894 panic!("'other/added_with_p2' should not have been mutated")
1883 {
1895 }
1884 panic!("'other/added_with_p2' should not have been mutated")
1896 _ => (),
1897 }
1885 }
1898 // But this should, since it's in a different path
1886 // But this should, since it's in a different path
1899 // than `<root>some/nested/add`
1887 // than `<root>some/nested/add`
1900 map.set_untracked(p(b"other/added_with_p2"))?;
1888 map.set_untracked(p(b"other/added_with_p2"))?;
1901 assert!(map.get_map().unreachable_bytes > unreachable_bytes);
1889 assert!(map.get_map().unreachable_bytes > unreachable_bytes);
1902
1890
1903 match map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap() {
1891 if let NodeRef::OnDisk(_) =
1904 NodeRef::OnDisk(_) => {
1892 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1905 panic!("'other/added_with_p2' should have been mutated")
1893 {
1906 }
1894 panic!("'other/added_with_p2' should have been mutated")
1907 _ => (),
1908 }
1895 }
1909
1896
1910 // We have rewritten most of the tree, we should create a new file
1897 // We have rewritten most of the tree, we should create a new file
1911 assert!(!map.get_map().write_should_append());
1898 assert!(!map.get_map().write_should_append());
1912
1899
1913 Ok(())
1900 Ok(())
1914 }
1901 }
1915 }
1902 }
@@ -1,877 +1,875 b''
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4
4
5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
8 use crate::dirstate_tree::path_with_basename::WithBasename;
8 use crate::dirstate_tree::path_with_basename::WithBasename;
9 use crate::errors::HgError;
9 use crate::errors::HgError;
10 use crate::utils::hg_path::HgPath;
10 use crate::utils::hg_path::HgPath;
11 use crate::DirstateEntry;
11 use crate::DirstateEntry;
12 use crate::DirstateError;
12 use crate::DirstateError;
13 use crate::DirstateParents;
13 use crate::DirstateParents;
14 use bitflags::bitflags;
14 use bitflags::bitflags;
15 use bytes_cast::unaligned::{U16Be, U32Be};
15 use bytes_cast::unaligned::{U16Be, U32Be};
16 use bytes_cast::BytesCast;
16 use bytes_cast::BytesCast;
17 use format_bytes::format_bytes;
17 use format_bytes::format_bytes;
18 use rand::Rng;
18 use rand::Rng;
19 use std::borrow::Cow;
19 use std::borrow::Cow;
20 use std::fmt::Write;
20 use std::fmt::Write;
21
21
22 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
22 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
23 /// This a redundant sanity check more than an actual "magic number" since
23 /// This a redundant sanity check more than an actual "magic number" since
24 /// `.hg/requires` already governs which format should be used.
24 /// `.hg/requires` already governs which format should be used.
25 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
25 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
26
26
27 /// Keep space for 256-bit hashes
27 /// Keep space for 256-bit hashes
28 const STORED_NODE_ID_BYTES: usize = 32;
28 const STORED_NODE_ID_BYTES: usize = 32;
29
29
30 /// … even though only 160 bits are used for now, with SHA-1
30 /// … even though only 160 bits are used for now, with SHA-1
31 const USED_NODE_ID_BYTES: usize = 20;
31 const USED_NODE_ID_BYTES: usize = 20;
32
32
33 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
33 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
34 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
34 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
35
35
36 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
36 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
37 const TREE_METADATA_SIZE: usize = 44;
37 const TREE_METADATA_SIZE: usize = 44;
38 const NODE_SIZE: usize = 44;
38 const NODE_SIZE: usize = 44;
39
39
40 /// Make sure that size-affecting changes are made knowingly
40 /// Make sure that size-affecting changes are made knowingly
41 #[allow(unused)]
41 #[allow(unused)]
42 fn static_assert_size_of() {
42 fn static_assert_size_of() {
43 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
43 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
44 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
44 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
45 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
45 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
46 }
46 }
47
47
48 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
48 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
49 #[derive(BytesCast)]
49 #[derive(BytesCast)]
50 #[repr(C)]
50 #[repr(C)]
51 struct DocketHeader {
51 struct DocketHeader {
52 marker: [u8; V2_FORMAT_MARKER.len()],
52 marker: [u8; V2_FORMAT_MARKER.len()],
53 parent_1: [u8; STORED_NODE_ID_BYTES],
53 parent_1: [u8; STORED_NODE_ID_BYTES],
54 parent_2: [u8; STORED_NODE_ID_BYTES],
54 parent_2: [u8; STORED_NODE_ID_BYTES],
55
55
56 metadata: TreeMetadata,
56 metadata: TreeMetadata,
57
57
58 /// Counted in bytes
58 /// Counted in bytes
59 data_size: Size,
59 data_size: Size,
60
60
61 uuid_size: u8,
61 uuid_size: u8,
62 }
62 }
63
63
64 pub struct Docket<'on_disk> {
64 pub struct Docket<'on_disk> {
65 header: &'on_disk DocketHeader,
65 header: &'on_disk DocketHeader,
66 pub uuid: &'on_disk [u8],
66 pub uuid: &'on_disk [u8],
67 }
67 }
68
68
69 /// Fields are documented in the *Tree metadata in the docket file*
69 /// Fields are documented in the *Tree metadata in the docket file*
70 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
70 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
71 #[derive(BytesCast)]
71 #[derive(BytesCast)]
72 #[repr(C)]
72 #[repr(C)]
73 pub struct TreeMetadata {
73 pub struct TreeMetadata {
74 root_nodes: ChildNodes,
74 root_nodes: ChildNodes,
75 nodes_with_entry_count: Size,
75 nodes_with_entry_count: Size,
76 nodes_with_copy_source_count: Size,
76 nodes_with_copy_source_count: Size,
77 unreachable_bytes: Size,
77 unreachable_bytes: Size,
78 unused: [u8; 4],
78 unused: [u8; 4],
79
79
80 /// See *Optional hash of ignore patterns* section of
80 /// See *Optional hash of ignore patterns* section of
81 /// `mercurial/helptext/internals/dirstate-v2.txt`
81 /// `mercurial/helptext/internals/dirstate-v2.txt`
82 ignore_patterns_hash: IgnorePatternsHash,
82 ignore_patterns_hash: IgnorePatternsHash,
83 }
83 }
84
84
85 /// Fields are documented in the *The data file format*
85 /// Fields are documented in the *The data file format*
86 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
86 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
87 #[derive(BytesCast, Debug)]
87 #[derive(BytesCast, Debug)]
88 #[repr(C)]
88 #[repr(C)]
89 pub(super) struct Node {
89 pub(super) struct Node {
90 full_path: PathSlice,
90 full_path: PathSlice,
91
91
92 /// In bytes from `self.full_path.start`
92 /// In bytes from `self.full_path.start`
93 base_name_start: PathSize,
93 base_name_start: PathSize,
94
94
95 copy_source: OptPathSlice,
95 copy_source: OptPathSlice,
96 children: ChildNodes,
96 children: ChildNodes,
97 pub(super) descendants_with_entry_count: Size,
97 pub(super) descendants_with_entry_count: Size,
98 pub(super) tracked_descendants_count: Size,
98 pub(super) tracked_descendants_count: Size,
99 flags: U16Be,
99 flags: U16Be,
100 size: U32Be,
100 size: U32Be,
101 mtime: PackedTruncatedTimestamp,
101 mtime: PackedTruncatedTimestamp,
102 }
102 }
103
103
104 bitflags! {
104 bitflags! {
105 #[repr(C)]
105 #[repr(C)]
106 struct Flags: u16 {
106 struct Flags: u16 {
107 const WDIR_TRACKED = 1 << 0;
107 const WDIR_TRACKED = 1 << 0;
108 const P1_TRACKED = 1 << 1;
108 const P1_TRACKED = 1 << 1;
109 const P2_INFO = 1 << 2;
109 const P2_INFO = 1 << 2;
110 const MODE_EXEC_PERM = 1 << 3;
110 const MODE_EXEC_PERM = 1 << 3;
111 const MODE_IS_SYMLINK = 1 << 4;
111 const MODE_IS_SYMLINK = 1 << 4;
112 const HAS_FALLBACK_EXEC = 1 << 5;
112 const HAS_FALLBACK_EXEC = 1 << 5;
113 const FALLBACK_EXEC = 1 << 6;
113 const FALLBACK_EXEC = 1 << 6;
114 const HAS_FALLBACK_SYMLINK = 1 << 7;
114 const HAS_FALLBACK_SYMLINK = 1 << 7;
115 const FALLBACK_SYMLINK = 1 << 8;
115 const FALLBACK_SYMLINK = 1 << 8;
116 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
116 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
117 const HAS_MODE_AND_SIZE = 1 <<10;
117 const HAS_MODE_AND_SIZE = 1 <<10;
118 const HAS_MTIME = 1 <<11;
118 const HAS_MTIME = 1 <<11;
119 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
119 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
120 const DIRECTORY = 1 <<13;
120 const DIRECTORY = 1 <<13;
121 const ALL_UNKNOWN_RECORDED = 1 <<14;
121 const ALL_UNKNOWN_RECORDED = 1 <<14;
122 const ALL_IGNORED_RECORDED = 1 <<15;
122 const ALL_IGNORED_RECORDED = 1 <<15;
123 }
123 }
124 }
124 }
125
125
126 /// Duration since the Unix epoch
126 /// Duration since the Unix epoch
127 #[derive(BytesCast, Copy, Clone, Debug)]
127 #[derive(BytesCast, Copy, Clone, Debug)]
128 #[repr(C)]
128 #[repr(C)]
129 struct PackedTruncatedTimestamp {
129 struct PackedTruncatedTimestamp {
130 truncated_seconds: U32Be,
130 truncated_seconds: U32Be,
131 nanoseconds: U32Be,
131 nanoseconds: U32Be,
132 }
132 }
133
133
134 /// Counted in bytes from the start of the file
134 /// Counted in bytes from the start of the file
135 ///
135 ///
136 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
136 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
137 type Offset = U32Be;
137 type Offset = U32Be;
138
138
139 /// Counted in number of items
139 /// Counted in number of items
140 ///
140 ///
141 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
141 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
142 type Size = U32Be;
142 type Size = U32Be;
143
143
144 /// Counted in bytes
144 /// Counted in bytes
145 ///
145 ///
146 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
146 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
147 type PathSize = U16Be;
147 type PathSize = U16Be;
148
148
149 /// A contiguous sequence of `len` times `Node`, representing the child nodes
149 /// A contiguous sequence of `len` times `Node`, representing the child nodes
150 /// of either some other node or of the repository root.
150 /// of either some other node or of the repository root.
151 ///
151 ///
152 /// Always sorted by ascending `full_path`, to allow binary search.
152 /// Always sorted by ascending `full_path`, to allow binary search.
153 /// Since nodes with the same parent nodes also have the same parent path,
153 /// Since nodes with the same parent nodes also have the same parent path,
154 /// only the `base_name`s need to be compared during binary search.
154 /// only the `base_name`s need to be compared during binary search.
155 #[derive(BytesCast, Copy, Clone, Debug)]
155 #[derive(BytesCast, Copy, Clone, Debug)]
156 #[repr(C)]
156 #[repr(C)]
157 struct ChildNodes {
157 struct ChildNodes {
158 start: Offset,
158 start: Offset,
159 len: Size,
159 len: Size,
160 }
160 }
161
161
162 /// A `HgPath` of `len` bytes
162 /// A `HgPath` of `len` bytes
163 #[derive(BytesCast, Copy, Clone, Debug)]
163 #[derive(BytesCast, Copy, Clone, Debug)]
164 #[repr(C)]
164 #[repr(C)]
165 struct PathSlice {
165 struct PathSlice {
166 start: Offset,
166 start: Offset,
167 len: PathSize,
167 len: PathSize,
168 }
168 }
169
169
170 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
170 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
171 type OptPathSlice = PathSlice;
171 type OptPathSlice = PathSlice;
172
172
173 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
173 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
174 ///
174 ///
175 /// This should only happen if Mercurial is buggy or a repository is corrupted.
175 /// This should only happen if Mercurial is buggy or a repository is corrupted.
176 #[derive(Debug)]
176 #[derive(Debug)]
177 pub struct DirstateV2ParseError {
177 pub struct DirstateV2ParseError {
178 message: String,
178 message: String,
179 }
179 }
180
180
181 impl DirstateV2ParseError {
181 impl DirstateV2ParseError {
182 pub fn new<S: Into<String>>(message: S) -> Self {
182 pub fn new<S: Into<String>>(message: S) -> Self {
183 Self {
183 Self {
184 message: message.into(),
184 message: message.into(),
185 }
185 }
186 }
186 }
187 }
187 }
188
188
189 impl From<DirstateV2ParseError> for HgError {
189 impl From<DirstateV2ParseError> for HgError {
190 fn from(e: DirstateV2ParseError) -> Self {
190 fn from(e: DirstateV2ParseError) -> Self {
191 HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
191 HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
192 }
192 }
193 }
193 }
194
194
195 impl From<DirstateV2ParseError> for crate::DirstateError {
195 impl From<DirstateV2ParseError> for crate::DirstateError {
196 fn from(error: DirstateV2ParseError) -> Self {
196 fn from(error: DirstateV2ParseError) -> Self {
197 HgError::from(error).into()
197 HgError::from(error).into()
198 }
198 }
199 }
199 }
200
200
201 impl TreeMetadata {
201 impl TreeMetadata {
202 pub fn as_bytes(&self) -> &[u8] {
202 pub fn as_bytes(&self) -> &[u8] {
203 BytesCast::as_bytes(self)
203 BytesCast::as_bytes(self)
204 }
204 }
205 }
205 }
206
206
207 impl<'on_disk> Docket<'on_disk> {
207 impl<'on_disk> Docket<'on_disk> {
208 /// Generate the identifier for a new data file
208 /// Generate the identifier for a new data file
209 ///
209 ///
210 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
210 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
211 /// See `mercurial/revlogutils/docket.py`
211 /// See `mercurial/revlogutils/docket.py`
212 pub fn new_uid() -> String {
212 pub fn new_uid() -> String {
213 const ID_LENGTH: usize = 8;
213 const ID_LENGTH: usize = 8;
214 let mut id = String::with_capacity(ID_LENGTH);
214 let mut id = String::with_capacity(ID_LENGTH);
215 let mut rng = rand::thread_rng();
215 let mut rng = rand::thread_rng();
216 for _ in 0..ID_LENGTH {
216 for _ in 0..ID_LENGTH {
217 // One random hexadecimal digit.
217 // One random hexadecimal digit.
218 // `unwrap` never panics because `impl Write for String`
218 // `unwrap` never panics because `impl Write for String`
219 // never returns an error.
219 // never returns an error.
220 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
220 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
221 }
221 }
222 id
222 id
223 }
223 }
224
224
225 pub fn serialize(
225 pub fn serialize(
226 parents: DirstateParents,
226 parents: DirstateParents,
227 tree_metadata: TreeMetadata,
227 tree_metadata: TreeMetadata,
228 data_size: u64,
228 data_size: u64,
229 uuid: &[u8],
229 uuid: &[u8],
230 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
230 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
231 let header = DocketHeader {
231 let header = DocketHeader {
232 marker: *V2_FORMAT_MARKER,
232 marker: *V2_FORMAT_MARKER,
233 parent_1: parents.p1.pad_to_256_bits(),
233 parent_1: parents.p1.pad_to_256_bits(),
234 parent_2: parents.p2.pad_to_256_bits(),
234 parent_2: parents.p2.pad_to_256_bits(),
235 metadata: tree_metadata,
235 metadata: tree_metadata,
236 data_size: u32::try_from(data_size)?.into(),
236 data_size: u32::try_from(data_size)?.into(),
237 uuid_size: uuid.len().try_into()?,
237 uuid_size: uuid.len().try_into()?,
238 };
238 };
239 let header = header.as_bytes();
239 let header = header.as_bytes();
240 let mut docket = Vec::with_capacity(header.len() + uuid.len());
240 let mut docket = Vec::with_capacity(header.len() + uuid.len());
241 docket.extend_from_slice(header);
241 docket.extend_from_slice(header);
242 docket.extend_from_slice(uuid);
242 docket.extend_from_slice(uuid);
243 Ok(docket)
243 Ok(docket)
244 }
244 }
245
245
246 pub fn parents(&self) -> DirstateParents {
246 pub fn parents(&self) -> DirstateParents {
247 use crate::Node;
247 use crate::Node;
248 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
248 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
249 .unwrap()
249 .unwrap();
250 .clone();
251 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
250 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
252 .unwrap()
251 .unwrap();
253 .clone();
254 DirstateParents { p1, p2 }
252 DirstateParents { p1, p2 }
255 }
253 }
256
254
257 pub fn tree_metadata(&self) -> &[u8] {
255 pub fn tree_metadata(&self) -> &[u8] {
258 self.header.metadata.as_bytes()
256 self.header.metadata.as_bytes()
259 }
257 }
260
258
261 pub fn data_size(&self) -> usize {
259 pub fn data_size(&self) -> usize {
262 // This `unwrap` could only panic on a 16-bit CPU
260 // This `unwrap` could only panic on a 16-bit CPU
263 self.header.data_size.get().try_into().unwrap()
261 self.header.data_size.get().try_into().unwrap()
264 }
262 }
265
263
266 pub fn data_filename(&self) -> String {
264 pub fn data_filename(&self) -> String {
267 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
265 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
268 }
266 }
269 }
267 }
270
268
271 pub fn read_docket(
269 pub fn read_docket(
272 on_disk: &[u8],
270 on_disk: &[u8],
273 ) -> Result<Docket<'_>, DirstateV2ParseError> {
271 ) -> Result<Docket<'_>, DirstateV2ParseError> {
274 let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
272 let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
275 DirstateV2ParseError::new(format!("when reading docket, {}", e))
273 DirstateV2ParseError::new(format!("when reading docket, {}", e))
276 })?;
274 })?;
277 let uuid_size = header.uuid_size as usize;
275 let uuid_size = header.uuid_size as usize;
278 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
276 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
279 Ok(Docket { header, uuid })
277 Ok(Docket { header, uuid })
280 } else {
278 } else {
281 Err(DirstateV2ParseError::new(
279 Err(DirstateV2ParseError::new(
282 "invalid format marker or uuid size",
280 "invalid format marker or uuid size",
283 ))
281 ))
284 }
282 }
285 }
283 }
286
284
287 pub(super) fn read<'on_disk>(
285 pub(super) fn read<'on_disk>(
288 on_disk: &'on_disk [u8],
286 on_disk: &'on_disk [u8],
289 metadata: &[u8],
287 metadata: &[u8],
290 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
288 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
291 if on_disk.is_empty() {
289 if on_disk.is_empty() {
292 let mut map = DirstateMap::empty(on_disk);
290 let mut map = DirstateMap::empty(on_disk);
293 map.dirstate_version = DirstateVersion::V2;
291 map.dirstate_version = DirstateVersion::V2;
294 return Ok(map);
292 return Ok(map);
295 }
293 }
296 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
294 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
297 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
295 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
298 })?;
296 })?;
299 let dirstate_map = DirstateMap {
297 let dirstate_map = DirstateMap {
300 on_disk,
298 on_disk,
301 root: dirstate_map::ChildNodes::OnDisk(
299 root: dirstate_map::ChildNodes::OnDisk(
302 read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
300 read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
303 e.message = format!("{}, when reading root notes", e.message);
301 e.message = format!("{}, when reading root notes", e.message);
304 e
302 e
305 })?,
303 })?,
306 ),
304 ),
307 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
305 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
308 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
306 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
309 ignore_patterns_hash: meta.ignore_patterns_hash,
307 ignore_patterns_hash: meta.ignore_patterns_hash,
310 unreachable_bytes: meta.unreachable_bytes.get(),
308 unreachable_bytes: meta.unreachable_bytes.get(),
311 old_data_size: on_disk.len(),
309 old_data_size: on_disk.len(),
312 dirstate_version: DirstateVersion::V2,
310 dirstate_version: DirstateVersion::V2,
313 };
311 };
314 Ok(dirstate_map)
312 Ok(dirstate_map)
315 }
313 }
316
314
317 impl Node {
315 impl Node {
318 pub(super) fn full_path<'on_disk>(
316 pub(super) fn full_path<'on_disk>(
319 &self,
317 &self,
320 on_disk: &'on_disk [u8],
318 on_disk: &'on_disk [u8],
321 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
319 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
322 read_hg_path(on_disk, self.full_path)
320 read_hg_path(on_disk, self.full_path)
323 }
321 }
324
322
325 pub(super) fn base_name_start<'on_disk>(
323 pub(super) fn base_name_start(
326 &self,
324 &self,
327 ) -> Result<usize, DirstateV2ParseError> {
325 ) -> Result<usize, DirstateV2ParseError> {
328 let start = self.base_name_start.get();
326 let start = self.base_name_start.get();
329 if start < self.full_path.len.get() {
327 if start < self.full_path.len.get() {
330 let start = usize::try_from(start)
328 let start = usize::try_from(start)
331 // u32 -> usize, could only panic on a 16-bit CPU
329 // u32 -> usize, could only panic on a 16-bit CPU
332 .expect("dirstate-v2 base_name_start out of bounds");
330 .expect("dirstate-v2 base_name_start out of bounds");
333 Ok(start)
331 Ok(start)
334 } else {
332 } else {
335 Err(DirstateV2ParseError::new("not enough bytes for base name"))
333 Err(DirstateV2ParseError::new("not enough bytes for base name"))
336 }
334 }
337 }
335 }
338
336
339 pub(super) fn base_name<'on_disk>(
337 pub(super) fn base_name<'on_disk>(
340 &self,
338 &self,
341 on_disk: &'on_disk [u8],
339 on_disk: &'on_disk [u8],
342 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
340 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
343 let full_path = self.full_path(on_disk)?;
341 let full_path = self.full_path(on_disk)?;
344 let base_name_start = self.base_name_start()?;
342 let base_name_start = self.base_name_start()?;
345 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
343 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
346 }
344 }
347
345
348 pub(super) fn path<'on_disk>(
346 pub(super) fn path<'on_disk>(
349 &self,
347 &self,
350 on_disk: &'on_disk [u8],
348 on_disk: &'on_disk [u8],
351 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
349 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
352 Ok(WithBasename::from_raw_parts(
350 Ok(WithBasename::from_raw_parts(
353 Cow::Borrowed(self.full_path(on_disk)?),
351 Cow::Borrowed(self.full_path(on_disk)?),
354 self.base_name_start()?,
352 self.base_name_start()?,
355 ))
353 ))
356 }
354 }
357
355
358 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
356 pub(super) fn has_copy_source(&self) -> bool {
359 self.copy_source.start.get() != 0
357 self.copy_source.start.get() != 0
360 }
358 }
361
359
362 pub(super) fn copy_source<'on_disk>(
360 pub(super) fn copy_source<'on_disk>(
363 &self,
361 &self,
364 on_disk: &'on_disk [u8],
362 on_disk: &'on_disk [u8],
365 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
363 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
366 Ok(if self.has_copy_source() {
364 Ok(if self.has_copy_source() {
367 Some(read_hg_path(on_disk, self.copy_source)?)
365 Some(read_hg_path(on_disk, self.copy_source)?)
368 } else {
366 } else {
369 None
367 None
370 })
368 })
371 }
369 }
372
370
373 fn flags(&self) -> Flags {
371 fn flags(&self) -> Flags {
374 Flags::from_bits_truncate(self.flags.get())
372 Flags::from_bits_truncate(self.flags.get())
375 }
373 }
376
374
377 fn has_entry(&self) -> bool {
375 fn has_entry(&self) -> bool {
378 self.flags().intersects(
376 self.flags().intersects(
379 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
377 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
380 )
378 )
381 }
379 }
382
380
383 pub(super) fn node_data(
381 pub(super) fn node_data(
384 &self,
382 &self,
385 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
383 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
386 if self.has_entry() {
384 if self.has_entry() {
387 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
385 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
388 } else if let Some(mtime) = self.cached_directory_mtime()? {
386 } else if let Some(mtime) = self.cached_directory_mtime()? {
389 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
387 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
390 } else {
388 } else {
391 Ok(dirstate_map::NodeData::None)
389 Ok(dirstate_map::NodeData::None)
392 }
390 }
393 }
391 }
394
392
395 pub(super) fn cached_directory_mtime(
393 pub(super) fn cached_directory_mtime(
396 &self,
394 &self,
397 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
395 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
398 // For now we do not have code to handle the absence of
396 // For now we do not have code to handle the absence of
399 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
397 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
400 // unset.
398 // unset.
401 if self.flags().contains(Flags::DIRECTORY)
399 if self.flags().contains(Flags::DIRECTORY)
402 && self.flags().contains(Flags::HAS_MTIME)
400 && self.flags().contains(Flags::HAS_MTIME)
403 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
401 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
404 {
402 {
405 Ok(Some(self.mtime()?))
403 Ok(Some(self.mtime()?))
406 } else {
404 } else {
407 Ok(None)
405 Ok(None)
408 }
406 }
409 }
407 }
410
408
411 fn synthesize_unix_mode(&self) -> u32 {
409 fn synthesize_unix_mode(&self) -> u32 {
412 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
410 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
413 libc::S_IFLNK
411 libc::S_IFLNK
414 } else {
412 } else {
415 libc::S_IFREG
413 libc::S_IFREG
416 };
414 };
417 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
415 let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
418 0o755
416 0o755
419 } else {
417 } else {
420 0o644
418 0o644
421 };
419 };
422 (file_type | permisions).into()
420 file_type | permissions
423 }
421 }
424
422
425 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
423 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
426 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
424 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
427 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
425 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
428 m.second_ambiguous = true;
426 m.second_ambiguous = true;
429 }
427 }
430 Ok(m)
428 Ok(m)
431 }
429 }
432
430
433 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
431 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
434 // TODO: convert through raw bits instead?
432 // TODO: convert through raw bits instead?
435 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
433 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
436 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
434 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
437 let p2_info = self.flags().contains(Flags::P2_INFO);
435 let p2_info = self.flags().contains(Flags::P2_INFO);
438 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
436 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
439 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
437 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
440 {
438 {
441 Some((self.synthesize_unix_mode(), self.size.into()))
439 Some((self.synthesize_unix_mode(), self.size.into()))
442 } else {
440 } else {
443 None
441 None
444 };
442 };
445 let mtime = if self.flags().contains(Flags::HAS_MTIME)
443 let mtime = if self.flags().contains(Flags::HAS_MTIME)
446 && !self.flags().contains(Flags::DIRECTORY)
444 && !self.flags().contains(Flags::DIRECTORY)
447 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
445 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
448 {
446 {
449 Some(self.mtime()?)
447 Some(self.mtime()?)
450 } else {
448 } else {
451 None
449 None
452 };
450 };
453 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
451 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
454 {
452 {
455 Some(self.flags().contains(Flags::FALLBACK_EXEC))
453 Some(self.flags().contains(Flags::FALLBACK_EXEC))
456 } else {
454 } else {
457 None
455 None
458 };
456 };
459 let fallback_symlink =
457 let fallback_symlink =
460 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
458 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
461 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
459 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
462 } else {
460 } else {
463 None
461 None
464 };
462 };
465 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
463 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
466 wc_tracked,
464 wc_tracked,
467 p1_tracked,
465 p1_tracked,
468 p2_info,
466 p2_info,
469 mode_size,
467 mode_size,
470 mtime,
468 mtime,
471 fallback_exec,
469 fallback_exec,
472 fallback_symlink,
470 fallback_symlink,
473 }))
471 }))
474 }
472 }
475
473
476 pub(super) fn entry(
474 pub(super) fn entry(
477 &self,
475 &self,
478 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
476 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
479 if self.has_entry() {
477 if self.has_entry() {
480 Ok(Some(self.assume_entry()?))
478 Ok(Some(self.assume_entry()?))
481 } else {
479 } else {
482 Ok(None)
480 Ok(None)
483 }
481 }
484 }
482 }
485
483
486 pub(super) fn children<'on_disk>(
484 pub(super) fn children<'on_disk>(
487 &self,
485 &self,
488 on_disk: &'on_disk [u8],
486 on_disk: &'on_disk [u8],
489 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
487 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
490 read_nodes(on_disk, self.children)
488 read_nodes(on_disk, self.children)
491 }
489 }
492
490
493 pub(super) fn to_in_memory_node<'on_disk>(
491 pub(super) fn to_in_memory_node<'on_disk>(
494 &self,
492 &self,
495 on_disk: &'on_disk [u8],
493 on_disk: &'on_disk [u8],
496 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
494 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
497 Ok(dirstate_map::Node {
495 Ok(dirstate_map::Node {
498 children: dirstate_map::ChildNodes::OnDisk(
496 children: dirstate_map::ChildNodes::OnDisk(
499 self.children(on_disk)?,
497 self.children(on_disk)?,
500 ),
498 ),
501 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
499 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
502 data: self.node_data()?,
500 data: self.node_data()?,
503 descendants_with_entry_count: self
501 descendants_with_entry_count: self
504 .descendants_with_entry_count
502 .descendants_with_entry_count
505 .get(),
503 .get(),
506 tracked_descendants_count: self.tracked_descendants_count.get(),
504 tracked_descendants_count: self.tracked_descendants_count.get(),
507 })
505 })
508 }
506 }
509
507
510 fn from_dirstate_entry(
508 fn from_dirstate_entry(
511 entry: &DirstateEntry,
509 entry: &DirstateEntry,
512 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
510 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
513 let DirstateV2Data {
511 let DirstateV2Data {
514 wc_tracked,
512 wc_tracked,
515 p1_tracked,
513 p1_tracked,
516 p2_info,
514 p2_info,
517 mode_size: mode_size_opt,
515 mode_size: mode_size_opt,
518 mtime: mtime_opt,
516 mtime: mtime_opt,
519 fallback_exec,
517 fallback_exec,
520 fallback_symlink,
518 fallback_symlink,
521 } = entry.v2_data();
519 } = entry.v2_data();
522 // TODO: convert through raw flag bits instead?
520 // TODO: convert through raw flag bits instead?
523 let mut flags = Flags::empty();
521 let mut flags = Flags::empty();
524 flags.set(Flags::WDIR_TRACKED, wc_tracked);
522 flags.set(Flags::WDIR_TRACKED, wc_tracked);
525 flags.set(Flags::P1_TRACKED, p1_tracked);
523 flags.set(Flags::P1_TRACKED, p1_tracked);
526 flags.set(Flags::P2_INFO, p2_info);
524 flags.set(Flags::P2_INFO, p2_info);
527 let size = if let Some((m, s)) = mode_size_opt {
525 let size = if let Some((m, s)) = mode_size_opt {
528 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
526 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
529 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
527 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
530 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
528 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
531 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
529 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
532 flags.insert(Flags::HAS_MODE_AND_SIZE);
530 flags.insert(Flags::HAS_MODE_AND_SIZE);
533 s.into()
531 s.into()
534 } else {
532 } else {
535 0.into()
533 0.into()
536 };
534 };
537 let mtime = if let Some(m) = mtime_opt {
535 let mtime = if let Some(m) = mtime_opt {
538 flags.insert(Flags::HAS_MTIME);
536 flags.insert(Flags::HAS_MTIME);
539 if m.second_ambiguous {
537 if m.second_ambiguous {
540 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
538 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
541 };
539 };
542 m.into()
540 m.into()
543 } else {
541 } else {
544 PackedTruncatedTimestamp::null()
542 PackedTruncatedTimestamp::null()
545 };
543 };
546 if let Some(f_exec) = fallback_exec {
544 if let Some(f_exec) = fallback_exec {
547 flags.insert(Flags::HAS_FALLBACK_EXEC);
545 flags.insert(Flags::HAS_FALLBACK_EXEC);
548 if f_exec {
546 if f_exec {
549 flags.insert(Flags::FALLBACK_EXEC);
547 flags.insert(Flags::FALLBACK_EXEC);
550 }
548 }
551 }
549 }
552 if let Some(f_symlink) = fallback_symlink {
550 if let Some(f_symlink) = fallback_symlink {
553 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
551 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
554 if f_symlink {
552 if f_symlink {
555 flags.insert(Flags::FALLBACK_SYMLINK);
553 flags.insert(Flags::FALLBACK_SYMLINK);
556 }
554 }
557 }
555 }
558 (flags, size, mtime)
556 (flags, size, mtime)
559 }
557 }
560 }
558 }
561
559
562 fn read_hg_path(
560 fn read_hg_path(
563 on_disk: &[u8],
561 on_disk: &[u8],
564 slice: PathSlice,
562 slice: PathSlice,
565 ) -> Result<&HgPath, DirstateV2ParseError> {
563 ) -> Result<&HgPath, DirstateV2ParseError> {
566 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
564 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
567 }
565 }
568
566
569 fn read_nodes(
567 fn read_nodes(
570 on_disk: &[u8],
568 on_disk: &[u8],
571 slice: ChildNodes,
569 slice: ChildNodes,
572 ) -> Result<&[Node], DirstateV2ParseError> {
570 ) -> Result<&[Node], DirstateV2ParseError> {
573 read_slice(on_disk, slice.start, slice.len.get())
571 read_slice(on_disk, slice.start, slice.len.get())
574 }
572 }
575
573
576 fn read_slice<T, Len>(
574 fn read_slice<T, Len>(
577 on_disk: &[u8],
575 on_disk: &[u8],
578 start: Offset,
576 start: Offset,
579 len: Len,
577 len: Len,
580 ) -> Result<&[T], DirstateV2ParseError>
578 ) -> Result<&[T], DirstateV2ParseError>
581 where
579 where
582 T: BytesCast,
580 T: BytesCast,
583 Len: TryInto<usize>,
581 Len: TryInto<usize>,
584 {
582 {
585 // Either `usize::MAX` would result in "out of bounds" error since a single
583 // Either `usize::MAX` would result in "out of bounds" error since a single
586 // `&[u8]` cannot occupy the entire addess space.
584 // `&[u8]` cannot occupy the entire addess space.
587 let start = start.get().try_into().unwrap_or(std::usize::MAX);
585 let start = start.get().try_into().unwrap_or(std::usize::MAX);
588 let len = len.try_into().unwrap_or(std::usize::MAX);
586 let len = len.try_into().unwrap_or(std::usize::MAX);
589 let bytes = match on_disk.get(start..) {
587 let bytes = match on_disk.get(start..) {
590 Some(bytes) => bytes,
588 Some(bytes) => bytes,
591 None => {
589 None => {
592 return Err(DirstateV2ParseError::new(
590 return Err(DirstateV2ParseError::new(
593 "not enough bytes from disk",
591 "not enough bytes from disk",
594 ))
592 ))
595 }
593 }
596 };
594 };
597 T::slice_from_bytes(bytes, len)
595 T::slice_from_bytes(bytes, len)
598 .map_err(|e| {
596 .map_err(|e| {
599 DirstateV2ParseError::new(format!("when reading a slice, {}", e))
597 DirstateV2ParseError::new(format!("when reading a slice, {}", e))
600 })
598 })
601 .map(|(slice, _rest)| slice)
599 .map(|(slice, _rest)| slice)
602 }
600 }
603
601
604 pub(crate) fn for_each_tracked_path<'on_disk>(
602 pub(crate) fn for_each_tracked_path<'on_disk>(
605 on_disk: &'on_disk [u8],
603 on_disk: &'on_disk [u8],
606 metadata: &[u8],
604 metadata: &[u8],
607 mut f: impl FnMut(&'on_disk HgPath),
605 mut f: impl FnMut(&'on_disk HgPath),
608 ) -> Result<(), DirstateV2ParseError> {
606 ) -> Result<(), DirstateV2ParseError> {
609 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
607 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
610 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
608 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
611 })?;
609 })?;
612 fn recur<'on_disk>(
610 fn recur<'on_disk>(
613 on_disk: &'on_disk [u8],
611 on_disk: &'on_disk [u8],
614 nodes: ChildNodes,
612 nodes: ChildNodes,
615 f: &mut impl FnMut(&'on_disk HgPath),
613 f: &mut impl FnMut(&'on_disk HgPath),
616 ) -> Result<(), DirstateV2ParseError> {
614 ) -> Result<(), DirstateV2ParseError> {
617 for node in read_nodes(on_disk, nodes)? {
615 for node in read_nodes(on_disk, nodes)? {
618 if let Some(entry) = node.entry()? {
616 if let Some(entry) = node.entry()? {
619 if entry.tracked() {
617 if entry.tracked() {
620 f(node.full_path(on_disk)?)
618 f(node.full_path(on_disk)?)
621 }
619 }
622 }
620 }
623 recur(on_disk, node.children, f)?
621 recur(on_disk, node.children, f)?
624 }
622 }
625 Ok(())
623 Ok(())
626 }
624 }
627 recur(on_disk, meta.root_nodes, &mut f)
625 recur(on_disk, meta.root_nodes, &mut f)
628 }
626 }
629
627
630 /// Returns new data and metadata, together with whether that data should be
628 /// Returns new data and metadata, together with whether that data should be
631 /// appended to the existing data file whose content is at
629 /// appended to the existing data file whose content is at
632 /// `dirstate_map.on_disk` (true), instead of written to a new data file
630 /// `dirstate_map.on_disk` (true), instead of written to a new data file
633 /// (false), and the previous size of data on disk.
631 /// (false), and the previous size of data on disk.
634 pub(super) fn write(
632 pub(super) fn write(
635 dirstate_map: &DirstateMap,
633 dirstate_map: &DirstateMap,
636 can_append: bool,
634 can_append: bool,
637 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
635 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
638 let append = can_append && dirstate_map.write_should_append();
636 let append = can_append && dirstate_map.write_should_append();
639
637
640 // This ignores the space for paths, and for nodes without an entry.
638 // This ignores the space for paths, and for nodes without an entry.
641 // TODO: better estimate? Skip the `Vec` and write to a file directly?
639 // TODO: better estimate? Skip the `Vec` and write to a file directly?
642 let size_guess = std::mem::size_of::<Node>()
640 let size_guess = std::mem::size_of::<Node>()
643 * dirstate_map.nodes_with_entry_count as usize;
641 * dirstate_map.nodes_with_entry_count as usize;
644
642
645 let mut writer = Writer {
643 let mut writer = Writer {
646 dirstate_map,
644 dirstate_map,
647 append,
645 append,
648 out: Vec::with_capacity(size_guess),
646 out: Vec::with_capacity(size_guess),
649 };
647 };
650
648
651 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
649 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
652
650
653 let unreachable_bytes = if append {
651 let unreachable_bytes = if append {
654 dirstate_map.unreachable_bytes
652 dirstate_map.unreachable_bytes
655 } else {
653 } else {
656 0
654 0
657 };
655 };
658 let meta = TreeMetadata {
656 let meta = TreeMetadata {
659 root_nodes,
657 root_nodes,
660 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
658 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
661 nodes_with_copy_source_count: dirstate_map
659 nodes_with_copy_source_count: dirstate_map
662 .nodes_with_copy_source_count
660 .nodes_with_copy_source_count
663 .into(),
661 .into(),
664 unreachable_bytes: unreachable_bytes.into(),
662 unreachable_bytes: unreachable_bytes.into(),
665 unused: [0; 4],
663 unused: [0; 4],
666 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
664 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
667 };
665 };
668 Ok((writer.out, meta, append, dirstate_map.old_data_size))
666 Ok((writer.out, meta, append, dirstate_map.old_data_size))
669 }
667 }
670
668
671 struct Writer<'dmap, 'on_disk> {
669 struct Writer<'dmap, 'on_disk> {
672 dirstate_map: &'dmap DirstateMap<'on_disk>,
670 dirstate_map: &'dmap DirstateMap<'on_disk>,
673 append: bool,
671 append: bool,
674 out: Vec<u8>,
672 out: Vec<u8>,
675 }
673 }
676
674
677 impl Writer<'_, '_> {
675 impl Writer<'_, '_> {
678 fn write_nodes(
676 fn write_nodes(
679 &mut self,
677 &mut self,
680 nodes: dirstate_map::ChildNodesRef,
678 nodes: dirstate_map::ChildNodesRef,
681 ) -> Result<ChildNodes, DirstateError> {
679 ) -> Result<ChildNodes, DirstateError> {
682 // Reuse already-written nodes if possible
680 // Reuse already-written nodes if possible
683 if self.append {
681 if self.append {
684 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
682 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
685 let start = self.on_disk_offset_of(nodes_slice).expect(
683 let start = self.on_disk_offset_of(nodes_slice).expect(
686 "dirstate-v2 OnDisk nodes not found within on_disk",
684 "dirstate-v2 OnDisk nodes not found within on_disk",
687 );
685 );
688 let len = child_nodes_len_from_usize(nodes_slice.len());
686 let len = child_nodes_len_from_usize(nodes_slice.len());
689 return Ok(ChildNodes { start, len });
687 return Ok(ChildNodes { start, len });
690 }
688 }
691 }
689 }
692
690
693 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
691 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
694 // undefined iteration order. Sort to enable binary search in the
692 // undefined iteration order. Sort to enable binary search in the
695 // written file.
693 // written file.
696 let nodes = nodes.sorted();
694 let nodes = nodes.sorted();
697 let nodes_len = nodes.len();
695 let nodes_len = nodes.len();
698
696
699 // First accumulate serialized nodes in a `Vec`
697 // First accumulate serialized nodes in a `Vec`
700 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
698 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
701 for node in nodes {
699 for node in nodes {
702 let children =
700 let children =
703 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
701 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
704 let full_path = node.full_path(self.dirstate_map.on_disk)?;
702 let full_path = node.full_path(self.dirstate_map.on_disk)?;
705 let full_path = self.write_path(full_path.as_bytes());
703 let full_path = self.write_path(full_path.as_bytes());
706 let copy_source = if let Some(source) =
704 let copy_source = if let Some(source) =
707 node.copy_source(self.dirstate_map.on_disk)?
705 node.copy_source(self.dirstate_map.on_disk)?
708 {
706 {
709 self.write_path(source.as_bytes())
707 self.write_path(source.as_bytes())
710 } else {
708 } else {
711 PathSlice {
709 PathSlice {
712 start: 0.into(),
710 start: 0.into(),
713 len: 0.into(),
711 len: 0.into(),
714 }
712 }
715 };
713 };
716 on_disk_nodes.push(match node {
714 on_disk_nodes.push(match node {
717 NodeRef::InMemory(path, node) => {
715 NodeRef::InMemory(path, node) => {
718 let (flags, size, mtime) = match &node.data {
716 let (flags, size, mtime) = match &node.data {
719 dirstate_map::NodeData::Entry(entry) => {
717 dirstate_map::NodeData::Entry(entry) => {
720 Node::from_dirstate_entry(entry)
718 Node::from_dirstate_entry(entry)
721 }
719 }
722 dirstate_map::NodeData::CachedDirectory { mtime } => {
720 dirstate_map::NodeData::CachedDirectory { mtime } => {
723 // we currently never set a mtime if unknown file
721 // we currently never set a mtime if unknown file
724 // are present.
722 // are present.
725 // So if we have a mtime for a directory, we know
723 // So if we have a mtime for a directory, we know
726 // they are no unknown
724 // they are no unknown
727 // files and we
725 // files and we
728 // blindly set ALL_UNKNOWN_RECORDED.
726 // blindly set ALL_UNKNOWN_RECORDED.
729 //
727 //
730 // We never set ALL_IGNORED_RECORDED since we
728 // We never set ALL_IGNORED_RECORDED since we
731 // don't track that case
729 // don't track that case
732 // currently.
730 // currently.
733 let mut flags = Flags::DIRECTORY
731 let mut flags = Flags::DIRECTORY
734 | Flags::HAS_MTIME
732 | Flags::HAS_MTIME
735 | Flags::ALL_UNKNOWN_RECORDED;
733 | Flags::ALL_UNKNOWN_RECORDED;
736 if mtime.second_ambiguous {
734 if mtime.second_ambiguous {
737 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
735 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
738 }
736 }
739 (flags, 0.into(), (*mtime).into())
737 (flags, 0.into(), (*mtime).into())
740 }
738 }
741 dirstate_map::NodeData::None => (
739 dirstate_map::NodeData::None => (
742 Flags::DIRECTORY,
740 Flags::DIRECTORY,
743 0.into(),
741 0.into(),
744 PackedTruncatedTimestamp::null(),
742 PackedTruncatedTimestamp::null(),
745 ),
743 ),
746 };
744 };
747 Node {
745 Node {
748 children,
746 children,
749 copy_source,
747 copy_source,
750 full_path,
748 full_path,
751 base_name_start: u16::try_from(path.base_name_start())
749 base_name_start: u16::try_from(path.base_name_start())
752 // Could only panic for paths over 64 KiB
750 // Could only panic for paths over 64 KiB
753 .expect("dirstate-v2 path length overflow")
751 .expect("dirstate-v2 path length overflow")
754 .into(),
752 .into(),
755 descendants_with_entry_count: node
753 descendants_with_entry_count: node
756 .descendants_with_entry_count
754 .descendants_with_entry_count
757 .into(),
755 .into(),
758 tracked_descendants_count: node
756 tracked_descendants_count: node
759 .tracked_descendants_count
757 .tracked_descendants_count
760 .into(),
758 .into(),
761 flags: flags.bits().into(),
759 flags: flags.bits().into(),
762 size,
760 size,
763 mtime,
761 mtime,
764 }
762 }
765 }
763 }
766 NodeRef::OnDisk(node) => Node {
764 NodeRef::OnDisk(node) => Node {
767 children,
765 children,
768 copy_source,
766 copy_source,
769 full_path,
767 full_path,
770 ..*node
768 ..*node
771 },
769 },
772 })
770 })
773 }
771 }
774 // … so we can write them contiguously, after writing everything else
772 // … so we can write them contiguously, after writing everything else
775 // they refer to.
773 // they refer to.
776 let start = self.current_offset();
774 let start = self.current_offset();
777 let len = child_nodes_len_from_usize(nodes_len);
775 let len = child_nodes_len_from_usize(nodes_len);
778 self.out.extend(on_disk_nodes.as_bytes());
776 self.out.extend(on_disk_nodes.as_bytes());
779 Ok(ChildNodes { start, len })
777 Ok(ChildNodes { start, len })
780 }
778 }
781
779
782 /// If the given slice of items is within `on_disk`, returns its offset
780 /// If the given slice of items is within `on_disk`, returns its offset
783 /// from the start of `on_disk`.
781 /// from the start of `on_disk`.
784 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
782 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
785 where
783 where
786 T: BytesCast,
784 T: BytesCast,
787 {
785 {
788 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
786 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
789 let start = slice.as_ptr() as usize;
787 let start = slice.as_ptr() as usize;
790 let end = start + slice.len();
788 let end = start + slice.len();
791 start..=end
789 start..=end
792 }
790 }
793 let slice_addresses = address_range(slice.as_bytes());
791 let slice_addresses = address_range(slice.as_bytes());
794 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
792 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
795 if on_disk_addresses.contains(slice_addresses.start())
793 if on_disk_addresses.contains(slice_addresses.start())
796 && on_disk_addresses.contains(slice_addresses.end())
794 && on_disk_addresses.contains(slice_addresses.end())
797 {
795 {
798 let offset = slice_addresses.start() - on_disk_addresses.start();
796 let offset = slice_addresses.start() - on_disk_addresses.start();
799 Some(offset_from_usize(offset))
797 Some(offset_from_usize(offset))
800 } else {
798 } else {
801 None
799 None
802 }
800 }
803 }
801 }
804
802
805 fn current_offset(&mut self) -> Offset {
803 fn current_offset(&mut self) -> Offset {
806 let mut offset = self.out.len();
804 let mut offset = self.out.len();
807 if self.append {
805 if self.append {
808 offset += self.dirstate_map.on_disk.len()
806 offset += self.dirstate_map.on_disk.len()
809 }
807 }
810 offset_from_usize(offset)
808 offset_from_usize(offset)
811 }
809 }
812
810
813 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
811 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
814 let len = path_len_from_usize(slice.len());
812 let len = path_len_from_usize(slice.len());
815 // Reuse an already-written path if possible
813 // Reuse an already-written path if possible
816 if self.append {
814 if self.append {
817 if let Some(start) = self.on_disk_offset_of(slice) {
815 if let Some(start) = self.on_disk_offset_of(slice) {
818 return PathSlice { start, len };
816 return PathSlice { start, len };
819 }
817 }
820 }
818 }
821 let start = self.current_offset();
819 let start = self.current_offset();
822 self.out.extend(slice.as_bytes());
820 self.out.extend(slice.as_bytes());
823 PathSlice { start, len }
821 PathSlice { start, len }
824 }
822 }
825 }
823 }
826
824
827 fn offset_from_usize(x: usize) -> Offset {
825 fn offset_from_usize(x: usize) -> Offset {
828 u32::try_from(x)
826 u32::try_from(x)
829 // Could only panic for a dirstate file larger than 4 GiB
827 // Could only panic for a dirstate file larger than 4 GiB
830 .expect("dirstate-v2 offset overflow")
828 .expect("dirstate-v2 offset overflow")
831 .into()
829 .into()
832 }
830 }
833
831
834 fn child_nodes_len_from_usize(x: usize) -> Size {
832 fn child_nodes_len_from_usize(x: usize) -> Size {
835 u32::try_from(x)
833 u32::try_from(x)
836 // Could only panic with over 4 billion nodes
834 // Could only panic with over 4 billion nodes
837 .expect("dirstate-v2 slice length overflow")
835 .expect("dirstate-v2 slice length overflow")
838 .into()
836 .into()
839 }
837 }
840
838
841 fn path_len_from_usize(x: usize) -> PathSize {
839 fn path_len_from_usize(x: usize) -> PathSize {
842 u16::try_from(x)
840 u16::try_from(x)
843 // Could only panic for paths over 64 KiB
841 // Could only panic for paths over 64 KiB
844 .expect("dirstate-v2 path length overflow")
842 .expect("dirstate-v2 path length overflow")
845 .into()
843 .into()
846 }
844 }
847
845
848 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
846 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
849 fn from(timestamp: TruncatedTimestamp) -> Self {
847 fn from(timestamp: TruncatedTimestamp) -> Self {
850 Self {
848 Self {
851 truncated_seconds: timestamp.truncated_seconds().into(),
849 truncated_seconds: timestamp.truncated_seconds().into(),
852 nanoseconds: timestamp.nanoseconds().into(),
850 nanoseconds: timestamp.nanoseconds().into(),
853 }
851 }
854 }
852 }
855 }
853 }
856
854
857 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
855 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
858 type Error = DirstateV2ParseError;
856 type Error = DirstateV2ParseError;
859
857
860 fn try_from(
858 fn try_from(
861 timestamp: PackedTruncatedTimestamp,
859 timestamp: PackedTruncatedTimestamp,
862 ) -> Result<Self, Self::Error> {
860 ) -> Result<Self, Self::Error> {
863 Self::from_already_truncated(
861 Self::from_already_truncated(
864 timestamp.truncated_seconds.get(),
862 timestamp.truncated_seconds.get(),
865 timestamp.nanoseconds.get(),
863 timestamp.nanoseconds.get(),
866 false,
864 false,
867 )
865 )
868 }
866 }
869 }
867 }
870 impl PackedTruncatedTimestamp {
868 impl PackedTruncatedTimestamp {
871 fn null() -> Self {
869 fn null() -> Self {
872 Self {
870 Self {
873 truncated_seconds: 0.into(),
871 truncated_seconds: 0.into(),
874 nanoseconds: 0.into(),
872 nanoseconds: 0.into(),
875 }
873 }
876 }
874 }
877 }
875 }
@@ -1,89 +1,89 b''
1 use crate::{DirstateError, DirstateParents};
1 use crate::{DirstateError, DirstateParents};
2
2
3 use super::dirstate_map::DirstateMap;
3 use super::dirstate_map::DirstateMap;
4 use std::ops::Deref;
4 use std::ops::Deref;
5
5
6 use ouroboros::self_referencing;
6 use ouroboros::self_referencing;
7
7
8 /// Keep a `DirstateMap<'on_disk>` next to the `on_disk` buffer that it
8 /// Keep a `DirstateMap<'on_disk>` next to the `on_disk` buffer that it
9 /// borrows.
9 /// borrows.
10 #[self_referencing]
10 #[self_referencing]
11 pub struct OwningDirstateMap {
11 pub struct OwningDirstateMap {
12 on_disk: Box<dyn Deref<Target = [u8]> + Send>,
12 on_disk: Box<dyn Deref<Target = [u8]> + Send>,
13 #[borrows(on_disk)]
13 #[borrows(on_disk)]
14 #[covariant]
14 #[covariant]
15 map: DirstateMap<'this>,
15 map: DirstateMap<'this>,
16 }
16 }
17
17
18 impl OwningDirstateMap {
18 impl OwningDirstateMap {
19 pub fn new_empty<OnDisk>(on_disk: OnDisk) -> Self
19 pub fn new_empty<OnDisk>(on_disk: OnDisk) -> Self
20 where
20 where
21 OnDisk: Deref<Target = [u8]> + Send + 'static,
21 OnDisk: Deref<Target = [u8]> + Send + 'static,
22 {
22 {
23 let on_disk = Box::new(on_disk);
23 let on_disk = Box::new(on_disk);
24
24
25 OwningDirstateMapBuilder {
25 OwningDirstateMapBuilder {
26 on_disk,
26 on_disk,
27 map_builder: |bytes| DirstateMap::empty(&bytes),
27 map_builder: |bytes| DirstateMap::empty(bytes),
28 }
28 }
29 .build()
29 .build()
30 }
30 }
31
31
32 pub fn new_v1<OnDisk>(
32 pub fn new_v1<OnDisk>(
33 on_disk: OnDisk,
33 on_disk: OnDisk,
34 ) -> Result<(Self, DirstateParents), DirstateError>
34 ) -> Result<(Self, DirstateParents), DirstateError>
35 where
35 where
36 OnDisk: Deref<Target = [u8]> + Send + 'static,
36 OnDisk: Deref<Target = [u8]> + Send + 'static,
37 {
37 {
38 let on_disk = Box::new(on_disk);
38 let on_disk = Box::new(on_disk);
39 let mut parents = DirstateParents::NULL;
39 let mut parents = DirstateParents::NULL;
40
40
41 Ok((
41 Ok((
42 OwningDirstateMapTryBuilder {
42 OwningDirstateMapTryBuilder {
43 on_disk,
43 on_disk,
44 map_builder: |bytes| {
44 map_builder: |bytes| {
45 DirstateMap::new_v1(&bytes).map(|(dmap, p)| {
45 DirstateMap::new_v1(bytes).map(|(dmap, p)| {
46 parents = p.unwrap_or(DirstateParents::NULL);
46 parents = p.unwrap_or(DirstateParents::NULL);
47 dmap
47 dmap
48 })
48 })
49 },
49 },
50 }
50 }
51 .try_build()?,
51 .try_build()?,
52 parents,
52 parents,
53 ))
53 ))
54 }
54 }
55
55
56 pub fn new_v2<OnDisk>(
56 pub fn new_v2<OnDisk>(
57 on_disk: OnDisk,
57 on_disk: OnDisk,
58 data_size: usize,
58 data_size: usize,
59 metadata: &[u8],
59 metadata: &[u8],
60 ) -> Result<Self, DirstateError>
60 ) -> Result<Self, DirstateError>
61 where
61 where
62 OnDisk: Deref<Target = [u8]> + Send + 'static,
62 OnDisk: Deref<Target = [u8]> + Send + 'static,
63 {
63 {
64 let on_disk = Box::new(on_disk);
64 let on_disk = Box::new(on_disk);
65
65
66 OwningDirstateMapTryBuilder {
66 OwningDirstateMapTryBuilder {
67 on_disk,
67 on_disk,
68 map_builder: |bytes| {
68 map_builder: |bytes| {
69 DirstateMap::new_v2(&bytes, data_size, metadata)
69 DirstateMap::new_v2(bytes, data_size, metadata)
70 },
70 },
71 }
71 }
72 .try_build()
72 .try_build()
73 }
73 }
74
74
75 pub fn with_dmap_mut<R>(
75 pub fn with_dmap_mut<R>(
76 &mut self,
76 &mut self,
77 f: impl FnOnce(&mut DirstateMap) -> R,
77 f: impl FnOnce(&mut DirstateMap) -> R,
78 ) -> R {
78 ) -> R {
79 self.with_map_mut(f)
79 self.with_map_mut(f)
80 }
80 }
81
81
82 pub fn get_map(&self) -> &DirstateMap {
82 pub fn get_map(&self) -> &DirstateMap {
83 self.borrow_map()
83 self.borrow_map()
84 }
84 }
85
85
86 pub fn on_disk(&self) -> &[u8] {
86 pub fn on_disk(&self) -> &[u8] {
87 self.borrow_on_disk()
87 self.borrow_on_disk()
88 }
88 }
89 }
89 }
@@ -1,1002 +1,996 b''
1 use crate::dirstate::entry::TruncatedTimestamp;
1 use crate::dirstate::entry::TruncatedTimestamp;
2 use crate::dirstate::status::IgnoreFnType;
2 use crate::dirstate::status::IgnoreFnType;
3 use crate::dirstate::status::StatusPath;
3 use crate::dirstate::status::StatusPath;
4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 use crate::dirstate_tree::dirstate_map::DirstateMap;
6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
8 use crate::dirstate_tree::dirstate_map::NodeRef;
8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 use crate::matchers::get_ignore_function;
10 use crate::matchers::get_ignore_function;
11 use crate::matchers::Matcher;
11 use crate::matchers::Matcher;
12 use crate::utils::files::get_bytes_from_os_string;
12 use crate::utils::files::get_bytes_from_os_string;
13 use crate::utils::files::get_bytes_from_path;
13 use crate::utils::files::get_bytes_from_path;
14 use crate::utils::files::get_path_from_bytes;
14 use crate::utils::files::get_path_from_bytes;
15 use crate::utils::hg_path::HgPath;
15 use crate::utils::hg_path::HgPath;
16 use crate::BadMatch;
16 use crate::BadMatch;
17 use crate::DirstateStatus;
17 use crate::DirstateStatus;
18 use crate::HgPathCow;
18 use crate::HgPathCow;
19 use crate::PatternFileWarning;
19 use crate::PatternFileWarning;
20 use crate::StatusError;
20 use crate::StatusError;
21 use crate::StatusOptions;
21 use crate::StatusOptions;
22 use once_cell::sync::OnceCell;
22 use once_cell::sync::OnceCell;
23 use rayon::prelude::*;
23 use rayon::prelude::*;
24 use sha1::{Digest, Sha1};
24 use sha1::{Digest, Sha1};
25 use std::borrow::Cow;
25 use std::borrow::Cow;
26 use std::io;
26 use std::io;
27 use std::path::Path;
27 use std::path::Path;
28 use std::path::PathBuf;
28 use std::path::PathBuf;
29 use std::sync::Mutex;
29 use std::sync::Mutex;
30 use std::time::SystemTime;
30 use std::time::SystemTime;
31
31
32 /// Returns the status of the working directory compared to its parent
32 /// Returns the status of the working directory compared to its parent
33 /// changeset.
33 /// changeset.
34 ///
34 ///
35 /// This algorithm is based on traversing the filesystem tree (`fs` in function
35 /// This algorithm is based on traversing the filesystem tree (`fs` in function
36 /// and variable names) and dirstate tree at the same time. The core of this
36 /// and variable names) and dirstate tree at the same time. The core of this
37 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
37 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
38 /// and its use of `itertools::merge_join_by`. When reaching a path that only
38 /// and its use of `itertools::merge_join_by`. When reaching a path that only
39 /// exists in one of the two trees, depending on information requested by
39 /// exists in one of the two trees, depending on information requested by
40 /// `options` we may need to traverse the remaining subtree.
40 /// `options` we may need to traverse the remaining subtree.
41 #[logging_timer::time("trace")]
41 #[logging_timer::time("trace")]
42 pub fn status<'dirstate>(
42 pub fn status<'dirstate>(
43 dmap: &'dirstate mut DirstateMap,
43 dmap: &'dirstate mut DirstateMap,
44 matcher: &(dyn Matcher + Sync),
44 matcher: &(dyn Matcher + Sync),
45 root_dir: PathBuf,
45 root_dir: PathBuf,
46 ignore_files: Vec<PathBuf>,
46 ignore_files: Vec<PathBuf>,
47 options: StatusOptions,
47 options: StatusOptions,
48 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
48 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
49 {
49 {
50 // Force the global rayon threadpool to not exceed 16 concurrent threads.
50 // Force the global rayon threadpool to not exceed 16 concurrent threads.
51 // This is a stop-gap measure until we figure out why using more than 16
51 // This is a stop-gap measure until we figure out why using more than 16
52 // threads makes `status` slower for each additional thread.
52 // threads makes `status` slower for each additional thread.
53 // We use `ok()` in case the global threadpool has already been
53 // We use `ok()` in case the global threadpool has already been
54 // instantiated in `rhg` or some other caller.
54 // instantiated in `rhg` or some other caller.
55 // TODO find the underlying cause and fix it, then remove this.
55 // TODO find the underlying cause and fix it, then remove this.
56 rayon::ThreadPoolBuilder::new()
56 rayon::ThreadPoolBuilder::new()
57 .num_threads(16.min(rayon::current_num_threads()))
57 .num_threads(16.min(rayon::current_num_threads()))
58 .build_global()
58 .build_global()
59 .ok();
59 .ok();
60
60
61 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
61 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
62 if options.list_ignored || options.list_unknown {
62 if options.list_ignored || options.list_unknown {
63 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
63 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
64 DirstateVersion::V1 => {
64 DirstateVersion::V1 => {
65 let (ignore_fn, warnings) = get_ignore_function(
65 let (ignore_fn, warnings) = get_ignore_function(
66 ignore_files,
66 ignore_files,
67 &root_dir,
67 &root_dir,
68 &mut |_source, _pattern_bytes| {},
68 &mut |_source, _pattern_bytes| {},
69 )?;
69 )?;
70 (ignore_fn, warnings, None)
70 (ignore_fn, warnings, None)
71 }
71 }
72 DirstateVersion::V2 => {
72 DirstateVersion::V2 => {
73 let mut hasher = Sha1::new();
73 let mut hasher = Sha1::new();
74 let (ignore_fn, warnings) = get_ignore_function(
74 let (ignore_fn, warnings) = get_ignore_function(
75 ignore_files,
75 ignore_files,
76 &root_dir,
76 &root_dir,
77 &mut |source, pattern_bytes| {
77 &mut |source, pattern_bytes| {
78 // If inside the repo, use the relative version to
78 // If inside the repo, use the relative version to
79 // make it deterministic inside tests.
79 // make it deterministic inside tests.
80 // The performance hit should be negligible.
80 // The performance hit should be negligible.
81 let source = source
81 let source = source
82 .strip_prefix(&root_dir)
82 .strip_prefix(&root_dir)
83 .unwrap_or(source);
83 .unwrap_or(source);
84 let source = get_bytes_from_path(source);
84 let source = get_bytes_from_path(source);
85
85
86 let mut subhasher = Sha1::new();
86 let mut subhasher = Sha1::new();
87 subhasher.update(pattern_bytes);
87 subhasher.update(pattern_bytes);
88 let patterns_hash = subhasher.finalize();
88 let patterns_hash = subhasher.finalize();
89
89
90 hasher.update(source);
90 hasher.update(source);
91 hasher.update(b" ");
91 hasher.update(b" ");
92 hasher.update(patterns_hash);
92 hasher.update(patterns_hash);
93 hasher.update(b"\n");
93 hasher.update(b"\n");
94 },
94 },
95 )?;
95 )?;
96 let new_hash = *hasher.finalize().as_ref();
96 let new_hash = *hasher.finalize().as_ref();
97 let changed = new_hash != dmap.ignore_patterns_hash;
97 let changed = new_hash != dmap.ignore_patterns_hash;
98 dmap.ignore_patterns_hash = new_hash;
98 dmap.ignore_patterns_hash = new_hash;
99 (ignore_fn, warnings, Some(changed))
99 (ignore_fn, warnings, Some(changed))
100 }
100 }
101 };
101 };
102 (ignore_fn, warnings, changed)
102 (ignore_fn, warnings, changed)
103 } else {
103 } else {
104 (Box::new(|&_| true), vec![], None)
104 (Box::new(|&_| true), vec![], None)
105 };
105 };
106
106
107 let filesystem_time_at_status_start =
107 let filesystem_time_at_status_start =
108 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
108 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
109
109
110 // If the repository is under the current directory, prefer using a
110 // If the repository is under the current directory, prefer using a
111 // relative path, so the kernel needs to traverse fewer directory in every
111 // relative path, so the kernel needs to traverse fewer directory in every
112 // call to `read_dir` or `symlink_metadata`.
112 // call to `read_dir` or `symlink_metadata`.
113 // This is effective in the common case where the current directory is the
113 // This is effective in the common case where the current directory is the
114 // repository root.
114 // repository root.
115
115
116 // TODO: Better yet would be to use libc functions like `openat` and
116 // TODO: Better yet would be to use libc functions like `openat` and
117 // `fstatat` to remove such repeated traversals entirely, but the standard
117 // `fstatat` to remove such repeated traversals entirely, but the standard
118 // library does not provide APIs based on those.
118 // library does not provide APIs based on those.
119 // Maybe with a crate like https://crates.io/crates/openat instead?
119 // Maybe with a crate like https://crates.io/crates/openat instead?
120 let root_dir = if let Some(relative) = std::env::current_dir()
120 let root_dir = if let Some(relative) = std::env::current_dir()
121 .ok()
121 .ok()
122 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
122 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
123 {
123 {
124 relative
124 relative
125 } else {
125 } else {
126 &root_dir
126 &root_dir
127 };
127 };
128
128
129 let outcome = DirstateStatus {
129 let outcome = DirstateStatus {
130 filesystem_time_at_status_start,
130 filesystem_time_at_status_start,
131 ..Default::default()
131 ..Default::default()
132 };
132 };
133 let common = StatusCommon {
133 let common = StatusCommon {
134 dmap,
134 dmap,
135 options,
135 options,
136 matcher,
136 matcher,
137 ignore_fn,
137 ignore_fn,
138 outcome: Mutex::new(outcome),
138 outcome: Mutex::new(outcome),
139 ignore_patterns_have_changed: patterns_changed,
139 ignore_patterns_have_changed: patterns_changed,
140 new_cacheable_directories: Default::default(),
140 new_cacheable_directories: Default::default(),
141 outdated_cached_directories: Default::default(),
141 outdated_cached_directories: Default::default(),
142 filesystem_time_at_status_start,
142 filesystem_time_at_status_start,
143 };
143 };
144 let is_at_repo_root = true;
144 let is_at_repo_root = true;
145 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
145 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
146 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
146 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
147 let root_cached_mtime = None;
147 let root_cached_mtime = None;
148 // If the path we have for the repository root is a symlink, do follow it.
148 // If the path we have for the repository root is a symlink, do follow it.
149 // (As opposed to symlinks within the working directory which are not
149 // (As opposed to symlinks within the working directory which are not
150 // followed, using `std::fs::symlink_metadata`.)
150 // followed, using `std::fs::symlink_metadata`.)
151 common.traverse_fs_directory_and_dirstate(
151 common.traverse_fs_directory_and_dirstate(
152 &has_ignored_ancestor,
152 &has_ignored_ancestor,
153 dmap.root.as_ref(),
153 dmap.root.as_ref(),
154 hg_path,
154 hg_path,
155 &DirEntry {
155 &DirEntry {
156 hg_path: Cow::Borrowed(HgPath::new(b"")),
156 hg_path: Cow::Borrowed(HgPath::new(b"")),
157 fs_path: Cow::Borrowed(&root_dir),
157 fs_path: Cow::Borrowed(root_dir),
158 symlink_metadata: None,
158 symlink_metadata: None,
159 file_type: FakeFileType::Directory,
159 file_type: FakeFileType::Directory,
160 },
160 },
161 root_cached_mtime,
161 root_cached_mtime,
162 is_at_repo_root,
162 is_at_repo_root,
163 )?;
163 )?;
164 let mut outcome = common.outcome.into_inner().unwrap();
164 let mut outcome = common.outcome.into_inner().unwrap();
165 let new_cacheable = common.new_cacheable_directories.into_inner().unwrap();
165 let new_cacheable = common.new_cacheable_directories.into_inner().unwrap();
166 let outdated = common.outdated_cached_directories.into_inner().unwrap();
166 let outdated = common.outdated_cached_directories.into_inner().unwrap();
167
167
168 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
168 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
169 || !outdated.is_empty()
169 || !outdated.is_empty()
170 || (!new_cacheable.is_empty()
170 || (!new_cacheable.is_empty()
171 && dmap.dirstate_version == DirstateVersion::V2);
171 && dmap.dirstate_version == DirstateVersion::V2);
172
172
173 // Remove outdated mtimes before adding new mtimes, in case a given
173 // Remove outdated mtimes before adding new mtimes, in case a given
174 // directory is both
174 // directory is both
175 for path in &outdated {
175 for path in &outdated {
176 dmap.clear_cached_mtime(path)?;
176 dmap.clear_cached_mtime(path)?;
177 }
177 }
178 for (path, mtime) in &new_cacheable {
178 for (path, mtime) in &new_cacheable {
179 dmap.set_cached_mtime(path, *mtime)?;
179 dmap.set_cached_mtime(path, *mtime)?;
180 }
180 }
181
181
182 Ok((outcome, warnings))
182 Ok((outcome, warnings))
183 }
183 }
184
184
185 /// Bag of random things needed by various parts of the algorithm. Reduces the
185 /// Bag of random things needed by various parts of the algorithm. Reduces the
186 /// number of parameters passed to functions.
186 /// number of parameters passed to functions.
187 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
187 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
188 dmap: &'tree DirstateMap<'on_disk>,
188 dmap: &'tree DirstateMap<'on_disk>,
189 options: StatusOptions,
189 options: StatusOptions,
190 matcher: &'a (dyn Matcher + Sync),
190 matcher: &'a (dyn Matcher + Sync),
191 ignore_fn: IgnoreFnType<'a>,
191 ignore_fn: IgnoreFnType<'a>,
192 outcome: Mutex<DirstateStatus<'on_disk>>,
192 outcome: Mutex<DirstateStatus<'on_disk>>,
193 /// New timestamps of directories to be used for caching their readdirs
193 /// New timestamps of directories to be used for caching their readdirs
194 new_cacheable_directories:
194 new_cacheable_directories:
195 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
195 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
196 /// Used to invalidate the readdir cache of directories
196 /// Used to invalidate the readdir cache of directories
197 outdated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
197 outdated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
198
198
199 /// Whether ignore files like `.hgignore` have changed since the previous
199 /// Whether ignore files like `.hgignore` have changed since the previous
200 /// time a `status()` call wrote their hash to the dirstate. `None` means
200 /// time a `status()` call wrote their hash to the dirstate. `None` means
201 /// we don’t know as this run doesn’t list either ignored or uknown files
201 /// we don’t know as this run doesn’t list either ignored or uknown files
202 /// and therefore isn’t reading `.hgignore`.
202 /// and therefore isn’t reading `.hgignore`.
203 ignore_patterns_have_changed: Option<bool>,
203 ignore_patterns_have_changed: Option<bool>,
204
204
205 /// The current time at the start of the `status()` algorithm, as measured
205 /// The current time at the start of the `status()` algorithm, as measured
206 /// and possibly truncated by the filesystem.
206 /// and possibly truncated by the filesystem.
207 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
207 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
208 }
208 }
209
209
210 enum Outcome {
210 enum Outcome {
211 Modified,
211 Modified,
212 Added,
212 Added,
213 Removed,
213 Removed,
214 Deleted,
214 Deleted,
215 Clean,
215 Clean,
216 Ignored,
216 Ignored,
217 Unknown,
217 Unknown,
218 Unsure,
218 Unsure,
219 }
219 }
220
220
221 /// Lazy computation of whether a given path has a hgignored
221 /// Lazy computation of whether a given path has a hgignored
222 /// ancestor.
222 /// ancestor.
223 struct HasIgnoredAncestor<'a> {
223 struct HasIgnoredAncestor<'a> {
224 /// `path` and `parent` constitute the inputs to the computation,
224 /// `path` and `parent` constitute the inputs to the computation,
225 /// `cache` stores the outcome.
225 /// `cache` stores the outcome.
226 path: &'a HgPath,
226 path: &'a HgPath,
227 parent: Option<&'a HasIgnoredAncestor<'a>>,
227 parent: Option<&'a HasIgnoredAncestor<'a>>,
228 cache: OnceCell<bool>,
228 cache: OnceCell<bool>,
229 }
229 }
230
230
231 impl<'a> HasIgnoredAncestor<'a> {
231 impl<'a> HasIgnoredAncestor<'a> {
232 fn create(
232 fn create(
233 parent: Option<&'a HasIgnoredAncestor<'a>>,
233 parent: Option<&'a HasIgnoredAncestor<'a>>,
234 path: &'a HgPath,
234 path: &'a HgPath,
235 ) -> HasIgnoredAncestor<'a> {
235 ) -> HasIgnoredAncestor<'a> {
236 Self {
236 Self {
237 path,
237 path,
238 parent,
238 parent,
239 cache: OnceCell::new(),
239 cache: OnceCell::new(),
240 }
240 }
241 }
241 }
242
242
243 fn force<'b>(&self, ignore_fn: &IgnoreFnType<'b>) -> bool {
243 fn force<'b>(&self, ignore_fn: &IgnoreFnType<'b>) -> bool {
244 match self.parent {
244 match self.parent {
245 None => false,
245 None => false,
246 Some(parent) => {
246 Some(parent) => {
247 *(parent.cache.get_or_init(|| {
247 *(parent.cache.get_or_init(|| {
248 parent.force(ignore_fn) || ignore_fn(&self.path)
248 parent.force(ignore_fn) || ignore_fn(self.path)
249 }))
249 }))
250 }
250 }
251 }
251 }
252 }
252 }
253 }
253 }
254
254
255 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
255 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
256 fn push_outcome(
256 fn push_outcome(
257 &self,
257 &self,
258 which: Outcome,
258 which: Outcome,
259 dirstate_node: &NodeRef<'tree, 'on_disk>,
259 dirstate_node: &NodeRef<'tree, 'on_disk>,
260 ) -> Result<(), DirstateV2ParseError> {
260 ) -> Result<(), DirstateV2ParseError> {
261 let path = dirstate_node
261 let path = dirstate_node
262 .full_path_borrowed(self.dmap.on_disk)?
262 .full_path_borrowed(self.dmap.on_disk)?
263 .detach_from_tree();
263 .detach_from_tree();
264 let copy_source = if self.options.list_copies {
264 let copy_source = if self.options.list_copies {
265 dirstate_node
265 dirstate_node
266 .copy_source_borrowed(self.dmap.on_disk)?
266 .copy_source_borrowed(self.dmap.on_disk)?
267 .map(|source| source.detach_from_tree())
267 .map(|source| source.detach_from_tree())
268 } else {
268 } else {
269 None
269 None
270 };
270 };
271 self.push_outcome_common(which, path, copy_source);
271 self.push_outcome_common(which, path, copy_source);
272 Ok(())
272 Ok(())
273 }
273 }
274
274
275 fn push_outcome_without_copy_source(
275 fn push_outcome_without_copy_source(
276 &self,
276 &self,
277 which: Outcome,
277 which: Outcome,
278 path: &BorrowedPath<'_, 'on_disk>,
278 path: &BorrowedPath<'_, 'on_disk>,
279 ) {
279 ) {
280 self.push_outcome_common(which, path.detach_from_tree(), None)
280 self.push_outcome_common(which, path.detach_from_tree(), None)
281 }
281 }
282
282
283 fn push_outcome_common(
283 fn push_outcome_common(
284 &self,
284 &self,
285 which: Outcome,
285 which: Outcome,
286 path: HgPathCow<'on_disk>,
286 path: HgPathCow<'on_disk>,
287 copy_source: Option<HgPathCow<'on_disk>>,
287 copy_source: Option<HgPathCow<'on_disk>>,
288 ) {
288 ) {
289 let mut outcome = self.outcome.lock().unwrap();
289 let mut outcome = self.outcome.lock().unwrap();
290 let vec = match which {
290 let vec = match which {
291 Outcome::Modified => &mut outcome.modified,
291 Outcome::Modified => &mut outcome.modified,
292 Outcome::Added => &mut outcome.added,
292 Outcome::Added => &mut outcome.added,
293 Outcome::Removed => &mut outcome.removed,
293 Outcome::Removed => &mut outcome.removed,
294 Outcome::Deleted => &mut outcome.deleted,
294 Outcome::Deleted => &mut outcome.deleted,
295 Outcome::Clean => &mut outcome.clean,
295 Outcome::Clean => &mut outcome.clean,
296 Outcome::Ignored => &mut outcome.ignored,
296 Outcome::Ignored => &mut outcome.ignored,
297 Outcome::Unknown => &mut outcome.unknown,
297 Outcome::Unknown => &mut outcome.unknown,
298 Outcome::Unsure => &mut outcome.unsure,
298 Outcome::Unsure => &mut outcome.unsure,
299 };
299 };
300 vec.push(StatusPath { path, copy_source });
300 vec.push(StatusPath { path, copy_source });
301 }
301 }
302
302
303 fn read_dir(
303 fn read_dir(
304 &self,
304 &self,
305 hg_path: &HgPath,
305 hg_path: &HgPath,
306 fs_path: &Path,
306 fs_path: &Path,
307 is_at_repo_root: bool,
307 is_at_repo_root: bool,
308 ) -> Result<Vec<DirEntry>, ()> {
308 ) -> Result<Vec<DirEntry>, ()> {
309 DirEntry::read_dir(fs_path, is_at_repo_root)
309 DirEntry::read_dir(fs_path, is_at_repo_root)
310 .map_err(|error| self.io_error(error, hg_path))
310 .map_err(|error| self.io_error(error, hg_path))
311 }
311 }
312
312
313 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
313 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
314 let errno = error.raw_os_error().expect("expected real OS error");
314 let errno = error.raw_os_error().expect("expected real OS error");
315 self.outcome
315 self.outcome
316 .lock()
316 .lock()
317 .unwrap()
317 .unwrap()
318 .bad
318 .bad
319 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
319 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
320 }
320 }
321
321
322 fn check_for_outdated_directory_cache(
322 fn check_for_outdated_directory_cache(
323 &self,
323 &self,
324 dirstate_node: &NodeRef<'tree, 'on_disk>,
324 dirstate_node: &NodeRef<'tree, 'on_disk>,
325 ) -> Result<bool, DirstateV2ParseError> {
325 ) -> Result<bool, DirstateV2ParseError> {
326 if self.ignore_patterns_have_changed == Some(true)
326 if self.ignore_patterns_have_changed == Some(true)
327 && dirstate_node.cached_directory_mtime()?.is_some()
327 && dirstate_node.cached_directory_mtime()?.is_some()
328 {
328 {
329 self.outdated_cached_directories.lock().unwrap().push(
329 self.outdated_cached_directories.lock().unwrap().push(
330 dirstate_node
330 dirstate_node
331 .full_path_borrowed(self.dmap.on_disk)?
331 .full_path_borrowed(self.dmap.on_disk)?
332 .detach_from_tree(),
332 .detach_from_tree(),
333 );
333 );
334 return Ok(true);
334 return Ok(true);
335 }
335 }
336 Ok(false)
336 Ok(false)
337 }
337 }
338
338
339 /// If this returns true, we can get accurate results by only using
339 /// If this returns true, we can get accurate results by only using
340 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
340 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
341 /// need to call `read_dir`.
341 /// need to call `read_dir`.
342 fn can_skip_fs_readdir(
342 fn can_skip_fs_readdir(
343 &self,
343 &self,
344 directory_entry: &DirEntry,
344 directory_entry: &DirEntry,
345 cached_directory_mtime: Option<TruncatedTimestamp>,
345 cached_directory_mtime: Option<TruncatedTimestamp>,
346 ) -> bool {
346 ) -> bool {
347 if !self.options.list_unknown && !self.options.list_ignored {
347 if !self.options.list_unknown && !self.options.list_ignored {
348 // All states that we care about listing have corresponding
348 // All states that we care about listing have corresponding
349 // dirstate entries.
349 // dirstate entries.
350 // This happens for example with `hg status -mard`.
350 // This happens for example with `hg status -mard`.
351 return true;
351 return true;
352 }
352 }
353 if !self.options.list_ignored
353 if !self.options.list_ignored
354 && self.ignore_patterns_have_changed == Some(false)
354 && self.ignore_patterns_have_changed == Some(false)
355 {
355 {
356 if let Some(cached_mtime) = cached_directory_mtime {
356 if let Some(cached_mtime) = cached_directory_mtime {
357 // The dirstate contains a cached mtime for this directory, set
357 // The dirstate contains a cached mtime for this directory, set
358 // by a previous run of the `status` algorithm which found this
358 // by a previous run of the `status` algorithm which found this
359 // directory eligible for `read_dir` caching.
359 // directory eligible for `read_dir` caching.
360 if let Ok(meta) = directory_entry.symlink_metadata() {
360 if let Ok(meta) = directory_entry.symlink_metadata() {
361 if cached_mtime
361 if cached_mtime
362 .likely_equal_to_mtime_of(&meta)
362 .likely_equal_to_mtime_of(&meta)
363 .unwrap_or(false)
363 .unwrap_or(false)
364 {
364 {
365 // The mtime of that directory has not changed
365 // The mtime of that directory has not changed
366 // since then, which means that the results of
366 // since then, which means that the results of
367 // `read_dir` should also be unchanged.
367 // `read_dir` should also be unchanged.
368 return true;
368 return true;
369 }
369 }
370 }
370 }
371 }
371 }
372 }
372 }
373 false
373 false
374 }
374 }
375
375
376 /// Returns whether all child entries of the filesystem directory have a
376 /// Returns whether all child entries of the filesystem directory have a
377 /// corresponding dirstate node or are ignored.
377 /// corresponding dirstate node or are ignored.
378 fn traverse_fs_directory_and_dirstate<'ancestor>(
378 fn traverse_fs_directory_and_dirstate<'ancestor>(
379 &self,
379 &self,
380 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
380 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
381 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
381 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
382 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
382 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
383 directory_entry: &DirEntry,
383 directory_entry: &DirEntry,
384 cached_directory_mtime: Option<TruncatedTimestamp>,
384 cached_directory_mtime: Option<TruncatedTimestamp>,
385 is_at_repo_root: bool,
385 is_at_repo_root: bool,
386 ) -> Result<bool, DirstateV2ParseError> {
386 ) -> Result<bool, DirstateV2ParseError> {
387 if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) {
387 if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) {
388 dirstate_nodes
388 dirstate_nodes
389 .par_iter()
389 .par_iter()
390 .map(|dirstate_node| {
390 .map(|dirstate_node| {
391 let fs_path = &directory_entry.fs_path;
391 let fs_path = &directory_entry.fs_path;
392 let fs_path = fs_path.join(get_path_from_bytes(
392 let fs_path = fs_path.join(get_path_from_bytes(
393 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
393 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
394 ));
394 ));
395 match std::fs::symlink_metadata(&fs_path) {
395 match std::fs::symlink_metadata(&fs_path) {
396 Ok(fs_metadata) => {
396 Ok(fs_metadata) => {
397 let file_type =
397 let file_type =
398 match fs_metadata.file_type().try_into() {
398 match fs_metadata.file_type().try_into() {
399 Ok(file_type) => file_type,
399 Ok(file_type) => file_type,
400 Err(_) => return Ok(()),
400 Err(_) => return Ok(()),
401 };
401 };
402 let entry = DirEntry {
402 let entry = DirEntry {
403 hg_path: Cow::Borrowed(
403 hg_path: Cow::Borrowed(
404 dirstate_node
404 dirstate_node
405 .full_path(&self.dmap.on_disk)?,
405 .full_path(self.dmap.on_disk)?,
406 ),
406 ),
407 fs_path: Cow::Borrowed(&fs_path),
407 fs_path: Cow::Borrowed(&fs_path),
408 symlink_metadata: Some(fs_metadata),
408 symlink_metadata: Some(fs_metadata),
409 file_type,
409 file_type,
410 };
410 };
411 self.traverse_fs_and_dirstate(
411 self.traverse_fs_and_dirstate(
412 &entry,
412 &entry,
413 dirstate_node,
413 dirstate_node,
414 has_ignored_ancestor,
414 has_ignored_ancestor,
415 )
415 )
416 }
416 }
417 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
417 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
418 self.traverse_dirstate_only(dirstate_node)
418 self.traverse_dirstate_only(dirstate_node)
419 }
419 }
420 Err(error) => {
420 Err(error) => {
421 let hg_path =
421 let hg_path =
422 dirstate_node.full_path(self.dmap.on_disk)?;
422 dirstate_node.full_path(self.dmap.on_disk)?;
423 Ok(self.io_error(error, hg_path))
423 self.io_error(error, hg_path);
424 Ok(())
424 }
425 }
425 }
426 }
426 })
427 })
427 .collect::<Result<_, _>>()?;
428 .collect::<Result<_, _>>()?;
428
429
429 // We don’t know, so conservatively say this isn’t the case
430 // We don’t know, so conservatively say this isn’t the case
430 let children_all_have_dirstate_node_or_are_ignored = false;
431 let children_all_have_dirstate_node_or_are_ignored = false;
431
432
432 return Ok(children_all_have_dirstate_node_or_are_ignored);
433 return Ok(children_all_have_dirstate_node_or_are_ignored);
433 }
434 }
434
435
435 let mut fs_entries = if let Ok(entries) = self.read_dir(
436 let mut fs_entries = if let Ok(entries) = self.read_dir(
436 directory_hg_path,
437 directory_hg_path,
437 &directory_entry.fs_path,
438 &directory_entry.fs_path,
438 is_at_repo_root,
439 is_at_repo_root,
439 ) {
440 ) {
440 entries
441 entries
441 } else {
442 } else {
442 // Treat an unreadable directory (typically because of insufficient
443 // Treat an unreadable directory (typically because of insufficient
443 // permissions) like an empty directory. `self.read_dir` has
444 // permissions) like an empty directory. `self.read_dir` has
444 // already called `self.io_error` so a warning will be emitted.
445 // already called `self.io_error` so a warning will be emitted.
445 Vec::new()
446 Vec::new()
446 };
447 };
447
448
448 // `merge_join_by` requires both its input iterators to be sorted:
449 // `merge_join_by` requires both its input iterators to be sorted:
449
450
450 let dirstate_nodes = dirstate_nodes.sorted();
451 let dirstate_nodes = dirstate_nodes.sorted();
451 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
452 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
452 // https://github.com/rust-lang/rust/issues/34162
453 // https://github.com/rust-lang/rust/issues/34162
453 fs_entries.sort_unstable_by(|e1, e2| e1.hg_path.cmp(&e2.hg_path));
454 fs_entries.sort_unstable_by(|e1, e2| e1.hg_path.cmp(&e2.hg_path));
454
455
455 // Propagate here any error that would happen inside the comparison
456 // Propagate here any error that would happen inside the comparison
456 // callback below
457 // callback below
457 for dirstate_node in &dirstate_nodes {
458 for dirstate_node in &dirstate_nodes {
458 dirstate_node.base_name(self.dmap.on_disk)?;
459 dirstate_node.base_name(self.dmap.on_disk)?;
459 }
460 }
460 itertools::merge_join_by(
461 itertools::merge_join_by(
461 dirstate_nodes,
462 dirstate_nodes,
462 &fs_entries,
463 &fs_entries,
463 |dirstate_node, fs_entry| {
464 |dirstate_node, fs_entry| {
464 // This `unwrap` never panics because we already propagated
465 // This `unwrap` never panics because we already propagated
465 // those errors above
466 // those errors above
466 dirstate_node
467 dirstate_node
467 .base_name(self.dmap.on_disk)
468 .base_name(self.dmap.on_disk)
468 .unwrap()
469 .unwrap()
469 .cmp(&fs_entry.hg_path)
470 .cmp(&fs_entry.hg_path)
470 },
471 },
471 )
472 )
472 .par_bridge()
473 .par_bridge()
473 .map(|pair| {
474 .map(|pair| {
474 use itertools::EitherOrBoth::*;
475 use itertools::EitherOrBoth::*;
475 let has_dirstate_node_or_is_ignored;
476 let has_dirstate_node_or_is_ignored = match pair {
476 match pair {
477 Both(dirstate_node, fs_entry) => {
477 Both(dirstate_node, fs_entry) => {
478 self.traverse_fs_and_dirstate(
478 self.traverse_fs_and_dirstate(
479 &fs_entry,
479 fs_entry,
480 dirstate_node,
480 dirstate_node,
481 has_ignored_ancestor,
481 has_ignored_ancestor,
482 )?;
482 )?;
483 has_dirstate_node_or_is_ignored = true
483 true
484 }
484 }
485 Left(dirstate_node) => {
485 Left(dirstate_node) => {
486 self.traverse_dirstate_only(dirstate_node)?;
486 self.traverse_dirstate_only(dirstate_node)?;
487 has_dirstate_node_or_is_ignored = true;
487 true
488 }
488 }
489 Right(fs_entry) => {
489 Right(fs_entry) => self.traverse_fs_only(
490 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
490 has_ignored_ancestor.force(&self.ignore_fn),
491 has_ignored_ancestor.force(&self.ignore_fn),
491 directory_hg_path,
492 directory_hg_path,
492 fs_entry,
493 fs_entry,
493 ),
494 )
494 };
495 }
496 }
497 Ok(has_dirstate_node_or_is_ignored)
495 Ok(has_dirstate_node_or_is_ignored)
498 })
496 })
499 .try_reduce(|| true, |a, b| Ok(a && b))
497 .try_reduce(|| true, |a, b| Ok(a && b))
500 }
498 }
501
499
502 fn traverse_fs_and_dirstate<'ancestor>(
500 fn traverse_fs_and_dirstate<'ancestor>(
503 &self,
501 &self,
504 fs_entry: &DirEntry,
502 fs_entry: &DirEntry,
505 dirstate_node: NodeRef<'tree, 'on_disk>,
503 dirstate_node: NodeRef<'tree, 'on_disk>,
506 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
504 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
507 ) -> Result<(), DirstateV2ParseError> {
505 ) -> Result<(), DirstateV2ParseError> {
508 let outdated_dircache =
506 let outdated_dircache =
509 self.check_for_outdated_directory_cache(&dirstate_node)?;
507 self.check_for_outdated_directory_cache(&dirstate_node)?;
510 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
508 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
511 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
509 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
512 if !file_or_symlink {
510 if !file_or_symlink {
513 // If we previously had a file here, it was removed (with
511 // If we previously had a file here, it was removed (with
514 // `hg rm` or similar) or deleted before it could be
512 // `hg rm` or similar) or deleted before it could be
515 // replaced by a directory or something else.
513 // replaced by a directory or something else.
516 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
514 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
517 }
515 }
518 if fs_entry.is_dir() {
516 if fs_entry.is_dir() {
519 if self.options.collect_traversed_dirs {
517 if self.options.collect_traversed_dirs {
520 self.outcome
518 self.outcome
521 .lock()
519 .lock()
522 .unwrap()
520 .unwrap()
523 .traversed
521 .traversed
524 .push(hg_path.detach_from_tree())
522 .push(hg_path.detach_from_tree())
525 }
523 }
526 let is_ignored = HasIgnoredAncestor::create(
524 let is_ignored = HasIgnoredAncestor::create(
527 Some(&has_ignored_ancestor),
525 Some(has_ignored_ancestor),
528 hg_path,
526 hg_path,
529 );
527 );
530 let is_at_repo_root = false;
528 let is_at_repo_root = false;
531 let children_all_have_dirstate_node_or_are_ignored = self
529 let children_all_have_dirstate_node_or_are_ignored = self
532 .traverse_fs_directory_and_dirstate(
530 .traverse_fs_directory_and_dirstate(
533 &is_ignored,
531 &is_ignored,
534 dirstate_node.children(self.dmap.on_disk)?,
532 dirstate_node.children(self.dmap.on_disk)?,
535 hg_path,
533 hg_path,
536 fs_entry,
534 fs_entry,
537 dirstate_node.cached_directory_mtime()?,
535 dirstate_node.cached_directory_mtime()?,
538 is_at_repo_root,
536 is_at_repo_root,
539 )?;
537 )?;
540 self.maybe_save_directory_mtime(
538 self.maybe_save_directory_mtime(
541 children_all_have_dirstate_node_or_are_ignored,
539 children_all_have_dirstate_node_or_are_ignored,
542 fs_entry,
540 fs_entry,
543 dirstate_node,
541 dirstate_node,
544 outdated_dircache,
542 outdated_dircache,
545 )?
543 )?
546 } else {
544 } else {
547 if file_or_symlink && self.matcher.matches(&hg_path) {
545 if file_or_symlink && self.matcher.matches(hg_path) {
548 if let Some(entry) = dirstate_node.entry()? {
546 if let Some(entry) = dirstate_node.entry()? {
549 if !entry.any_tracked() {
547 if !entry.any_tracked() {
550 // Forward-compat if we start tracking unknown/ignored
548 // Forward-compat if we start tracking unknown/ignored
551 // files for caching reasons
549 // files for caching reasons
552 self.mark_unknown_or_ignored(
550 self.mark_unknown_or_ignored(
553 has_ignored_ancestor.force(&self.ignore_fn),
551 has_ignored_ancestor.force(&self.ignore_fn),
554 &hg_path,
552 hg_path,
555 );
553 );
556 }
554 }
557 if entry.added() {
555 if entry.added() {
558 self.push_outcome(Outcome::Added, &dirstate_node)?;
556 self.push_outcome(Outcome::Added, &dirstate_node)?;
559 } else if entry.removed() {
557 } else if entry.removed() {
560 self.push_outcome(Outcome::Removed, &dirstate_node)?;
558 self.push_outcome(Outcome::Removed, &dirstate_node)?;
561 } else if entry.modified() {
559 } else if entry.modified() {
562 self.push_outcome(Outcome::Modified, &dirstate_node)?;
560 self.push_outcome(Outcome::Modified, &dirstate_node)?;
563 } else {
561 } else {
564 self.handle_normal_file(&dirstate_node, fs_entry)?;
562 self.handle_normal_file(&dirstate_node, fs_entry)?;
565 }
563 }
566 } else {
564 } else {
567 // `node.entry.is_none()` indicates a "directory"
565 // `node.entry.is_none()` indicates a "directory"
568 // node, but the filesystem has a file
566 // node, but the filesystem has a file
569 self.mark_unknown_or_ignored(
567 self.mark_unknown_or_ignored(
570 has_ignored_ancestor.force(&self.ignore_fn),
568 has_ignored_ancestor.force(&self.ignore_fn),
571 hg_path,
569 hg_path,
572 );
570 );
573 }
571 }
574 }
572 }
575
573
576 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
574 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
577 {
575 {
578 self.traverse_dirstate_only(child_node)?
576 self.traverse_dirstate_only(child_node)?
579 }
577 }
580 }
578 }
581 Ok(())
579 Ok(())
582 }
580 }
583
581
584 /// Save directory mtime if applicable.
582 /// Save directory mtime if applicable.
585 ///
583 ///
586 /// `outdated_directory_cache` is `true` if we've just invalidated the
584 /// `outdated_directory_cache` is `true` if we've just invalidated the
587 /// cache for this directory in `check_for_outdated_directory_cache`,
585 /// cache for this directory in `check_for_outdated_directory_cache`,
588 /// which forces the update.
586 /// which forces the update.
589 fn maybe_save_directory_mtime(
587 fn maybe_save_directory_mtime(
590 &self,
588 &self,
591 children_all_have_dirstate_node_or_are_ignored: bool,
589 children_all_have_dirstate_node_or_are_ignored: bool,
592 directory_entry: &DirEntry,
590 directory_entry: &DirEntry,
593 dirstate_node: NodeRef<'tree, 'on_disk>,
591 dirstate_node: NodeRef<'tree, 'on_disk>,
594 outdated_directory_cache: bool,
592 outdated_directory_cache: bool,
595 ) -> Result<(), DirstateV2ParseError> {
593 ) -> Result<(), DirstateV2ParseError> {
596 if !children_all_have_dirstate_node_or_are_ignored {
594 if !children_all_have_dirstate_node_or_are_ignored {
597 return Ok(());
595 return Ok(());
598 }
596 }
599 // All filesystem directory entries from `read_dir` have a
597 // All filesystem directory entries from `read_dir` have a
600 // corresponding node in the dirstate, so we can reconstitute the
598 // corresponding node in the dirstate, so we can reconstitute the
601 // names of those entries without calling `read_dir` again.
599 // names of those entries without calling `read_dir` again.
602
600
603 // TODO: use let-else here and below when available:
601 // TODO: use let-else here and below when available:
604 // https://github.com/rust-lang/rust/issues/87335
602 // https://github.com/rust-lang/rust/issues/87335
605 let status_start = if let Some(status_start) =
603 let status_start = if let Some(status_start) =
606 &self.filesystem_time_at_status_start
604 &self.filesystem_time_at_status_start
607 {
605 {
608 status_start
606 status_start
609 } else {
607 } else {
610 return Ok(());
608 return Ok(());
611 };
609 };
612
610
613 // Although the Rust standard library’s `SystemTime` type
611 // Although the Rust standard library’s `SystemTime` type
614 // has nanosecond precision, the times reported for a
612 // has nanosecond precision, the times reported for a
615 // directory’s (or file’s) modified time may have lower
613 // directory’s (or file’s) modified time may have lower
616 // resolution based on the filesystem (for example ext3
614 // resolution based on the filesystem (for example ext3
617 // only stores integer seconds), kernel (see
615 // only stores integer seconds), kernel (see
618 // https://stackoverflow.com/a/14393315/1162888), etc.
616 // https://stackoverflow.com/a/14393315/1162888), etc.
619 let metadata = match directory_entry.symlink_metadata() {
617 let metadata = match directory_entry.symlink_metadata() {
620 Ok(meta) => meta,
618 Ok(meta) => meta,
621 Err(_) => return Ok(()),
619 Err(_) => return Ok(()),
622 };
620 };
623 let directory_mtime = if let Ok(option) =
621
624 TruncatedTimestamp::for_reliable_mtime_of(&metadata, status_start)
622 let directory_mtime = match TruncatedTimestamp::for_reliable_mtime_of(
625 {
623 &metadata,
626 if let Some(directory_mtime) = option {
624 status_start,
627 directory_mtime
625 ) {
628 } else {
626 Ok(Some(directory_mtime)) => directory_mtime,
627 Ok(None) => {
629 // The directory was modified too recently,
628 // The directory was modified too recently,
630 // don’t cache its `read_dir` results.
629 // don’t cache its `read_dir` results.
631 //
630 //
632 // 1. A change to this directory (direct child was
631 // 1. A change to this directory (direct child was
633 // added or removed) cause its mtime to be set
632 // added or removed) cause its mtime to be set
634 // (possibly truncated) to `directory_mtime`
633 // (possibly truncated) to `directory_mtime`
635 // 2. This `status` algorithm calls `read_dir`
634 // 2. This `status` algorithm calls `read_dir`
636 // 3. An other change is made to the same directory is
635 // 3. An other change is made to the same directory is
637 // made so that calling `read_dir` agin would give
636 // made so that calling `read_dir` agin would give
638 // different results, but soon enough after 1. that
637 // different results, but soon enough after 1. that
639 // the mtime stays the same
638 // the mtime stays the same
640 //
639 //
641 // On a system where the time resolution poor, this
640 // On a system where the time resolution poor, this
642 // scenario is not unlikely if all three steps are caused
641 // scenario is not unlikely if all three steps are caused
643 // by the same script.
642 // by the same script.
644 return Ok(());
643 return Ok(());
645 }
644 }
646 } else {
645 Err(_) => {
647 // OS/libc does not support mtime?
646 // OS/libc does not support mtime?
648 return Ok(());
647 return Ok(());
648 }
649 };
649 };
650 // We’ve observed (through `status_start`) that time has
650 // We’ve observed (through `status_start`) that time has
651 // “progressed” since `directory_mtime`, so any further
651 // “progressed” since `directory_mtime`, so any further
652 // change to this directory is extremely likely to cause a
652 // change to this directory is extremely likely to cause a
653 // different mtime.
653 // different mtime.
654 //
654 //
655 // Having the same mtime again is not entirely impossible
655 // Having the same mtime again is not entirely impossible
656 // since the system clock is not monotonous. It could jump
656 // since the system clock is not monotonous. It could jump
657 // backward to some point before `directory_mtime`, then a
657 // backward to some point before `directory_mtime`, then a
658 // directory change could potentially happen during exactly
658 // directory change could potentially happen during exactly
659 // the wrong tick.
659 // the wrong tick.
660 //
660 //
661 // We deem this scenario (unlike the previous one) to be
661 // We deem this scenario (unlike the previous one) to be
662 // unlikely enough in practice.
662 // unlikely enough in practice.
663
663
664 let is_up_to_date = if let Some(cached) =
664 let is_up_to_date = if let Some(cached) =
665 dirstate_node.cached_directory_mtime()?
665 dirstate_node.cached_directory_mtime()?
666 {
666 {
667 !outdated_directory_cache && cached.likely_equal(directory_mtime)
667 !outdated_directory_cache && cached.likely_equal(directory_mtime)
668 } else {
668 } else {
669 false
669 false
670 };
670 };
671 if !is_up_to_date {
671 if !is_up_to_date {
672 let hg_path = dirstate_node
672 let hg_path = dirstate_node
673 .full_path_borrowed(self.dmap.on_disk)?
673 .full_path_borrowed(self.dmap.on_disk)?
674 .detach_from_tree();
674 .detach_from_tree();
675 self.new_cacheable_directories
675 self.new_cacheable_directories
676 .lock()
676 .lock()
677 .unwrap()
677 .unwrap()
678 .push((hg_path, directory_mtime))
678 .push((hg_path, directory_mtime))
679 }
679 }
680 Ok(())
680 Ok(())
681 }
681 }
682
682
683 /// A file that is clean in the dirstate was found in the filesystem
683 /// A file that is clean in the dirstate was found in the filesystem
684 fn handle_normal_file(
684 fn handle_normal_file(
685 &self,
685 &self,
686 dirstate_node: &NodeRef<'tree, 'on_disk>,
686 dirstate_node: &NodeRef<'tree, 'on_disk>,
687 fs_entry: &DirEntry,
687 fs_entry: &DirEntry,
688 ) -> Result<(), DirstateV2ParseError> {
688 ) -> Result<(), DirstateV2ParseError> {
689 // Keep the low 31 bits
689 // Keep the low 31 bits
690 fn truncate_u64(value: u64) -> i32 {
690 fn truncate_u64(value: u64) -> i32 {
691 (value & 0x7FFF_FFFF) as i32
691 (value & 0x7FFF_FFFF) as i32
692 }
692 }
693
693
694 let fs_metadata = match fs_entry.symlink_metadata() {
694 let fs_metadata = match fs_entry.symlink_metadata() {
695 Ok(meta) => meta,
695 Ok(meta) => meta,
696 Err(_) => return Ok(()),
696 Err(_) => return Ok(()),
697 };
697 };
698
698
699 let entry = dirstate_node
699 let entry = dirstate_node
700 .entry()?
700 .entry()?
701 .expect("handle_normal_file called with entry-less node");
701 .expect("handle_normal_file called with entry-less node");
702 let mode_changed =
702 let mode_changed =
703 || self.options.check_exec && entry.mode_changed(&fs_metadata);
703 || self.options.check_exec && entry.mode_changed(&fs_metadata);
704 let size = entry.size();
704 let size = entry.size();
705 let size_changed = size != truncate_u64(fs_metadata.len());
705 let size_changed = size != truncate_u64(fs_metadata.len());
706 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
706 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
707 // issue6456: Size returned may be longer due to encryption
707 // issue6456: Size returned may be longer due to encryption
708 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
708 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
709 self.push_outcome(Outcome::Unsure, dirstate_node)?
709 self.push_outcome(Outcome::Unsure, dirstate_node)?
710 } else if dirstate_node.has_copy_source()
710 } else if dirstate_node.has_copy_source()
711 || entry.is_from_other_parent()
711 || entry.is_from_other_parent()
712 || (size >= 0 && (size_changed || mode_changed()))
712 || (size >= 0 && (size_changed || mode_changed()))
713 {
713 {
714 self.push_outcome(Outcome::Modified, dirstate_node)?
714 self.push_outcome(Outcome::Modified, dirstate_node)?
715 } else {
715 } else {
716 let mtime_looks_clean;
716 let mtime_looks_clean = if let Some(dirstate_mtime) =
717 if let Some(dirstate_mtime) = entry.truncated_mtime() {
717 entry.truncated_mtime()
718 {
718 let fs_mtime = TruncatedTimestamp::for_mtime_of(&fs_metadata)
719 let fs_mtime = TruncatedTimestamp::for_mtime_of(&fs_metadata)
719 .expect("OS/libc does not support mtime?");
720 .expect("OS/libc does not support mtime?");
720 // There might be a change in the future if for example the
721 // There might be a change in the future if for example the
721 // internal clock become off while process run, but this is a
722 // internal clock become off while process run, but this is a
722 // case where the issues the user would face
723 // case where the issues the user would face
723 // would be a lot worse and there is nothing we
724 // would be a lot worse and there is nothing we
724 // can really do.
725 // can really do.
725 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
726 fs_mtime.likely_equal(dirstate_mtime)
726 } else {
727 } else {
727 // No mtime in the dirstate entry
728 // No mtime in the dirstate entry
728 mtime_looks_clean = false
729 false
729 };
730 };
730 if !mtime_looks_clean {
731 if !mtime_looks_clean {
731 self.push_outcome(Outcome::Unsure, dirstate_node)?
732 self.push_outcome(Outcome::Unsure, dirstate_node)?
732 } else if self.options.list_clean {
733 } else if self.options.list_clean {
733 self.push_outcome(Outcome::Clean, dirstate_node)?
734 self.push_outcome(Outcome::Clean, dirstate_node)?
734 }
735 }
735 }
736 }
736 Ok(())
737 Ok(())
737 }
738 }
738
739
739 /// A node in the dirstate tree has no corresponding filesystem entry
740 /// A node in the dirstate tree has no corresponding filesystem entry
740 fn traverse_dirstate_only(
741 fn traverse_dirstate_only(
741 &self,
742 &self,
742 dirstate_node: NodeRef<'tree, 'on_disk>,
743 dirstate_node: NodeRef<'tree, 'on_disk>,
743 ) -> Result<(), DirstateV2ParseError> {
744 ) -> Result<(), DirstateV2ParseError> {
744 self.check_for_outdated_directory_cache(&dirstate_node)?;
745 self.check_for_outdated_directory_cache(&dirstate_node)?;
745 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
746 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
746 dirstate_node
747 dirstate_node
747 .children(self.dmap.on_disk)?
748 .children(self.dmap.on_disk)?
748 .par_iter()
749 .par_iter()
749 .map(|child_node| self.traverse_dirstate_only(child_node))
750 .map(|child_node| self.traverse_dirstate_only(child_node))
750 .collect()
751 .collect()
751 }
752 }
752
753
753 /// A node in the dirstate tree has no corresponding *file* on the
754 /// A node in the dirstate tree has no corresponding *file* on the
754 /// filesystem
755 /// filesystem
755 ///
756 ///
756 /// Does nothing on a "directory" node
757 /// Does nothing on a "directory" node
757 fn mark_removed_or_deleted_if_file(
758 fn mark_removed_or_deleted_if_file(
758 &self,
759 &self,
759 dirstate_node: &NodeRef<'tree, 'on_disk>,
760 dirstate_node: &NodeRef<'tree, 'on_disk>,
760 ) -> Result<(), DirstateV2ParseError> {
761 ) -> Result<(), DirstateV2ParseError> {
761 if let Some(entry) = dirstate_node.entry()? {
762 if let Some(entry) = dirstate_node.entry()? {
762 if !entry.any_tracked() {
763 if !entry.any_tracked() {
763 // Future-compat for when we start storing ignored and unknown
764 // Future-compat for when we start storing ignored and unknown
764 // files for caching reasons
765 // files for caching reasons
765 return Ok(());
766 return Ok(());
766 }
767 }
767 let path = dirstate_node.full_path(self.dmap.on_disk)?;
768 let path = dirstate_node.full_path(self.dmap.on_disk)?;
768 if self.matcher.matches(path) {
769 if self.matcher.matches(path) {
769 if entry.removed() {
770 if entry.removed() {
770 self.push_outcome(Outcome::Removed, dirstate_node)?
771 self.push_outcome(Outcome::Removed, dirstate_node)?
771 } else {
772 } else {
772 self.push_outcome(Outcome::Deleted, &dirstate_node)?
773 self.push_outcome(Outcome::Deleted, dirstate_node)?
773 }
774 }
774 }
775 }
775 }
776 }
776 Ok(())
777 Ok(())
777 }
778 }
778
779
779 /// Something in the filesystem has no corresponding dirstate node
780 /// Something in the filesystem has no corresponding dirstate node
780 ///
781 ///
781 /// Returns whether that path is ignored
782 /// Returns whether that path is ignored
782 fn traverse_fs_only(
783 fn traverse_fs_only(
783 &self,
784 &self,
784 has_ignored_ancestor: bool,
785 has_ignored_ancestor: bool,
785 directory_hg_path: &HgPath,
786 directory_hg_path: &HgPath,
786 fs_entry: &DirEntry,
787 fs_entry: &DirEntry,
787 ) -> bool {
788 ) -> bool {
788 let hg_path = directory_hg_path.join(&fs_entry.hg_path);
789 let hg_path = directory_hg_path.join(&fs_entry.hg_path);
789 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
790 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
790 if fs_entry.is_dir() {
791 if fs_entry.is_dir() {
791 let is_ignored =
792 let is_ignored =
792 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
793 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
793 let traverse_children = if is_ignored {
794 let traverse_children = if is_ignored {
794 // Descendants of an ignored directory are all ignored
795 // Descendants of an ignored directory are all ignored
795 self.options.list_ignored
796 self.options.list_ignored
796 } else {
797 } else {
797 // Descendants of an unknown directory may be either unknown or
798 // Descendants of an unknown directory may be either unknown or
798 // ignored
799 // ignored
799 self.options.list_unknown || self.options.list_ignored
800 self.options.list_unknown || self.options.list_ignored
800 };
801 };
801 if traverse_children {
802 if traverse_children {
802 let is_at_repo_root = false;
803 let is_at_repo_root = false;
803 if let Ok(children_fs_entries) =
804 if let Ok(children_fs_entries) =
804 self.read_dir(&hg_path, &fs_entry.fs_path, is_at_repo_root)
805 self.read_dir(&hg_path, &fs_entry.fs_path, is_at_repo_root)
805 {
806 {
806 children_fs_entries.par_iter().for_each(|child_fs_entry| {
807 children_fs_entries.par_iter().for_each(|child_fs_entry| {
807 self.traverse_fs_only(
808 self.traverse_fs_only(
808 is_ignored,
809 is_ignored,
809 &hg_path,
810 &hg_path,
810 child_fs_entry,
811 child_fs_entry,
811 );
812 );
812 })
813 })
813 }
814 }
814 if self.options.collect_traversed_dirs {
815 if self.options.collect_traversed_dirs {
815 self.outcome.lock().unwrap().traversed.push(hg_path.into())
816 self.outcome.lock().unwrap().traversed.push(hg_path.into())
816 }
817 }
817 }
818 }
818 is_ignored
819 is_ignored
820 } else if file_or_symlink {
821 if self.matcher.matches(&hg_path) {
822 self.mark_unknown_or_ignored(
823 has_ignored_ancestor,
824 &BorrowedPath::InMemory(&hg_path),
825 )
826 } else {
827 // We haven’t computed whether this path is ignored. It
828 // might not be, and a future run of status might have a
829 // different matcher that matches it. So treat it as not
830 // ignored. That is, inhibit readdir caching of the parent
831 // directory.
832 false
833 }
819 } else {
834 } else {
820 if file_or_symlink {
835 // This is neither a directory, a plain file, or a symlink.
821 if self.matcher.matches(&hg_path) {
836 // Treat it like an ignored file.
822 self.mark_unknown_or_ignored(
837 true
823 has_ignored_ancestor,
824 &BorrowedPath::InMemory(&hg_path),
825 )
826 } else {
827 // We haven’t computed whether this path is ignored. It
828 // might not be, and a future run of status might have a
829 // different matcher that matches it. So treat it as not
830 // ignored. That is, inhibit readdir caching of the parent
831 // directory.
832 false
833 }
834 } else {
835 // This is neither a directory, a plain file, or a symlink.
836 // Treat it like an ignored file.
837 true
838 }
839 }
838 }
840 }
839 }
841
840
842 /// Returns whether that path is ignored
841 /// Returns whether that path is ignored
843 fn mark_unknown_or_ignored(
842 fn mark_unknown_or_ignored(
844 &self,
843 &self,
845 has_ignored_ancestor: bool,
844 has_ignored_ancestor: bool,
846 hg_path: &BorrowedPath<'_, 'on_disk>,
845 hg_path: &BorrowedPath<'_, 'on_disk>,
847 ) -> bool {
846 ) -> bool {
848 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
847 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
849 if is_ignored {
848 if is_ignored {
850 if self.options.list_ignored {
849 if self.options.list_ignored {
851 self.push_outcome_without_copy_source(
850 self.push_outcome_without_copy_source(
852 Outcome::Ignored,
851 Outcome::Ignored,
853 hg_path,
852 hg_path,
854 )
853 )
855 }
854 }
856 } else {
855 } else if self.options.list_unknown {
857 if self.options.list_unknown {
856 self.push_outcome_without_copy_source(Outcome::Unknown, hg_path)
858 self.push_outcome_without_copy_source(
859 Outcome::Unknown,
860 hg_path,
861 )
862 }
863 }
857 }
864 is_ignored
858 is_ignored
865 }
859 }
866 }
860 }
867
861
868 /// Since [`std::fs::FileType`] cannot be built directly, we emulate what we
862 /// Since [`std::fs::FileType`] cannot be built directly, we emulate what we
869 /// care about.
863 /// care about.
870 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
864 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
871 enum FakeFileType {
865 enum FakeFileType {
872 File,
866 File,
873 Directory,
867 Directory,
874 Symlink,
868 Symlink,
875 }
869 }
876
870
877 impl TryFrom<std::fs::FileType> for FakeFileType {
871 impl TryFrom<std::fs::FileType> for FakeFileType {
878 type Error = ();
872 type Error = ();
879
873
880 fn try_from(f: std::fs::FileType) -> Result<Self, Self::Error> {
874 fn try_from(f: std::fs::FileType) -> Result<Self, Self::Error> {
881 if f.is_dir() {
875 if f.is_dir() {
882 Ok(Self::Directory)
876 Ok(Self::Directory)
883 } else if f.is_file() {
877 } else if f.is_file() {
884 Ok(Self::File)
878 Ok(Self::File)
885 } else if f.is_symlink() {
879 } else if f.is_symlink() {
886 Ok(Self::Symlink)
880 Ok(Self::Symlink)
887 } else {
881 } else {
888 // Things like FIFO etc.
882 // Things like FIFO etc.
889 Err(())
883 Err(())
890 }
884 }
891 }
885 }
892 }
886 }
893
887
894 struct DirEntry<'a> {
888 struct DirEntry<'a> {
895 /// Path as stored in the dirstate, or just the filename for optimization.
889 /// Path as stored in the dirstate, or just the filename for optimization.
896 hg_path: HgPathCow<'a>,
890 hg_path: HgPathCow<'a>,
897 /// Filesystem path
891 /// Filesystem path
898 fs_path: Cow<'a, Path>,
892 fs_path: Cow<'a, Path>,
899 /// Lazily computed
893 /// Lazily computed
900 symlink_metadata: Option<std::fs::Metadata>,
894 symlink_metadata: Option<std::fs::Metadata>,
901 /// Already computed for ergonomics.
895 /// Already computed for ergonomics.
902 file_type: FakeFileType,
896 file_type: FakeFileType,
903 }
897 }
904
898
905 impl<'a> DirEntry<'a> {
899 impl<'a> DirEntry<'a> {
906 /// Returns **unsorted** entries in the given directory, with name,
900 /// Returns **unsorted** entries in the given directory, with name,
907 /// metadata and file type.
901 /// metadata and file type.
908 ///
902 ///
909 /// If a `.hg` sub-directory is encountered:
903 /// If a `.hg` sub-directory is encountered:
910 ///
904 ///
911 /// * At the repository root, ignore that sub-directory
905 /// * At the repository root, ignore that sub-directory
912 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
906 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
913 /// list instead.
907 /// list instead.
914 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
908 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
915 // `read_dir` returns a "not found" error for the empty path
909 // `read_dir` returns a "not found" error for the empty path
916 let at_cwd = path == Path::new("");
910 let at_cwd = path == Path::new("");
917 let read_dir_path = if at_cwd { Path::new(".") } else { path };
911 let read_dir_path = if at_cwd { Path::new(".") } else { path };
918 let mut results = Vec::new();
912 let mut results = Vec::new();
919 for entry in read_dir_path.read_dir()? {
913 for entry in read_dir_path.read_dir()? {
920 let entry = entry?;
914 let entry = entry?;
921 let file_type = match entry.file_type() {
915 let file_type = match entry.file_type() {
922 Ok(v) => v,
916 Ok(v) => v,
923 Err(e) => {
917 Err(e) => {
924 // race with file deletion?
918 // race with file deletion?
925 if e.kind() == std::io::ErrorKind::NotFound {
919 if e.kind() == std::io::ErrorKind::NotFound {
926 continue;
920 continue;
927 } else {
921 } else {
928 return Err(e);
922 return Err(e);
929 }
923 }
930 }
924 }
931 };
925 };
932 let file_name = entry.file_name();
926 let file_name = entry.file_name();
933 // FIXME don't do this when cached
927 // FIXME don't do this when cached
934 if file_name == ".hg" {
928 if file_name == ".hg" {
935 if is_at_repo_root {
929 if is_at_repo_root {
936 // Skip the repo’s own .hg (might be a symlink)
930 // Skip the repo’s own .hg (might be a symlink)
937 continue;
931 continue;
938 } else if file_type.is_dir() {
932 } else if file_type.is_dir() {
939 // A .hg sub-directory at another location means a subrepo,
933 // A .hg sub-directory at another location means a subrepo,
940 // skip it entirely.
934 // skip it entirely.
941 return Ok(Vec::new());
935 return Ok(Vec::new());
942 }
936 }
943 }
937 }
944 let full_path = if at_cwd {
938 let full_path = if at_cwd {
945 file_name.clone().into()
939 file_name.clone().into()
946 } else {
940 } else {
947 entry.path()
941 entry.path()
948 };
942 };
949 let filename =
943 let filename =
950 Cow::Owned(get_bytes_from_os_string(file_name).into());
944 Cow::Owned(get_bytes_from_os_string(file_name).into());
951 let file_type = match FakeFileType::try_from(file_type) {
945 let file_type = match FakeFileType::try_from(file_type) {
952 Ok(file_type) => file_type,
946 Ok(file_type) => file_type,
953 Err(_) => continue,
947 Err(_) => continue,
954 };
948 };
955 results.push(DirEntry {
949 results.push(DirEntry {
956 hg_path: filename,
950 hg_path: filename,
957 fs_path: Cow::Owned(full_path.to_path_buf()),
951 fs_path: Cow::Owned(full_path.to_path_buf()),
958 symlink_metadata: None,
952 symlink_metadata: None,
959 file_type,
953 file_type,
960 })
954 })
961 }
955 }
962 Ok(results)
956 Ok(results)
963 }
957 }
964
958
965 fn symlink_metadata(&self) -> Result<std::fs::Metadata, std::io::Error> {
959 fn symlink_metadata(&self) -> Result<std::fs::Metadata, std::io::Error> {
966 match &self.symlink_metadata {
960 match &self.symlink_metadata {
967 Some(meta) => Ok(meta.clone()),
961 Some(meta) => Ok(meta.clone()),
968 None => std::fs::symlink_metadata(&self.fs_path),
962 None => std::fs::symlink_metadata(&self.fs_path),
969 }
963 }
970 }
964 }
971
965
972 fn is_dir(&self) -> bool {
966 fn is_dir(&self) -> bool {
973 self.file_type == FakeFileType::Directory
967 self.file_type == FakeFileType::Directory
974 }
968 }
975
969
976 fn is_file(&self) -> bool {
970 fn is_file(&self) -> bool {
977 self.file_type == FakeFileType::File
971 self.file_type == FakeFileType::File
978 }
972 }
979
973
980 fn is_symlink(&self) -> bool {
974 fn is_symlink(&self) -> bool {
981 self.file_type == FakeFileType::Symlink
975 self.file_type == FakeFileType::Symlink
982 }
976 }
983 }
977 }
984
978
985 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
979 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
986 /// of the give repository.
980 /// of the give repository.
987 ///
981 ///
988 /// This is similar to `SystemTime::now()`, with the result truncated to the
982 /// This is similar to `SystemTime::now()`, with the result truncated to the
989 /// same time resolution as other files’ modification times. Using `.hg`
983 /// same time resolution as other files’ modification times. Using `.hg`
990 /// instead of the system’s default temporary directory (such as `/tmp`) makes
984 /// instead of the system’s default temporary directory (such as `/tmp`) makes
991 /// it more likely the temporary file is in the same disk partition as contents
985 /// it more likely the temporary file is in the same disk partition as contents
992 /// of the working directory, which can matter since different filesystems may
986 /// of the working directory, which can matter since different filesystems may
993 /// store timestamps with different resolutions.
987 /// store timestamps with different resolutions.
994 ///
988 ///
995 /// This may fail, typically if we lack write permissions. In that case we
989 /// This may fail, typically if we lack write permissions. In that case we
996 /// should continue the `status()` algoritm anyway and consider the current
990 /// should continue the `status()` algoritm anyway and consider the current
997 /// date/time to be unknown.
991 /// date/time to be unknown.
998 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
992 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
999 tempfile::tempfile_in(repo_root.join(".hg"))?
993 tempfile::tempfile_in(repo_root.join(".hg"))?
1000 .metadata()?
994 .metadata()?
1001 .modified()
995 .modified()
1002 }
996 }
@@ -1,695 +1,695 b''
1 // discovery.rs
1 // discovery.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Discovery operations
8 //! Discovery operations
9 //!
9 //!
10 //! This is a Rust counterpart to the `partialdiscovery` class of
10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 //! `mercurial.setdiscovery`
11 //! `mercurial.setdiscovery`
12
12
13 use super::{Graph, GraphError, Revision, NULL_REVISION};
13 use super::{Graph, GraphError, Revision, NULL_REVISION};
14 use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
14 use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
15 use rand::seq::SliceRandom;
15 use rand::seq::SliceRandom;
16 use rand::{thread_rng, RngCore, SeedableRng};
16 use rand::{thread_rng, RngCore, SeedableRng};
17 use std::cmp::{max, min};
17 use std::cmp::{max, min};
18 use std::collections::{HashSet, VecDeque};
18 use std::collections::{HashSet, VecDeque};
19
19
20 type Rng = rand_pcg::Pcg32;
20 type Rng = rand_pcg::Pcg32;
21 type Seed = [u8; 16];
21 type Seed = [u8; 16];
22
22
23 pub struct PartialDiscovery<G: Graph + Clone> {
23 pub struct PartialDiscovery<G: Graph + Clone> {
24 target_heads: Option<Vec<Revision>>,
24 target_heads: Option<Vec<Revision>>,
25 graph: G, // plays the role of self._repo
25 graph: G, // plays the role of self._repo
26 common: MissingAncestors<G>,
26 common: MissingAncestors<G>,
27 undecided: Option<HashSet<Revision>>,
27 undecided: Option<HashSet<Revision>>,
28 children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,
28 children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,
29 missing: HashSet<Revision>,
29 missing: HashSet<Revision>,
30 rng: Rng,
30 rng: Rng,
31 respect_size: bool,
31 respect_size: bool,
32 randomize: bool,
32 randomize: bool,
33 }
33 }
34
34
35 pub struct DiscoveryStats {
35 pub struct DiscoveryStats {
36 pub undecided: Option<usize>,
36 pub undecided: Option<usize>,
37 }
37 }
38
38
39 /// Update an existing sample to match the expected size
39 /// Update an existing sample to match the expected size
40 ///
40 ///
41 /// The sample is updated with revisions exponentially distant from each
41 /// The sample is updated with revisions exponentially distant from each
42 /// element of `heads`.
42 /// element of `heads`.
43 ///
43 ///
44 /// If a target size is specified, the sampling will stop once this size is
44 /// If a target size is specified, the sampling will stop once this size is
45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
46 /// reached.
46 /// reached.
47 ///
47 ///
48 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
48 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
49 /// represented by `parentfn`
49 /// represented by `parentfn`
50 /// - `heads`: set of DAG head revs
50 /// - `heads`: set of DAG head revs
51 /// - `sample`: a sample to update
51 /// - `sample`: a sample to update
52 /// - `parentfn`: a callable to resolve parents for a revision
52 /// - `parentfn`: a callable to resolve parents for a revision
53 /// - `quicksamplesize`: optional target size of the sample
53 /// - `quicksamplesize`: optional target size of the sample
54 fn update_sample<I>(
54 fn update_sample<I>(
55 revs: Option<&HashSet<Revision>>,
55 revs: Option<&HashSet<Revision>>,
56 heads: impl IntoIterator<Item = Revision>,
56 heads: impl IntoIterator<Item = Revision>,
57 sample: &mut HashSet<Revision>,
57 sample: &mut HashSet<Revision>,
58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
59 quicksamplesize: Option<usize>,
59 quicksamplesize: Option<usize>,
60 ) -> Result<(), GraphError>
60 ) -> Result<(), GraphError>
61 where
61 where
62 I: Iterator<Item = Revision>,
62 I: Iterator<Item = Revision>,
63 {
63 {
64 let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();
64 let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();
65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
66 let mut factor: u32 = 1;
66 let mut factor: u32 = 1;
67 let mut seen: HashSet<Revision> = HashSet::new();
67 let mut seen: HashSet<Revision> = HashSet::new();
68 while let Some(current) = visit.pop_front() {
68 while let Some(current) = visit.pop_front() {
69 if !seen.insert(current) {
69 if !seen.insert(current) {
70 continue;
70 continue;
71 }
71 }
72
72
73 let d = *distances.entry(current).or_insert(1);
73 let d = *distances.entry(current).or_insert(1);
74 if d > factor {
74 if d > factor {
75 factor *= 2;
75 factor *= 2;
76 }
76 }
77 if d == factor {
77 if d == factor {
78 sample.insert(current);
78 sample.insert(current);
79 if let Some(sz) = quicksamplesize {
79 if let Some(sz) = quicksamplesize {
80 if sample.len() >= sz {
80 if sample.len() >= sz {
81 return Ok(());
81 return Ok(());
82 }
82 }
83 }
83 }
84 }
84 }
85 for p in parentsfn(current)? {
85 for p in parentsfn(current)? {
86 if let Some(revs) = revs {
86 if let Some(revs) = revs {
87 if !revs.contains(&p) {
87 if !revs.contains(&p) {
88 continue;
88 continue;
89 }
89 }
90 }
90 }
91 distances.entry(p).or_insert(d + 1);
91 distances.entry(p).or_insert(d + 1);
92 visit.push_back(p);
92 visit.push_back(p);
93 }
93 }
94 }
94 }
95 Ok(())
95 Ok(())
96 }
96 }
97
97
98 struct ParentsIterator {
98 struct ParentsIterator {
99 parents: [Revision; 2],
99 parents: [Revision; 2],
100 cur: usize,
100 cur: usize,
101 }
101 }
102
102
103 impl ParentsIterator {
103 impl ParentsIterator {
104 fn graph_parents(
104 fn graph_parents(
105 graph: &impl Graph,
105 graph: &impl Graph,
106 r: Revision,
106 r: Revision,
107 ) -> Result<ParentsIterator, GraphError> {
107 ) -> Result<ParentsIterator, GraphError> {
108 Ok(ParentsIterator {
108 Ok(ParentsIterator {
109 parents: graph.parents(r)?,
109 parents: graph.parents(r)?,
110 cur: 0,
110 cur: 0,
111 })
111 })
112 }
112 }
113 }
113 }
114
114
115 impl Iterator for ParentsIterator {
115 impl Iterator for ParentsIterator {
116 type Item = Revision;
116 type Item = Revision;
117
117
118 fn next(&mut self) -> Option<Revision> {
118 fn next(&mut self) -> Option<Revision> {
119 if self.cur > 1 {
119 if self.cur > 1 {
120 return None;
120 return None;
121 }
121 }
122 let rev = self.parents[self.cur];
122 let rev = self.parents[self.cur];
123 self.cur += 1;
123 self.cur += 1;
124 if rev == NULL_REVISION {
124 if rev == NULL_REVISION {
125 return self.next();
125 return self.next();
126 }
126 }
127 Some(rev)
127 Some(rev)
128 }
128 }
129 }
129 }
130
130
131 impl<G: Graph + Clone> PartialDiscovery<G> {
131 impl<G: Graph + Clone> PartialDiscovery<G> {
132 /// Create a PartialDiscovery object, with the intent
132 /// Create a PartialDiscovery object, with the intent
133 /// of comparing our `::<target_heads>` revset to the contents of another
133 /// of comparing our `::<target_heads>` revset to the contents of another
134 /// repo.
134 /// repo.
135 ///
135 ///
136 /// For now `target_heads` is passed as a vector, and will be used
136 /// For now `target_heads` is passed as a vector, and will be used
137 /// at the first call to `ensure_undecided()`.
137 /// at the first call to `ensure_undecided()`.
138 ///
138 ///
139 /// If we want to make the signature more flexible,
139 /// If we want to make the signature more flexible,
140 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
140 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
141 /// object since we'll keep it in the meanwhile
141 /// object since we'll keep it in the meanwhile
142 ///
142 ///
143 /// The `respect_size` boolean controls how the sampling methods
143 /// The `respect_size` boolean controls how the sampling methods
144 /// will interpret the size argument requested by the caller. If it's
144 /// will interpret the size argument requested by the caller. If it's
145 /// `false`, they are allowed to produce a sample whose size is more
145 /// `false`, they are allowed to produce a sample whose size is more
146 /// appropriate to the situation (typically bigger).
146 /// appropriate to the situation (typically bigger).
147 ///
147 ///
148 /// The `randomize` boolean affects sampling, and specifically how
148 /// The `randomize` boolean affects sampling, and specifically how
149 /// limiting or last-minute expanding is been done:
149 /// limiting or last-minute expanding is been done:
150 ///
150 ///
151 /// If `true`, both will perform random picking from `self.undecided`.
151 /// If `true`, both will perform random picking from `self.undecided`.
152 /// This is currently the best for actual discoveries.
152 /// This is currently the best for actual discoveries.
153 ///
153 ///
154 /// If `false`, a reproductible picking strategy is performed. This is
154 /// If `false`, a reproductible picking strategy is performed. This is
155 /// useful for integration tests.
155 /// useful for integration tests.
156 pub fn new(
156 pub fn new(
157 graph: G,
157 graph: G,
158 target_heads: Vec<Revision>,
158 target_heads: Vec<Revision>,
159 respect_size: bool,
159 respect_size: bool,
160 randomize: bool,
160 randomize: bool,
161 ) -> Self {
161 ) -> Self {
162 let mut seed = [0; 16];
162 let mut seed = [0; 16];
163 if randomize {
163 if randomize {
164 thread_rng().fill_bytes(&mut seed);
164 thread_rng().fill_bytes(&mut seed);
165 }
165 }
166 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
166 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
167 }
167 }
168
168
169 pub fn new_with_seed(
169 pub fn new_with_seed(
170 graph: G,
170 graph: G,
171 target_heads: Vec<Revision>,
171 target_heads: Vec<Revision>,
172 seed: Seed,
172 seed: Seed,
173 respect_size: bool,
173 respect_size: bool,
174 randomize: bool,
174 randomize: bool,
175 ) -> Self {
175 ) -> Self {
176 PartialDiscovery {
176 PartialDiscovery {
177 undecided: None,
177 undecided: None,
178 children_cache: None,
178 children_cache: None,
179 target_heads: Some(target_heads),
179 target_heads: Some(target_heads),
180 graph: graph.clone(),
180 graph: graph.clone(),
181 common: MissingAncestors::new(graph, vec![]),
181 common: MissingAncestors::new(graph, vec![]),
182 missing: HashSet::new(),
182 missing: HashSet::new(),
183 rng: Rng::from_seed(seed),
183 rng: Rng::from_seed(seed),
184 respect_size,
184 respect_size,
185 randomize,
185 randomize,
186 }
186 }
187 }
187 }
188
188
189 /// Extract at most `size` random elements from sample and return them
189 /// Extract at most `size` random elements from sample and return them
190 /// as a vector
190 /// as a vector
191 fn limit_sample(
191 fn limit_sample(
192 &mut self,
192 &mut self,
193 mut sample: Vec<Revision>,
193 mut sample: Vec<Revision>,
194 size: usize,
194 size: usize,
195 ) -> Vec<Revision> {
195 ) -> Vec<Revision> {
196 if !self.randomize {
196 if !self.randomize {
197 sample.sort();
197 sample.sort_unstable();
198 sample.truncate(size);
198 sample.truncate(size);
199 return sample;
199 return sample;
200 }
200 }
201 let sample_len = sample.len();
201 let sample_len = sample.len();
202 if sample_len <= size {
202 if sample_len <= size {
203 return sample;
203 return sample;
204 }
204 }
205 let rng = &mut self.rng;
205 let rng = &mut self.rng;
206 let dropped_size = sample_len - size;
206 let dropped_size = sample_len - size;
207 let limited_slice = if size < dropped_size {
207 let limited_slice = if size < dropped_size {
208 sample.partial_shuffle(rng, size).0
208 sample.partial_shuffle(rng, size).0
209 } else {
209 } else {
210 sample.partial_shuffle(rng, dropped_size).1
210 sample.partial_shuffle(rng, dropped_size).1
211 };
211 };
212 limited_slice.to_owned()
212 limited_slice.to_owned()
213 }
213 }
214
214
215 /// Register revisions known as being common
215 /// Register revisions known as being common
216 pub fn add_common_revisions(
216 pub fn add_common_revisions(
217 &mut self,
217 &mut self,
218 common: impl IntoIterator<Item = Revision>,
218 common: impl IntoIterator<Item = Revision>,
219 ) -> Result<(), GraphError> {
219 ) -> Result<(), GraphError> {
220 let before_len = self.common.get_bases().len();
220 let before_len = self.common.get_bases().len();
221 self.common.add_bases(common);
221 self.common.add_bases(common);
222 if self.common.get_bases().len() == before_len {
222 if self.common.get_bases().len() == before_len {
223 return Ok(());
223 return Ok(());
224 }
224 }
225 if let Some(ref mut undecided) = self.undecided {
225 if let Some(ref mut undecided) = self.undecided {
226 self.common.remove_ancestors_from(undecided)?;
226 self.common.remove_ancestors_from(undecided)?;
227 }
227 }
228 Ok(())
228 Ok(())
229 }
229 }
230
230
231 /// Register revisions known as being missing
231 /// Register revisions known as being missing
232 ///
232 ///
233 /// # Performance note
233 /// # Performance note
234 ///
234 ///
235 /// Except in the most trivial case, the first call of this method has
235 /// Except in the most trivial case, the first call of this method has
236 /// the side effect of computing `self.undecided` set for the first time,
236 /// the side effect of computing `self.undecided` set for the first time,
237 /// and the related caches it might need for efficiency of its internal
237 /// and the related caches it might need for efficiency of its internal
238 /// computation. This is typically faster if more information is
238 /// computation. This is typically faster if more information is
239 /// available in `self.common`. Therefore, for good performance, the
239 /// available in `self.common`. Therefore, for good performance, the
240 /// caller should avoid calling this too early.
240 /// caller should avoid calling this too early.
241 pub fn add_missing_revisions(
241 pub fn add_missing_revisions(
242 &mut self,
242 &mut self,
243 missing: impl IntoIterator<Item = Revision>,
243 missing: impl IntoIterator<Item = Revision>,
244 ) -> Result<(), GraphError> {
244 ) -> Result<(), GraphError> {
245 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
245 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
246 if tovisit.is_empty() {
246 if tovisit.is_empty() {
247 return Ok(());
247 return Ok(());
248 }
248 }
249 self.ensure_children_cache()?;
249 self.ensure_children_cache()?;
250 self.ensure_undecided()?; // for safety of possible future refactors
250 self.ensure_undecided()?; // for safety of possible future refactors
251 let children = self.children_cache.as_ref().unwrap();
251 let children = self.children_cache.as_ref().unwrap();
252 let mut seen: HashSet<Revision> = HashSet::new();
252 let mut seen: HashSet<Revision> = HashSet::new();
253 let undecided_mut = self.undecided.as_mut().unwrap();
253 let undecided_mut = self.undecided.as_mut().unwrap();
254 while let Some(rev) = tovisit.pop_front() {
254 while let Some(rev) = tovisit.pop_front() {
255 if !self.missing.insert(rev) {
255 if !self.missing.insert(rev) {
256 // either it's known to be missing from a previous
256 // either it's known to be missing from a previous
257 // invocation, and there's no need to iterate on its
257 // invocation, and there's no need to iterate on its
258 // children (we now they are all missing)
258 // children (we now they are all missing)
259 // or it's from a previous iteration of this loop
259 // or it's from a previous iteration of this loop
260 // and its children have already been queued
260 // and its children have already been queued
261 continue;
261 continue;
262 }
262 }
263 undecided_mut.remove(&rev);
263 undecided_mut.remove(&rev);
264 match children.get(&rev) {
264 match children.get(&rev) {
265 None => {
265 None => {
266 continue;
266 continue;
267 }
267 }
268 Some(this_children) => {
268 Some(this_children) => {
269 for child in this_children.iter().cloned() {
269 for child in this_children.iter().cloned() {
270 if seen.insert(child) {
270 if seen.insert(child) {
271 tovisit.push_back(child);
271 tovisit.push_back(child);
272 }
272 }
273 }
273 }
274 }
274 }
275 }
275 }
276 }
276 }
277 Ok(())
277 Ok(())
278 }
278 }
279
279
280 /// Do we have any information about the peer?
280 /// Do we have any information about the peer?
281 pub fn has_info(&self) -> bool {
281 pub fn has_info(&self) -> bool {
282 self.common.has_bases()
282 self.common.has_bases()
283 }
283 }
284
284
285 /// Did we acquire full knowledge of our Revisions that the peer has?
285 /// Did we acquire full knowledge of our Revisions that the peer has?
286 pub fn is_complete(&self) -> bool {
286 pub fn is_complete(&self) -> bool {
287 self.undecided.as_ref().map_or(false, HashSet::is_empty)
287 self.undecided.as_ref().map_or(false, HashSet::is_empty)
288 }
288 }
289
289
290 /// Return the heads of the currently known common set of revisions.
290 /// Return the heads of the currently known common set of revisions.
291 ///
291 ///
292 /// If the discovery process is not complete (see `is_complete()`), the
292 /// If the discovery process is not complete (see `is_complete()`), the
293 /// caller must be aware that this is an intermediate state.
293 /// caller must be aware that this is an intermediate state.
294 ///
294 ///
295 /// On the other hand, if it is complete, then this is currently
295 /// On the other hand, if it is complete, then this is currently
296 /// the only way to retrieve the end results of the discovery process.
296 /// the only way to retrieve the end results of the discovery process.
297 ///
297 ///
298 /// We may introduce in the future an `into_common_heads` call that
298 /// We may introduce in the future an `into_common_heads` call that
299 /// would be more appropriate for normal Rust callers, dropping `self`
299 /// would be more appropriate for normal Rust callers, dropping `self`
300 /// if it is complete.
300 /// if it is complete.
301 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
301 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
302 self.common.bases_heads()
302 self.common.bases_heads()
303 }
303 }
304
304
305 /// Force first computation of `self.undecided`
305 /// Force first computation of `self.undecided`
306 ///
306 ///
307 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
307 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
308 /// unwrapped to get workable immutable or mutable references without
308 /// unwrapped to get workable immutable or mutable references without
309 /// any panic.
309 /// any panic.
310 ///
310 ///
311 /// This is an imperative call instead of an access with added lazyness
311 /// This is an imperative call instead of an access with added lazyness
312 /// to reduce easily the scope of mutable borrow for the caller,
312 /// to reduce easily the scope of mutable borrow for the caller,
313 /// compared to undecided(&'a mut self) -> &'a… that would keep it
313 /// compared to undecided(&'a mut self) -> &'a… that would keep it
314 /// as long as the resulting immutable one.
314 /// as long as the resulting immutable one.
315 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
315 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
316 if self.undecided.is_some() {
316 if self.undecided.is_some() {
317 return Ok(());
317 return Ok(());
318 }
318 }
319 let tgt = self.target_heads.take().unwrap();
319 let tgt = self.target_heads.take().unwrap();
320 self.undecided =
320 self.undecided =
321 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
321 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
322 Ok(())
322 Ok(())
323 }
323 }
324
324
325 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
325 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
326 if self.children_cache.is_some() {
326 if self.children_cache.is_some() {
327 return Ok(());
327 return Ok(());
328 }
328 }
329 self.ensure_undecided()?;
329 self.ensure_undecided()?;
330
330
331 let mut children: FastHashMap<Revision, Vec<Revision>> =
331 let mut children: FastHashMap<Revision, Vec<Revision>> =
332 FastHashMap::default();
332 FastHashMap::default();
333 for &rev in self.undecided.as_ref().unwrap() {
333 for &rev in self.undecided.as_ref().unwrap() {
334 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
334 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
335 children.entry(p).or_insert_with(Vec::new).push(rev);
335 children.entry(p).or_insert_with(Vec::new).push(rev);
336 }
336 }
337 }
337 }
338 self.children_cache = Some(children);
338 self.children_cache = Some(children);
339 Ok(())
339 Ok(())
340 }
340 }
341
341
342 /// Provide statistics about the current state of the discovery process
342 /// Provide statistics about the current state of the discovery process
343 pub fn stats(&self) -> DiscoveryStats {
343 pub fn stats(&self) -> DiscoveryStats {
344 DiscoveryStats {
344 DiscoveryStats {
345 undecided: self.undecided.as_ref().map(HashSet::len),
345 undecided: self.undecided.as_ref().map(HashSet::len),
346 }
346 }
347 }
347 }
348
348
349 pub fn take_quick_sample(
349 pub fn take_quick_sample(
350 &mut self,
350 &mut self,
351 headrevs: impl IntoIterator<Item = Revision>,
351 headrevs: impl IntoIterator<Item = Revision>,
352 size: usize,
352 size: usize,
353 ) -> Result<Vec<Revision>, GraphError> {
353 ) -> Result<Vec<Revision>, GraphError> {
354 self.ensure_undecided()?;
354 self.ensure_undecided()?;
355 let mut sample = {
355 let mut sample = {
356 let undecided = self.undecided.as_ref().unwrap();
356 let undecided = self.undecided.as_ref().unwrap();
357 if undecided.len() <= size {
357 if undecided.len() <= size {
358 return Ok(undecided.iter().cloned().collect());
358 return Ok(undecided.iter().cloned().collect());
359 }
359 }
360 dagops::heads(&self.graph, undecided.iter())?
360 dagops::heads(&self.graph, undecided.iter())?
361 };
361 };
362 if sample.len() >= size {
362 if sample.len() >= size {
363 return Ok(self.limit_sample(sample.into_iter().collect(), size));
363 return Ok(self.limit_sample(sample.into_iter().collect(), size));
364 }
364 }
365 update_sample(
365 update_sample(
366 None,
366 None,
367 headrevs,
367 headrevs,
368 &mut sample,
368 &mut sample,
369 |r| ParentsIterator::graph_parents(&self.graph, r),
369 |r| ParentsIterator::graph_parents(&self.graph, r),
370 Some(size),
370 Some(size),
371 )?;
371 )?;
372 Ok(sample.into_iter().collect())
372 Ok(sample.into_iter().collect())
373 }
373 }
374
374
375 /// Extract a sample from `self.undecided`, going from its heads and roots.
375 /// Extract a sample from `self.undecided`, going from its heads and roots.
376 ///
376 ///
377 /// The `size` parameter is used to avoid useless computations if
377 /// The `size` parameter is used to avoid useless computations if
378 /// it turns out to be bigger than the whole set of undecided Revisions.
378 /// it turns out to be bigger than the whole set of undecided Revisions.
379 ///
379 ///
380 /// The sample is taken by using `update_sample` from the heads, then
380 /// The sample is taken by using `update_sample` from the heads, then
381 /// from the roots, working on the reverse DAG,
381 /// from the roots, working on the reverse DAG,
382 /// expressed by `self.children_cache`.
382 /// expressed by `self.children_cache`.
383 ///
383 ///
384 /// No effort is being made to complete or limit the sample to `size`
384 /// No effort is being made to complete or limit the sample to `size`
385 /// but this method returns another interesting size that it derives
385 /// but this method returns another interesting size that it derives
386 /// from its knowledge of the structure of the various sets, leaving
386 /// from its knowledge of the structure of the various sets, leaving
387 /// to the caller the decision to use it or not.
387 /// to the caller the decision to use it or not.
388 fn bidirectional_sample(
388 fn bidirectional_sample(
389 &mut self,
389 &mut self,
390 size: usize,
390 size: usize,
391 ) -> Result<(HashSet<Revision>, usize), GraphError> {
391 ) -> Result<(HashSet<Revision>, usize), GraphError> {
392 self.ensure_undecided()?;
392 self.ensure_undecided()?;
393 {
393 {
394 // we don't want to compute children_cache before this
394 // we don't want to compute children_cache before this
395 // but doing it after extracting self.undecided takes a mutable
395 // but doing it after extracting self.undecided takes a mutable
396 // ref to self while a shareable one is still active.
396 // ref to self while a shareable one is still active.
397 let undecided = self.undecided.as_ref().unwrap();
397 let undecided = self.undecided.as_ref().unwrap();
398 if undecided.len() <= size {
398 if undecided.len() <= size {
399 return Ok((undecided.clone(), size));
399 return Ok((undecided.clone(), size));
400 }
400 }
401 }
401 }
402
402
403 self.ensure_children_cache()?;
403 self.ensure_children_cache()?;
404 let revs = self.undecided.as_ref().unwrap();
404 let revs = self.undecided.as_ref().unwrap();
405 let mut sample: HashSet<Revision> = revs.clone();
405 let mut sample: HashSet<Revision> = revs.clone();
406
406
407 // it's possible that leveraging the children cache would be more
407 // it's possible that leveraging the children cache would be more
408 // efficient here
408 // efficient here
409 dagops::retain_heads(&self.graph, &mut sample)?;
409 dagops::retain_heads(&self.graph, &mut sample)?;
410 let revsheads = sample.clone(); // was again heads(revs) in python
410 let revsheads = sample.clone(); // was again heads(revs) in python
411
411
412 // update from heads
412 // update from heads
413 update_sample(
413 update_sample(
414 Some(revs),
414 Some(revs),
415 revsheads.iter().cloned(),
415 revsheads.iter().cloned(),
416 &mut sample,
416 &mut sample,
417 |r| ParentsIterator::graph_parents(&self.graph, r),
417 |r| ParentsIterator::graph_parents(&self.graph, r),
418 None,
418 None,
419 )?;
419 )?;
420
420
421 // update from roots
421 // update from roots
422 let revroots: HashSet<Revision> =
422 let revroots: HashSet<Revision> =
423 dagops::roots(&self.graph, revs)?.into_iter().collect();
423 dagops::roots(&self.graph, revs)?.into_iter().collect();
424 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
424 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
425
425
426 let children = self.children_cache.as_ref().unwrap();
426 let children = self.children_cache.as_ref().unwrap();
427 let empty_vec: Vec<Revision> = Vec::new();
427 let empty_vec: Vec<Revision> = Vec::new();
428 update_sample(
428 update_sample(
429 Some(revs),
429 Some(revs),
430 revroots,
430 revroots,
431 &mut sample,
431 &mut sample,
432 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
432 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
433 None,
433 None,
434 )?;
434 )?;
435 Ok((sample, prescribed_size))
435 Ok((sample, prescribed_size))
436 }
436 }
437
437
438 /// Fill up sample up to the wished size with random undecided Revisions.
438 /// Fill up sample up to the wished size with random undecided Revisions.
439 ///
439 ///
440 /// This is intended to be used as a last resort completion if the
440 /// This is intended to be used as a last resort completion if the
441 /// regular sampling algorithm returns too few elements.
441 /// regular sampling algorithm returns too few elements.
442 fn random_complete_sample(
442 fn random_complete_sample(
443 &mut self,
443 &mut self,
444 sample: &mut Vec<Revision>,
444 sample: &mut Vec<Revision>,
445 size: usize,
445 size: usize,
446 ) {
446 ) {
447 let sample_len = sample.len();
447 let sample_len = sample.len();
448 if size <= sample_len {
448 if size <= sample_len {
449 return;
449 return;
450 }
450 }
451 let take_from: Vec<Revision> = self
451 let take_from: Vec<Revision> = self
452 .undecided
452 .undecided
453 .as_ref()
453 .as_ref()
454 .unwrap()
454 .unwrap()
455 .iter()
455 .iter()
456 .filter(|&r| !sample.contains(r))
456 .filter(|&r| !sample.contains(r))
457 .cloned()
457 .cloned()
458 .collect();
458 .collect();
459 sample.extend(self.limit_sample(take_from, size - sample_len));
459 sample.extend(self.limit_sample(take_from, size - sample_len));
460 }
460 }
461
461
462 pub fn take_full_sample(
462 pub fn take_full_sample(
463 &mut self,
463 &mut self,
464 size: usize,
464 size: usize,
465 ) -> Result<Vec<Revision>, GraphError> {
465 ) -> Result<Vec<Revision>, GraphError> {
466 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
466 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
467 let size = if self.respect_size {
467 let size = if self.respect_size {
468 size
468 size
469 } else {
469 } else {
470 prescribed_size
470 prescribed_size
471 };
471 };
472 let mut sample =
472 let mut sample =
473 self.limit_sample(sample_set.into_iter().collect(), size);
473 self.limit_sample(sample_set.into_iter().collect(), size);
474 self.random_complete_sample(&mut sample, size);
474 self.random_complete_sample(&mut sample, size);
475 Ok(sample)
475 Ok(sample)
476 }
476 }
477 }
477 }
478
478
479 #[cfg(test)]
479 #[cfg(test)]
480 mod tests {
480 mod tests {
481 use super::*;
481 use super::*;
482 use crate::testing::SampleGraph;
482 use crate::testing::SampleGraph;
483
483
484 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
484 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
485 ///
485 ///
486 /// To avoid actual randomness in these tests, we give it a fixed
486 /// To avoid actual randomness in these tests, we give it a fixed
487 /// random seed, but by default we'll test the random version.
487 /// random seed, but by default we'll test the random version.
488 fn full_disco() -> PartialDiscovery<SampleGraph> {
488 fn full_disco() -> PartialDiscovery<SampleGraph> {
489 PartialDiscovery::new_with_seed(
489 PartialDiscovery::new_with_seed(
490 SampleGraph,
490 SampleGraph,
491 vec![10, 11, 12, 13],
491 vec![10, 11, 12, 13],
492 [0; 16],
492 [0; 16],
493 true,
493 true,
494 true,
494 true,
495 )
495 )
496 }
496 }
497
497
498 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
498 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
499 ///
499 ///
500 /// To avoid actual randomness in tests, we give it a fixed random seed.
500 /// To avoid actual randomness in tests, we give it a fixed random seed.
501 fn disco12() -> PartialDiscovery<SampleGraph> {
501 fn disco12() -> PartialDiscovery<SampleGraph> {
502 PartialDiscovery::new_with_seed(
502 PartialDiscovery::new_with_seed(
503 SampleGraph,
503 SampleGraph,
504 vec![12],
504 vec![12],
505 [0; 16],
505 [0; 16],
506 true,
506 true,
507 true,
507 true,
508 )
508 )
509 }
509 }
510
510
511 fn sorted_undecided(
511 fn sorted_undecided(
512 disco: &PartialDiscovery<SampleGraph>,
512 disco: &PartialDiscovery<SampleGraph>,
513 ) -> Vec<Revision> {
513 ) -> Vec<Revision> {
514 let mut as_vec: Vec<Revision> =
514 let mut as_vec: Vec<Revision> =
515 disco.undecided.as_ref().unwrap().iter().cloned().collect();
515 disco.undecided.as_ref().unwrap().iter().cloned().collect();
516 as_vec.sort();
516 as_vec.sort_unstable();
517 as_vec
517 as_vec
518 }
518 }
519
519
520 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
520 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
521 let mut as_vec: Vec<Revision> =
521 let mut as_vec: Vec<Revision> =
522 disco.missing.iter().cloned().collect();
522 disco.missing.iter().cloned().collect();
523 as_vec.sort();
523 as_vec.sort_unstable();
524 as_vec
524 as_vec
525 }
525 }
526
526
527 fn sorted_common_heads(
527 fn sorted_common_heads(
528 disco: &PartialDiscovery<SampleGraph>,
528 disco: &PartialDiscovery<SampleGraph>,
529 ) -> Result<Vec<Revision>, GraphError> {
529 ) -> Result<Vec<Revision>, GraphError> {
530 let mut as_vec: Vec<Revision> =
530 let mut as_vec: Vec<Revision> =
531 disco.common_heads()?.iter().cloned().collect();
531 disco.common_heads()?.iter().cloned().collect();
532 as_vec.sort();
532 as_vec.sort_unstable();
533 Ok(as_vec)
533 Ok(as_vec)
534 }
534 }
535
535
536 #[test]
536 #[test]
537 fn test_add_common_get_undecided() -> Result<(), GraphError> {
537 fn test_add_common_get_undecided() -> Result<(), GraphError> {
538 let mut disco = full_disco();
538 let mut disco = full_disco();
539 assert_eq!(disco.undecided, None);
539 assert_eq!(disco.undecided, None);
540 assert!(!disco.has_info());
540 assert!(!disco.has_info());
541 assert_eq!(disco.stats().undecided, None);
541 assert_eq!(disco.stats().undecided, None);
542
542
543 disco.add_common_revisions(vec![11, 12])?;
543 disco.add_common_revisions(vec![11, 12])?;
544 assert!(disco.has_info());
544 assert!(disco.has_info());
545 assert!(!disco.is_complete());
545 assert!(!disco.is_complete());
546 assert!(disco.missing.is_empty());
546 assert!(disco.missing.is_empty());
547
547
548 // add_common_revisions did not trigger a premature computation
548 // add_common_revisions did not trigger a premature computation
549 // of `undecided`, let's check that and ask for them
549 // of `undecided`, let's check that and ask for them
550 assert_eq!(disco.undecided, None);
550 assert_eq!(disco.undecided, None);
551 disco.ensure_undecided()?;
551 disco.ensure_undecided()?;
552 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
552 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
553 assert_eq!(disco.stats().undecided, Some(4));
553 assert_eq!(disco.stats().undecided, Some(4));
554 Ok(())
554 Ok(())
555 }
555 }
556
556
557 /// in this test, we pretend that our peer misses exactly (8+10)::
557 /// in this test, we pretend that our peer misses exactly (8+10)::
558 /// and we're comparing all our repo to it (as in a bare push)
558 /// and we're comparing all our repo to it (as in a bare push)
559 #[test]
559 #[test]
560 fn test_discovery() -> Result<(), GraphError> {
560 fn test_discovery() -> Result<(), GraphError> {
561 let mut disco = full_disco();
561 let mut disco = full_disco();
562 disco.add_common_revisions(vec![11, 12])?;
562 disco.add_common_revisions(vec![11, 12])?;
563 disco.add_missing_revisions(vec![8, 10])?;
563 disco.add_missing_revisions(vec![8, 10])?;
564 assert_eq!(sorted_undecided(&disco), vec![5]);
564 assert_eq!(sorted_undecided(&disco), vec![5]);
565 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
565 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
566 assert!(!disco.is_complete());
566 assert!(!disco.is_complete());
567
567
568 disco.add_common_revisions(vec![5])?;
568 disco.add_common_revisions(vec![5])?;
569 assert_eq!(sorted_undecided(&disco), vec![]);
569 assert_eq!(sorted_undecided(&disco), vec![]);
570 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
570 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
571 assert!(disco.is_complete());
571 assert!(disco.is_complete());
572 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
572 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
573 Ok(())
573 Ok(())
574 }
574 }
575
575
576 #[test]
576 #[test]
577 fn test_add_missing_early_continue() -> Result<(), GraphError> {
577 fn test_add_missing_early_continue() -> Result<(), GraphError> {
578 eprintln!("test_add_missing_early_stop");
578 eprintln!("test_add_missing_early_stop");
579 let mut disco = full_disco();
579 let mut disco = full_disco();
580 disco.add_common_revisions(vec![13, 3, 4])?;
580 disco.add_common_revisions(vec![13, 3, 4])?;
581 disco.ensure_children_cache()?;
581 disco.ensure_children_cache()?;
582 // 12 is grand-child of 6 through 9
582 // 12 is grand-child of 6 through 9
583 // passing them in this order maximizes the chances of the
583 // passing them in this order maximizes the chances of the
584 // early continue to do the wrong thing
584 // early continue to do the wrong thing
585 disco.add_missing_revisions(vec![6, 9, 12])?;
585 disco.add_missing_revisions(vec![6, 9, 12])?;
586 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
586 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
587 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
587 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
588 assert!(!disco.is_complete());
588 assert!(!disco.is_complete());
589 Ok(())
589 Ok(())
590 }
590 }
591
591
592 #[test]
592 #[test]
593 fn test_limit_sample_no_need_to() {
593 fn test_limit_sample_no_need_to() {
594 let sample = vec![1, 2, 3, 4];
594 let sample = vec![1, 2, 3, 4];
595 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
595 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
596 }
596 }
597
597
598 #[test]
598 #[test]
599 fn test_limit_sample_less_than_half() {
599 fn test_limit_sample_less_than_half() {
600 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![2, 5]);
600 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![2, 5]);
601 }
601 }
602
602
603 #[test]
603 #[test]
604 fn test_limit_sample_more_than_half() {
604 fn test_limit_sample_more_than_half() {
605 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![1, 2]);
605 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![1, 2]);
606 }
606 }
607
607
608 #[test]
608 #[test]
609 fn test_limit_sample_no_random() {
609 fn test_limit_sample_no_random() {
610 let mut disco = full_disco();
610 let mut disco = full_disco();
611 disco.randomize = false;
611 disco.randomize = false;
612 assert_eq!(
612 assert_eq!(
613 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
613 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
614 vec![1, 3, 5, 7]
614 vec![1, 3, 5, 7]
615 );
615 );
616 }
616 }
617
617
618 #[test]
618 #[test]
619 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
619 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
620 let mut disco = full_disco();
620 let mut disco = full_disco();
621 disco.undecided = Some((1..=13).collect());
621 disco.undecided = Some((1..=13).collect());
622
622
623 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
623 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
624 sample_vec.sort();
624 sample_vec.sort_unstable();
625 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
625 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
626 Ok(())
626 Ok(())
627 }
627 }
628
628
629 #[test]
629 #[test]
630 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
630 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
631 let mut disco = disco12();
631 let mut disco = disco12();
632 disco.ensure_undecided()?;
632 disco.ensure_undecided()?;
633
633
634 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
634 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
635 sample_vec.sort();
635 sample_vec.sort_unstable();
636 // r12's only parent is r9, whose unique grand-parent through the
636 // r12's only parent is r9, whose unique grand-parent through the
637 // diamond shape is r4. This ends there because the distance from r4
637 // diamond shape is r4. This ends there because the distance from r4
638 // to the root is only 3.
638 // to the root is only 3.
639 assert_eq!(sample_vec, vec![4, 9, 12]);
639 assert_eq!(sample_vec, vec![4, 9, 12]);
640 Ok(())
640 Ok(())
641 }
641 }
642
642
643 #[test]
643 #[test]
644 fn test_children_cache() -> Result<(), GraphError> {
644 fn test_children_cache() -> Result<(), GraphError> {
645 let mut disco = full_disco();
645 let mut disco = full_disco();
646 disco.ensure_children_cache()?;
646 disco.ensure_children_cache()?;
647
647
648 let cache = disco.children_cache.unwrap();
648 let cache = disco.children_cache.unwrap();
649 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
649 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
650 assert_eq!(cache.get(&10).cloned(), None);
650 assert_eq!(cache.get(&10).cloned(), None);
651
651
652 let mut children_4 = cache.get(&4).cloned().unwrap();
652 let mut children_4 = cache.get(&4).cloned().unwrap();
653 children_4.sort();
653 children_4.sort_unstable();
654 assert_eq!(children_4, vec![5, 6, 7]);
654 assert_eq!(children_4, vec![5, 6, 7]);
655
655
656 let mut children_7 = cache.get(&7).cloned().unwrap();
656 let mut children_7 = cache.get(&7).cloned().unwrap();
657 children_7.sort();
657 children_7.sort_unstable();
658 assert_eq!(children_7, vec![9, 11]);
658 assert_eq!(children_7, vec![9, 11]);
659
659
660 Ok(())
660 Ok(())
661 }
661 }
662
662
663 #[test]
663 #[test]
664 fn test_complete_sample() {
664 fn test_complete_sample() {
665 let mut disco = full_disco();
665 let mut disco = full_disco();
666 let undecided: HashSet<Revision> =
666 let undecided: HashSet<Revision> =
667 [4, 7, 9, 2, 3].iter().cloned().collect();
667 [4, 7, 9, 2, 3].iter().cloned().collect();
668 disco.undecided = Some(undecided);
668 disco.undecided = Some(undecided);
669
669
670 let mut sample = vec![0];
670 let mut sample = vec![0];
671 disco.random_complete_sample(&mut sample, 3);
671 disco.random_complete_sample(&mut sample, 3);
672 assert_eq!(sample.len(), 3);
672 assert_eq!(sample.len(), 3);
673
673
674 let mut sample = vec![2, 4, 7];
674 let mut sample = vec![2, 4, 7];
675 disco.random_complete_sample(&mut sample, 1);
675 disco.random_complete_sample(&mut sample, 1);
676 assert_eq!(sample.len(), 3);
676 assert_eq!(sample.len(), 3);
677 }
677 }
678
678
679 #[test]
679 #[test]
680 fn test_bidirectional_sample() -> Result<(), GraphError> {
680 fn test_bidirectional_sample() -> Result<(), GraphError> {
681 let mut disco = full_disco();
681 let mut disco = full_disco();
682 disco.undecided = Some((0..=13).into_iter().collect());
682 disco.undecided = Some((0..=13).into_iter().collect());
683
683
684 let (sample_set, size) = disco.bidirectional_sample(7)?;
684 let (sample_set, size) = disco.bidirectional_sample(7)?;
685 assert_eq!(size, 7);
685 assert_eq!(size, 7);
686 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
686 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
687 sample.sort();
687 sample.sort_unstable();
688 // our DAG is a bit too small for the results to be really interesting
688 // our DAG is a bit too small for the results to be really interesting
689 // at least it shows that
689 // at least it shows that
690 // - we went both ways
690 // - we went both ways
691 // - we didn't take all Revisions (6 is not in the sample)
691 // - we didn't take all Revisions (6 is not in the sample)
692 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
692 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
693 Ok(())
693 Ok(())
694 }
694 }
695 }
695 }
@@ -1,772 +1,772 b''
1 // filepatterns.rs
1 // filepatterns.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Handling of Mercurial-specific patterns.
8 //! Handling of Mercurial-specific patterns.
9
9
10 use crate::{
10 use crate::{
11 utils::{
11 utils::{
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 SliceExt,
14 SliceExt,
15 },
15 },
16 FastHashMap, PatternError,
16 FastHashMap, PatternError,
17 };
17 };
18 use lazy_static::lazy_static;
18 use lazy_static::lazy_static;
19 use regex::bytes::{NoExpand, Regex};
19 use regex::bytes::{NoExpand, Regex};
20 use std::ops::Deref;
20 use std::ops::Deref;
21 use std::path::{Path, PathBuf};
21 use std::path::{Path, PathBuf};
22 use std::vec::Vec;
22 use std::vec::Vec;
23
23
24 lazy_static! {
24 lazy_static! {
25 static ref RE_ESCAPE: Vec<Vec<u8>> = {
25 static ref RE_ESCAPE: Vec<Vec<u8>> = {
26 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
26 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
27 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
27 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
28 for byte in to_escape {
28 for byte in to_escape {
29 v[*byte as usize].insert(0, b'\\');
29 v[*byte as usize].insert(0, b'\\');
30 }
30 }
31 v
31 v
32 };
32 };
33 }
33 }
34
34
35 /// These are matched in order
35 /// These are matched in order
36 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
36 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
37 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
37 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
38
38
39 /// Appended to the regexp of globs
39 /// Appended to the regexp of globs
40 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
40 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
41
41
42 #[derive(Debug, Clone, PartialEq, Eq)]
42 #[derive(Debug, Clone, PartialEq, Eq)]
43 pub enum PatternSyntax {
43 pub enum PatternSyntax {
44 /// A regular expression
44 /// A regular expression
45 Regexp,
45 Regexp,
46 /// Glob that matches at the front of the path
46 /// Glob that matches at the front of the path
47 RootGlob,
47 RootGlob,
48 /// Glob that matches at any suffix of the path (still anchored at
48 /// Glob that matches at any suffix of the path (still anchored at
49 /// slashes)
49 /// slashes)
50 Glob,
50 Glob,
51 /// a path relative to repository root, which is matched recursively
51 /// a path relative to repository root, which is matched recursively
52 Path,
52 Path,
53 /// A path relative to cwd
53 /// A path relative to cwd
54 RelPath,
54 RelPath,
55 /// an unrooted glob (*.rs matches Rust files in all dirs)
55 /// an unrooted glob (*.rs matches Rust files in all dirs)
56 RelGlob,
56 RelGlob,
57 /// A regexp that needn't match the start of a name
57 /// A regexp that needn't match the start of a name
58 RelRegexp,
58 RelRegexp,
59 /// A path relative to repository root, which is matched non-recursively
59 /// A path relative to repository root, which is matched non-recursively
60 /// (will not match subdirectories)
60 /// (will not match subdirectories)
61 RootFiles,
61 RootFiles,
62 /// A file of patterns to read and include
62 /// A file of patterns to read and include
63 Include,
63 Include,
64 /// A file of patterns to match against files under the same directory
64 /// A file of patterns to match against files under the same directory
65 SubInclude,
65 SubInclude,
66 /// SubInclude with the result of parsing the included file
66 /// SubInclude with the result of parsing the included file
67 ///
67 ///
68 /// Note: there is no ExpandedInclude because that expansion can be done
68 /// Note: there is no ExpandedInclude because that expansion can be done
69 /// in place by replacing the Include pattern by the included patterns.
69 /// in place by replacing the Include pattern by the included patterns.
70 /// SubInclude requires more handling.
70 /// SubInclude requires more handling.
71 ///
71 ///
72 /// Note: `Box` is used to minimize size impact on other enum variants
72 /// Note: `Box` is used to minimize size impact on other enum variants
73 ExpandedSubInclude(Box<SubInclude>),
73 ExpandedSubInclude(Box<SubInclude>),
74 }
74 }
75
75
76 /// Transforms a glob pattern into a regex
76 /// Transforms a glob pattern into a regex
77 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
77 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
78 let mut input = pat;
78 let mut input = pat;
79 let mut res: Vec<u8> = vec![];
79 let mut res: Vec<u8> = vec![];
80 let mut group_depth = 0;
80 let mut group_depth = 0;
81
81
82 while let Some((c, rest)) = input.split_first() {
82 while let Some((c, rest)) = input.split_first() {
83 input = rest;
83 input = rest;
84
84
85 match c {
85 match c {
86 b'*' => {
86 b'*' => {
87 for (source, repl) in GLOB_REPLACEMENTS {
87 for (source, repl) in GLOB_REPLACEMENTS {
88 if let Some(rest) = input.drop_prefix(source) {
88 if let Some(rest) = input.drop_prefix(source) {
89 input = rest;
89 input = rest;
90 res.extend(*repl);
90 res.extend(*repl);
91 break;
91 break;
92 }
92 }
93 }
93 }
94 }
94 }
95 b'?' => res.extend(b"."),
95 b'?' => res.extend(b"."),
96 b'[' => {
96 b'[' => {
97 match input.iter().skip(1).position(|b| *b == b']') {
97 match input.iter().skip(1).position(|b| *b == b']') {
98 None => res.extend(b"\\["),
98 None => res.extend(b"\\["),
99 Some(end) => {
99 Some(end) => {
100 // Account for the one we skipped
100 // Account for the one we skipped
101 let end = end + 1;
101 let end = end + 1;
102
102
103 res.extend(b"[");
103 res.extend(b"[");
104
104
105 for (i, b) in input[..end].iter().enumerate() {
105 for (i, b) in input[..end].iter().enumerate() {
106 if *b == b'!' && i == 0 {
106 if *b == b'!' && i == 0 {
107 res.extend(b"^")
107 res.extend(b"^")
108 } else if *b == b'^' && i == 0 {
108 } else if *b == b'^' && i == 0 {
109 res.extend(b"\\^")
109 res.extend(b"\\^")
110 } else if *b == b'\\' {
110 } else if *b == b'\\' {
111 res.extend(b"\\\\")
111 res.extend(b"\\\\")
112 } else {
112 } else {
113 res.push(*b)
113 res.push(*b)
114 }
114 }
115 }
115 }
116 res.extend(b"]");
116 res.extend(b"]");
117 input = &input[end + 1..];
117 input = &input[end + 1..];
118 }
118 }
119 }
119 }
120 }
120 }
121 b'{' => {
121 b'{' => {
122 group_depth += 1;
122 group_depth += 1;
123 res.extend(b"(?:")
123 res.extend(b"(?:")
124 }
124 }
125 b'}' if group_depth > 0 => {
125 b'}' if group_depth > 0 => {
126 group_depth -= 1;
126 group_depth -= 1;
127 res.extend(b")");
127 res.extend(b")");
128 }
128 }
129 b',' if group_depth > 0 => res.extend(b"|"),
129 b',' if group_depth > 0 => res.extend(b"|"),
130 b'\\' => {
130 b'\\' => {
131 let c = {
131 let c = {
132 if let Some((c, rest)) = input.split_first() {
132 if let Some((c, rest)) = input.split_first() {
133 input = rest;
133 input = rest;
134 c
134 c
135 } else {
135 } else {
136 c
136 c
137 }
137 }
138 };
138 };
139 res.extend(&RE_ESCAPE[*c as usize])
139 res.extend(&RE_ESCAPE[*c as usize])
140 }
140 }
141 _ => res.extend(&RE_ESCAPE[*c as usize]),
141 _ => res.extend(&RE_ESCAPE[*c as usize]),
142 }
142 }
143 }
143 }
144 res
144 res
145 }
145 }
146
146
147 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
147 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
148 pattern
148 pattern
149 .iter()
149 .iter()
150 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
150 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
151 .collect()
151 .collect()
152 }
152 }
153
153
154 pub fn parse_pattern_syntax(
154 pub fn parse_pattern_syntax(
155 kind: &[u8],
155 kind: &[u8],
156 ) -> Result<PatternSyntax, PatternError> {
156 ) -> Result<PatternSyntax, PatternError> {
157 match kind {
157 match kind {
158 b"re:" => Ok(PatternSyntax::Regexp),
158 b"re:" => Ok(PatternSyntax::Regexp),
159 b"path:" => Ok(PatternSyntax::Path),
159 b"path:" => Ok(PatternSyntax::Path),
160 b"relpath:" => Ok(PatternSyntax::RelPath),
160 b"relpath:" => Ok(PatternSyntax::RelPath),
161 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
161 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
162 b"relglob:" => Ok(PatternSyntax::RelGlob),
162 b"relglob:" => Ok(PatternSyntax::RelGlob),
163 b"relre:" => Ok(PatternSyntax::RelRegexp),
163 b"relre:" => Ok(PatternSyntax::RelRegexp),
164 b"glob:" => Ok(PatternSyntax::Glob),
164 b"glob:" => Ok(PatternSyntax::Glob),
165 b"rootglob:" => Ok(PatternSyntax::RootGlob),
165 b"rootglob:" => Ok(PatternSyntax::RootGlob),
166 b"include:" => Ok(PatternSyntax::Include),
166 b"include:" => Ok(PatternSyntax::Include),
167 b"subinclude:" => Ok(PatternSyntax::SubInclude),
167 b"subinclude:" => Ok(PatternSyntax::SubInclude),
168 _ => Err(PatternError::UnsupportedSyntax(
168 _ => Err(PatternError::UnsupportedSyntax(
169 String::from_utf8_lossy(kind).to_string(),
169 String::from_utf8_lossy(kind).to_string(),
170 )),
170 )),
171 }
171 }
172 }
172 }
173
173
174 lazy_static! {
174 lazy_static! {
175 static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap();
175 static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap();
176 }
176 }
177
177
178 /// Builds the regex that corresponds to the given pattern.
178 /// Builds the regex that corresponds to the given pattern.
179 /// If within a `syntax: regexp` context, returns the pattern,
179 /// If within a `syntax: regexp` context, returns the pattern,
180 /// otherwise, returns the corresponding regex.
180 /// otherwise, returns the corresponding regex.
181 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
181 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
182 let IgnorePattern {
182 let IgnorePattern {
183 syntax, pattern, ..
183 syntax, pattern, ..
184 } = entry;
184 } = entry;
185 if pattern.is_empty() {
185 if pattern.is_empty() {
186 return vec![];
186 return vec![];
187 }
187 }
188 match syntax {
188 match syntax {
189 PatternSyntax::Regexp => pattern.to_owned(),
189 PatternSyntax::Regexp => pattern.to_owned(),
190 PatternSyntax::RelRegexp => {
190 PatternSyntax::RelRegexp => {
191 // The `regex` crate accepts `**` while `re2` and Python's `re`
191 // The `regex` crate accepts `**` while `re2` and Python's `re`
192 // do not. Checking for `*` correctly triggers the same error all
192 // do not. Checking for `*` correctly triggers the same error all
193 // engines.
193 // engines.
194 if pattern[0] == b'^'
194 if pattern[0] == b'^'
195 || pattern[0] == b'*'
195 || pattern[0] == b'*'
196 || pattern.starts_with(b".*")
196 || pattern.starts_with(b".*")
197 {
197 {
198 return pattern.to_owned();
198 return pattern.to_owned();
199 }
199 }
200 match FLAG_RE.find(pattern) {
200 match FLAG_RE.find(pattern) {
201 Some(mat) => {
201 Some(mat) => {
202 let s = mat.start();
202 let s = mat.start();
203 let e = mat.end();
203 let e = mat.end();
204 [
204 [
205 &b"(?"[..],
205 &b"(?"[..],
206 &pattern[s + 2..e - 1],
206 &pattern[s + 2..e - 1],
207 &b":"[..],
207 &b":"[..],
208 if pattern[e] == b'^'
208 if pattern[e] == b'^'
209 || pattern[e] == b'*'
209 || pattern[e] == b'*'
210 || pattern[e..].starts_with(b".*")
210 || pattern[e..].starts_with(b".*")
211 {
211 {
212 &b""[..]
212 &b""[..]
213 } else {
213 } else {
214 &b".*"[..]
214 &b".*"[..]
215 },
215 },
216 &pattern[e..],
216 &pattern[e..],
217 &b")"[..],
217 &b")"[..],
218 ]
218 ]
219 .concat()
219 .concat()
220 }
220 }
221 None => [&b".*"[..], pattern].concat(),
221 None => [&b".*"[..], pattern].concat(),
222 }
222 }
223 }
223 }
224 PatternSyntax::Path | PatternSyntax::RelPath => {
224 PatternSyntax::Path | PatternSyntax::RelPath => {
225 if pattern == b"." {
225 if pattern == b"." {
226 return vec![];
226 return vec![];
227 }
227 }
228 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
228 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
229 }
229 }
230 PatternSyntax::RootFiles => {
230 PatternSyntax::RootFiles => {
231 let mut res = if pattern == b"." {
231 let mut res = if pattern == b"." {
232 vec![]
232 vec![]
233 } else {
233 } else {
234 // Pattern is a directory name.
234 // Pattern is a directory name.
235 [escape_pattern(pattern).as_slice(), b"/"].concat()
235 [escape_pattern(pattern).as_slice(), b"/"].concat()
236 };
236 };
237
237
238 // Anything after the pattern must be a non-directory.
238 // Anything after the pattern must be a non-directory.
239 res.extend(b"[^/]+$");
239 res.extend(b"[^/]+$");
240 res
240 res
241 }
241 }
242 PatternSyntax::RelGlob => {
242 PatternSyntax::RelGlob => {
243 let glob_re = glob_to_re(pattern);
243 let glob_re = glob_to_re(pattern);
244 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
244 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
245 [b".*", rest, GLOB_SUFFIX].concat()
245 [b".*", rest, GLOB_SUFFIX].concat()
246 } else {
246 } else {
247 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
247 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
248 }
248 }
249 }
249 }
250 PatternSyntax::Glob | PatternSyntax::RootGlob => {
250 PatternSyntax::Glob | PatternSyntax::RootGlob => {
251 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
251 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
252 }
252 }
253 PatternSyntax::Include
253 PatternSyntax::Include
254 | PatternSyntax::SubInclude
254 | PatternSyntax::SubInclude
255 | PatternSyntax::ExpandedSubInclude(_) => unreachable!(),
255 | PatternSyntax::ExpandedSubInclude(_) => unreachable!(),
256 }
256 }
257 }
257 }
258
258
259 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
259 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
260 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
260 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
261
261
262 /// TODO support other platforms
262 /// TODO support other platforms
263 #[cfg(unix)]
263 #[cfg(unix)]
264 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
264 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
265 if bytes.is_empty() {
265 if bytes.is_empty() {
266 return b".".to_vec();
266 return b".".to_vec();
267 }
267 }
268 let sep = b'/';
268 let sep = b'/';
269
269
270 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
270 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
271 if initial_slashes > 2 {
271 if initial_slashes > 2 {
272 // POSIX allows one or two initial slashes, but treats three or more
272 // POSIX allows one or two initial slashes, but treats three or more
273 // as single slash.
273 // as single slash.
274 initial_slashes = 1;
274 initial_slashes = 1;
275 }
275 }
276 let components = bytes
276 let components = bytes
277 .split(|b| *b == sep)
277 .split(|b| *b == sep)
278 .filter(|c| !(c.is_empty() || c == b"."))
278 .filter(|c| !(c.is_empty() || c == b"."))
279 .fold(vec![], |mut acc, component| {
279 .fold(vec![], |mut acc, component| {
280 if component != b".."
280 if component != b".."
281 || (initial_slashes == 0 && acc.is_empty())
281 || (initial_slashes == 0 && acc.is_empty())
282 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
282 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
283 {
283 {
284 acc.push(component)
284 acc.push(component)
285 } else if !acc.is_empty() {
285 } else if !acc.is_empty() {
286 acc.pop();
286 acc.pop();
287 }
287 }
288 acc
288 acc
289 });
289 });
290 let mut new_bytes = components.join(&sep);
290 let mut new_bytes = components.join(&sep);
291
291
292 if initial_slashes > 0 {
292 if initial_slashes > 0 {
293 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
293 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
294 buf.extend(new_bytes);
294 buf.extend(new_bytes);
295 new_bytes = buf;
295 new_bytes = buf;
296 }
296 }
297 if new_bytes.is_empty() {
297 if new_bytes.is_empty() {
298 b".".to_vec()
298 b".".to_vec()
299 } else {
299 } else {
300 new_bytes
300 new_bytes
301 }
301 }
302 }
302 }
303
303
304 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
304 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
305 /// that don't need to be transformed into a regex.
305 /// that don't need to be transformed into a regex.
306 pub fn build_single_regex(
306 pub fn build_single_regex(
307 entry: &IgnorePattern,
307 entry: &IgnorePattern,
308 ) -> Result<Option<Vec<u8>>, PatternError> {
308 ) -> Result<Option<Vec<u8>>, PatternError> {
309 let IgnorePattern {
309 let IgnorePattern {
310 pattern, syntax, ..
310 pattern, syntax, ..
311 } = entry;
311 } = entry;
312 let pattern = match syntax {
312 let pattern = match syntax {
313 PatternSyntax::RootGlob
313 PatternSyntax::RootGlob
314 | PatternSyntax::Path
314 | PatternSyntax::Path
315 | PatternSyntax::RelGlob
315 | PatternSyntax::RelGlob
316 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
316 | PatternSyntax::RootFiles => normalize_path_bytes(pattern),
317 PatternSyntax::Include | PatternSyntax::SubInclude => {
317 PatternSyntax::Include | PatternSyntax::SubInclude => {
318 return Err(PatternError::NonRegexPattern(entry.clone()))
318 return Err(PatternError::NonRegexPattern(entry.clone()))
319 }
319 }
320 _ => pattern.to_owned(),
320 _ => pattern.to_owned(),
321 };
321 };
322 if *syntax == PatternSyntax::RootGlob
322 if *syntax == PatternSyntax::RootGlob
323 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
323 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
324 {
324 {
325 Ok(None)
325 Ok(None)
326 } else {
326 } else {
327 let mut entry = entry.clone();
327 let mut entry = entry.clone();
328 entry.pattern = pattern;
328 entry.pattern = pattern;
329 Ok(Some(_build_single_regex(&entry)))
329 Ok(Some(_build_single_regex(&entry)))
330 }
330 }
331 }
331 }
332
332
333 lazy_static! {
333 lazy_static! {
334 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
334 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
335 let mut m = FastHashMap::default();
335 let mut m = FastHashMap::default();
336
336
337 m.insert(b"re".as_ref(), b"relre:".as_ref());
337 m.insert(b"re".as_ref(), b"relre:".as_ref());
338 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
338 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
339 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
339 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
340 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
340 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
341 m.insert(b"include".as_ref(), b"include:".as_ref());
341 m.insert(b"include".as_ref(), b"include:".as_ref());
342 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
342 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
343 m.insert(b"path".as_ref(), b"path:".as_ref());
343 m.insert(b"path".as_ref(), b"path:".as_ref());
344 m.insert(b"rootfilesin".as_ref(), b"rootfilesin:".as_ref());
344 m.insert(b"rootfilesin".as_ref(), b"rootfilesin:".as_ref());
345 m
345 m
346 };
346 };
347 }
347 }
348
348
349 #[derive(Debug)]
349 #[derive(Debug)]
350 pub enum PatternFileWarning {
350 pub enum PatternFileWarning {
351 /// (file path, syntax bytes)
351 /// (file path, syntax bytes)
352 InvalidSyntax(PathBuf, Vec<u8>),
352 InvalidSyntax(PathBuf, Vec<u8>),
353 /// File path
353 /// File path
354 NoSuchFile(PathBuf),
354 NoSuchFile(PathBuf),
355 }
355 }
356
356
357 pub fn parse_pattern_file_contents(
357 pub fn parse_pattern_file_contents(
358 lines: &[u8],
358 lines: &[u8],
359 file_path: &Path,
359 file_path: &Path,
360 default_syntax_override: Option<&[u8]>,
360 default_syntax_override: Option<&[u8]>,
361 warn: bool,
361 warn: bool,
362 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
362 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
363 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
363 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
364
364
365 #[allow(clippy::trivial_regex)]
365 #[allow(clippy::trivial_regex)]
366 let comment_escape_regex = Regex::new(r"\\#").unwrap();
366 let comment_escape_regex = Regex::new(r"\\#").unwrap();
367 let mut inputs: Vec<IgnorePattern> = vec![];
367 let mut inputs: Vec<IgnorePattern> = vec![];
368 let mut warnings: Vec<PatternFileWarning> = vec![];
368 let mut warnings: Vec<PatternFileWarning> = vec![];
369
369
370 let mut current_syntax =
370 let mut current_syntax =
371 default_syntax_override.unwrap_or(b"relre:".as_ref());
371 default_syntax_override.unwrap_or_else(|| b"relre:".as_ref());
372
372
373 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
373 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
374 let line_number = line_number + 1;
374 let line_number = line_number + 1;
375
375
376 let line_buf;
376 let line_buf;
377 if line.contains(&b'#') {
377 if line.contains(&b'#') {
378 if let Some(cap) = comment_regex.captures(line) {
378 if let Some(cap) = comment_regex.captures(line) {
379 line = &line[..cap.get(1).unwrap().end()]
379 line = &line[..cap.get(1).unwrap().end()]
380 }
380 }
381 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
381 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
382 line = &line_buf;
382 line = &line_buf;
383 }
383 }
384
384
385 let mut line = line.trim_end();
385 let mut line = line.trim_end();
386
386
387 if line.is_empty() {
387 if line.is_empty() {
388 continue;
388 continue;
389 }
389 }
390
390
391 if let Some(syntax) = line.drop_prefix(b"syntax:") {
391 if let Some(syntax) = line.drop_prefix(b"syntax:") {
392 let syntax = syntax.trim();
392 let syntax = syntax.trim();
393
393
394 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
394 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
395 current_syntax = rel_syntax;
395 current_syntax = rel_syntax;
396 } else if warn {
396 } else if warn {
397 warnings.push(PatternFileWarning::InvalidSyntax(
397 warnings.push(PatternFileWarning::InvalidSyntax(
398 file_path.to_owned(),
398 file_path.to_owned(),
399 syntax.to_owned(),
399 syntax.to_owned(),
400 ));
400 ));
401 }
401 }
402 continue;
402 continue;
403 }
403 }
404
404
405 let mut line_syntax: &[u8] = &current_syntax;
405 let mut line_syntax: &[u8] = current_syntax;
406
406
407 for (s, rels) in SYNTAXES.iter() {
407 for (s, rels) in SYNTAXES.iter() {
408 if let Some(rest) = line.drop_prefix(rels) {
408 if let Some(rest) = line.drop_prefix(rels) {
409 line_syntax = rels;
409 line_syntax = rels;
410 line = rest;
410 line = rest;
411 break;
411 break;
412 }
412 }
413 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
413 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
414 line_syntax = rels;
414 line_syntax = rels;
415 line = rest;
415 line = rest;
416 break;
416 break;
417 }
417 }
418 }
418 }
419
419
420 inputs.push(IgnorePattern::new(
420 inputs.push(IgnorePattern::new(
421 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
421 parse_pattern_syntax(line_syntax).map_err(|e| match e {
422 PatternError::UnsupportedSyntax(syntax) => {
422 PatternError::UnsupportedSyntax(syntax) => {
423 PatternError::UnsupportedSyntaxInFile(
423 PatternError::UnsupportedSyntaxInFile(
424 syntax,
424 syntax,
425 file_path.to_string_lossy().into(),
425 file_path.to_string_lossy().into(),
426 line_number,
426 line_number,
427 )
427 )
428 }
428 }
429 _ => e,
429 _ => e,
430 })?,
430 })?,
431 &line,
431 line,
432 file_path,
432 file_path,
433 ));
433 ));
434 }
434 }
435 Ok((inputs, warnings))
435 Ok((inputs, warnings))
436 }
436 }
437
437
438 pub fn read_pattern_file(
438 pub fn read_pattern_file(
439 file_path: &Path,
439 file_path: &Path,
440 warn: bool,
440 warn: bool,
441 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
441 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
442 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
442 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
443 match std::fs::read(file_path) {
443 match std::fs::read(file_path) {
444 Ok(contents) => {
444 Ok(contents) => {
445 inspect_pattern_bytes(file_path, &contents);
445 inspect_pattern_bytes(file_path, &contents);
446 parse_pattern_file_contents(&contents, file_path, None, warn)
446 parse_pattern_file_contents(&contents, file_path, None, warn)
447 }
447 }
448 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
448 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
449 vec![],
449 vec![],
450 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
450 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
451 )),
451 )),
452 Err(e) => Err(e.into()),
452 Err(e) => Err(e.into()),
453 }
453 }
454 }
454 }
455
455
456 /// Represents an entry in an "ignore" file.
456 /// Represents an entry in an "ignore" file.
457 #[derive(Debug, Eq, PartialEq, Clone)]
457 #[derive(Debug, Eq, PartialEq, Clone)]
458 pub struct IgnorePattern {
458 pub struct IgnorePattern {
459 pub syntax: PatternSyntax,
459 pub syntax: PatternSyntax,
460 pub pattern: Vec<u8>,
460 pub pattern: Vec<u8>,
461 pub source: PathBuf,
461 pub source: PathBuf,
462 }
462 }
463
463
464 impl IgnorePattern {
464 impl IgnorePattern {
465 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
465 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
466 Self {
466 Self {
467 syntax,
467 syntax,
468 pattern: pattern.to_owned(),
468 pattern: pattern.to_owned(),
469 source: source.to_owned(),
469 source: source.to_owned(),
470 }
470 }
471 }
471 }
472 }
472 }
473
473
474 pub type PatternResult<T> = Result<T, PatternError>;
474 pub type PatternResult<T> = Result<T, PatternError>;
475
475
476 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
476 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
477 /// and `subinclude:` patterns.
477 /// and `subinclude:` patterns.
478 ///
478 ///
479 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
479 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
480 /// is used for the latter to form a tree of patterns.
480 /// is used for the latter to form a tree of patterns.
481 pub fn get_patterns_from_file(
481 pub fn get_patterns_from_file(
482 pattern_file: &Path,
482 pattern_file: &Path,
483 root_dir: &Path,
483 root_dir: &Path,
484 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
484 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
485 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
485 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
486 let (patterns, mut warnings) =
486 let (patterns, mut warnings) =
487 read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
487 read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
488 let patterns = patterns
488 let patterns = patterns
489 .into_iter()
489 .into_iter()
490 .flat_map(|entry| -> PatternResult<_> {
490 .flat_map(|entry| -> PatternResult<_> {
491 Ok(match &entry.syntax {
491 Ok(match &entry.syntax {
492 PatternSyntax::Include => {
492 PatternSyntax::Include => {
493 let inner_include =
493 let inner_include =
494 root_dir.join(get_path_from_bytes(&entry.pattern));
494 root_dir.join(get_path_from_bytes(&entry.pattern));
495 let (inner_pats, inner_warnings) = get_patterns_from_file(
495 let (inner_pats, inner_warnings) = get_patterns_from_file(
496 &inner_include,
496 &inner_include,
497 root_dir,
497 root_dir,
498 inspect_pattern_bytes,
498 inspect_pattern_bytes,
499 )?;
499 )?;
500 warnings.extend(inner_warnings);
500 warnings.extend(inner_warnings);
501 inner_pats
501 inner_pats
502 }
502 }
503 PatternSyntax::SubInclude => {
503 PatternSyntax::SubInclude => {
504 let mut sub_include = SubInclude::new(
504 let mut sub_include = SubInclude::new(
505 &root_dir,
505 root_dir,
506 &entry.pattern,
506 &entry.pattern,
507 &entry.source,
507 &entry.source,
508 )?;
508 )?;
509 let (inner_patterns, inner_warnings) =
509 let (inner_patterns, inner_warnings) =
510 get_patterns_from_file(
510 get_patterns_from_file(
511 &sub_include.path,
511 &sub_include.path,
512 &sub_include.root,
512 &sub_include.root,
513 inspect_pattern_bytes,
513 inspect_pattern_bytes,
514 )?;
514 )?;
515 sub_include.included_patterns = inner_patterns;
515 sub_include.included_patterns = inner_patterns;
516 warnings.extend(inner_warnings);
516 warnings.extend(inner_warnings);
517 vec![IgnorePattern {
517 vec![IgnorePattern {
518 syntax: PatternSyntax::ExpandedSubInclude(Box::new(
518 syntax: PatternSyntax::ExpandedSubInclude(Box::new(
519 sub_include,
519 sub_include,
520 )),
520 )),
521 ..entry
521 ..entry
522 }]
522 }]
523 }
523 }
524 _ => vec![entry],
524 _ => vec![entry],
525 })
525 })
526 })
526 })
527 .flatten()
527 .flatten()
528 .collect();
528 .collect();
529
529
530 Ok((patterns, warnings))
530 Ok((patterns, warnings))
531 }
531 }
532
532
533 /// Holds all the information needed to handle a `subinclude:` pattern.
533 /// Holds all the information needed to handle a `subinclude:` pattern.
534 #[derive(Debug, PartialEq, Eq, Clone)]
534 #[derive(Debug, PartialEq, Eq, Clone)]
535 pub struct SubInclude {
535 pub struct SubInclude {
536 /// Will be used for repository (hg) paths that start with this prefix.
536 /// Will be used for repository (hg) paths that start with this prefix.
537 /// It is relative to the current working directory, so comparing against
537 /// It is relative to the current working directory, so comparing against
538 /// repository paths is painless.
538 /// repository paths is painless.
539 pub prefix: HgPathBuf,
539 pub prefix: HgPathBuf,
540 /// The file itself, containing the patterns
540 /// The file itself, containing the patterns
541 pub path: PathBuf,
541 pub path: PathBuf,
542 /// Folder in the filesystem where this it applies
542 /// Folder in the filesystem where this it applies
543 pub root: PathBuf,
543 pub root: PathBuf,
544
544
545 pub included_patterns: Vec<IgnorePattern>,
545 pub included_patterns: Vec<IgnorePattern>,
546 }
546 }
547
547
548 impl SubInclude {
548 impl SubInclude {
549 pub fn new(
549 pub fn new(
550 root_dir: &Path,
550 root_dir: &Path,
551 pattern: &[u8],
551 pattern: &[u8],
552 source: &Path,
552 source: &Path,
553 ) -> Result<SubInclude, HgPathError> {
553 ) -> Result<SubInclude, HgPathError> {
554 let normalized_source =
554 let normalized_source =
555 normalize_path_bytes(&get_bytes_from_path(source));
555 normalize_path_bytes(&get_bytes_from_path(source));
556
556
557 let source_root = get_path_from_bytes(&normalized_source);
557 let source_root = get_path_from_bytes(&normalized_source);
558 let source_root =
558 let source_root =
559 source_root.parent().unwrap_or_else(|| source_root.deref());
559 source_root.parent().unwrap_or_else(|| source_root.deref());
560
560
561 let path = source_root.join(get_path_from_bytes(pattern));
561 let path = source_root.join(get_path_from_bytes(pattern));
562 let new_root = path.parent().unwrap_or_else(|| path.deref());
562 let new_root = path.parent().unwrap_or_else(|| path.deref());
563
563
564 let prefix = canonical_path(root_dir, root_dir, new_root)?;
564 let prefix = canonical_path(root_dir, root_dir, new_root)?;
565
565
566 Ok(Self {
566 Ok(Self {
567 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
567 prefix: path_to_hg_path_buf(prefix).map(|mut p| {
568 if !p.is_empty() {
568 if !p.is_empty() {
569 p.push_byte(b'/');
569 p.push_byte(b'/');
570 }
570 }
571 Ok(p)
571 p
572 })?,
572 })?,
573 path: path.to_owned(),
573 path: path.to_owned(),
574 root: new_root.to_owned(),
574 root: new_root.to_owned(),
575 included_patterns: Vec::new(),
575 included_patterns: Vec::new(),
576 })
576 })
577 }
577 }
578 }
578 }
579
579
580 /// Separate and pre-process subincludes from other patterns for the "ignore"
580 /// Separate and pre-process subincludes from other patterns for the "ignore"
581 /// phase.
581 /// phase.
582 pub fn filter_subincludes(
582 pub fn filter_subincludes(
583 ignore_patterns: Vec<IgnorePattern>,
583 ignore_patterns: Vec<IgnorePattern>,
584 ) -> Result<(Vec<SubInclude>, Vec<IgnorePattern>), HgPathError> {
584 ) -> Result<(Vec<SubInclude>, Vec<IgnorePattern>), HgPathError> {
585 let mut subincludes = vec![];
585 let mut subincludes = vec![];
586 let mut others = vec![];
586 let mut others = vec![];
587
587
588 for pattern in ignore_patterns {
588 for pattern in ignore_patterns {
589 if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
589 if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
590 {
590 {
591 subincludes.push(*sub_include);
591 subincludes.push(*sub_include);
592 } else {
592 } else {
593 others.push(pattern)
593 others.push(pattern)
594 }
594 }
595 }
595 }
596 Ok((subincludes, others))
596 Ok((subincludes, others))
597 }
597 }
598
598
599 #[cfg(test)]
599 #[cfg(test)]
600 mod tests {
600 mod tests {
601 use super::*;
601 use super::*;
602 use pretty_assertions::assert_eq;
602 use pretty_assertions::assert_eq;
603
603
604 #[test]
604 #[test]
605 fn escape_pattern_test() {
605 fn escape_pattern_test() {
606 let untouched =
606 let untouched =
607 br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
607 br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
608 assert_eq!(escape_pattern(untouched), untouched.to_vec());
608 assert_eq!(escape_pattern(untouched), untouched.to_vec());
609 // All escape codes
609 // All escape codes
610 assert_eq!(
610 assert_eq!(
611 escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
611 escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
612 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
612 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
613 .to_vec()
613 .to_vec()
614 );
614 );
615 }
615 }
616
616
617 #[test]
617 #[test]
618 fn glob_test() {
618 fn glob_test() {
619 assert_eq!(glob_to_re(br#"?"#), br#"."#);
619 assert_eq!(glob_to_re(br#"?"#), br#"."#);
620 assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
620 assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
621 assert_eq!(glob_to_re(br#"**"#), br#".*"#);
621 assert_eq!(glob_to_re(br#"**"#), br#".*"#);
622 assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
622 assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
623 assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
623 assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
624 assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
624 assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
625 assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
625 assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
626 assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
626 assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
627 }
627 }
628
628
629 #[test]
629 #[test]
630 fn test_parse_pattern_file_contents() {
630 fn test_parse_pattern_file_contents() {
631 let lines = b"syntax: glob\n*.elc";
631 let lines = b"syntax: glob\n*.elc";
632
632
633 assert_eq!(
633 assert_eq!(
634 parse_pattern_file_contents(
634 parse_pattern_file_contents(
635 lines,
635 lines,
636 Path::new("file_path"),
636 Path::new("file_path"),
637 None,
637 None,
638 false
638 false
639 )
639 )
640 .unwrap()
640 .unwrap()
641 .0,
641 .0,
642 vec![IgnorePattern::new(
642 vec![IgnorePattern::new(
643 PatternSyntax::RelGlob,
643 PatternSyntax::RelGlob,
644 b"*.elc",
644 b"*.elc",
645 Path::new("file_path")
645 Path::new("file_path")
646 )],
646 )],
647 );
647 );
648
648
649 let lines = b"syntax: include\nsyntax: glob";
649 let lines = b"syntax: include\nsyntax: glob";
650
650
651 assert_eq!(
651 assert_eq!(
652 parse_pattern_file_contents(
652 parse_pattern_file_contents(
653 lines,
653 lines,
654 Path::new("file_path"),
654 Path::new("file_path"),
655 None,
655 None,
656 false
656 false
657 )
657 )
658 .unwrap()
658 .unwrap()
659 .0,
659 .0,
660 vec![]
660 vec![]
661 );
661 );
662 let lines = b"glob:**.o";
662 let lines = b"glob:**.o";
663 assert_eq!(
663 assert_eq!(
664 parse_pattern_file_contents(
664 parse_pattern_file_contents(
665 lines,
665 lines,
666 Path::new("file_path"),
666 Path::new("file_path"),
667 None,
667 None,
668 false
668 false
669 )
669 )
670 .unwrap()
670 .unwrap()
671 .0,
671 .0,
672 vec![IgnorePattern::new(
672 vec![IgnorePattern::new(
673 PatternSyntax::RelGlob,
673 PatternSyntax::RelGlob,
674 b"**.o",
674 b"**.o",
675 Path::new("file_path")
675 Path::new("file_path")
676 )]
676 )]
677 );
677 );
678 }
678 }
679
679
680 #[test]
680 #[test]
681 fn test_build_single_regex() {
681 fn test_build_single_regex() {
682 assert_eq!(
682 assert_eq!(
683 build_single_regex(&IgnorePattern::new(
683 build_single_regex(&IgnorePattern::new(
684 PatternSyntax::RelGlob,
684 PatternSyntax::RelGlob,
685 b"rust/target/",
685 b"rust/target/",
686 Path::new("")
686 Path::new("")
687 ))
687 ))
688 .unwrap(),
688 .unwrap(),
689 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
689 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
690 );
690 );
691 assert_eq!(
691 assert_eq!(
692 build_single_regex(&IgnorePattern::new(
692 build_single_regex(&IgnorePattern::new(
693 PatternSyntax::Regexp,
693 PatternSyntax::Regexp,
694 br"rust/target/\d+",
694 br"rust/target/\d+",
695 Path::new("")
695 Path::new("")
696 ))
696 ))
697 .unwrap(),
697 .unwrap(),
698 Some(br"rust/target/\d+".to_vec()),
698 Some(br"rust/target/\d+".to_vec()),
699 );
699 );
700 }
700 }
701
701
702 #[test]
702 #[test]
703 fn test_build_single_regex_shortcut() {
703 fn test_build_single_regex_shortcut() {
704 assert_eq!(
704 assert_eq!(
705 build_single_regex(&IgnorePattern::new(
705 build_single_regex(&IgnorePattern::new(
706 PatternSyntax::RootGlob,
706 PatternSyntax::RootGlob,
707 b"",
707 b"",
708 Path::new("")
708 Path::new("")
709 ))
709 ))
710 .unwrap(),
710 .unwrap(),
711 None,
711 None,
712 );
712 );
713 assert_eq!(
713 assert_eq!(
714 build_single_regex(&IgnorePattern::new(
714 build_single_regex(&IgnorePattern::new(
715 PatternSyntax::RootGlob,
715 PatternSyntax::RootGlob,
716 b"whatever",
716 b"whatever",
717 Path::new("")
717 Path::new("")
718 ))
718 ))
719 .unwrap(),
719 .unwrap(),
720 None,
720 None,
721 );
721 );
722 assert_eq!(
722 assert_eq!(
723 build_single_regex(&IgnorePattern::new(
723 build_single_regex(&IgnorePattern::new(
724 PatternSyntax::RootGlob,
724 PatternSyntax::RootGlob,
725 b"*.o",
725 b"*.o",
726 Path::new("")
726 Path::new("")
727 ))
727 ))
728 .unwrap(),
728 .unwrap(),
729 Some(br"[^/]*\.o(?:/|$)".to_vec()),
729 Some(br"[^/]*\.o(?:/|$)".to_vec()),
730 );
730 );
731 }
731 }
732
732
733 #[test]
733 #[test]
734 fn test_build_single_relregex() {
734 fn test_build_single_relregex() {
735 assert_eq!(
735 assert_eq!(
736 build_single_regex(&IgnorePattern::new(
736 build_single_regex(&IgnorePattern::new(
737 PatternSyntax::RelRegexp,
737 PatternSyntax::RelRegexp,
738 b"^ba{2}r",
738 b"^ba{2}r",
739 Path::new("")
739 Path::new("")
740 ))
740 ))
741 .unwrap(),
741 .unwrap(),
742 Some(b"^ba{2}r".to_vec()),
742 Some(b"^ba{2}r".to_vec()),
743 );
743 );
744 assert_eq!(
744 assert_eq!(
745 build_single_regex(&IgnorePattern::new(
745 build_single_regex(&IgnorePattern::new(
746 PatternSyntax::RelRegexp,
746 PatternSyntax::RelRegexp,
747 b"ba{2}r",
747 b"ba{2}r",
748 Path::new("")
748 Path::new("")
749 ))
749 ))
750 .unwrap(),
750 .unwrap(),
751 Some(b".*ba{2}r".to_vec()),
751 Some(b".*ba{2}r".to_vec()),
752 );
752 );
753 assert_eq!(
753 assert_eq!(
754 build_single_regex(&IgnorePattern::new(
754 build_single_regex(&IgnorePattern::new(
755 PatternSyntax::RelRegexp,
755 PatternSyntax::RelRegexp,
756 b"(?ia)ba{2}r",
756 b"(?ia)ba{2}r",
757 Path::new("")
757 Path::new("")
758 ))
758 ))
759 .unwrap(),
759 .unwrap(),
760 Some(b"(?ia:.*ba{2}r)".to_vec()),
760 Some(b"(?ia:.*ba{2}r)".to_vec()),
761 );
761 );
762 assert_eq!(
762 assert_eq!(
763 build_single_regex(&IgnorePattern::new(
763 build_single_regex(&IgnorePattern::new(
764 PatternSyntax::RelRegexp,
764 PatternSyntax::RelRegexp,
765 b"(?ia)^ba{2}r",
765 b"(?ia)^ba{2}r",
766 Path::new("")
766 Path::new("")
767 ))
767 ))
768 .unwrap(),
768 .unwrap(),
769 Some(b"(?ia:^ba{2}r)".to_vec()),
769 Some(b"(?ia:^ba{2}r)".to_vec()),
770 );
770 );
771 }
771 }
772 }
772 }
@@ -1,186 +1,188 b''
1 //! Filesystem-based locks for local repositories
1 //! Filesystem-based locks for local repositories
2
2
3 use crate::errors::HgError;
3 use crate::errors::HgError;
4 use crate::errors::HgResultExt;
4 use crate::errors::HgResultExt;
5 use crate::vfs::Vfs;
5 use crate::vfs::Vfs;
6 use std::io;
6 use std::io;
7 use std::io::ErrorKind;
7 use std::io::ErrorKind;
8
8
9 #[derive(derive_more::From)]
9 #[derive(derive_more::From)]
10 pub enum LockError {
10 pub enum LockError {
11 AlreadyHeld,
11 AlreadyHeld,
12 #[from]
12 #[from]
13 Other(HgError),
13 Other(HgError),
14 }
14 }
15
15
16 /// Try to call `f` with the lock acquired, without waiting.
16 /// Try to call `f` with the lock acquired, without waiting.
17 ///
17 ///
18 /// If the lock is aready held, `f` is not called and `LockError::AlreadyHeld`
18 /// If the lock is aready held, `f` is not called and `LockError::AlreadyHeld`
19 /// is returned. `LockError::Io` is returned for any unexpected I/O error
19 /// is returned. `LockError::Io` is returned for any unexpected I/O error
20 /// accessing the lock file, including for removing it after `f` was called.
20 /// accessing the lock file, including for removing it after `f` was called.
21 /// The return value of `f` is dropped in that case. If all is successful, the
21 /// The return value of `f` is dropped in that case. If all is successful, the
22 /// return value of `f` is forwarded.
22 /// return value of `f` is forwarded.
23 pub fn try_with_lock_no_wait<R>(
23 pub fn try_with_lock_no_wait<R>(
24 hg_vfs: Vfs,
24 hg_vfs: Vfs,
25 lock_filename: &str,
25 lock_filename: &str,
26 f: impl FnOnce() -> R,
26 f: impl FnOnce() -> R,
27 ) -> Result<R, LockError> {
27 ) -> Result<R, LockError> {
28 let our_lock_data = &*OUR_LOCK_DATA;
28 let our_lock_data = &*OUR_LOCK_DATA;
29 for _retry in 0..5 {
29 for _retry in 0..5 {
30 match make_lock(hg_vfs, lock_filename, our_lock_data) {
30 match make_lock(hg_vfs, lock_filename, our_lock_data) {
31 Ok(()) => {
31 Ok(()) => {
32 let result = f();
32 let result = f();
33 unlock(hg_vfs, lock_filename)?;
33 unlock(hg_vfs, lock_filename)?;
34 return Ok(result);
34 return Ok(result);
35 }
35 }
36 Err(HgError::IoError { error, .. })
36 Err(HgError::IoError { error, .. })
37 if error.kind() == ErrorKind::AlreadyExists =>
37 if error.kind() == ErrorKind::AlreadyExists =>
38 {
38 {
39 let lock_data = read_lock(hg_vfs, lock_filename)?;
39 let lock_data = read_lock(hg_vfs, lock_filename)?;
40 if lock_data.is_none() {
40 if lock_data.is_none() {
41 // Lock was apparently just released, retry acquiring it
41 // Lock was apparently just released, retry acquiring it
42 continue;
42 continue;
43 }
43 }
44 if !lock_should_be_broken(&lock_data) {
44 if !lock_should_be_broken(&lock_data) {
45 return Err(LockError::AlreadyHeld);
45 return Err(LockError::AlreadyHeld);
46 }
46 }
47 // The lock file is left over from a process not running
47 // The lock file is left over from a process not running
48 // anymore. Break it, but with another lock to
48 // anymore. Break it, but with another lock to
49 // avoid a race.
49 // avoid a race.
50 break_lock(hg_vfs, lock_filename)?;
50 break_lock(hg_vfs, lock_filename)?;
51
51
52 // Retry acquiring
52 // Retry acquiring
53 }
53 }
54 Err(error) => Err(error)?,
54 Err(error) => Err(error)?,
55 }
55 }
56 }
56 }
57 Err(LockError::AlreadyHeld)
57 Err(LockError::AlreadyHeld)
58 }
58 }
59
59
60 fn break_lock(hg_vfs: Vfs, lock_filename: &str) -> Result<(), LockError> {
60 fn break_lock(hg_vfs: Vfs, lock_filename: &str) -> Result<(), LockError> {
61 try_with_lock_no_wait(hg_vfs, &format!("{}.break", lock_filename), || {
61 try_with_lock_no_wait(hg_vfs, &format!("{}.break", lock_filename), || {
62 // Check again in case some other process broke and
62 // Check again in case some other process broke and
63 // acquired the lock in the meantime
63 // acquired the lock in the meantime
64 let lock_data = read_lock(hg_vfs, lock_filename)?;
64 let lock_data = read_lock(hg_vfs, lock_filename)?;
65 if !lock_should_be_broken(&lock_data) {
65 if !lock_should_be_broken(&lock_data) {
66 return Err(LockError::AlreadyHeld);
66 return Err(LockError::AlreadyHeld);
67 }
67 }
68 Ok(hg_vfs.remove_file(lock_filename)?)
68 Ok(hg_vfs.remove_file(lock_filename)?)
69 })?
69 })?
70 }
70 }
71
71
72 #[cfg(unix)]
72 #[cfg(unix)]
73 fn make_lock(
73 fn make_lock(
74 hg_vfs: Vfs,
74 hg_vfs: Vfs,
75 lock_filename: &str,
75 lock_filename: &str,
76 data: &str,
76 data: &str,
77 ) -> Result<(), HgError> {
77 ) -> Result<(), HgError> {
78 // Use a symbolic link because creating it is atomic.
78 // Use a symbolic link because creating it is atomic.
79 // The link’s "target" contains data not representing any path.
79 // The link’s "target" contains data not representing any path.
80 let fake_symlink_target = data;
80 let fake_symlink_target = data;
81 hg_vfs.create_symlink(lock_filename, fake_symlink_target)
81 hg_vfs.create_symlink(lock_filename, fake_symlink_target)
82 }
82 }
83
83
84 fn read_lock(
84 fn read_lock(
85 hg_vfs: Vfs,
85 hg_vfs: Vfs,
86 lock_filename: &str,
86 lock_filename: &str,
87 ) -> Result<Option<String>, HgError> {
87 ) -> Result<Option<String>, HgError> {
88 let link_target =
88 let link_target =
89 hg_vfs.read_link(lock_filename).io_not_found_as_none()?;
89 hg_vfs.read_link(lock_filename).io_not_found_as_none()?;
90 if let Some(target) = link_target {
90 if let Some(target) = link_target {
91 let data = target
91 let data = target
92 .into_os_string()
92 .into_os_string()
93 .into_string()
93 .into_string()
94 .map_err(|_| HgError::corrupted("non-UTF-8 lock data"))?;
94 .map_err(|_| HgError::corrupted("non-UTF-8 lock data"))?;
95 Ok(Some(data))
95 Ok(Some(data))
96 } else {
96 } else {
97 Ok(None)
97 Ok(None)
98 }
98 }
99 }
99 }
100
100
101 fn unlock(hg_vfs: Vfs, lock_filename: &str) -> Result<(), HgError> {
101 fn unlock(hg_vfs: Vfs, lock_filename: &str) -> Result<(), HgError> {
102 hg_vfs.remove_file(lock_filename)
102 hg_vfs.remove_file(lock_filename)
103 }
103 }
104
104
105 /// Return whether the process that is/was holding the lock is known not to be
105 /// Return whether the process that is/was holding the lock is known not to be
106 /// running anymore.
106 /// running anymore.
107 fn lock_should_be_broken(data: &Option<String>) -> bool {
107 fn lock_should_be_broken(data: &Option<String>) -> bool {
108 (|| -> Option<bool> {
108 (|| -> Option<bool> {
109 let (prefix, pid) = data.as_ref()?.split_once(':')?;
109 let (prefix, pid) = data.as_ref()?.split_once(':')?;
110 if prefix != &*LOCK_PREFIX {
110 if prefix != *LOCK_PREFIX {
111 return Some(false);
111 return Some(false);
112 }
112 }
113 let process_is_running;
113 let process_is_running;
114
114
115 #[cfg(unix)]
115 #[cfg(unix)]
116 {
116 {
117 let pid: libc::pid_t = pid.parse().ok()?;
117 let pid: libc::pid_t = pid.parse().ok()?;
118 unsafe {
118 unsafe {
119 let signal = 0; // Test if we could send a signal, without sending
119 let signal = 0; // Test if we could send a signal, without sending
120 let result = libc::kill(pid, signal);
120 let result = libc::kill(pid, signal);
121 if result == 0 {
121 if result == 0 {
122 process_is_running = true
122 process_is_running = true
123 } else {
123 } else {
124 let errno =
124 let errno =
125 io::Error::last_os_error().raw_os_error().unwrap();
125 io::Error::last_os_error().raw_os_error().unwrap();
126 process_is_running = errno != libc::ESRCH
126 process_is_running = errno != libc::ESRCH
127 }
127 }
128 }
128 }
129 }
129 }
130
130
131 Some(!process_is_running)
131 Some(!process_is_running)
132 })()
132 })()
133 .unwrap_or(false)
133 .unwrap_or(false)
134 }
134 }
135
135
136 lazy_static::lazy_static! {
136 lazy_static::lazy_static! {
137 /// A string which is used to differentiate pid namespaces
137 /// A string which is used to differentiate pid namespaces
138 ///
138 ///
139 /// It's useful to detect "dead" processes and remove stale locks with
139 /// It's useful to detect "dead" processes and remove stale locks with
140 /// confidence. Typically it's just hostname. On modern linux, we include an
140 /// confidence. Typically it's just hostname. On modern linux, we include an
141 /// extra Linux-specific pid namespace identifier.
141 /// extra Linux-specific pid namespace identifier.
142 static ref LOCK_PREFIX: String = {
142 static ref LOCK_PREFIX: String = {
143 // Note: this must match the behavior of `_getlockprefix` in `mercurial/lock.py`
143 // Note: this must match the behavior of `_getlockprefix` in `mercurial/lock.py`
144
144
145 /// Same as https://github.com/python/cpython/blob/v3.10.0/Modules/socketmodule.c#L5414
145 /// Same as https://github.com/python/cpython/blob/v3.10.0/Modules/socketmodule.c#L5414
146 const BUFFER_SIZE: usize = 1024;
146 const BUFFER_SIZE: usize = 1024;
147 // This cast is *needed* for platforms with signed chars
148 #[allow(clippy::unnecessary_cast)]
147 let mut buffer = [0 as libc::c_char; BUFFER_SIZE];
149 let mut buffer = [0 as libc::c_char; BUFFER_SIZE];
148 let hostname_bytes = unsafe {
150 let hostname_bytes = unsafe {
149 let result = libc::gethostname(buffer.as_mut_ptr(), BUFFER_SIZE);
151 let result = libc::gethostname(buffer.as_mut_ptr(), BUFFER_SIZE);
150 if result != 0 {
152 if result != 0 {
151 panic!("gethostname: {}", io::Error::last_os_error())
153 panic!("gethostname: {}", io::Error::last_os_error())
152 }
154 }
153 std::ffi::CStr::from_ptr(buffer.as_mut_ptr()).to_bytes()
155 std::ffi::CStr::from_ptr(buffer.as_mut_ptr()).to_bytes()
154 };
156 };
155 let hostname =
157 let hostname =
156 std::str::from_utf8(hostname_bytes).expect("non-UTF-8 hostname");
158 std::str::from_utf8(hostname_bytes).expect("non-UTF-8 hostname");
157
159
158 #[cfg(target_os = "linux")]
160 #[cfg(target_os = "linux")]
159 {
161 {
160 use std::os::linux::fs::MetadataExt;
162 use std::os::linux::fs::MetadataExt;
161 match std::fs::metadata("/proc/self/ns/pid") {
163 match std::fs::metadata("/proc/self/ns/pid") {
162 Ok(meta) => {
164 Ok(meta) => {
163 return format!("{}/{:x}", hostname, meta.st_ino())
165 return format!("{}/{:x}", hostname, meta.st_ino())
164 }
166 }
165 Err(error) => {
167 Err(error) => {
166 // TODO: match on `error.kind()` when `NotADirectory`
168 // TODO: match on `error.kind()` when `NotADirectory`
167 // is available on all supported Rust versions:
169 // is available on all supported Rust versions:
168 // https://github.com/rust-lang/rust/issues/86442
170 // https://github.com/rust-lang/rust/issues/86442
169 use libc::{
171 use libc::{
170 ENOENT, // ErrorKind::NotFound
172 ENOENT, // ErrorKind::NotFound
171 ENOTDIR, // ErrorKind::NotADirectory
173 ENOTDIR, // ErrorKind::NotADirectory
172 EACCES, // ErrorKind::PermissionDenied
174 EACCES, // ErrorKind::PermissionDenied
173 };
175 };
174 match error.raw_os_error() {
176 match error.raw_os_error() {
175 Some(ENOENT) | Some(ENOTDIR) | Some(EACCES) => {}
177 Some(ENOENT) | Some(ENOTDIR) | Some(EACCES) => {}
176 _ => panic!("stat /proc/self/ns/pid: {}", error),
178 _ => panic!("stat /proc/self/ns/pid: {}", error),
177 }
179 }
178 }
180 }
179 }
181 }
180 }
182 }
181
183
182 hostname.to_owned()
184 hostname.to_owned()
183 };
185 };
184
186
185 static ref OUR_LOCK_DATA: String = format!("{}:{}", &*LOCK_PREFIX, std::process::id());
187 static ref OUR_LOCK_DATA: String = format!("{}:{}", &*LOCK_PREFIX, std::process::id());
186 }
188 }
@@ -1,1719 +1,1717 b''
1 // matchers.rs
1 // matchers.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Structs and types for matching files and directories.
8 //! Structs and types for matching files and directories.
9
9
10 use crate::{
10 use crate::{
11 dirstate::dirs_multiset::DirsChildrenMultiset,
11 dirstate::dirs_multiset::DirsChildrenMultiset,
12 filepatterns::{
12 filepatterns::{
13 build_single_regex, filter_subincludes, get_patterns_from_file,
13 build_single_regex, filter_subincludes, get_patterns_from_file,
14 PatternFileWarning, PatternResult,
14 PatternFileWarning, PatternResult,
15 },
15 },
16 utils::{
16 utils::{
17 files::find_dirs,
17 files::find_dirs,
18 hg_path::{HgPath, HgPathBuf},
18 hg_path::{HgPath, HgPathBuf},
19 Escaped,
19 Escaped,
20 },
20 },
21 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
21 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
22 PatternSyntax,
22 PatternSyntax,
23 };
23 };
24
24
25 use crate::dirstate::status::IgnoreFnType;
25 use crate::dirstate::status::IgnoreFnType;
26 use crate::filepatterns::normalize_path_bytes;
26 use crate::filepatterns::normalize_path_bytes;
27 use std::borrow::ToOwned;
27 use std::borrow::ToOwned;
28 use std::collections::HashSet;
28 use std::collections::HashSet;
29 use std::fmt::{Display, Error, Formatter};
29 use std::fmt::{Display, Error, Formatter};
30 use std::ops::Deref;
30 use std::ops::Deref;
31 use std::path::{Path, PathBuf};
31 use std::path::{Path, PathBuf};
32
32
33 #[derive(Debug, PartialEq)]
33 #[derive(Debug, PartialEq)]
34 pub enum VisitChildrenSet {
34 pub enum VisitChildrenSet {
35 /// Don't visit anything
35 /// Don't visit anything
36 Empty,
36 Empty,
37 /// Only visit this directory
37 /// Only visit this directory
38 This,
38 This,
39 /// Visit this directory and these subdirectories
39 /// Visit this directory and these subdirectories
40 /// TODO Should we implement a `NonEmptyHashSet`?
40 /// TODO Should we implement a `NonEmptyHashSet`?
41 Set(HashSet<HgPathBuf>),
41 Set(HashSet<HgPathBuf>),
42 /// Visit this directory and all subdirectories
42 /// Visit this directory and all subdirectories
43 Recursive,
43 Recursive,
44 }
44 }
45
45
46 pub trait Matcher: core::fmt::Debug {
46 pub trait Matcher: core::fmt::Debug {
47 /// Explicitly listed files
47 /// Explicitly listed files
48 fn file_set(&self) -> Option<&HashSet<HgPathBuf>>;
48 fn file_set(&self) -> Option<&HashSet<HgPathBuf>>;
49 /// Returns whether `filename` is in `file_set`
49 /// Returns whether `filename` is in `file_set`
50 fn exact_match(&self, filename: &HgPath) -> bool;
50 fn exact_match(&self, filename: &HgPath) -> bool;
51 /// Returns whether `filename` is matched by this matcher
51 /// Returns whether `filename` is matched by this matcher
52 fn matches(&self, filename: &HgPath) -> bool;
52 fn matches(&self, filename: &HgPath) -> bool;
53 /// Decides whether a directory should be visited based on whether it
53 /// Decides whether a directory should be visited based on whether it
54 /// has potential matches in it or one of its subdirectories, and
54 /// has potential matches in it or one of its subdirectories, and
55 /// potentially lists which subdirectories of that directory should be
55 /// potentially lists which subdirectories of that directory should be
56 /// visited. This is based on the match's primary, included, and excluded
56 /// visited. This is based on the match's primary, included, and excluded
57 /// patterns.
57 /// patterns.
58 ///
58 ///
59 /// # Example
59 /// # Example
60 ///
60 ///
61 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
61 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
62 /// return the following values (assuming the implementation of
62 /// return the following values (assuming the implementation of
63 /// visit_children_set is capable of recognizing this; some implementations
63 /// visit_children_set is capable of recognizing this; some implementations
64 /// are not).
64 /// are not).
65 ///
65 ///
66 /// ```text
66 /// ```text
67 /// ```ignore
67 /// ```ignore
68 /// '' -> {'foo', 'qux'}
68 /// '' -> {'foo', 'qux'}
69 /// 'baz' -> set()
69 /// 'baz' -> set()
70 /// 'foo' -> {'bar'}
70 /// 'foo' -> {'bar'}
71 /// // Ideally this would be `Recursive`, but since the prefix nature of
71 /// // Ideally this would be `Recursive`, but since the prefix nature of
72 /// // matchers is applied to the entire matcher, we have to downgrade this
72 /// // matchers is applied to the entire matcher, we have to downgrade this
73 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
73 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
74 /// // `RootFilesIn'-kind matcher being mixed in.
74 /// // `RootFilesIn'-kind matcher being mixed in.
75 /// 'foo/bar' -> 'this'
75 /// 'foo/bar' -> 'this'
76 /// 'qux' -> 'this'
76 /// 'qux' -> 'this'
77 /// ```
77 /// ```
78 /// # Important
78 /// # Important
79 ///
79 ///
80 /// Most matchers do not know if they're representing files or
80 /// Most matchers do not know if they're representing files or
81 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
81 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
82 /// file or a directory, so `visit_children_set('dir')` for most matchers
82 /// file or a directory, so `visit_children_set('dir')` for most matchers
83 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
83 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
84 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
84 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
85 /// it may return `VisitChildrenSet::This`.
85 /// it may return `VisitChildrenSet::This`.
86 /// Do not rely on the return being a `HashSet` indicating that there are
86 /// Do not rely on the return being a `HashSet` indicating that there are
87 /// no files in this dir to investigate (or equivalently that if there are
87 /// no files in this dir to investigate (or equivalently that if there are
88 /// files to investigate in 'dir' that it will always return
88 /// files to investigate in 'dir' that it will always return
89 /// `VisitChildrenSet::This`).
89 /// `VisitChildrenSet::This`).
90 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
90 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
91 /// Matcher will match everything and `files_set()` will be empty:
91 /// Matcher will match everything and `files_set()` will be empty:
92 /// optimization might be possible.
92 /// optimization might be possible.
93 fn matches_everything(&self) -> bool;
93 fn matches_everything(&self) -> bool;
94 /// Matcher will match exactly the files in `files_set()`: optimization
94 /// Matcher will match exactly the files in `files_set()`: optimization
95 /// might be possible.
95 /// might be possible.
96 fn is_exact(&self) -> bool;
96 fn is_exact(&self) -> bool;
97 }
97 }
98
98
99 /// Matches everything.
99 /// Matches everything.
100 ///```
100 ///```
101 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
101 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
102 ///
102 ///
103 /// let matcher = AlwaysMatcher;
103 /// let matcher = AlwaysMatcher;
104 ///
104 ///
105 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
105 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
106 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
106 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
107 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
107 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
108 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
108 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
109 /// ```
109 /// ```
110 #[derive(Debug)]
110 #[derive(Debug)]
111 pub struct AlwaysMatcher;
111 pub struct AlwaysMatcher;
112
112
113 impl Matcher for AlwaysMatcher {
113 impl Matcher for AlwaysMatcher {
114 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
114 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
115 None
115 None
116 }
116 }
117 fn exact_match(&self, _filename: &HgPath) -> bool {
117 fn exact_match(&self, _filename: &HgPath) -> bool {
118 false
118 false
119 }
119 }
120 fn matches(&self, _filename: &HgPath) -> bool {
120 fn matches(&self, _filename: &HgPath) -> bool {
121 true
121 true
122 }
122 }
123 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
123 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
124 VisitChildrenSet::Recursive
124 VisitChildrenSet::Recursive
125 }
125 }
126 fn matches_everything(&self) -> bool {
126 fn matches_everything(&self) -> bool {
127 true
127 true
128 }
128 }
129 fn is_exact(&self) -> bool {
129 fn is_exact(&self) -> bool {
130 false
130 false
131 }
131 }
132 }
132 }
133
133
134 /// Matches nothing.
134 /// Matches nothing.
135 #[derive(Debug)]
135 #[derive(Debug)]
136 pub struct NeverMatcher;
136 pub struct NeverMatcher;
137
137
138 impl Matcher for NeverMatcher {
138 impl Matcher for NeverMatcher {
139 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
139 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
140 None
140 None
141 }
141 }
142 fn exact_match(&self, _filename: &HgPath) -> bool {
142 fn exact_match(&self, _filename: &HgPath) -> bool {
143 false
143 false
144 }
144 }
145 fn matches(&self, _filename: &HgPath) -> bool {
145 fn matches(&self, _filename: &HgPath) -> bool {
146 false
146 false
147 }
147 }
148 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
148 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
149 VisitChildrenSet::Empty
149 VisitChildrenSet::Empty
150 }
150 }
151 fn matches_everything(&self) -> bool {
151 fn matches_everything(&self) -> bool {
152 false
152 false
153 }
153 }
154 fn is_exact(&self) -> bool {
154 fn is_exact(&self) -> bool {
155 true
155 true
156 }
156 }
157 }
157 }
158
158
159 /// Matches the input files exactly. They are interpreted as paths, not
159 /// Matches the input files exactly. They are interpreted as paths, not
160 /// patterns.
160 /// patterns.
161 ///
161 ///
162 ///```
162 ///```
163 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
163 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
164 ///
164 ///
165 /// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
165 /// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
166 /// let matcher = FileMatcher::new(files).unwrap();
166 /// let matcher = FileMatcher::new(files).unwrap();
167 ///
167 ///
168 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
168 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
169 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
169 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
170 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
170 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
171 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
171 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
172 /// ```
172 /// ```
173 #[derive(Debug)]
173 #[derive(Debug)]
174 pub struct FileMatcher {
174 pub struct FileMatcher {
175 files: HashSet<HgPathBuf>,
175 files: HashSet<HgPathBuf>,
176 dirs: DirsMultiset,
176 dirs: DirsMultiset,
177 }
177 }
178
178
179 impl FileMatcher {
179 impl FileMatcher {
180 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, DirstateMapError> {
180 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, DirstateMapError> {
181 let dirs = DirsMultiset::from_manifest(&files)?;
181 let dirs = DirsMultiset::from_manifest(&files)?;
182 Ok(Self {
182 Ok(Self {
183 files: HashSet::from_iter(files.into_iter()),
183 files: HashSet::from_iter(files.into_iter()),
184 dirs,
184 dirs,
185 })
185 })
186 }
186 }
187 fn inner_matches(&self, filename: &HgPath) -> bool {
187 fn inner_matches(&self, filename: &HgPath) -> bool {
188 self.files.contains(filename.as_ref())
188 self.files.contains(filename.as_ref())
189 }
189 }
190 }
190 }
191
191
192 impl Matcher for FileMatcher {
192 impl Matcher for FileMatcher {
193 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
193 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
194 Some(&self.files)
194 Some(&self.files)
195 }
195 }
196 fn exact_match(&self, filename: &HgPath) -> bool {
196 fn exact_match(&self, filename: &HgPath) -> bool {
197 self.inner_matches(filename)
197 self.inner_matches(filename)
198 }
198 }
199 fn matches(&self, filename: &HgPath) -> bool {
199 fn matches(&self, filename: &HgPath) -> bool {
200 self.inner_matches(filename)
200 self.inner_matches(filename)
201 }
201 }
202 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
202 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
203 if self.files.is_empty() || !self.dirs.contains(&directory) {
203 if self.files.is_empty() || !self.dirs.contains(&directory) {
204 return VisitChildrenSet::Empty;
204 return VisitChildrenSet::Empty;
205 }
205 }
206 let mut candidates: HashSet<HgPathBuf> =
206 let mut candidates: HashSet<HgPathBuf> =
207 self.dirs.iter().cloned().collect();
207 self.dirs.iter().cloned().collect();
208
208
209 candidates.extend(self.files.iter().cloned());
209 candidates.extend(self.files.iter().cloned());
210 candidates.remove(HgPath::new(b""));
210 candidates.remove(HgPath::new(b""));
211
211
212 if !directory.as_ref().is_empty() {
212 if !directory.as_ref().is_empty() {
213 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
213 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
214 candidates = candidates
214 candidates = candidates
215 .iter()
215 .iter()
216 .filter_map(|c| {
216 .filter_map(|c| {
217 if c.as_bytes().starts_with(&directory) {
217 if c.as_bytes().starts_with(&directory) {
218 Some(HgPathBuf::from_bytes(
218 Some(HgPathBuf::from_bytes(
219 &c.as_bytes()[directory.len()..],
219 &c.as_bytes()[directory.len()..],
220 ))
220 ))
221 } else {
221 } else {
222 None
222 None
223 }
223 }
224 })
224 })
225 .collect();
225 .collect();
226 }
226 }
227
227
228 // `self.dirs` includes all of the directories, recursively, so if
228 // `self.dirs` includes all of the directories, recursively, so if
229 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
229 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
230 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
230 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
231 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
231 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
232 // subdir will be in there without a slash.
232 // subdir will be in there without a slash.
233 VisitChildrenSet::Set(
233 VisitChildrenSet::Set(
234 candidates
234 candidates
235 .into_iter()
235 .into_iter()
236 .filter_map(|c| {
236 .filter_map(|c| {
237 if c.bytes().all(|b| *b != b'/') {
237 if c.bytes().all(|b| *b != b'/') {
238 Some(c)
238 Some(c)
239 } else {
239 } else {
240 None
240 None
241 }
241 }
242 })
242 })
243 .collect(),
243 .collect(),
244 )
244 )
245 }
245 }
246 fn matches_everything(&self) -> bool {
246 fn matches_everything(&self) -> bool {
247 false
247 false
248 }
248 }
249 fn is_exact(&self) -> bool {
249 fn is_exact(&self) -> bool {
250 true
250 true
251 }
251 }
252 }
252 }
253
253
254 /// Matches files that are included in the ignore rules.
254 /// Matches files that are included in the ignore rules.
255 /// ```
255 /// ```
256 /// use hg::{
256 /// use hg::{
257 /// matchers::{IncludeMatcher, Matcher},
257 /// matchers::{IncludeMatcher, Matcher},
258 /// IgnorePattern,
258 /// IgnorePattern,
259 /// PatternSyntax,
259 /// PatternSyntax,
260 /// utils::hg_path::HgPath
260 /// utils::hg_path::HgPath
261 /// };
261 /// };
262 /// use std::path::Path;
262 /// use std::path::Path;
263 /// ///
263 /// ///
264 /// let ignore_patterns =
264 /// let ignore_patterns =
265 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
265 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
266 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
266 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
267 /// ///
267 /// ///
268 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
268 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
269 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
269 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
270 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
270 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
271 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
271 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
272 /// ```
272 /// ```
273 pub struct IncludeMatcher<'a> {
273 pub struct IncludeMatcher<'a> {
274 patterns: Vec<u8>,
274 patterns: Vec<u8>,
275 match_fn: IgnoreFnType<'a>,
275 match_fn: IgnoreFnType<'a>,
276 /// Whether all the patterns match a prefix (i.e. recursively)
276 /// Whether all the patterns match a prefix (i.e. recursively)
277 prefix: bool,
277 prefix: bool,
278 roots: HashSet<HgPathBuf>,
278 roots: HashSet<HgPathBuf>,
279 dirs: HashSet<HgPathBuf>,
279 dirs: HashSet<HgPathBuf>,
280 parents: HashSet<HgPathBuf>,
280 parents: HashSet<HgPathBuf>,
281 }
281 }
282
282
283 impl core::fmt::Debug for IncludeMatcher<'_> {
283 impl core::fmt::Debug for IncludeMatcher<'_> {
284 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
284 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
285 f.debug_struct("IncludeMatcher")
285 f.debug_struct("IncludeMatcher")
286 .field("patterns", &String::from_utf8_lossy(&self.patterns))
286 .field("patterns", &String::from_utf8_lossy(&self.patterns))
287 .field("prefix", &self.prefix)
287 .field("prefix", &self.prefix)
288 .field("roots", &self.roots)
288 .field("roots", &self.roots)
289 .field("dirs", &self.dirs)
289 .field("dirs", &self.dirs)
290 .field("parents", &self.parents)
290 .field("parents", &self.parents)
291 .finish()
291 .finish()
292 }
292 }
293 }
293 }
294
294
295 impl<'a> Matcher for IncludeMatcher<'a> {
295 impl<'a> Matcher for IncludeMatcher<'a> {
296 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
296 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
297 None
297 None
298 }
298 }
299
299
300 fn exact_match(&self, _filename: &HgPath) -> bool {
300 fn exact_match(&self, _filename: &HgPath) -> bool {
301 false
301 false
302 }
302 }
303
303
304 fn matches(&self, filename: &HgPath) -> bool {
304 fn matches(&self, filename: &HgPath) -> bool {
305 (self.match_fn)(filename.as_ref())
305 (self.match_fn)(filename)
306 }
306 }
307
307
308 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
308 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
309 let dir = directory.as_ref();
309 let dir = directory;
310 if self.prefix && self.roots.contains(dir) {
310 if self.prefix && self.roots.contains(dir) {
311 return VisitChildrenSet::Recursive;
311 return VisitChildrenSet::Recursive;
312 }
312 }
313 if self.roots.contains(HgPath::new(b""))
313 if self.roots.contains(HgPath::new(b""))
314 || self.roots.contains(dir)
314 || self.roots.contains(dir)
315 || self.dirs.contains(dir)
315 || self.dirs.contains(dir)
316 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
316 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
317 {
317 {
318 return VisitChildrenSet::This;
318 return VisitChildrenSet::This;
319 }
319 }
320
320
321 if self.parents.contains(directory.as_ref()) {
321 if self.parents.contains(dir.as_ref()) {
322 let multiset = self.get_all_parents_children();
322 let multiset = self.get_all_parents_children();
323 if let Some(children) = multiset.get(dir) {
323 if let Some(children) = multiset.get(dir) {
324 return VisitChildrenSet::Set(
324 return VisitChildrenSet::Set(
325 children.into_iter().map(HgPathBuf::from).collect(),
325 children.iter().map(HgPathBuf::from).collect(),
326 );
326 );
327 }
327 }
328 }
328 }
329 VisitChildrenSet::Empty
329 VisitChildrenSet::Empty
330 }
330 }
331
331
332 fn matches_everything(&self) -> bool {
332 fn matches_everything(&self) -> bool {
333 false
333 false
334 }
334 }
335
335
336 fn is_exact(&self) -> bool {
336 fn is_exact(&self) -> bool {
337 false
337 false
338 }
338 }
339 }
339 }
340
340
341 /// The union of multiple matchers. Will match if any of the matchers match.
341 /// The union of multiple matchers. Will match if any of the matchers match.
342 #[derive(Debug)]
342 #[derive(Debug)]
343 pub struct UnionMatcher {
343 pub struct UnionMatcher {
344 matchers: Vec<Box<dyn Matcher + Sync>>,
344 matchers: Vec<Box<dyn Matcher + Sync>>,
345 }
345 }
346
346
347 impl Matcher for UnionMatcher {
347 impl Matcher for UnionMatcher {
348 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
348 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
349 None
349 None
350 }
350 }
351
351
352 fn exact_match(&self, _filename: &HgPath) -> bool {
352 fn exact_match(&self, _filename: &HgPath) -> bool {
353 false
353 false
354 }
354 }
355
355
356 fn matches(&self, filename: &HgPath) -> bool {
356 fn matches(&self, filename: &HgPath) -> bool {
357 self.matchers.iter().any(|m| m.matches(filename))
357 self.matchers.iter().any(|m| m.matches(filename))
358 }
358 }
359
359
360 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
360 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
361 let mut result = HashSet::new();
361 let mut result = HashSet::new();
362 let mut this = false;
362 let mut this = false;
363 for matcher in self.matchers.iter() {
363 for matcher in self.matchers.iter() {
364 let visit = matcher.visit_children_set(directory);
364 let visit = matcher.visit_children_set(directory);
365 match visit {
365 match visit {
366 VisitChildrenSet::Empty => continue,
366 VisitChildrenSet::Empty => continue,
367 VisitChildrenSet::This => {
367 VisitChildrenSet::This => {
368 this = true;
368 this = true;
369 // Don't break, we might have an 'all' in here.
369 // Don't break, we might have an 'all' in here.
370 continue;
370 continue;
371 }
371 }
372 VisitChildrenSet::Set(set) => {
372 VisitChildrenSet::Set(set) => {
373 result.extend(set);
373 result.extend(set);
374 }
374 }
375 VisitChildrenSet::Recursive => {
375 VisitChildrenSet::Recursive => {
376 return visit;
376 return visit;
377 }
377 }
378 }
378 }
379 }
379 }
380 if this {
380 if this {
381 return VisitChildrenSet::This;
381 return VisitChildrenSet::This;
382 }
382 }
383 if result.is_empty() {
383 if result.is_empty() {
384 VisitChildrenSet::Empty
384 VisitChildrenSet::Empty
385 } else {
385 } else {
386 VisitChildrenSet::Set(result)
386 VisitChildrenSet::Set(result)
387 }
387 }
388 }
388 }
389
389
390 fn matches_everything(&self) -> bool {
390 fn matches_everything(&self) -> bool {
391 // TODO Maybe if all are AlwaysMatcher?
391 // TODO Maybe if all are AlwaysMatcher?
392 false
392 false
393 }
393 }
394
394
395 fn is_exact(&self) -> bool {
395 fn is_exact(&self) -> bool {
396 false
396 false
397 }
397 }
398 }
398 }
399
399
400 impl UnionMatcher {
400 impl UnionMatcher {
401 pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self {
401 pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self {
402 Self { matchers }
402 Self { matchers }
403 }
403 }
404 }
404 }
405
405
406 #[derive(Debug)]
406 #[derive(Debug)]
407 pub struct IntersectionMatcher {
407 pub struct IntersectionMatcher {
408 m1: Box<dyn Matcher + Sync>,
408 m1: Box<dyn Matcher + Sync>,
409 m2: Box<dyn Matcher + Sync>,
409 m2: Box<dyn Matcher + Sync>,
410 files: Option<HashSet<HgPathBuf>>,
410 files: Option<HashSet<HgPathBuf>>,
411 }
411 }
412
412
413 impl Matcher for IntersectionMatcher {
413 impl Matcher for IntersectionMatcher {
414 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
414 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
415 self.files.as_ref()
415 self.files.as_ref()
416 }
416 }
417
417
418 fn exact_match(&self, filename: &HgPath) -> bool {
418 fn exact_match(&self, filename: &HgPath) -> bool {
419 self.files.as_ref().map_or(false, |f| f.contains(filename))
419 self.files.as_ref().map_or(false, |f| f.contains(filename))
420 }
420 }
421
421
422 fn matches(&self, filename: &HgPath) -> bool {
422 fn matches(&self, filename: &HgPath) -> bool {
423 self.m1.matches(filename) && self.m2.matches(filename)
423 self.m1.matches(filename) && self.m2.matches(filename)
424 }
424 }
425
425
426 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
426 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
427 let m1_set = self.m1.visit_children_set(directory);
427 let m1_set = self.m1.visit_children_set(directory);
428 if m1_set == VisitChildrenSet::Empty {
428 if m1_set == VisitChildrenSet::Empty {
429 return VisitChildrenSet::Empty;
429 return VisitChildrenSet::Empty;
430 }
430 }
431 let m2_set = self.m2.visit_children_set(directory);
431 let m2_set = self.m2.visit_children_set(directory);
432 if m2_set == VisitChildrenSet::Empty {
432 if m2_set == VisitChildrenSet::Empty {
433 return VisitChildrenSet::Empty;
433 return VisitChildrenSet::Empty;
434 }
434 }
435
435
436 if m1_set == VisitChildrenSet::Recursive {
436 if m1_set == VisitChildrenSet::Recursive {
437 return m2_set;
437 return m2_set;
438 } else if m2_set == VisitChildrenSet::Recursive {
438 } else if m2_set == VisitChildrenSet::Recursive {
439 return m1_set;
439 return m1_set;
440 }
440 }
441
441
442 match (&m1_set, &m2_set) {
442 match (&m1_set, &m2_set) {
443 (VisitChildrenSet::Recursive, _) => m2_set,
443 (VisitChildrenSet::Recursive, _) => m2_set,
444 (_, VisitChildrenSet::Recursive) => m1_set,
444 (_, VisitChildrenSet::Recursive) => m1_set,
445 (VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => {
445 (VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => {
446 VisitChildrenSet::This
446 VisitChildrenSet::This
447 }
447 }
448 (VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => {
448 (VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => {
449 let set: HashSet<_> = m1.intersection(&m2).cloned().collect();
449 let set: HashSet<_> = m1.intersection(m2).cloned().collect();
450 if set.is_empty() {
450 if set.is_empty() {
451 VisitChildrenSet::Empty
451 VisitChildrenSet::Empty
452 } else {
452 } else {
453 VisitChildrenSet::Set(set)
453 VisitChildrenSet::Set(set)
454 }
454 }
455 }
455 }
456 _ => unreachable!(),
456 _ => unreachable!(),
457 }
457 }
458 }
458 }
459
459
460 fn matches_everything(&self) -> bool {
460 fn matches_everything(&self) -> bool {
461 self.m1.matches_everything() && self.m2.matches_everything()
461 self.m1.matches_everything() && self.m2.matches_everything()
462 }
462 }
463
463
464 fn is_exact(&self) -> bool {
464 fn is_exact(&self) -> bool {
465 self.m1.is_exact() || self.m2.is_exact()
465 self.m1.is_exact() || self.m2.is_exact()
466 }
466 }
467 }
467 }
468
468
469 impl IntersectionMatcher {
469 impl IntersectionMatcher {
470 pub fn new(
470 pub fn new(
471 mut m1: Box<dyn Matcher + Sync>,
471 mut m1: Box<dyn Matcher + Sync>,
472 mut m2: Box<dyn Matcher + Sync>,
472 mut m2: Box<dyn Matcher + Sync>,
473 ) -> Self {
473 ) -> Self {
474 let files = if m1.is_exact() || m2.is_exact() {
474 let files = if m1.is_exact() || m2.is_exact() {
475 if !m1.is_exact() {
475 if !m1.is_exact() {
476 std::mem::swap(&mut m1, &mut m2);
476 std::mem::swap(&mut m1, &mut m2);
477 }
477 }
478 m1.file_set().map(|m1_files| {
478 m1.file_set().map(|m1_files| {
479 m1_files.iter().cloned().filter(|f| m2.matches(f)).collect()
479 m1_files.iter().cloned().filter(|f| m2.matches(f)).collect()
480 })
480 })
481 } else {
481 } else {
482 None
482 None
483 };
483 };
484 Self { m1, m2, files }
484 Self { m1, m2, files }
485 }
485 }
486 }
486 }
487
487
488 #[derive(Debug)]
488 #[derive(Debug)]
489 pub struct DifferenceMatcher {
489 pub struct DifferenceMatcher {
490 base: Box<dyn Matcher + Sync>,
490 base: Box<dyn Matcher + Sync>,
491 excluded: Box<dyn Matcher + Sync>,
491 excluded: Box<dyn Matcher + Sync>,
492 files: Option<HashSet<HgPathBuf>>,
492 files: Option<HashSet<HgPathBuf>>,
493 }
493 }
494
494
495 impl Matcher for DifferenceMatcher {
495 impl Matcher for DifferenceMatcher {
496 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
496 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
497 self.files.as_ref()
497 self.files.as_ref()
498 }
498 }
499
499
500 fn exact_match(&self, filename: &HgPath) -> bool {
500 fn exact_match(&self, filename: &HgPath) -> bool {
501 self.files.as_ref().map_or(false, |f| f.contains(filename))
501 self.files.as_ref().map_or(false, |f| f.contains(filename))
502 }
502 }
503
503
504 fn matches(&self, filename: &HgPath) -> bool {
504 fn matches(&self, filename: &HgPath) -> bool {
505 self.base.matches(filename) && !self.excluded.matches(filename)
505 self.base.matches(filename) && !self.excluded.matches(filename)
506 }
506 }
507
507
508 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
508 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
509 let excluded_set = self.excluded.visit_children_set(directory);
509 let excluded_set = self.excluded.visit_children_set(directory);
510 if excluded_set == VisitChildrenSet::Recursive {
510 if excluded_set == VisitChildrenSet::Recursive {
511 return VisitChildrenSet::Empty;
511 return VisitChildrenSet::Empty;
512 }
512 }
513 let base_set = self.base.visit_children_set(directory);
513 let base_set = self.base.visit_children_set(directory);
514 // Possible values for base: 'recursive', 'this', set(...), set()
514 // Possible values for base: 'recursive', 'this', set(...), set()
515 // Possible values for excluded: 'this', set(...), set()
515 // Possible values for excluded: 'this', set(...), set()
516 // If excluded has nothing under here that we care about, return base,
516 // If excluded has nothing under here that we care about, return base,
517 // even if it's 'recursive'.
517 // even if it's 'recursive'.
518 if excluded_set == VisitChildrenSet::Empty {
518 if excluded_set == VisitChildrenSet::Empty {
519 return base_set;
519 return base_set;
520 }
520 }
521 match base_set {
521 match base_set {
522 VisitChildrenSet::This | VisitChildrenSet::Recursive => {
522 VisitChildrenSet::This | VisitChildrenSet::Recursive => {
523 // Never return 'recursive' here if excluded_set is any kind of
523 // Never return 'recursive' here if excluded_set is any kind of
524 // non-empty (either 'this' or set(foo)), since excluded might
524 // non-empty (either 'this' or set(foo)), since excluded might
525 // return set() for a subdirectory.
525 // return set() for a subdirectory.
526 VisitChildrenSet::This
526 VisitChildrenSet::This
527 }
527 }
528 set => {
528 set => {
529 // Possible values for base: set(...), set()
529 // Possible values for base: set(...), set()
530 // Possible values for excluded: 'this', set(...)
530 // Possible values for excluded: 'this', set(...)
531 // We ignore excluded set results. They're possibly incorrect:
531 // We ignore excluded set results. They're possibly incorrect:
532 // base = path:dir/subdir
532 // base = path:dir/subdir
533 // excluded=rootfilesin:dir,
533 // excluded=rootfilesin:dir,
534 // visit_children_set(''):
534 // visit_children_set(''):
535 // base returns {'dir'}, excluded returns {'dir'}, if we
535 // base returns {'dir'}, excluded returns {'dir'}, if we
536 // subtracted we'd return set(), which is *not* correct, we
536 // subtracted we'd return set(), which is *not* correct, we
537 // still need to visit 'dir'!
537 // still need to visit 'dir'!
538 set
538 set
539 }
539 }
540 }
540 }
541 }
541 }
542
542
543 fn matches_everything(&self) -> bool {
543 fn matches_everything(&self) -> bool {
544 false
544 false
545 }
545 }
546
546
547 fn is_exact(&self) -> bool {
547 fn is_exact(&self) -> bool {
548 self.base.is_exact()
548 self.base.is_exact()
549 }
549 }
550 }
550 }
551
551
552 impl DifferenceMatcher {
552 impl DifferenceMatcher {
553 pub fn new(
553 pub fn new(
554 base: Box<dyn Matcher + Sync>,
554 base: Box<dyn Matcher + Sync>,
555 excluded: Box<dyn Matcher + Sync>,
555 excluded: Box<dyn Matcher + Sync>,
556 ) -> Self {
556 ) -> Self {
557 let base_is_exact = base.is_exact();
557 let base_is_exact = base.is_exact();
558 let base_files = base.file_set().map(ToOwned::to_owned);
558 let base_files = base.file_set().map(ToOwned::to_owned);
559 let mut new = Self {
559 let mut new = Self {
560 base,
560 base,
561 excluded,
561 excluded,
562 files: None,
562 files: None,
563 };
563 };
564 if base_is_exact {
564 if base_is_exact {
565 new.files = base_files.map(|files| {
565 new.files = base_files.map(|files| {
566 files.iter().cloned().filter(|f| new.matches(f)).collect()
566 files.iter().cloned().filter(|f| new.matches(f)).collect()
567 });
567 });
568 }
568 }
569 new
569 new
570 }
570 }
571 }
571 }
572
572
573 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
573 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
574 /// contexts.
574 /// contexts.
575 ///
575 ///
576 /// The `status` algorithm makes heavy use of threads, and calling `is_match`
576 /// The `status` algorithm makes heavy use of threads, and calling `is_match`
577 /// from many threads at once is prone to contention, probably within the
577 /// from many threads at once is prone to contention, probably within the
578 /// scratch space needed as the regex DFA is built lazily.
578 /// scratch space needed as the regex DFA is built lazily.
579 ///
579 ///
580 /// We are in the process of raising the issue upstream, but for now
580 /// We are in the process of raising the issue upstream, but for now
581 /// the workaround used here is to store the `Regex` in a lazily populated
581 /// the workaround used here is to store the `Regex` in a lazily populated
582 /// thread-local variable, sharing the initial read-only compilation, but
582 /// thread-local variable, sharing the initial read-only compilation, but
583 /// not the lazy dfa scratch space mentioned above.
583 /// not the lazy dfa scratch space mentioned above.
584 ///
584 ///
585 /// This reduces the contention observed with 16+ threads, but does not
585 /// This reduces the contention observed with 16+ threads, but does not
586 /// completely remove it. Hopefully this can be addressed upstream.
586 /// completely remove it. Hopefully this can be addressed upstream.
587 struct RegexMatcher {
587 struct RegexMatcher {
588 /// Compiled at the start of the status algorithm, used as a base for
588 /// Compiled at the start of the status algorithm, used as a base for
589 /// cloning in each thread-local `self.local`, thus sharing the expensive
589 /// cloning in each thread-local `self.local`, thus sharing the expensive
590 /// first compilation.
590 /// first compilation.
591 base: regex::bytes::Regex,
591 base: regex::bytes::Regex,
592 /// Thread-local variable that holds the `Regex` that is actually queried
592 /// Thread-local variable that holds the `Regex` that is actually queried
593 /// from each thread.
593 /// from each thread.
594 local: thread_local::ThreadLocal<regex::bytes::Regex>,
594 local: thread_local::ThreadLocal<regex::bytes::Regex>,
595 }
595 }
596
596
597 impl RegexMatcher {
597 impl RegexMatcher {
598 /// Returns whether the path matches the stored `Regex`.
598 /// Returns whether the path matches the stored `Regex`.
599 pub fn is_match(&self, path: &HgPath) -> bool {
599 pub fn is_match(&self, path: &HgPath) -> bool {
600 self.local
600 self.local
601 .get_or(|| self.base.clone())
601 .get_or(|| self.base.clone())
602 .is_match(path.as_bytes())
602 .is_match(path.as_bytes())
603 }
603 }
604 }
604 }
605
605
606 /// Returns a function that matches an `HgPath` against the given regex
606 /// Returns a function that matches an `HgPath` against the given regex
607 /// pattern.
607 /// pattern.
608 ///
608 ///
609 /// This can fail when the pattern is invalid or not supported by the
609 /// This can fail when the pattern is invalid or not supported by the
610 /// underlying engine (the `regex` crate), for instance anything with
610 /// underlying engine (the `regex` crate), for instance anything with
611 /// back-references.
611 /// back-references.
612 #[logging_timer::time("trace")]
612 #[logging_timer::time("trace")]
613 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
613 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
614 use std::io::Write;
614 use std::io::Write;
615
615
616 // The `regex` crate adds `.*` to the start and end of expressions if there
616 // The `regex` crate adds `.*` to the start and end of expressions if there
617 // are no anchors, so add the start anchor.
617 // are no anchors, so add the start anchor.
618 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
618 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
619 for byte in pattern {
619 for byte in pattern {
620 if *byte > 127 {
620 if *byte > 127 {
621 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
621 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
622 } else {
622 } else {
623 escaped_bytes.push(*byte);
623 escaped_bytes.push(*byte);
624 }
624 }
625 }
625 }
626 escaped_bytes.push(b')');
626 escaped_bytes.push(b')');
627
627
628 // Avoid the cost of UTF8 checking
628 // Avoid the cost of UTF8 checking
629 //
629 //
630 // # Safety
630 // # Safety
631 // This is safe because we escaped all non-ASCII bytes.
631 // This is safe because we escaped all non-ASCII bytes.
632 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
632 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
633 let re = regex::bytes::RegexBuilder::new(&pattern_string)
633 let re = regex::bytes::RegexBuilder::new(&pattern_string)
634 .unicode(false)
634 .unicode(false)
635 // Big repos with big `.hgignore` will hit the default limit and
635 // Big repos with big `.hgignore` will hit the default limit and
636 // incur a significant performance hit. One repo's `hg status` hit
636 // incur a significant performance hit. One repo's `hg status` hit
637 // multiple *minutes*.
637 // multiple *minutes*.
638 .dfa_size_limit(50 * (1 << 20))
638 .dfa_size_limit(50 * (1 << 20))
639 .build()
639 .build()
640 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
640 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
641
641
642 Ok(RegexMatcher {
642 Ok(RegexMatcher {
643 base: re,
643 base: re,
644 local: Default::default(),
644 local: Default::default(),
645 })
645 })
646 }
646 }
647
647
648 /// Returns the regex pattern and a function that matches an `HgPath` against
648 /// Returns the regex pattern and a function that matches an `HgPath` against
649 /// said regex formed by the given ignore patterns.
649 /// said regex formed by the given ignore patterns.
650 fn build_regex_match<'a, 'b>(
650 fn build_regex_match<'a, 'b>(
651 ignore_patterns: &'a [IgnorePattern],
651 ignore_patterns: &'a [IgnorePattern],
652 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'b>)> {
652 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'b>)> {
653 let mut regexps = vec![];
653 let mut regexps = vec![];
654 let mut exact_set = HashSet::new();
654 let mut exact_set = HashSet::new();
655
655
656 for pattern in ignore_patterns {
656 for pattern in ignore_patterns {
657 if let Some(re) = build_single_regex(pattern)? {
657 if let Some(re) = build_single_regex(pattern)? {
658 regexps.push(re);
658 regexps.push(re);
659 } else {
659 } else {
660 let exact = normalize_path_bytes(&pattern.pattern);
660 let exact = normalize_path_bytes(&pattern.pattern);
661 exact_set.insert(HgPathBuf::from_bytes(&exact));
661 exact_set.insert(HgPathBuf::from_bytes(&exact));
662 }
662 }
663 }
663 }
664
664
665 let full_regex = regexps.join(&b'|');
665 let full_regex = regexps.join(&b'|');
666
666
667 // An empty pattern would cause the regex engine to incorrectly match the
667 // An empty pattern would cause the regex engine to incorrectly match the
668 // (empty) root directory
668 // (empty) root directory
669 let func = if !(regexps.is_empty()) {
669 let func = if !(regexps.is_empty()) {
670 let matcher = re_matcher(&full_regex)?;
670 let matcher = re_matcher(&full_regex)?;
671 let func = move |filename: &HgPath| {
671 let func = move |filename: &HgPath| {
672 exact_set.contains(filename) || matcher.is_match(filename)
672 exact_set.contains(filename) || matcher.is_match(filename)
673 };
673 };
674 Box::new(func) as IgnoreFnType
674 Box::new(func) as IgnoreFnType
675 } else {
675 } else {
676 let func = move |filename: &HgPath| exact_set.contains(filename);
676 let func = move |filename: &HgPath| exact_set.contains(filename);
677 Box::new(func) as IgnoreFnType
677 Box::new(func) as IgnoreFnType
678 };
678 };
679
679
680 Ok((full_regex, func))
680 Ok((full_regex, func))
681 }
681 }
682
682
683 /// Returns roots and directories corresponding to each pattern.
683 /// Returns roots and directories corresponding to each pattern.
684 ///
684 ///
685 /// This calculates the roots and directories exactly matching the patterns and
685 /// This calculates the roots and directories exactly matching the patterns and
686 /// returns a tuple of (roots, dirs). It does not return other directories
686 /// returns a tuple of (roots, dirs). It does not return other directories
687 /// which may also need to be considered, like the parent directories.
687 /// which may also need to be considered, like the parent directories.
688 fn roots_and_dirs(
688 fn roots_and_dirs(
689 ignore_patterns: &[IgnorePattern],
689 ignore_patterns: &[IgnorePattern],
690 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
690 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
691 let mut roots = Vec::new();
691 let mut roots = Vec::new();
692 let mut dirs = Vec::new();
692 let mut dirs = Vec::new();
693
693
694 for ignore_pattern in ignore_patterns {
694 for ignore_pattern in ignore_patterns {
695 let IgnorePattern {
695 let IgnorePattern {
696 syntax, pattern, ..
696 syntax, pattern, ..
697 } = ignore_pattern;
697 } = ignore_pattern;
698 match syntax {
698 match syntax {
699 PatternSyntax::RootGlob | PatternSyntax::Glob => {
699 PatternSyntax::RootGlob | PatternSyntax::Glob => {
700 let mut root = HgPathBuf::new();
700 let mut root = HgPathBuf::new();
701 for p in pattern.split(|c| *c == b'/') {
701 for p in pattern.split(|c| *c == b'/') {
702 if p.iter().any(|c| match *c {
702 if p.iter()
703 b'[' | b'{' | b'*' | b'?' => true,
703 .any(|c| matches!(*c, b'[' | b'{' | b'*' | b'?'))
704 _ => false,
704 {
705 }) {
706 break;
705 break;
707 }
706 }
708 root.push(HgPathBuf::from_bytes(p).as_ref());
707 root.push(HgPathBuf::from_bytes(p).as_ref());
709 }
708 }
710 roots.push(root);
709 roots.push(root);
711 }
710 }
712 PatternSyntax::Path | PatternSyntax::RelPath => {
711 PatternSyntax::Path | PatternSyntax::RelPath => {
713 let pat = HgPath::new(if pattern == b"." {
712 let pat = HgPath::new(if pattern == b"." {
714 &[] as &[u8]
713 &[] as &[u8]
715 } else {
714 } else {
716 pattern
715 pattern
717 });
716 });
718 roots.push(pat.to_owned());
717 roots.push(pat.to_owned());
719 }
718 }
720 PatternSyntax::RootFiles => {
719 PatternSyntax::RootFiles => {
721 let pat = if pattern == b"." {
720 let pat = if pattern == b"." {
722 &[] as &[u8]
721 &[] as &[u8]
723 } else {
722 } else {
724 pattern
723 pattern
725 };
724 };
726 dirs.push(HgPathBuf::from_bytes(pat));
725 dirs.push(HgPathBuf::from_bytes(pat));
727 }
726 }
728 _ => {
727 _ => {
729 roots.push(HgPathBuf::new());
728 roots.push(HgPathBuf::new());
730 }
729 }
731 }
730 }
732 }
731 }
733 (roots, dirs)
732 (roots, dirs)
734 }
733 }
735
734
736 /// Paths extracted from patterns
735 /// Paths extracted from patterns
737 #[derive(Debug, PartialEq)]
736 #[derive(Debug, PartialEq)]
738 struct RootsDirsAndParents {
737 struct RootsDirsAndParents {
739 /// Directories to match recursively
738 /// Directories to match recursively
740 pub roots: HashSet<HgPathBuf>,
739 pub roots: HashSet<HgPathBuf>,
741 /// Directories to match non-recursively
740 /// Directories to match non-recursively
742 pub dirs: HashSet<HgPathBuf>,
741 pub dirs: HashSet<HgPathBuf>,
743 /// Implicitly required directories to go to items in either roots or dirs
742 /// Implicitly required directories to go to items in either roots or dirs
744 pub parents: HashSet<HgPathBuf>,
743 pub parents: HashSet<HgPathBuf>,
745 }
744 }
746
745
747 /// Extract roots, dirs and parents from patterns.
746 /// Extract roots, dirs and parents from patterns.
748 fn roots_dirs_and_parents(
747 fn roots_dirs_and_parents(
749 ignore_patterns: &[IgnorePattern],
748 ignore_patterns: &[IgnorePattern],
750 ) -> PatternResult<RootsDirsAndParents> {
749 ) -> PatternResult<RootsDirsAndParents> {
751 let (roots, dirs) = roots_and_dirs(ignore_patterns);
750 let (roots, dirs) = roots_and_dirs(ignore_patterns);
752
751
753 let mut parents = HashSet::new();
752 let mut parents = HashSet::new();
754
753
755 parents.extend(
754 parents.extend(
756 DirsMultiset::from_manifest(&dirs)
755 DirsMultiset::from_manifest(&dirs)
757 .map_err(|e| match e {
756 .map_err(|e| match e {
758 DirstateMapError::InvalidPath(e) => e,
757 DirstateMapError::InvalidPath(e) => e,
759 _ => unreachable!(),
758 _ => unreachable!(),
760 })?
759 })?
761 .iter()
760 .iter()
762 .map(ToOwned::to_owned),
761 .map(ToOwned::to_owned),
763 );
762 );
764 parents.extend(
763 parents.extend(
765 DirsMultiset::from_manifest(&roots)
764 DirsMultiset::from_manifest(&roots)
766 .map_err(|e| match e {
765 .map_err(|e| match e {
767 DirstateMapError::InvalidPath(e) => e,
766 DirstateMapError::InvalidPath(e) => e,
768 _ => unreachable!(),
767 _ => unreachable!(),
769 })?
768 })?
770 .iter()
769 .iter()
771 .map(ToOwned::to_owned),
770 .map(ToOwned::to_owned),
772 );
771 );
773
772
774 Ok(RootsDirsAndParents {
773 Ok(RootsDirsAndParents {
775 roots: HashSet::from_iter(roots),
774 roots: HashSet::from_iter(roots),
776 dirs: HashSet::from_iter(dirs),
775 dirs: HashSet::from_iter(dirs),
777 parents,
776 parents,
778 })
777 })
779 }
778 }
780
779
781 /// Returns a function that checks whether a given file (in the general sense)
780 /// Returns a function that checks whether a given file (in the general sense)
782 /// should be matched.
781 /// should be matched.
783 fn build_match<'a, 'b>(
782 fn build_match<'a>(
784 ignore_patterns: Vec<IgnorePattern>,
783 ignore_patterns: Vec<IgnorePattern>,
785 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'b>)> {
784 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
786 let mut match_funcs: Vec<IgnoreFnType<'b>> = vec![];
785 let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
787 // For debugging and printing
786 // For debugging and printing
788 let mut patterns = vec![];
787 let mut patterns = vec![];
789
788
790 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
789 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
791
790
792 if !subincludes.is_empty() {
791 if !subincludes.is_empty() {
793 // Build prefix-based matcher functions for subincludes
792 // Build prefix-based matcher functions for subincludes
794 let mut submatchers = FastHashMap::default();
793 let mut submatchers = FastHashMap::default();
795 let mut prefixes = vec![];
794 let mut prefixes = vec![];
796
795
797 for sub_include in subincludes {
796 for sub_include in subincludes {
798 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
797 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
799 let match_fn =
798 let match_fn =
800 Box::new(move |path: &HgPath| matcher.matches(path));
799 Box::new(move |path: &HgPath| matcher.matches(path));
801 prefixes.push(sub_include.prefix.clone());
800 prefixes.push(sub_include.prefix.clone());
802 submatchers.insert(sub_include.prefix.clone(), match_fn);
801 submatchers.insert(sub_include.prefix.clone(), match_fn);
803 }
802 }
804
803
805 let match_subinclude = move |filename: &HgPath| {
804 let match_subinclude = move |filename: &HgPath| {
806 for prefix in prefixes.iter() {
805 for prefix in prefixes.iter() {
807 if let Some(rel) = filename.relative_to(prefix) {
806 if let Some(rel) = filename.relative_to(prefix) {
808 if (submatchers[prefix])(rel) {
807 if (submatchers[prefix])(rel) {
809 return true;
808 return true;
810 }
809 }
811 }
810 }
812 }
811 }
813 false
812 false
814 };
813 };
815
814
816 match_funcs.push(Box::new(match_subinclude));
815 match_funcs.push(Box::new(match_subinclude));
817 }
816 }
818
817
819 if !ignore_patterns.is_empty() {
818 if !ignore_patterns.is_empty() {
820 // Either do dumb matching if all patterns are rootfiles, or match
819 // Either do dumb matching if all patterns are rootfiles, or match
821 // with a regex.
820 // with a regex.
822 if ignore_patterns
821 if ignore_patterns
823 .iter()
822 .iter()
824 .all(|k| k.syntax == PatternSyntax::RootFiles)
823 .all(|k| k.syntax == PatternSyntax::RootFiles)
825 {
824 {
826 let dirs: HashSet<_> = ignore_patterns
825 let dirs: HashSet<_> = ignore_patterns
827 .iter()
826 .iter()
828 .map(|k| k.pattern.to_owned())
827 .map(|k| k.pattern.to_owned())
829 .collect();
828 .collect();
830 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
829 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
831
830
832 let match_func = move |path: &HgPath| -> bool {
831 let match_func = move |path: &HgPath| -> bool {
833 let path = path.as_bytes();
832 let path = path.as_bytes();
834 let i = path.iter().rfind(|a| **a == b'/');
833 let i = path.iter().rfind(|a| **a == b'/');
835 let dir = if let Some(i) = i {
834 let dir = if let Some(i) = i {
836 &path[..*i as usize]
835 &path[..*i as usize]
837 } else {
836 } else {
838 b"."
837 b"."
839 };
838 };
840 dirs.contains(dir.deref())
839 dirs.contains(dir.deref())
841 };
840 };
842 match_funcs.push(Box::new(match_func));
841 match_funcs.push(Box::new(match_func));
843
842
844 patterns.extend(b"rootfilesin: ");
843 patterns.extend(b"rootfilesin: ");
845 dirs_vec.sort();
844 dirs_vec.sort();
846 patterns.extend(dirs_vec.escaped_bytes());
845 patterns.extend(dirs_vec.escaped_bytes());
847 } else {
846 } else {
848 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
847 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
849 patterns = new_re;
848 patterns = new_re;
850 match_funcs.push(match_func)
849 match_funcs.push(match_func)
851 }
850 }
852 }
851 }
853
852
854 Ok(if match_funcs.len() == 1 {
853 Ok(if match_funcs.len() == 1 {
855 (patterns, match_funcs.remove(0))
854 (patterns, match_funcs.remove(0))
856 } else {
855 } else {
857 (
856 (
858 patterns,
857 patterns,
859 Box::new(move |f: &HgPath| -> bool {
858 Box::new(move |f: &HgPath| -> bool {
860 match_funcs.iter().any(|match_func| match_func(f))
859 match_funcs.iter().any(|match_func| match_func(f))
861 }),
860 }),
862 )
861 )
863 })
862 })
864 }
863 }
865
864
866 /// Parses all "ignore" files with their recursive includes and returns a
865 /// Parses all "ignore" files with their recursive includes and returns a
867 /// function that checks whether a given file (in the general sense) should be
866 /// function that checks whether a given file (in the general sense) should be
868 /// ignored.
867 /// ignored.
869 pub fn get_ignore_matcher<'a>(
868 pub fn get_ignore_matcher<'a>(
870 mut all_pattern_files: Vec<PathBuf>,
869 mut all_pattern_files: Vec<PathBuf>,
871 root_dir: &Path,
870 root_dir: &Path,
872 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
871 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
873 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
872 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
874 let mut all_patterns = vec![];
873 let mut all_patterns = vec![];
875 let mut all_warnings = vec![];
874 let mut all_warnings = vec![];
876
875
877 // Sort to make the ordering of calls to `inspect_pattern_bytes`
876 // Sort to make the ordering of calls to `inspect_pattern_bytes`
878 // deterministic even if the ordering of `all_pattern_files` is not (such
877 // deterministic even if the ordering of `all_pattern_files` is not (such
879 // as when a iteration order of a Python dict or Rust HashMap is involved).
878 // as when a iteration order of a Python dict or Rust HashMap is involved).
880 // Sort by "string" representation instead of the default by component
879 // Sort by "string" representation instead of the default by component
881 // (with a Rust-specific definition of a component)
880 // (with a Rust-specific definition of a component)
882 all_pattern_files
881 all_pattern_files
883 .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
882 .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
884
883
885 for pattern_file in &all_pattern_files {
884 for pattern_file in &all_pattern_files {
886 let (patterns, warnings) = get_patterns_from_file(
885 let (patterns, warnings) = get_patterns_from_file(
887 pattern_file,
886 pattern_file,
888 root_dir,
887 root_dir,
889 inspect_pattern_bytes,
888 inspect_pattern_bytes,
890 )?;
889 )?;
891
890
892 all_patterns.extend(patterns.to_owned());
891 all_patterns.extend(patterns.to_owned());
893 all_warnings.extend(warnings);
892 all_warnings.extend(warnings);
894 }
893 }
895 let matcher = IncludeMatcher::new(all_patterns)?;
894 let matcher = IncludeMatcher::new(all_patterns)?;
896 Ok((matcher, all_warnings))
895 Ok((matcher, all_warnings))
897 }
896 }
898
897
899 /// Parses all "ignore" files with their recursive includes and returns a
898 /// Parses all "ignore" files with their recursive includes and returns a
900 /// function that checks whether a given file (in the general sense) should be
899 /// function that checks whether a given file (in the general sense) should be
901 /// ignored.
900 /// ignored.
902 pub fn get_ignore_function<'a>(
901 pub fn get_ignore_function<'a>(
903 all_pattern_files: Vec<PathBuf>,
902 all_pattern_files: Vec<PathBuf>,
904 root_dir: &Path,
903 root_dir: &Path,
905 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
904 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
906 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
905 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
907 let res =
906 let res =
908 get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
907 get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
909 res.map(|(matcher, all_warnings)| {
908 res.map(|(matcher, all_warnings)| {
910 let res: IgnoreFnType<'a> =
909 let res: IgnoreFnType<'a> =
911 Box::new(move |path: &HgPath| matcher.matches(path));
910 Box::new(move |path: &HgPath| matcher.matches(path));
912
911
913 (res, all_warnings)
912 (res, all_warnings)
914 })
913 })
915 }
914 }
916
915
917 impl<'a> IncludeMatcher<'a> {
916 impl<'a> IncludeMatcher<'a> {
918 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
917 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
919 let RootsDirsAndParents {
918 let RootsDirsAndParents {
920 roots,
919 roots,
921 dirs,
920 dirs,
922 parents,
921 parents,
923 } = roots_dirs_and_parents(&ignore_patterns)?;
922 } = roots_dirs_and_parents(&ignore_patterns)?;
924 let prefix = ignore_patterns.iter().all(|k| match k.syntax {
923 let prefix = ignore_patterns.iter().all(|k| {
925 PatternSyntax::Path | PatternSyntax::RelPath => true,
924 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
926 _ => false,
927 });
925 });
928 let (patterns, match_fn) = build_match(ignore_patterns)?;
926 let (patterns, match_fn) = build_match(ignore_patterns)?;
929
927
930 Ok(Self {
928 Ok(Self {
931 patterns,
929 patterns,
932 match_fn,
930 match_fn,
933 prefix,
931 prefix,
934 roots,
932 roots,
935 dirs,
933 dirs,
936 parents,
934 parents,
937 })
935 })
938 }
936 }
939
937
940 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
938 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
941 // TODO cache
939 // TODO cache
942 let thing = self
940 let thing = self
943 .dirs
941 .dirs
944 .iter()
942 .iter()
945 .chain(self.roots.iter())
943 .chain(self.roots.iter())
946 .chain(self.parents.iter());
944 .chain(self.parents.iter());
947 DirsChildrenMultiset::new(thing, Some(&self.parents))
945 DirsChildrenMultiset::new(thing, Some(&self.parents))
948 }
946 }
949
947
950 pub fn debug_get_patterns(&self) -> &[u8] {
948 pub fn debug_get_patterns(&self) -> &[u8] {
951 self.patterns.as_ref()
949 self.patterns.as_ref()
952 }
950 }
953 }
951 }
954
952
955 impl<'a> Display for IncludeMatcher<'a> {
953 impl<'a> Display for IncludeMatcher<'a> {
956 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
954 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
957 // XXX What about exact matches?
955 // XXX What about exact matches?
958 // I'm not sure it's worth it to clone the HashSet and keep it
956 // I'm not sure it's worth it to clone the HashSet and keep it
959 // around just in case someone wants to display the matcher, plus
957 // around just in case someone wants to display the matcher, plus
960 // it's going to be unreadable after a few entries, but we need to
958 // it's going to be unreadable after a few entries, but we need to
961 // inform in this display that exact matches are being used and are
959 // inform in this display that exact matches are being used and are
962 // (on purpose) missing from the `includes`.
960 // (on purpose) missing from the `includes`.
963 write!(
961 write!(
964 f,
962 f,
965 "IncludeMatcher(includes='{}')",
963 "IncludeMatcher(includes='{}')",
966 String::from_utf8_lossy(&self.patterns.escaped_bytes())
964 String::from_utf8_lossy(&self.patterns.escaped_bytes())
967 )
965 )
968 }
966 }
969 }
967 }
970
968
971 #[cfg(test)]
969 #[cfg(test)]
972 mod tests {
970 mod tests {
973 use super::*;
971 use super::*;
974 use pretty_assertions::assert_eq;
972 use pretty_assertions::assert_eq;
975 use std::path::Path;
973 use std::path::Path;
976
974
977 #[test]
975 #[test]
978 fn test_roots_and_dirs() {
976 fn test_roots_and_dirs() {
979 let pats = vec![
977 let pats = vec![
980 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
978 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
981 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
979 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
982 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
980 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
983 ];
981 ];
984 let (roots, dirs) = roots_and_dirs(&pats);
982 let (roots, dirs) = roots_and_dirs(&pats);
985
983
986 assert_eq!(
984 assert_eq!(
987 roots,
985 roots,
988 vec!(
986 vec!(
989 HgPathBuf::from_bytes(b"g/h"),
987 HgPathBuf::from_bytes(b"g/h"),
990 HgPathBuf::from_bytes(b"g/h"),
988 HgPathBuf::from_bytes(b"g/h"),
991 HgPathBuf::new()
989 HgPathBuf::new()
992 ),
990 ),
993 );
991 );
994 assert_eq!(dirs, vec!());
992 assert_eq!(dirs, vec!());
995 }
993 }
996
994
997 #[test]
995 #[test]
998 fn test_roots_dirs_and_parents() {
996 fn test_roots_dirs_and_parents() {
999 let pats = vec![
997 let pats = vec![
1000 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
998 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1001 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
999 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1002 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1000 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1003 ];
1001 ];
1004
1002
1005 let mut roots = HashSet::new();
1003 let mut roots = HashSet::new();
1006 roots.insert(HgPathBuf::from_bytes(b"g/h"));
1004 roots.insert(HgPathBuf::from_bytes(b"g/h"));
1007 roots.insert(HgPathBuf::new());
1005 roots.insert(HgPathBuf::new());
1008
1006
1009 let dirs = HashSet::new();
1007 let dirs = HashSet::new();
1010
1008
1011 let mut parents = HashSet::new();
1009 let mut parents = HashSet::new();
1012 parents.insert(HgPathBuf::new());
1010 parents.insert(HgPathBuf::new());
1013 parents.insert(HgPathBuf::from_bytes(b"g"));
1011 parents.insert(HgPathBuf::from_bytes(b"g"));
1014
1012
1015 assert_eq!(
1013 assert_eq!(
1016 roots_dirs_and_parents(&pats).unwrap(),
1014 roots_dirs_and_parents(&pats).unwrap(),
1017 RootsDirsAndParents {
1015 RootsDirsAndParents {
1018 roots,
1016 roots,
1019 dirs,
1017 dirs,
1020 parents
1018 parents
1021 }
1019 }
1022 );
1020 );
1023 }
1021 }
1024
1022
1025 #[test]
1023 #[test]
1026 fn test_filematcher_visit_children_set() {
1024 fn test_filematcher_visit_children_set() {
1027 // Visitchildrenset
1025 // Visitchildrenset
1028 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
1026 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
1029 let matcher = FileMatcher::new(files).unwrap();
1027 let matcher = FileMatcher::new(files).unwrap();
1030
1028
1031 let mut set = HashSet::new();
1029 let mut set = HashSet::new();
1032 set.insert(HgPathBuf::from_bytes(b"dir"));
1030 set.insert(HgPathBuf::from_bytes(b"dir"));
1033 assert_eq!(
1031 assert_eq!(
1034 matcher.visit_children_set(HgPath::new(b"")),
1032 matcher.visit_children_set(HgPath::new(b"")),
1035 VisitChildrenSet::Set(set)
1033 VisitChildrenSet::Set(set)
1036 );
1034 );
1037
1035
1038 let mut set = HashSet::new();
1036 let mut set = HashSet::new();
1039 set.insert(HgPathBuf::from_bytes(b"subdir"));
1037 set.insert(HgPathBuf::from_bytes(b"subdir"));
1040 assert_eq!(
1038 assert_eq!(
1041 matcher.visit_children_set(HgPath::new(b"dir")),
1039 matcher.visit_children_set(HgPath::new(b"dir")),
1042 VisitChildrenSet::Set(set)
1040 VisitChildrenSet::Set(set)
1043 );
1041 );
1044
1042
1045 let mut set = HashSet::new();
1043 let mut set = HashSet::new();
1046 set.insert(HgPathBuf::from_bytes(b"foo.txt"));
1044 set.insert(HgPathBuf::from_bytes(b"foo.txt"));
1047 assert_eq!(
1045 assert_eq!(
1048 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1046 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1049 VisitChildrenSet::Set(set)
1047 VisitChildrenSet::Set(set)
1050 );
1048 );
1051
1049
1052 assert_eq!(
1050 assert_eq!(
1053 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1051 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1054 VisitChildrenSet::Empty
1052 VisitChildrenSet::Empty
1055 );
1053 );
1056 assert_eq!(
1054 assert_eq!(
1057 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
1055 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
1058 VisitChildrenSet::Empty
1056 VisitChildrenSet::Empty
1059 );
1057 );
1060 assert_eq!(
1058 assert_eq!(
1061 matcher.visit_children_set(HgPath::new(b"folder")),
1059 matcher.visit_children_set(HgPath::new(b"folder")),
1062 VisitChildrenSet::Empty
1060 VisitChildrenSet::Empty
1063 );
1061 );
1064 }
1062 }
1065
1063
1066 #[test]
1064 #[test]
1067 fn test_filematcher_visit_children_set_files_and_dirs() {
1065 fn test_filematcher_visit_children_set_files_and_dirs() {
1068 let files = vec![
1066 let files = vec![
1069 HgPathBuf::from_bytes(b"rootfile.txt"),
1067 HgPathBuf::from_bytes(b"rootfile.txt"),
1070 HgPathBuf::from_bytes(b"a/file1.txt"),
1068 HgPathBuf::from_bytes(b"a/file1.txt"),
1071 HgPathBuf::from_bytes(b"a/b/file2.txt"),
1069 HgPathBuf::from_bytes(b"a/b/file2.txt"),
1072 // No file in a/b/c
1070 // No file in a/b/c
1073 HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
1071 HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
1074 ];
1072 ];
1075 let matcher = FileMatcher::new(files).unwrap();
1073 let matcher = FileMatcher::new(files).unwrap();
1076
1074
1077 let mut set = HashSet::new();
1075 let mut set = HashSet::new();
1078 set.insert(HgPathBuf::from_bytes(b"a"));
1076 set.insert(HgPathBuf::from_bytes(b"a"));
1079 set.insert(HgPathBuf::from_bytes(b"rootfile.txt"));
1077 set.insert(HgPathBuf::from_bytes(b"rootfile.txt"));
1080 assert_eq!(
1078 assert_eq!(
1081 matcher.visit_children_set(HgPath::new(b"")),
1079 matcher.visit_children_set(HgPath::new(b"")),
1082 VisitChildrenSet::Set(set)
1080 VisitChildrenSet::Set(set)
1083 );
1081 );
1084
1082
1085 let mut set = HashSet::new();
1083 let mut set = HashSet::new();
1086 set.insert(HgPathBuf::from_bytes(b"b"));
1084 set.insert(HgPathBuf::from_bytes(b"b"));
1087 set.insert(HgPathBuf::from_bytes(b"file1.txt"));
1085 set.insert(HgPathBuf::from_bytes(b"file1.txt"));
1088 assert_eq!(
1086 assert_eq!(
1089 matcher.visit_children_set(HgPath::new(b"a")),
1087 matcher.visit_children_set(HgPath::new(b"a")),
1090 VisitChildrenSet::Set(set)
1088 VisitChildrenSet::Set(set)
1091 );
1089 );
1092
1090
1093 let mut set = HashSet::new();
1091 let mut set = HashSet::new();
1094 set.insert(HgPathBuf::from_bytes(b"c"));
1092 set.insert(HgPathBuf::from_bytes(b"c"));
1095 set.insert(HgPathBuf::from_bytes(b"file2.txt"));
1093 set.insert(HgPathBuf::from_bytes(b"file2.txt"));
1096 assert_eq!(
1094 assert_eq!(
1097 matcher.visit_children_set(HgPath::new(b"a/b")),
1095 matcher.visit_children_set(HgPath::new(b"a/b")),
1098 VisitChildrenSet::Set(set)
1096 VisitChildrenSet::Set(set)
1099 );
1097 );
1100
1098
1101 let mut set = HashSet::new();
1099 let mut set = HashSet::new();
1102 set.insert(HgPathBuf::from_bytes(b"d"));
1100 set.insert(HgPathBuf::from_bytes(b"d"));
1103 assert_eq!(
1101 assert_eq!(
1104 matcher.visit_children_set(HgPath::new(b"a/b/c")),
1102 matcher.visit_children_set(HgPath::new(b"a/b/c")),
1105 VisitChildrenSet::Set(set)
1103 VisitChildrenSet::Set(set)
1106 );
1104 );
1107 let mut set = HashSet::new();
1105 let mut set = HashSet::new();
1108 set.insert(HgPathBuf::from_bytes(b"file4.txt"));
1106 set.insert(HgPathBuf::from_bytes(b"file4.txt"));
1109 assert_eq!(
1107 assert_eq!(
1110 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
1108 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
1111 VisitChildrenSet::Set(set)
1109 VisitChildrenSet::Set(set)
1112 );
1110 );
1113
1111
1114 assert_eq!(
1112 assert_eq!(
1115 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
1113 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
1116 VisitChildrenSet::Empty
1114 VisitChildrenSet::Empty
1117 );
1115 );
1118 assert_eq!(
1116 assert_eq!(
1119 matcher.visit_children_set(HgPath::new(b"folder")),
1117 matcher.visit_children_set(HgPath::new(b"folder")),
1120 VisitChildrenSet::Empty
1118 VisitChildrenSet::Empty
1121 );
1119 );
1122 }
1120 }
1123
1121
1124 #[test]
1122 #[test]
1125 fn test_includematcher() {
1123 fn test_includematcher() {
1126 // VisitchildrensetPrefix
1124 // VisitchildrensetPrefix
1127 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1125 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1128 PatternSyntax::RelPath,
1126 PatternSyntax::RelPath,
1129 b"dir/subdir",
1127 b"dir/subdir",
1130 Path::new(""),
1128 Path::new(""),
1131 )])
1129 )])
1132 .unwrap();
1130 .unwrap();
1133
1131
1134 let mut set = HashSet::new();
1132 let mut set = HashSet::new();
1135 set.insert(HgPathBuf::from_bytes(b"dir"));
1133 set.insert(HgPathBuf::from_bytes(b"dir"));
1136 assert_eq!(
1134 assert_eq!(
1137 matcher.visit_children_set(HgPath::new(b"")),
1135 matcher.visit_children_set(HgPath::new(b"")),
1138 VisitChildrenSet::Set(set)
1136 VisitChildrenSet::Set(set)
1139 );
1137 );
1140
1138
1141 let mut set = HashSet::new();
1139 let mut set = HashSet::new();
1142 set.insert(HgPathBuf::from_bytes(b"subdir"));
1140 set.insert(HgPathBuf::from_bytes(b"subdir"));
1143 assert_eq!(
1141 assert_eq!(
1144 matcher.visit_children_set(HgPath::new(b"dir")),
1142 matcher.visit_children_set(HgPath::new(b"dir")),
1145 VisitChildrenSet::Set(set)
1143 VisitChildrenSet::Set(set)
1146 );
1144 );
1147 assert_eq!(
1145 assert_eq!(
1148 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1146 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1149 VisitChildrenSet::Recursive
1147 VisitChildrenSet::Recursive
1150 );
1148 );
1151 // OPT: This should probably be 'all' if its parent is?
1149 // OPT: This should probably be 'all' if its parent is?
1152 assert_eq!(
1150 assert_eq!(
1153 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1151 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1154 VisitChildrenSet::This
1152 VisitChildrenSet::This
1155 );
1153 );
1156 assert_eq!(
1154 assert_eq!(
1157 matcher.visit_children_set(HgPath::new(b"folder")),
1155 matcher.visit_children_set(HgPath::new(b"folder")),
1158 VisitChildrenSet::Empty
1156 VisitChildrenSet::Empty
1159 );
1157 );
1160
1158
1161 // VisitchildrensetRootfilesin
1159 // VisitchildrensetRootfilesin
1162 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1160 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1163 PatternSyntax::RootFiles,
1161 PatternSyntax::RootFiles,
1164 b"dir/subdir",
1162 b"dir/subdir",
1165 Path::new(""),
1163 Path::new(""),
1166 )])
1164 )])
1167 .unwrap();
1165 .unwrap();
1168
1166
1169 let mut set = HashSet::new();
1167 let mut set = HashSet::new();
1170 set.insert(HgPathBuf::from_bytes(b"dir"));
1168 set.insert(HgPathBuf::from_bytes(b"dir"));
1171 assert_eq!(
1169 assert_eq!(
1172 matcher.visit_children_set(HgPath::new(b"")),
1170 matcher.visit_children_set(HgPath::new(b"")),
1173 VisitChildrenSet::Set(set)
1171 VisitChildrenSet::Set(set)
1174 );
1172 );
1175
1173
1176 let mut set = HashSet::new();
1174 let mut set = HashSet::new();
1177 set.insert(HgPathBuf::from_bytes(b"subdir"));
1175 set.insert(HgPathBuf::from_bytes(b"subdir"));
1178 assert_eq!(
1176 assert_eq!(
1179 matcher.visit_children_set(HgPath::new(b"dir")),
1177 matcher.visit_children_set(HgPath::new(b"dir")),
1180 VisitChildrenSet::Set(set)
1178 VisitChildrenSet::Set(set)
1181 );
1179 );
1182
1180
1183 assert_eq!(
1181 assert_eq!(
1184 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1182 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1185 VisitChildrenSet::This
1183 VisitChildrenSet::This
1186 );
1184 );
1187 assert_eq!(
1185 assert_eq!(
1188 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1186 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1189 VisitChildrenSet::Empty
1187 VisitChildrenSet::Empty
1190 );
1188 );
1191 assert_eq!(
1189 assert_eq!(
1192 matcher.visit_children_set(HgPath::new(b"folder")),
1190 matcher.visit_children_set(HgPath::new(b"folder")),
1193 VisitChildrenSet::Empty
1191 VisitChildrenSet::Empty
1194 );
1192 );
1195
1193
1196 // VisitchildrensetGlob
1194 // VisitchildrensetGlob
1197 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1195 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1198 PatternSyntax::Glob,
1196 PatternSyntax::Glob,
1199 b"dir/z*",
1197 b"dir/z*",
1200 Path::new(""),
1198 Path::new(""),
1201 )])
1199 )])
1202 .unwrap();
1200 .unwrap();
1203
1201
1204 let mut set = HashSet::new();
1202 let mut set = HashSet::new();
1205 set.insert(HgPathBuf::from_bytes(b"dir"));
1203 set.insert(HgPathBuf::from_bytes(b"dir"));
1206 assert_eq!(
1204 assert_eq!(
1207 matcher.visit_children_set(HgPath::new(b"")),
1205 matcher.visit_children_set(HgPath::new(b"")),
1208 VisitChildrenSet::Set(set)
1206 VisitChildrenSet::Set(set)
1209 );
1207 );
1210 assert_eq!(
1208 assert_eq!(
1211 matcher.visit_children_set(HgPath::new(b"folder")),
1209 matcher.visit_children_set(HgPath::new(b"folder")),
1212 VisitChildrenSet::Empty
1210 VisitChildrenSet::Empty
1213 );
1211 );
1214 assert_eq!(
1212 assert_eq!(
1215 matcher.visit_children_set(HgPath::new(b"dir")),
1213 matcher.visit_children_set(HgPath::new(b"dir")),
1216 VisitChildrenSet::This
1214 VisitChildrenSet::This
1217 );
1215 );
1218 // OPT: these should probably be set().
1216 // OPT: these should probably be set().
1219 assert_eq!(
1217 assert_eq!(
1220 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1218 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1221 VisitChildrenSet::This
1219 VisitChildrenSet::This
1222 );
1220 );
1223 assert_eq!(
1221 assert_eq!(
1224 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1222 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1225 VisitChildrenSet::This
1223 VisitChildrenSet::This
1226 );
1224 );
1227
1225
1228 // Test multiple patterns
1226 // Test multiple patterns
1229 let matcher = IncludeMatcher::new(vec![
1227 let matcher = IncludeMatcher::new(vec![
1230 IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
1228 IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
1231 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1229 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1232 ])
1230 ])
1233 .unwrap();
1231 .unwrap();
1234
1232
1235 assert_eq!(
1233 assert_eq!(
1236 matcher.visit_children_set(HgPath::new(b"")),
1234 matcher.visit_children_set(HgPath::new(b"")),
1237 VisitChildrenSet::This
1235 VisitChildrenSet::This
1238 );
1236 );
1239
1237
1240 // Test multiple patterns
1238 // Test multiple patterns
1241 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1239 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1242 PatternSyntax::Glob,
1240 PatternSyntax::Glob,
1243 b"**/*.exe",
1241 b"**/*.exe",
1244 Path::new(""),
1242 Path::new(""),
1245 )])
1243 )])
1246 .unwrap();
1244 .unwrap();
1247
1245
1248 assert_eq!(
1246 assert_eq!(
1249 matcher.visit_children_set(HgPath::new(b"")),
1247 matcher.visit_children_set(HgPath::new(b"")),
1250 VisitChildrenSet::This
1248 VisitChildrenSet::This
1251 );
1249 );
1252 }
1250 }
1253
1251
1254 #[test]
1252 #[test]
1255 fn test_unionmatcher() {
1253 fn test_unionmatcher() {
1256 // Path + Rootfiles
1254 // Path + Rootfiles
1257 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1255 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1258 PatternSyntax::RelPath,
1256 PatternSyntax::RelPath,
1259 b"dir/subdir",
1257 b"dir/subdir",
1260 Path::new(""),
1258 Path::new(""),
1261 )])
1259 )])
1262 .unwrap();
1260 .unwrap();
1263 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1261 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1264 PatternSyntax::RootFiles,
1262 PatternSyntax::RootFiles,
1265 b"dir",
1263 b"dir",
1266 Path::new(""),
1264 Path::new(""),
1267 )])
1265 )])
1268 .unwrap();
1266 .unwrap();
1269 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1267 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1270
1268
1271 let mut set = HashSet::new();
1269 let mut set = HashSet::new();
1272 set.insert(HgPathBuf::from_bytes(b"dir"));
1270 set.insert(HgPathBuf::from_bytes(b"dir"));
1273 assert_eq!(
1271 assert_eq!(
1274 matcher.visit_children_set(HgPath::new(b"")),
1272 matcher.visit_children_set(HgPath::new(b"")),
1275 VisitChildrenSet::Set(set)
1273 VisitChildrenSet::Set(set)
1276 );
1274 );
1277 assert_eq!(
1275 assert_eq!(
1278 matcher.visit_children_set(HgPath::new(b"dir")),
1276 matcher.visit_children_set(HgPath::new(b"dir")),
1279 VisitChildrenSet::This
1277 VisitChildrenSet::This
1280 );
1278 );
1281 assert_eq!(
1279 assert_eq!(
1282 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1280 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1283 VisitChildrenSet::Recursive
1281 VisitChildrenSet::Recursive
1284 );
1282 );
1285 assert_eq!(
1283 assert_eq!(
1286 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1284 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1287 VisitChildrenSet::Empty
1285 VisitChildrenSet::Empty
1288 );
1286 );
1289 assert_eq!(
1287 assert_eq!(
1290 matcher.visit_children_set(HgPath::new(b"folder")),
1288 matcher.visit_children_set(HgPath::new(b"folder")),
1291 VisitChildrenSet::Empty
1289 VisitChildrenSet::Empty
1292 );
1290 );
1293 assert_eq!(
1291 assert_eq!(
1294 matcher.visit_children_set(HgPath::new(b"folder")),
1292 matcher.visit_children_set(HgPath::new(b"folder")),
1295 VisitChildrenSet::Empty
1293 VisitChildrenSet::Empty
1296 );
1294 );
1297
1295
1298 // OPT: These next two could be 'all' instead of 'this'.
1296 // OPT: These next two could be 'all' instead of 'this'.
1299 assert_eq!(
1297 assert_eq!(
1300 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1298 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1301 VisitChildrenSet::This
1299 VisitChildrenSet::This
1302 );
1300 );
1303 assert_eq!(
1301 assert_eq!(
1304 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1302 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1305 VisitChildrenSet::This
1303 VisitChildrenSet::This
1306 );
1304 );
1307
1305
1308 // Path + unrelated Path
1306 // Path + unrelated Path
1309 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1307 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1310 PatternSyntax::RelPath,
1308 PatternSyntax::RelPath,
1311 b"dir/subdir",
1309 b"dir/subdir",
1312 Path::new(""),
1310 Path::new(""),
1313 )])
1311 )])
1314 .unwrap();
1312 .unwrap();
1315 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1313 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1316 PatternSyntax::RelPath,
1314 PatternSyntax::RelPath,
1317 b"folder",
1315 b"folder",
1318 Path::new(""),
1316 Path::new(""),
1319 )])
1317 )])
1320 .unwrap();
1318 .unwrap();
1321 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1319 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1322
1320
1323 let mut set = HashSet::new();
1321 let mut set = HashSet::new();
1324 set.insert(HgPathBuf::from_bytes(b"folder"));
1322 set.insert(HgPathBuf::from_bytes(b"folder"));
1325 set.insert(HgPathBuf::from_bytes(b"dir"));
1323 set.insert(HgPathBuf::from_bytes(b"dir"));
1326 assert_eq!(
1324 assert_eq!(
1327 matcher.visit_children_set(HgPath::new(b"")),
1325 matcher.visit_children_set(HgPath::new(b"")),
1328 VisitChildrenSet::Set(set)
1326 VisitChildrenSet::Set(set)
1329 );
1327 );
1330 let mut set = HashSet::new();
1328 let mut set = HashSet::new();
1331 set.insert(HgPathBuf::from_bytes(b"subdir"));
1329 set.insert(HgPathBuf::from_bytes(b"subdir"));
1332 assert_eq!(
1330 assert_eq!(
1333 matcher.visit_children_set(HgPath::new(b"dir")),
1331 matcher.visit_children_set(HgPath::new(b"dir")),
1334 VisitChildrenSet::Set(set)
1332 VisitChildrenSet::Set(set)
1335 );
1333 );
1336
1334
1337 assert_eq!(
1335 assert_eq!(
1338 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1336 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1339 VisitChildrenSet::Recursive
1337 VisitChildrenSet::Recursive
1340 );
1338 );
1341 assert_eq!(
1339 assert_eq!(
1342 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1340 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1343 VisitChildrenSet::Empty
1341 VisitChildrenSet::Empty
1344 );
1342 );
1345
1343
1346 assert_eq!(
1344 assert_eq!(
1347 matcher.visit_children_set(HgPath::new(b"folder")),
1345 matcher.visit_children_set(HgPath::new(b"folder")),
1348 VisitChildrenSet::Recursive
1346 VisitChildrenSet::Recursive
1349 );
1347 );
1350 // OPT: These next two could be 'all' instead of 'this'.
1348 // OPT: These next two could be 'all' instead of 'this'.
1351 assert_eq!(
1349 assert_eq!(
1352 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1350 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1353 VisitChildrenSet::This
1351 VisitChildrenSet::This
1354 );
1352 );
1355 assert_eq!(
1353 assert_eq!(
1356 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1354 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1357 VisitChildrenSet::This
1355 VisitChildrenSet::This
1358 );
1356 );
1359
1357
1360 // Path + subpath
1358 // Path + subpath
1361 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1359 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1362 PatternSyntax::RelPath,
1360 PatternSyntax::RelPath,
1363 b"dir/subdir/x",
1361 b"dir/subdir/x",
1364 Path::new(""),
1362 Path::new(""),
1365 )])
1363 )])
1366 .unwrap();
1364 .unwrap();
1367 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1365 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1368 PatternSyntax::RelPath,
1366 PatternSyntax::RelPath,
1369 b"dir/subdir",
1367 b"dir/subdir",
1370 Path::new(""),
1368 Path::new(""),
1371 )])
1369 )])
1372 .unwrap();
1370 .unwrap();
1373 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1371 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1374
1372
1375 let mut set = HashSet::new();
1373 let mut set = HashSet::new();
1376 set.insert(HgPathBuf::from_bytes(b"dir"));
1374 set.insert(HgPathBuf::from_bytes(b"dir"));
1377 assert_eq!(
1375 assert_eq!(
1378 matcher.visit_children_set(HgPath::new(b"")),
1376 matcher.visit_children_set(HgPath::new(b"")),
1379 VisitChildrenSet::Set(set)
1377 VisitChildrenSet::Set(set)
1380 );
1378 );
1381 let mut set = HashSet::new();
1379 let mut set = HashSet::new();
1382 set.insert(HgPathBuf::from_bytes(b"subdir"));
1380 set.insert(HgPathBuf::from_bytes(b"subdir"));
1383 assert_eq!(
1381 assert_eq!(
1384 matcher.visit_children_set(HgPath::new(b"dir")),
1382 matcher.visit_children_set(HgPath::new(b"dir")),
1385 VisitChildrenSet::Set(set)
1383 VisitChildrenSet::Set(set)
1386 );
1384 );
1387
1385
1388 assert_eq!(
1386 assert_eq!(
1389 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1387 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1390 VisitChildrenSet::Recursive
1388 VisitChildrenSet::Recursive
1391 );
1389 );
1392 assert_eq!(
1390 assert_eq!(
1393 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1391 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1394 VisitChildrenSet::Empty
1392 VisitChildrenSet::Empty
1395 );
1393 );
1396
1394
1397 assert_eq!(
1395 assert_eq!(
1398 matcher.visit_children_set(HgPath::new(b"folder")),
1396 matcher.visit_children_set(HgPath::new(b"folder")),
1399 VisitChildrenSet::Empty
1397 VisitChildrenSet::Empty
1400 );
1398 );
1401 assert_eq!(
1399 assert_eq!(
1402 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1400 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1403 VisitChildrenSet::Recursive
1401 VisitChildrenSet::Recursive
1404 );
1402 );
1405 // OPT: this should probably be 'all' not 'this'.
1403 // OPT: this should probably be 'all' not 'this'.
1406 assert_eq!(
1404 assert_eq!(
1407 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1405 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1408 VisitChildrenSet::This
1406 VisitChildrenSet::This
1409 );
1407 );
1410 }
1408 }
1411
1409
1412 #[test]
1410 #[test]
1413 fn test_intersectionmatcher() {
1411 fn test_intersectionmatcher() {
1414 // Include path + Include rootfiles
1412 // Include path + Include rootfiles
1415 let m1 = Box::new(
1413 let m1 = Box::new(
1416 IncludeMatcher::new(vec![IgnorePattern::new(
1414 IncludeMatcher::new(vec![IgnorePattern::new(
1417 PatternSyntax::RelPath,
1415 PatternSyntax::RelPath,
1418 b"dir/subdir",
1416 b"dir/subdir",
1419 Path::new(""),
1417 Path::new(""),
1420 )])
1418 )])
1421 .unwrap(),
1419 .unwrap(),
1422 );
1420 );
1423 let m2 = Box::new(
1421 let m2 = Box::new(
1424 IncludeMatcher::new(vec![IgnorePattern::new(
1422 IncludeMatcher::new(vec![IgnorePattern::new(
1425 PatternSyntax::RootFiles,
1423 PatternSyntax::RootFiles,
1426 b"dir",
1424 b"dir",
1427 Path::new(""),
1425 Path::new(""),
1428 )])
1426 )])
1429 .unwrap(),
1427 .unwrap(),
1430 );
1428 );
1431 let matcher = IntersectionMatcher::new(m1, m2);
1429 let matcher = IntersectionMatcher::new(m1, m2);
1432
1430
1433 let mut set = HashSet::new();
1431 let mut set = HashSet::new();
1434 set.insert(HgPathBuf::from_bytes(b"dir"));
1432 set.insert(HgPathBuf::from_bytes(b"dir"));
1435 assert_eq!(
1433 assert_eq!(
1436 matcher.visit_children_set(HgPath::new(b"")),
1434 matcher.visit_children_set(HgPath::new(b"")),
1437 VisitChildrenSet::Set(set)
1435 VisitChildrenSet::Set(set)
1438 );
1436 );
1439 assert_eq!(
1437 assert_eq!(
1440 matcher.visit_children_set(HgPath::new(b"dir")),
1438 matcher.visit_children_set(HgPath::new(b"dir")),
1441 VisitChildrenSet::This
1439 VisitChildrenSet::This
1442 );
1440 );
1443 assert_eq!(
1441 assert_eq!(
1444 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1442 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1445 VisitChildrenSet::Empty
1443 VisitChildrenSet::Empty
1446 );
1444 );
1447 assert_eq!(
1445 assert_eq!(
1448 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1446 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1449 VisitChildrenSet::Empty
1447 VisitChildrenSet::Empty
1450 );
1448 );
1451 assert_eq!(
1449 assert_eq!(
1452 matcher.visit_children_set(HgPath::new(b"folder")),
1450 matcher.visit_children_set(HgPath::new(b"folder")),
1453 VisitChildrenSet::Empty
1451 VisitChildrenSet::Empty
1454 );
1452 );
1455 assert_eq!(
1453 assert_eq!(
1456 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1454 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1457 VisitChildrenSet::Empty
1455 VisitChildrenSet::Empty
1458 );
1456 );
1459 assert_eq!(
1457 assert_eq!(
1460 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1458 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1461 VisitChildrenSet::Empty
1459 VisitChildrenSet::Empty
1462 );
1460 );
1463
1461
1464 // Non intersecting paths
1462 // Non intersecting paths
1465 let m1 = Box::new(
1463 let m1 = Box::new(
1466 IncludeMatcher::new(vec![IgnorePattern::new(
1464 IncludeMatcher::new(vec![IgnorePattern::new(
1467 PatternSyntax::RelPath,
1465 PatternSyntax::RelPath,
1468 b"dir/subdir",
1466 b"dir/subdir",
1469 Path::new(""),
1467 Path::new(""),
1470 )])
1468 )])
1471 .unwrap(),
1469 .unwrap(),
1472 );
1470 );
1473 let m2 = Box::new(
1471 let m2 = Box::new(
1474 IncludeMatcher::new(vec![IgnorePattern::new(
1472 IncludeMatcher::new(vec![IgnorePattern::new(
1475 PatternSyntax::RelPath,
1473 PatternSyntax::RelPath,
1476 b"folder",
1474 b"folder",
1477 Path::new(""),
1475 Path::new(""),
1478 )])
1476 )])
1479 .unwrap(),
1477 .unwrap(),
1480 );
1478 );
1481 let matcher = IntersectionMatcher::new(m1, m2);
1479 let matcher = IntersectionMatcher::new(m1, m2);
1482
1480
1483 assert_eq!(
1481 assert_eq!(
1484 matcher.visit_children_set(HgPath::new(b"")),
1482 matcher.visit_children_set(HgPath::new(b"")),
1485 VisitChildrenSet::Empty
1483 VisitChildrenSet::Empty
1486 );
1484 );
1487 assert_eq!(
1485 assert_eq!(
1488 matcher.visit_children_set(HgPath::new(b"dir")),
1486 matcher.visit_children_set(HgPath::new(b"dir")),
1489 VisitChildrenSet::Empty
1487 VisitChildrenSet::Empty
1490 );
1488 );
1491 assert_eq!(
1489 assert_eq!(
1492 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1490 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1493 VisitChildrenSet::Empty
1491 VisitChildrenSet::Empty
1494 );
1492 );
1495 assert_eq!(
1493 assert_eq!(
1496 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1494 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1497 VisitChildrenSet::Empty
1495 VisitChildrenSet::Empty
1498 );
1496 );
1499 assert_eq!(
1497 assert_eq!(
1500 matcher.visit_children_set(HgPath::new(b"folder")),
1498 matcher.visit_children_set(HgPath::new(b"folder")),
1501 VisitChildrenSet::Empty
1499 VisitChildrenSet::Empty
1502 );
1500 );
1503 assert_eq!(
1501 assert_eq!(
1504 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1502 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1505 VisitChildrenSet::Empty
1503 VisitChildrenSet::Empty
1506 );
1504 );
1507 assert_eq!(
1505 assert_eq!(
1508 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1506 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1509 VisitChildrenSet::Empty
1507 VisitChildrenSet::Empty
1510 );
1508 );
1511
1509
1512 // Nested paths
1510 // Nested paths
1513 let m1 = Box::new(
1511 let m1 = Box::new(
1514 IncludeMatcher::new(vec![IgnorePattern::new(
1512 IncludeMatcher::new(vec![IgnorePattern::new(
1515 PatternSyntax::RelPath,
1513 PatternSyntax::RelPath,
1516 b"dir/subdir/x",
1514 b"dir/subdir/x",
1517 Path::new(""),
1515 Path::new(""),
1518 )])
1516 )])
1519 .unwrap(),
1517 .unwrap(),
1520 );
1518 );
1521 let m2 = Box::new(
1519 let m2 = Box::new(
1522 IncludeMatcher::new(vec![IgnorePattern::new(
1520 IncludeMatcher::new(vec![IgnorePattern::new(
1523 PatternSyntax::RelPath,
1521 PatternSyntax::RelPath,
1524 b"dir/subdir",
1522 b"dir/subdir",
1525 Path::new(""),
1523 Path::new(""),
1526 )])
1524 )])
1527 .unwrap(),
1525 .unwrap(),
1528 );
1526 );
1529 let matcher = IntersectionMatcher::new(m1, m2);
1527 let matcher = IntersectionMatcher::new(m1, m2);
1530
1528
1531 let mut set = HashSet::new();
1529 let mut set = HashSet::new();
1532 set.insert(HgPathBuf::from_bytes(b"dir"));
1530 set.insert(HgPathBuf::from_bytes(b"dir"));
1533 assert_eq!(
1531 assert_eq!(
1534 matcher.visit_children_set(HgPath::new(b"")),
1532 matcher.visit_children_set(HgPath::new(b"")),
1535 VisitChildrenSet::Set(set)
1533 VisitChildrenSet::Set(set)
1536 );
1534 );
1537
1535
1538 let mut set = HashSet::new();
1536 let mut set = HashSet::new();
1539 set.insert(HgPathBuf::from_bytes(b"subdir"));
1537 set.insert(HgPathBuf::from_bytes(b"subdir"));
1540 assert_eq!(
1538 assert_eq!(
1541 matcher.visit_children_set(HgPath::new(b"dir")),
1539 matcher.visit_children_set(HgPath::new(b"dir")),
1542 VisitChildrenSet::Set(set)
1540 VisitChildrenSet::Set(set)
1543 );
1541 );
1544 let mut set = HashSet::new();
1542 let mut set = HashSet::new();
1545 set.insert(HgPathBuf::from_bytes(b"x"));
1543 set.insert(HgPathBuf::from_bytes(b"x"));
1546 assert_eq!(
1544 assert_eq!(
1547 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1545 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1548 VisitChildrenSet::Set(set)
1546 VisitChildrenSet::Set(set)
1549 );
1547 );
1550 assert_eq!(
1548 assert_eq!(
1551 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1549 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1552 VisitChildrenSet::Empty
1550 VisitChildrenSet::Empty
1553 );
1551 );
1554 assert_eq!(
1552 assert_eq!(
1555 matcher.visit_children_set(HgPath::new(b"folder")),
1553 matcher.visit_children_set(HgPath::new(b"folder")),
1556 VisitChildrenSet::Empty
1554 VisitChildrenSet::Empty
1557 );
1555 );
1558 assert_eq!(
1556 assert_eq!(
1559 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1557 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1560 VisitChildrenSet::Empty
1558 VisitChildrenSet::Empty
1561 );
1559 );
1562 // OPT: this should probably be 'all' not 'this'.
1560 // OPT: this should probably be 'all' not 'this'.
1563 assert_eq!(
1561 assert_eq!(
1564 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1562 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1565 VisitChildrenSet::This
1563 VisitChildrenSet::This
1566 );
1564 );
1567
1565
1568 // Diverging paths
1566 // Diverging paths
1569 let m1 = Box::new(
1567 let m1 = Box::new(
1570 IncludeMatcher::new(vec![IgnorePattern::new(
1568 IncludeMatcher::new(vec![IgnorePattern::new(
1571 PatternSyntax::RelPath,
1569 PatternSyntax::RelPath,
1572 b"dir/subdir/x",
1570 b"dir/subdir/x",
1573 Path::new(""),
1571 Path::new(""),
1574 )])
1572 )])
1575 .unwrap(),
1573 .unwrap(),
1576 );
1574 );
1577 let m2 = Box::new(
1575 let m2 = Box::new(
1578 IncludeMatcher::new(vec![IgnorePattern::new(
1576 IncludeMatcher::new(vec![IgnorePattern::new(
1579 PatternSyntax::RelPath,
1577 PatternSyntax::RelPath,
1580 b"dir/subdir/z",
1578 b"dir/subdir/z",
1581 Path::new(""),
1579 Path::new(""),
1582 )])
1580 )])
1583 .unwrap(),
1581 .unwrap(),
1584 );
1582 );
1585 let matcher = IntersectionMatcher::new(m1, m2);
1583 let matcher = IntersectionMatcher::new(m1, m2);
1586
1584
1587 // OPT: these next two could probably be Empty as well.
1585 // OPT: these next two could probably be Empty as well.
1588 let mut set = HashSet::new();
1586 let mut set = HashSet::new();
1589 set.insert(HgPathBuf::from_bytes(b"dir"));
1587 set.insert(HgPathBuf::from_bytes(b"dir"));
1590 assert_eq!(
1588 assert_eq!(
1591 matcher.visit_children_set(HgPath::new(b"")),
1589 matcher.visit_children_set(HgPath::new(b"")),
1592 VisitChildrenSet::Set(set)
1590 VisitChildrenSet::Set(set)
1593 );
1591 );
1594 // OPT: these next two could probably be Empty as well.
1592 // OPT: these next two could probably be Empty as well.
1595 let mut set = HashSet::new();
1593 let mut set = HashSet::new();
1596 set.insert(HgPathBuf::from_bytes(b"subdir"));
1594 set.insert(HgPathBuf::from_bytes(b"subdir"));
1597 assert_eq!(
1595 assert_eq!(
1598 matcher.visit_children_set(HgPath::new(b"dir")),
1596 matcher.visit_children_set(HgPath::new(b"dir")),
1599 VisitChildrenSet::Set(set)
1597 VisitChildrenSet::Set(set)
1600 );
1598 );
1601 assert_eq!(
1599 assert_eq!(
1602 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1600 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1603 VisitChildrenSet::Empty
1601 VisitChildrenSet::Empty
1604 );
1602 );
1605 assert_eq!(
1603 assert_eq!(
1606 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1604 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1607 VisitChildrenSet::Empty
1605 VisitChildrenSet::Empty
1608 );
1606 );
1609 assert_eq!(
1607 assert_eq!(
1610 matcher.visit_children_set(HgPath::new(b"folder")),
1608 matcher.visit_children_set(HgPath::new(b"folder")),
1611 VisitChildrenSet::Empty
1609 VisitChildrenSet::Empty
1612 );
1610 );
1613 assert_eq!(
1611 assert_eq!(
1614 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1612 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1615 VisitChildrenSet::Empty
1613 VisitChildrenSet::Empty
1616 );
1614 );
1617 assert_eq!(
1615 assert_eq!(
1618 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1616 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1619 VisitChildrenSet::Empty
1617 VisitChildrenSet::Empty
1620 );
1618 );
1621 }
1619 }
1622
1620
1623 #[test]
1621 #[test]
1624 fn test_differencematcher() {
1622 fn test_differencematcher() {
1625 // Two alwaysmatchers should function like a nevermatcher
1623 // Two alwaysmatchers should function like a nevermatcher
1626 let m1 = AlwaysMatcher;
1624 let m1 = AlwaysMatcher;
1627 let m2 = AlwaysMatcher;
1625 let m2 = AlwaysMatcher;
1628 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
1626 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
1629
1627
1630 for case in &[
1628 for case in &[
1631 &b""[..],
1629 &b""[..],
1632 b"dir",
1630 b"dir",
1633 b"dir/subdir",
1631 b"dir/subdir",
1634 b"dir/subdir/z",
1632 b"dir/subdir/z",
1635 b"dir/foo",
1633 b"dir/foo",
1636 b"dir/subdir/x",
1634 b"dir/subdir/x",
1637 b"folder",
1635 b"folder",
1638 ] {
1636 ] {
1639 assert_eq!(
1637 assert_eq!(
1640 matcher.visit_children_set(HgPath::new(case)),
1638 matcher.visit_children_set(HgPath::new(case)),
1641 VisitChildrenSet::Empty
1639 VisitChildrenSet::Empty
1642 );
1640 );
1643 }
1641 }
1644
1642
1645 // One always and one never should behave the same as an always
1643 // One always and one never should behave the same as an always
1646 let m1 = AlwaysMatcher;
1644 let m1 = AlwaysMatcher;
1647 let m2 = NeverMatcher;
1645 let m2 = NeverMatcher;
1648 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
1646 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
1649
1647
1650 for case in &[
1648 for case in &[
1651 &b""[..],
1649 &b""[..],
1652 b"dir",
1650 b"dir",
1653 b"dir/subdir",
1651 b"dir/subdir",
1654 b"dir/subdir/z",
1652 b"dir/subdir/z",
1655 b"dir/foo",
1653 b"dir/foo",
1656 b"dir/subdir/x",
1654 b"dir/subdir/x",
1657 b"folder",
1655 b"folder",
1658 ] {
1656 ] {
1659 assert_eq!(
1657 assert_eq!(
1660 matcher.visit_children_set(HgPath::new(case)),
1658 matcher.visit_children_set(HgPath::new(case)),
1661 VisitChildrenSet::Recursive
1659 VisitChildrenSet::Recursive
1662 );
1660 );
1663 }
1661 }
1664
1662
1665 // Two include matchers
1663 // Two include matchers
1666 let m1 = Box::new(
1664 let m1 = Box::new(
1667 IncludeMatcher::new(vec![IgnorePattern::new(
1665 IncludeMatcher::new(vec![IgnorePattern::new(
1668 PatternSyntax::RelPath,
1666 PatternSyntax::RelPath,
1669 b"dir/subdir",
1667 b"dir/subdir",
1670 Path::new("/repo"),
1668 Path::new("/repo"),
1671 )])
1669 )])
1672 .unwrap(),
1670 .unwrap(),
1673 );
1671 );
1674 let m2 = Box::new(
1672 let m2 = Box::new(
1675 IncludeMatcher::new(vec![IgnorePattern::new(
1673 IncludeMatcher::new(vec![IgnorePattern::new(
1676 PatternSyntax::RootFiles,
1674 PatternSyntax::RootFiles,
1677 b"dir",
1675 b"dir",
1678 Path::new("/repo"),
1676 Path::new("/repo"),
1679 )])
1677 )])
1680 .unwrap(),
1678 .unwrap(),
1681 );
1679 );
1682
1680
1683 let matcher = DifferenceMatcher::new(m1, m2);
1681 let matcher = DifferenceMatcher::new(m1, m2);
1684
1682
1685 let mut set = HashSet::new();
1683 let mut set = HashSet::new();
1686 set.insert(HgPathBuf::from_bytes(b"dir"));
1684 set.insert(HgPathBuf::from_bytes(b"dir"));
1687 assert_eq!(
1685 assert_eq!(
1688 matcher.visit_children_set(HgPath::new(b"")),
1686 matcher.visit_children_set(HgPath::new(b"")),
1689 VisitChildrenSet::Set(set)
1687 VisitChildrenSet::Set(set)
1690 );
1688 );
1691
1689
1692 let mut set = HashSet::new();
1690 let mut set = HashSet::new();
1693 set.insert(HgPathBuf::from_bytes(b"subdir"));
1691 set.insert(HgPathBuf::from_bytes(b"subdir"));
1694 assert_eq!(
1692 assert_eq!(
1695 matcher.visit_children_set(HgPath::new(b"dir")),
1693 matcher.visit_children_set(HgPath::new(b"dir")),
1696 VisitChildrenSet::Set(set)
1694 VisitChildrenSet::Set(set)
1697 );
1695 );
1698 assert_eq!(
1696 assert_eq!(
1699 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1697 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1700 VisitChildrenSet::Recursive
1698 VisitChildrenSet::Recursive
1701 );
1699 );
1702 assert_eq!(
1700 assert_eq!(
1703 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1701 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1704 VisitChildrenSet::Empty
1702 VisitChildrenSet::Empty
1705 );
1703 );
1706 assert_eq!(
1704 assert_eq!(
1707 matcher.visit_children_set(HgPath::new(b"folder")),
1705 matcher.visit_children_set(HgPath::new(b"folder")),
1708 VisitChildrenSet::Empty
1706 VisitChildrenSet::Empty
1709 );
1707 );
1710 assert_eq!(
1708 assert_eq!(
1711 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1709 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1712 VisitChildrenSet::This
1710 VisitChildrenSet::This
1713 );
1711 );
1714 assert_eq!(
1712 assert_eq!(
1715 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1713 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1716 VisitChildrenSet::This
1714 VisitChildrenSet::This
1717 );
1715 );
1718 }
1716 }
1719 }
1717 }
@@ -1,111 +1,113 b''
1 use std::path::Path;
1 use std::path::Path;
2
2
3 use crate::{
3 use crate::{
4 errors::HgError,
4 errors::HgError,
5 exit_codes,
5 exit_codes,
6 filepatterns::parse_pattern_file_contents,
6 filepatterns::parse_pattern_file_contents,
7 matchers::{
7 matchers::{
8 AlwaysMatcher, DifferenceMatcher, IncludeMatcher, Matcher,
8 AlwaysMatcher, DifferenceMatcher, IncludeMatcher, Matcher,
9 NeverMatcher,
9 NeverMatcher,
10 },
10 },
11 repo::Repo,
11 repo::Repo,
12 requirements::NARROW_REQUIREMENT,
12 requirements::NARROW_REQUIREMENT,
13 sparse::{self, SparseConfigError, SparseWarning},
13 sparse::{self, SparseConfigError, SparseWarning},
14 };
14 };
15
15
16 /// The file in .hg/store/ that indicates which paths exit in the store
16 /// The file in .hg/store/ that indicates which paths exit in the store
17 const FILENAME: &str = "narrowspec";
17 const FILENAME: &str = "narrowspec";
18 /// The file in .hg/ that indicates which paths exit in the dirstate
18 /// The file in .hg/ that indicates which paths exit in the dirstate
19 const DIRSTATE_FILENAME: &str = "narrowspec.dirstate";
19 const DIRSTATE_FILENAME: &str = "narrowspec.dirstate";
20
20
21 /// Pattern prefixes that are allowed in narrow patterns. This list MUST
21 /// Pattern prefixes that are allowed in narrow patterns. This list MUST
22 /// only contain patterns that are fast and safe to evaluate. Keep in mind
22 /// only contain patterns that are fast and safe to evaluate. Keep in mind
23 /// that patterns are supplied by clients and executed on remote servers
23 /// that patterns are supplied by clients and executed on remote servers
24 /// as part of wire protocol commands. That means that changes to this
24 /// as part of wire protocol commands. That means that changes to this
25 /// data structure influence the wire protocol and should not be taken
25 /// data structure influence the wire protocol and should not be taken
26 /// lightly - especially removals.
26 /// lightly - especially removals.
27 const VALID_PREFIXES: [&str; 2] = ["path:", "rootfilesin:"];
27 const VALID_PREFIXES: [&str; 2] = ["path:", "rootfilesin:"];
28
28
29 /// Return the matcher for the current narrow spec, and all configuration
29 /// Return the matcher for the current narrow spec, and all configuration
30 /// warnings to display.
30 /// warnings to display.
31 pub fn matcher(
31 pub fn matcher(
32 repo: &Repo,
32 repo: &Repo,
33 ) -> Result<(Box<dyn Matcher + Sync>, Vec<SparseWarning>), SparseConfigError> {
33 ) -> Result<(Box<dyn Matcher + Sync>, Vec<SparseWarning>), SparseConfigError> {
34 let mut warnings = vec![];
34 let mut warnings = vec![];
35 if !repo.requirements().contains(NARROW_REQUIREMENT) {
35 if !repo.requirements().contains(NARROW_REQUIREMENT) {
36 return Ok((Box::new(AlwaysMatcher), warnings));
36 return Ok((Box::new(AlwaysMatcher), warnings));
37 }
37 }
38 // Treat "narrowspec does not exist" the same as "narrowspec file exists
38 // Treat "narrowspec does not exist" the same as "narrowspec file exists
39 // and is empty".
39 // and is empty".
40 let store_spec = repo.store_vfs().try_read(FILENAME)?.unwrap_or(vec![]);
40 let store_spec = repo.store_vfs().try_read(FILENAME)?.unwrap_or_default();
41 let working_copy_spec =
41 let working_copy_spec = repo
42 repo.hg_vfs().try_read(DIRSTATE_FILENAME)?.unwrap_or(vec![]);
42 .hg_vfs()
43 .try_read(DIRSTATE_FILENAME)?
44 .unwrap_or_default();
43 if store_spec != working_copy_spec {
45 if store_spec != working_copy_spec {
44 return Err(HgError::abort(
46 return Err(HgError::abort(
45 "working copy's narrowspec is stale",
47 "working copy's narrowspec is stale",
46 exit_codes::STATE_ERROR,
48 exit_codes::STATE_ERROR,
47 Some("run 'hg tracked --update-working-copy'".into()),
49 Some("run 'hg tracked --update-working-copy'".into()),
48 )
50 )
49 .into());
51 .into());
50 }
52 }
51
53
52 let config = sparse::parse_config(
54 let config = sparse::parse_config(
53 &store_spec,
55 &store_spec,
54 sparse::SparseConfigContext::Narrow,
56 sparse::SparseConfigContext::Narrow,
55 )?;
57 )?;
56
58
57 warnings.extend(config.warnings);
59 warnings.extend(config.warnings);
58
60
59 if !config.profiles.is_empty() {
61 if !config.profiles.is_empty() {
60 // TODO (from Python impl) maybe do something with profiles?
62 // TODO (from Python impl) maybe do something with profiles?
61 return Err(SparseConfigError::IncludesInNarrow);
63 return Err(SparseConfigError::IncludesInNarrow);
62 }
64 }
63 validate_patterns(&config.includes)?;
65 validate_patterns(&config.includes)?;
64 validate_patterns(&config.excludes)?;
66 validate_patterns(&config.excludes)?;
65
67
66 if config.includes.is_empty() {
68 if config.includes.is_empty() {
67 return Ok((Box::new(NeverMatcher), warnings));
69 return Ok((Box::new(NeverMatcher), warnings));
68 }
70 }
69
71
70 let (patterns, subwarnings) = parse_pattern_file_contents(
72 let (patterns, subwarnings) = parse_pattern_file_contents(
71 &config.includes,
73 &config.includes,
72 Path::new(""),
74 Path::new(""),
73 None,
75 None,
74 false,
76 false,
75 )?;
77 )?;
76 warnings.extend(subwarnings.into_iter().map(From::from));
78 warnings.extend(subwarnings.into_iter().map(From::from));
77
79
78 let mut m: Box<dyn Matcher + Sync> =
80 let mut m: Box<dyn Matcher + Sync> =
79 Box::new(IncludeMatcher::new(patterns)?);
81 Box::new(IncludeMatcher::new(patterns)?);
80
82
81 let (patterns, subwarnings) = parse_pattern_file_contents(
83 let (patterns, subwarnings) = parse_pattern_file_contents(
82 &config.excludes,
84 &config.excludes,
83 Path::new(""),
85 Path::new(""),
84 None,
86 None,
85 false,
87 false,
86 )?;
88 )?;
87 if !patterns.is_empty() {
89 if !patterns.is_empty() {
88 warnings.extend(subwarnings.into_iter().map(From::from));
90 warnings.extend(subwarnings.into_iter().map(From::from));
89 let exclude_matcher = Box::new(IncludeMatcher::new(patterns)?);
91 let exclude_matcher = Box::new(IncludeMatcher::new(patterns)?);
90 m = Box::new(DifferenceMatcher::new(m, exclude_matcher));
92 m = Box::new(DifferenceMatcher::new(m, exclude_matcher));
91 }
93 }
92
94
93 Ok((m, warnings))
95 Ok((m, warnings))
94 }
96 }
95
97
96 fn validate_patterns(patterns: &[u8]) -> Result<(), SparseConfigError> {
98 fn validate_patterns(patterns: &[u8]) -> Result<(), SparseConfigError> {
97 for pattern in patterns.split(|c| *c == b'\n') {
99 for pattern in patterns.split(|c| *c == b'\n') {
98 if pattern.is_empty() {
100 if pattern.is_empty() {
99 continue;
101 continue;
100 }
102 }
101 for prefix in VALID_PREFIXES.iter() {
103 for prefix in VALID_PREFIXES.iter() {
102 if pattern.starts_with(prefix.as_bytes()) {
104 if pattern.starts_with(prefix.as_bytes()) {
103 return Ok(());
105 return Ok(());
104 }
106 }
105 }
107 }
106 return Err(SparseConfigError::InvalidNarrowPrefix(
108 return Err(SparseConfigError::InvalidNarrowPrefix(
107 pattern.to_owned(),
109 pattern.to_owned(),
108 ));
110 ));
109 }
111 }
110 Ok(())
112 Ok(())
111 }
113 }
@@ -1,117 +1,115 b''
1 // list_tracked_files.rs
1 // list_tracked_files.rs
2 //
2 //
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::repo::Repo;
8 use crate::repo::Repo;
9 use crate::revlog::revlog::RevlogError;
9 use crate::revlog::revlog::RevlogError;
10 use crate::revlog::Node;
10 use crate::revlog::Node;
11
11
12 use crate::utils::hg_path::HgPath;
12 use crate::utils::hg_path::HgPath;
13
13
14 use crate::errors::HgError;
14 use crate::errors::HgError;
15 use crate::manifest::Manifest;
15 use crate::manifest::Manifest;
16 use crate::manifest::ManifestEntry;
16 use crate::manifest::ManifestEntry;
17 use itertools::put_back;
17 use itertools::put_back;
18 use itertools::PutBack;
18 use itertools::PutBack;
19 use std::cmp::Ordering;
19 use std::cmp::Ordering;
20
20
21 pub struct CatOutput<'a> {
21 pub struct CatOutput<'a> {
22 /// Whether any file in the manifest matched the paths given as CLI
22 /// Whether any file in the manifest matched the paths given as CLI
23 /// arguments
23 /// arguments
24 pub found_any: bool,
24 pub found_any: bool,
25 /// The contents of matching files, in manifest order
25 /// The contents of matching files, in manifest order
26 pub results: Vec<(&'a HgPath, Vec<u8>)>,
26 pub results: Vec<(&'a HgPath, Vec<u8>)>,
27 /// Which of the CLI arguments did not match any manifest file
27 /// Which of the CLI arguments did not match any manifest file
28 pub missing: Vec<&'a HgPath>,
28 pub missing: Vec<&'a HgPath>,
29 /// The node ID that the given revset was resolved to
29 /// The node ID that the given revset was resolved to
30 pub node: Node,
30 pub node: Node,
31 }
31 }
32
32
33 // Find an item in an iterator over a sorted collection.
33 // Find an item in an iterator over a sorted collection.
34 fn find_item<'a>(
34 fn find_item<'a>(
35 i: &mut PutBack<impl Iterator<Item = Result<ManifestEntry<'a>, HgError>>>,
35 i: &mut PutBack<impl Iterator<Item = Result<ManifestEntry<'a>, HgError>>>,
36 needle: &HgPath,
36 needle: &HgPath,
37 ) -> Result<Option<Node>, HgError> {
37 ) -> Result<Option<Node>, HgError> {
38 loop {
38 loop {
39 match i.next() {
39 match i.next() {
40 None => return Ok(None),
40 None => return Ok(None),
41 Some(result) => {
41 Some(result) => {
42 let entry = result?;
42 let entry = result?;
43 match needle.as_bytes().cmp(entry.path.as_bytes()) {
43 match needle.as_bytes().cmp(entry.path.as_bytes()) {
44 Ordering::Less => {
44 Ordering::Less => {
45 i.put_back(Ok(entry));
45 i.put_back(Ok(entry));
46 return Ok(None);
46 return Ok(None);
47 }
47 }
48 Ordering::Greater => continue,
48 Ordering::Greater => continue,
49 Ordering::Equal => return Ok(Some(entry.node_id()?)),
49 Ordering::Equal => return Ok(Some(entry.node_id()?)),
50 }
50 }
51 }
51 }
52 }
52 }
53 }
53 }
54 }
54 }
55
55
56 // Tuple of (missing, found) paths in the manifest
56 // Tuple of (missing, found) paths in the manifest
57 type ManifestQueryResponse<'a> = (Vec<(&'a HgPath, Node)>, Vec<&'a HgPath>);
57 type ManifestQueryResponse<'a> = (Vec<(&'a HgPath, Node)>, Vec<&'a HgPath>);
58
58
59 fn find_files_in_manifest<'query>(
59 fn find_files_in_manifest<'query>(
60 manifest: &Manifest,
60 manifest: &Manifest,
61 query: impl Iterator<Item = &'query HgPath>,
61 query: impl Iterator<Item = &'query HgPath>,
62 ) -> Result<ManifestQueryResponse<'query>, HgError> {
62 ) -> Result<ManifestQueryResponse<'query>, HgError> {
63 let mut manifest = put_back(manifest.iter());
63 let mut manifest = put_back(manifest.iter());
64 let mut res = vec![];
64 let mut res = vec![];
65 let mut missing = vec![];
65 let mut missing = vec![];
66
66
67 for file in query {
67 for file in query {
68 match find_item(&mut manifest, file)? {
68 match find_item(&mut manifest, file)? {
69 None => missing.push(file),
69 None => missing.push(file),
70 Some(item) => res.push((file, item)),
70 Some(item) => res.push((file, item)),
71 }
71 }
72 }
72 }
73 return Ok((res, missing));
73 Ok((res, missing))
74 }
74 }
75
75
76 /// Output the given revision of files
76 /// Output the given revision of files
77 ///
77 ///
78 /// * `root`: Repository root
78 /// * `root`: Repository root
79 /// * `rev`: The revision to cat the files from.
79 /// * `rev`: The revision to cat the files from.
80 /// * `files`: The files to output.
80 /// * `files`: The files to output.
81 pub fn cat<'a>(
81 pub fn cat<'a>(
82 repo: &Repo,
82 repo: &Repo,
83 revset: &str,
83 revset: &str,
84 mut files: Vec<&'a HgPath>,
84 mut files: Vec<&'a HgPath>,
85 ) -> Result<CatOutput<'a>, RevlogError> {
85 ) -> Result<CatOutput<'a>, RevlogError> {
86 let rev = crate::revset::resolve_single(revset, repo)?;
86 let rev = crate::revset::resolve_single(revset, repo)?;
87 let manifest = repo.manifest_for_rev(rev)?;
87 let manifest = repo.manifest_for_rev(rev)?;
88 let node = *repo
88 let node = *repo
89 .changelog()?
89 .changelog()?
90 .node_from_rev(rev)
90 .node_from_rev(rev)
91 .expect("should succeed when repo.manifest did");
91 .expect("should succeed when repo.manifest did");
92 let mut results: Vec<(&'a HgPath, Vec<u8>)> = vec![];
92 let mut results: Vec<(&'a HgPath, Vec<u8>)> = vec![];
93 let mut found_any = false;
93 let mut found_any = false;
94
94
95 files.sort_unstable();
95 files.sort_unstable();
96
96
97 let (found, missing) = find_files_in_manifest(
97 let (found, missing) =
98 &manifest,
98 find_files_in_manifest(&manifest, files.into_iter())?;
99 files.into_iter().map(|f| f.as_ref()),
100 )?;
101
99
102 for (file_path, file_node) in found {
100 for (file_path, file_node) in found {
103 found_any = true;
101 found_any = true;
104 let file_log = repo.filelog(file_path)?;
102 let file_log = repo.filelog(file_path)?;
105 results.push((
103 results.push((
106 file_path,
104 file_path,
107 file_log.data_for_node(file_node)?.into_file_data()?,
105 file_log.data_for_node(file_node)?.into_file_data()?,
108 ));
106 ));
109 }
107 }
110
108
111 Ok(CatOutput {
109 Ok(CatOutput {
112 found_any,
110 found_any,
113 results,
111 results,
114 missing,
112 missing,
115 node,
113 node,
116 })
114 })
117 }
115 }
@@ -1,567 +1,563 b''
1 use crate::changelog::Changelog;
1 use crate::changelog::Changelog;
2 use crate::config::{Config, ConfigError, ConfigParseError};
2 use crate::config::{Config, ConfigError, ConfigParseError};
3 use crate::dirstate::DirstateParents;
3 use crate::dirstate::DirstateParents;
4 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
4 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
5 use crate::dirstate_tree::owning::OwningDirstateMap;
5 use crate::dirstate_tree::owning::OwningDirstateMap;
6 use crate::errors::HgResultExt;
6 use crate::errors::HgResultExt;
7 use crate::errors::{HgError, IoResultExt};
7 use crate::errors::{HgError, IoResultExt};
8 use crate::lock::{try_with_lock_no_wait, LockError};
8 use crate::lock::{try_with_lock_no_wait, LockError};
9 use crate::manifest::{Manifest, Manifestlog};
9 use crate::manifest::{Manifest, Manifestlog};
10 use crate::revlog::filelog::Filelog;
10 use crate::revlog::filelog::Filelog;
11 use crate::revlog::revlog::RevlogError;
11 use crate::revlog::revlog::RevlogError;
12 use crate::utils::files::get_path_from_bytes;
12 use crate::utils::files::get_path_from_bytes;
13 use crate::utils::hg_path::HgPath;
13 use crate::utils::hg_path::HgPath;
14 use crate::utils::SliceExt;
14 use crate::utils::SliceExt;
15 use crate::vfs::{is_dir, is_file, Vfs};
15 use crate::vfs::{is_dir, is_file, Vfs};
16 use crate::{requirements, NodePrefix};
16 use crate::{requirements, NodePrefix};
17 use crate::{DirstateError, Revision};
17 use crate::{DirstateError, Revision};
18 use std::cell::{Ref, RefCell, RefMut};
18 use std::cell::{Ref, RefCell, RefMut};
19 use std::collections::HashSet;
19 use std::collections::HashSet;
20 use std::io::Seek;
20 use std::io::Seek;
21 use std::io::SeekFrom;
21 use std::io::SeekFrom;
22 use std::io::Write as IoWrite;
22 use std::io::Write as IoWrite;
23 use std::path::{Path, PathBuf};
23 use std::path::{Path, PathBuf};
24
24
25 /// A repository on disk
25 /// A repository on disk
26 pub struct Repo {
26 pub struct Repo {
27 working_directory: PathBuf,
27 working_directory: PathBuf,
28 dot_hg: PathBuf,
28 dot_hg: PathBuf,
29 store: PathBuf,
29 store: PathBuf,
30 requirements: HashSet<String>,
30 requirements: HashSet<String>,
31 config: Config,
31 config: Config,
32 dirstate_parents: LazyCell<DirstateParents>,
32 dirstate_parents: LazyCell<DirstateParents>,
33 dirstate_data_file_uuid: LazyCell<Option<Vec<u8>>>,
33 dirstate_data_file_uuid: LazyCell<Option<Vec<u8>>>,
34 dirstate_map: LazyCell<OwningDirstateMap>,
34 dirstate_map: LazyCell<OwningDirstateMap>,
35 changelog: LazyCell<Changelog>,
35 changelog: LazyCell<Changelog>,
36 manifestlog: LazyCell<Manifestlog>,
36 manifestlog: LazyCell<Manifestlog>,
37 }
37 }
38
38
39 #[derive(Debug, derive_more::From)]
39 #[derive(Debug, derive_more::From)]
40 pub enum RepoError {
40 pub enum RepoError {
41 NotFound {
41 NotFound {
42 at: PathBuf,
42 at: PathBuf,
43 },
43 },
44 #[from]
44 #[from]
45 ConfigParseError(ConfigParseError),
45 ConfigParseError(ConfigParseError),
46 #[from]
46 #[from]
47 Other(HgError),
47 Other(HgError),
48 }
48 }
49
49
50 impl From<ConfigError> for RepoError {
50 impl From<ConfigError> for RepoError {
51 fn from(error: ConfigError) -> Self {
51 fn from(error: ConfigError) -> Self {
52 match error {
52 match error {
53 ConfigError::Parse(error) => error.into(),
53 ConfigError::Parse(error) => error.into(),
54 ConfigError::Other(error) => error.into(),
54 ConfigError::Other(error) => error.into(),
55 }
55 }
56 }
56 }
57 }
57 }
58
58
59 impl Repo {
59 impl Repo {
60 /// tries to find nearest repository root in current working directory or
60 /// tries to find nearest repository root in current working directory or
61 /// its ancestors
61 /// its ancestors
62 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
62 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
63 let current_directory = crate::utils::current_dir()?;
63 let current_directory = crate::utils::current_dir()?;
64 // ancestors() is inclusive: it first yields `current_directory`
64 // ancestors() is inclusive: it first yields `current_directory`
65 // as-is.
65 // as-is.
66 for ancestor in current_directory.ancestors() {
66 for ancestor in current_directory.ancestors() {
67 if is_dir(ancestor.join(".hg"))? {
67 if is_dir(ancestor.join(".hg"))? {
68 return Ok(ancestor.to_path_buf());
68 return Ok(ancestor.to_path_buf());
69 }
69 }
70 }
70 }
71 return Err(RepoError::NotFound {
71 Err(RepoError::NotFound {
72 at: current_directory,
72 at: current_directory,
73 });
73 })
74 }
74 }
75
75
76 /// Find a repository, either at the given path (which must contain a `.hg`
76 /// Find a repository, either at the given path (which must contain a `.hg`
77 /// sub-directory) or by searching the current directory and its
77 /// sub-directory) or by searching the current directory and its
78 /// ancestors.
78 /// ancestors.
79 ///
79 ///
80 /// A method with two very different "modes" like this usually a code smell
80 /// A method with two very different "modes" like this usually a code smell
81 /// to make two methods instead, but in this case an `Option` is what rhg
81 /// to make two methods instead, but in this case an `Option` is what rhg
82 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
82 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
83 /// Having two methods would just move that `if` to almost all callers.
83 /// Having two methods would just move that `if` to almost all callers.
84 pub fn find(
84 pub fn find(
85 config: &Config,
85 config: &Config,
86 explicit_path: Option<PathBuf>,
86 explicit_path: Option<PathBuf>,
87 ) -> Result<Self, RepoError> {
87 ) -> Result<Self, RepoError> {
88 if let Some(root) = explicit_path {
88 if let Some(root) = explicit_path {
89 if is_dir(root.join(".hg"))? {
89 if is_dir(root.join(".hg"))? {
90 Self::new_at_path(root.to_owned(), config)
90 Self::new_at_path(root, config)
91 } else if is_file(&root)? {
91 } else if is_file(&root)? {
92 Err(HgError::unsupported("bundle repository").into())
92 Err(HgError::unsupported("bundle repository").into())
93 } else {
93 } else {
94 Err(RepoError::NotFound {
94 Err(RepoError::NotFound { at: root })
95 at: root.to_owned(),
96 })
97 }
95 }
98 } else {
96 } else {
99 let root = Self::find_repo_root()?;
97 let root = Self::find_repo_root()?;
100 Self::new_at_path(root, config)
98 Self::new_at_path(root, config)
101 }
99 }
102 }
100 }
103
101
104 /// To be called after checking that `.hg` is a sub-directory
102 /// To be called after checking that `.hg` is a sub-directory
105 fn new_at_path(
103 fn new_at_path(
106 working_directory: PathBuf,
104 working_directory: PathBuf,
107 config: &Config,
105 config: &Config,
108 ) -> Result<Self, RepoError> {
106 ) -> Result<Self, RepoError> {
109 let dot_hg = working_directory.join(".hg");
107 let dot_hg = working_directory.join(".hg");
110
108
111 let mut repo_config_files = Vec::new();
109 let mut repo_config_files =
112 repo_config_files.push(dot_hg.join("hgrc"));
110 vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")];
113 repo_config_files.push(dot_hg.join("hgrc-not-shared"));
114
111
115 let hg_vfs = Vfs { base: &dot_hg };
112 let hg_vfs = Vfs { base: &dot_hg };
116 let mut reqs = requirements::load_if_exists(hg_vfs)?;
113 let mut reqs = requirements::load_if_exists(hg_vfs)?;
117 let relative =
114 let relative =
118 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
115 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
119 let shared =
116 let shared =
120 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
117 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
121
118
122 // From `mercurial/localrepo.py`:
119 // From `mercurial/localrepo.py`:
123 //
120 //
124 // if .hg/requires contains the sharesafe requirement, it means
121 // if .hg/requires contains the sharesafe requirement, it means
125 // there exists a `.hg/store/requires` too and we should read it
122 // there exists a `.hg/store/requires` too and we should read it
126 // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
123 // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
127 // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
124 // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
128 // is not present, refer checkrequirementscompat() for that
125 // is not present, refer checkrequirementscompat() for that
129 //
126 //
130 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
127 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
131 // repository was shared the old way. We check the share source
128 // repository was shared the old way. We check the share source
132 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
129 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
133 // current repository needs to be reshared
130 // current repository needs to be reshared
134 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
131 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
135
132
136 let store_path;
133 let store_path;
137 if !shared {
134 if !shared {
138 store_path = dot_hg.join("store");
135 store_path = dot_hg.join("store");
139 } else {
136 } else {
140 let bytes = hg_vfs.read("sharedpath")?;
137 let bytes = hg_vfs.read("sharedpath")?;
141 let mut shared_path =
138 let mut shared_path =
142 get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
139 get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
143 .to_owned();
140 .to_owned();
144 if relative {
141 if relative {
145 shared_path = dot_hg.join(shared_path)
142 shared_path = dot_hg.join(shared_path)
146 }
143 }
147 if !is_dir(&shared_path)? {
144 if !is_dir(&shared_path)? {
148 return Err(HgError::corrupted(format!(
145 return Err(HgError::corrupted(format!(
149 ".hg/sharedpath points to nonexistent directory {}",
146 ".hg/sharedpath points to nonexistent directory {}",
150 shared_path.display()
147 shared_path.display()
151 ))
148 ))
152 .into());
149 .into());
153 }
150 }
154
151
155 store_path = shared_path.join("store");
152 store_path = shared_path.join("store");
156
153
157 let source_is_share_safe =
154 let source_is_share_safe =
158 requirements::load(Vfs { base: &shared_path })?
155 requirements::load(Vfs { base: &shared_path })?
159 .contains(requirements::SHARESAFE_REQUIREMENT);
156 .contains(requirements::SHARESAFE_REQUIREMENT);
160
157
161 if share_safe != source_is_share_safe {
158 if share_safe != source_is_share_safe {
162 return Err(HgError::unsupported("share-safe mismatch").into());
159 return Err(HgError::unsupported("share-safe mismatch").into());
163 }
160 }
164
161
165 if share_safe {
162 if share_safe {
166 repo_config_files.insert(0, shared_path.join("hgrc"))
163 repo_config_files.insert(0, shared_path.join("hgrc"))
167 }
164 }
168 }
165 }
169 if share_safe {
166 if share_safe {
170 reqs.extend(requirements::load(Vfs { base: &store_path })?);
167 reqs.extend(requirements::load(Vfs { base: &store_path })?);
171 }
168 }
172
169
173 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
170 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
174 config.combine_with_repo(&repo_config_files)?
171 config.combine_with_repo(&repo_config_files)?
175 } else {
172 } else {
176 config.clone()
173 config.clone()
177 };
174 };
178
175
179 let repo = Self {
176 let repo = Self {
180 requirements: reqs,
177 requirements: reqs,
181 working_directory,
178 working_directory,
182 store: store_path,
179 store: store_path,
183 dot_hg,
180 dot_hg,
184 config: repo_config,
181 config: repo_config,
185 dirstate_parents: LazyCell::new(),
182 dirstate_parents: LazyCell::new(),
186 dirstate_data_file_uuid: LazyCell::new(),
183 dirstate_data_file_uuid: LazyCell::new(),
187 dirstate_map: LazyCell::new(),
184 dirstate_map: LazyCell::new(),
188 changelog: LazyCell::new(),
185 changelog: LazyCell::new(),
189 manifestlog: LazyCell::new(),
186 manifestlog: LazyCell::new(),
190 };
187 };
191
188
192 requirements::check(&repo)?;
189 requirements::check(&repo)?;
193
190
194 Ok(repo)
191 Ok(repo)
195 }
192 }
196
193
197 pub fn working_directory_path(&self) -> &Path {
194 pub fn working_directory_path(&self) -> &Path {
198 &self.working_directory
195 &self.working_directory
199 }
196 }
200
197
201 pub fn requirements(&self) -> &HashSet<String> {
198 pub fn requirements(&self) -> &HashSet<String> {
202 &self.requirements
199 &self.requirements
203 }
200 }
204
201
205 pub fn config(&self) -> &Config {
202 pub fn config(&self) -> &Config {
206 &self.config
203 &self.config
207 }
204 }
208
205
209 /// For accessing repository files (in `.hg`), except for the store
206 /// For accessing repository files (in `.hg`), except for the store
210 /// (`.hg/store`).
207 /// (`.hg/store`).
211 pub fn hg_vfs(&self) -> Vfs<'_> {
208 pub fn hg_vfs(&self) -> Vfs<'_> {
212 Vfs { base: &self.dot_hg }
209 Vfs { base: &self.dot_hg }
213 }
210 }
214
211
215 /// For accessing repository store files (in `.hg/store`)
212 /// For accessing repository store files (in `.hg/store`)
216 pub fn store_vfs(&self) -> Vfs<'_> {
213 pub fn store_vfs(&self) -> Vfs<'_> {
217 Vfs { base: &self.store }
214 Vfs { base: &self.store }
218 }
215 }
219
216
220 /// For accessing the working copy
217 /// For accessing the working copy
221 pub fn working_directory_vfs(&self) -> Vfs<'_> {
218 pub fn working_directory_vfs(&self) -> Vfs<'_> {
222 Vfs {
219 Vfs {
223 base: &self.working_directory,
220 base: &self.working_directory,
224 }
221 }
225 }
222 }
226
223
227 pub fn try_with_wlock_no_wait<R>(
224 pub fn try_with_wlock_no_wait<R>(
228 &self,
225 &self,
229 f: impl FnOnce() -> R,
226 f: impl FnOnce() -> R,
230 ) -> Result<R, LockError> {
227 ) -> Result<R, LockError> {
231 try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
228 try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
232 }
229 }
233
230
234 pub fn has_dirstate_v2(&self) -> bool {
231 pub fn has_dirstate_v2(&self) -> bool {
235 self.requirements
232 self.requirements
236 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
233 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
237 }
234 }
238
235
239 pub fn has_sparse(&self) -> bool {
236 pub fn has_sparse(&self) -> bool {
240 self.requirements.contains(requirements::SPARSE_REQUIREMENT)
237 self.requirements.contains(requirements::SPARSE_REQUIREMENT)
241 }
238 }
242
239
243 pub fn has_narrow(&self) -> bool {
240 pub fn has_narrow(&self) -> bool {
244 self.requirements.contains(requirements::NARROW_REQUIREMENT)
241 self.requirements.contains(requirements::NARROW_REQUIREMENT)
245 }
242 }
246
243
247 pub fn has_nodemap(&self) -> bool {
244 pub fn has_nodemap(&self) -> bool {
248 self.requirements
245 self.requirements
249 .contains(requirements::NODEMAP_REQUIREMENT)
246 .contains(requirements::NODEMAP_REQUIREMENT)
250 }
247 }
251
248
252 fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
249 fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
253 Ok(self
250 Ok(self
254 .hg_vfs()
251 .hg_vfs()
255 .read("dirstate")
252 .read("dirstate")
256 .io_not_found_as_none()?
253 .io_not_found_as_none()?
257 .unwrap_or(Vec::new()))
254 .unwrap_or_default())
258 }
255 }
259
256
260 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
257 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
261 Ok(*self
258 Ok(*self
262 .dirstate_parents
259 .dirstate_parents
263 .get_or_init(|| self.read_dirstate_parents())?)
260 .get_or_init(|| self.read_dirstate_parents())?)
264 }
261 }
265
262
266 fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
263 fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
267 let dirstate = self.dirstate_file_contents()?;
264 let dirstate = self.dirstate_file_contents()?;
268 let parents = if dirstate.is_empty() {
265 let parents = if dirstate.is_empty() {
269 if self.has_dirstate_v2() {
266 if self.has_dirstate_v2() {
270 self.dirstate_data_file_uuid.set(None);
267 self.dirstate_data_file_uuid.set(None);
271 }
268 }
272 DirstateParents::NULL
269 DirstateParents::NULL
273 } else if self.has_dirstate_v2() {
270 } else if self.has_dirstate_v2() {
274 let docket =
271 let docket =
275 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
272 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
276 self.dirstate_data_file_uuid
273 self.dirstate_data_file_uuid
277 .set(Some(docket.uuid.to_owned()));
274 .set(Some(docket.uuid.to_owned()));
278 docket.parents()
275 docket.parents()
279 } else {
276 } else {
280 crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
277 *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
281 .clone()
282 };
278 };
283 self.dirstate_parents.set(parents);
279 self.dirstate_parents.set(parents);
284 Ok(parents)
280 Ok(parents)
285 }
281 }
286
282
287 fn read_dirstate_data_file_uuid(
283 fn read_dirstate_data_file_uuid(
288 &self,
284 &self,
289 ) -> Result<Option<Vec<u8>>, HgError> {
285 ) -> Result<Option<Vec<u8>>, HgError> {
290 assert!(
286 assert!(
291 self.has_dirstate_v2(),
287 self.has_dirstate_v2(),
292 "accessing dirstate data file ID without dirstate-v2"
288 "accessing dirstate data file ID without dirstate-v2"
293 );
289 );
294 let dirstate = self.dirstate_file_contents()?;
290 let dirstate = self.dirstate_file_contents()?;
295 if dirstate.is_empty() {
291 if dirstate.is_empty() {
296 self.dirstate_parents.set(DirstateParents::NULL);
292 self.dirstate_parents.set(DirstateParents::NULL);
297 Ok(None)
293 Ok(None)
298 } else {
294 } else {
299 let docket =
295 let docket =
300 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
296 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
301 self.dirstate_parents.set(docket.parents());
297 self.dirstate_parents.set(docket.parents());
302 Ok(Some(docket.uuid.to_owned()))
298 Ok(Some(docket.uuid.to_owned()))
303 }
299 }
304 }
300 }
305
301
306 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
302 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
307 let dirstate_file_contents = self.dirstate_file_contents()?;
303 let dirstate_file_contents = self.dirstate_file_contents()?;
308 if dirstate_file_contents.is_empty() {
304 if dirstate_file_contents.is_empty() {
309 self.dirstate_parents.set(DirstateParents::NULL);
305 self.dirstate_parents.set(DirstateParents::NULL);
310 if self.has_dirstate_v2() {
306 if self.has_dirstate_v2() {
311 self.dirstate_data_file_uuid.set(None);
307 self.dirstate_data_file_uuid.set(None);
312 }
308 }
313 Ok(OwningDirstateMap::new_empty(Vec::new()))
309 Ok(OwningDirstateMap::new_empty(Vec::new()))
314 } else if self.has_dirstate_v2() {
310 } else if self.has_dirstate_v2() {
315 let docket = crate::dirstate_tree::on_disk::read_docket(
311 let docket = crate::dirstate_tree::on_disk::read_docket(
316 &dirstate_file_contents,
312 &dirstate_file_contents,
317 )?;
313 )?;
318 self.dirstate_parents.set(docket.parents());
314 self.dirstate_parents.set(docket.parents());
319 self.dirstate_data_file_uuid
315 self.dirstate_data_file_uuid
320 .set(Some(docket.uuid.to_owned()));
316 .set(Some(docket.uuid.to_owned()));
321 let data_size = docket.data_size();
317 let data_size = docket.data_size();
322 let metadata = docket.tree_metadata();
318 let metadata = docket.tree_metadata();
323 if let Some(data_mmap) = self
319 if let Some(data_mmap) = self
324 .hg_vfs()
320 .hg_vfs()
325 .mmap_open(docket.data_filename())
321 .mmap_open(docket.data_filename())
326 .io_not_found_as_none()?
322 .io_not_found_as_none()?
327 {
323 {
328 OwningDirstateMap::new_v2(data_mmap, data_size, metadata)
324 OwningDirstateMap::new_v2(data_mmap, data_size, metadata)
329 } else {
325 } else {
330 OwningDirstateMap::new_v2(Vec::new(), data_size, metadata)
326 OwningDirstateMap::new_v2(Vec::new(), data_size, metadata)
331 }
327 }
332 } else {
328 } else {
333 let (map, parents) =
329 let (map, parents) =
334 OwningDirstateMap::new_v1(dirstate_file_contents)?;
330 OwningDirstateMap::new_v1(dirstate_file_contents)?;
335 self.dirstate_parents.set(parents);
331 self.dirstate_parents.set(parents);
336 Ok(map)
332 Ok(map)
337 }
333 }
338 }
334 }
339
335
340 pub fn dirstate_map(
336 pub fn dirstate_map(
341 &self,
337 &self,
342 ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
338 ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
343 self.dirstate_map.get_or_init(|| self.new_dirstate_map())
339 self.dirstate_map.get_or_init(|| self.new_dirstate_map())
344 }
340 }
345
341
346 pub fn dirstate_map_mut(
342 pub fn dirstate_map_mut(
347 &self,
343 &self,
348 ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
344 ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
349 self.dirstate_map
345 self.dirstate_map
350 .get_mut_or_init(|| self.new_dirstate_map())
346 .get_mut_or_init(|| self.new_dirstate_map())
351 }
347 }
352
348
353 fn new_changelog(&self) -> Result<Changelog, HgError> {
349 fn new_changelog(&self) -> Result<Changelog, HgError> {
354 Changelog::open(&self.store_vfs(), self.has_nodemap())
350 Changelog::open(&self.store_vfs(), self.has_nodemap())
355 }
351 }
356
352
357 pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
353 pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
358 self.changelog.get_or_init(|| self.new_changelog())
354 self.changelog.get_or_init(|| self.new_changelog())
359 }
355 }
360
356
361 pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
357 pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
362 self.changelog.get_mut_or_init(|| self.new_changelog())
358 self.changelog.get_mut_or_init(|| self.new_changelog())
363 }
359 }
364
360
365 fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
361 fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
366 Manifestlog::open(&self.store_vfs(), self.has_nodemap())
362 Manifestlog::open(&self.store_vfs(), self.has_nodemap())
367 }
363 }
368
364
369 pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
365 pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
370 self.manifestlog.get_or_init(|| self.new_manifestlog())
366 self.manifestlog.get_or_init(|| self.new_manifestlog())
371 }
367 }
372
368
373 pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
369 pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
374 self.manifestlog.get_mut_or_init(|| self.new_manifestlog())
370 self.manifestlog.get_mut_or_init(|| self.new_manifestlog())
375 }
371 }
376
372
377 /// Returns the manifest of the *changeset* with the given node ID
373 /// Returns the manifest of the *changeset* with the given node ID
378 pub fn manifest_for_node(
374 pub fn manifest_for_node(
379 &self,
375 &self,
380 node: impl Into<NodePrefix>,
376 node: impl Into<NodePrefix>,
381 ) -> Result<Manifest, RevlogError> {
377 ) -> Result<Manifest, RevlogError> {
382 self.manifestlog()?.data_for_node(
378 self.manifestlog()?.data_for_node(
383 self.changelog()?
379 self.changelog()?
384 .data_for_node(node.into())?
380 .data_for_node(node.into())?
385 .manifest_node()?
381 .manifest_node()?
386 .into(),
382 .into(),
387 )
383 )
388 }
384 }
389
385
390 /// Returns the manifest of the *changeset* with the given revision number
386 /// Returns the manifest of the *changeset* with the given revision number
391 pub fn manifest_for_rev(
387 pub fn manifest_for_rev(
392 &self,
388 &self,
393 revision: Revision,
389 revision: Revision,
394 ) -> Result<Manifest, RevlogError> {
390 ) -> Result<Manifest, RevlogError> {
395 self.manifestlog()?.data_for_node(
391 self.manifestlog()?.data_for_node(
396 self.changelog()?
392 self.changelog()?
397 .data_for_rev(revision)?
393 .data_for_rev(revision)?
398 .manifest_node()?
394 .manifest_node()?
399 .into(),
395 .into(),
400 )
396 )
401 }
397 }
402
398
403 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
399 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
404 if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
400 if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
405 Ok(entry.tracked())
401 Ok(entry.tracked())
406 } else {
402 } else {
407 Ok(false)
403 Ok(false)
408 }
404 }
409 }
405 }
410
406
411 pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
407 pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
412 Filelog::open(self, path)
408 Filelog::open(self, path)
413 }
409 }
414
410
415 /// Write to disk any updates that were made through `dirstate_map_mut`.
411 /// Write to disk any updates that were made through `dirstate_map_mut`.
416 ///
412 ///
417 /// The "wlock" must be held while calling this.
413 /// The "wlock" must be held while calling this.
418 /// See for example `try_with_wlock_no_wait`.
414 /// See for example `try_with_wlock_no_wait`.
419 ///
415 ///
420 /// TODO: have a `WritableRepo` type only accessible while holding the
416 /// TODO: have a `WritableRepo` type only accessible while holding the
421 /// lock?
417 /// lock?
422 pub fn write_dirstate(&self) -> Result<(), DirstateError> {
418 pub fn write_dirstate(&self) -> Result<(), DirstateError> {
423 let map = self.dirstate_map()?;
419 let map = self.dirstate_map()?;
424 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
420 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
425 // it’s unset
421 // it’s unset
426 let parents = self.dirstate_parents()?;
422 let parents = self.dirstate_parents()?;
427 let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() {
423 let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() {
428 let uuid_opt = self
424 let uuid_opt = self
429 .dirstate_data_file_uuid
425 .dirstate_data_file_uuid
430 .get_or_init(|| self.read_dirstate_data_file_uuid())?;
426 .get_or_init(|| self.read_dirstate_data_file_uuid())?;
431 let uuid_opt = uuid_opt.as_ref();
427 let uuid_opt = uuid_opt.as_ref();
432 let can_append = uuid_opt.is_some();
428 let can_append = uuid_opt.is_some();
433 let (data, tree_metadata, append, old_data_size) =
429 let (data, tree_metadata, append, old_data_size) =
434 map.pack_v2(can_append)?;
430 map.pack_v2(can_append)?;
435
431
436 // Reuse the uuid, or generate a new one, keeping the old for
432 // Reuse the uuid, or generate a new one, keeping the old for
437 // deletion.
433 // deletion.
438 let (uuid, old_uuid) = match uuid_opt {
434 let (uuid, old_uuid) = match uuid_opt {
439 Some(uuid) => {
435 Some(uuid) => {
440 let as_str = std::str::from_utf8(uuid)
436 let as_str = std::str::from_utf8(uuid)
441 .map_err(|_| {
437 .map_err(|_| {
442 HgError::corrupted(
438 HgError::corrupted(
443 "non-UTF-8 dirstate data file ID",
439 "non-UTF-8 dirstate data file ID",
444 )
440 )
445 })?
441 })?
446 .to_owned();
442 .to_owned();
447 if append {
443 if append {
448 (as_str, None)
444 (as_str, None)
449 } else {
445 } else {
450 (DirstateDocket::new_uid(), Some(as_str))
446 (DirstateDocket::new_uid(), Some(as_str))
451 }
447 }
452 }
448 }
453 None => (DirstateDocket::new_uid(), None),
449 None => (DirstateDocket::new_uid(), None),
454 };
450 };
455
451
456 let data_filename = format!("dirstate.{}", uuid);
452 let data_filename = format!("dirstate.{}", uuid);
457 let data_filename = self.hg_vfs().join(data_filename);
453 let data_filename = self.hg_vfs().join(data_filename);
458 let mut options = std::fs::OpenOptions::new();
454 let mut options = std::fs::OpenOptions::new();
459 options.write(true);
455 options.write(true);
460
456
461 // Why are we not using the O_APPEND flag when appending?
457 // Why are we not using the O_APPEND flag when appending?
462 //
458 //
463 // - O_APPEND makes it trickier to deal with garbage at the end of
459 // - O_APPEND makes it trickier to deal with garbage at the end of
464 // the file, left by a previous uncommitted transaction. By
460 // the file, left by a previous uncommitted transaction. By
465 // starting the write at [old_data_size] we make sure we erase
461 // starting the write at [old_data_size] we make sure we erase
466 // all such garbage.
462 // all such garbage.
467 //
463 //
468 // - O_APPEND requires to special-case 0-byte writes, whereas we
464 // - O_APPEND requires to special-case 0-byte writes, whereas we
469 // don't need that.
465 // don't need that.
470 //
466 //
471 // - Some OSes have bugs in implementation O_APPEND:
467 // - Some OSes have bugs in implementation O_APPEND:
472 // revlog.py talks about a Solaris bug, but we also saw some ZFS
468 // revlog.py talks about a Solaris bug, but we also saw some ZFS
473 // bug: https://github.com/openzfs/zfs/pull/3124,
469 // bug: https://github.com/openzfs/zfs/pull/3124,
474 // https://github.com/openzfs/zfs/issues/13370
470 // https://github.com/openzfs/zfs/issues/13370
475 //
471 //
476 if !append {
472 if !append {
477 options.create_new(true);
473 options.create_new(true);
478 }
474 }
479
475
480 let data_size = (|| {
476 let data_size = (|| {
481 // TODO: loop and try another random ID if !append and this
477 // TODO: loop and try another random ID if !append and this
482 // returns `ErrorKind::AlreadyExists`? Collision chance of two
478 // returns `ErrorKind::AlreadyExists`? Collision chance of two
483 // random IDs is one in 2**32
479 // random IDs is one in 2**32
484 let mut file = options.open(&data_filename)?;
480 let mut file = options.open(&data_filename)?;
485 if append {
481 if append {
486 file.seek(SeekFrom::Start(old_data_size as u64))?;
482 file.seek(SeekFrom::Start(old_data_size as u64))?;
487 }
483 }
488 file.write_all(&data)?;
484 file.write_all(&data)?;
489 file.flush()?;
485 file.flush()?;
490 file.seek(SeekFrom::Current(0))
486 file.seek(SeekFrom::Current(0))
491 })()
487 })()
492 .when_writing_file(&data_filename)?;
488 .when_writing_file(&data_filename)?;
493
489
494 let packed_dirstate = DirstateDocket::serialize(
490 let packed_dirstate = DirstateDocket::serialize(
495 parents,
491 parents,
496 tree_metadata,
492 tree_metadata,
497 data_size,
493 data_size,
498 uuid.as_bytes(),
494 uuid.as_bytes(),
499 )
495 )
500 .map_err(|_: std::num::TryFromIntError| {
496 .map_err(|_: std::num::TryFromIntError| {
501 HgError::corrupted("overflow in dirstate docket serialization")
497 HgError::corrupted("overflow in dirstate docket serialization")
502 })?;
498 })?;
503
499
504 (packed_dirstate, old_uuid)
500 (packed_dirstate, old_uuid)
505 } else {
501 } else {
506 (map.pack_v1(parents)?, None)
502 (map.pack_v1(parents)?, None)
507 };
503 };
508
504
509 let vfs = self.hg_vfs();
505 let vfs = self.hg_vfs();
510 vfs.atomic_write("dirstate", &packed_dirstate)?;
506 vfs.atomic_write("dirstate", &packed_dirstate)?;
511 if let Some(uuid) = old_uuid_to_remove {
507 if let Some(uuid) = old_uuid_to_remove {
512 // Remove the old data file after the new docket pointing to the
508 // Remove the old data file after the new docket pointing to the
513 // new data file was written.
509 // new data file was written.
514 vfs.remove_file(format!("dirstate.{}", uuid))?;
510 vfs.remove_file(format!("dirstate.{}", uuid))?;
515 }
511 }
516 Ok(())
512 Ok(())
517 }
513 }
518 }
514 }
519
515
520 /// Lazily-initialized component of `Repo` with interior mutability
516 /// Lazily-initialized component of `Repo` with interior mutability
521 ///
517 ///
522 /// This differs from `OnceCell` in that the value can still be "deinitialized"
518 /// This differs from `OnceCell` in that the value can still be "deinitialized"
523 /// later by setting its inner `Option` to `None`. It also takes the
519 /// later by setting its inner `Option` to `None`. It also takes the
524 /// initialization function as an argument when the value is requested, not
520 /// initialization function as an argument when the value is requested, not
525 /// when the instance is created.
521 /// when the instance is created.
526 struct LazyCell<T> {
522 struct LazyCell<T> {
527 value: RefCell<Option<T>>,
523 value: RefCell<Option<T>>,
528 }
524 }
529
525
530 impl<T> LazyCell<T> {
526 impl<T> LazyCell<T> {
531 fn new() -> Self {
527 fn new() -> Self {
532 Self {
528 Self {
533 value: RefCell::new(None),
529 value: RefCell::new(None),
534 }
530 }
535 }
531 }
536
532
537 fn set(&self, value: T) {
533 fn set(&self, value: T) {
538 *self.value.borrow_mut() = Some(value)
534 *self.value.borrow_mut() = Some(value)
539 }
535 }
540
536
541 fn get_or_init<E>(
537 fn get_or_init<E>(
542 &self,
538 &self,
543 init: impl Fn() -> Result<T, E>,
539 init: impl Fn() -> Result<T, E>,
544 ) -> Result<Ref<T>, E> {
540 ) -> Result<Ref<T>, E> {
545 let mut borrowed = self.value.borrow();
541 let mut borrowed = self.value.borrow();
546 if borrowed.is_none() {
542 if borrowed.is_none() {
547 drop(borrowed);
543 drop(borrowed);
548 // Only use `borrow_mut` if it is really needed to avoid panic in
544 // Only use `borrow_mut` if it is really needed to avoid panic in
549 // case there is another outstanding borrow but mutation is not
545 // case there is another outstanding borrow but mutation is not
550 // needed.
546 // needed.
551 *self.value.borrow_mut() = Some(init()?);
547 *self.value.borrow_mut() = Some(init()?);
552 borrowed = self.value.borrow()
548 borrowed = self.value.borrow()
553 }
549 }
554 Ok(Ref::map(borrowed, |option| option.as_ref().unwrap()))
550 Ok(Ref::map(borrowed, |option| option.as_ref().unwrap()))
555 }
551 }
556
552
557 fn get_mut_or_init<E>(
553 fn get_mut_or_init<E>(
558 &self,
554 &self,
559 init: impl Fn() -> Result<T, E>,
555 init: impl Fn() -> Result<T, E>,
560 ) -> Result<RefMut<T>, E> {
556 ) -> Result<RefMut<T>, E> {
561 let mut borrowed = self.value.borrow_mut();
557 let mut borrowed = self.value.borrow_mut();
562 if borrowed.is_none() {
558 if borrowed.is_none() {
563 *borrowed = Some(init()?);
559 *borrowed = Some(init()?);
564 }
560 }
565 Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap()))
561 Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap()))
566 }
562 }
567 }
563 }
@@ -1,271 +1,271 b''
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::revlog::revlog::{Revlog, RevlogEntry, RevlogError};
2 use crate::revlog::revlog::{Revlog, RevlogEntry, RevlogError};
3 use crate::revlog::Revision;
3 use crate::revlog::Revision;
4 use crate::revlog::{Node, NodePrefix};
4 use crate::revlog::{Node, NodePrefix};
5 use crate::utils::hg_path::HgPath;
5 use crate::utils::hg_path::HgPath;
6 use crate::vfs::Vfs;
6 use crate::vfs::Vfs;
7 use itertools::Itertools;
7 use itertools::Itertools;
8 use std::ascii::escape_default;
8 use std::ascii::escape_default;
9 use std::borrow::Cow;
9 use std::borrow::Cow;
10 use std::fmt::{Debug, Formatter};
10 use std::fmt::{Debug, Formatter};
11
11
12 /// A specialized `Revlog` to work with `changelog` data format.
12 /// A specialized `Revlog` to work with `changelog` data format.
13 pub struct Changelog {
13 pub struct Changelog {
14 /// The generic `revlog` format.
14 /// The generic `revlog` format.
15 pub(crate) revlog: Revlog,
15 pub(crate) revlog: Revlog,
16 }
16 }
17
17
18 impl Changelog {
18 impl Changelog {
19 /// Open the `changelog` of a repository given by its root.
19 /// Open the `changelog` of a repository given by its root.
20 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
20 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
21 let revlog =
21 let revlog =
22 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
22 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
23 Ok(Self { revlog })
23 Ok(Self { revlog })
24 }
24 }
25
25
26 /// Return the `ChangelogEntry` for the given node ID.
26 /// Return the `ChangelogEntry` for the given node ID.
27 pub fn data_for_node(
27 pub fn data_for_node(
28 &self,
28 &self,
29 node: NodePrefix,
29 node: NodePrefix,
30 ) -> Result<ChangelogRevisionData, RevlogError> {
30 ) -> Result<ChangelogRevisionData, RevlogError> {
31 let rev = self.revlog.rev_from_node(node)?;
31 let rev = self.revlog.rev_from_node(node)?;
32 self.data_for_rev(rev)
32 self.data_for_rev(rev)
33 }
33 }
34
34
35 /// Return the `RevlogEntry` of the given revision number.
35 /// Return the `RevlogEntry` of the given revision number.
36 pub fn entry_for_rev(
36 pub fn entry_for_rev(
37 &self,
37 &self,
38 rev: Revision,
38 rev: Revision,
39 ) -> Result<RevlogEntry, RevlogError> {
39 ) -> Result<RevlogEntry, RevlogError> {
40 self.revlog.get_entry(rev)
40 self.revlog.get_entry(rev)
41 }
41 }
42
42
43 /// Return the `ChangelogEntry` of the given revision number.
43 /// Return the `ChangelogEntry` of the given revision number.
44 pub fn data_for_rev(
44 pub fn data_for_rev(
45 &self,
45 &self,
46 rev: Revision,
46 rev: Revision,
47 ) -> Result<ChangelogRevisionData, RevlogError> {
47 ) -> Result<ChangelogRevisionData, RevlogError> {
48 let bytes = self.revlog.get_rev_data(rev)?;
48 let bytes = self.revlog.get_rev_data(rev)?;
49 if bytes.is_empty() {
49 if bytes.is_empty() {
50 Ok(ChangelogRevisionData::null())
50 Ok(ChangelogRevisionData::null())
51 } else {
51 } else {
52 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
52 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
53 RevlogError::Other(HgError::CorruptedRepository(format!(
53 RevlogError::Other(HgError::CorruptedRepository(format!(
54 "Invalid changelog data for revision {}: {:?}",
54 "Invalid changelog data for revision {}: {:?}",
55 rev, err
55 rev, err
56 )))
56 )))
57 })?)
57 })?)
58 }
58 }
59 }
59 }
60
60
61 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
61 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
62 self.revlog.node_from_rev(rev)
62 self.revlog.node_from_rev(rev)
63 }
63 }
64
64
65 pub fn rev_from_node(
65 pub fn rev_from_node(
66 &self,
66 &self,
67 node: NodePrefix,
67 node: NodePrefix,
68 ) -> Result<Revision, RevlogError> {
68 ) -> Result<Revision, RevlogError> {
69 self.revlog.rev_from_node(node)
69 self.revlog.rev_from_node(node)
70 }
70 }
71 }
71 }
72
72
73 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
73 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
74 #[derive(PartialEq)]
74 #[derive(PartialEq)]
75 pub struct ChangelogRevisionData<'changelog> {
75 pub struct ChangelogRevisionData<'changelog> {
76 /// The data bytes of the `changelog` entry.
76 /// The data bytes of the `changelog` entry.
77 bytes: Cow<'changelog, [u8]>,
77 bytes: Cow<'changelog, [u8]>,
78 /// The end offset for the hex manifest (not including the newline)
78 /// The end offset for the hex manifest (not including the newline)
79 manifest_end: usize,
79 manifest_end: usize,
80 /// The end offset for the user+email (not including the newline)
80 /// The end offset for the user+email (not including the newline)
81 user_end: usize,
81 user_end: usize,
82 /// The end offset for the timestamp+timezone+extras (not including the
82 /// The end offset for the timestamp+timezone+extras (not including the
83 /// newline)
83 /// newline)
84 timestamp_end: usize,
84 timestamp_end: usize,
85 /// The end offset for the file list (not including the newline)
85 /// The end offset for the file list (not including the newline)
86 files_end: usize,
86 files_end: usize,
87 }
87 }
88
88
89 impl<'changelog> ChangelogRevisionData<'changelog> {
89 impl<'changelog> ChangelogRevisionData<'changelog> {
90 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
90 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
91 let mut line_iter = bytes.split(|b| b == &b'\n');
91 let mut line_iter = bytes.split(|b| b == &b'\n');
92 let manifest_end = line_iter
92 let manifest_end = line_iter
93 .next()
93 .next()
94 .expect("Empty iterator from split()?")
94 .expect("Empty iterator from split()?")
95 .len();
95 .len();
96 let user_slice = line_iter.next().ok_or_else(|| {
96 let user_slice = line_iter.next().ok_or_else(|| {
97 HgError::corrupted("Changeset data truncated after manifest line")
97 HgError::corrupted("Changeset data truncated after manifest line")
98 })?;
98 })?;
99 let user_end = manifest_end + 1 + user_slice.len();
99 let user_end = manifest_end + 1 + user_slice.len();
100 let timestamp_slice = line_iter.next().ok_or_else(|| {
100 let timestamp_slice = line_iter.next().ok_or_else(|| {
101 HgError::corrupted("Changeset data truncated after user line")
101 HgError::corrupted("Changeset data truncated after user line")
102 })?;
102 })?;
103 let timestamp_end = user_end + 1 + timestamp_slice.len();
103 let timestamp_end = user_end + 1 + timestamp_slice.len();
104 let mut files_end = timestamp_end + 1;
104 let mut files_end = timestamp_end + 1;
105 loop {
105 loop {
106 let line = line_iter.next().ok_or_else(|| {
106 let line = line_iter.next().ok_or_else(|| {
107 HgError::corrupted("Changeset data truncated in files list")
107 HgError::corrupted("Changeset data truncated in files list")
108 })?;
108 })?;
109 if line.is_empty() {
109 if line.is_empty() {
110 if files_end == bytes.len() {
110 if files_end == bytes.len() {
111 // The list of files ended with a single newline (there
111 // The list of files ended with a single newline (there
112 // should be two)
112 // should be two)
113 return Err(HgError::corrupted(
113 return Err(HgError::corrupted(
114 "Changeset data truncated after files list",
114 "Changeset data truncated after files list",
115 ));
115 ));
116 }
116 }
117 files_end -= 1;
117 files_end -= 1;
118 break;
118 break;
119 }
119 }
120 files_end += line.len() + 1;
120 files_end += line.len() + 1;
121 }
121 }
122
122
123 Ok(Self {
123 Ok(Self {
124 bytes,
124 bytes,
125 manifest_end,
125 manifest_end,
126 user_end,
126 user_end,
127 timestamp_end,
127 timestamp_end,
128 files_end,
128 files_end,
129 })
129 })
130 }
130 }
131
131
132 fn null() -> Self {
132 fn null() -> Self {
133 Self::new(Cow::Borrowed(
133 Self::new(Cow::Borrowed(
134 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
134 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
135 ))
135 ))
136 .unwrap()
136 .unwrap()
137 }
137 }
138
138
139 /// Return an iterator over the lines of the entry.
139 /// Return an iterator over the lines of the entry.
140 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
140 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
141 self.bytes.split(|b| b == &b'\n')
141 self.bytes.split(|b| b == &b'\n')
142 }
142 }
143
143
144 /// Return the node id of the `manifest` referenced by this `changelog`
144 /// Return the node id of the `manifest` referenced by this `changelog`
145 /// entry.
145 /// entry.
146 pub fn manifest_node(&self) -> Result<Node, HgError> {
146 pub fn manifest_node(&self) -> Result<Node, HgError> {
147 let manifest_node_hex = &self.bytes[..self.manifest_end];
147 let manifest_node_hex = &self.bytes[..self.manifest_end];
148 Node::from_hex_for_repo(manifest_node_hex)
148 Node::from_hex_for_repo(manifest_node_hex)
149 }
149 }
150
150
151 /// The full user string (usually a name followed by an email enclosed in
151 /// The full user string (usually a name followed by an email enclosed in
152 /// angle brackets)
152 /// angle brackets)
153 pub fn user(&self) -> &[u8] {
153 pub fn user(&self) -> &[u8] {
154 &self.bytes[self.manifest_end + 1..self.user_end]
154 &self.bytes[self.manifest_end + 1..self.user_end]
155 }
155 }
156
156
157 /// The full timestamp line (timestamp in seconds, offset in seconds, and
157 /// The full timestamp line (timestamp in seconds, offset in seconds, and
158 /// possibly extras)
158 /// possibly extras)
159 // TODO: We should expose this in a more useful way
159 // TODO: We should expose this in a more useful way
160 pub fn timestamp_line(&self) -> &[u8] {
160 pub fn timestamp_line(&self) -> &[u8] {
161 &self.bytes[self.user_end + 1..self.timestamp_end]
161 &self.bytes[self.user_end + 1..self.timestamp_end]
162 }
162 }
163
163
164 /// The files changed in this revision.
164 /// The files changed in this revision.
165 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
165 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
166 self.bytes[self.timestamp_end + 1..self.files_end]
166 self.bytes[self.timestamp_end + 1..self.files_end]
167 .split(|b| b == &b'\n')
167 .split(|b| b == &b'\n')
168 .map(|path| HgPath::new(path))
168 .map(HgPath::new)
169 }
169 }
170
170
171 /// The change description.
171 /// The change description.
172 pub fn description(&self) -> &[u8] {
172 pub fn description(&self) -> &[u8] {
173 &self.bytes[self.files_end + 2..]
173 &self.bytes[self.files_end + 2..]
174 }
174 }
175 }
175 }
176
176
177 impl Debug for ChangelogRevisionData<'_> {
177 impl Debug for ChangelogRevisionData<'_> {
178 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
178 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
179 f.debug_struct("ChangelogRevisionData")
179 f.debug_struct("ChangelogRevisionData")
180 .field("bytes", &debug_bytes(&self.bytes))
180 .field("bytes", &debug_bytes(&self.bytes))
181 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
181 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
182 .field(
182 .field(
183 "user",
183 "user",
184 &debug_bytes(
184 &debug_bytes(
185 &self.bytes[self.manifest_end + 1..self.user_end],
185 &self.bytes[self.manifest_end + 1..self.user_end],
186 ),
186 ),
187 )
187 )
188 .field(
188 .field(
189 "timestamp",
189 "timestamp",
190 &debug_bytes(
190 &debug_bytes(
191 &self.bytes[self.user_end + 1..self.timestamp_end],
191 &self.bytes[self.user_end + 1..self.timestamp_end],
192 ),
192 ),
193 )
193 )
194 .field(
194 .field(
195 "files",
195 "files",
196 &debug_bytes(
196 &debug_bytes(
197 &self.bytes[self.timestamp_end + 1..self.files_end],
197 &self.bytes[self.timestamp_end + 1..self.files_end],
198 ),
198 ),
199 )
199 )
200 .field(
200 .field(
201 "description",
201 "description",
202 &debug_bytes(&self.bytes[self.files_end + 2..]),
202 &debug_bytes(&self.bytes[self.files_end + 2..]),
203 )
203 )
204 .finish()
204 .finish()
205 }
205 }
206 }
206 }
207
207
208 fn debug_bytes(bytes: &[u8]) -> String {
208 fn debug_bytes(bytes: &[u8]) -> String {
209 String::from_utf8_lossy(
209 String::from_utf8_lossy(
210 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
210 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
211 )
211 )
212 .to_string()
212 .to_string()
213 }
213 }
214
214
215 #[cfg(test)]
215 #[cfg(test)]
216 mod tests {
216 mod tests {
217 use super::*;
217 use super::*;
218 use pretty_assertions::assert_eq;
218 use pretty_assertions::assert_eq;
219
219
220 #[test]
220 #[test]
221 fn test_create_changelogrevisiondata_invalid() {
221 fn test_create_changelogrevisiondata_invalid() {
222 // Completely empty
222 // Completely empty
223 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
223 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
224 // No newline after manifest
224 // No newline after manifest
225 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
225 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
226 // No newline after user
226 // No newline after user
227 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
227 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
228 // No newline after timestamp
228 // No newline after timestamp
229 assert!(
229 assert!(
230 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
230 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
231 );
231 );
232 // Missing newline after files
232 // Missing newline after files
233 assert!(ChangelogRevisionData::new(Cow::Borrowed(
233 assert!(ChangelogRevisionData::new(Cow::Borrowed(
234 b"abcd\n\n0 0\nfile1\nfile2"
234 b"abcd\n\n0 0\nfile1\nfile2"
235 ))
235 ))
236 .is_err(),);
236 .is_err(),);
237 // Only one newline after files
237 // Only one newline after files
238 assert!(ChangelogRevisionData::new(Cow::Borrowed(
238 assert!(ChangelogRevisionData::new(Cow::Borrowed(
239 b"abcd\n\n0 0\nfile1\nfile2\n"
239 b"abcd\n\n0 0\nfile1\nfile2\n"
240 ))
240 ))
241 .is_err(),);
241 .is_err(),);
242 }
242 }
243
243
244 #[test]
244 #[test]
245 fn test_create_changelogrevisiondata() {
245 fn test_create_changelogrevisiondata() {
246 let data = ChangelogRevisionData::new(Cow::Borrowed(
246 let data = ChangelogRevisionData::new(Cow::Borrowed(
247 b"0123456789abcdef0123456789abcdef01234567
247 b"0123456789abcdef0123456789abcdef01234567
248 Some One <someone@example.com>
248 Some One <someone@example.com>
249 0 0
249 0 0
250 file1
250 file1
251 file2
251 file2
252
252
253 some
253 some
254 commit
254 commit
255 message",
255 message",
256 ))
256 ))
257 .unwrap();
257 .unwrap();
258 assert_eq!(
258 assert_eq!(
259 data.manifest_node().unwrap(),
259 data.manifest_node().unwrap(),
260 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
260 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
261 .unwrap()
261 .unwrap()
262 );
262 );
263 assert_eq!(data.user(), b"Some One <someone@example.com>");
263 assert_eq!(data.user(), b"Some One <someone@example.com>");
264 assert_eq!(data.timestamp_line(), b"0 0");
264 assert_eq!(data.timestamp_line(), b"0 0");
265 assert_eq!(
265 assert_eq!(
266 data.files().collect_vec(),
266 data.files().collect_vec(),
267 vec![HgPath::new("file1"), HgPath::new("file2")]
267 vec![HgPath::new("file1"), HgPath::new("file2")]
268 );
268 );
269 assert_eq!(data.description(), b"some\ncommit\nmessage");
269 assert_eq!(data.description(), b"some\ncommit\nmessage");
270 }
270 }
271 }
271 }
@@ -1,208 +1,208 b''
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::repo::Repo;
2 use crate::repo::Repo;
3 use crate::revlog::path_encode::path_encode;
3 use crate::revlog::path_encode::path_encode;
4 use crate::revlog::revlog::RevlogEntry;
4 use crate::revlog::revlog::RevlogEntry;
5 use crate::revlog::revlog::{Revlog, RevlogError};
5 use crate::revlog::revlog::{Revlog, RevlogError};
6 use crate::revlog::NodePrefix;
6 use crate::revlog::NodePrefix;
7 use crate::revlog::Revision;
7 use crate::revlog::Revision;
8 use crate::utils::files::get_path_from_bytes;
8 use crate::utils::files::get_path_from_bytes;
9 use crate::utils::hg_path::HgPath;
9 use crate::utils::hg_path::HgPath;
10 use crate::utils::SliceExt;
10 use crate::utils::SliceExt;
11 use std::path::PathBuf;
11 use std::path::PathBuf;
12
12
13 /// A specialized `Revlog` to work with file data logs.
13 /// A specialized `Revlog` to work with file data logs.
14 pub struct Filelog {
14 pub struct Filelog {
15 /// The generic `revlog` format.
15 /// The generic `revlog` format.
16 revlog: Revlog,
16 revlog: Revlog,
17 }
17 }
18
18
19 impl Filelog {
19 impl Filelog {
20 pub fn open_vfs(
20 pub fn open_vfs(
21 store_vfs: &crate::vfs::Vfs<'_>,
21 store_vfs: &crate::vfs::Vfs<'_>,
22 file_path: &HgPath,
22 file_path: &HgPath,
23 ) -> Result<Self, HgError> {
23 ) -> Result<Self, HgError> {
24 let index_path = store_path(file_path, b".i");
24 let index_path = store_path(file_path, b".i");
25 let data_path = store_path(file_path, b".d");
25 let data_path = store_path(file_path, b".d");
26 let revlog =
26 let revlog =
27 Revlog::open(store_vfs, index_path, Some(&data_path), false)?;
27 Revlog::open(store_vfs, index_path, Some(&data_path), false)?;
28 Ok(Self { revlog })
28 Ok(Self { revlog })
29 }
29 }
30
30
31 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
31 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
32 Self::open_vfs(&repo.store_vfs(), file_path)
32 Self::open_vfs(&repo.store_vfs(), file_path)
33 }
33 }
34
34
35 /// The given node ID is that of the file as found in a filelog, not of a
35 /// The given node ID is that of the file as found in a filelog, not of a
36 /// changeset.
36 /// changeset.
37 pub fn data_for_node(
37 pub fn data_for_node(
38 &self,
38 &self,
39 file_node: impl Into<NodePrefix>,
39 file_node: impl Into<NodePrefix>,
40 ) -> Result<FilelogRevisionData, RevlogError> {
40 ) -> Result<FilelogRevisionData, RevlogError> {
41 let file_rev = self.revlog.rev_from_node(file_node.into())?;
41 let file_rev = self.revlog.rev_from_node(file_node.into())?;
42 self.data_for_rev(file_rev)
42 self.data_for_rev(file_rev)
43 }
43 }
44
44
45 /// The given revision is that of the file as found in a filelog, not of a
45 /// The given revision is that of the file as found in a filelog, not of a
46 /// changeset.
46 /// changeset.
47 pub fn data_for_rev(
47 pub fn data_for_rev(
48 &self,
48 &self,
49 file_rev: Revision,
49 file_rev: Revision,
50 ) -> Result<FilelogRevisionData, RevlogError> {
50 ) -> Result<FilelogRevisionData, RevlogError> {
51 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
51 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
52 Ok(FilelogRevisionData(data.into()))
52 Ok(FilelogRevisionData(data))
53 }
53 }
54
54
55 /// The given node ID is that of the file as found in a filelog, not of a
55 /// The given node ID is that of the file as found in a filelog, not of a
56 /// changeset.
56 /// changeset.
57 pub fn entry_for_node(
57 pub fn entry_for_node(
58 &self,
58 &self,
59 file_node: impl Into<NodePrefix>,
59 file_node: impl Into<NodePrefix>,
60 ) -> Result<FilelogEntry, RevlogError> {
60 ) -> Result<FilelogEntry, RevlogError> {
61 let file_rev = self.revlog.rev_from_node(file_node.into())?;
61 let file_rev = self.revlog.rev_from_node(file_node.into())?;
62 self.entry_for_rev(file_rev)
62 self.entry_for_rev(file_rev)
63 }
63 }
64
64
65 /// The given revision is that of the file as found in a filelog, not of a
65 /// The given revision is that of the file as found in a filelog, not of a
66 /// changeset.
66 /// changeset.
67 pub fn entry_for_rev(
67 pub fn entry_for_rev(
68 &self,
68 &self,
69 file_rev: Revision,
69 file_rev: Revision,
70 ) -> Result<FilelogEntry, RevlogError> {
70 ) -> Result<FilelogEntry, RevlogError> {
71 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
71 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
72 }
72 }
73 }
73 }
74
74
75 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
75 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
76 let encoded_bytes =
76 let encoded_bytes =
77 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
77 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
78 get_path_from_bytes(&encoded_bytes).into()
78 get_path_from_bytes(&encoded_bytes).into()
79 }
79 }
80
80
81 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
81 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
82
82
83 impl FilelogEntry<'_> {
83 impl FilelogEntry<'_> {
84 /// `self.data()` can be expensive, with decompression and delta
84 /// `self.data()` can be expensive, with decompression and delta
85 /// resolution.
85 /// resolution.
86 ///
86 ///
87 /// *Without* paying this cost, based on revlog index information
87 /// *Without* paying this cost, based on revlog index information
88 /// including `RevlogEntry::uncompressed_len`:
88 /// including `RevlogEntry::uncompressed_len`:
89 ///
89 ///
90 /// * Returns `true` if the length that `self.data().file_data().len()`
90 /// * Returns `true` if the length that `self.data().file_data().len()`
91 /// would return is definitely **not equal** to `other_len`.
91 /// would return is definitely **not equal** to `other_len`.
92 /// * Returns `false` if available information is inconclusive.
92 /// * Returns `false` if available information is inconclusive.
93 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
93 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
94 // Relevant code that implement this behavior in Python code:
94 // Relevant code that implement this behavior in Python code:
95 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
95 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
96 // revlog.size, revlog.rawsize
96 // revlog.size, revlog.rawsize
97
97
98 // Let’s call `file_data_len` what would be returned by
98 // Let’s call `file_data_len` what would be returned by
99 // `self.data().file_data().len()`.
99 // `self.data().file_data().len()`.
100
100
101 if self.0.is_censored() {
101 if self.0.is_censored() {
102 let file_data_len = 0;
102 let file_data_len = 0;
103 return other_len != file_data_len;
103 return other_len != file_data_len;
104 }
104 }
105
105
106 if self.0.has_length_affecting_flag_processor() {
106 if self.0.has_length_affecting_flag_processor() {
107 // We can’t conclude anything about `file_data_len`.
107 // We can’t conclude anything about `file_data_len`.
108 return false;
108 return false;
109 }
109 }
110
110
111 // Revlog revisions (usually) have metadata for the size of
111 // Revlog revisions (usually) have metadata for the size of
112 // their data after decompression and delta resolution
112 // their data after decompression and delta resolution
113 // as would be returned by `Revlog::get_rev_data`.
113 // as would be returned by `Revlog::get_rev_data`.
114 //
114 //
115 // For filelogs this is the file’s contents preceded by an optional
115 // For filelogs this is the file’s contents preceded by an optional
116 // metadata block.
116 // metadata block.
117 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
117 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
118 l as u64
118 l as u64
119 } else {
119 } else {
120 // The field was set to -1, the actual uncompressed len is unknown.
120 // The field was set to -1, the actual uncompressed len is unknown.
121 // We need to decompress to say more.
121 // We need to decompress to say more.
122 return false;
122 return false;
123 };
123 };
124 // `uncompressed_len = file_data_len + optional_metadata_len`,
124 // `uncompressed_len = file_data_len + optional_metadata_len`,
125 // so `file_data_len <= uncompressed_len`.
125 // so `file_data_len <= uncompressed_len`.
126 if uncompressed_len < other_len {
126 if uncompressed_len < other_len {
127 // Transitively, `file_data_len < other_len`.
127 // Transitively, `file_data_len < other_len`.
128 // So `other_len != file_data_len` definitely.
128 // So `other_len != file_data_len` definitely.
129 return true;
129 return true;
130 }
130 }
131
131
132 if uncompressed_len == other_len + 4 {
132 if uncompressed_len == other_len + 4 {
133 // It’s possible that `file_data_len == other_len` with an empty
133 // It’s possible that `file_data_len == other_len` with an empty
134 // metadata block (2 start marker bytes + 2 end marker bytes).
134 // metadata block (2 start marker bytes + 2 end marker bytes).
135 // This happens when there wouldn’t otherwise be metadata, but
135 // This happens when there wouldn’t otherwise be metadata, but
136 // the first 2 bytes of file data happen to match a start marker
136 // the first 2 bytes of file data happen to match a start marker
137 // and would be ambiguous.
137 // and would be ambiguous.
138 return false;
138 return false;
139 }
139 }
140
140
141 if !self.0.has_p1() {
141 if !self.0.has_p1() {
142 // There may or may not be copy metadata, so we can’t deduce more
142 // There may or may not be copy metadata, so we can’t deduce more
143 // about `file_data_len` without computing file data.
143 // about `file_data_len` without computing file data.
144 return false;
144 return false;
145 }
145 }
146
146
147 // Filelog ancestry is not meaningful in the way changelog ancestry is.
147 // Filelog ancestry is not meaningful in the way changelog ancestry is.
148 // It only provides hints to delta generation.
148 // It only provides hints to delta generation.
149 // p1 and p2 are set to null when making a copy or rename since
149 // p1 and p2 are set to null when making a copy or rename since
150 // contents are likely unrelatedto what might have previously existed
150 // contents are likely unrelatedto what might have previously existed
151 // at the destination path.
151 // at the destination path.
152 //
152 //
153 // Conversely, since here p1 is non-null, there is no copy metadata.
153 // Conversely, since here p1 is non-null, there is no copy metadata.
154 // Note that this reasoning may be invalidated in the presence of
154 // Note that this reasoning may be invalidated in the presence of
155 // merges made by some previous versions of Mercurial that
155 // merges made by some previous versions of Mercurial that
156 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
156 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
157 // and `tests/test-issue6528.t`.
157 // and `tests/test-issue6528.t`.
158 //
158 //
159 // Since copy metadata is currently the only kind of metadata
159 // Since copy metadata is currently the only kind of metadata
160 // kept in revlog data of filelogs,
160 // kept in revlog data of filelogs,
161 // this `FilelogEntry` does not have such metadata:
161 // this `FilelogEntry` does not have such metadata:
162 let file_data_len = uncompressed_len;
162 let file_data_len = uncompressed_len;
163
163
164 return file_data_len != other_len;
164 file_data_len != other_len
165 }
165 }
166
166
167 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
167 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
168 Ok(FilelogRevisionData(self.0.data()?.into_owned()))
168 Ok(FilelogRevisionData(self.0.data()?.into_owned()))
169 }
169 }
170 }
170 }
171
171
172 /// The data for one revision in a filelog, uncompressed and delta-resolved.
172 /// The data for one revision in a filelog, uncompressed and delta-resolved.
173 pub struct FilelogRevisionData(Vec<u8>);
173 pub struct FilelogRevisionData(Vec<u8>);
174
174
175 impl FilelogRevisionData {
175 impl FilelogRevisionData {
176 /// Split into metadata and data
176 /// Split into metadata and data
177 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
177 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
178 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
178 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
179
179
180 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
180 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
181 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
181 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
182 Ok((Some(metadata), data))
182 Ok((Some(metadata), data))
183 } else {
183 } else {
184 Err(HgError::corrupted(
184 Err(HgError::corrupted(
185 "Missing metadata end delimiter in filelog entry",
185 "Missing metadata end delimiter in filelog entry",
186 ))
186 ))
187 }
187 }
188 } else {
188 } else {
189 Ok((None, &self.0))
189 Ok((None, &self.0))
190 }
190 }
191 }
191 }
192
192
193 /// Returns the file contents at this revision, stripped of any metadata
193 /// Returns the file contents at this revision, stripped of any metadata
194 pub fn file_data(&self) -> Result<&[u8], HgError> {
194 pub fn file_data(&self) -> Result<&[u8], HgError> {
195 let (_metadata, data) = self.split()?;
195 let (_metadata, data) = self.split()?;
196 Ok(data)
196 Ok(data)
197 }
197 }
198
198
199 /// Consume the entry, and convert it into data, discarding any metadata,
199 /// Consume the entry, and convert it into data, discarding any metadata,
200 /// if present.
200 /// if present.
201 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
201 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
202 if let (Some(_metadata), data) = self.split()? {
202 if let (Some(_metadata), data) = self.split()? {
203 Ok(data.to_owned())
203 Ok(data.to_owned())
204 } else {
204 } else {
205 Ok(self.0)
205 Ok(self.0)
206 }
206 }
207 }
207 }
208 }
208 }
@@ -1,616 +1,615 b''
1 use std::ops::Deref;
1 use std::ops::Deref;
2
2
3 use byteorder::{BigEndian, ByteOrder};
3 use byteorder::{BigEndian, ByteOrder};
4
4
5 use crate::errors::HgError;
5 use crate::errors::HgError;
6 use crate::revlog::node::Node;
6 use crate::revlog::node::Node;
7 use crate::revlog::{Revision, NULL_REVISION};
7 use crate::revlog::{Revision, NULL_REVISION};
8
8
/// Size in bytes of one v1 revlog index entry.
pub const INDEX_ENTRY_SIZE: usize = 64;

/// The raw 4-byte header at the start of a revlog index file.
/// The first two bytes carry the format flags, the last two the
/// format version (see `format_flags` / `format_version`).
pub struct IndexHeader {
    header_bytes: [u8; 4],
}
14
14
#[derive(Copy, Clone)]
pub struct IndexHeaderFlags {
    flags: u16,
}

/// Corresponds to the high bits of `_format_flags` in python
impl IndexHeaderFlags {
    /// Corresponds to FLAG_INLINE_DATA in python
    pub fn is_inline(self) -> bool {
        // Tail expression instead of `return …;` (clippy: `needless_return`).
        self.flags & 1 != 0
    }

    /// Corresponds to FLAG_GENERALDELTA in python
    pub fn uses_generaldelta(self) -> bool {
        self.flags & 2 != 0
    }
}
31
31
32 /// Corresponds to the INDEX_HEADER structure,
32 /// Corresponds to the INDEX_HEADER structure,
33 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
33 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
34 impl IndexHeader {
34 impl IndexHeader {
35 fn format_flags(&self) -> IndexHeaderFlags {
35 fn format_flags(&self) -> IndexHeaderFlags {
36 // No "unknown flags" check here, unlike in python. Maybe there should
36 // No "unknown flags" check here, unlike in python. Maybe there should
37 // be.
37 // be.
38 return IndexHeaderFlags {
38 IndexHeaderFlags {
39 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
39 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
40 };
40 }
41 }
41 }
42
42
43 /// The only revlog version currently supported by rhg.
43 /// The only revlog version currently supported by rhg.
44 const REVLOGV1: u16 = 1;
44 const REVLOGV1: u16 = 1;
45
45
46 /// Corresponds to `_format_version` in Python.
46 /// Corresponds to `_format_version` in Python.
47 fn format_version(&self) -> u16 {
47 fn format_version(&self) -> u16 {
48 return BigEndian::read_u16(&self.header_bytes[2..4]);
48 BigEndian::read_u16(&self.header_bytes[2..4])
49 }
49 }
50
50
51 const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
51 const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
52 // We treat an empty file as a valid index with no entries.
52 // We treat an empty file as a valid index with no entries.
53 // Here we make an arbitrary choice of what we assume the format of the
53 // Here we make an arbitrary choice of what we assume the format of the
54 // index to be (V1, using generaldelta).
54 // index to be (V1, using generaldelta).
55 // This doesn't matter too much, since we're only doing read-only
55 // This doesn't matter too much, since we're only doing read-only
56 // access. but the value corresponds to the `new_header` variable in
56 // access. but the value corresponds to the `new_header` variable in
57 // `revlog.py`, `_loadindex`
57 // `revlog.py`, `_loadindex`
58 header_bytes: [0, 3, 0, 1],
58 header_bytes: [0, 3, 0, 1],
59 };
59 };
60
60
61 fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
61 fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
62 if index_bytes.len() == 0 {
62 if index_bytes.is_empty() {
63 return Ok(IndexHeader::EMPTY_INDEX_HEADER);
63 return Ok(IndexHeader::EMPTY_INDEX_HEADER);
64 }
64 }
65 if index_bytes.len() < 4 {
65 if index_bytes.len() < 4 {
66 return Err(HgError::corrupted(
66 return Err(HgError::corrupted(
67 "corrupted revlog: can't read the index format header",
67 "corrupted revlog: can't read the index format header",
68 ));
68 ));
69 }
69 }
70 return Ok(IndexHeader {
70 Ok(IndexHeader {
71 header_bytes: {
71 header_bytes: {
72 let bytes: [u8; 4] =
72 let bytes: [u8; 4] =
73 index_bytes[0..4].try_into().expect("impossible");
73 index_bytes[0..4].try_into().expect("impossible");
74 bytes
74 bytes
75 },
75 },
76 });
76 })
77 }
77 }
78 }
78 }
79
79
/// A Revlog index
pub struct Index {
    /// The raw index bytes, possibly memory-mapped (any `[u8]`-deref-able
    /// owner works).
    bytes: Box<dyn Deref<Target = [u8]> + Send>,
    /// Offsets of starts of index blocks.
    /// Only needed when the index is interleaved with data.
    offsets: Option<Vec<usize>>,
    /// Whether the generaldelta flag was set in the index header.
    uses_generaldelta: bool,
}
88
88
89 impl Index {
89 impl Index {
90 /// Create an index from bytes.
90 /// Create an index from bytes.
91 /// Calculate the start of each entry when is_inline is true.
91 /// Calculate the start of each entry when is_inline is true.
92 pub fn new(
92 pub fn new(
93 bytes: Box<dyn Deref<Target = [u8]> + Send>,
93 bytes: Box<dyn Deref<Target = [u8]> + Send>,
94 ) -> Result<Self, HgError> {
94 ) -> Result<Self, HgError> {
95 let header = IndexHeader::parse(bytes.as_ref())?;
95 let header = IndexHeader::parse(bytes.as_ref())?;
96
96
97 if header.format_version() != IndexHeader::REVLOGV1 {
97 if header.format_version() != IndexHeader::REVLOGV1 {
98 // A proper new version should have had a repo/store
98 // A proper new version should have had a repo/store
99 // requirement.
99 // requirement.
100 return Err(HgError::corrupted("unsupported revlog version"));
100 return Err(HgError::corrupted("unsupported revlog version"));
101 }
101 }
102
102
103 // This is only correct because we know version is REVLOGV1.
103 // This is only correct because we know version is REVLOGV1.
104 // In v2 we always use generaldelta, while in v0 we never use
104 // In v2 we always use generaldelta, while in v0 we never use
105 // generaldelta. Similar for [is_inline] (it's only used in v1).
105 // generaldelta. Similar for [is_inline] (it's only used in v1).
106 let uses_generaldelta = header.format_flags().uses_generaldelta();
106 let uses_generaldelta = header.format_flags().uses_generaldelta();
107
107
108 if header.format_flags().is_inline() {
108 if header.format_flags().is_inline() {
109 let mut offset: usize = 0;
109 let mut offset: usize = 0;
110 let mut offsets = Vec::new();
110 let mut offsets = Vec::new();
111
111
112 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
112 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
113 offsets.push(offset);
113 offsets.push(offset);
114 let end = offset + INDEX_ENTRY_SIZE;
114 let end = offset + INDEX_ENTRY_SIZE;
115 let entry = IndexEntry {
115 let entry = IndexEntry {
116 bytes: &bytes[offset..end],
116 bytes: &bytes[offset..end],
117 offset_override: None,
117 offset_override: None,
118 };
118 };
119
119
120 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
120 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
121 }
121 }
122
122
123 if offset == bytes.len() {
123 if offset == bytes.len() {
124 Ok(Self {
124 Ok(Self {
125 bytes,
125 bytes,
126 offsets: Some(offsets),
126 offsets: Some(offsets),
127 uses_generaldelta,
127 uses_generaldelta,
128 })
128 })
129 } else {
129 } else {
130 Err(HgError::corrupted("unexpected inline revlog length")
130 Err(HgError::corrupted("unexpected inline revlog length"))
131 .into())
132 }
131 }
133 } else {
132 } else {
134 Ok(Self {
133 Ok(Self {
135 bytes,
134 bytes,
136 offsets: None,
135 offsets: None,
137 uses_generaldelta,
136 uses_generaldelta,
138 })
137 })
139 }
138 }
140 }
139 }
141
140
142 pub fn uses_generaldelta(&self) -> bool {
141 pub fn uses_generaldelta(&self) -> bool {
143 self.uses_generaldelta
142 self.uses_generaldelta
144 }
143 }
145
144
146 /// Value of the inline flag.
145 /// Value of the inline flag.
147 pub fn is_inline(&self) -> bool {
146 pub fn is_inline(&self) -> bool {
148 self.offsets.is_some()
147 self.offsets.is_some()
149 }
148 }
150
149
151 /// Return a slice of bytes if `revlog` is inline. Panic if not.
150 /// Return a slice of bytes if `revlog` is inline. Panic if not.
152 pub fn data(&self, start: usize, end: usize) -> &[u8] {
151 pub fn data(&self, start: usize, end: usize) -> &[u8] {
153 if !self.is_inline() {
152 if !self.is_inline() {
154 panic!("tried to access data in the index of a revlog that is not inline");
153 panic!("tried to access data in the index of a revlog that is not inline");
155 }
154 }
156 &self.bytes[start..end]
155 &self.bytes[start..end]
157 }
156 }
158
157
159 /// Return number of entries of the revlog index.
158 /// Return number of entries of the revlog index.
160 pub fn len(&self) -> usize {
159 pub fn len(&self) -> usize {
161 if let Some(offsets) = &self.offsets {
160 if let Some(offsets) = &self.offsets {
162 offsets.len()
161 offsets.len()
163 } else {
162 } else {
164 self.bytes.len() / INDEX_ENTRY_SIZE
163 self.bytes.len() / INDEX_ENTRY_SIZE
165 }
164 }
166 }
165 }
167
166
168 /// Returns `true` if the `Index` has zero `entries`.
167 /// Returns `true` if the `Index` has zero `entries`.
169 pub fn is_empty(&self) -> bool {
168 pub fn is_empty(&self) -> bool {
170 self.len() == 0
169 self.len() == 0
171 }
170 }
172
171
173 /// Return the index entry corresponding to the given revision if it
172 /// Return the index entry corresponding to the given revision if it
174 /// exists.
173 /// exists.
175 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
174 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
176 if rev == NULL_REVISION {
175 if rev == NULL_REVISION {
177 return None;
176 return None;
178 }
177 }
179 if let Some(offsets) = &self.offsets {
178 if let Some(offsets) = &self.offsets {
180 self.get_entry_inline(rev, offsets)
179 self.get_entry_inline(rev, offsets)
181 } else {
180 } else {
182 self.get_entry_separated(rev)
181 self.get_entry_separated(rev)
183 }
182 }
184 }
183 }
185
184
186 fn get_entry_inline(
185 fn get_entry_inline(
187 &self,
186 &self,
188 rev: Revision,
187 rev: Revision,
189 offsets: &[usize],
188 offsets: &[usize],
190 ) -> Option<IndexEntry> {
189 ) -> Option<IndexEntry> {
191 let start = *offsets.get(rev as usize)?;
190 let start = *offsets.get(rev as usize)?;
192 let end = start.checked_add(INDEX_ENTRY_SIZE)?;
191 let end = start.checked_add(INDEX_ENTRY_SIZE)?;
193 let bytes = &self.bytes[start..end];
192 let bytes = &self.bytes[start..end];
194
193
195 // See IndexEntry for an explanation of this override.
194 // See IndexEntry for an explanation of this override.
196 let offset_override = Some(end);
195 let offset_override = Some(end);
197
196
198 Some(IndexEntry {
197 Some(IndexEntry {
199 bytes,
198 bytes,
200 offset_override,
199 offset_override,
201 })
200 })
202 }
201 }
203
202
204 fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
203 fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
205 let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE;
204 let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE;
206 if rev as usize >= max_rev {
205 if rev as usize >= max_rev {
207 return None;
206 return None;
208 }
207 }
209 let start = rev as usize * INDEX_ENTRY_SIZE;
208 let start = rev as usize * INDEX_ENTRY_SIZE;
210 let end = start + INDEX_ENTRY_SIZE;
209 let end = start + INDEX_ENTRY_SIZE;
211 let bytes = &self.bytes[start..end];
210 let bytes = &self.bytes[start..end];
212
211
213 // Override the offset of the first revision as its bytes are used
212 // Override the offset of the first revision as its bytes are used
214 // for the index's metadata (saving space because it is always 0)
213 // for the index's metadata (saving space because it is always 0)
215 let offset_override = if rev == 0 { Some(0) } else { None };
214 let offset_override = if rev == 0 { Some(0) } else { None };
216
215
217 Some(IndexEntry {
216 Some(IndexEntry {
218 bytes,
217 bytes,
219 offset_override,
218 offset_override,
220 })
219 })
221 }
220 }
222 }
221 }
223
222
224 impl super::RevlogIndex for Index {
223 impl super::RevlogIndex for Index {
225 fn len(&self) -> usize {
224 fn len(&self) -> usize {
226 self.len()
225 self.len()
227 }
226 }
228
227
229 fn node(&self, rev: Revision) -> Option<&Node> {
228 fn node(&self, rev: Revision) -> Option<&Node> {
230 self.get_entry(rev).map(|entry| entry.hash())
229 self.get_entry(rev).map(|entry| entry.hash())
231 }
230 }
232 }
231 }
233
232
#[derive(Debug)]
pub struct IndexEntry<'a> {
    /// The raw 64 bytes of this entry within the index.
    bytes: &'a [u8],
    /// Allows to override the offset value of the entry.
    ///
    /// For interleaved index and data, the offset stored in the index
    /// corresponds to the separated data offset.
    /// It has to be overridden with the actual offset in the interleaved
    /// index which is just after the index block.
    ///
    /// For separated index and data, the offset stored in the first index
    /// entry is mixed with the index headers.
    /// It has to be overridden with 0.
    offset_override: Option<usize>,
}
249
248
250 impl<'a> IndexEntry<'a> {
249 impl<'a> IndexEntry<'a> {
251 /// Return the offset of the data.
250 /// Return the offset of the data.
252 pub fn offset(&self) -> usize {
251 pub fn offset(&self) -> usize {
253 if let Some(offset_override) = self.offset_override {
252 if let Some(offset_override) = self.offset_override {
254 offset_override
253 offset_override
255 } else {
254 } else {
256 let mut bytes = [0; 8];
255 let mut bytes = [0; 8];
257 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
256 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
258 BigEndian::read_u64(&bytes[..]) as usize
257 BigEndian::read_u64(&bytes[..]) as usize
259 }
258 }
260 }
259 }
261
260
262 pub fn flags(&self) -> u16 {
261 pub fn flags(&self) -> u16 {
263 BigEndian::read_u16(&self.bytes[6..=7])
262 BigEndian::read_u16(&self.bytes[6..=7])
264 }
263 }
265
264
266 /// Return the compressed length of the data.
265 /// Return the compressed length of the data.
267 pub fn compressed_len(&self) -> u32 {
266 pub fn compressed_len(&self) -> u32 {
268 BigEndian::read_u32(&self.bytes[8..=11])
267 BigEndian::read_u32(&self.bytes[8..=11])
269 }
268 }
270
269
271 /// Return the uncompressed length of the data.
270 /// Return the uncompressed length of the data.
272 pub fn uncompressed_len(&self) -> i32 {
271 pub fn uncompressed_len(&self) -> i32 {
273 BigEndian::read_i32(&self.bytes[12..=15])
272 BigEndian::read_i32(&self.bytes[12..=15])
274 }
273 }
275
274
276 /// Return the revision upon which the data has been derived.
275 /// Return the revision upon which the data has been derived.
277 pub fn base_revision_or_base_of_delta_chain(&self) -> Revision {
276 pub fn base_revision_or_base_of_delta_chain(&self) -> Revision {
278 // TODO Maybe return an Option when base_revision == rev?
277 // TODO Maybe return an Option when base_revision == rev?
279 // Requires to add rev to IndexEntry
278 // Requires to add rev to IndexEntry
280
279
281 BigEndian::read_i32(&self.bytes[16..])
280 BigEndian::read_i32(&self.bytes[16..])
282 }
281 }
283
282
284 pub fn link_revision(&self) -> Revision {
283 pub fn link_revision(&self) -> Revision {
285 BigEndian::read_i32(&self.bytes[20..])
284 BigEndian::read_i32(&self.bytes[20..])
286 }
285 }
287
286
288 pub fn p1(&self) -> Revision {
287 pub fn p1(&self) -> Revision {
289 BigEndian::read_i32(&self.bytes[24..])
288 BigEndian::read_i32(&self.bytes[24..])
290 }
289 }
291
290
292 pub fn p2(&self) -> Revision {
291 pub fn p2(&self) -> Revision {
293 BigEndian::read_i32(&self.bytes[28..])
292 BigEndian::read_i32(&self.bytes[28..])
294 }
293 }
295
294
296 /// Return the hash of revision's full text.
295 /// Return the hash of revision's full text.
297 ///
296 ///
298 /// Currently, SHA-1 is used and only the first 20 bytes of this field
297 /// Currently, SHA-1 is used and only the first 20 bytes of this field
299 /// are used.
298 /// are used.
300 pub fn hash(&self) -> &'a Node {
299 pub fn hash(&self) -> &'a Node {
301 (&self.bytes[32..52]).try_into().unwrap()
300 (&self.bytes[32..52]).try_into().unwrap()
302 }
301 }
303 }
302 }
304
303
305 #[cfg(test)]
304 #[cfg(test)]
306 mod tests {
305 mod tests {
307 use super::*;
306 use super::*;
308 use crate::node::NULL_NODE;
307 use crate::node::NULL_NODE;
309
308
310 #[cfg(test)]
309 #[cfg(test)]
311 #[derive(Debug, Copy, Clone)]
310 #[derive(Debug, Copy, Clone)]
312 pub struct IndexEntryBuilder {
311 pub struct IndexEntryBuilder {
313 is_first: bool,
312 is_first: bool,
314 is_inline: bool,
313 is_inline: bool,
315 is_general_delta: bool,
314 is_general_delta: bool,
316 version: u16,
315 version: u16,
317 offset: usize,
316 offset: usize,
318 compressed_len: usize,
317 compressed_len: usize,
319 uncompressed_len: usize,
318 uncompressed_len: usize,
320 base_revision_or_base_of_delta_chain: Revision,
319 base_revision_or_base_of_delta_chain: Revision,
321 link_revision: Revision,
320 link_revision: Revision,
322 p1: Revision,
321 p1: Revision,
323 p2: Revision,
322 p2: Revision,
324 node: Node,
323 node: Node,
325 }
324 }
326
325
327 #[cfg(test)]
326 #[cfg(test)]
328 impl IndexEntryBuilder {
327 impl IndexEntryBuilder {
329 #[allow(clippy::new_without_default)]
328 #[allow(clippy::new_without_default)]
330 pub fn new() -> Self {
329 pub fn new() -> Self {
331 Self {
330 Self {
332 is_first: false,
331 is_first: false,
333 is_inline: false,
332 is_inline: false,
334 is_general_delta: true,
333 is_general_delta: true,
335 version: 1,
334 version: 1,
336 offset: 0,
335 offset: 0,
337 compressed_len: 0,
336 compressed_len: 0,
338 uncompressed_len: 0,
337 uncompressed_len: 0,
339 base_revision_or_base_of_delta_chain: 0,
338 base_revision_or_base_of_delta_chain: 0,
340 link_revision: 0,
339 link_revision: 0,
341 p1: NULL_REVISION,
340 p1: NULL_REVISION,
342 p2: NULL_REVISION,
341 p2: NULL_REVISION,
343 node: NULL_NODE,
342 node: NULL_NODE,
344 }
343 }
345 }
344 }
346
345
347 pub fn is_first(&mut self, value: bool) -> &mut Self {
346 pub fn is_first(&mut self, value: bool) -> &mut Self {
348 self.is_first = value;
347 self.is_first = value;
349 self
348 self
350 }
349 }
351
350
352 pub fn with_inline(&mut self, value: bool) -> &mut Self {
351 pub fn with_inline(&mut self, value: bool) -> &mut Self {
353 self.is_inline = value;
352 self.is_inline = value;
354 self
353 self
355 }
354 }
356
355
357 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
356 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
358 self.is_general_delta = value;
357 self.is_general_delta = value;
359 self
358 self
360 }
359 }
361
360
362 pub fn with_version(&mut self, value: u16) -> &mut Self {
361 pub fn with_version(&mut self, value: u16) -> &mut Self {
363 self.version = value;
362 self.version = value;
364 self
363 self
365 }
364 }
366
365
367 pub fn with_offset(&mut self, value: usize) -> &mut Self {
366 pub fn with_offset(&mut self, value: usize) -> &mut Self {
368 self.offset = value;
367 self.offset = value;
369 self
368 self
370 }
369 }
371
370
372 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
371 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
373 self.compressed_len = value;
372 self.compressed_len = value;
374 self
373 self
375 }
374 }
376
375
377 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
376 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
378 self.uncompressed_len = value;
377 self.uncompressed_len = value;
379 self
378 self
380 }
379 }
381
380
382 pub fn with_base_revision_or_base_of_delta_chain(
381 pub fn with_base_revision_or_base_of_delta_chain(
383 &mut self,
382 &mut self,
384 value: Revision,
383 value: Revision,
385 ) -> &mut Self {
384 ) -> &mut Self {
386 self.base_revision_or_base_of_delta_chain = value;
385 self.base_revision_or_base_of_delta_chain = value;
387 self
386 self
388 }
387 }
389
388
390 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
389 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
391 self.link_revision = value;
390 self.link_revision = value;
392 self
391 self
393 }
392 }
394
393
395 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
394 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
396 self.p1 = value;
395 self.p1 = value;
397 self
396 self
398 }
397 }
399
398
400 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
399 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
401 self.p2 = value;
400 self.p2 = value;
402 self
401 self
403 }
402 }
404
403
405 pub fn with_node(&mut self, value: Node) -> &mut Self {
404 pub fn with_node(&mut self, value: Node) -> &mut Self {
406 self.node = value;
405 self.node = value;
407 self
406 self
408 }
407 }
409
408
410 pub fn build(&self) -> Vec<u8> {
409 pub fn build(&self) -> Vec<u8> {
411 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
410 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
412 if self.is_first {
411 if self.is_first {
413 bytes.extend(&match (self.is_general_delta, self.is_inline) {
412 bytes.extend(&match (self.is_general_delta, self.is_inline) {
414 (false, false) => [0u8, 0],
413 (false, false) => [0u8, 0],
415 (false, true) => [0u8, 1],
414 (false, true) => [0u8, 1],
416 (true, false) => [0u8, 2],
415 (true, false) => [0u8, 2],
417 (true, true) => [0u8, 3],
416 (true, true) => [0u8, 3],
418 });
417 });
419 bytes.extend(&self.version.to_be_bytes());
418 bytes.extend(&self.version.to_be_bytes());
420 // Remaining offset bytes.
419 // Remaining offset bytes.
421 bytes.extend(&[0u8; 2]);
420 bytes.extend(&[0u8; 2]);
422 } else {
421 } else {
423 // Offset stored on 48 bits (6 bytes)
422 // Offset stored on 48 bits (6 bytes)
424 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
423 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
425 }
424 }
426 bytes.extend(&[0u8; 2]); // Revision flags.
425 bytes.extend(&[0u8; 2]); // Revision flags.
427 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
426 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
428 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
427 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
429 bytes.extend(
428 bytes.extend(
430 &self.base_revision_or_base_of_delta_chain.to_be_bytes(),
429 &self.base_revision_or_base_of_delta_chain.to_be_bytes(),
431 );
430 );
432 bytes.extend(&self.link_revision.to_be_bytes());
431 bytes.extend(&self.link_revision.to_be_bytes());
433 bytes.extend(&self.p1.to_be_bytes());
432 bytes.extend(&self.p1.to_be_bytes());
434 bytes.extend(&self.p2.to_be_bytes());
433 bytes.extend(&self.p2.to_be_bytes());
435 bytes.extend(self.node.as_bytes());
434 bytes.extend(self.node.as_bytes());
436 bytes.extend(vec![0u8; 12]);
435 bytes.extend(vec![0u8; 12]);
437 bytes
436 bytes
438 }
437 }
439 }
438 }
440
439
441 pub fn is_inline(index_bytes: &[u8]) -> bool {
440 pub fn is_inline(index_bytes: &[u8]) -> bool {
442 IndexHeader::parse(index_bytes)
441 IndexHeader::parse(index_bytes)
443 .expect("too short")
442 .expect("too short")
444 .format_flags()
443 .format_flags()
445 .is_inline()
444 .is_inline()
446 }
445 }
447
446
448 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
447 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
449 IndexHeader::parse(index_bytes)
448 IndexHeader::parse(index_bytes)
450 .expect("too short")
449 .expect("too short")
451 .format_flags()
450 .format_flags()
452 .uses_generaldelta()
451 .uses_generaldelta()
453 }
452 }
454
453
455 pub fn get_version(index_bytes: &[u8]) -> u16 {
454 pub fn get_version(index_bytes: &[u8]) -> u16 {
456 IndexHeader::parse(index_bytes)
455 IndexHeader::parse(index_bytes)
457 .expect("too short")
456 .expect("too short")
458 .format_version()
457 .format_version()
459 }
458 }
460
459
461 #[test]
460 #[test]
462 fn flags_when_no_inline_flag_test() {
461 fn flags_when_no_inline_flag_test() {
463 let bytes = IndexEntryBuilder::new()
462 let bytes = IndexEntryBuilder::new()
464 .is_first(true)
463 .is_first(true)
465 .with_general_delta(false)
464 .with_general_delta(false)
466 .with_inline(false)
465 .with_inline(false)
467 .build();
466 .build();
468
467
469 assert_eq!(is_inline(&bytes), false);
468 assert!(!is_inline(&bytes));
470 assert_eq!(uses_generaldelta(&bytes), false);
469 assert!(!uses_generaldelta(&bytes));
471 }
470 }
472
471
473 #[test]
472 #[test]
474 fn flags_when_inline_flag_test() {
473 fn flags_when_inline_flag_test() {
475 let bytes = IndexEntryBuilder::new()
474 let bytes = IndexEntryBuilder::new()
476 .is_first(true)
475 .is_first(true)
477 .with_general_delta(false)
476 .with_general_delta(false)
478 .with_inline(true)
477 .with_inline(true)
479 .build();
478 .build();
480
479
481 assert_eq!(is_inline(&bytes), true);
480 assert!(is_inline(&bytes));
482 assert_eq!(uses_generaldelta(&bytes), false);
481 assert!(!uses_generaldelta(&bytes));
483 }
482 }
484
483
485 #[test]
484 #[test]
486 fn flags_when_inline_and_generaldelta_flags_test() {
485 fn flags_when_inline_and_generaldelta_flags_test() {
487 let bytes = IndexEntryBuilder::new()
486 let bytes = IndexEntryBuilder::new()
488 .is_first(true)
487 .is_first(true)
489 .with_general_delta(true)
488 .with_general_delta(true)
490 .with_inline(true)
489 .with_inline(true)
491 .build();
490 .build();
492
491
493 assert_eq!(is_inline(&bytes), true);
492 assert!(is_inline(&bytes));
494 assert_eq!(uses_generaldelta(&bytes), true);
493 assert!(uses_generaldelta(&bytes));
495 }
494 }
496
495
497 #[test]
496 #[test]
498 fn test_offset() {
497 fn test_offset() {
499 let bytes = IndexEntryBuilder::new().with_offset(1).build();
498 let bytes = IndexEntryBuilder::new().with_offset(1).build();
500 let entry = IndexEntry {
499 let entry = IndexEntry {
501 bytes: &bytes,
500 bytes: &bytes,
502 offset_override: None,
501 offset_override: None,
503 };
502 };
504
503
505 assert_eq!(entry.offset(), 1)
504 assert_eq!(entry.offset(), 1)
506 }
505 }
507
506
508 #[test]
507 #[test]
509 fn test_with_overridden_offset() {
508 fn test_with_overridden_offset() {
510 let bytes = IndexEntryBuilder::new().with_offset(1).build();
509 let bytes = IndexEntryBuilder::new().with_offset(1).build();
511 let entry = IndexEntry {
510 let entry = IndexEntry {
512 bytes: &bytes,
511 bytes: &bytes,
513 offset_override: Some(2),
512 offset_override: Some(2),
514 };
513 };
515
514
516 assert_eq!(entry.offset(), 2)
515 assert_eq!(entry.offset(), 2)
517 }
516 }
518
517
519 #[test]
518 #[test]
520 fn test_compressed_len() {
519 fn test_compressed_len() {
521 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
520 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
522 let entry = IndexEntry {
521 let entry = IndexEntry {
523 bytes: &bytes,
522 bytes: &bytes,
524 offset_override: None,
523 offset_override: None,
525 };
524 };
526
525
527 assert_eq!(entry.compressed_len(), 1)
526 assert_eq!(entry.compressed_len(), 1)
528 }
527 }
529
528
530 #[test]
529 #[test]
531 fn test_uncompressed_len() {
530 fn test_uncompressed_len() {
532 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
531 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
533 let entry = IndexEntry {
532 let entry = IndexEntry {
534 bytes: &bytes,
533 bytes: &bytes,
535 offset_override: None,
534 offset_override: None,
536 };
535 };
537
536
538 assert_eq!(entry.uncompressed_len(), 1)
537 assert_eq!(entry.uncompressed_len(), 1)
539 }
538 }
540
539
541 #[test]
540 #[test]
542 fn test_base_revision_or_base_of_delta_chain() {
541 fn test_base_revision_or_base_of_delta_chain() {
543 let bytes = IndexEntryBuilder::new()
542 let bytes = IndexEntryBuilder::new()
544 .with_base_revision_or_base_of_delta_chain(1)
543 .with_base_revision_or_base_of_delta_chain(1)
545 .build();
544 .build();
546 let entry = IndexEntry {
545 let entry = IndexEntry {
547 bytes: &bytes,
546 bytes: &bytes,
548 offset_override: None,
547 offset_override: None,
549 };
548 };
550
549
551 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1)
550 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1)
552 }
551 }
553
552
554 #[test]
553 #[test]
555 fn link_revision_test() {
554 fn link_revision_test() {
556 let bytes = IndexEntryBuilder::new().with_link_revision(123).build();
555 let bytes = IndexEntryBuilder::new().with_link_revision(123).build();
557
556
558 let entry = IndexEntry {
557 let entry = IndexEntry {
559 bytes: &bytes,
558 bytes: &bytes,
560 offset_override: None,
559 offset_override: None,
561 };
560 };
562
561
563 assert_eq!(entry.link_revision(), 123);
562 assert_eq!(entry.link_revision(), 123);
564 }
563 }
565
564
566 #[test]
565 #[test]
567 fn p1_test() {
566 fn p1_test() {
568 let bytes = IndexEntryBuilder::new().with_p1(123).build();
567 let bytes = IndexEntryBuilder::new().with_p1(123).build();
569
568
570 let entry = IndexEntry {
569 let entry = IndexEntry {
571 bytes: &bytes,
570 bytes: &bytes,
572 offset_override: None,
571 offset_override: None,
573 };
572 };
574
573
575 assert_eq!(entry.p1(), 123);
574 assert_eq!(entry.p1(), 123);
576 }
575 }
577
576
578 #[test]
577 #[test]
579 fn p2_test() {
578 fn p2_test() {
580 let bytes = IndexEntryBuilder::new().with_p2(123).build();
579 let bytes = IndexEntryBuilder::new().with_p2(123).build();
581
580
582 let entry = IndexEntry {
581 let entry = IndexEntry {
583 bytes: &bytes,
582 bytes: &bytes,
584 offset_override: None,
583 offset_override: None,
585 };
584 };
586
585
587 assert_eq!(entry.p2(), 123);
586 assert_eq!(entry.p2(), 123);
588 }
587 }
589
588
590 #[test]
589 #[test]
591 fn node_test() {
590 fn node_test() {
592 let node = Node::from_hex("0123456789012345678901234567890123456789")
591 let node = Node::from_hex("0123456789012345678901234567890123456789")
593 .unwrap();
592 .unwrap();
594 let bytes = IndexEntryBuilder::new().with_node(node).build();
593 let bytes = IndexEntryBuilder::new().with_node(node).build();
595
594
596 let entry = IndexEntry {
595 let entry = IndexEntry {
597 bytes: &bytes,
596 bytes: &bytes,
598 offset_override: None,
597 offset_override: None,
599 };
598 };
600
599
601 assert_eq!(*entry.hash(), node);
600 assert_eq!(*entry.hash(), node);
602 }
601 }
603
602
604 #[test]
603 #[test]
605 fn version_test() {
604 fn version_test() {
606 let bytes = IndexEntryBuilder::new()
605 let bytes = IndexEntryBuilder::new()
607 .is_first(true)
606 .is_first(true)
608 .with_version(2)
607 .with_version(2)
609 .build();
608 .build();
610
609
611 assert_eq!(get_version(&bytes), 2)
610 assert_eq!(get_version(&bytes), 2)
612 }
611 }
613 }
612 }
614
613
615 #[cfg(test)]
614 #[cfg(test)]
616 pub use tests::IndexEntryBuilder;
615 pub use tests::IndexEntryBuilder;
@@ -1,1069 +1,1069 b''
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 //!
7 //!
8 //! This provides a variation on the 16-ary radix tree that is
8 //! This provides a variation on the 16-ary radix tree that is
9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 //! on disk.
10 //! on disk.
11 //!
11 //!
12 //! Following existing implicit conventions, the "nodemap" terminology
12 //! Following existing implicit conventions, the "nodemap" terminology
13 //! is used in a more abstract context.
13 //! is used in a more abstract context.
14
14
15 use super::{
15 use super::{
16 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
16 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
17 };
17 };
18
18
19 use bytes_cast::{unaligned, BytesCast};
19 use bytes_cast::{unaligned, BytesCast};
20 use std::cmp::max;
20 use std::cmp::max;
21 use std::fmt;
21 use std::fmt;
22 use std::mem::{self, align_of, size_of};
22 use std::mem::{self, align_of, size_of};
23 use std::ops::Deref;
23 use std::ops::Deref;
24 use std::ops::Index;
24 use std::ops::Index;
25
25
26 #[derive(Debug, PartialEq)]
26 #[derive(Debug, PartialEq)]
27 pub enum NodeMapError {
27 pub enum NodeMapError {
28 MultipleResults,
28 MultipleResults,
29 /// A `Revision` stored in the nodemap could not be found in the index
29 /// A `Revision` stored in the nodemap could not be found in the index
30 RevisionNotInIndex(Revision),
30 RevisionNotInIndex(Revision),
31 }
31 }
32
32
33 /// Mapping system from Mercurial nodes to revision numbers.
33 /// Mapping system from Mercurial nodes to revision numbers.
34 ///
34 ///
35 /// ## `RevlogIndex` and `NodeMap`
35 /// ## `RevlogIndex` and `NodeMap`
36 ///
36 ///
37 /// One way to think about their relationship is that
37 /// One way to think about their relationship is that
38 /// the `NodeMap` is a prefix-oriented reverse index of the `Node` information
38 /// the `NodeMap` is a prefix-oriented reverse index of the `Node` information
39 /// carried by a [`RevlogIndex`].
39 /// carried by a [`RevlogIndex`].
40 ///
40 ///
41 /// Many of the methods in this trait take a `RevlogIndex` argument
41 /// Many of the methods in this trait take a `RevlogIndex` argument
42 /// which is used for validation of their results. This index must naturally
42 /// which is used for validation of their results. This index must naturally
43 /// be the one the `NodeMap` is about, and it must be consistent.
43 /// be the one the `NodeMap` is about, and it must be consistent.
44 ///
44 ///
45 /// Notably, the `NodeMap` must not store
45 /// Notably, the `NodeMap` must not store
46 /// information about more `Revision` values than there are in the index.
46 /// information about more `Revision` values than there are in the index.
47 /// In these methods, an encountered `Revision` is not in the index, a
47 /// In these methods, an encountered `Revision` is not in the index, a
48 /// [`RevisionNotInIndex`] error is returned.
48 /// [`RevisionNotInIndex`] error is returned.
49 ///
49 ///
50 /// In insert operations, the rule is thus that the `NodeMap` must always
50 /// In insert operations, the rule is thus that the `NodeMap` must always
51 /// be updated after the `RevlogIndex`
51 /// be updated after the `RevlogIndex`
52 /// be updated first, and the `NodeMap` second.
52 /// be updated first, and the `NodeMap` second.
53 ///
53 ///
54 /// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex
54 /// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex
55 /// [`RevlogIndex`]: ../trait.RevlogIndex.html
55 /// [`RevlogIndex`]: ../trait.RevlogIndex.html
56 pub trait NodeMap {
56 pub trait NodeMap {
57 /// Find the unique `Revision` having the given `Node`
57 /// Find the unique `Revision` having the given `Node`
58 ///
58 ///
59 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
59 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
60 fn find_node(
60 fn find_node(
61 &self,
61 &self,
62 index: &impl RevlogIndex,
62 index: &impl RevlogIndex,
63 node: &Node,
63 node: &Node,
64 ) -> Result<Option<Revision>, NodeMapError> {
64 ) -> Result<Option<Revision>, NodeMapError> {
65 self.find_bin(index, node.into())
65 self.find_bin(index, node.into())
66 }
66 }
67
67
68 /// Find the unique Revision whose `Node` starts with a given binary prefix
68 /// Find the unique Revision whose `Node` starts with a given binary prefix
69 ///
69 ///
70 /// If no Revision matches the given prefix, `Ok(None)` is returned.
70 /// If no Revision matches the given prefix, `Ok(None)` is returned.
71 ///
71 ///
72 /// If several Revisions match the given prefix, a [`MultipleResults`]
72 /// If several Revisions match the given prefix, a [`MultipleResults`]
73 /// error is returned.
73 /// error is returned.
74 fn find_bin<'a>(
74 fn find_bin(
75 &self,
75 &self,
76 idx: &impl RevlogIndex,
76 idx: &impl RevlogIndex,
77 prefix: NodePrefix,
77 prefix: NodePrefix,
78 ) -> Result<Option<Revision>, NodeMapError>;
78 ) -> Result<Option<Revision>, NodeMapError>;
79
79
80 /// Give the size of the shortest node prefix that determines
80 /// Give the size of the shortest node prefix that determines
81 /// the revision uniquely.
81 /// the revision uniquely.
82 ///
82 ///
83 /// From a binary node prefix, if it is matched in the node map, this
83 /// From a binary node prefix, if it is matched in the node map, this
84 /// returns the number of hexadecimal digits that would had sufficed
84 /// returns the number of hexadecimal digits that would had sufficed
85 /// to find the revision uniquely.
85 /// to find the revision uniquely.
86 ///
86 ///
87 /// Returns `None` if no `Revision` could be found for the prefix.
87 /// Returns `None` if no `Revision` could be found for the prefix.
88 ///
88 ///
89 /// If several Revisions match the given prefix, a [`MultipleResults`]
89 /// If several Revisions match the given prefix, a [`MultipleResults`]
90 /// error is returned.
90 /// error is returned.
91 fn unique_prefix_len_bin<'a>(
91 fn unique_prefix_len_bin(
92 &self,
92 &self,
93 idx: &impl RevlogIndex,
93 idx: &impl RevlogIndex,
94 node_prefix: NodePrefix,
94 node_prefix: NodePrefix,
95 ) -> Result<Option<usize>, NodeMapError>;
95 ) -> Result<Option<usize>, NodeMapError>;
96
96
97 /// Same as `unique_prefix_len_bin`, with a full `Node` as input
97 /// Same as `unique_prefix_len_bin`, with a full `Node` as input
98 fn unique_prefix_len_node(
98 fn unique_prefix_len_node(
99 &self,
99 &self,
100 idx: &impl RevlogIndex,
100 idx: &impl RevlogIndex,
101 node: &Node,
101 node: &Node,
102 ) -> Result<Option<usize>, NodeMapError> {
102 ) -> Result<Option<usize>, NodeMapError> {
103 self.unique_prefix_len_bin(idx, node.into())
103 self.unique_prefix_len_bin(idx, node.into())
104 }
104 }
105 }
105 }
106
106
107 pub trait MutableNodeMap: NodeMap {
107 pub trait MutableNodeMap: NodeMap {
108 fn insert<I: RevlogIndex>(
108 fn insert<I: RevlogIndex>(
109 &mut self,
109 &mut self,
110 index: &I,
110 index: &I,
111 node: &Node,
111 node: &Node,
112 rev: Revision,
112 rev: Revision,
113 ) -> Result<(), NodeMapError>;
113 ) -> Result<(), NodeMapError>;
114 }
114 }
115
115
116 /// Low level NodeTree [`Blocks`] elements
116 /// Low level NodeTree [`Blocks`] elements
117 ///
117 ///
118 /// These are exactly as for instance on persistent storage.
118 /// These are exactly as for instance on persistent storage.
119 type RawElement = unaligned::I32Be;
119 type RawElement = unaligned::I32Be;
120
120
121 /// High level representation of values in NodeTree
121 /// High level representation of values in NodeTree
122 /// [`Blocks`](struct.Block.html)
122 /// [`Blocks`](struct.Block.html)
123 ///
123 ///
124 /// This is the high level representation that most algorithms should
124 /// This is the high level representation that most algorithms should
125 /// use.
125 /// use.
126 #[derive(Clone, Debug, Eq, PartialEq)]
126 #[derive(Clone, Debug, Eq, PartialEq)]
127 enum Element {
127 enum Element {
128 Rev(Revision),
128 Rev(Revision),
129 Block(usize),
129 Block(usize),
130 None,
130 None,
131 }
131 }
132
132
133 impl From<RawElement> for Element {
133 impl From<RawElement> for Element {
134 /// Conversion from low level representation, after endianness conversion.
134 /// Conversion from low level representation, after endianness conversion.
135 ///
135 ///
136 /// See [`Block`](struct.Block.html) for explanation about the encoding.
136 /// See [`Block`](struct.Block.html) for explanation about the encoding.
137 fn from(raw: RawElement) -> Element {
137 fn from(raw: RawElement) -> Element {
138 let int = raw.get();
138 let int = raw.get();
139 if int >= 0 {
139 if int >= 0 {
140 Element::Block(int as usize)
140 Element::Block(int as usize)
141 } else if int == -1 {
141 } else if int == -1 {
142 Element::None
142 Element::None
143 } else {
143 } else {
144 Element::Rev(-int - 2)
144 Element::Rev(-int - 2)
145 }
145 }
146 }
146 }
147 }
147 }
148
148
149 impl From<Element> for RawElement {
149 impl From<Element> for RawElement {
150 fn from(element: Element) -> RawElement {
150 fn from(element: Element) -> RawElement {
151 RawElement::from(match element {
151 RawElement::from(match element {
152 Element::None => 0,
152 Element::None => 0,
153 Element::Block(i) => i as i32,
153 Element::Block(i) => i as i32,
154 Element::Rev(rev) => -rev - 2,
154 Element::Rev(rev) => -rev - 2,
155 })
155 })
156 }
156 }
157 }
157 }
158
158
159 /// A logical block of the `NodeTree`, packed with a fixed size.
159 /// A logical block of the `NodeTree`, packed with a fixed size.
160 ///
160 ///
161 /// These are always used in container types implementing `Index<Block>`,
161 /// These are always used in container types implementing `Index<Block>`,
162 /// such as `&Block`
162 /// such as `&Block`
163 ///
163 ///
164 /// As an array of integers, its ith element encodes that the
164 /// As an array of integers, its ith element encodes that the
165 /// ith potential edge from the block, representing the ith hexadecimal digit
165 /// ith potential edge from the block, representing the ith hexadecimal digit
166 /// (nybble) `i` is either:
166 /// (nybble) `i` is either:
167 ///
167 ///
168 /// - absent (value -1)
168 /// - absent (value -1)
169 /// - another `Block` in the same indexable container (value ≥ 0)
169 /// - another `Block` in the same indexable container (value ≥ 0)
170 /// - a `Revision` leaf (value ≤ -2)
170 /// - a `Revision` leaf (value ≤ -2)
171 ///
171 ///
172 /// Endianness has to be fixed for consistency on shared storage across
172 /// Endianness has to be fixed for consistency on shared storage across
173 /// different architectures.
173 /// different architectures.
174 ///
174 ///
175 /// A key difference with the C `nodetree` is that we need to be
175 /// A key difference with the C `nodetree` is that we need to be
176 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
176 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
177 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
177 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
178 ///
178 ///
179 /// Another related difference is that `NULL_REVISION` (-1) is not
179 /// Another related difference is that `NULL_REVISION` (-1) is not
180 /// represented at all, because we want an immutable empty nodetree
180 /// represented at all, because we want an immutable empty nodetree
181 /// to be valid.
181 /// to be valid.
182
182
183 const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
183 const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
184
184
185 #[derive(Copy, Clone, BytesCast, PartialEq)]
185 #[derive(Copy, Clone, BytesCast, PartialEq)]
186 #[repr(transparent)]
186 #[repr(transparent)]
187 pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
187 pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
188
188
189 impl Block {
189 impl Block {
190 fn new() -> Self {
190 fn new() -> Self {
191 let absent_node = RawElement::from(-1);
191 let absent_node = RawElement::from(-1);
192 Block([absent_node; ELEMENTS_PER_BLOCK])
192 Block([absent_node; ELEMENTS_PER_BLOCK])
193 }
193 }
194
194
195 fn get(&self, nybble: u8) -> Element {
195 fn get(&self, nybble: u8) -> Element {
196 self.0[nybble as usize].into()
196 self.0[nybble as usize].into()
197 }
197 }
198
198
199 fn set(&mut self, nybble: u8, element: Element) {
199 fn set(&mut self, nybble: u8, element: Element) {
200 self.0[nybble as usize] = element.into()
200 self.0[nybble as usize] = element.into()
201 }
201 }
202 }
202 }
203
203
204 impl fmt::Debug for Block {
204 impl fmt::Debug for Block {
205 /// sparse representation for testing and debugging purposes
205 /// sparse representation for testing and debugging purposes
206 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
206 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
207 f.debug_map()
207 f.debug_map()
208 .entries((0..16).filter_map(|i| match self.get(i) {
208 .entries((0..16).filter_map(|i| match self.get(i) {
209 Element::None => None,
209 Element::None => None,
210 element => Some((i, element)),
210 element => Some((i, element)),
211 }))
211 }))
212 .finish()
212 .finish()
213 }
213 }
214 }
214 }
215
215
216 /// A mutable 16-radix tree with the root block logically at the end
216 /// A mutable 16-radix tree with the root block logically at the end
217 ///
217 ///
218 /// Because of the append only nature of our node trees, we need to
218 /// Because of the append only nature of our node trees, we need to
219 /// keep the original untouched and store new blocks separately.
219 /// keep the original untouched and store new blocks separately.
220 ///
220 ///
221 /// The mutable root `Block` is kept apart so that we don't have to rebump
221 /// The mutable root `Block` is kept apart so that we don't have to rebump
222 /// it on each insertion.
222 /// it on each insertion.
223 pub struct NodeTree {
223 pub struct NodeTree {
224 readonly: Box<dyn Deref<Target = [Block]> + Send>,
224 readonly: Box<dyn Deref<Target = [Block]> + Send>,
225 growable: Vec<Block>,
225 growable: Vec<Block>,
226 root: Block,
226 root: Block,
227 masked_inner_blocks: usize,
227 masked_inner_blocks: usize,
228 }
228 }
229
229
230 impl Index<usize> for NodeTree {
230 impl Index<usize> for NodeTree {
231 type Output = Block;
231 type Output = Block;
232
232
233 fn index(&self, i: usize) -> &Block {
233 fn index(&self, i: usize) -> &Block {
234 let ro_len = self.readonly.len();
234 let ro_len = self.readonly.len();
235 if i < ro_len {
235 if i < ro_len {
236 &self.readonly[i]
236 &self.readonly[i]
237 } else if i == ro_len + self.growable.len() {
237 } else if i == ro_len + self.growable.len() {
238 &self.root
238 &self.root
239 } else {
239 } else {
240 &self.growable[i - ro_len]
240 &self.growable[i - ro_len]
241 }
241 }
242 }
242 }
243 }
243 }
244
244
245 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
245 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
246 fn has_prefix_or_none(
246 fn has_prefix_or_none(
247 idx: &impl RevlogIndex,
247 idx: &impl RevlogIndex,
248 prefix: NodePrefix,
248 prefix: NodePrefix,
249 rev: Revision,
249 rev: Revision,
250 ) -> Result<Option<Revision>, NodeMapError> {
250 ) -> Result<Option<Revision>, NodeMapError> {
251 idx.node(rev)
251 idx.node(rev)
252 .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev))
252 .ok_or(NodeMapError::RevisionNotInIndex(rev))
253 .map(|node| {
253 .map(|node| {
254 if prefix.is_prefix_of(node) {
254 if prefix.is_prefix_of(node) {
255 Some(rev)
255 Some(rev)
256 } else {
256 } else {
257 None
257 None
258 }
258 }
259 })
259 })
260 }
260 }
261
261
262 /// validate that the candidate's node starts indeed with given prefix,
262 /// validate that the candidate's node starts indeed with given prefix,
263 /// and treat ambiguities related to `NULL_REVISION`.
263 /// and treat ambiguities related to `NULL_REVISION`.
264 ///
264 ///
265 /// From the data in the NodeTree, one can only conclude that some
265 /// From the data in the NodeTree, one can only conclude that some
266 /// revision is the only one for a *subprefix* of the one being looked up.
266 /// revision is the only one for a *subprefix* of the one being looked up.
267 fn validate_candidate(
267 fn validate_candidate(
268 idx: &impl RevlogIndex,
268 idx: &impl RevlogIndex,
269 prefix: NodePrefix,
269 prefix: NodePrefix,
270 candidate: (Option<Revision>, usize),
270 candidate: (Option<Revision>, usize),
271 ) -> Result<(Option<Revision>, usize), NodeMapError> {
271 ) -> Result<(Option<Revision>, usize), NodeMapError> {
272 let (rev, steps) = candidate;
272 let (rev, steps) = candidate;
273 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
273 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
274 rev.map_or(Ok((None, steps)), |r| {
274 rev.map_or(Ok((None, steps)), |r| {
275 has_prefix_or_none(idx, prefix, r)
275 has_prefix_or_none(idx, prefix, r)
276 .map(|opt| (opt, max(steps, nz_nybble + 1)))
276 .map(|opt| (opt, max(steps, nz_nybble + 1)))
277 })
277 })
278 } else {
278 } else {
279 // the prefix is only made of zeros; NULL_REVISION always matches it
279 // the prefix is only made of zeros; NULL_REVISION always matches it
280 // and any other *valid* result is an ambiguity
280 // and any other *valid* result is an ambiguity
281 match rev {
281 match rev {
282 None => Ok((Some(NULL_REVISION), steps + 1)),
282 None => Ok((Some(NULL_REVISION), steps + 1)),
283 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
283 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
284 None => Ok((Some(NULL_REVISION), steps + 1)),
284 None => Ok((Some(NULL_REVISION), steps + 1)),
285 _ => Err(NodeMapError::MultipleResults),
285 _ => Err(NodeMapError::MultipleResults),
286 },
286 },
287 }
287 }
288 }
288 }
289 }
289 }
290
290
291 impl NodeTree {
291 impl NodeTree {
292 /// Initiate a NodeTree from an immutable slice-like of `Block`
292 /// Initiate a NodeTree from an immutable slice-like of `Block`
293 ///
293 ///
294 /// We keep `readonly` and clone its root block if it isn't empty.
294 /// We keep `readonly` and clone its root block if it isn't empty.
295 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
295 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
296 let root = readonly.last().cloned().unwrap_or_else(Block::new);
296 let root = readonly.last().cloned().unwrap_or_else(Block::new);
297 NodeTree {
297 NodeTree {
298 readonly,
298 readonly,
299 growable: Vec::new(),
299 growable: Vec::new(),
300 root,
300 root,
301 masked_inner_blocks: 0,
301 masked_inner_blocks: 0,
302 }
302 }
303 }
303 }
304
304
305 /// Create from an opaque bunch of bytes
305 /// Create from an opaque bunch of bytes
306 ///
306 ///
307 /// The created `NodeTreeBytes` from `buffer`,
307 /// The created `NodeTreeBytes` from `buffer`,
308 /// of which exactly `amount` bytes are used.
308 /// of which exactly `amount` bytes are used.
309 ///
309 ///
310 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
310 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
311 /// - `offset` allows for the final file format to include fixed data
311 /// - `offset` allows for the final file format to include fixed data
312 /// (generation number, behavioural flags)
312 /// (generation number, behavioural flags)
313 /// - `amount` is expressed in bytes, and is not automatically derived from
313 /// - `amount` is expressed in bytes, and is not automatically derived from
314 /// `bytes`, so that a caller that manages them atomically can perform
314 /// `bytes`, so that a caller that manages them atomically can perform
315 /// temporary disk serializations and still rollback easily if needed.
315 /// temporary disk serializations and still rollback easily if needed.
316 /// First use-case for this would be to support Mercurial shell hooks.
316 /// First use-case for this would be to support Mercurial shell hooks.
317 ///
317 ///
318 /// panics if `buffer` is smaller than `amount`
318 /// panics if `buffer` is smaller than `amount`
319 pub fn load_bytes(
319 pub fn load_bytes(
320 bytes: Box<dyn Deref<Target = [u8]> + Send>,
320 bytes: Box<dyn Deref<Target = [u8]> + Send>,
321 amount: usize,
321 amount: usize,
322 ) -> Self {
322 ) -> Self {
323 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
323 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
324 }
324 }
325
325
326 /// Retrieve added `Block` and the original immutable data
326 /// Retrieve added `Block` and the original immutable data
327 pub fn into_readonly_and_added(
327 pub fn into_readonly_and_added(
328 self,
328 self,
329 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
329 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
330 let mut vec = self.growable;
330 let mut vec = self.growable;
331 let readonly = self.readonly;
331 let readonly = self.readonly;
332 if readonly.last() != Some(&self.root) {
332 if readonly.last() != Some(&self.root) {
333 vec.push(self.root);
333 vec.push(self.root);
334 }
334 }
335 (readonly, vec)
335 (readonly, vec)
336 }
336 }
337
337
338 /// Retrieve added `Blocks` as bytes, ready to be written to persistent
338 /// Retrieve added `Blocks` as bytes, ready to be written to persistent
339 /// storage
339 /// storage
340 pub fn into_readonly_and_added_bytes(
340 pub fn into_readonly_and_added_bytes(
341 self,
341 self,
342 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
342 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
343 let (readonly, vec) = self.into_readonly_and_added();
343 let (readonly, vec) = self.into_readonly_and_added();
344 // Prevent running `v`'s destructor so we are in complete control
344 // Prevent running `v`'s destructor so we are in complete control
345 // of the allocation.
345 // of the allocation.
346 let vec = mem::ManuallyDrop::new(vec);
346 let vec = mem::ManuallyDrop::new(vec);
347
347
348 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
348 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
349 // bytes, so this is perfectly safe.
349 // bytes, so this is perfectly safe.
350 let bytes = unsafe {
350 let bytes = unsafe {
351 // Check for compatible allocation layout.
351 // Check for compatible allocation layout.
352 // (Optimized away by constant-folding + dead code elimination.)
352 // (Optimized away by constant-folding + dead code elimination.)
353 assert_eq!(size_of::<Block>(), 64);
353 assert_eq!(size_of::<Block>(), 64);
354 assert_eq!(align_of::<Block>(), 1);
354 assert_eq!(align_of::<Block>(), 1);
355
355
356 // /!\ Any use of `vec` after this is use-after-free.
356 // /!\ Any use of `vec` after this is use-after-free.
357 // TODO: use `into_raw_parts` once stabilized
357 // TODO: use `into_raw_parts` once stabilized
358 Vec::from_raw_parts(
358 Vec::from_raw_parts(
359 vec.as_ptr() as *mut u8,
359 vec.as_ptr() as *mut u8,
360 vec.len() * size_of::<Block>(),
360 vec.len() * size_of::<Block>(),
361 vec.capacity() * size_of::<Block>(),
361 vec.capacity() * size_of::<Block>(),
362 )
362 )
363 };
363 };
364 (readonly, bytes)
364 (readonly, bytes)
365 }
365 }
366
366
367 /// Total number of blocks
367 /// Total number of blocks
368 fn len(&self) -> usize {
368 fn len(&self) -> usize {
369 self.readonly.len() + self.growable.len() + 1
369 self.readonly.len() + self.growable.len() + 1
370 }
370 }
371
371
372 /// Implemented for completeness
372 /// Implemented for completeness
373 ///
373 ///
374 /// A `NodeTree` always has at least the mutable root block.
374 /// A `NodeTree` always has at least the mutable root block.
375 #[allow(dead_code)]
375 #[allow(dead_code)]
376 fn is_empty(&self) -> bool {
376 fn is_empty(&self) -> bool {
377 false
377 false
378 }
378 }
379
379
380 /// Main working method for `NodeTree` searches
380 /// Main working method for `NodeTree` searches
381 ///
381 ///
382 /// The first returned value is the result of analysing `NodeTree` data
382 /// The first returned value is the result of analysing `NodeTree` data
383 /// *alone*: whereas `None` guarantees that the given prefix is absent
383 /// *alone*: whereas `None` guarantees that the given prefix is absent
384 /// from the `NodeTree` data (but still could match `NULL_NODE`), with
384 /// from the `NodeTree` data (but still could match `NULL_NODE`), with
385 /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision`
385 /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision`
386 /// that could match the prefix. Actually, all that can be inferred from
386 /// that could match the prefix. Actually, all that can be inferred from
387 /// the `NodeTree` data is that `rev` is the revision with the longest
387 /// the `NodeTree` data is that `rev` is the revision with the longest
388 /// common node prefix with the given prefix.
388 /// common node prefix with the given prefix.
389 ///
389 ///
390 /// The second returned value is the size of the smallest subprefix
390 /// The second returned value is the size of the smallest subprefix
391 /// of `prefix` that would give the same result, i.e. not the
391 /// of `prefix` that would give the same result, i.e. not the
392 /// `MultipleResults` error variant (again, using only the data of the
392 /// `MultipleResults` error variant (again, using only the data of the
393 /// `NodeTree`).
393 /// `NodeTree`).
394 fn lookup(
394 fn lookup(
395 &self,
395 &self,
396 prefix: NodePrefix,
396 prefix: NodePrefix,
397 ) -> Result<(Option<Revision>, usize), NodeMapError> {
397 ) -> Result<(Option<Revision>, usize), NodeMapError> {
398 for (i, visit_item) in self.visit(prefix).enumerate() {
398 for (i, visit_item) in self.visit(prefix).enumerate() {
399 if let Some(opt) = visit_item.final_revision() {
399 if let Some(opt) = visit_item.final_revision() {
400 return Ok((opt, i + 1));
400 return Ok((opt, i + 1));
401 }
401 }
402 }
402 }
403 Err(NodeMapError::MultipleResults)
403 Err(NodeMapError::MultipleResults)
404 }
404 }
405
405
406 fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
406 fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
407 NodeTreeVisitor {
407 NodeTreeVisitor {
408 nt: self,
408 nt: self,
409 prefix,
409 prefix,
410 visit: self.len() - 1,
410 visit: self.len() - 1,
411 nybble_idx: 0,
411 nybble_idx: 0,
412 done: false,
412 done: false,
413 }
413 }
414 }
414 }
415 /// Return a mutable reference for `Block` at index `idx`.
415 /// Return a mutable reference for `Block` at index `idx`.
416 ///
416 ///
417 /// If `idx` lies in the immutable area, then the reference is to
417 /// If `idx` lies in the immutable area, then the reference is to
418 /// a newly appended copy.
418 /// a newly appended copy.
419 ///
419 ///
420 /// Returns (new_idx, glen, mut_ref) where
420 /// Returns (new_idx, glen, mut_ref) where
421 ///
421 ///
422 /// - `new_idx` is the index of the mutable `Block`
422 /// - `new_idx` is the index of the mutable `Block`
423 /// - `mut_ref` is a mutable reference to the mutable Block.
423 /// - `mut_ref` is a mutable reference to the mutable Block.
424 /// - `glen` is the new length of `self.growable`
424 /// - `glen` is the new length of `self.growable`
425 ///
425 ///
426 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
426 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
427 /// itself because of the mutable borrow taken with the returned `Block`
427 /// itself because of the mutable borrow taken with the returned `Block`
428 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
428 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
429 let ro_blocks = &self.readonly;
429 let ro_blocks = &self.readonly;
430 let ro_len = ro_blocks.len();
430 let ro_len = ro_blocks.len();
431 let glen = self.growable.len();
431 let glen = self.growable.len();
432 if idx < ro_len {
432 if idx < ro_len {
433 self.masked_inner_blocks += 1;
433 self.masked_inner_blocks += 1;
434 self.growable.push(ro_blocks[idx]);
434 self.growable.push(ro_blocks[idx]);
435 (glen + ro_len, &mut self.growable[glen], glen + 1)
435 (glen + ro_len, &mut self.growable[glen], glen + 1)
436 } else if glen + ro_len == idx {
436 } else if glen + ro_len == idx {
437 (idx, &mut self.root, glen)
437 (idx, &mut self.root, glen)
438 } else {
438 } else {
439 (idx, &mut self.growable[idx - ro_len], glen)
439 (idx, &mut self.growable[idx - ro_len], glen)
440 }
440 }
441 }
441 }
442
442
443 /// Main insertion method
443 /// Main insertion method
444 ///
444 ///
445 /// This will dive in the node tree to find the deepest `Block` for
445 /// This will dive in the node tree to find the deepest `Block` for
446 /// `node`, split it as much as needed and record `node` in there.
446 /// `node`, split it as much as needed and record `node` in there.
447 /// The method then backtracks, updating references in all the visited
447 /// The method then backtracks, updating references in all the visited
448 /// blocks from the root.
448 /// blocks from the root.
449 ///
449 ///
450 /// All the mutated `Block` are copied first to the growable part if
450 /// All the mutated `Block` are copied first to the growable part if
451 /// needed. That happens for those in the immutable part except the root.
451 /// needed. That happens for those in the immutable part except the root.
452 pub fn insert<I: RevlogIndex>(
452 pub fn insert<I: RevlogIndex>(
453 &mut self,
453 &mut self,
454 index: &I,
454 index: &I,
455 node: &Node,
455 node: &Node,
456 rev: Revision,
456 rev: Revision,
457 ) -> Result<(), NodeMapError> {
457 ) -> Result<(), NodeMapError> {
458 let ro_len = &self.readonly.len();
458 let ro_len = &self.readonly.len();
459
459
460 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
460 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
461 let read_nybbles = visit_steps.len();
461 let read_nybbles = visit_steps.len();
462 // visit_steps cannot be empty, since we always visit the root block
462 // visit_steps cannot be empty, since we always visit the root block
463 let deepest = visit_steps.pop().unwrap();
463 let deepest = visit_steps.pop().unwrap();
464
464
465 let (mut block_idx, mut block, mut glen) =
465 let (mut block_idx, mut block, mut glen) =
466 self.mutable_block(deepest.block_idx);
466 self.mutable_block(deepest.block_idx);
467
467
468 if let Element::Rev(old_rev) = deepest.element {
468 if let Element::Rev(old_rev) = deepest.element {
469 let old_node = index
469 let old_node = index
470 .node(old_rev)
470 .node(old_rev)
471 .ok_or_else(|| NodeMapError::RevisionNotInIndex(old_rev))?;
471 .ok_or(NodeMapError::RevisionNotInIndex(old_rev))?;
472 if old_node == node {
472 if old_node == node {
473 return Ok(()); // avoid creating lots of useless blocks
473 return Ok(()); // avoid creating lots of useless blocks
474 }
474 }
475
475
476 // Looping over the tail of nybbles in both nodes, creating
476 // Looping over the tail of nybbles in both nodes, creating
477 // new blocks until we find the difference
477 // new blocks until we find the difference
478 let mut new_block_idx = ro_len + glen;
478 let mut new_block_idx = ro_len + glen;
479 let mut nybble = deepest.nybble;
479 let mut nybble = deepest.nybble;
480 for nybble_pos in read_nybbles..node.nybbles_len() {
480 for nybble_pos in read_nybbles..node.nybbles_len() {
481 block.set(nybble, Element::Block(new_block_idx));
481 block.set(nybble, Element::Block(new_block_idx));
482
482
483 let new_nybble = node.get_nybble(nybble_pos);
483 let new_nybble = node.get_nybble(nybble_pos);
484 let old_nybble = old_node.get_nybble(nybble_pos);
484 let old_nybble = old_node.get_nybble(nybble_pos);
485
485
486 if old_nybble == new_nybble {
486 if old_nybble == new_nybble {
487 self.growable.push(Block::new());
487 self.growable.push(Block::new());
488 block = &mut self.growable[glen];
488 block = &mut self.growable[glen];
489 glen += 1;
489 glen += 1;
490 new_block_idx += 1;
490 new_block_idx += 1;
491 nybble = new_nybble;
491 nybble = new_nybble;
492 } else {
492 } else {
493 let mut new_block = Block::new();
493 let mut new_block = Block::new();
494 new_block.set(old_nybble, Element::Rev(old_rev));
494 new_block.set(old_nybble, Element::Rev(old_rev));
495 new_block.set(new_nybble, Element::Rev(rev));
495 new_block.set(new_nybble, Element::Rev(rev));
496 self.growable.push(new_block);
496 self.growable.push(new_block);
497 break;
497 break;
498 }
498 }
499 }
499 }
500 } else {
500 } else {
501 // Free slot in the deepest block: no splitting has to be done
501 // Free slot in the deepest block: no splitting has to be done
502 block.set(deepest.nybble, Element::Rev(rev));
502 block.set(deepest.nybble, Element::Rev(rev));
503 }
503 }
504
504
505 // Backtrack over visit steps to update references
505 // Backtrack over visit steps to update references
506 while let Some(visited) = visit_steps.pop() {
506 while let Some(visited) = visit_steps.pop() {
507 let to_write = Element::Block(block_idx);
507 let to_write = Element::Block(block_idx);
508 if visit_steps.is_empty() {
508 if visit_steps.is_empty() {
509 self.root.set(visited.nybble, to_write);
509 self.root.set(visited.nybble, to_write);
510 break;
510 break;
511 }
511 }
512 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
512 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
513 if block.get(visited.nybble) == to_write {
513 if block.get(visited.nybble) == to_write {
514 break;
514 break;
515 }
515 }
516 block.set(visited.nybble, to_write);
516 block.set(visited.nybble, to_write);
517 block_idx = new_idx;
517 block_idx = new_idx;
518 }
518 }
519 Ok(())
519 Ok(())
520 }
520 }
521
521
522 /// Make the whole `NodeTree` logically empty, without touching the
522 /// Make the whole `NodeTree` logically empty, without touching the
523 /// immutable part.
523 /// immutable part.
524 pub fn invalidate_all(&mut self) {
524 pub fn invalidate_all(&mut self) {
525 self.root = Block::new();
525 self.root = Block::new();
526 self.growable = Vec::new();
526 self.growable = Vec::new();
527 self.masked_inner_blocks = self.readonly.len();
527 self.masked_inner_blocks = self.readonly.len();
528 }
528 }
529
529
530 /// Return the number of blocks in the readonly part that are currently
530 /// Return the number of blocks in the readonly part that are currently
531 /// masked in the mutable part.
531 /// masked in the mutable part.
532 ///
532 ///
533 /// The `NodeTree` structure has no efficient way to know how many blocks
533 /// The `NodeTree` structure has no efficient way to know how many blocks
534 /// are already unreachable in the readonly part.
534 /// are already unreachable in the readonly part.
535 ///
535 ///
536 /// After a call to `invalidate_all()`, the returned number can be actually
536 /// After a call to `invalidate_all()`, the returned number can be actually
537 /// bigger than the whole readonly part, a conventional way to mean that
537 /// bigger than the whole readonly part, a conventional way to mean that
538 /// all the readonly blocks have been masked. This is what is really
538 /// all the readonly blocks have been masked. This is what is really
539 /// useful to the caller and does not require to know how many were
539 /// useful to the caller and does not require to know how many were
540 /// actually unreachable to begin with.
540 /// actually unreachable to begin with.
541 pub fn masked_readonly_blocks(&self) -> usize {
541 pub fn masked_readonly_blocks(&self) -> usize {
542 if let Some(readonly_root) = self.readonly.last() {
542 if let Some(readonly_root) = self.readonly.last() {
543 if readonly_root == &self.root {
543 if readonly_root == &self.root {
544 return 0;
544 return 0;
545 }
545 }
546 } else {
546 } else {
547 return 0;
547 return 0;
548 }
548 }
549 self.masked_inner_blocks + 1
549 self.masked_inner_blocks + 1
550 }
550 }
551 }
551 }
552
552
553 pub struct NodeTreeBytes {
553 pub struct NodeTreeBytes {
554 buffer: Box<dyn Deref<Target = [u8]> + Send>,
554 buffer: Box<dyn Deref<Target = [u8]> + Send>,
555 len_in_blocks: usize,
555 len_in_blocks: usize,
556 }
556 }
557
557
558 impl NodeTreeBytes {
558 impl NodeTreeBytes {
559 fn new(
559 fn new(
560 buffer: Box<dyn Deref<Target = [u8]> + Send>,
560 buffer: Box<dyn Deref<Target = [u8]> + Send>,
561 amount: usize,
561 amount: usize,
562 ) -> Self {
562 ) -> Self {
563 assert!(buffer.len() >= amount);
563 assert!(buffer.len() >= amount);
564 let len_in_blocks = amount / size_of::<Block>();
564 let len_in_blocks = amount / size_of::<Block>();
565 NodeTreeBytes {
565 NodeTreeBytes {
566 buffer,
566 buffer,
567 len_in_blocks,
567 len_in_blocks,
568 }
568 }
569 }
569 }
570 }
570 }
571
571
572 impl Deref for NodeTreeBytes {
572 impl Deref for NodeTreeBytes {
573 type Target = [Block];
573 type Target = [Block];
574
574
575 fn deref(&self) -> &[Block] {
575 fn deref(&self) -> &[Block] {
576 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
576 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
577 // `NodeTreeBytes::new` already asserted that `self.buffer` is
577 // `NodeTreeBytes::new` already asserted that `self.buffer` is
578 // large enough.
578 // large enough.
579 .unwrap()
579 .unwrap()
580 .0
580 .0
581 }
581 }
582 }
582 }
583
583
584 struct NodeTreeVisitor<'n> {
584 struct NodeTreeVisitor<'n> {
585 nt: &'n NodeTree,
585 nt: &'n NodeTree,
586 prefix: NodePrefix,
586 prefix: NodePrefix,
587 visit: usize,
587 visit: usize,
588 nybble_idx: usize,
588 nybble_idx: usize,
589 done: bool,
589 done: bool,
590 }
590 }
591
591
592 #[derive(Debug, PartialEq, Clone)]
592 #[derive(Debug, PartialEq, Clone)]
593 struct NodeTreeVisitItem {
593 struct NodeTreeVisitItem {
594 block_idx: usize,
594 block_idx: usize,
595 nybble: u8,
595 nybble: u8,
596 element: Element,
596 element: Element,
597 }
597 }
598
598
599 impl<'n> Iterator for NodeTreeVisitor<'n> {
599 impl<'n> Iterator for NodeTreeVisitor<'n> {
600 type Item = NodeTreeVisitItem;
600 type Item = NodeTreeVisitItem;
601
601
602 fn next(&mut self) -> Option<Self::Item> {
602 fn next(&mut self) -> Option<Self::Item> {
603 if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
603 if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
604 return None;
604 return None;
605 }
605 }
606
606
607 let nybble = self.prefix.get_nybble(self.nybble_idx);
607 let nybble = self.prefix.get_nybble(self.nybble_idx);
608 self.nybble_idx += 1;
608 self.nybble_idx += 1;
609
609
610 let visit = self.visit;
610 let visit = self.visit;
611 let element = self.nt[visit].get(nybble);
611 let element = self.nt[visit].get(nybble);
612 if let Element::Block(idx) = element {
612 if let Element::Block(idx) = element {
613 self.visit = idx;
613 self.visit = idx;
614 } else {
614 } else {
615 self.done = true;
615 self.done = true;
616 }
616 }
617
617
618 Some(NodeTreeVisitItem {
618 Some(NodeTreeVisitItem {
619 block_idx: visit,
619 block_idx: visit,
620 nybble,
620 nybble,
621 element,
621 element,
622 })
622 })
623 }
623 }
624 }
624 }
625
625
626 impl NodeTreeVisitItem {
626 impl NodeTreeVisitItem {
627 // Return `Some(opt)` if this item is final, with `opt` being the
627 // Return `Some(opt)` if this item is final, with `opt` being the
628 // `Revision` that it may represent.
628 // `Revision` that it may represent.
629 //
629 //
630 // If the item is not terminal, return `None`
630 // If the item is not terminal, return `None`
631 fn final_revision(&self) -> Option<Option<Revision>> {
631 fn final_revision(&self) -> Option<Option<Revision>> {
632 match self.element {
632 match self.element {
633 Element::Block(_) => None,
633 Element::Block(_) => None,
634 Element::Rev(r) => Some(Some(r)),
634 Element::Rev(r) => Some(Some(r)),
635 Element::None => Some(None),
635 Element::None => Some(None),
636 }
636 }
637 }
637 }
638 }
638 }
639
639
640 impl From<Vec<Block>> for NodeTree {
640 impl From<Vec<Block>> for NodeTree {
641 fn from(vec: Vec<Block>) -> Self {
641 fn from(vec: Vec<Block>) -> Self {
642 Self::new(Box::new(vec))
642 Self::new(Box::new(vec))
643 }
643 }
644 }
644 }
645
645
646 impl fmt::Debug for NodeTree {
646 impl fmt::Debug for NodeTree {
647 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
647 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
648 let readonly: &[Block] = &*self.readonly;
648 let readonly: &[Block] = &*self.readonly;
649 write!(
649 write!(
650 f,
650 f,
651 "readonly: {:?}, growable: {:?}, root: {:?}",
651 "readonly: {:?}, growable: {:?}, root: {:?}",
652 readonly, self.growable, self.root
652 readonly, self.growable, self.root
653 )
653 )
654 }
654 }
655 }
655 }
656
656
657 impl Default for NodeTree {
657 impl Default for NodeTree {
658 /// Create a fully mutable empty NodeTree
658 /// Create a fully mutable empty NodeTree
659 fn default() -> Self {
659 fn default() -> Self {
660 NodeTree::new(Box::new(Vec::new()))
660 NodeTree::new(Box::new(Vec::new()))
661 }
661 }
662 }
662 }
663
663
664 impl NodeMap for NodeTree {
664 impl NodeMap for NodeTree {
665 fn find_bin<'a>(
665 fn find_bin<'a>(
666 &self,
666 &self,
667 idx: &impl RevlogIndex,
667 idx: &impl RevlogIndex,
668 prefix: NodePrefix,
668 prefix: NodePrefix,
669 ) -> Result<Option<Revision>, NodeMapError> {
669 ) -> Result<Option<Revision>, NodeMapError> {
670 validate_candidate(idx, prefix, self.lookup(prefix)?)
670 validate_candidate(idx, prefix, self.lookup(prefix)?)
671 .map(|(opt, _shortest)| opt)
671 .map(|(opt, _shortest)| opt)
672 }
672 }
673
673
674 fn unique_prefix_len_bin<'a>(
674 fn unique_prefix_len_bin<'a>(
675 &self,
675 &self,
676 idx: &impl RevlogIndex,
676 idx: &impl RevlogIndex,
677 prefix: NodePrefix,
677 prefix: NodePrefix,
678 ) -> Result<Option<usize>, NodeMapError> {
678 ) -> Result<Option<usize>, NodeMapError> {
679 validate_candidate(idx, prefix, self.lookup(prefix)?)
679 validate_candidate(idx, prefix, self.lookup(prefix)?)
680 .map(|(opt, shortest)| opt.map(|_rev| shortest))
680 .map(|(opt, shortest)| opt.map(|_rev| shortest))
681 }
681 }
682 }
682 }
683
683
684 #[cfg(test)]
684 #[cfg(test)]
685 mod tests {
685 mod tests {
686 use super::NodeMapError::*;
686 use super::NodeMapError::*;
687 use super::*;
687 use super::*;
688 use crate::revlog::node::{hex_pad_right, Node};
688 use crate::revlog::node::{hex_pad_right, Node};
689 use std::collections::HashMap;
689 use std::collections::HashMap;
690
690
691 /// Creates a `Block` using a syntax close to the `Debug` output
691 /// Creates a `Block` using a syntax close to the `Debug` output
692 macro_rules! block {
692 macro_rules! block {
693 {$($nybble:tt : $variant:ident($val:tt)),*} => (
693 {$($nybble:tt : $variant:ident($val:tt)),*} => (
694 {
694 {
695 let mut block = Block::new();
695 let mut block = Block::new();
696 $(block.set($nybble, Element::$variant($val)));*;
696 $(block.set($nybble, Element::$variant($val)));*;
697 block
697 block
698 }
698 }
699 )
699 )
700 }
700 }
701
701
702 #[test]
702 #[test]
703 fn test_block_debug() {
703 fn test_block_debug() {
704 let mut block = Block::new();
704 let mut block = Block::new();
705 block.set(1, Element::Rev(3));
705 block.set(1, Element::Rev(3));
706 block.set(10, Element::Block(0));
706 block.set(10, Element::Block(0));
707 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
707 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
708 }
708 }
709
709
710 #[test]
710 #[test]
711 fn test_block_macro() {
711 fn test_block_macro() {
712 let block = block! {5: Block(2)};
712 let block = block! {5: Block(2)};
713 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
713 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
714
714
715 let block = block! {13: Rev(15), 5: Block(2)};
715 let block = block! {13: Rev(15), 5: Block(2)};
716 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
716 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
717 }
717 }
718
718
719 #[test]
719 #[test]
720 fn test_raw_block() {
720 fn test_raw_block() {
721 let mut raw = [255u8; 64];
721 let mut raw = [255u8; 64];
722
722
723 let mut counter = 0;
723 let mut counter = 0;
724 for val in [0_i32, 15, -2, -1, -3].iter() {
724 for val in [0_i32, 15, -2, -1, -3].iter() {
725 for byte in val.to_be_bytes().iter() {
725 for byte in val.to_be_bytes().iter() {
726 raw[counter] = *byte;
726 raw[counter] = *byte;
727 counter += 1;
727 counter += 1;
728 }
728 }
729 }
729 }
730 let (block, _) = Block::from_bytes(&raw).unwrap();
730 let (block, _) = Block::from_bytes(&raw).unwrap();
731 assert_eq!(block.get(0), Element::Block(0));
731 assert_eq!(block.get(0), Element::Block(0));
732 assert_eq!(block.get(1), Element::Block(15));
732 assert_eq!(block.get(1), Element::Block(15));
733 assert_eq!(block.get(3), Element::None);
733 assert_eq!(block.get(3), Element::None);
734 assert_eq!(block.get(2), Element::Rev(0));
734 assert_eq!(block.get(2), Element::Rev(0));
735 assert_eq!(block.get(4), Element::Rev(1));
735 assert_eq!(block.get(4), Element::Rev(1));
736 }
736 }
737
737
738 type TestIndex = HashMap<Revision, Node>;
738 type TestIndex = HashMap<Revision, Node>;
739
739
740 impl RevlogIndex for TestIndex {
740 impl RevlogIndex for TestIndex {
741 fn node(&self, rev: Revision) -> Option<&Node> {
741 fn node(&self, rev: Revision) -> Option<&Node> {
742 self.get(&rev)
742 self.get(&rev)
743 }
743 }
744
744
745 fn len(&self) -> usize {
745 fn len(&self) -> usize {
746 self.len()
746 self.len()
747 }
747 }
748 }
748 }
749
749
750 /// Pad hexadecimal Node prefix with zeros on the right
750 /// Pad hexadecimal Node prefix with zeros on the right
751 ///
751 ///
752 /// This avoids having to repeatedly write very long hexadecimal
752 /// This avoids having to repeatedly write very long hexadecimal
753 /// strings for test data, and brings actual hash size independency.
753 /// strings for test data, and brings actual hash size independency.
754 #[cfg(test)]
754 #[cfg(test)]
755 fn pad_node(hex: &str) -> Node {
755 fn pad_node(hex: &str) -> Node {
756 Node::from_hex(&hex_pad_right(hex)).unwrap()
756 Node::from_hex(&hex_pad_right(hex)).unwrap()
757 }
757 }
758
758
759 /// Pad hexadecimal Node prefix with zeros on the right, then insert
759 /// Pad hexadecimal Node prefix with zeros on the right, then insert
760 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
760 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
761 idx.insert(rev, pad_node(hex));
761 idx.insert(rev, pad_node(hex));
762 }
762 }
763
763
764 fn sample_nodetree() -> NodeTree {
764 fn sample_nodetree() -> NodeTree {
765 NodeTree::from(vec![
765 NodeTree::from(vec![
766 block![0: Rev(9)],
766 block![0: Rev(9)],
767 block![0: Rev(0), 1: Rev(9)],
767 block![0: Rev(0), 1: Rev(9)],
768 block![0: Block(1), 1:Rev(1)],
768 block![0: Block(1), 1:Rev(1)],
769 ])
769 ])
770 }
770 }
771
771
772 fn hex(s: &str) -> NodePrefix {
772 fn hex(s: &str) -> NodePrefix {
773 NodePrefix::from_hex(s).unwrap()
773 NodePrefix::from_hex(s).unwrap()
774 }
774 }
775
775
776 #[test]
776 #[test]
777 fn test_nt_debug() {
777 fn test_nt_debug() {
778 let nt = sample_nodetree();
778 let nt = sample_nodetree();
779 assert_eq!(
779 assert_eq!(
780 format!("{:?}", nt),
780 format!("{:?}", nt),
781 "readonly: \
781 "readonly: \
782 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
782 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
783 growable: [], \
783 growable: [], \
784 root: {0: Block(1), 1: Rev(1)}",
784 root: {0: Block(1), 1: Rev(1)}",
785 );
785 );
786 }
786 }
787
787
788 #[test]
788 #[test]
789 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
789 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
790 let mut idx: TestIndex = HashMap::new();
790 let mut idx: TestIndex = HashMap::new();
791 pad_insert(&mut idx, 1, "1234deadcafe");
791 pad_insert(&mut idx, 1, "1234deadcafe");
792
792
793 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
793 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
794 assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1));
794 assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1));
795 assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1));
795 assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1));
796 assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1));
796 assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1));
797 assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
797 assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
798 assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
798 assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
799
799
800 // and with full binary Nodes
800 // and with full binary Nodes
801 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
801 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
802 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
802 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
803 assert_eq!(nt.find_node(&idx, &unknown)?, None);
803 assert_eq!(nt.find_node(&idx, &unknown)?, None);
804 Ok(())
804 Ok(())
805 }
805 }
806
806
807 #[test]
807 #[test]
808 fn test_immutable_find_one_jump() {
808 fn test_immutable_find_one_jump() {
809 let mut idx = TestIndex::new();
809 let mut idx = TestIndex::new();
810 pad_insert(&mut idx, 9, "012");
810 pad_insert(&mut idx, 9, "012");
811 pad_insert(&mut idx, 0, "00a");
811 pad_insert(&mut idx, 0, "00a");
812
812
813 let nt = sample_nodetree();
813 let nt = sample_nodetree();
814
814
815 assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
815 assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
816 assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9)));
816 assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9)));
817 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
817 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
818 assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0)));
818 assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0)));
819 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
819 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
820 assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
820 assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
821 }
821 }
822
822
823 #[test]
823 #[test]
824 fn test_mutated_find() -> Result<(), NodeMapError> {
824 fn test_mutated_find() -> Result<(), NodeMapError> {
825 let mut idx = TestIndex::new();
825 let mut idx = TestIndex::new();
826 pad_insert(&mut idx, 9, "012");
826 pad_insert(&mut idx, 9, "012");
827 pad_insert(&mut idx, 0, "00a");
827 pad_insert(&mut idx, 0, "00a");
828 pad_insert(&mut idx, 2, "cafe");
828 pad_insert(&mut idx, 2, "cafe");
829 pad_insert(&mut idx, 3, "15");
829 pad_insert(&mut idx, 3, "15");
830 pad_insert(&mut idx, 1, "10");
830 pad_insert(&mut idx, 1, "10");
831
831
832 let nt = NodeTree {
832 let nt = NodeTree {
833 readonly: sample_nodetree().readonly,
833 readonly: sample_nodetree().readonly,
834 growable: vec![block![0: Rev(1), 5: Rev(3)]],
834 growable: vec![block![0: Rev(1), 5: Rev(3)]],
835 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
835 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
836 masked_inner_blocks: 1,
836 masked_inner_blocks: 1,
837 };
837 };
838 assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1));
838 assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1));
839 assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2));
839 assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2));
840 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
840 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
841 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
841 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
842 assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
842 assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
843 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
843 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
844 assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9));
844 assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9));
845 assert_eq!(nt.masked_readonly_blocks(), 2);
845 assert_eq!(nt.masked_readonly_blocks(), 2);
846 Ok(())
846 Ok(())
847 }
847 }
848
848
849 struct TestNtIndex {
849 struct TestNtIndex {
850 index: TestIndex,
850 index: TestIndex,
851 nt: NodeTree,
851 nt: NodeTree,
852 }
852 }
853
853
854 impl TestNtIndex {
854 impl TestNtIndex {
855 fn new() -> Self {
855 fn new() -> Self {
856 TestNtIndex {
856 TestNtIndex {
857 index: HashMap::new(),
857 index: HashMap::new(),
858 nt: NodeTree::default(),
858 nt: NodeTree::default(),
859 }
859 }
860 }
860 }
861
861
862 fn insert(
862 fn insert(
863 &mut self,
863 &mut self,
864 rev: Revision,
864 rev: Revision,
865 hex: &str,
865 hex: &str,
866 ) -> Result<(), NodeMapError> {
866 ) -> Result<(), NodeMapError> {
867 let node = pad_node(hex);
867 let node = pad_node(hex);
868 self.index.insert(rev, node.clone());
868 self.index.insert(rev, node);
869 self.nt.insert(&self.index, &node, rev)?;
869 self.nt.insert(&self.index, &node, rev)?;
870 Ok(())
870 Ok(())
871 }
871 }
872
872
873 fn find_hex(
873 fn find_hex(
874 &self,
874 &self,
875 prefix: &str,
875 prefix: &str,
876 ) -> Result<Option<Revision>, NodeMapError> {
876 ) -> Result<Option<Revision>, NodeMapError> {
877 self.nt.find_bin(&self.index, hex(prefix))
877 self.nt.find_bin(&self.index, hex(prefix))
878 }
878 }
879
879
880 fn unique_prefix_len_hex(
880 fn unique_prefix_len_hex(
881 &self,
881 &self,
882 prefix: &str,
882 prefix: &str,
883 ) -> Result<Option<usize>, NodeMapError> {
883 ) -> Result<Option<usize>, NodeMapError> {
884 self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
884 self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
885 }
885 }
886
886
887 /// Drain `added` and restart a new one
887 /// Drain `added` and restart a new one
888 fn commit(self) -> Self {
888 fn commit(self) -> Self {
889 let mut as_vec: Vec<Block> =
889 let mut as_vec: Vec<Block> =
890 self.nt.readonly.iter().map(|block| block.clone()).collect();
890 self.nt.readonly.iter().copied().collect();
891 as_vec.extend(self.nt.growable);
891 as_vec.extend(self.nt.growable);
892 as_vec.push(self.nt.root);
892 as_vec.push(self.nt.root);
893
893
894 Self {
894 Self {
895 index: self.index,
895 index: self.index,
896 nt: NodeTree::from(as_vec).into(),
896 nt: NodeTree::from(as_vec),
897 }
897 }
898 }
898 }
899 }
899 }
900
900
901 #[test]
901 #[test]
902 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
902 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
903 let mut idx = TestNtIndex::new();
903 let mut idx = TestNtIndex::new();
904 idx.insert(0, "1234")?;
904 idx.insert(0, "1234")?;
905 assert_eq!(idx.find_hex("1")?, Some(0));
905 assert_eq!(idx.find_hex("1")?, Some(0));
906 assert_eq!(idx.find_hex("12")?, Some(0));
906 assert_eq!(idx.find_hex("12")?, Some(0));
907
907
908 // let's trigger a simple split
908 // let's trigger a simple split
909 idx.insert(1, "1a34")?;
909 idx.insert(1, "1a34")?;
910 assert_eq!(idx.nt.growable.len(), 1);
910 assert_eq!(idx.nt.growable.len(), 1);
911 assert_eq!(idx.find_hex("12")?, Some(0));
911 assert_eq!(idx.find_hex("12")?, Some(0));
912 assert_eq!(idx.find_hex("1a")?, Some(1));
912 assert_eq!(idx.find_hex("1a")?, Some(1));
913
913
914 // reinserting is a no_op
914 // reinserting is a no_op
915 idx.insert(1, "1a34")?;
915 idx.insert(1, "1a34")?;
916 assert_eq!(idx.nt.growable.len(), 1);
916 assert_eq!(idx.nt.growable.len(), 1);
917 assert_eq!(idx.find_hex("12")?, Some(0));
917 assert_eq!(idx.find_hex("12")?, Some(0));
918 assert_eq!(idx.find_hex("1a")?, Some(1));
918 assert_eq!(idx.find_hex("1a")?, Some(1));
919
919
920 idx.insert(2, "1a01")?;
920 idx.insert(2, "1a01")?;
921 assert_eq!(idx.nt.growable.len(), 2);
921 assert_eq!(idx.nt.growable.len(), 2);
922 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
922 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
923 assert_eq!(idx.find_hex("12")?, Some(0));
923 assert_eq!(idx.find_hex("12")?, Some(0));
924 assert_eq!(idx.find_hex("1a3")?, Some(1));
924 assert_eq!(idx.find_hex("1a3")?, Some(1));
925 assert_eq!(idx.find_hex("1a0")?, Some(2));
925 assert_eq!(idx.find_hex("1a0")?, Some(2));
926 assert_eq!(idx.find_hex("1a12")?, None);
926 assert_eq!(idx.find_hex("1a12")?, None);
927
927
928 // now let's make it split and create more than one additional block
928 // now let's make it split and create more than one additional block
929 idx.insert(3, "1a345")?;
929 idx.insert(3, "1a345")?;
930 assert_eq!(idx.nt.growable.len(), 4);
930 assert_eq!(idx.nt.growable.len(), 4);
931 assert_eq!(idx.find_hex("1a340")?, Some(1));
931 assert_eq!(idx.find_hex("1a340")?, Some(1));
932 assert_eq!(idx.find_hex("1a345")?, Some(3));
932 assert_eq!(idx.find_hex("1a345")?, Some(3));
933 assert_eq!(idx.find_hex("1a341")?, None);
933 assert_eq!(idx.find_hex("1a341")?, None);
934
934
935 // there's no readonly block to mask
935 // there's no readonly block to mask
936 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
936 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
937 Ok(())
937 Ok(())
938 }
938 }
939
939
940 #[test]
940 #[test]
941 fn test_unique_prefix_len_zero_prefix() {
941 fn test_unique_prefix_len_zero_prefix() {
942 let mut idx = TestNtIndex::new();
942 let mut idx = TestNtIndex::new();
943 idx.insert(0, "00000abcd").unwrap();
943 idx.insert(0, "00000abcd").unwrap();
944
944
945 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
945 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
946 // in the nodetree proper, this will be found at the first nybble
946 // in the nodetree proper, this will be found at the first nybble
947 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
947 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
948 // but the first difference with `NULL_NODE`
948 // but the first difference with `NULL_NODE`
949 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
949 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
950 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
950 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
951
951
952 // same with odd result
952 // same with odd result
953 idx.insert(1, "00123").unwrap();
953 idx.insert(1, "00123").unwrap();
954 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
954 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
955 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
955 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
956
956
957 // these are unchanged of course
957 // these are unchanged of course
958 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
958 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
959 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
959 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
960 }
960 }
961
961
962 #[test]
962 #[test]
963 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
963 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
964 // check that the splitting loop is long enough
964 // check that the splitting loop is long enough
965 let mut nt_idx = TestNtIndex::new();
965 let mut nt_idx = TestNtIndex::new();
966 let nt = &mut nt_idx.nt;
966 let nt = &mut nt_idx.nt;
967 let idx = &mut nt_idx.index;
967 let idx = &mut nt_idx.index;
968
968
969 let node0_hex = hex_pad_right("444444");
969 let node0_hex = hex_pad_right("444444");
970 let mut node1_hex = hex_pad_right("444444").clone();
970 let mut node1_hex = hex_pad_right("444444");
971 node1_hex.pop();
971 node1_hex.pop();
972 node1_hex.push('5');
972 node1_hex.push('5');
973 let node0 = Node::from_hex(&node0_hex).unwrap();
973 let node0 = Node::from_hex(&node0_hex).unwrap();
974 let node1 = Node::from_hex(&node1_hex).unwrap();
974 let node1 = Node::from_hex(&node1_hex).unwrap();
975
975
976 idx.insert(0, node0.clone());
976 idx.insert(0, node0);
977 nt.insert(idx, &node0, 0)?;
977 nt.insert(idx, &node0, 0)?;
978 idx.insert(1, node1.clone());
978 idx.insert(1, node1);
979 nt.insert(idx, &node1, 1)?;
979 nt.insert(idx, &node1, 1)?;
980
980
981 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
981 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
982 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
982 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
983 Ok(())
983 Ok(())
984 }
984 }
985
985
986 #[test]
986 #[test]
987 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
987 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
988 let mut idx = TestNtIndex::new();
988 let mut idx = TestNtIndex::new();
989 idx.insert(0, "1234")?;
989 idx.insert(0, "1234")?;
990 idx.insert(1, "1235")?;
990 idx.insert(1, "1235")?;
991 idx.insert(2, "131")?;
991 idx.insert(2, "131")?;
992 idx.insert(3, "cafe")?;
992 idx.insert(3, "cafe")?;
993 let mut idx = idx.commit();
993 let mut idx = idx.commit();
994 assert_eq!(idx.find_hex("1234")?, Some(0));
994 assert_eq!(idx.find_hex("1234")?, Some(0));
995 assert_eq!(idx.find_hex("1235")?, Some(1));
995 assert_eq!(idx.find_hex("1235")?, Some(1));
996 assert_eq!(idx.find_hex("131")?, Some(2));
996 assert_eq!(idx.find_hex("131")?, Some(2));
997 assert_eq!(idx.find_hex("cafe")?, Some(3));
997 assert_eq!(idx.find_hex("cafe")?, Some(3));
998 // we did not add anything since init from readonly
998 // we did not add anything since init from readonly
999 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
999 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1000
1000
1001 idx.insert(4, "123A")?;
1001 idx.insert(4, "123A")?;
1002 assert_eq!(idx.find_hex("1234")?, Some(0));
1002 assert_eq!(idx.find_hex("1234")?, Some(0));
1003 assert_eq!(idx.find_hex("1235")?, Some(1));
1003 assert_eq!(idx.find_hex("1235")?, Some(1));
1004 assert_eq!(idx.find_hex("131")?, Some(2));
1004 assert_eq!(idx.find_hex("131")?, Some(2));
1005 assert_eq!(idx.find_hex("cafe")?, Some(3));
1005 assert_eq!(idx.find_hex("cafe")?, Some(3));
1006 assert_eq!(idx.find_hex("123A")?, Some(4));
1006 assert_eq!(idx.find_hex("123A")?, Some(4));
1007 // we masked blocks for all prefixes of "123", including the root
1007 // we masked blocks for all prefixes of "123", including the root
1008 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1008 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1009
1009
1010 eprintln!("{:?}", idx.nt);
1010 eprintln!("{:?}", idx.nt);
1011 idx.insert(5, "c0")?;
1011 idx.insert(5, "c0")?;
1012 assert_eq!(idx.find_hex("cafe")?, Some(3));
1012 assert_eq!(idx.find_hex("cafe")?, Some(3));
1013 assert_eq!(idx.find_hex("c0")?, Some(5));
1013 assert_eq!(idx.find_hex("c0")?, Some(5));
1014 assert_eq!(idx.find_hex("c1")?, None);
1014 assert_eq!(idx.find_hex("c1")?, None);
1015 assert_eq!(idx.find_hex("1234")?, Some(0));
1015 assert_eq!(idx.find_hex("1234")?, Some(0));
1016 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1016 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1017 // it doesn't mask anything
1017 // it doesn't mask anything
1018 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1018 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1019
1019
1020 Ok(())
1020 Ok(())
1021 }
1021 }
1022
1022
1023 #[test]
1023 #[test]
1024 fn test_invalidate_all() -> Result<(), NodeMapError> {
1024 fn test_invalidate_all() -> Result<(), NodeMapError> {
1025 let mut idx = TestNtIndex::new();
1025 let mut idx = TestNtIndex::new();
1026 idx.insert(0, "1234")?;
1026 idx.insert(0, "1234")?;
1027 idx.insert(1, "1235")?;
1027 idx.insert(1, "1235")?;
1028 idx.insert(2, "131")?;
1028 idx.insert(2, "131")?;
1029 idx.insert(3, "cafe")?;
1029 idx.insert(3, "cafe")?;
1030 let mut idx = idx.commit();
1030 let mut idx = idx.commit();
1031
1031
1032 idx.nt.invalidate_all();
1032 idx.nt.invalidate_all();
1033
1033
1034 assert_eq!(idx.find_hex("1234")?, None);
1034 assert_eq!(idx.find_hex("1234")?, None);
1035 assert_eq!(idx.find_hex("1235")?, None);
1035 assert_eq!(idx.find_hex("1235")?, None);
1036 assert_eq!(idx.find_hex("131")?, None);
1036 assert_eq!(idx.find_hex("131")?, None);
1037 assert_eq!(idx.find_hex("cafe")?, None);
1037 assert_eq!(idx.find_hex("cafe")?, None);
1038 // all the readonly blocks have been masked, this is the
1038 // all the readonly blocks have been masked, this is the
1039 // conventional expected response
1039 // conventional expected response
1040 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1040 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1041 Ok(())
1041 Ok(())
1042 }
1042 }
1043
1043
1044 #[test]
1044 #[test]
1045 fn test_into_added_empty() {
1045 fn test_into_added_empty() {
1046 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1046 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1047 assert!(sample_nodetree()
1047 assert!(sample_nodetree()
1048 .into_readonly_and_added_bytes()
1048 .into_readonly_and_added_bytes()
1049 .1
1049 .1
1050 .is_empty());
1050 .is_empty());
1051 }
1051 }
1052
1052
1053 #[test]
1053 #[test]
1054 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1054 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1055 let mut idx = TestNtIndex::new();
1055 let mut idx = TestNtIndex::new();
1056 idx.insert(0, "1234")?;
1056 idx.insert(0, "1234")?;
1057 let mut idx = idx.commit();
1057 let mut idx = idx.commit();
1058 idx.insert(4, "cafe")?;
1058 idx.insert(4, "cafe")?;
1059 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1059 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1060
1060
1061 // only the root block has been changed
1061 // only the root block has been changed
1062 assert_eq!(bytes.len(), size_of::<Block>());
1062 assert_eq!(bytes.len(), size_of::<Block>());
1063 // big endian for -2
1063 // big endian for -2
1064 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1064 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1065 // big endian for -6
1065 // big endian for -6
1066 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1066 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1067 Ok(())
1067 Ok(())
1068 }
1068 }
1069 }
1069 }
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
General Comments 0
You need to be logged in to leave comments. Login now