##// END OF EJS Templates
rust-dirstate: add "dirs" Rust implementation...
Raphaël Gomès -
r42736:2dcee649 default
parent child Browse files
Show More
@@ -0,0 +1,355 b''
1 // dirs_multiset.rs
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7
8 //! A multiset of directory names.
9 //!
10 //! Used to counts the references to directories in a manifest or dirstate.
11 use std::collections::hash_map::Entry;
12 use std::collections::HashMap;
13 use std::ops::Deref;
14 use {DirsIterable, DirstateEntry, DirstateMapError};
15
16 #[derive(PartialEq, Debug)]
17 pub struct DirsMultiset {
18 inner: HashMap<Vec<u8>, u32>,
19 }
20
21 impl Deref for DirsMultiset {
22 type Target = HashMap<Vec<u8>, u32>;
23
24 fn deref(&self) -> &Self::Target {
25 &self.inner
26 }
27 }
28
29 impl DirsMultiset {
30 /// Initializes the multiset from a dirstate or a manifest.
31 ///
32 /// If `skip_state` is provided, skips dirstate entries with equal state.
33 pub fn new(iterable: DirsIterable, skip_state: Option<i8>) -> Self {
34 let mut multiset = DirsMultiset {
35 inner: HashMap::new(),
36 };
37
38 match iterable {
39 DirsIterable::Dirstate(vec) => {
40 for (ref filename, DirstateEntry { state, .. }) in vec {
41 // This `if` is optimized out of the loop
42 if let Some(skip) = skip_state {
43 if skip != state {
44 multiset.add_path(filename);
45 }
46 } else {
47 multiset.add_path(filename);
48 }
49 }
50 }
51 DirsIterable::Manifest(vec) => {
52 for ref filename in vec {
53 multiset.add_path(filename);
54 }
55 }
56 }
57
58 multiset
59 }
60
61 /// Returns the slice up to the next directory name from right to left,
62 /// without trailing slash
63 fn find_dir(path: &[u8]) -> &[u8] {
64 let mut path = path;
65 loop {
66 if let Some(new_pos) = path.len().checked_sub(1) {
67 if path[new_pos] == b'/' {
68 break &path[..new_pos];
69 }
70 path = &path[..new_pos];
71 } else {
72 break &[];
73 }
74 }
75 }
76
77 /// Increases the count of deepest directory contained in the path.
78 ///
79 /// If the directory is not yet in the map, adds its parents.
80 pub fn add_path(&mut self, path: &[u8]) {
81 let mut pos = path.len();
82
83 loop {
84 let subpath = Self::find_dir(&path[..pos]);
85 if let Some(val) = self.inner.get_mut(subpath) {
86 *val += 1;
87 break;
88 }
89 self.inner.insert(subpath.to_owned(), 1);
90
91 pos = subpath.len();
92 if pos == 0 {
93 break;
94 }
95 }
96 }
97
98 /// Decreases the count of deepest directory contained in the path.
99 ///
100 /// If it is the only reference, decreases all parents until one is
101 /// removed.
102 /// If the directory is not in the map, something horrible has happened.
103 pub fn delete_path(
104 &mut self,
105 path: &[u8],
106 ) -> Result<(), DirstateMapError> {
107 let mut pos = path.len();
108
109 loop {
110 let subpath = Self::find_dir(&path[..pos]);
111 match self.inner.entry(subpath.to_owned()) {
112 Entry::Occupied(mut entry) => {
113 let val = entry.get().clone();
114 if val > 1 {
115 entry.insert(val - 1);
116 break;
117 }
118 entry.remove();
119 }
120 Entry::Vacant(_) => {
121 return Err(DirstateMapError::PathNotFound(path.to_owned()))
122 }
123 };
124
125 pos = subpath.len();
126 if pos == 0 {
127 break;
128 }
129 }
130
131 Ok(())
132 }
133 }
134
135 #[cfg(test)]
136 mod tests {
137 use super::*;
138
139 #[test]
140 fn test_delete_path_path_not_found() {
141 let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
142 let path = b"doesnotexist/";
143 assert_eq!(
144 Err(DirstateMapError::PathNotFound(path.to_vec())),
145 map.delete_path(path)
146 );
147 }
148
149 #[test]
150 fn test_delete_path_empty_path() {
151 let mut map =
152 DirsMultiset::new(DirsIterable::Manifest(vec![vec![]]), None);
153 let path = b"";
154 assert_eq!(Ok(()), map.delete_path(path));
155 assert_eq!(
156 Err(DirstateMapError::PathNotFound(path.to_vec())),
157 map.delete_path(path)
158 );
159 }
160
161 #[test]
162 fn test_delete_path_successful() {
163 let mut map = DirsMultiset {
164 inner: [("", 5), ("a", 3), ("a/b", 2), ("a/c", 1)]
165 .iter()
166 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
167 .collect(),
168 };
169
170 assert_eq!(Ok(()), map.delete_path(b"a/b/"));
171 assert_eq!(Ok(()), map.delete_path(b"a/b/"));
172 assert_eq!(
173 Err(DirstateMapError::PathNotFound(b"a/b/".to_vec())),
174 map.delete_path(b"a/b/")
175 );
176
177 assert_eq!(2, *map.get(&b"a".to_vec()).unwrap());
178 assert_eq!(1, *map.get(&b"a/c".to_vec()).unwrap());
179 eprintln!("{:?}", map);
180 assert_eq!(Ok(()), map.delete_path(b"a/"));
181 eprintln!("{:?}", map);
182
183 assert_eq!(Ok(()), map.delete_path(b"a/c/"));
184 assert_eq!(
185 Err(DirstateMapError::PathNotFound(b"a/c/".to_vec())),
186 map.delete_path(b"a/c/")
187 );
188 }
189
190 #[test]
191 fn test_add_path_empty_path() {
192 let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
193 let path = b"";
194 map.add_path(path);
195
196 assert_eq!(1, map.len());
197 }
198
199 #[test]
200 fn test_add_path_successful() {
201 let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
202
203 map.add_path(b"a/");
204 assert_eq!(1, *map.get(&b"a".to_vec()).unwrap());
205 assert_eq!(1, *map.get(&Vec::new()).unwrap());
206 assert_eq!(2, map.len());
207
208 // Non directory should be ignored
209 map.add_path(b"a");
210 assert_eq!(1, *map.get(&b"a".to_vec()).unwrap());
211 assert_eq!(2, map.len());
212
213 // Non directory will still add its base
214 map.add_path(b"a/b");
215 assert_eq!(2, *map.get(&b"a".to_vec()).unwrap());
216 assert_eq!(2, map.len());
217
218 // Duplicate path works
219 map.add_path(b"a/");
220 assert_eq!(3, *map.get(&b"a".to_vec()).unwrap());
221
222 // Nested dir adds to its base
223 map.add_path(b"a/b/");
224 assert_eq!(4, *map.get(&b"a".to_vec()).unwrap());
225 assert_eq!(1, *map.get(&b"a/b".to_vec()).unwrap());
226
227 // but not its base's base, because it already existed
228 map.add_path(b"a/b/c/");
229 assert_eq!(4, *map.get(&b"a".to_vec()).unwrap());
230 assert_eq!(2, *map.get(&b"a/b".to_vec()).unwrap());
231
232 map.add_path(b"a/c/");
233 assert_eq!(1, *map.get(&b"a/c".to_vec()).unwrap());
234
235 let expected = DirsMultiset {
236 inner: [("", 2), ("a", 5), ("a/b", 2), ("a/b/c", 1), ("a/c", 1)]
237 .iter()
238 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
239 .collect(),
240 };
241 assert_eq!(map, expected);
242 }
243
244 #[test]
245 fn test_dirsmultiset_new_empty() {
246 use DirsIterable::{Dirstate, Manifest};
247
248 let new = DirsMultiset::new(Manifest(vec![]), None);
249 let expected = DirsMultiset {
250 inner: HashMap::new(),
251 };
252 assert_eq!(expected, new);
253
254 let new = DirsMultiset::new(Dirstate(vec![]), None);
255 let expected = DirsMultiset {
256 inner: HashMap::new(),
257 };
258 assert_eq!(expected, new);
259 }
260
261 #[test]
262 fn test_dirsmultiset_new_no_skip() {
263 use DirsIterable::{Dirstate, Manifest};
264
265 let input_vec = ["a/", "b/", "a/c", "a/d/"]
266 .iter()
267 .map(|e| e.as_bytes().to_vec())
268 .collect();
269 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
270 .iter()
271 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
272 .collect();
273
274 let new = DirsMultiset::new(Manifest(input_vec), None);
275 let expected = DirsMultiset {
276 inner: expected_inner,
277 };
278 assert_eq!(expected, new);
279
280 let input_map = ["a/", "b/", "a/c", "a/d/"]
281 .iter()
282 .map(|f| {
283 (
284 f.as_bytes().to_vec(),
285 DirstateEntry {
286 state: 0,
287 mode: 0,
288 mtime: 0,
289 size: 0,
290 },
291 )
292 })
293 .collect();
294 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
295 .iter()
296 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
297 .collect();
298
299 let new = DirsMultiset::new(Dirstate(input_map), None);
300 let expected = DirsMultiset {
301 inner: expected_inner,
302 };
303 assert_eq!(expected, new);
304 }
305
306 #[test]
307 fn test_dirsmultiset_new_skip() {
308 use DirsIterable::{Dirstate, Manifest};
309
310 let input_vec = ["a/", "b/", "a/c", "a/d/"]
311 .iter()
312 .map(|e| e.as_bytes().to_vec())
313 .collect();
314 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
315 .iter()
316 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
317 .collect();
318
319 let new = DirsMultiset::new(Manifest(input_vec), Some('n' as i8));
320 let expected = DirsMultiset {
321 inner: expected_inner,
322 };
323 // Skip does not affect a manifest
324 assert_eq!(expected, new);
325
326 let input_map =
327 [("a/", 'n'), ("a/b/", 'n'), ("a/c", 'r'), ("a/d/", 'm')]
328 .iter()
329 .map(|(f, state)| {
330 (
331 f.as_bytes().to_vec(),
332 DirstateEntry {
333 state: *state as i8,
334 mode: 0,
335 mtime: 0,
336 size: 0,
337 },
338 )
339 })
340 .collect();
341
342 // "a" incremented with "a/c" and "a/d/"
343 let expected_inner = [("", 1), ("a", 2), ("a/d", 1)]
344 .iter()
345 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
346 .collect();
347
348 let new = DirsMultiset::new(Dirstate(input_map), Some('n' as i8));
349 let expected = DirsMultiset {
350 inner: expected_inner,
351 };
352 assert_eq!(expected, new);
353 }
354
355 }
@@ -1,28 +1,36 b''
1 pub mod dirs_multiset;
1 pub mod parsers;
2 pub mod parsers;
2
3
3 #[derive(Debug, PartialEq, Copy, Clone)]
4 #[derive(Debug, PartialEq, Copy, Clone)]
4 pub struct DirstateParents<'a> {
5 pub struct DirstateParents<'a> {
5 pub p1: &'a [u8],
6 pub p1: &'a [u8],
6 pub p2: &'a [u8],
7 pub p2: &'a [u8],
7 }
8 }
8
9
9 /// The C implementation uses all signed types. This will be an issue
10 /// The C implementation uses all signed types. This will be an issue
10 /// either when 4GB+ source files are commonplace or in 2038, whichever
11 /// either when 4GB+ source files are commonplace or in 2038, whichever
11 /// comes first.
12 /// comes first.
12 #[derive(Debug, PartialEq)]
13 #[derive(Debug, PartialEq)]
13 pub struct DirstateEntry {
14 pub struct DirstateEntry {
14 pub state: i8,
15 pub state: i8,
15 pub mode: i32,
16 pub mode: i32,
16 pub mtime: i32,
17 pub mtime: i32,
17 pub size: i32,
18 pub size: i32,
18 }
19 }
19
20
20 pub type DirstateVec = Vec<(Vec<u8>, DirstateEntry)>;
21 pub type DirstateVec = Vec<(Vec<u8>, DirstateEntry)>;
21
22
22 #[derive(Debug, PartialEq)]
23 #[derive(Debug, PartialEq)]
23 pub struct CopyVecEntry<'a> {
24 pub struct CopyVecEntry<'a> {
24 pub path: &'a [u8],
25 pub path: &'a [u8],
25 pub copy_path: &'a [u8],
26 pub copy_path: &'a [u8],
26 }
27 }
27
28
28 pub type CopyVec<'a> = Vec<CopyVecEntry<'a>>;
29 pub type CopyVec<'a> = Vec<CopyVecEntry<'a>>;
30
31 /// The Python implementation passes either a mapping (dirstate) or a flat
32 /// iterable (manifest)
33 pub enum DirsIterable {
34 Dirstate(DirstateVec),
35 Manifest(Vec<Vec<u8>>),
36 }
@@ -1,103 +1,111 b''
1 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
1 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
2 //
2 //
3 // This software may be used and distributed according to the terms of the
3 // This software may be used and distributed according to the terms of the
4 // GNU General Public License version 2 or any later version.
4 // GNU General Public License version 2 or any later version.
5 extern crate byteorder;
5 extern crate byteorder;
6 extern crate memchr;
6 extern crate memchr;
7 #[macro_use]
7 #[macro_use]
8 extern crate lazy_static;
8 extern crate lazy_static;
9 extern crate regex;
9 extern crate regex;
10
10
11 mod ancestors;
11 mod ancestors;
12 pub mod dagops;
12 pub mod dagops;
13 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
13 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
14 mod dirstate;
14 mod dirstate;
15 pub mod discovery;
15 pub mod discovery;
16 pub mod testing; // unconditionally built, for use from integration tests
16 pub mod testing; // unconditionally built, for use from integration tests
17 pub use dirstate::{
17 pub use dirstate::{
18 dirs_multiset::DirsMultiset,
18 parsers::{pack_dirstate, parse_dirstate},
19 parsers::{pack_dirstate, parse_dirstate},
19 CopyVec, CopyVecEntry, DirstateEntry, DirstateParents, DirstateVec,
20 CopyVec, CopyVecEntry, DirsIterable, DirstateEntry, DirstateParents,
21 DirstateVec,
20 };
22 };
21 mod filepatterns;
23 mod filepatterns;
22 mod utils;
24 mod utils;
23
25
24 pub use filepatterns::{
26 pub use filepatterns::{
25 build_single_regex, read_pattern_file, PatternSyntax, PatternTuple,
27 build_single_regex, read_pattern_file, PatternSyntax, PatternTuple,
26 };
28 };
27
29
28 /// Mercurial revision numbers
30 /// Mercurial revision numbers
29 ///
31 ///
30 /// As noted in revlog.c, revision numbers are actually encoded in
32 /// As noted in revlog.c, revision numbers are actually encoded in
31 /// 4 bytes, and are liberally converted to ints, whence the i32
33 /// 4 bytes, and are liberally converted to ints, whence the i32
32 pub type Revision = i32;
34 pub type Revision = i32;
33
35
34 /// Marker expressing the absence of a parent
36 /// Marker expressing the absence of a parent
35 ///
37 ///
36 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
38 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
37 /// to be smaller that all existing revisions.
39 /// to be smaller that all existing revisions.
38 pub const NULL_REVISION: Revision = -1;
40 pub const NULL_REVISION: Revision = -1;
39
41
40 /// Same as `mercurial.node.wdirrev`
42 /// Same as `mercurial.node.wdirrev`
41 ///
43 ///
42 /// This is also equal to `i32::max_value()`, but it's better to spell
44 /// This is also equal to `i32::max_value()`, but it's better to spell
43 /// it out explicitely, same as in `mercurial.node`
45 /// it out explicitely, same as in `mercurial.node`
44 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
46 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
45
47
46 /// The simplest expression of what we need of Mercurial DAGs.
48 /// The simplest expression of what we need of Mercurial DAGs.
47 pub trait Graph {
49 pub trait Graph {
48 /// Return the two parents of the given `Revision`.
50 /// Return the two parents of the given `Revision`.
49 ///
51 ///
50 /// Each of the parents can be independently `NULL_REVISION`
52 /// Each of the parents can be independently `NULL_REVISION`
51 fn parents(&self, Revision) -> Result<[Revision; 2], GraphError>;
53 fn parents(&self, Revision) -> Result<[Revision; 2], GraphError>;
52 }
54 }
53
55
54 pub type LineNumber = usize;
56 pub type LineNumber = usize;
55
57
56 #[derive(Clone, Debug, PartialEq)]
58 #[derive(Clone, Debug, PartialEq)]
57 pub enum GraphError {
59 pub enum GraphError {
58 ParentOutOfRange(Revision),
60 ParentOutOfRange(Revision),
59 WorkingDirectoryUnsupported,
61 WorkingDirectoryUnsupported,
60 }
62 }
61
63
62 #[derive(Clone, Debug, PartialEq)]
64 #[derive(Clone, Debug, PartialEq)]
63 pub enum DirstateParseError {
65 pub enum DirstateParseError {
64 TooLittleData,
66 TooLittleData,
65 Overflow,
67 Overflow,
66 CorruptedEntry(String),
68 CorruptedEntry(String),
67 }
69 }
68
70
69 #[derive(Debug, PartialEq)]
71 #[derive(Debug, PartialEq)]
70 pub enum DirstatePackError {
72 pub enum DirstatePackError {
71 CorruptedEntry(String),
73 CorruptedEntry(String),
72 CorruptedParent,
74 CorruptedParent,
73 BadSize(usize, usize),
75 BadSize(usize, usize),
74 }
76 }
75
77
78 #[derive(Debug, PartialEq)]
79 pub enum DirstateMapError {
80 PathNotFound(Vec<u8>),
81 EmptyPath,
82 }
83
76 impl From<std::io::Error> for DirstatePackError {
84 impl From<std::io::Error> for DirstatePackError {
77 fn from(e: std::io::Error) -> Self {
85 fn from(e: std::io::Error) -> Self {
78 DirstatePackError::CorruptedEntry(e.to_string())
86 DirstatePackError::CorruptedEntry(e.to_string())
79 }
87 }
80 }
88 }
81
89
82 impl From<std::io::Error> for DirstateParseError {
90 impl From<std::io::Error> for DirstateParseError {
83 fn from(e: std::io::Error) -> Self {
91 fn from(e: std::io::Error) -> Self {
84 DirstateParseError::CorruptedEntry(e.to_string())
92 DirstateParseError::CorruptedEntry(e.to_string())
85 }
93 }
86 }
94 }
87
95
88 #[derive(Debug)]
96 #[derive(Debug)]
89 pub enum PatternError {
97 pub enum PatternError {
90 UnsupportedSyntax(String),
98 UnsupportedSyntax(String),
91 }
99 }
92
100
93 #[derive(Debug)]
101 #[derive(Debug)]
94 pub enum PatternFileError {
102 pub enum PatternFileError {
95 IO(std::io::Error),
103 IO(std::io::Error),
96 Pattern(PatternError, LineNumber),
104 Pattern(PatternError, LineNumber),
97 }
105 }
98
106
99 impl From<std::io::Error> for PatternFileError {
107 impl From<std::io::Error> for PatternFileError {
100 fn from(e: std::io::Error) -> Self {
108 fn from(e: std::io::Error) -> Self {
101 PatternFileError::IO(e)
109 PatternFileError::IO(e)
102 }
110 }
103 }
111 }
General Comments 0
You need to be logged in to leave comments. Login now