##// END OF EJS Templates
rust-dirstate: rust implementation of dirstatemap...
Raphaël Gomès -
r42998:fce6dc93 default
parent child Browse files
Show More
@@ -0,0 +1,435
1 // dirstate_map.rs
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7
8 use crate::{
9 dirstate::{parsers::PARENT_SIZE, EntryState},
10 pack_dirstate, parse_dirstate,
11 utils::copy_into_array,
12 CopyMap, DirsIterable, DirsMultiset, DirstateEntry, DirstateError,
13 DirstateMapError, DirstateParents, DirstateParseError, StateMap,
14 };
15 use core::borrow::Borrow;
16 use std::collections::{HashMap, HashSet};
17 use std::iter::FromIterator;
18 use std::ops::Deref;
19 use std::time::Duration;
20
21 pub type FileFoldMap = HashMap<Vec<u8>, Vec<u8>>;
22
23 const NULL_REVISION: [u8; 20] = [
24 b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0',
25 b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0',
26 ];
27 const MTIME_UNSET: i32 = -1;
28 const SIZE_DIRTY: i32 = -2;
29
30 #[derive(Default)]
31 pub struct DirstateMap {
32 state_map: StateMap,
33 pub copy_map: CopyMap,
34 file_fold_map: Option<FileFoldMap>,
35 pub dirs: Option<DirsMultiset>,
36 pub all_dirs: Option<DirsMultiset>,
37 non_normal_set: HashSet<Vec<u8>>,
38 other_parent_set: HashSet<Vec<u8>>,
39 parents: Option<DirstateParents>,
40 dirty_parents: bool,
41 }
42
43 /// Should only really be used in python interface code, for clarity
44 impl Deref for DirstateMap {
45 type Target = StateMap;
46
47 fn deref(&self) -> &Self::Target {
48 &self.state_map
49 }
50 }
51
52 impl FromIterator<(Vec<u8>, DirstateEntry)> for DirstateMap {
53 fn from_iter<I: IntoIterator<Item = (Vec<u8>, DirstateEntry)>>(
54 iter: I,
55 ) -> Self {
56 Self {
57 state_map: iter.into_iter().collect(),
58 ..Self::default()
59 }
60 }
61 }
62
63 impl DirstateMap {
64 pub fn new() -> Self {
65 Self::default()
66 }
67
68 pub fn clear(&mut self) {
69 self.state_map.clear();
70 self.copy_map.clear();
71 self.file_fold_map = None;
72 self.non_normal_set.clear();
73 self.other_parent_set.clear();
74 self.set_parents(DirstateParents {
75 p1: NULL_REVISION,
76 p2: NULL_REVISION,
77 })
78 }
79
80 /// Add a tracked file to the dirstate
81 pub fn add_file(
82 &mut self,
83 filename: &[u8],
84 old_state: EntryState,
85 entry: DirstateEntry,
86 ) {
87 if old_state == EntryState::Unknown || old_state == EntryState::Removed
88 {
89 if let Some(ref mut dirs) = self.dirs {
90 dirs.add_path(filename)
91 }
92 }
93 if old_state == EntryState::Unknown {
94 if let Some(ref mut all_dirs) = self.all_dirs {
95 all_dirs.add_path(filename)
96 }
97 }
98 self.state_map.insert(filename.to_owned(), entry.to_owned());
99
100 if entry.state != EntryState::Normal || entry.mtime == MTIME_UNSET {
101 self.non_normal_set.insert(filename.to_owned());
102 }
103
104 if entry.size == SIZE_DIRTY {
105 self.other_parent_set.insert(filename.to_owned());
106 }
107 }
108
109 /// Mark a file as removed in the dirstate.
110 ///
111 /// The `size` parameter is used to store sentinel values that indicate
112 /// the file's previous state. In the future, we should refactor this
113 /// to be more explicit about what that state is.
114 pub fn remove_file(
115 &mut self,
116 filename: &[u8],
117 old_state: EntryState,
118 size: i32,
119 ) -> Result<(), DirstateMapError> {
120 if old_state != EntryState::Unknown && old_state != EntryState::Removed
121 {
122 if let Some(ref mut dirs) = self.dirs {
123 dirs.delete_path(filename)?;
124 }
125 }
126 if old_state == EntryState::Unknown {
127 if let Some(ref mut all_dirs) = self.all_dirs {
128 all_dirs.add_path(filename);
129 }
130 }
131
132 if let Some(ref mut file_fold_map) = self.file_fold_map {
133 file_fold_map
134 .remove::<Vec<u8>>(filename.to_ascii_uppercase().as_ref());
135 }
136 self.state_map.insert(
137 filename.to_owned(),
138 DirstateEntry {
139 state: EntryState::Removed,
140 mode: 0,
141 size,
142 mtime: 0,
143 },
144 );
145 self.non_normal_set.insert(filename.to_owned());
146 Ok(())
147 }
148
149 /// Remove a file from the dirstate.
150 /// Returns `true` if the file was previously recorded.
151 pub fn drop_file(
152 &mut self,
153 filename: &[u8],
154 old_state: EntryState,
155 ) -> Result<bool, DirstateMapError> {
156 let exists = self
157 .state_map
158 .remove::<Vec<u8>>(filename.to_owned().as_ref())
159 .is_some();
160
161 if exists {
162 if old_state != EntryState::Removed {
163 if let Some(ref mut dirs) = self.dirs {
164 dirs.delete_path(filename)?;
165 }
166 }
167 if let Some(ref mut all_dirs) = self.all_dirs {
168 all_dirs.delete_path(filename)?;
169 }
170 }
171 if let Some(ref mut file_fold_map) = self.file_fold_map {
172 file_fold_map
173 .remove::<Vec<u8>>(filename.to_ascii_uppercase().as_ref());
174 }
175 self.non_normal_set
176 .remove::<Vec<u8>>(filename.to_owned().as_ref());
177
178 Ok(exists)
179 }
180
181 pub fn clear_ambiguous_times(
182 &mut self,
183 filenames: Vec<Vec<u8>>,
184 now: i32,
185 ) {
186 for filename in filenames {
187 let mut changed = false;
188 self.state_map
189 .entry(filename.to_owned())
190 .and_modify(|entry| {
191 if entry.state == EntryState::Normal && entry.mtime == now
192 {
193 changed = true;
194 *entry = DirstateEntry {
195 mtime: MTIME_UNSET,
196 ..*entry
197 };
198 }
199 });
200 if changed {
201 self.non_normal_set.insert(filename.to_owned());
202 }
203 }
204 }
205
206 pub fn non_normal_other_parent_entries(
207 &self,
208 ) -> (HashSet<Vec<u8>>, HashSet<Vec<u8>>) {
209 let mut non_normal = HashSet::new();
210 let mut other_parent = HashSet::new();
211
212 for (
213 filename,
214 DirstateEntry {
215 state, size, mtime, ..
216 },
217 ) in self.state_map.iter()
218 {
219 if *state != EntryState::Normal || *mtime == MTIME_UNSET {
220 non_normal.insert(filename.to_owned());
221 }
222 if *state == EntryState::Normal && *size == SIZE_DIRTY {
223 other_parent.insert(filename.to_owned());
224 }
225 }
226
227 (non_normal, other_parent)
228 }
229
230 /// Both of these setters and their uses appear to be the simplest way to
231 /// emulate a Python lazy property, but it is ugly and unidiomatic.
232 /// TODO One day, rewriting this struct using the typestate might be a
233 /// good idea.
234 pub fn set_all_dirs(&mut self) {
235 if self.all_dirs.is_none() {
236 self.all_dirs = Some(DirsMultiset::new(
237 DirsIterable::Dirstate(&self.state_map),
238 None,
239 ));
240 }
241 }
242
243 pub fn set_dirs(&mut self) {
244 if self.dirs.is_none() {
245 self.dirs = Some(DirsMultiset::new(
246 DirsIterable::Dirstate(&self.state_map),
247 Some(EntryState::Removed),
248 ));
249 }
250 }
251
252 pub fn has_tracked_dir(&mut self, directory: &[u8]) -> bool {
253 self.set_dirs();
254 self.dirs.as_ref().unwrap().contains(directory.as_ref())
255 }
256
257 pub fn has_dir(&mut self, directory: &[u8]) -> bool {
258 self.set_all_dirs();
259 self.all_dirs.as_ref().unwrap().contains(directory.as_ref())
260 }
261
262 pub fn parents(
263 &mut self,
264 file_contents: &[u8],
265 ) -> Result<DirstateParents, DirstateError> {
266 if let Some(ref parents) = self.parents {
267 return Ok(parents.clone());
268 }
269 let parents;
270 if file_contents.len() == 40 {
271 parents = DirstateParents {
272 p1: copy_into_array(&file_contents[..PARENT_SIZE]),
273 p2: copy_into_array(
274 &file_contents[PARENT_SIZE..PARENT_SIZE * 2],
275 ),
276 };
277 } else if file_contents.is_empty() {
278 parents = DirstateParents {
279 p1: NULL_REVISION,
280 p2: NULL_REVISION,
281 };
282 } else {
283 return Err(DirstateError::Parse(DirstateParseError::Damaged));
284 }
285
286 self.parents = Some(parents.to_owned());
287 Ok(parents.clone())
288 }
289
290 pub fn set_parents(&mut self, parents: DirstateParents) {
291 self.parents = Some(parents.clone());
292 self.dirty_parents = true;
293 }
294
295 pub fn read(
296 &mut self,
297 file_contents: &[u8],
298 ) -> Result<Option<DirstateParents>, DirstateError> {
299 if file_contents.is_empty() {
300 return Ok(None);
301 }
302
303 let parents = parse_dirstate(
304 &mut self.state_map,
305 &mut self.copy_map,
306 file_contents,
307 )?;
308
309 if !self.dirty_parents {
310 self.set_parents(parents.to_owned());
311 }
312
313 Ok(Some(parents))
314 }
315
316 pub fn pack(
317 &mut self,
318 parents: DirstateParents,
319 now: Duration,
320 ) -> Result<Vec<u8>, DirstateError> {
321 let packed =
322 pack_dirstate(&mut self.state_map, &self.copy_map, parents, now)?;
323
324 self.dirty_parents = false;
325
326 let result = self.non_normal_other_parent_entries();
327 self.non_normal_set = result.0;
328 self.other_parent_set = result.1;
329 Ok(packed)
330 }
331
332 pub fn build_file_fold_map(&mut self) -> FileFoldMap {
333 if let Some(ref file_fold_map) = self.file_fold_map {
334 return file_fold_map.to_owned();
335 }
336 let mut new_file_fold_map = FileFoldMap::new();
337 for (filename, DirstateEntry { state, .. }) in self.state_map.borrow()
338 {
339 if *state == EntryState::Removed {
340 new_file_fold_map.insert(
341 filename.to_ascii_uppercase().to_owned(),
342 filename.to_owned(),
343 );
344 }
345 }
346 self.file_fold_map = Some(new_file_fold_map);
347 self.file_fold_map.to_owned().unwrap()
348 }
349 }
350
351 #[cfg(test)]
352 mod tests {
353 use super::*;
354
355 #[test]
356 fn test_dirs_multiset() {
357 let mut map = DirstateMap::new();
358 assert!(map.dirs.is_none());
359 assert!(map.all_dirs.is_none());
360
361 assert_eq!(false, map.has_dir(b"nope"));
362 assert!(map.all_dirs.is_some());
363 assert!(map.dirs.is_none());
364
365 assert_eq!(false, map.has_tracked_dir(b"nope"));
366 assert!(map.dirs.is_some());
367 }
368
369 #[test]
370 fn test_add_file() {
371 let mut map = DirstateMap::new();
372
373 assert_eq!(0, map.len());
374
375 map.add_file(
376 b"meh",
377 EntryState::Normal,
378 DirstateEntry {
379 state: EntryState::Normal,
380 mode: 1337,
381 mtime: 1337,
382 size: 1337,
383 },
384 );
385
386 assert_eq!(1, map.len());
387 assert_eq!(0, map.non_normal_set.len());
388 assert_eq!(0, map.other_parent_set.len());
389 }
390
391 #[test]
392 fn test_non_normal_other_parent_entries() {
393 let map: DirstateMap = [
394 (b"f1", (EntryState::Removed, 1337, 1337, 1337)),
395 (b"f2", (EntryState::Normal, 1337, 1337, -1)),
396 (b"f3", (EntryState::Normal, 1337, 1337, 1337)),
397 (b"f4", (EntryState::Normal, 1337, -2, 1337)),
398 (b"f5", (EntryState::Added, 1337, 1337, 1337)),
399 (b"f6", (EntryState::Added, 1337, 1337, -1)),
400 (b"f7", (EntryState::Merged, 1337, 1337, -1)),
401 (b"f8", (EntryState::Merged, 1337, 1337, 1337)),
402 (b"f9", (EntryState::Merged, 1337, -2, 1337)),
403 (b"fa", (EntryState::Added, 1337, -2, 1337)),
404 (b"fb", (EntryState::Removed, 1337, -2, 1337)),
405 ]
406 .iter()
407 .map(|(fname, (state, mode, size, mtime))| {
408 (
409 fname.to_vec(),
410 DirstateEntry {
411 state: *state,
412 mode: *mode,
413 size: *size,
414 mtime: *mtime,
415 },
416 )
417 })
418 .collect();
419
420 let non_normal = [
421 b"f1", b"f2", b"f5", b"f6", b"f7", b"f8", b"f9", b"fa", b"fb",
422 ]
423 .iter()
424 .map(|x| x.to_vec())
425 .collect();
426
427 let mut other_parent = HashSet::new();
428 other_parent.insert(b"f4".to_vec());
429
430 assert_eq!(
431 (non_normal, other_parent),
432 map.non_normal_other_parent_entries()
433 );
434 }
435 }
@@ -1,79 +1,80
1 1 // dirstate module
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::DirstateParseError;
9 9 use std::collections::HashMap;
10 10 use std::convert::TryFrom;
11 11
12 12 pub mod dirs_multiset;
13 pub mod dirstate_map;
13 14 pub mod parsers;
14 15
15 16 #[derive(Debug, PartialEq, Clone)]
16 17 pub struct DirstateParents {
17 18 pub p1: [u8; 20],
18 19 pub p2: [u8; 20],
19 20 }
20 21
21 22 /// The C implementation uses all signed types. This will be an issue
22 23 /// either when 4GB+ source files are commonplace or in 2038, whichever
23 24 /// comes first.
24 25 #[derive(Debug, PartialEq, Copy, Clone)]
25 26 pub struct DirstateEntry {
26 27 pub state: EntryState,
27 28 pub mode: i32,
28 29 pub mtime: i32,
29 30 pub size: i32,
30 31 }
31 32
32 33 pub type StateMap = HashMap<Vec<u8>, DirstateEntry>;
33 34 pub type CopyMap = HashMap<Vec<u8>, Vec<u8>>;
34 35
35 36 /// The Python implementation passes either a mapping (dirstate) or a flat
36 37 /// iterable (manifest)
37 38 pub enum DirsIterable<'a> {
38 39 Dirstate(&'a HashMap<Vec<u8>, DirstateEntry>),
39 40 Manifest(&'a Vec<Vec<u8>>),
40 41 }
41 42
42 43 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
43 44 pub enum EntryState {
44 45 Normal,
45 46 Added,
46 47 Removed,
47 48 Merged,
48 49 Unknown,
49 50 }
50 51
51 52 impl TryFrom<u8> for EntryState {
52 53 type Error = DirstateParseError;
53 54
54 55 fn try_from(value: u8) -> Result<Self, Self::Error> {
55 56 match value {
56 57 b'n' => Ok(EntryState::Normal),
57 58 b'a' => Ok(EntryState::Added),
58 59 b'r' => Ok(EntryState::Removed),
59 60 b'm' => Ok(EntryState::Merged),
60 61 b'?' => Ok(EntryState::Unknown),
61 62 _ => Err(DirstateParseError::CorruptedEntry(format!(
62 63 "Incorrect entry state {}",
63 64 value
64 65 ))),
65 66 }
66 67 }
67 68 }
68 69
69 70 impl Into<u8> for EntryState {
70 71 fn into(self) -> u8 {
71 72 match self {
72 73 EntryState::Normal => b'n',
73 74 EntryState::Added => b'a',
74 75 EntryState::Removed => b'r',
75 76 EntryState::Merged => b'm',
76 77 EntryState::Unknown => b'?',
77 78 }
78 79 }
79 80 }
@@ -1,148 +1,149
1 1 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5 mod ancestors;
6 6 pub mod dagops;
7 7 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
8 8 mod dirstate;
9 9 pub mod discovery;
10 10 pub mod testing; // unconditionally built, for use from integration tests
11 11 pub use dirstate::{
12 12 dirs_multiset::DirsMultiset,
13 dirstate_map::DirstateMap,
13 14 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
14 15 CopyMap, DirsIterable, DirstateEntry, DirstateParents, EntryState,
15 16 StateMap,
16 17 };
17 18 mod filepatterns;
18 19 pub mod utils;
19 20
20 21 pub use filepatterns::{
21 22 build_single_regex, read_pattern_file, PatternSyntax, PatternTuple,
22 23 };
23 24
24 25 /// Mercurial revision numbers
25 26 ///
26 27 /// As noted in revlog.c, revision numbers are actually encoded in
27 28 /// 4 bytes, and are liberally converted to ints, whence the i32
28 29 pub type Revision = i32;
29 30
30 31 /// Marker expressing the absence of a parent
31 32 ///
32 33 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
33 34 /// to be smaller that all existing revisions.
34 35 pub const NULL_REVISION: Revision = -1;
35 36
36 37 /// Same as `mercurial.node.wdirrev`
37 38 ///
38 39 /// This is also equal to `i32::max_value()`, but it's better to spell
39 40 /// it out explicitely, same as in `mercurial.node`
40 41 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
41 42
42 43 /// The simplest expression of what we need of Mercurial DAGs.
43 44 pub trait Graph {
44 45 /// Return the two parents of the given `Revision`.
45 46 ///
46 47 /// Each of the parents can be independently `NULL_REVISION`
47 48 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
48 49 }
49 50
50 51 pub type LineNumber = usize;
51 52
52 53 #[derive(Clone, Debug, PartialEq)]
53 54 pub enum GraphError {
54 55 ParentOutOfRange(Revision),
55 56 WorkingDirectoryUnsupported,
56 57 }
57 58
58 59 #[derive(Clone, Debug, PartialEq)]
59 60 pub enum DirstateParseError {
60 61 TooLittleData,
61 62 Overflow,
62 63 CorruptedEntry(String),
63 64 Damaged,
64 65 }
65 66
66 67 impl From<std::io::Error> for DirstateParseError {
67 68 fn from(e: std::io::Error) -> Self {
68 69 DirstateParseError::CorruptedEntry(e.to_string())
69 70 }
70 71 }
71 72
72 73 impl ToString for DirstateParseError {
73 74 fn to_string(&self) -> String {
74 75 use crate::DirstateParseError::*;
75 76 match self {
76 77 TooLittleData => "Too little data for dirstate.".to_string(),
77 78 Overflow => "Overflow in dirstate.".to_string(),
78 79 CorruptedEntry(e) => format!("Corrupted entry: {:?}.", e),
79 80 Damaged => "Dirstate appears to be damaged.".to_string(),
80 81 }
81 82 }
82 83 }
83 84
84 85 #[derive(Debug, PartialEq)]
85 86 pub enum DirstatePackError {
86 87 CorruptedEntry(String),
87 88 CorruptedParent,
88 89 BadSize(usize, usize),
89 90 }
90 91
91 92 impl From<std::io::Error> for DirstatePackError {
92 93 fn from(e: std::io::Error) -> Self {
93 94 DirstatePackError::CorruptedEntry(e.to_string())
94 95 }
95 96 }
96 97 #[derive(Debug, PartialEq)]
97 98 pub enum DirstateMapError {
98 99 PathNotFound(Vec<u8>),
99 100 EmptyPath,
100 101 }
101 102
102 103 pub enum DirstateError {
103 104 Parse(DirstateParseError),
104 105 Pack(DirstatePackError),
105 106 Map(DirstateMapError),
106 107 IO(std::io::Error),
107 108 }
108 109
109 110 impl From<DirstateParseError> for DirstateError {
110 111 fn from(e: DirstateParseError) -> Self {
111 112 DirstateError::Parse(e)
112 113 }
113 114 }
114 115
115 116 impl From<DirstatePackError> for DirstateError {
116 117 fn from(e: DirstatePackError) -> Self {
117 118 DirstateError::Pack(e)
118 119 }
119 120 }
120 121
121 122 #[derive(Debug)]
122 123 pub enum PatternError {
123 124 UnsupportedSyntax(String),
124 125 }
125 126
126 127 #[derive(Debug)]
127 128 pub enum PatternFileError {
128 129 IO(std::io::Error),
129 130 Pattern(PatternError, LineNumber),
130 131 }
131 132
132 133 impl From<std::io::Error> for PatternFileError {
133 134 fn from(e: std::io::Error) -> Self {
134 135 PatternFileError::IO(e)
135 136 }
136 137 }
137 138
138 139 impl From<DirstateMapError> for DirstateError {
139 140 fn from(e: DirstateMapError) -> Self {
140 141 DirstateError::Map(e)
141 142 }
142 143 }
143 144
144 145 impl From<std::io::Error> for DirstateError {
145 146 fn from(e: std::io::Error) -> Self {
146 147 DirstateError::IO(e)
147 148 }
148 149 }
General Comments 0
You need to be logged in to leave comments. Login now