##// END OF EJS Templates
rust: Remove EntryState::Unknown...
Simon Sapin -
r48838:1b2ee68e default
parent child Browse files
Show More
@@ -1,481 +1,470 b''
1 1 // dirstate_map.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::dirstate::parsers::Timestamp;
9 9 use crate::{
10 10 dirstate::EntryState,
11 11 dirstate::MTIME_UNSET,
12 12 dirstate::SIZE_FROM_OTHER_PARENT,
13 13 dirstate::SIZE_NON_NORMAL,
14 14 dirstate::V1_RANGEMASK,
15 15 pack_dirstate, parse_dirstate,
16 16 utils::hg_path::{HgPath, HgPathBuf},
17 17 CopyMap, DirsMultiset, DirstateEntry, DirstateError, DirstateParents,
18 18 StateMap,
19 19 };
20 20 use micro_timer::timed;
21 21 use std::collections::HashSet;
22 22 use std::iter::FromIterator;
23 23 use std::ops::Deref;
24 24
25 25 #[derive(Default)]
26 26 pub struct DirstateMap {
27 27 state_map: StateMap,
28 28 pub copy_map: CopyMap,
29 29 pub dirs: Option<DirsMultiset>,
30 30 pub all_dirs: Option<DirsMultiset>,
31 31 non_normal_set: Option<HashSet<HgPathBuf>>,
32 32 other_parent_set: Option<HashSet<HgPathBuf>>,
33 33 }
34 34
35 35 /// Should only really be used in python interface code, for clarity
36 36 impl Deref for DirstateMap {
37 37 type Target = StateMap;
38 38
39 39 fn deref(&self) -> &Self::Target {
40 40 &self.state_map
41 41 }
42 42 }
43 43
44 44 impl FromIterator<(HgPathBuf, DirstateEntry)> for DirstateMap {
45 45 fn from_iter<I: IntoIterator<Item = (HgPathBuf, DirstateEntry)>>(
46 46 iter: I,
47 47 ) -> Self {
48 48 Self {
49 49 state_map: iter.into_iter().collect(),
50 50 ..Self::default()
51 51 }
52 52 }
53 53 }
54 54
55 55 impl DirstateMap {
56 56 pub fn new() -> Self {
57 57 Self::default()
58 58 }
59 59
60 60 pub fn clear(&mut self) {
61 61 self.state_map = StateMap::default();
62 62 self.copy_map.clear();
63 63 self.non_normal_set = None;
64 64 self.other_parent_set = None;
65 65 }
66 66
67 67 pub fn set_v1_inner(&mut self, filename: &HgPath, entry: DirstateEntry) {
68 68 self.state_map.insert(filename.to_owned(), entry);
69 69 }
70 70
71 71 /// Add a tracked file to the dirstate
72 72 pub fn add_file(
73 73 &mut self,
74 74 filename: &HgPath,
75 75 entry: DirstateEntry,
76 76 // XXX once the dust settles this should probably become an enum
77 77 added: bool,
78 78 merged: bool,
79 79 from_p2: bool,
80 80 possibly_dirty: bool,
81 81 ) -> Result<(), DirstateError> {
82 82 let state;
83 83 let size;
84 84 let mtime;
85 85 if added {
86 86 assert!(!possibly_dirty);
87 87 assert!(!from_p2);
88 88 state = EntryState::Added;
89 89 size = SIZE_NON_NORMAL;
90 90 mtime = MTIME_UNSET;
91 91 } else if merged {
92 92 assert!(!possibly_dirty);
93 93 assert!(!from_p2);
94 94 state = EntryState::Merged;
95 95 size = SIZE_FROM_OTHER_PARENT;
96 96 mtime = MTIME_UNSET;
97 97 } else if from_p2 {
98 98 assert!(!possibly_dirty);
99 99 state = EntryState::Normal;
100 100 size = SIZE_FROM_OTHER_PARENT;
101 101 mtime = MTIME_UNSET;
102 102 } else if possibly_dirty {
103 103 state = EntryState::Normal;
104 104 size = SIZE_NON_NORMAL;
105 105 mtime = MTIME_UNSET;
106 106 } else {
107 107 state = EntryState::Normal;
108 108 size = entry.size() & V1_RANGEMASK;
109 109 mtime = entry.mtime() & V1_RANGEMASK;
110 110 }
111 111 let mode = entry.mode();
112 112 let entry = DirstateEntry::from_v1_data(state, mode, size, mtime);
113 113
114 let old_state = match self.get(filename) {
115 Some(e) => e.state(),
116 None => EntryState::Unknown,
117 };
118 if old_state == EntryState::Unknown || old_state == EntryState::Removed
119 {
114 let old_state = self.get(filename).map(|e| e.state());
115 if old_state.is_none() || old_state == Some(EntryState::Removed) {
120 116 if let Some(ref mut dirs) = self.dirs {
121 117 dirs.add_path(filename)?;
122 118 }
123 119 }
124 if old_state == EntryState::Unknown {
120 if old_state.is_none() {
125 121 if let Some(ref mut all_dirs) = self.all_dirs {
126 122 all_dirs.add_path(filename)?;
127 123 }
128 124 }
129 125 self.state_map.insert(filename.to_owned(), entry.to_owned());
130 126
131 127 if entry.is_non_normal() {
132 128 self.get_non_normal_other_parent_entries()
133 129 .0
134 130 .insert(filename.to_owned());
135 131 }
136 132
137 133 if entry.is_from_other_parent() {
138 134 self.get_non_normal_other_parent_entries()
139 135 .1
140 136 .insert(filename.to_owned());
141 137 }
142 138 Ok(())
143 139 }
144 140
145 141 /// Mark a file as removed in the dirstate.
146 142 ///
147 143 /// The removed entry's `size` field is used to store sentinel values that
148 144 /// indicate the file's previous state. In the future, we should refactor
149 145 /// this to be more explicit about what that state is.
150 146 pub fn remove_file(
151 147 &mut self,
152 148 filename: &HgPath,
153 149 in_merge: bool,
154 150 ) -> Result<(), DirstateError> {
155 151 let old_entry_opt = self.get(filename);
156 let old_state = match old_entry_opt {
157 Some(e) => e.state(),
158 None => EntryState::Unknown,
159 };
152 let old_state = old_entry_opt.map(|e| e.state());
160 153 let mut size = 0;
161 154 if in_merge {
162 155 // XXX we should not be able to have 'm' state and 'FROM_P2' if not
163 156 // during a merge. So I (marmoute) am not sure we need the
164 157 // conditional at all. Double-checking this with an assert
165 158 // would be nice.
166 159 if let Some(old_entry) = old_entry_opt {
167 160 // backup the previous state
168 161 if old_entry.state() == EntryState::Merged {
169 162 size = SIZE_NON_NORMAL;
170 163 } else if old_entry.state() == EntryState::Normal
171 164 && old_entry.size() == SIZE_FROM_OTHER_PARENT
172 165 {
173 166 // other parent
174 167 size = SIZE_FROM_OTHER_PARENT;
175 168 self.get_non_normal_other_parent_entries()
176 169 .1
177 170 .insert(filename.to_owned());
178 171 }
179 172 }
180 173 }
181 if old_state != EntryState::Unknown && old_state != EntryState::Removed
182 {
174 if old_state.is_some() && old_state != Some(EntryState::Removed) {
183 175 if let Some(ref mut dirs) = self.dirs {
184 176 dirs.delete_path(filename)?;
185 177 }
186 178 }
187 if old_state == EntryState::Unknown {
179 if old_state.is_none() {
188 180 if let Some(ref mut all_dirs) = self.all_dirs {
189 181 all_dirs.add_path(filename)?;
190 182 }
191 183 }
192 184 if size == 0 {
193 185 self.copy_map.remove(filename);
194 186 }
195 187
196 188 self.state_map
197 189 .insert(filename.to_owned(), DirstateEntry::new_removed(size));
198 190 self.get_non_normal_other_parent_entries()
199 191 .0
200 192 .insert(filename.to_owned());
201 193 Ok(())
202 194 }
203 195
204 196 /// Remove a file from the dirstate.
205 197 /// Returns `true` if the file was previously recorded.
206 198 pub fn drop_file(
207 199 &mut self,
208 200 filename: &HgPath,
209 201 ) -> Result<bool, DirstateError> {
210 let old_state = match self.get(filename) {
211 Some(e) => e.state(),
212 None => EntryState::Unknown,
213 };
202 let old_state = self.get(filename).map(|e| e.state());
214 203 let exists = self.state_map.remove(filename).is_some();
215 204
216 205 if exists {
217 if old_state != EntryState::Removed {
206 if old_state != Some(EntryState::Removed) {
218 207 if let Some(ref mut dirs) = self.dirs {
219 208 dirs.delete_path(filename)?;
220 209 }
221 210 }
222 211 if let Some(ref mut all_dirs) = self.all_dirs {
223 212 all_dirs.delete_path(filename)?;
224 213 }
225 214 }
226 215 self.get_non_normal_other_parent_entries()
227 216 .0
228 217 .remove(filename);
229 218
230 219 Ok(exists)
231 220 }
232 221
233 222 pub fn clear_ambiguous_times(
234 223 &mut self,
235 224 filenames: Vec<HgPathBuf>,
236 225 now: i32,
237 226 ) {
238 227 for filename in filenames {
239 228 if let Some(entry) = self.state_map.get_mut(&filename) {
240 229 if entry.clear_ambiguous_mtime(now) {
241 230 self.get_non_normal_other_parent_entries()
242 231 .0
243 232 .insert(filename.to_owned());
244 233 }
245 234 }
246 235 }
247 236 }
248 237
249 238 pub fn non_normal_entries_remove(
250 239 &mut self,
251 240 key: impl AsRef<HgPath>,
252 241 ) -> bool {
253 242 self.get_non_normal_other_parent_entries()
254 243 .0
255 244 .remove(key.as_ref())
256 245 }
257 246
258 247 pub fn non_normal_entries_add(&mut self, key: impl AsRef<HgPath>) {
259 248 self.get_non_normal_other_parent_entries()
260 249 .0
261 250 .insert(key.as_ref().into());
262 251 }
263 252
264 253 pub fn non_normal_entries_union(
265 254 &mut self,
266 255 other: HashSet<HgPathBuf>,
267 256 ) -> Vec<HgPathBuf> {
268 257 self.get_non_normal_other_parent_entries()
269 258 .0
270 259 .union(&other)
271 260 .map(ToOwned::to_owned)
272 261 .collect()
273 262 }
274 263
275 264 pub fn get_non_normal_other_parent_entries(
276 265 &mut self,
277 266 ) -> (&mut HashSet<HgPathBuf>, &mut HashSet<HgPathBuf>) {
278 267 self.set_non_normal_other_parent_entries(false);
279 268 (
280 269 self.non_normal_set.as_mut().unwrap(),
281 270 self.other_parent_set.as_mut().unwrap(),
282 271 )
283 272 }
284 273
285 274 /// Useful to get immutable references to those sets in contexts where
286 275 /// you only have an immutable reference to the `DirstateMap`, like when
287 276 /// sharing references with Python.
288 277 ///
289 278 /// TODO, get rid of this along with the other "setter/getter" stuff when
290 279 /// a nice typestate plan is defined.
291 280 ///
292 281 /// # Panics
293 282 ///
294 283 /// Will panic if either set is `None`.
295 284 pub fn get_non_normal_other_parent_entries_panic(
296 285 &self,
297 286 ) -> (&HashSet<HgPathBuf>, &HashSet<HgPathBuf>) {
298 287 (
299 288 self.non_normal_set.as_ref().unwrap(),
300 289 self.other_parent_set.as_ref().unwrap(),
301 290 )
302 291 }
303 292
304 293 pub fn set_non_normal_other_parent_entries(&mut self, force: bool) {
305 294 if !force
306 295 && self.non_normal_set.is_some()
307 296 && self.other_parent_set.is_some()
308 297 {
309 298 return;
310 299 }
311 300 let mut non_normal = HashSet::new();
312 301 let mut other_parent = HashSet::new();
313 302
314 303 for (filename, entry) in self.state_map.iter() {
315 304 if entry.is_non_normal() {
316 305 non_normal.insert(filename.to_owned());
317 306 }
318 307 if entry.is_from_other_parent() {
319 308 other_parent.insert(filename.to_owned());
320 309 }
321 310 }
322 311 self.non_normal_set = Some(non_normal);
323 312 self.other_parent_set = Some(other_parent);
324 313 }
325 314
326 315 /// Both of these setters and their uses appear to be the simplest way to
327 316 /// emulate a Python lazy property, but it is ugly and unidiomatic.
328 317 /// TODO One day, rewriting this struct using the typestate might be a
329 318 /// good idea.
330 319 pub fn set_all_dirs(&mut self) -> Result<(), DirstateError> {
331 320 if self.all_dirs.is_none() {
332 321 self.all_dirs = Some(DirsMultiset::from_dirstate(
333 322 self.state_map.iter().map(|(k, v)| Ok((k, *v))),
334 323 false,
335 324 )?);
336 325 }
337 326 Ok(())
338 327 }
339 328
340 329 pub fn set_dirs(&mut self) -> Result<(), DirstateError> {
341 330 if self.dirs.is_none() {
342 331 self.dirs = Some(DirsMultiset::from_dirstate(
343 332 self.state_map.iter().map(|(k, v)| Ok((k, *v))),
344 333 true,
345 334 )?);
346 335 }
347 336 Ok(())
348 337 }
349 338
350 339 pub fn has_tracked_dir(
351 340 &mut self,
352 341 directory: &HgPath,
353 342 ) -> Result<bool, DirstateError> {
354 343 self.set_dirs()?;
355 344 Ok(self.dirs.as_ref().unwrap().contains(directory))
356 345 }
357 346
358 347 pub fn has_dir(
359 348 &mut self,
360 349 directory: &HgPath,
361 350 ) -> Result<bool, DirstateError> {
362 351 self.set_all_dirs()?;
363 352 Ok(self.all_dirs.as_ref().unwrap().contains(directory))
364 353 }
365 354
366 355 #[timed]
367 356 pub fn read(
368 357 &mut self,
369 358 file_contents: &[u8],
370 359 ) -> Result<Option<DirstateParents>, DirstateError> {
371 360 if file_contents.is_empty() {
372 361 return Ok(None);
373 362 }
374 363
375 364 let (parents, entries, copies) = parse_dirstate(file_contents)?;
376 365 self.state_map.extend(
377 366 entries
378 367 .into_iter()
379 368 .map(|(path, entry)| (path.to_owned(), entry)),
380 369 );
381 370 self.copy_map.extend(
382 371 copies
383 372 .into_iter()
384 373 .map(|(path, copy)| (path.to_owned(), copy.to_owned())),
385 374 );
386 375 Ok(Some(parents.clone()))
387 376 }
388 377
389 378 pub fn pack(
390 379 &mut self,
391 380 parents: DirstateParents,
392 381 now: Timestamp,
393 382 ) -> Result<Vec<u8>, DirstateError> {
394 383 let packed =
395 384 pack_dirstate(&mut self.state_map, &self.copy_map, parents, now)?;
396 385
397 386 self.set_non_normal_other_parent_entries(true);
398 387 Ok(packed)
399 388 }
400 389 }
401 390
402 391 #[cfg(test)]
403 392 mod tests {
404 393 use super::*;
405 394
406 395 #[test]
407 396 fn test_dirs_multiset() {
408 397 let mut map = DirstateMap::new();
409 398 assert!(map.dirs.is_none());
410 399 assert!(map.all_dirs.is_none());
411 400
412 401 assert_eq!(map.has_dir(HgPath::new(b"nope")).unwrap(), false);
413 402 assert!(map.all_dirs.is_some());
414 403 assert!(map.dirs.is_none());
415 404
416 405 assert_eq!(map.has_tracked_dir(HgPath::new(b"nope")).unwrap(), false);
417 406 assert!(map.dirs.is_some());
418 407 }
419 408
420 409 #[test]
421 410 fn test_add_file() {
422 411 let mut map = DirstateMap::new();
423 412
424 413 assert_eq!(0, map.len());
425 414
426 415 map.add_file(
427 416 HgPath::new(b"meh"),
428 417 DirstateEntry::from_v1_data(EntryState::Normal, 1337, 1337, 1337),
429 418 false,
430 419 false,
431 420 false,
432 421 false,
433 422 )
434 423 .unwrap();
435 424
436 425 assert_eq!(1, map.len());
437 426 assert_eq!(0, map.get_non_normal_other_parent_entries().0.len());
438 427 assert_eq!(0, map.get_non_normal_other_parent_entries().1.len());
439 428 }
440 429
441 430 #[test]
442 431 fn test_non_normal_other_parent_entries() {
443 432 let mut map: DirstateMap = [
444 433 (b"f1", (EntryState::Removed, 1337, 1337, 1337)),
445 434 (b"f2", (EntryState::Normal, 1337, 1337, -1)),
446 435 (b"f3", (EntryState::Normal, 1337, 1337, 1337)),
447 436 (b"f4", (EntryState::Normal, 1337, -2, 1337)),
448 437 (b"f5", (EntryState::Added, 1337, 1337, 1337)),
449 438 (b"f6", (EntryState::Added, 1337, 1337, -1)),
450 439 (b"f7", (EntryState::Merged, 1337, 1337, -1)),
451 440 (b"f8", (EntryState::Merged, 1337, 1337, 1337)),
452 441 (b"f9", (EntryState::Merged, 1337, -2, 1337)),
453 442 (b"fa", (EntryState::Added, 1337, -2, 1337)),
454 443 (b"fb", (EntryState::Removed, 1337, -2, 1337)),
455 444 ]
456 445 .iter()
457 446 .map(|(fname, (state, mode, size, mtime))| {
458 447 (
459 448 HgPathBuf::from_bytes(fname.as_ref()),
460 449 DirstateEntry::from_v1_data(*state, *mode, *size, *mtime),
461 450 )
462 451 })
463 452 .collect();
464 453
465 454 let mut non_normal = [
466 455 b"f1", b"f2", b"f5", b"f6", b"f7", b"f8", b"f9", b"fa", b"fb",
467 456 ]
468 457 .iter()
469 458 .map(|x| HgPathBuf::from_bytes(x.as_ref()))
470 459 .collect();
471 460
472 461 let mut other_parent = HashSet::new();
473 462 other_parent.insert(HgPathBuf::from_bytes(b"f4"));
474 463 let entries = map.get_non_normal_other_parent_entries();
475 464
476 465 assert_eq!(
477 466 (&mut non_normal, &mut other_parent),
478 467 (entries.0, entries.1)
479 468 );
480 469 }
481 470 }
@@ -1,193 +1,190 b''
1 1 use crate::errors::HgError;
2 2 use std::convert::TryFrom;
3 3
4 4 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
5 5 pub enum EntryState {
6 6 Normal,
7 7 Added,
8 8 Removed,
9 9 Merged,
10 Unknown,
11 10 }
12 11
13 12 /// The C implementation uses all signed types. This will be an issue
14 13 /// either when 4GB+ source files are commonplace or in 2038, whichever
15 14 /// comes first.
16 15 #[derive(Debug, PartialEq, Copy, Clone)]
17 16 pub struct DirstateEntry {
18 17 state: EntryState,
19 18 mode: i32,
20 19 size: i32,
21 20 mtime: i32,
22 21 }
23 22
24 23 pub const V1_RANGEMASK: i32 = 0x7FFFFFFF;
25 24
26 25 pub const MTIME_UNSET: i32 = -1;
27 26
28 27 /// A `DirstateEntry` with a size of `-2` means that it was merged from the
29 28 /// other parent. This allows revert to pick the right status back during a
30 29 /// merge.
31 30 pub const SIZE_FROM_OTHER_PARENT: i32 = -2;
32 31 /// A special value used for internal representation of special case in
33 32 /// dirstate v1 format.
34 33 pub const SIZE_NON_NORMAL: i32 = -1;
35 34
36 35 impl DirstateEntry {
37 36 pub fn from_v1_data(
38 37 state: EntryState,
39 38 mode: i32,
40 39 size: i32,
41 40 mtime: i32,
42 41 ) -> Self {
43 42 Self {
44 43 state,
45 44 mode,
46 45 size,
47 46 mtime,
48 47 }
49 48 }
50 49
51 50 /// Creates a new entry in "removed" state.
52 51 ///
53 52 /// `size` is expected to be zero, `SIZE_NON_NORMAL`, or
54 53 /// `SIZE_FROM_OTHER_PARENT`
55 54 pub fn new_removed(size: i32) -> Self {
56 55 Self {
57 56 state: EntryState::Removed,
58 57 mode: 0,
59 58 size,
60 59 mtime: 0,
61 60 }
62 61 }
63 62
64 63 /// TODO: refactor `DirstateMap::add_file` to not take a `DirstateEntry`
65 64 /// parameter and remove this constructor
66 65 pub fn new_for_add_file(mode: i32, size: i32, mtime: i32) -> Self {
67 66 Self {
68 67 // XXX Arbitrary default value since the value is determined later
69 68 state: EntryState::Normal,
70 69 mode,
71 70 size,
72 71 mtime,
73 72 }
74 73 }
75 74
76 75 pub fn state(&self) -> EntryState {
77 76 self.state
78 77 }
79 78
80 79 pub fn mode(&self) -> i32 {
81 80 self.mode
82 81 }
83 82
84 83 pub fn size(&self) -> i32 {
85 84 self.size
86 85 }
87 86
88 87 pub fn mtime(&self) -> i32 {
89 88 self.mtime
90 89 }
91 90
92 91 /// Returns `(state, mode, size, mtime)` for the purpose of serialization
93 92 /// in the dirstate-v1 format.
94 93 ///
95 94 /// This includes marker values such as `mtime == -1`. In the future we may
96 95 /// want to not represent these cases that way in memory, but serialization
97 96 /// will need to keep the same format.
98 97 pub fn v1_data(&self) -> (u8, i32, i32, i32) {
99 98 (self.state.into(), self.mode, self.size, self.mtime)
100 99 }
101 100
102 101 pub fn is_non_normal(&self) -> bool {
103 102 self.state != EntryState::Normal || self.mtime == MTIME_UNSET
104 103 }
105 104
106 105 pub fn is_from_other_parent(&self) -> bool {
107 106 self.state == EntryState::Normal && self.size == SIZE_FROM_OTHER_PARENT
108 107 }
109 108
110 109 // TODO: other platforms
111 110 #[cfg(unix)]
112 111 pub fn mode_changed(
113 112 &self,
114 113 filesystem_metadata: &std::fs::Metadata,
115 114 ) -> bool {
116 115 use std::os::unix::fs::MetadataExt;
117 116 const EXEC_BIT_MASK: u32 = 0o100;
118 117 let dirstate_exec_bit = (self.mode as u32) & EXEC_BIT_MASK;
119 118 let fs_exec_bit = filesystem_metadata.mode() & EXEC_BIT_MASK;
120 119 dirstate_exec_bit != fs_exec_bit
121 120 }
122 121
123 122 /// Returns a `(state, mode, size, mtime)` tuple as for
124 123 /// `DirstateMapMethods::debug_iter`.
125 124 pub fn debug_tuple(&self) -> (u8, i32, i32, i32) {
126 125 (self.state.into(), self.mode, self.size, self.mtime)
127 126 }
128 127
129 128 pub fn mtime_is_ambiguous(&self, now: i32) -> bool {
130 129 self.state == EntryState::Normal && self.mtime == now
131 130 }
132 131
133 132 pub fn clear_ambiguous_mtime(&mut self, now: i32) -> bool {
134 133 let ambiguous = self.mtime_is_ambiguous(now);
135 134 if ambiguous {
136 135 // The file was last modified "simultaneously" with the current
137 136 // write to dirstate (i.e. within the same second for file-
138 137 // systems with a granularity of 1 sec). This commonly happens
139 138 // for at least a couple of files on 'update'.
140 139 // The user could change the file without changing its size
141 140 // within the same second. Invalidate the file's mtime in
142 141 // dirstate, forcing future 'status' calls to compare the
143 142 // contents of the file if the size is the same. This prevents
144 143 // mistakenly treating such files as clean.
145 144 self.clear_mtime()
146 145 }
147 146 ambiguous
148 147 }
149 148
150 149 pub fn clear_mtime(&mut self) {
151 150 self.mtime = -1;
152 151 }
153 152 }
154 153
155 154 impl EntryState {
156 155 pub fn is_tracked(self) -> bool {
157 156 use EntryState::*;
158 157 match self {
159 158 Normal | Added | Merged => true,
160 Removed | Unknown => false,
159 Removed => false,
161 160 }
162 161 }
163 162 }
164 163
165 164 impl TryFrom<u8> for EntryState {
166 165 type Error = HgError;
167 166
168 167 fn try_from(value: u8) -> Result<Self, Self::Error> {
169 168 match value {
170 169 b'n' => Ok(EntryState::Normal),
171 170 b'a' => Ok(EntryState::Added),
172 171 b'r' => Ok(EntryState::Removed),
173 172 b'm' => Ok(EntryState::Merged),
174 b'?' => Ok(EntryState::Unknown),
175 173 _ => Err(HgError::CorruptedRepository(format!(
176 174 "Incorrect dirstate entry state {}",
177 175 value
178 176 ))),
179 177 }
180 178 }
181 179 }
182 180
183 181 impl Into<u8> for EntryState {
184 182 fn into(self) -> u8 {
185 183 match self {
186 184 EntryState::Normal => b'n',
187 185 EntryState::Added => b'a',
188 186 EntryState::Removed => b'r',
189 187 EntryState::Merged => b'm',
190 EntryState::Unknown => b'?',
191 188 }
192 189 }
193 190 }
@@ -1,947 +1,944 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Rust implementation of dirstate.status (dirstate.py).
9 9 //! It is currently missing a lot of functionality compared to the Python one
10 10 //! and will only be triggered in narrow cases.
11 11
12 12 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
13 13 use crate::utils::path_auditor::PathAuditor;
14 14 use crate::{
15 15 dirstate::SIZE_FROM_OTHER_PARENT,
16 16 filepatterns::PatternFileWarning,
17 17 matchers::{get_ignore_function, Matcher, VisitChildrenSet},
18 18 utils::{
19 19 files::{find_dirs, HgMetadata},
20 20 hg_path::{
21 21 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
22 22 HgPathError,
23 23 },
24 24 },
25 25 CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
26 26 PatternError,
27 27 };
28 28 use lazy_static::lazy_static;
29 29 use micro_timer::timed;
30 30 use rayon::prelude::*;
31 31 use std::{
32 32 borrow::Cow,
33 33 collections::HashSet,
34 34 fmt,
35 35 fs::{read_dir, DirEntry},
36 36 io::ErrorKind,
37 37 ops::Deref,
38 38 path::{Path, PathBuf},
39 39 };
40 40
41 41 /// Wrong type of file from a `BadMatch`
42 42 /// Note: a lot of those don't exist on all platforms.
43 43 #[derive(Debug, Copy, Clone)]
44 44 pub enum BadType {
45 45 CharacterDevice,
46 46 BlockDevice,
47 47 FIFO,
48 48 Socket,
49 49 Directory,
50 50 Unknown,
51 51 }
52 52
53 53 impl fmt::Display for BadType {
54 54 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
55 55 f.write_str(match self {
56 56 BadType::CharacterDevice => "character device",
57 57 BadType::BlockDevice => "block device",
58 58 BadType::FIFO => "fifo",
59 59 BadType::Socket => "socket",
60 60 BadType::Directory => "directory",
61 61 BadType::Unknown => "unknown",
62 62 })
63 63 }
64 64 }
65 65
66 66 /// Was explicitly matched but cannot be found/accessed
67 67 #[derive(Debug, Copy, Clone)]
68 68 pub enum BadMatch {
69 69 OsError(i32),
70 70 BadType(BadType),
71 71 }
72 72
73 73 /// Enum used to dispatch new status entries into the right collections.
74 74 /// Is similar to `crate::EntryState`, but represents the transient state of
75 75 /// entries during the lifetime of a command.
76 76 #[derive(Debug, Copy, Clone)]
77 77 pub enum Dispatch {
78 78 Unsure,
79 79 Modified,
80 80 Added,
81 81 Removed,
82 82 Deleted,
83 83 Clean,
84 84 Unknown,
85 85 Ignored,
86 86 /// Empty dispatch, the file is not worth listing
87 87 None,
88 88 /// Was explicitly matched but cannot be found/accessed
89 89 Bad(BadMatch),
90 90 Directory {
91 91 /// True if the directory used to be a file in the dmap so we can say
92 92 /// that it's been removed.
93 93 was_file: bool,
94 94 },
95 95 }
96 96
97 97 type IoResult<T> = std::io::Result<T>;
98 98
99 99 /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait + 'static>`, so add
100 100 /// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
101 101 pub type IgnoreFnType<'a> =
102 102 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
103 103
104 104 /// We have a good mix of owned (from directory traversal) and borrowed (from
105 105 /// the dirstate/explicit) paths, this comes up a lot.
106 106 pub type HgPathCow<'a> = Cow<'a, HgPath>;
107 107
108 108 /// A path with its computed ``Dispatch`` information
109 109 type DispatchedPath<'a> = (HgPathCow<'a>, Dispatch);
110 110
111 111 /// The conversion from `HgPath` to a real fs path failed.
112 112 /// `22` is the error code for "Invalid argument"
113 113 const INVALID_PATH_DISPATCH: Dispatch = Dispatch::Bad(BadMatch::OsError(22));
114 114
115 115 /// Dates and times that are outside the 31-bit signed range are compared
116 116 /// modulo 2^31. This should prevent hg from behaving badly with very large
117 117 /// files or corrupt dates while still having a high probability of detecting
118 118 /// changes. (issue2608)
119 119 /// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>`
120 120 /// is not defined for `i32`, and there is no `As` trait. This forces the
121 121 /// caller to cast `b` as `i32`.
122 122 fn mod_compare(a: i32, b: i32) -> bool {
123 123 a & i32::max_value() != b & i32::max_value()
124 124 }
125 125
126 126 /// Return a sorted list containing information about the entries
127 127 /// in the directory.
128 128 ///
129 129 /// * `skip_dot_hg` - Return an empty vec if `path` contains a `.hg` directory
130 130 fn list_directory(
131 131 path: impl AsRef<Path>,
132 132 skip_dot_hg: bool,
133 133 ) -> std::io::Result<Vec<(HgPathBuf, DirEntry)>> {
134 134 let mut results = vec![];
135 135 let entries = read_dir(path.as_ref())?;
136 136
137 137 for entry in entries {
138 138 let entry = entry?;
139 139 let filename = os_string_to_hg_path_buf(entry.file_name())?;
140 140 let file_type = entry.file_type()?;
141 141 if skip_dot_hg && filename.as_bytes() == b".hg" && file_type.is_dir() {
142 142 return Ok(vec![]);
143 143 } else {
144 144 results.push((filename, entry))
145 145 }
146 146 }
147 147
148 148 results.sort_unstable_by_key(|e| e.0.clone());
149 149 Ok(results)
150 150 }
151 151
152 152 /// The file corresponding to the dirstate entry was found on the filesystem.
153 153 fn dispatch_found(
154 154 filename: impl AsRef<HgPath>,
155 155 entry: DirstateEntry,
156 156 metadata: HgMetadata,
157 157 copy_map: &CopyMap,
158 158 options: StatusOptions,
159 159 ) -> Dispatch {
160 160 match entry.state() {
161 161 EntryState::Normal => {
162 162 let mode = entry.mode();
163 163 let size = entry.size();
164 164 let mtime = entry.mtime();
165 165
166 166 let HgMetadata {
167 167 st_mode,
168 168 st_size,
169 169 st_mtime,
170 170 ..
171 171 } = metadata;
172 172
173 173 let size_changed = mod_compare(size, st_size as i32);
174 174 let mode_changed =
175 175 (mode ^ st_mode as i32) & 0o100 != 0o000 && options.check_exec;
176 176 let metadata_changed = size >= 0 && (size_changed || mode_changed);
177 177 let other_parent = size == SIZE_FROM_OTHER_PARENT;
178 178
179 179 if metadata_changed
180 180 || other_parent
181 181 || copy_map.contains_key(filename.as_ref())
182 182 {
183 183 if metadata.is_symlink() && size_changed {
184 184 // issue6456: Size returned may be longer due to encryption
185 185 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
186 186 Dispatch::Unsure
187 187 } else {
188 188 Dispatch::Modified
189 189 }
190 190 } else if mod_compare(mtime, st_mtime as i32)
191 191 || st_mtime == options.last_normal_time
192 192 {
193 193 // the file may have just been marked as normal and
194 194 // it may have changed in the same second without
195 195 // changing its size. This can happen if we quickly
196 196 // do multiple commits. Force lookup, so we don't
197 197 // miss such a racy file change.
198 198 Dispatch::Unsure
199 199 } else if options.list_clean {
200 200 Dispatch::Clean
201 201 } else {
202 202 Dispatch::None
203 203 }
204 204 }
205 205 EntryState::Merged => Dispatch::Modified,
206 206 EntryState::Added => Dispatch::Added,
207 207 EntryState::Removed => Dispatch::Removed,
208 EntryState::Unknown => Dispatch::Unknown,
209 208 }
210 209 }
211 210
212 211 /// The file corresponding to this Dirstate entry is missing.
213 212 fn dispatch_missing(state: EntryState) -> Dispatch {
214 213 match state {
215 214 // File was removed from the filesystem during commands
216 215 EntryState::Normal | EntryState::Merged | EntryState::Added => {
217 216 Dispatch::Deleted
218 217 }
219 218 // File was removed, everything is normal
220 219 EntryState::Removed => Dispatch::Removed,
221 // File is unknown to Mercurial, everything is normal
222 EntryState::Unknown => Dispatch::Unknown,
223 220 }
224 221 }
225 222
226 223 fn dispatch_os_error(e: &std::io::Error) -> Dispatch {
227 224 Dispatch::Bad(BadMatch::OsError(
228 225 e.raw_os_error().expect("expected real OS error"),
229 226 ))
230 227 }
231 228
232 229 lazy_static! {
233 230 static ref DEFAULT_WORK: HashSet<&'static HgPath> = {
234 231 let mut h = HashSet::new();
235 232 h.insert(HgPath::new(b""));
236 233 h
237 234 };
238 235 }
239 236
240 237 #[derive(Debug, Copy, Clone)]
241 238 pub struct StatusOptions {
242 239 /// Remember the most recent modification timeslot for status, to make
243 240 /// sure we won't miss future size-preserving file content modifications
244 241 /// that happen within the same timeslot.
245 242 pub last_normal_time: i64,
246 243 /// Whether we are on a filesystem with UNIX-like exec flags
247 244 pub check_exec: bool,
248 245 pub list_clean: bool,
249 246 pub list_unknown: bool,
250 247 pub list_ignored: bool,
251 248 /// Whether to collect traversed dirs for applying a callback later.
252 249 /// Used by `hg purge` for example.
253 250 pub collect_traversed_dirs: bool,
254 251 }
255 252
256 253 #[derive(Debug, Default)]
257 254 pub struct DirstateStatus<'a> {
258 255 /// Tracked files whose contents have changed since the parent revision
259 256 pub modified: Vec<HgPathCow<'a>>,
260 257
261 258 /// Newly-tracked files that were not present in the parent
262 259 pub added: Vec<HgPathCow<'a>>,
263 260
264 261 /// Previously-tracked files that have been (re)moved with an hg command
265 262 pub removed: Vec<HgPathCow<'a>>,
266 263
267 264 /// (Still) tracked files that are missing, (re)moved with a non-hg
268 265 /// command
269 266 pub deleted: Vec<HgPathCow<'a>>,
270 267
271 268 /// Tracked files that are up to date with the parent.
272 269 /// Only populated if `StatusOptions::list_clean` is true.
273 270 pub clean: Vec<HgPathCow<'a>>,
274 271
275 272 /// Files in the working directory that are ignored with `.hgignore`.
276 273 /// Only populated if `StatusOptions::list_ignored` is true.
277 274 pub ignored: Vec<HgPathCow<'a>>,
278 275
279 276 /// Files in the working directory that are neither tracked nor ignored.
280 277 /// Only populated if `StatusOptions::list_unknown` is true.
281 278 pub unknown: Vec<HgPathCow<'a>>,
282 279
283 280 /// Was explicitly matched but cannot be found/accessed
284 281 pub bad: Vec<(HgPathCow<'a>, BadMatch)>,
285 282
286 283 /// Either clean or modified, but we can’t tell from filesystem metadata
287 284 /// alone. The file contents need to be read and compared with that in
288 285 /// the parent.
289 286 pub unsure: Vec<HgPathCow<'a>>,
290 287
291 288 /// Only filled if `collect_traversed_dirs` is `true`
292 289 pub traversed: Vec<HgPathCow<'a>>,
293 290
294 291 /// Whether `status()` made changes to the `DirstateMap` that should be
295 292 /// written back to disk
296 293 pub dirty: bool,
297 294 }
298 295
/// Errors that can interrupt a status computation. `derive_more::From`
/// provides a `From` conversion for each wrapped error type.
299 296 #[derive(Debug, derive_more::From)]
300 297 pub enum StatusError {
301 298 /// Generic IO error
302 299 IO(std::io::Error),
303 300 /// An invalid path that cannot be represented in Mercurial was found
304 301 Path(HgPathError),
305 302 /// An invalid "ignore" pattern was found
306 303 Pattern(PatternError),
307 304 /// Corrupted dirstate (the dirstate-v2 data could not be parsed)
308 305 DirstateV2ParseError(DirstateV2ParseError),
309 306 }
310 307
/// Shorthand for a `Result` whose error type is `StatusError`.
311 308 pub type StatusResult<T> = Result<T, StatusError>;
312 309
313 310 impl fmt::Display for StatusError {
314 311 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
315 312 match self {
316 313 StatusError::IO(error) => error.fmt(f),
317 314 StatusError::Path(error) => error.fmt(f),
318 315 StatusError::Pattern(error) => error.fmt(f),
319 316 StatusError::DirstateV2ParseError(_) => {
320 317 f.write_str("dirstate-v2 parse error")
321 318 }
322 319 }
323 320 }
324 321 }
325 322
326 323 /// Gives information about which files are changed in the working directory
327 324 /// and how, compared to the revision we're based on
328 325 pub struct Status<'a, M: ?Sized + Matcher + Sync> {
// Dirstate that filesystem entries are compared against.
329 326 dmap: &'a DirstateMap,
// Selects which paths are reported.
330 327 pub(crate) matcher: &'a M,
// Directory that `HgPath`s are joined onto to get filesystem paths.
331 328 root_dir: PathBuf,
332 329 pub(crate) options: StatusOptions,
// Predicate behind `is_ignored`; built by `new`.
333 330 ignore_fn: IgnoreFnType<'a>,
334 331 }
335 332
336 333 impl<'a, M> Status<'a, M>
337 334 where
338 335 M: ?Sized + Matcher + Sync,
339 336 {
340 337 pub fn new(
341 338 dmap: &'a DirstateMap,
342 339 matcher: &'a M,
343 340 root_dir: PathBuf,
344 341 ignore_files: Vec<PathBuf>,
345 342 options: StatusOptions,
346 343 ) -> StatusResult<(Self, Vec<PatternFileWarning>)> {
347 344 // Needs to outlive `dir_ignore_fn` since it's captured.
348 345
349 346 let (ignore_fn, warnings): (IgnoreFnType, _) =
350 347 if options.list_ignored || options.list_unknown {
351 348 get_ignore_function(ignore_files, &root_dir, &mut |_| {})?
352 349 } else {
353 350 (Box::new(|&_| true), vec![])
354 351 };
355 352
356 353 Ok((
357 354 Self {
358 355 dmap,
359 356 matcher,
360 357 root_dir,
361 358 options,
362 359 ignore_fn,
363 360 },
364 361 warnings,
365 362 ))
366 363 }
367 364
368 365 /// Is the path ignored?
369 366 pub fn is_ignored(&self, path: impl AsRef<HgPath>) -> bool {
370 367 (self.ignore_fn)(path.as_ref())
371 368 }
372 369
373 370 /// Is the path or one of its ancestors ignored?
374 371 pub fn dir_ignore(&self, dir: impl AsRef<HgPath>) -> bool {
375 372 // Only involve ignore mechanism if we're listing unknowns or ignored.
376 373 if self.options.list_ignored || self.options.list_unknown {
377 374 if self.is_ignored(&dir) {
378 375 true
379 376 } else {
380 377 for p in find_dirs(dir.as_ref()) {
381 378 if self.is_ignored(p) {
382 379 return true;
383 380 }
384 381 }
385 382 false
386 383 }
387 384 } else {
388 385 true
389 386 }
390 387 }
391 388
392 389 /// Get stat data about the files explicitly specified by the matcher.
393 390 /// Returns a tuple of the directories that need to be traversed and the
394 391 /// files with their corresponding `Dispatch`.
395 392 /// TODO subrepos
396 393 #[timed]
397 394 pub fn walk_explicit(
398 395 &self,
399 396 traversed_sender: crossbeam_channel::Sender<HgPathBuf>,
400 397 ) -> (Vec<DispatchedPath<'a>>, Vec<DispatchedPath<'a>>) {
401 398 self.matcher
402 399 .file_set()
403 400 .unwrap_or(&DEFAULT_WORK)
404 401 .par_iter()
405 402 .flat_map(|&filename| -> Option<_> {
406 403 // TODO normalization
407 404 let normalized = filename;
408 405
409 406 let buf = match hg_path_to_path_buf(normalized) {
410 407 Ok(x) => x,
411 408 Err(_) => {
412 409 return Some((
413 410 Cow::Borrowed(normalized),
414 411 INVALID_PATH_DISPATCH,
415 412 ))
416 413 }
417 414 };
418 415 let target = self.root_dir.join(buf);
419 416 let st = target.symlink_metadata();
420 417 let in_dmap = self.dmap.get(normalized);
421 418 match st {
422 419 Ok(meta) => {
423 420 let file_type = meta.file_type();
424 421 return if file_type.is_file() || file_type.is_symlink()
425 422 {
426 423 if let Some(entry) = in_dmap {
427 424 return Some((
428 425 Cow::Borrowed(normalized),
429 426 dispatch_found(
430 427 &normalized,
431 428 *entry,
432 429 HgMetadata::from_metadata(meta),
433 430 &self.dmap.copy_map,
434 431 self.options,
435 432 ),
436 433 ));
437 434 }
438 435 Some((
439 436 Cow::Borrowed(normalized),
440 437 Dispatch::Unknown,
441 438 ))
442 439 } else if file_type.is_dir() {
443 440 if self.options.collect_traversed_dirs {
444 441 traversed_sender
445 442 .send(normalized.to_owned())
446 443 .expect("receiver should outlive sender");
447 444 }
448 445 Some((
449 446 Cow::Borrowed(normalized),
450 447 Dispatch::Directory {
451 448 was_file: in_dmap.is_some(),
452 449 },
453 450 ))
454 451 } else {
455 452 Some((
456 453 Cow::Borrowed(normalized),
457 454 Dispatch::Bad(BadMatch::BadType(
458 455 // TODO do more than unknown
459 456 // Support for all `BadType` variant
460 457 // varies greatly between platforms.
461 458 // So far, no tests check the type and
462 459 // this should be good enough for most
463 460 // users.
464 461 BadType::Unknown,
465 462 )),
466 463 ))
467 464 };
468 465 }
469 466 Err(_) => {
470 467 if let Some(entry) = in_dmap {
471 468 return Some((
472 469 Cow::Borrowed(normalized),
473 470 dispatch_missing(entry.state()),
474 471 ));
475 472 }
476 473 }
477 474 };
478 475 None
479 476 })
480 477 .partition(|(_, dispatch)| match dispatch {
481 478 Dispatch::Directory { .. } => true,
482 479 _ => false,
483 480 })
484 481 }
485 482
486 483 /// Walk the working directory recursively to look for changes compared to
487 484 /// the current `DirstateMap`.
488 485 ///
489 486 /// This takes a mutable reference to the results to account for the
490 487 /// `extend` in timings
491 488 #[timed]
492 489 pub fn traverse(
493 490 &self,
494 491 path: impl AsRef<HgPath>,
495 492 old_results: &FastHashMap<HgPathCow<'a>, Dispatch>,
496 493 results: &mut Vec<DispatchedPath<'a>>,
497 494 traversed_sender: crossbeam_channel::Sender<HgPathBuf>,
498 495 ) {
499 496 // The traversal is done in parallel, so use a channel to gather
500 497 // entries. `crossbeam_channel::Sender` is `Sync`, while `mpsc::Sender`
501 498 // is not.
502 499 let (files_transmitter, files_receiver) =
503 500 crossbeam_channel::unbounded();
504 501
505 502 self.traverse_dir(
506 503 &files_transmitter,
507 504 path,
508 505 &old_results,
509 506 traversed_sender,
510 507 );
511 508
512 509 // Disconnect the channel so the receiver stops waiting
513 510 drop(files_transmitter);
514 511
515 512 let new_results = files_receiver
516 513 .into_iter()
517 514 .par_bridge()
518 515 .map(|(f, d)| (Cow::Owned(f), d));
519 516
520 517 results.par_extend(new_results);
521 518 }
522 519
523 520 /// Dispatch a single entry (file, folder, symlink...) found during
524 521 /// `traverse`. If the entry is a folder that needs to be traversed, it
525 522 /// will be handled in a separate thread.
526 523 fn handle_traversed_entry<'b>(
527 524 &'a self,
528 525 scope: &rayon::Scope<'b>,
529 526 files_sender: &'b crossbeam_channel::Sender<(HgPathBuf, Dispatch)>,
530 527 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
531 528 filename: HgPathBuf,
532 529 dir_entry: DirEntry,
533 530 traversed_sender: crossbeam_channel::Sender<HgPathBuf>,
534 531 ) -> IoResult<()>
535 532 where
536 533 'a: 'b,
537 534 {
538 535 let file_type = dir_entry.file_type()?;
539 536 let entry_option = self.dmap.get(&filename);
540 537
541 538 if filename.as_bytes() == b".hg" {
542 539 // Could be a directory or a symlink
543 540 return Ok(());
544 541 }
545 542
546 543 if file_type.is_dir() {
547 544 self.handle_traversed_dir(
548 545 scope,
549 546 files_sender,
550 547 old_results,
551 548 entry_option,
552 549 filename,
553 550 traversed_sender,
554 551 );
555 552 } else if file_type.is_file() || file_type.is_symlink() {
556 553 if let Some(entry) = entry_option {
557 554 if self.matcher.matches_everything()
558 555 || self.matcher.matches(&filename)
559 556 {
560 557 let metadata = dir_entry.metadata()?;
561 558 files_sender
562 559 .send((
563 560 filename.to_owned(),
564 561 dispatch_found(
565 562 &filename,
566 563 *entry,
567 564 HgMetadata::from_metadata(metadata),
568 565 &self.dmap.copy_map,
569 566 self.options,
570 567 ),
571 568 ))
572 569 .unwrap();
573 570 }
574 571 } else if (self.matcher.matches_everything()
575 572 || self.matcher.matches(&filename))
576 573 && !self.is_ignored(&filename)
577 574 {
578 575 if (self.options.list_ignored
579 576 || self.matcher.exact_match(&filename))
580 577 && self.dir_ignore(&filename)
581 578 {
582 579 if self.options.list_ignored {
583 580 files_sender
584 581 .send((filename.to_owned(), Dispatch::Ignored))
585 582 .unwrap();
586 583 }
587 584 } else if self.options.list_unknown {
588 585 files_sender
589 586 .send((filename.to_owned(), Dispatch::Unknown))
590 587 .unwrap();
591 588 }
592 589 } else if self.is_ignored(&filename) && self.options.list_ignored {
593 590 if self.matcher.matches(&filename) {
594 591 files_sender
595 592 .send((filename.to_owned(), Dispatch::Ignored))
596 593 .unwrap();
597 594 }
598 595 }
599 596 } else if let Some(entry) = entry_option {
600 597 // Used to be a file or a folder, now something else.
601 598 if self.matcher.matches_everything()
602 599 || self.matcher.matches(&filename)
603 600 {
604 601 files_sender
605 602 .send((
606 603 filename.to_owned(),
607 604 dispatch_missing(entry.state()),
608 605 ))
609 606 .unwrap();
610 607 }
611 608 }
612 609
613 610 Ok(())
614 611 }
615 612
616 613 /// A directory was found in the filesystem and needs to be traversed
617 614 fn handle_traversed_dir<'b>(
618 615 &'a self,
619 616 scope: &rayon::Scope<'b>,
620 617 files_sender: &'b crossbeam_channel::Sender<(HgPathBuf, Dispatch)>,
621 618 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
622 619 entry_option: Option<&'a DirstateEntry>,
623 620 directory: HgPathBuf,
624 621 traversed_sender: crossbeam_channel::Sender<HgPathBuf>,
625 622 ) where
626 623 'a: 'b,
627 624 {
628 625 scope.spawn(move |_| {
629 626 // Nested `if` until `rust-lang/rust#53668` is stable
630 627 if let Some(entry) = entry_option {
631 628 // Used to be a file, is now a folder
632 629 if self.matcher.matches_everything()
633 630 || self.matcher.matches(&directory)
634 631 {
635 632 files_sender
636 633 .send((
637 634 directory.to_owned(),
638 635 dispatch_missing(entry.state()),
639 636 ))
640 637 .unwrap();
641 638 }
642 639 }
643 640 // Do we need to traverse it?
644 641 if !self.is_ignored(&directory) || self.options.list_ignored {
645 642 self.traverse_dir(
646 643 files_sender,
647 644 directory,
648 645 &old_results,
649 646 traversed_sender,
650 647 )
651 648 }
652 649 });
653 650 }
654 651
655 652 /// Decides whether the directory needs to be listed, and if so handles the
656 653 /// entries in a separate thread.
657 654 fn traverse_dir(
658 655 &self,
659 656 files_sender: &crossbeam_channel::Sender<(HgPathBuf, Dispatch)>,
660 657 directory: impl AsRef<HgPath>,
661 658 old_results: &FastHashMap<Cow<HgPath>, Dispatch>,
662 659 traversed_sender: crossbeam_channel::Sender<HgPathBuf>,
663 660 ) {
664 661 let directory = directory.as_ref();
665 662
666 663 if self.options.collect_traversed_dirs {
667 664 traversed_sender
668 665 .send(directory.to_owned())
669 666 .expect("receiver should outlive sender");
670 667 }
671 668
672 669 let visit_entries = match self.matcher.visit_children_set(directory) {
673 670 VisitChildrenSet::Empty => return,
674 671 VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
675 672 VisitChildrenSet::Set(set) => Some(set),
676 673 };
677 674 let buf = match hg_path_to_path_buf(directory) {
678 675 Ok(b) => b,
679 676 Err(_) => {
680 677 files_sender
681 678 .send((directory.to_owned(), INVALID_PATH_DISPATCH))
682 679 .expect("receiver should outlive sender");
683 680 return;
684 681 }
685 682 };
686 683 let dir_path = self.root_dir.join(buf);
687 684
688 685 let skip_dot_hg = !directory.as_bytes().is_empty();
689 686 let entries = match list_directory(dir_path, skip_dot_hg) {
690 687 Err(e) => {
691 688 files_sender
692 689 .send((directory.to_owned(), dispatch_os_error(&e)))
693 690 .expect("receiver should outlive sender");
694 691 return;
695 692 }
696 693 Ok(entries) => entries,
697 694 };
698 695
699 696 rayon::scope(|scope| {
700 697 for (filename, dir_entry) in entries {
701 698 if let Some(ref set) = visit_entries {
702 699 if !set.contains(filename.deref()) {
703 700 continue;
704 701 }
705 702 }
706 703 // TODO normalize
707 704 let filename = if directory.is_empty() {
708 705 filename.to_owned()
709 706 } else {
710 707 directory.join(&filename)
711 708 };
712 709
713 710 if !old_results.contains_key(filename.deref()) {
714 711 match self.handle_traversed_entry(
715 712 scope,
716 713 files_sender,
717 714 old_results,
718 715 filename,
719 716 dir_entry,
720 717 traversed_sender.clone(),
721 718 ) {
722 719 Err(e) => {
723 720 files_sender
724 721 .send((
725 722 directory.to_owned(),
726 723 dispatch_os_error(&e),
727 724 ))
728 725 .expect("receiver should outlive sender");
729 726 }
730 727 Ok(_) => {}
731 728 }
732 729 }
733 730 }
734 731 })
735 732 }
736 733
737 734 /// Add the files in the dirstate to the results.
738 735 ///
739 736 /// This takes a mutable reference to the results to account for the
740 737 /// `extend` in timings
741 738 #[timed]
742 739 pub fn extend_from_dmap(&self, results: &mut Vec<DispatchedPath<'a>>) {
743 740 results.par_extend(
744 741 self.dmap
745 742 .par_iter()
746 743 .filter(|(path, _)| self.matcher.matches(path))
747 744 .map(move |(filename, entry)| {
748 745 let filename: &HgPath = filename;
749 746 let filename_as_path = match hg_path_to_path_buf(filename)
750 747 {
751 748 Ok(f) => f,
752 749 Err(_) => {
753 750 return (
754 751 Cow::Borrowed(filename),
755 752 INVALID_PATH_DISPATCH,
756 753 )
757 754 }
758 755 };
759 756 let meta = self
760 757 .root_dir
761 758 .join(filename_as_path)
762 759 .symlink_metadata();
763 760 match meta {
764 761 Ok(m)
765 762 if !(m.file_type().is_file()
766 763 || m.file_type().is_symlink()) =>
767 764 {
768 765 (
769 766 Cow::Borrowed(filename),
770 767 dispatch_missing(entry.state()),
771 768 )
772 769 }
773 770 Ok(m) => (
774 771 Cow::Borrowed(filename),
775 772 dispatch_found(
776 773 filename,
777 774 *entry,
778 775 HgMetadata::from_metadata(m),
779 776 &self.dmap.copy_map,
780 777 self.options,
781 778 ),
782 779 ),
783 780 Err(e)
784 781 if e.kind() == ErrorKind::NotFound
785 782 || e.raw_os_error() == Some(20) =>
786 783 {
787 784 // Rust does not yet have an `ErrorKind` for
788 785 // `NotADirectory` (errno 20)
789 786 // It happens if the dirstate contains `foo/bar`
790 787 // and foo is not a
791 788 // directory
792 789 (
793 790 Cow::Borrowed(filename),
794 791 dispatch_missing(entry.state()),
795 792 )
796 793 }
797 794 Err(e) => {
798 795 (Cow::Borrowed(filename), dispatch_os_error(&e))
799 796 }
800 797 }
801 798 }),
802 799 );
803 800 }
804 801
805 802 /// Checks all files that are in the dirstate but were not found during the
806 803 /// working directory traversal. This means that the rest must
807 804 /// be either ignored, under a symlink or under a new nested repo.
808 805 ///
809 806 /// This takes a mutable reference to the results to account for the
810 807 /// `extend` in timings
811 808 #[timed]
812 809 pub fn handle_unknowns(&self, results: &mut Vec<DispatchedPath<'a>>) {
813 810 let to_visit: Vec<(&HgPath, &DirstateEntry)> =
814 811 if results.is_empty() && self.matcher.matches_everything() {
815 812 self.dmap.iter().map(|(f, e)| (f.deref(), e)).collect()
816 813 } else {
817 814 // Only convert to a hashmap if needed.
818 815 let old_results: FastHashMap<_, _> =
819 816 results.iter().cloned().collect();
820 817 self.dmap
821 818 .iter()
822 819 .filter_map(move |(f, e)| {
823 820 if !old_results.contains_key(f.deref())
824 821 && self.matcher.matches(f)
825 822 {
826 823 Some((f.deref(), e))
827 824 } else {
828 825 None
829 826 }
830 827 })
831 828 .collect()
832 829 };
833 830
834 831 let path_auditor = PathAuditor::new(&self.root_dir);
835 832
836 833 let new_results = to_visit.into_par_iter().filter_map(
837 834 |(filename, entry)| -> Option<_> {
838 835 // Report ignored items in the dmap as long as they are not
839 836 // under a symlink directory.
840 837 if path_auditor.check(filename) {
841 838 // TODO normalize for case-insensitive filesystems
842 839 let buf = match hg_path_to_path_buf(filename) {
843 840 Ok(x) => x,
844 841 Err(_) => {
845 842 return Some((
846 843 Cow::Owned(filename.to_owned()),
847 844 INVALID_PATH_DISPATCH,
848 845 ));
849 846 }
850 847 };
851 848 Some((
852 849 Cow::Owned(filename.to_owned()),
853 850 match self.root_dir.join(&buf).symlink_metadata() {
854 851 // File was just ignored, no links, and exists
855 852 Ok(meta) => {
856 853 let metadata = HgMetadata::from_metadata(meta);
857 854 dispatch_found(
858 855 filename,
859 856 *entry,
860 857 metadata,
861 858 &self.dmap.copy_map,
862 859 self.options,
863 860 )
864 861 }
865 862 // File doesn't exist
866 863 Err(_) => dispatch_missing(entry.state()),
867 864 },
868 865 ))
869 866 } else {
870 867 // It's either missing or under a symlink directory which
871 868 // we, in this case, report as missing.
872 869 Some((
873 870 Cow::Owned(filename.to_owned()),
874 871 dispatch_missing(entry.state()),
875 872 ))
876 873 }
877 874 },
878 875 );
879 876
880 877 results.par_extend(new_results);
881 878 }
882 879 }
883 880
884 881 #[timed]
885 882 pub fn build_response<'a>(
886 883 results: impl IntoIterator<Item = DispatchedPath<'a>>,
887 884 traversed: Vec<HgPathCow<'a>>,
888 885 ) -> DirstateStatus<'a> {
889 886 let mut unsure = vec![];
890 887 let mut modified = vec![];
891 888 let mut added = vec![];
892 889 let mut removed = vec![];
893 890 let mut deleted = vec![];
894 891 let mut clean = vec![];
895 892 let mut ignored = vec![];
896 893 let mut unknown = vec![];
897 894 let mut bad = vec![];
898 895
899 896 for (filename, dispatch) in results.into_iter() {
900 897 match dispatch {
901 898 Dispatch::Unknown => unknown.push(filename),
902 899 Dispatch::Unsure => unsure.push(filename),
903 900 Dispatch::Modified => modified.push(filename),
904 901 Dispatch::Added => added.push(filename),
905 902 Dispatch::Removed => removed.push(filename),
906 903 Dispatch::Deleted => deleted.push(filename),
907 904 Dispatch::Clean => clean.push(filename),
908 905 Dispatch::Ignored => ignored.push(filename),
909 906 Dispatch::None => {}
910 907 Dispatch::Bad(reason) => bad.push((filename, reason)),
911 908 Dispatch::Directory { .. } => {}
912 909 }
913 910 }
914 911
915 912 DirstateStatus {
916 913 modified,
917 914 added,
918 915 removed,
919 916 deleted,
920 917 clean,
921 918 ignored,
922 919 unknown,
923 920 bad,
924 921 unsure,
925 922 traversed,
926 923 dirty: false,
927 924 }
928 925 }
929 926
930 927 /// Get the status of files in the working directory.
931 928 ///
932 929 /// This is the current entry-point for `hg-core` and is realistically unusable
933 930 /// outside of a Python context because its arguments need to provide a lot of
934 931 /// information that will not be necessary in the future.
935 932 #[timed]
936 933 pub fn status<'a>(
937 934 dmap: &'a DirstateMap,
938 935 matcher: &'a (dyn Matcher + Sync),
939 936 root_dir: PathBuf,
940 937 ignore_files: Vec<PathBuf>,
941 938 options: StatusOptions,
942 939 ) -> StatusResult<(DirstateStatus<'a>, Vec<PatternFileWarning>)> {
943 940 let (status, warnings) =
944 941 Status::new(dmap, matcher, root_dir, ignore_files, options)?;
945 942
946 943 Ok((status.run()?, warnings))
947 944 }
@@ -1,1315 +1,1309 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::convert::TryInto;
5 5 use std::path::PathBuf;
6 6
7 7 use super::on_disk;
8 8 use super::on_disk::DirstateV2ParseError;
9 9 use super::path_with_basename::WithBasename;
10 10 use crate::dirstate::parsers::pack_entry;
11 11 use crate::dirstate::parsers::packed_entry_size;
12 12 use crate::dirstate::parsers::parse_dirstate_entries;
13 13 use crate::dirstate::parsers::Timestamp;
14 14 use crate::dirstate::MTIME_UNSET;
15 15 use crate::dirstate::SIZE_FROM_OTHER_PARENT;
16 16 use crate::dirstate::SIZE_NON_NORMAL;
17 17 use crate::dirstate::V1_RANGEMASK;
18 18 use crate::matchers::Matcher;
19 19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 20 use crate::CopyMapIter;
21 21 use crate::DirstateEntry;
22 22 use crate::DirstateError;
23 23 use crate::DirstateParents;
24 24 use crate::DirstateStatus;
25 25 use crate::EntryState;
26 26 use crate::FastHashMap;
27 27 use crate::PatternFileWarning;
28 28 use crate::StateMapIter;
29 29 use crate::StatusError;
30 30 use crate::StatusOptions;
31 31
32 32 /// Append to an existing data file if the amount of unreachable data (no
33 33 /// longer used) is less than this fraction of the total amount of existing data.
34 34 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
35 35
/// A dirstate stored as a tree of nodes under `root`, together with the
/// `on_disk` bytes that parts of the tree may still borrow from.
36 36 pub struct DirstateMap<'on_disk> {
37 37 /// Contents of the `.hg/dirstate` file
38 38 pub(super) on_disk: &'on_disk [u8],
39 39
// Top-level nodes of the tree.
40 40 pub(super) root: ChildNodes<'on_disk>,
41 41
42 42 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
43 43 pub(super) nodes_with_entry_count: u32,
44 44
45 45 /// Number of nodes anywhere in the tree that have
46 46 /// `.copy_source.is_some()`.
47 47 pub(super) nodes_with_copy_source_count: u32,
48 48
49 49 /// See on_disk::Header
50 50 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
51 51
52 52 /// How many bytes of `on_disk` are not used anymore
53 53 pub(super) unreachable_bytes: u32,
54 54 }
55 55
/// Key type of the in-memory child maps: a path that also knows its base name.
///
56 56 /// Using a plain `HgPathBuf` of the full path from the repository root as a
57 57 /// map key would also work: all paths in a given map have the same parent
58 58 /// path, so comparing full paths gives the same result as comparing base
59 59 /// names. However `HashMap` would waste time always re-hashing the same
60 60 /// string prefix.
61 61 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
62 62
63 63 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
64 64 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
65 65 pub(super) enum BorrowedPath<'tree, 'on_disk> {
// Borrows an owned path for the `'tree` lifetime.
66 66 InMemory(&'tree HgPathBuf),
// Borrows a path for the `'on_disk` lifetime.
67 67 OnDisk(&'on_disk HgPath),
68 68 }
69 69
/// A set of sibling nodes: either a hash map held in memory, or a slice of
/// `on_disk::Node` borrowed for the `'on_disk` lifetime.
70 70 pub(super) enum ChildNodes<'on_disk> {
71 71 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
72 72 OnDisk(&'on_disk [on_disk::Node]),
73 73 }
74 74
/// Borrowed view of a `ChildNodes`, as returned by `ChildNodes::as_ref`.
75 75 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
76 76 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
77 77 OnDisk(&'on_disk [on_disk::Node]),
78 78 }
79 79
/// Reference to a single node: an in-memory map key with its value, or an
/// on-disk node.
80 80 pub(super) enum NodeRef<'tree, 'on_disk> {
81 81 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
82 82 OnDisk(&'on_disk on_disk::Node),
83 83 }
84 84
85 85 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
86 86 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
87 87 match *self {
88 88 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
89 89 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
90 90 }
91 91 }
92 92 }
93 93
94 94 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
95 95 type Target = HgPath;
96 96
97 97 fn deref(&self) -> &HgPath {
98 98 match *self {
99 99 BorrowedPath::InMemory(in_memory) => in_memory,
100 100 BorrowedPath::OnDisk(on_disk) => on_disk,
101 101 }
102 102 }
103 103 }
104 104
105 105 impl Default for ChildNodes<'_> {
106 106 fn default() -> Self {
107 107 ChildNodes::InMemory(Default::default())
108 108 }
109 109 }
110 110
111 111 impl<'on_disk> ChildNodes<'on_disk> {
112 112 pub(super) fn as_ref<'tree>(
113 113 &'tree self,
114 114 ) -> ChildNodesRef<'tree, 'on_disk> {
115 115 match self {
116 116 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
117 117 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
118 118 }
119 119 }
120 120
121 121 pub(super) fn is_empty(&self) -> bool {
122 122 match self {
123 123 ChildNodes::InMemory(nodes) => nodes.is_empty(),
124 124 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
125 125 }
126 126 }
127 127
128 128 fn make_mut(
129 129 &mut self,
130 130 on_disk: &'on_disk [u8],
131 131 unreachable_bytes: &mut u32,
132 132 ) -> Result<
133 133 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
134 134 DirstateV2ParseError,
135 135 > {
136 136 match self {
137 137 ChildNodes::InMemory(nodes) => Ok(nodes),
138 138 ChildNodes::OnDisk(nodes) => {
139 139 *unreachable_bytes +=
140 140 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
141 141 let nodes = nodes
142 142 .iter()
143 143 .map(|node| {
144 144 Ok((
145 145 node.path(on_disk)?,
146 146 node.to_in_memory_node(on_disk)?,
147 147 ))
148 148 })
149 149 .collect::<Result<_, _>>()?;
150 150 *self = ChildNodes::InMemory(nodes);
151 151 match self {
152 152 ChildNodes::InMemory(nodes) => Ok(nodes),
153 153 ChildNodes::OnDisk(_) => unreachable!(),
154 154 }
155 155 }
156 156 }
157 157 }
158 158 }
159 159
160 160 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
161 161 pub(super) fn get(
162 162 &self,
163 163 base_name: &HgPath,
164 164 on_disk: &'on_disk [u8],
165 165 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
166 166 match self {
167 167 ChildNodesRef::InMemory(nodes) => Ok(nodes
168 168 .get_key_value(base_name)
169 169 .map(|(k, v)| NodeRef::InMemory(k, v))),
170 170 ChildNodesRef::OnDisk(nodes) => {
171 171 let mut parse_result = Ok(());
172 172 let search_result = nodes.binary_search_by(|node| {
173 173 match node.base_name(on_disk) {
174 174 Ok(node_base_name) => node_base_name.cmp(base_name),
175 175 Err(e) => {
176 176 parse_result = Err(e);
177 177 // Dummy comparison result, `search_result` won’t
178 178 // be used since `parse_result` is an error
179 179 std::cmp::Ordering::Equal
180 180 }
181 181 }
182 182 });
183 183 parse_result.map(|()| {
184 184 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
185 185 })
186 186 }
187 187 }
188 188 }
189 189
190 190 /// Iterate in undefined order
191 191 pub(super) fn iter(
192 192 &self,
193 193 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
194 194 match self {
195 195 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
196 196 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
197 197 ),
198 198 ChildNodesRef::OnDisk(nodes) => {
199 199 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
200 200 }
201 201 }
202 202 }
203 203
204 204 /// Iterate in parallel in undefined order
205 205 pub(super) fn par_iter(
206 206 &self,
207 207 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
208 208 {
209 209 use rayon::prelude::*;
210 210 match self {
211 211 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
212 212 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
213 213 ),
214 214 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
215 215 nodes.par_iter().map(NodeRef::OnDisk),
216 216 ),
217 217 }
218 218 }
219 219
220 220 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
221 221 match self {
222 222 ChildNodesRef::InMemory(nodes) => {
223 223 let mut vec: Vec<_> = nodes
224 224 .iter()
225 225 .map(|(k, v)| NodeRef::InMemory(k, v))
226 226 .collect();
227 227 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
228 228 match node {
229 229 NodeRef::InMemory(path, _node) => path.base_name(),
230 230 NodeRef::OnDisk(_) => unreachable!(),
231 231 }
232 232 }
233 233 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
234 234 // value: https://github.com/rust-lang/rust/issues/34162
235 235 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
236 236 vec
237 237 }
238 238 ChildNodesRef::OnDisk(nodes) => {
239 239 // Nodes on disk are already sorted
240 240 nodes.iter().map(NodeRef::OnDisk).collect()
241 241 }
242 242 }
243 243 }
244 244 }
245 245
246 246 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
247 247 pub(super) fn full_path(
248 248 &self,
249 249 on_disk: &'on_disk [u8],
250 250 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
251 251 match self {
252 252 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
253 253 NodeRef::OnDisk(node) => node.full_path(on_disk),
254 254 }
255 255 }
256 256
257 257 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
258 258 /// HgPath>` detached from `'tree`
259 259 pub(super) fn full_path_borrowed(
260 260 &self,
261 261 on_disk: &'on_disk [u8],
262 262 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
263 263 match self {
264 264 NodeRef::InMemory(path, _node) => match path.full_path() {
265 265 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
266 266 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
267 267 },
268 268 NodeRef::OnDisk(node) => {
269 269 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
270 270 }
271 271 }
272 272 }
273 273
274 274 pub(super) fn base_name(
275 275 &self,
276 276 on_disk: &'on_disk [u8],
277 277 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
278 278 match self {
279 279 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
280 280 NodeRef::OnDisk(node) => node.base_name(on_disk),
281 281 }
282 282 }
283 283
284 284 pub(super) fn children(
285 285 &self,
286 286 on_disk: &'on_disk [u8],
287 287 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
288 288 match self {
289 289 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
290 290 NodeRef::OnDisk(node) => {
291 291 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
292 292 }
293 293 }
294 294 }
295 295
296 296 pub(super) fn has_copy_source(&self) -> bool {
297 297 match self {
298 298 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
299 299 NodeRef::OnDisk(node) => node.has_copy_source(),
300 300 }
301 301 }
302 302
303 303 pub(super) fn copy_source(
304 304 &self,
305 305 on_disk: &'on_disk [u8],
306 306 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
307 307 match self {
308 308 NodeRef::InMemory(_path, node) => {
309 309 Ok(node.copy_source.as_ref().map(|s| &**s))
310 310 }
311 311 NodeRef::OnDisk(node) => node.copy_source(on_disk),
312 312 }
313 313 }
314 314
315 315 pub(super) fn entry(
316 316 &self,
317 317 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
318 318 match self {
319 319 NodeRef::InMemory(_path, node) => {
320 320 Ok(node.data.as_entry().copied())
321 321 }
322 322 NodeRef::OnDisk(node) => node.entry(),
323 323 }
324 324 }
325 325
326 326 pub(super) fn state(
327 327 &self,
328 328 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
329 329 match self {
330 330 NodeRef::InMemory(_path, node) => {
331 331 Ok(node.data.as_entry().map(|entry| entry.state()))
332 332 }
333 333 NodeRef::OnDisk(node) => node.state(),
334 334 }
335 335 }
336 336
337 337 pub(super) fn cached_directory_mtime(
338 338 &self,
339 339 ) -> Option<&'tree on_disk::Timestamp> {
340 340 match self {
341 341 NodeRef::InMemory(_path, node) => match &node.data {
342 342 NodeData::CachedDirectory { mtime } => Some(mtime),
343 343 _ => None,
344 344 },
345 345 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
346 346 }
347 347 }
348 348
349 349 pub(super) fn descendants_with_entry_count(&self) -> u32 {
350 350 match self {
351 351 NodeRef::InMemory(_path, node) => {
352 352 node.descendants_with_entry_count
353 353 }
354 354 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
355 355 }
356 356 }
357 357
358 358 pub(super) fn tracked_descendants_count(&self) -> u32 {
359 359 match self {
360 360 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
361 361 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
362 362 }
363 363 }
364 364 }
365 365
366 366 /// Represents a file or a directory
367 367 #[derive(Default)]
368 368 pub(super) struct Node<'on_disk> {
// What is stored for this node: an entry, a cached directory mtime, or nothing.
369 369 pub(super) data: NodeData,
370 370
// Copy source recorded for this node, if any.
371 371 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
372 372
// Nodes directly below this one.
373 373 pub(super) children: ChildNodes<'on_disk>,
374 374
375 375 /// How many (non-inclusive) descendants of this node have an entry.
376 376 pub(super) descendants_with_entry_count: u32,
377 377
378 378 /// How many (non-inclusive) descendants of this node have an entry whose
379 379 /// state is "tracked".
380 380 pub(super) tracked_descendants_count: u32,
381 381 }
382 382
383 383 pub(super) enum NodeData {
384 384 Entry(DirstateEntry),
385 385 CachedDirectory { mtime: on_disk::Timestamp },
386 386 None,
387 387 }
388 388
389 389 impl Default for NodeData {
390 390 fn default() -> Self {
391 391 NodeData::None
392 392 }
393 393 }
394 394
395 395 impl NodeData {
396 396 fn has_entry(&self) -> bool {
397 397 match self {
398 398 NodeData::Entry(_) => true,
399 399 _ => false,
400 400 }
401 401 }
402 402
403 403 fn as_entry(&self) -> Option<&DirstateEntry> {
404 404 match self {
405 405 NodeData::Entry(entry) => Some(entry),
406 406 _ => None,
407 407 }
408 408 }
409 409 }
410 410
411 411 impl<'on_disk> DirstateMap<'on_disk> {
412 412 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
413 413 Self {
414 414 on_disk,
415 415 root: ChildNodes::default(),
416 416 nodes_with_entry_count: 0,
417 417 nodes_with_copy_source_count: 0,
418 418 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
419 419 unreachable_bytes: 0,
420 420 }
421 421 }
422 422
423 423 #[timed]
424 424 pub fn new_v2(
425 425 on_disk: &'on_disk [u8],
426 426 data_size: usize,
427 427 metadata: &[u8],
428 428 ) -> Result<Self, DirstateError> {
429 429 if let Some(data) = on_disk.get(..data_size) {
430 430 Ok(on_disk::read(data, metadata)?)
431 431 } else {
432 432 Err(DirstateV2ParseError.into())
433 433 }
434 434 }
435 435
436 436 #[timed]
437 437 pub fn new_v1(
438 438 on_disk: &'on_disk [u8],
439 439 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
440 440 let mut map = Self::empty(on_disk);
441 441 if map.on_disk.is_empty() {
442 442 return Ok((map, None));
443 443 }
444 444
445 445 let parents = parse_dirstate_entries(
446 446 map.on_disk,
447 447 |path, entry, copy_source| {
448 448 let tracked = entry.state().is_tracked();
449 449 let node = Self::get_or_insert_node(
450 450 map.on_disk,
451 451 &mut map.unreachable_bytes,
452 452 &mut map.root,
453 453 path,
454 454 WithBasename::to_cow_borrowed,
455 455 |ancestor| {
456 456 if tracked {
457 457 ancestor.tracked_descendants_count += 1
458 458 }
459 459 ancestor.descendants_with_entry_count += 1
460 460 },
461 461 )?;
462 462 assert!(
463 463 !node.data.has_entry(),
464 464 "duplicate dirstate entry in read"
465 465 );
466 466 assert!(
467 467 node.copy_source.is_none(),
468 468 "duplicate dirstate entry in read"
469 469 );
470 470 node.data = NodeData::Entry(*entry);
471 471 node.copy_source = copy_source.map(Cow::Borrowed);
472 472 map.nodes_with_entry_count += 1;
473 473 if copy_source.is_some() {
474 474 map.nodes_with_copy_source_count += 1
475 475 }
476 476 Ok(())
477 477 },
478 478 )?;
479 479 let parents = Some(parents.clone());
480 480
481 481 Ok((map, parents))
482 482 }
483 483
484 484 /// Assuming dirstate-v2 format, returns whether the next write should
485 485 /// append to the existing data file that contains `self.on_disk` (true),
486 486 /// or create a new data file from scratch (false).
487 487 pub(super) fn write_should_append(&self) -> bool {
488 488 let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
489 489 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
490 490 }
491 491
492 492 fn get_node<'tree>(
493 493 &'tree self,
494 494 path: &HgPath,
495 495 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
496 496 let mut children = self.root.as_ref();
497 497 let mut components = path.components();
498 498 let mut component =
499 499 components.next().expect("expected at least one components");
500 500 loop {
501 501 if let Some(child) = children.get(component, self.on_disk)? {
502 502 if let Some(next_component) = components.next() {
503 503 component = next_component;
504 504 children = child.children(self.on_disk)?;
505 505 } else {
506 506 return Ok(Some(child));
507 507 }
508 508 } else {
509 509 return Ok(None);
510 510 }
511 511 }
512 512 }
513 513
514 514 /// Returns a mutable reference to the node at `path` if it exists
515 515 ///
516 516 /// This takes `root` instead of `&mut self` so that callers can mutate
517 517 /// other fields while the returned borrow is still valid
518 518 fn get_node_mut<'tree>(
519 519 on_disk: &'on_disk [u8],
520 520 unreachable_bytes: &mut u32,
521 521 root: &'tree mut ChildNodes<'on_disk>,
522 522 path: &HgPath,
523 523 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
524 524 let mut children = root;
525 525 let mut components = path.components();
526 526 let mut component =
527 527 components.next().expect("expected at least one components");
528 528 loop {
529 529 if let Some(child) = children
530 530 .make_mut(on_disk, unreachable_bytes)?
531 531 .get_mut(component)
532 532 {
533 533 if let Some(next_component) = components.next() {
534 534 component = next_component;
535 535 children = &mut child.children;
536 536 } else {
537 537 return Ok(Some(child));
538 538 }
539 539 } else {
540 540 return Ok(None);
541 541 }
542 542 }
543 543 }
544 544
545 545 pub(super) fn get_or_insert<'tree, 'path>(
546 546 &'tree mut self,
547 547 path: &HgPath,
548 548 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
549 549 Self::get_or_insert_node(
550 550 self.on_disk,
551 551 &mut self.unreachable_bytes,
552 552 &mut self.root,
553 553 path,
554 554 WithBasename::to_cow_owned,
555 555 |_| {},
556 556 )
557 557 }
558 558
559 559 fn get_or_insert_node<'tree, 'path>(
560 560 on_disk: &'on_disk [u8],
561 561 unreachable_bytes: &mut u32,
562 562 root: &'tree mut ChildNodes<'on_disk>,
563 563 path: &'path HgPath,
564 564 to_cow: impl Fn(
565 565 WithBasename<&'path HgPath>,
566 566 ) -> WithBasename<Cow<'on_disk, HgPath>>,
567 567 mut each_ancestor: impl FnMut(&mut Node),
568 568 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
569 569 let mut child_nodes = root;
570 570 let mut inclusive_ancestor_paths =
571 571 WithBasename::inclusive_ancestors_of(path);
572 572 let mut ancestor_path = inclusive_ancestor_paths
573 573 .next()
574 574 .expect("expected at least one inclusive ancestor");
575 575 loop {
576 576 // TODO: can we avoid allocating an owned key in cases where the
577 577 // map already contains that key, without introducing double
578 578 // lookup?
579 579 let child_node = child_nodes
580 580 .make_mut(on_disk, unreachable_bytes)?
581 581 .entry(to_cow(ancestor_path))
582 582 .or_default();
583 583 if let Some(next) = inclusive_ancestor_paths.next() {
584 584 each_ancestor(child_node);
585 585 ancestor_path = next;
586 586 child_nodes = &mut child_node.children;
587 587 } else {
588 588 return Ok(child_node);
589 589 }
590 590 }
591 591 }
592 592
593 593 fn add_or_remove_file(
594 594 &mut self,
595 595 path: &HgPath,
596 old_state: EntryState,
596 old_state: Option<EntryState>,
597 597 new_entry: DirstateEntry,
598 598 ) -> Result<(), DirstateV2ParseError> {
599 let had_entry = old_state != EntryState::Unknown;
599 let had_entry = old_state.is_some();
600 let was_tracked = old_state.map_or(false, |s| s.is_tracked());
600 601 let tracked_count_increment =
601 match (old_state.is_tracked(), new_entry.state().is_tracked()) {
602 match (was_tracked, new_entry.state().is_tracked()) {
602 603 (false, true) => 1,
603 604 (true, false) => -1,
604 605 _ => 0,
605 606 };
606 607
607 608 let node = Self::get_or_insert_node(
608 609 self.on_disk,
609 610 &mut self.unreachable_bytes,
610 611 &mut self.root,
611 612 path,
612 613 WithBasename::to_cow_owned,
613 614 |ancestor| {
614 615 if !had_entry {
615 616 ancestor.descendants_with_entry_count += 1;
616 617 }
617 618
618 619 // We can’t use `+= increment` because the counter is unsigned,
619 620 // and we want debug builds to detect accidental underflow
620 621 // through zero
621 622 match tracked_count_increment {
622 623 1 => ancestor.tracked_descendants_count += 1,
623 624 -1 => ancestor.tracked_descendants_count -= 1,
624 625 _ => {}
625 626 }
626 627 },
627 628 )?;
628 629 if !had_entry {
629 630 self.nodes_with_entry_count += 1
630 631 }
631 632 node.data = NodeData::Entry(new_entry);
632 633 Ok(())
633 634 }
634 635
635 636 fn iter_nodes<'tree>(
636 637 &'tree self,
637 638 ) -> impl Iterator<
638 639 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
639 640 > + 'tree {
640 641 // Depth first tree traversal.
641 642 //
642 643 // If we could afford internal iteration and recursion,
643 644 // this would look like:
644 645 //
645 646 // ```
646 647 // fn traverse_children(
647 648 // children: &ChildNodes,
648 649 // each: &mut impl FnMut(&Node),
649 650 // ) {
650 651 // for child in children.values() {
651 652 // traverse_children(&child.children, each);
652 653 // each(child);
653 654 // }
654 655 // }
655 656 // ```
656 657 //
657 658 // However we want an external iterator and therefore can’t use the
658 659 // call stack. Use an explicit stack instead:
659 660 let mut stack = Vec::new();
660 661 let mut iter = self.root.as_ref().iter();
661 662 std::iter::from_fn(move || {
662 663 while let Some(child_node) = iter.next() {
663 664 let children = match child_node.children(self.on_disk) {
664 665 Ok(children) => children,
665 666 Err(error) => return Some(Err(error)),
666 667 };
667 668 // Pseudo-recursion
668 669 let new_iter = children.iter();
669 670 let old_iter = std::mem::replace(&mut iter, new_iter);
670 671 stack.push((child_node, old_iter));
671 672 }
672 673 // Found the end of a `children.iter()` iterator.
673 674 if let Some((child_node, next_iter)) = stack.pop() {
674 675 // "Return" from pseudo-recursion by restoring state from the
675 676 // explicit stack
676 677 iter = next_iter;
677 678
678 679 Some(Ok(child_node))
679 680 } else {
680 681 // Reached the bottom of the stack, we’re done
681 682 None
682 683 }
683 684 })
684 685 }
685 686
686 687 fn clear_known_ambiguous_mtimes(
687 688 &mut self,
688 689 paths: &[impl AsRef<HgPath>],
689 690 ) -> Result<(), DirstateV2ParseError> {
690 691 for path in paths {
691 692 if let Some(node) = Self::get_node_mut(
692 693 self.on_disk,
693 694 &mut self.unreachable_bytes,
694 695 &mut self.root,
695 696 path.as_ref(),
696 697 )? {
697 698 if let NodeData::Entry(entry) = &mut node.data {
698 699 entry.clear_mtime();
699 700 }
700 701 }
701 702 }
702 703 Ok(())
703 704 }
704 705
705 706 /// Return a faillilble iterator of full paths of nodes that have an
706 707 /// `entry` for which the given `predicate` returns true.
707 708 ///
708 709 /// Fallibility means that each iterator item is a `Result`, which may
709 710 /// indicate a parse error of the on-disk dirstate-v2 format. Such errors
710 711 /// should only happen if Mercurial is buggy or a repository is corrupted.
711 712 fn filter_full_paths<'tree>(
712 713 &'tree self,
713 714 predicate: impl Fn(&DirstateEntry) -> bool + 'tree,
714 715 ) -> impl Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + 'tree
715 716 {
716 717 filter_map_results(self.iter_nodes(), move |node| {
717 718 if let Some(entry) = node.entry()? {
718 719 if predicate(&entry) {
719 720 return Ok(Some(node.full_path(self.on_disk)?));
720 721 }
721 722 }
722 723 Ok(None)
723 724 })
724 725 }
725 726
726 727 fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
727 728 if let Cow::Borrowed(path) = path {
728 729 *unreachable_bytes += path.len() as u32
729 730 }
730 731 }
731 732 }
732 733
/// `Iterator::filter_map` for a fallible iterator of `Result`s.
///
/// `f` only sees incoming `Ok` values; incoming errors are forwarded
/// untouched. So that it can use the `?` operator, `f` returns a `Result` of
/// `Option` rather than an `Option` of `Result`:
///
/// * `Ok(Some(b))` yields `Ok(b)`,
/// * `Ok(None)` drops the item,
/// * `Err(e)` yields `Err(e)`.
fn filter_map_results<'a, I, F, A, B, E>(
    iter: I,
    f: F,
) -> impl Iterator<Item = Result<B, E>> + 'a
where
    I: Iterator<Item = Result<A, E>> + 'a,
    F: Fn(A) -> Result<Option<B>, E> + 'a,
{
    iter.filter_map(move |item| item.and_then(&f).transpose())
}
752 753
753 754 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
754 755 fn clear(&mut self) {
755 756 self.root = Default::default();
756 757 self.nodes_with_entry_count = 0;
757 758 self.nodes_with_copy_source_count = 0;
758 759 }
759 760
760 761 fn set_v1(&mut self, filename: &HgPath, entry: DirstateEntry) {
761 762 let node =
762 763 self.get_or_insert(&filename).expect("no parse error in v1");
763 764 node.data = NodeData::Entry(entry);
764 765 node.children = ChildNodes::default();
765 766 node.copy_source = None;
766 767 node.descendants_with_entry_count = 0;
767 768 node.tracked_descendants_count = 0;
768 769 }
769 770
770 771 fn add_file(
771 772 &mut self,
772 773 filename: &HgPath,
773 774 entry: DirstateEntry,
774 775 added: bool,
775 776 merged: bool,
776 777 from_p2: bool,
777 778 possibly_dirty: bool,
778 779 ) -> Result<(), DirstateError> {
779 780 let state;
780 781 let size;
781 782 let mtime;
782 783 if added {
783 784 assert!(!possibly_dirty);
784 785 assert!(!from_p2);
785 786 state = EntryState::Added;
786 787 size = SIZE_NON_NORMAL;
787 788 mtime = MTIME_UNSET;
788 789 } else if merged {
789 790 assert!(!possibly_dirty);
790 791 assert!(!from_p2);
791 792 state = EntryState::Merged;
792 793 size = SIZE_FROM_OTHER_PARENT;
793 794 mtime = MTIME_UNSET;
794 795 } else if from_p2 {
795 796 assert!(!possibly_dirty);
796 797 state = EntryState::Normal;
797 798 size = SIZE_FROM_OTHER_PARENT;
798 799 mtime = MTIME_UNSET;
799 800 } else if possibly_dirty {
800 801 state = EntryState::Normal;
801 802 size = SIZE_NON_NORMAL;
802 803 mtime = MTIME_UNSET;
803 804 } else {
804 805 state = EntryState::Normal;
805 806 size = entry.size() & V1_RANGEMASK;
806 807 mtime = entry.mtime() & V1_RANGEMASK;
807 808 }
808 809 let mode = entry.mode();
809 810 let entry = DirstateEntry::from_v1_data(state, mode, size, mtime);
810 811
811 let old_state = match self.get(filename)? {
812 Some(e) => e.state(),
813 None => EntryState::Unknown,
814 };
812 let old_state = self.get(filename)?.map(|e| e.state());
815 813
816 814 Ok(self.add_or_remove_file(filename, old_state, entry)?)
817 815 }
818 816
819 817 fn remove_file(
820 818 &mut self,
821 819 filename: &HgPath,
822 820 in_merge: bool,
823 821 ) -> Result<(), DirstateError> {
824 822 let old_entry_opt = self.get(filename)?;
825 let old_state = match old_entry_opt {
826 Some(e) => e.state(),
827 None => EntryState::Unknown,
828 };
823 let old_state = old_entry_opt.map(|e| e.state());
829 824 let mut size = 0;
830 825 if in_merge {
831 826 // XXX we should not be able to have 'm' state and 'FROM_P2' if not
832 827 // during a merge. So I (marmoute) am not sure we need the
833 828 // conditionnal at all. Adding double checking this with assert
834 829 // would be nice.
835 830 if let Some(old_entry) = old_entry_opt {
836 831 // backup the previous state
837 832 if old_entry.state() == EntryState::Merged {
838 833 size = SIZE_NON_NORMAL;
839 834 } else if old_entry.state() == EntryState::Normal
840 835 && old_entry.size() == SIZE_FROM_OTHER_PARENT
841 836 {
842 837 // other parent
843 838 size = SIZE_FROM_OTHER_PARENT;
844 839 }
845 840 }
846 841 }
847 842 if size == 0 {
848 843 self.copy_map_remove(filename)?;
849 844 }
850 845 let entry = DirstateEntry::new_removed(size);
851 846 Ok(self.add_or_remove_file(filename, old_state, entry)?)
852 847 }
853 848
854 849 fn drop_file(&mut self, filename: &HgPath) -> Result<bool, DirstateError> {
855 let old_state = match self.get(filename)? {
856 Some(e) => e.state(),
857 None => EntryState::Unknown,
858 };
850 let was_tracked = self
851 .get(filename)?
852 .map_or(false, |e| e.state().is_tracked());
859 853 struct Dropped {
860 854 was_tracked: bool,
861 855 had_entry: bool,
862 856 had_copy_source: bool,
863 857 }
864 858
865 859 /// If this returns `Ok(Some((dropped, removed)))`, then
866 860 ///
867 861 /// * `dropped` is about the leaf node that was at `filename`
868 862 /// * `removed` is whether this particular level of recursion just
869 863 /// removed a node in `nodes`.
870 864 fn recur<'on_disk>(
871 865 on_disk: &'on_disk [u8],
872 866 unreachable_bytes: &mut u32,
873 867 nodes: &mut ChildNodes<'on_disk>,
874 868 path: &HgPath,
875 869 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
876 870 let (first_path_component, rest_of_path) =
877 871 path.split_first_component();
878 872 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
879 873 let node = if let Some(node) = nodes.get_mut(first_path_component)
880 874 {
881 875 node
882 876 } else {
883 877 return Ok(None);
884 878 };
885 879 let dropped;
886 880 if let Some(rest) = rest_of_path {
887 881 if let Some((d, removed)) = recur(
888 882 on_disk,
889 883 unreachable_bytes,
890 884 &mut node.children,
891 885 rest,
892 886 )? {
893 887 dropped = d;
894 888 if dropped.had_entry {
895 889 node.descendants_with_entry_count -= 1;
896 890 }
897 891 if dropped.was_tracked {
898 892 node.tracked_descendants_count -= 1;
899 893 }
900 894
901 895 // Directory caches must be invalidated when removing a
902 896 // child node
903 897 if removed {
904 898 if let NodeData::CachedDirectory { .. } = &node.data {
905 899 node.data = NodeData::None
906 900 }
907 901 }
908 902 } else {
909 903 return Ok(None);
910 904 }
911 905 } else {
912 906 let had_entry = node.data.has_entry();
913 907 if had_entry {
914 908 node.data = NodeData::None
915 909 }
916 910 if let Some(source) = &node.copy_source {
917 911 DirstateMap::count_dropped_path(unreachable_bytes, source)
918 912 }
919 913 dropped = Dropped {
920 914 was_tracked: node
921 915 .data
922 916 .as_entry()
923 917 .map_or(false, |entry| entry.state().is_tracked()),
924 918 had_entry,
925 919 had_copy_source: node.copy_source.take().is_some(),
926 920 };
927 921 }
928 922 // After recursion, for both leaf (rest_of_path is None) nodes and
929 923 // parent nodes, remove a node if it just became empty.
930 924 let remove = !node.data.has_entry()
931 925 && node.copy_source.is_none()
932 926 && node.children.is_empty();
933 927 if remove {
934 928 let (key, _) =
935 929 nodes.remove_entry(first_path_component).unwrap();
936 930 DirstateMap::count_dropped_path(
937 931 unreachable_bytes,
938 932 key.full_path(),
939 933 )
940 934 }
941 935 Ok(Some((dropped, remove)))
942 936 }
943 937
944 938 if let Some((dropped, _removed)) = recur(
945 939 self.on_disk,
946 940 &mut self.unreachable_bytes,
947 941 &mut self.root,
948 942 filename,
949 943 )? {
950 944 if dropped.had_entry {
951 945 self.nodes_with_entry_count -= 1
952 946 }
953 947 if dropped.had_copy_source {
954 948 self.nodes_with_copy_source_count -= 1
955 949 }
956 950 Ok(dropped.had_entry)
957 951 } else {
958 debug_assert!(!old_state.is_tracked());
952 debug_assert!(!was_tracked);
959 953 Ok(false)
960 954 }
961 955 }
962 956
963 957 fn clear_ambiguous_times(
964 958 &mut self,
965 959 filenames: Vec<HgPathBuf>,
966 960 now: i32,
967 961 ) -> Result<(), DirstateV2ParseError> {
968 962 for filename in filenames {
969 963 if let Some(node) = Self::get_node_mut(
970 964 self.on_disk,
971 965 &mut self.unreachable_bytes,
972 966 &mut self.root,
973 967 &filename,
974 968 )? {
975 969 if let NodeData::Entry(entry) = &mut node.data {
976 970 entry.clear_ambiguous_mtime(now);
977 971 }
978 972 }
979 973 }
980 974 Ok(())
981 975 }
982 976
983 977 fn non_normal_entries_contains(
984 978 &mut self,
985 979 key: &HgPath,
986 980 ) -> Result<bool, DirstateV2ParseError> {
987 981 Ok(if let Some(node) = self.get_node(key)? {
988 982 node.entry()?.map_or(false, |entry| entry.is_non_normal())
989 983 } else {
990 984 false
991 985 })
992 986 }
993 987
994 988 fn non_normal_entries_remove(&mut self, key: &HgPath) -> bool {
995 989 // Do nothing, this `DirstateMap` does not have a separate "non normal
996 990 // entries" set that need to be kept up to date.
997 991 if let Ok(Some(v)) = self.get(key) {
998 992 return v.is_non_normal();
999 993 }
1000 994 false
1001 995 }
1002 996
1003 997 fn non_normal_entries_add(&mut self, _key: &HgPath) {
1004 998 // Do nothing, this `DirstateMap` does not have a separate "non normal
1005 999 // entries" set that need to be kept up to date
1006 1000 }
1007 1001
1008 1002 fn non_normal_or_other_parent_paths(
1009 1003 &mut self,
1010 1004 ) -> Box<dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + '_>
1011 1005 {
1012 1006 Box::new(self.filter_full_paths(|entry| {
1013 1007 entry.is_non_normal() || entry.is_from_other_parent()
1014 1008 }))
1015 1009 }
1016 1010
1017 1011 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
1018 1012 // Do nothing, this `DirstateMap` does not have a separate "non normal
1019 1013 // entries" and "from other parent" sets that need to be recomputed
1020 1014 }
1021 1015
1022 1016 fn iter_non_normal_paths(
1023 1017 &mut self,
1024 1018 ) -> Box<
1025 1019 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
1026 1020 > {
1027 1021 self.iter_non_normal_paths_panic()
1028 1022 }
1029 1023
1030 1024 fn iter_non_normal_paths_panic(
1031 1025 &self,
1032 1026 ) -> Box<
1033 1027 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
1034 1028 > {
1035 1029 Box::new(self.filter_full_paths(|entry| entry.is_non_normal()))
1036 1030 }
1037 1031
1038 1032 fn iter_other_parent_paths(
1039 1033 &mut self,
1040 1034 ) -> Box<
1041 1035 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
1042 1036 > {
1043 1037 Box::new(self.filter_full_paths(|entry| entry.is_from_other_parent()))
1044 1038 }
1045 1039
1046 1040 fn has_tracked_dir(
1047 1041 &mut self,
1048 1042 directory: &HgPath,
1049 1043 ) -> Result<bool, DirstateError> {
1050 1044 if let Some(node) = self.get_node(directory)? {
1051 1045 // A node without a `DirstateEntry` was created to hold child
1052 1046 // nodes, and is therefore a directory.
1053 1047 let state = node.state()?;
1054 1048 Ok(state.is_none() && node.tracked_descendants_count() > 0)
1055 1049 } else {
1056 1050 Ok(false)
1057 1051 }
1058 1052 }
1059 1053
1060 1054 fn has_dir(&mut self, directory: &HgPath) -> Result<bool, DirstateError> {
1061 1055 if let Some(node) = self.get_node(directory)? {
1062 1056 // A node without a `DirstateEntry` was created to hold child
1063 1057 // nodes, and is therefore a directory.
1064 1058 let state = node.state()?;
1065 1059 Ok(state.is_none() && node.descendants_with_entry_count() > 0)
1066 1060 } else {
1067 1061 Ok(false)
1068 1062 }
1069 1063 }
1070 1064
1071 1065 #[timed]
1072 1066 fn pack_v1(
1073 1067 &mut self,
1074 1068 parents: DirstateParents,
1075 1069 now: Timestamp,
1076 1070 ) -> Result<Vec<u8>, DirstateError> {
1077 1071 let now: i32 = now.0.try_into().expect("time overflow");
1078 1072 let mut ambiguous_mtimes = Vec::new();
1079 1073 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
1080 1074 // reallocations
1081 1075 let mut size = parents.as_bytes().len();
1082 1076 for node in self.iter_nodes() {
1083 1077 let node = node?;
1084 1078 if let Some(entry) = node.entry()? {
1085 1079 size += packed_entry_size(
1086 1080 node.full_path(self.on_disk)?,
1087 1081 node.copy_source(self.on_disk)?,
1088 1082 );
1089 1083 if entry.mtime_is_ambiguous(now) {
1090 1084 ambiguous_mtimes.push(
1091 1085 node.full_path_borrowed(self.on_disk)?
1092 1086 .detach_from_tree(),
1093 1087 )
1094 1088 }
1095 1089 }
1096 1090 }
1097 1091 self.clear_known_ambiguous_mtimes(&ambiguous_mtimes)?;
1098 1092
1099 1093 let mut packed = Vec::with_capacity(size);
1100 1094 packed.extend(parents.as_bytes());
1101 1095
1102 1096 for node in self.iter_nodes() {
1103 1097 let node = node?;
1104 1098 if let Some(entry) = node.entry()? {
1105 1099 pack_entry(
1106 1100 node.full_path(self.on_disk)?,
1107 1101 &entry,
1108 1102 node.copy_source(self.on_disk)?,
1109 1103 &mut packed,
1110 1104 );
1111 1105 }
1112 1106 }
1113 1107 Ok(packed)
1114 1108 }
1115 1109
1116 1110 /// Returns new data and metadata together with whether that data should be
1117 1111 /// appended to the existing data file whose content is at
1118 1112 /// `self.on_disk` (true), instead of written to a new data file
1119 1113 /// (false).
1120 1114 #[timed]
1121 1115 fn pack_v2(
1122 1116 &mut self,
1123 1117 now: Timestamp,
1124 1118 can_append: bool,
1125 1119 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
1126 1120 // TODO: how do we want to handle this in 2038?
1127 1121 let now: i32 = now.0.try_into().expect("time overflow");
1128 1122 let mut paths = Vec::new();
1129 1123 for node in self.iter_nodes() {
1130 1124 let node = node?;
1131 1125 if let Some(entry) = node.entry()? {
1132 1126 if entry.mtime_is_ambiguous(now) {
1133 1127 paths.push(
1134 1128 node.full_path_borrowed(self.on_disk)?
1135 1129 .detach_from_tree(),
1136 1130 )
1137 1131 }
1138 1132 }
1139 1133 }
1140 1134 // Borrow of `self` ends here since we collect cloned paths
1141 1135
1142 1136 self.clear_known_ambiguous_mtimes(&paths)?;
1143 1137
1144 1138 on_disk::write(self, can_append)
1145 1139 }
1146 1140
1147 1141 fn status<'a>(
1148 1142 &'a mut self,
1149 1143 matcher: &'a (dyn Matcher + Sync),
1150 1144 root_dir: PathBuf,
1151 1145 ignore_files: Vec<PathBuf>,
1152 1146 options: StatusOptions,
1153 1147 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
1154 1148 {
1155 1149 super::status::status(self, matcher, root_dir, ignore_files, options)
1156 1150 }
1157 1151
1158 1152 fn copy_map_len(&self) -> usize {
1159 1153 self.nodes_with_copy_source_count as usize
1160 1154 }
1161 1155
1162 1156 fn copy_map_iter(&self) -> CopyMapIter<'_> {
1163 1157 Box::new(filter_map_results(self.iter_nodes(), move |node| {
1164 1158 Ok(if let Some(source) = node.copy_source(self.on_disk)? {
1165 1159 Some((node.full_path(self.on_disk)?, source))
1166 1160 } else {
1167 1161 None
1168 1162 })
1169 1163 }))
1170 1164 }
1171 1165
1172 1166 fn copy_map_contains_key(
1173 1167 &self,
1174 1168 key: &HgPath,
1175 1169 ) -> Result<bool, DirstateV2ParseError> {
1176 1170 Ok(if let Some(node) = self.get_node(key)? {
1177 1171 node.has_copy_source()
1178 1172 } else {
1179 1173 false
1180 1174 })
1181 1175 }
1182 1176
1183 1177 fn copy_map_get(
1184 1178 &self,
1185 1179 key: &HgPath,
1186 1180 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1187 1181 if let Some(node) = self.get_node(key)? {
1188 1182 if let Some(source) = node.copy_source(self.on_disk)? {
1189 1183 return Ok(Some(source));
1190 1184 }
1191 1185 }
1192 1186 Ok(None)
1193 1187 }
1194 1188
1195 1189 fn copy_map_remove(
1196 1190 &mut self,
1197 1191 key: &HgPath,
1198 1192 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1199 1193 let count = &mut self.nodes_with_copy_source_count;
1200 1194 let unreachable_bytes = &mut self.unreachable_bytes;
1201 1195 Ok(Self::get_node_mut(
1202 1196 self.on_disk,
1203 1197 unreachable_bytes,
1204 1198 &mut self.root,
1205 1199 key,
1206 1200 )?
1207 1201 .and_then(|node| {
1208 1202 if let Some(source) = &node.copy_source {
1209 1203 *count -= 1;
1210 1204 Self::count_dropped_path(unreachable_bytes, source);
1211 1205 }
1212 1206 node.copy_source.take().map(Cow::into_owned)
1213 1207 }))
1214 1208 }
1215 1209
1216 1210 fn copy_map_insert(
1217 1211 &mut self,
1218 1212 key: HgPathBuf,
1219 1213 value: HgPathBuf,
1220 1214 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1221 1215 let node = Self::get_or_insert_node(
1222 1216 self.on_disk,
1223 1217 &mut self.unreachable_bytes,
1224 1218 &mut self.root,
1225 1219 &key,
1226 1220 WithBasename::to_cow_owned,
1227 1221 |_ancestor| {},
1228 1222 )?;
1229 1223 if node.copy_source.is_none() {
1230 1224 self.nodes_with_copy_source_count += 1
1231 1225 }
1232 1226 Ok(node.copy_source.replace(value.into()).map(Cow::into_owned))
1233 1227 }
1234 1228
1235 1229 fn len(&self) -> usize {
1236 1230 self.nodes_with_entry_count as usize
1237 1231 }
1238 1232
1239 1233 fn contains_key(
1240 1234 &self,
1241 1235 key: &HgPath,
1242 1236 ) -> Result<bool, DirstateV2ParseError> {
1243 1237 Ok(self.get(key)?.is_some())
1244 1238 }
1245 1239
1246 1240 fn get(
1247 1241 &self,
1248 1242 key: &HgPath,
1249 1243 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1250 1244 Ok(if let Some(node) = self.get_node(key)? {
1251 1245 node.entry()?
1252 1246 } else {
1253 1247 None
1254 1248 })
1255 1249 }
1256 1250
1257 1251 fn iter(&self) -> StateMapIter<'_> {
1258 1252 Box::new(filter_map_results(self.iter_nodes(), move |node| {
1259 1253 Ok(if let Some(entry) = node.entry()? {
1260 1254 Some((node.full_path(self.on_disk)?, entry))
1261 1255 } else {
1262 1256 None
1263 1257 })
1264 1258 }))
1265 1259 }
1266 1260
1267 1261 fn iter_tracked_dirs(
1268 1262 &mut self,
1269 1263 ) -> Result<
1270 1264 Box<
1271 1265 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1272 1266 + Send
1273 1267 + '_,
1274 1268 >,
1275 1269 DirstateError,
1276 1270 > {
1277 1271 let on_disk = self.on_disk;
1278 1272 Ok(Box::new(filter_map_results(
1279 1273 self.iter_nodes(),
1280 1274 move |node| {
1281 1275 Ok(if node.tracked_descendants_count() > 0 {
1282 1276 Some(node.full_path(on_disk)?)
1283 1277 } else {
1284 1278 None
1285 1279 })
1286 1280 },
1287 1281 )))
1288 1282 }
1289 1283
1290 1284 fn debug_iter(
1291 1285 &self,
1292 1286 all: bool,
1293 1287 ) -> Box<
1294 1288 dyn Iterator<
1295 1289 Item = Result<
1296 1290 (&HgPath, (u8, i32, i32, i32)),
1297 1291 DirstateV2ParseError,
1298 1292 >,
1299 1293 > + Send
1300 1294 + '_,
1301 1295 > {
1302 1296 Box::new(filter_map_results(self.iter_nodes(), move |node| {
1303 1297 let debug_tuple = if let Some(entry) = node.entry()? {
1304 1298 entry.debug_tuple()
1305 1299 } else if !all {
1306 1300 return Ok(None);
1307 1301 } else if let Some(mtime) = node.cached_directory_mtime() {
1308 1302 (b' ', 0, -1, mtime.seconds() as i32)
1309 1303 } else {
1310 1304 (b' ', 0, -1, -1)
1311 1305 };
1312 1306 Ok(Some((node.full_path(self.on_disk)?, debug_tuple)))
1313 1307 }))
1314 1308 }
1315 1309 }
@@ -1,756 +1,753 b''
1 1 use crate::dirstate::status::IgnoreFnType;
2 2 use crate::dirstate_tree::dirstate_map::BorrowedPath;
3 3 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
4 4 use crate::dirstate_tree::dirstate_map::DirstateMap;
5 5 use crate::dirstate_tree::dirstate_map::NodeData;
6 6 use crate::dirstate_tree::dirstate_map::NodeRef;
7 7 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
8 8 use crate::dirstate_tree::on_disk::Timestamp;
9 9 use crate::matchers::get_ignore_function;
10 10 use crate::matchers::Matcher;
11 11 use crate::utils::files::get_bytes_from_os_string;
12 12 use crate::utils::files::get_path_from_bytes;
13 13 use crate::utils::hg_path::HgPath;
14 14 use crate::BadMatch;
15 15 use crate::DirstateStatus;
16 16 use crate::EntryState;
17 17 use crate::HgPathBuf;
18 18 use crate::PatternFileWarning;
19 19 use crate::StatusError;
20 20 use crate::StatusOptions;
21 21 use micro_timer::timed;
22 22 use rayon::prelude::*;
23 23 use sha1::{Digest, Sha1};
24 24 use std::borrow::Cow;
25 25 use std::io;
26 26 use std::path::Path;
27 27 use std::path::PathBuf;
28 28 use std::sync::Mutex;
29 29 use std::time::SystemTime;
30 30
31 31 /// Returns the status of the working directory compared to its parent
32 32 /// changeset.
33 33 ///
34 34 /// This algorithm is based on traversing the filesystem tree (`fs` in function
35 35 /// and variable names) and dirstate tree at the same time. The core of this
36 36 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
37 37 /// and its use of `itertools::merge_join_by`. When reaching a path that only
38 38 /// exists in one of the two trees, depending on information requested by
39 39 /// `options` we may need to traverse the remaining subtree.
40 40 #[timed]
41 41 pub fn status<'tree, 'on_disk: 'tree>(
42 42 dmap: &'tree mut DirstateMap<'on_disk>,
43 43 matcher: &(dyn Matcher + Sync),
44 44 root_dir: PathBuf,
45 45 ignore_files: Vec<PathBuf>,
46 46 options: StatusOptions,
47 47 ) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
48 48 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
49 49 if options.list_ignored || options.list_unknown {
50 50 let mut hasher = Sha1::new();
51 51 let (ignore_fn, warnings) = get_ignore_function(
52 52 ignore_files,
53 53 &root_dir,
54 54 &mut |pattern_bytes| hasher.update(pattern_bytes),
55 55 )?;
56 56 let new_hash = *hasher.finalize().as_ref();
57 57 let changed = new_hash != dmap.ignore_patterns_hash;
58 58 dmap.ignore_patterns_hash = new_hash;
59 59 (ignore_fn, warnings, Some(changed))
60 60 } else {
61 61 (Box::new(|&_| true), vec![], None)
62 62 };
63 63
64 64 let common = StatusCommon {
65 65 dmap,
66 66 options,
67 67 matcher,
68 68 ignore_fn,
69 69 outcome: Default::default(),
70 70 ignore_patterns_have_changed: patterns_changed,
71 71 new_cachable_directories: Default::default(),
72 72 outated_cached_directories: Default::default(),
73 73 filesystem_time_at_status_start: filesystem_now(&root_dir).ok(),
74 74 };
75 75 let is_at_repo_root = true;
76 76 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
77 77 let has_ignored_ancestor = false;
78 78 let root_cached_mtime = None;
79 79 let root_dir_metadata = None;
80 80 // If the path we have for the repository root is a symlink, do follow it.
81 81 // (As opposed to symlinks within the working directory which are not
82 82 // followed, using `std::fs::symlink_metadata`.)
83 83 common.traverse_fs_directory_and_dirstate(
84 84 has_ignored_ancestor,
85 85 dmap.root.as_ref(),
86 86 hg_path,
87 87 &root_dir,
88 88 root_dir_metadata,
89 89 root_cached_mtime,
90 90 is_at_repo_root,
91 91 )?;
92 92 let mut outcome = common.outcome.into_inner().unwrap();
93 93 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
94 94 let outdated = common.outated_cached_directories.into_inner().unwrap();
95 95
96 96 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
97 97 || !outdated.is_empty()
98 98 || !new_cachable.is_empty();
99 99
100 100 // Remove outdated mtimes before adding new mtimes, in case a given
101 101 // directory is both
102 102 for path in &outdated {
103 103 let node = dmap.get_or_insert(path)?;
104 104 if let NodeData::CachedDirectory { .. } = &node.data {
105 105 node.data = NodeData::None
106 106 }
107 107 }
108 108 for (path, mtime) in &new_cachable {
109 109 let node = dmap.get_or_insert(path)?;
110 110 match &node.data {
111 111 NodeData::Entry(_) => {} // Don’t overwrite an entry
112 112 NodeData::CachedDirectory { .. } | NodeData::None => {
113 113 node.data = NodeData::CachedDirectory { mtime: *mtime }
114 114 }
115 115 }
116 116 }
117 117
118 118 Ok((outcome, warnings))
119 119 }
120 120
121 121 /// Bag of random things needed by various parts of the algorithm. Reduces the
122 122 /// number of parameters passed to functions.
123 123 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
124 124 dmap: &'tree DirstateMap<'on_disk>,
125 125 options: StatusOptions,
126 126 matcher: &'a (dyn Matcher + Sync),
127 127 ignore_fn: IgnoreFnType<'a>,
128 128 outcome: Mutex<DirstateStatus<'on_disk>>,
129 129 new_cachable_directories: Mutex<Vec<(Cow<'on_disk, HgPath>, Timestamp)>>,
130 130 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
131 131
132 132 /// Whether ignore files like `.hgignore` have changed since the previous
133 133 /// time a `status()` call wrote their hash to the dirstate. `None` means
134 134 /// we don’t know as this run doesn’t list either ignored or unknown files
135 135 /// and therefore isn’t reading `.hgignore`.
136 136 ignore_patterns_have_changed: Option<bool>,
137 137
138 138 /// The current time at the start of the `status()` algorithm, as measured
139 139 /// and possibly truncated by the filesystem.
140 140 filesystem_time_at_status_start: Option<SystemTime>,
141 141 }
142 142
143 143 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
144 144 fn read_dir(
145 145 &self,
146 146 hg_path: &HgPath,
147 147 fs_path: &Path,
148 148 is_at_repo_root: bool,
149 149 ) -> Result<Vec<DirEntry>, ()> {
150 150 DirEntry::read_dir(fs_path, is_at_repo_root)
151 151 .map_err(|error| self.io_error(error, hg_path))
152 152 }
153 153
154 154 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
155 155 let errno = error.raw_os_error().expect("expected real OS error");
156 156 self.outcome
157 157 .lock()
158 158 .unwrap()
159 159 .bad
160 160 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
161 161 }
162 162
163 163 fn check_for_outdated_directory_cache(
164 164 &self,
165 165 dirstate_node: &NodeRef<'tree, 'on_disk>,
166 166 ) -> Result<(), DirstateV2ParseError> {
167 167 if self.ignore_patterns_have_changed == Some(true)
168 168 && dirstate_node.cached_directory_mtime().is_some()
169 169 {
170 170 self.outated_cached_directories.lock().unwrap().push(
171 171 dirstate_node
172 172 .full_path_borrowed(self.dmap.on_disk)?
173 173 .detach_from_tree(),
174 174 )
175 175 }
176 176 Ok(())
177 177 }
178 178
179 179 /// If this returns true, we can get accurate results by only using
180 180 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
181 181 /// need to call `read_dir`.
182 182 fn can_skip_fs_readdir(
183 183 &self,
184 184 directory_metadata: Option<&std::fs::Metadata>,
185 185 cached_directory_mtime: Option<&Timestamp>,
186 186 ) -> bool {
187 187 if !self.options.list_unknown && !self.options.list_ignored {
188 188 // All states that we care about listing have corresponding
189 189 // dirstate entries.
190 190 // This happens for example with `hg status -mard`.
191 191 return true;
192 192 }
193 193 if !self.options.list_ignored
194 194 && self.ignore_patterns_have_changed == Some(false)
195 195 {
196 196 if let Some(cached_mtime) = cached_directory_mtime {
197 197 // The dirstate contains a cached mtime for this directory, set
198 198 // by a previous run of the `status` algorithm which found this
199 199 // directory eligible for `read_dir` caching.
200 200 if let Some(meta) = directory_metadata {
201 201 if let Ok(current_mtime) = meta.modified() {
202 202 if current_mtime == cached_mtime.into() {
203 203 // The mtime of that directory has not changed
204 204 // since then, which means that the results of
205 205 // `read_dir` should also be unchanged.
206 206 return true;
207 207 }
208 208 }
209 209 }
210 210 }
211 211 }
212 212 false
213 213 }
214 214
215 215 /// Returns whether all child entries of the filesystem directory have a
216 216 /// corresponding dirstate node or are ignored.
217 217 fn traverse_fs_directory_and_dirstate(
218 218 &self,
219 219 has_ignored_ancestor: bool,
220 220 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
221 221 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
222 222 directory_fs_path: &Path,
223 223 directory_metadata: Option<&std::fs::Metadata>,
224 224 cached_directory_mtime: Option<&Timestamp>,
225 225 is_at_repo_root: bool,
226 226 ) -> Result<bool, DirstateV2ParseError> {
227 227 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
228 228 {
229 229 dirstate_nodes
230 230 .par_iter()
231 231 .map(|dirstate_node| {
232 232 let fs_path = directory_fs_path.join(get_path_from_bytes(
233 233 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
234 234 ));
235 235 match std::fs::symlink_metadata(&fs_path) {
236 236 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
237 237 &fs_path,
238 238 &fs_metadata,
239 239 dirstate_node,
240 240 has_ignored_ancestor,
241 241 ),
242 242 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
243 243 self.traverse_dirstate_only(dirstate_node)
244 244 }
245 245 Err(error) => {
246 246 let hg_path =
247 247 dirstate_node.full_path(self.dmap.on_disk)?;
248 248 Ok(self.io_error(error, hg_path))
249 249 }
250 250 }
251 251 })
252 252 .collect::<Result<_, _>>()?;
253 253
254 254 // We don’t know, so conservatively say this isn’t the case
255 255 let children_all_have_dirstate_node_or_are_ignored = false;
256 256
257 257 return Ok(children_all_have_dirstate_node_or_are_ignored);
258 258 }
259 259
260 260 let mut fs_entries = if let Ok(entries) = self.read_dir(
261 261 directory_hg_path,
262 262 directory_fs_path,
263 263 is_at_repo_root,
264 264 ) {
265 265 entries
266 266 } else {
267 267 // Treat an unreadable directory (typically because of insufficient
268 268 // permissions) like an empty directory. `self.read_dir` has
269 269 // already called `self.io_error` so a warning will be emitted.
270 270 Vec::new()
271 271 };
272 272
273 273 // `merge_join_by` requires both its input iterators to be sorted:
274 274
275 275 let dirstate_nodes = dirstate_nodes.sorted();
276 276 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
277 277 // https://github.com/rust-lang/rust/issues/34162
278 278 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
279 279
280 280 // Propagate here any error that would happen inside the comparison
281 281 // callback below
282 282 for dirstate_node in &dirstate_nodes {
283 283 dirstate_node.base_name(self.dmap.on_disk)?;
284 284 }
285 285 itertools::merge_join_by(
286 286 dirstate_nodes,
287 287 &fs_entries,
288 288 |dirstate_node, fs_entry| {
289 289 // This `unwrap` never panics because we already propagated
290 290 // those errors above
291 291 dirstate_node
292 292 .base_name(self.dmap.on_disk)
293 293 .unwrap()
294 294 .cmp(&fs_entry.base_name)
295 295 },
296 296 )
297 297 .par_bridge()
298 298 .map(|pair| {
299 299 use itertools::EitherOrBoth::*;
300 300 let has_dirstate_node_or_is_ignored;
301 301 match pair {
302 302 Both(dirstate_node, fs_entry) => {
303 303 self.traverse_fs_and_dirstate(
304 304 &fs_entry.full_path,
305 305 &fs_entry.metadata,
306 306 dirstate_node,
307 307 has_ignored_ancestor,
308 308 )?;
309 309 has_dirstate_node_or_is_ignored = true
310 310 }
311 311 Left(dirstate_node) => {
312 312 self.traverse_dirstate_only(dirstate_node)?;
313 313 has_dirstate_node_or_is_ignored = true;
314 314 }
315 315 Right(fs_entry) => {
316 316 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
317 317 has_ignored_ancestor,
318 318 directory_hg_path,
319 319 fs_entry,
320 320 )
321 321 }
322 322 }
323 323 Ok(has_dirstate_node_or_is_ignored)
324 324 })
325 325 .try_reduce(|| true, |a, b| Ok(a && b))
326 326 }
327 327
328 328 fn traverse_fs_and_dirstate(
329 329 &self,
330 330 fs_path: &Path,
331 331 fs_metadata: &std::fs::Metadata,
332 332 dirstate_node: NodeRef<'tree, 'on_disk>,
333 333 has_ignored_ancestor: bool,
334 334 ) -> Result<(), DirstateV2ParseError> {
335 335 self.check_for_outdated_directory_cache(&dirstate_node)?;
336 336 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
337 337 let file_type = fs_metadata.file_type();
338 338 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
339 339 if !file_or_symlink {
340 340 // If we previously had a file here, it was removed (with
341 341 // `hg rm` or similar) or deleted before it could be
342 342 // replaced by a directory or something else.
343 343 self.mark_removed_or_deleted_if_file(
344 344 &hg_path,
345 345 dirstate_node.state()?,
346 346 );
347 347 }
348 348 if file_type.is_dir() {
349 349 if self.options.collect_traversed_dirs {
350 350 self.outcome
351 351 .lock()
352 352 .unwrap()
353 353 .traversed
354 354 .push(hg_path.detach_from_tree())
355 355 }
356 356 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
357 357 let is_at_repo_root = false;
358 358 let children_all_have_dirstate_node_or_are_ignored = self
359 359 .traverse_fs_directory_and_dirstate(
360 360 is_ignored,
361 361 dirstate_node.children(self.dmap.on_disk)?,
362 362 hg_path,
363 363 fs_path,
364 364 Some(fs_metadata),
365 365 dirstate_node.cached_directory_mtime(),
366 366 is_at_repo_root,
367 367 )?;
368 368 self.maybe_save_directory_mtime(
369 369 children_all_have_dirstate_node_or_are_ignored,
370 370 fs_metadata,
371 371 dirstate_node,
372 372 )?
373 373 } else {
374 374 if file_or_symlink && self.matcher.matches(hg_path) {
375 375 if let Some(state) = dirstate_node.state()? {
376 376 match state {
377 377 EntryState::Added => self
378 378 .outcome
379 379 .lock()
380 380 .unwrap()
381 381 .added
382 382 .push(hg_path.detach_from_tree()),
383 383 EntryState::Removed => self
384 384 .outcome
385 385 .lock()
386 386 .unwrap()
387 387 .removed
388 388 .push(hg_path.detach_from_tree()),
389 389 EntryState::Merged => self
390 390 .outcome
391 391 .lock()
392 392 .unwrap()
393 393 .modified
394 394 .push(hg_path.detach_from_tree()),
395 395 EntryState::Normal => self
396 396 .handle_normal_file(&dirstate_node, fs_metadata)?,
397 // This variant is not used in DirstateMap
398 // nodes
399 EntryState::Unknown => unreachable!(),
400 397 }
401 398 } else {
402 399 // `node.entry.is_none()` indicates a "directory"
403 400 // node, but the filesystem has a file
404 401 self.mark_unknown_or_ignored(
405 402 has_ignored_ancestor,
406 403 hg_path,
407 404 );
408 405 }
409 406 }
410 407
411 408 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
412 409 {
413 410 self.traverse_dirstate_only(child_node)?
414 411 }
415 412 }
416 413 Ok(())
417 414 }
418 415
419 416 fn maybe_save_directory_mtime(
420 417 &self,
421 418 children_all_have_dirstate_node_or_are_ignored: bool,
422 419 directory_metadata: &std::fs::Metadata,
423 420 dirstate_node: NodeRef<'tree, 'on_disk>,
424 421 ) -> Result<(), DirstateV2ParseError> {
425 422 if children_all_have_dirstate_node_or_are_ignored {
426 423 // All filesystem directory entries from `read_dir` have a
427 424 // corresponding node in the dirstate, so we can reconstitute the
428 425 // names of those entries without calling `read_dir` again.
429 426 if let (Some(status_start), Ok(directory_mtime)) = (
430 427 &self.filesystem_time_at_status_start,
431 428 directory_metadata.modified(),
432 429 ) {
433 430 // Although the Rust standard library’s `SystemTime` type
434 431 // has nanosecond precision, the times reported for a
435 432 // directory’s (or file’s) modified time may have lower
436 433 // resolution based on the filesystem (for example ext3
437 434 // only stores integer seconds), kernel (see
438 435 // https://stackoverflow.com/a/14393315/1162888), etc.
439 436 if &directory_mtime >= status_start {
440 437 // The directory was modified too recently, don’t cache its
441 438 // `read_dir` results.
442 439 //
443 440 // A timeline like this is possible:
444 441 //
445 442 // 1. A change to this directory (direct child was
446 443 // added or removed) causes its mtime to be set
447 444 // (possibly truncated) to `directory_mtime`
448 445 // 2. This `status` algorithm calls `read_dir`
449 446 // 3. Another change to the same directory is
450 447 // made so that calling `read_dir` again would give
451 448 // different results, but soon enough after 1. that
452 449 // the mtime stays the same
453 450 //
454 451 // On a system where the time resolution is poor, this
455 452 // scenario is not unlikely if all three steps are caused
456 453 // by the same script.
457 454 } else {
458 455 // We’ve observed (through `status_start`) that time has
459 456 // “progressed” since `directory_mtime`, so any further
460 457 // change to this directory is extremely likely to cause a
461 458 // different mtime.
462 459 //
463 460 // Having the same mtime again is not entirely impossible
464 461 // since the system clock is not monotonic. It could jump
465 462 // backward to some point before `directory_mtime`, then a
466 463 // directory change could potentially happen during exactly
467 464 // the wrong tick.
468 465 //
469 466 // We deem this scenario (unlike the previous one) to be
470 467 // unlikely enough in practice.
471 468 let timestamp = directory_mtime.into();
472 469 let cached = dirstate_node.cached_directory_mtime();
473 470 if cached != Some(&timestamp) {
474 471 let hg_path = dirstate_node
475 472 .full_path_borrowed(self.dmap.on_disk)?
476 473 .detach_from_tree();
477 474 self.new_cachable_directories
478 475 .lock()
479 476 .unwrap()
480 477 .push((hg_path, timestamp))
481 478 }
482 479 }
483 480 }
484 481 }
485 482 Ok(())
486 483 }
487 484
488 485 /// A file with `EntryState::Normal` in the dirstate was found in the
489 486 /// filesystem
490 487 fn handle_normal_file(
491 488 &self,
492 489 dirstate_node: &NodeRef<'tree, 'on_disk>,
493 490 fs_metadata: &std::fs::Metadata,
494 491 ) -> Result<(), DirstateV2ParseError> {
495 492 // Keep the low 31 bits
496 493 fn truncate_u64(value: u64) -> i32 {
497 494 (value & 0x7FFF_FFFF) as i32
498 495 }
499 496 fn truncate_i64(value: i64) -> i32 {
500 497 (value & 0x7FFF_FFFF) as i32
501 498 }
502 499
503 500 let entry = dirstate_node
504 501 .entry()?
505 502 .expect("handle_normal_file called with entry-less node");
506 503 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
507 504 let mode_changed =
508 505 || self.options.check_exec && entry.mode_changed(fs_metadata);
509 506 let size = entry.size();
510 507 let size_changed = size != truncate_u64(fs_metadata.len());
511 508 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
512 509 // issue6456: Size returned may be longer due to encryption
513 510 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
514 511 self.outcome
515 512 .lock()
516 513 .unwrap()
517 514 .unsure
518 515 .push(hg_path.detach_from_tree())
519 516 } else if dirstate_node.has_copy_source()
520 517 || entry.is_from_other_parent()
521 518 || (size >= 0 && (size_changed || mode_changed()))
522 519 {
523 520 self.outcome
524 521 .lock()
525 522 .unwrap()
526 523 .modified
527 524 .push(hg_path.detach_from_tree())
528 525 } else {
529 526 let mtime = mtime_seconds(fs_metadata);
530 527 if truncate_i64(mtime) != entry.mtime()
531 528 || mtime == self.options.last_normal_time
532 529 {
533 530 self.outcome
534 531 .lock()
535 532 .unwrap()
536 533 .unsure
537 534 .push(hg_path.detach_from_tree())
538 535 } else if self.options.list_clean {
539 536 self.outcome
540 537 .lock()
541 538 .unwrap()
542 539 .clean
543 540 .push(hg_path.detach_from_tree())
544 541 }
545 542 }
546 543 Ok(())
547 544 }
548 545
549 546 /// A node in the dirstate tree has no corresponding filesystem entry
550 547 fn traverse_dirstate_only(
551 548 &self,
552 549 dirstate_node: NodeRef<'tree, 'on_disk>,
553 550 ) -> Result<(), DirstateV2ParseError> {
554 551 self.check_for_outdated_directory_cache(&dirstate_node)?;
555 552 self.mark_removed_or_deleted_if_file(
556 553 &dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
557 554 dirstate_node.state()?,
558 555 );
559 556 dirstate_node
560 557 .children(self.dmap.on_disk)?
561 558 .par_iter()
562 559 .map(|child_node| self.traverse_dirstate_only(child_node))
563 560 .collect()
564 561 }
565 562
566 563 /// A node in the dirstate tree has no corresponding *file* on the
567 564 /// filesystem
568 565 ///
569 566 /// Does nothing on a "directory" node
570 567 fn mark_removed_or_deleted_if_file(
571 568 &self,
572 569 hg_path: &BorrowedPath<'tree, 'on_disk>,
573 570 dirstate_node_state: Option<EntryState>,
574 571 ) {
575 572 if let Some(state) = dirstate_node_state {
576 573 if self.matcher.matches(hg_path) {
577 574 if let EntryState::Removed = state {
578 575 self.outcome
579 576 .lock()
580 577 .unwrap()
581 578 .removed
582 579 .push(hg_path.detach_from_tree())
583 580 } else {
584 581 self.outcome
585 582 .lock()
586 583 .unwrap()
587 584 .deleted
588 585 .push(hg_path.detach_from_tree())
589 586 }
590 587 }
591 588 }
592 589 }
593 590
594 591 /// Something in the filesystem has no corresponding dirstate node
595 592 ///
596 593 /// Returns whether that path is ignored
597 594 fn traverse_fs_only(
598 595 &self,
599 596 has_ignored_ancestor: bool,
600 597 directory_hg_path: &HgPath,
601 598 fs_entry: &DirEntry,
602 599 ) -> bool {
603 600 let hg_path = directory_hg_path.join(&fs_entry.base_name);
604 601 let file_type = fs_entry.metadata.file_type();
605 602 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
606 603 if file_type.is_dir() {
607 604 let is_ignored =
608 605 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
609 606 let traverse_children = if is_ignored {
610 607 // Descendants of an ignored directory are all ignored
611 608 self.options.list_ignored
612 609 } else {
613 610 // Descendants of an unknown directory may be either unknown or
614 611 // ignored
615 612 self.options.list_unknown || self.options.list_ignored
616 613 };
617 614 if traverse_children {
618 615 let is_at_repo_root = false;
619 616 if let Ok(children_fs_entries) = self.read_dir(
620 617 &hg_path,
621 618 &fs_entry.full_path,
622 619 is_at_repo_root,
623 620 ) {
624 621 children_fs_entries.par_iter().for_each(|child_fs_entry| {
625 622 self.traverse_fs_only(
626 623 is_ignored,
627 624 &hg_path,
628 625 child_fs_entry,
629 626 );
630 627 })
631 628 }
632 629 }
633 630 if self.options.collect_traversed_dirs {
634 631 self.outcome.lock().unwrap().traversed.push(hg_path.into())
635 632 }
636 633 is_ignored
637 634 } else {
638 635 if file_or_symlink {
639 636 if self.matcher.matches(&hg_path) {
640 637 self.mark_unknown_or_ignored(
641 638 has_ignored_ancestor,
642 639 &BorrowedPath::InMemory(&hg_path),
643 640 )
644 641 } else {
645 642 // We haven’t computed whether this path is ignored. It
646 643 // might not be, and a future run of status might have a
647 644 // different matcher that matches it. So treat it as not
648 645 // ignored. That is, inhibit readdir caching of the parent
649 646 // directory.
650 647 false
651 648 }
652 649 } else {
653 650 // This is neither a directory, a plain file, or a symlink.
654 651 // Treat it like an ignored file.
655 652 true
656 653 }
657 654 }
658 655 }
659 656
660 657 /// Returns whether that path is ignored
661 658 fn mark_unknown_or_ignored(
662 659 &self,
663 660 has_ignored_ancestor: bool,
664 661 hg_path: &BorrowedPath<'_, 'on_disk>,
665 662 ) -> bool {
666 663 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
667 664 if is_ignored {
668 665 if self.options.list_ignored {
669 666 self.outcome
670 667 .lock()
671 668 .unwrap()
672 669 .ignored
673 670 .push(hg_path.detach_from_tree())
674 671 }
675 672 } else {
676 673 if self.options.list_unknown {
677 674 self.outcome
678 675 .lock()
679 676 .unwrap()
680 677 .unknown
681 678 .push(hg_path.detach_from_tree())
682 679 }
683 680 }
684 681 is_ignored
685 682 }
686 683 }
687 684
688 685 #[cfg(unix)] // TODO
689 686 fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
690 687 // Going through `Metadata::modified()` would be portable, but would take
691 688 // care to construct a `SystemTime` value with sub-second precision just
692 689 // for us to throw that away here.
693 690 use std::os::unix::fs::MetadataExt;
694 691 metadata.mtime()
695 692 }
696 693
697 694 struct DirEntry {
698 695 base_name: HgPathBuf,
699 696 full_path: PathBuf,
700 697 metadata: std::fs::Metadata,
701 698 }
702 699
703 700 impl DirEntry {
704 701 /// Returns **unsorted** entries in the given directory, with name and
705 702 /// metadata.
706 703 ///
707 704 /// If a `.hg` sub-directory is encountered:
708 705 ///
709 706 /// * At the repository root, ignore that sub-directory
710 707 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
711 708 /// list instead.
712 709 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
713 710 let mut results = Vec::new();
714 711 for entry in path.read_dir()? {
715 712 let entry = entry?;
716 713 let metadata = entry.metadata()?;
717 714 let name = get_bytes_from_os_string(entry.file_name());
718 715 // FIXME don't do this when cached
719 716 if name == b".hg" {
720 717 if is_at_repo_root {
721 718 // Skip the repo’s own .hg (might be a symlink)
722 719 continue;
723 720 } else if metadata.is_dir() {
724 721 // A .hg sub-directory at another location means a subrepo,
725 722 // skip it entirely.
726 723 return Ok(Vec::new());
727 724 }
728 725 }
729 726 results.push(DirEntry {
730 727 base_name: name.into(),
731 728 full_path: entry.path(),
732 729 metadata,
733 730 })
734 731 }
735 732 Ok(results)
736 733 }
737 734 }
738 735
739 736 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
740 737 /// of the given repository.
741 738 ///
742 739 /// This is similar to `SystemTime::now()`, with the result truncated to the
743 740 /// same time resolution as other files’ modification times. Using `.hg`
744 741 /// instead of the system’s default temporary directory (such as `/tmp`) makes
745 742 /// it more likely the temporary file is in the same disk partition as contents
746 743 /// of the working directory, which can matter since different filesystems may
747 744 /// store timestamps with different resolutions.
748 745 ///
749 746 /// This may fail, typically if we lack write permissions. In that case we
750 747 /// should continue the `status()` algorithm anyway and consider the current
751 748 /// date/time to be unknown.
752 749 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
753 750 tempfile::tempfile_in(repo_root.join(".hg"))?
754 751 .metadata()?
755 752 .modified()
756 753 }
General Comments 0
You need to be logged in to leave comments. Login now