Show More
@@ -23,6 +23,7 b' rayon = "1.3.0"' | |||
|
23 | 23 | regex = "1.3.9" |
|
24 | 24 | twox-hash = "1.5.0" |
|
25 | 25 | same-file = "1.0.6" |
|
26 | tempfile = "3.1.0" | |
|
26 | 27 | crossbeam-channel = "0.4" |
|
27 | 28 | micro-timer = "0.3.0" |
|
28 | 29 | log = "0.4.8" |
@@ -41,4 +42,3 b' default-features = false' | |||
|
41 | 42 | [dev-dependencies] |
|
42 | 43 | clap = "*" |
|
43 | 44 | pretty_assertions = "0.6.1" |
|
44 | tempfile = "3.1.0" |
@@ -317,6 +317,18 b" impl<'tree, 'on_disk> NodeRef<'tree, 'on" | |||
|
317 | 317 | } |
|
318 | 318 | } |
|
319 | 319 | |
|
320 | pub(super) fn cached_directory_mtime( | |
|
321 | &self, | |
|
322 | ) -> Option<&on_disk::Timestamp> { | |
|
323 | match self { | |
|
324 | NodeRef::InMemory(_path, node) => match &node.data { | |
|
325 | NodeData::CachedDirectory { mtime } => Some(mtime), | |
|
326 | _ => None, | |
|
327 | }, | |
|
328 | NodeRef::OnDisk(node) => node.cached_directory_mtime(), | |
|
329 | } | |
|
330 | } | |
|
331 | ||
|
320 | 332 | pub(super) fn tracked_descendants_count(&self) -> u32 { |
|
321 | 333 | match self { |
|
322 | 334 | NodeRef::InMemory(_path, node) => node.tracked_descendants_count, |
@@ -479,7 +491,7 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||
|
479 | 491 | } |
|
480 | 492 | } |
|
481 | 493 | |
|
482 | fn get_or_insert_node<'tree, 'path>( | |
|
494 | pub(super) fn get_or_insert_node<'tree, 'path>( | |
|
483 | 495 | on_disk: &'on_disk [u8], |
|
484 | 496 | root: &'tree mut ChildNodes<'on_disk>, |
|
485 | 497 | path: &'path HgPath, |
@@ -56,13 +56,31 b' pub(super) struct Node {' | |||
|
56 | 56 | |
|
57 | 57 | /// Depending on the value of `state`: |
|
58 | 58 | /// |
|
59 |
/// * A null byte: `data` |
|
|
59 | /// * A null byte: `data` is not used. | |
|
60 | /// | |
|
60 | 61 | /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together |
|
61 |
/// represent |
|
|
62 | /// represent a dirstate entry like in the v1 format. | |
|
63 | /// | |
|
62 | 64 | /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted |
|
63 | 65 | /// as the `Timestamp` for the mtime of a cached directory. |
|
64 | 66 | /// |
|
65 | /// TODO: document directory caching | |
|
67 | /// The presence of this state means that at some point, this path in | |
|
68 | /// the working directory was observed: | |
|
69 | /// | |
|
70 | /// - To be a directory | |
|
71 | /// - With the modification time as given by `Timestamp` | |
|
72 | /// - That timestamp was already strictly in the past when observed, | |
|
73 | /// meaning that later changes cannot happen in the same clock tick | |
|
74 | /// and must cause a different modification time (unless the system | |
|
75 | /// clock jumps back and we get unlucky, which is not impossible but | |
|
76 | /// deemed unlikely enough). | |
|
77 | /// - The directory did not contain any child entry that did not have a | |
|
78 | /// corresponding dirstate node. | |
|
79 | /// | |
|
80 | /// This means that if `std::fs::symlink_metadata` later reports the | |
|
81 | /// same modification time, we don’t need to call `std::fs::read_dir` | |
|
82 | /// again for this directory and can iterate child dirstate nodes | |
|
83 | /// instead. | |
|
66 | 84 | state: u8, |
|
67 | 85 | data: Entry, |
|
68 | 86 | } |
@@ -76,7 +94,7 b' struct Entry {' | |||
|
76 | 94 | } |
|
77 | 95 | |
|
78 | 96 | /// Duration since the Unix epoch |
|
79 | #[derive(BytesCast, Copy, Clone)] | |
|
97 | #[derive(BytesCast, Copy, Clone, PartialEq)] | |
|
80 | 98 | #[repr(C)] |
|
81 | 99 | pub(super) struct Timestamp { |
|
82 | 100 | seconds: I64Be, |
@@ -258,6 +276,14 b' impl Node {' | |||
|
258 | 276 | } |
|
259 | 277 | } |
|
260 | 278 | |
|
279 | pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> { | |
|
280 | if self.state == b'd' { | |
|
281 | Some(self.data.as_timestamp()) | |
|
282 | } else { | |
|
283 | None | |
|
284 | } | |
|
285 | } | |
|
286 | ||
|
261 | 287 | pub(super) fn state( |
|
262 | 288 | &self, |
|
263 | 289 | ) -> Result<Option<EntryState>, DirstateV2ParseError> { |
@@ -326,8 +352,8 b' impl Entry {' | |||
|
326 | 352 | } |
|
327 | 353 | } |
|
328 | 354 | |
|
329 |
impl From< |
|
|
330 |
fn from(system_time: |
|
|
355 | impl From<SystemTime> for Timestamp { | |
|
356 | fn from(system_time: SystemTime) -> Self { | |
|
331 | 357 | let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) { |
|
332 | 358 | Ok(duration) => { |
|
333 | 359 | (duration.as_secs() as i64, duration.subsec_nanos()) |
@@ -2,8 +2,11 b' use crate::dirstate::status::IgnoreFnTyp' | |||
|
2 | 2 | use crate::dirstate_tree::dirstate_map::BorrowedPath; |
|
3 | 3 | use crate::dirstate_tree::dirstate_map::ChildNodesRef; |
|
4 | 4 | use crate::dirstate_tree::dirstate_map::DirstateMap; |
|
5 | use crate::dirstate_tree::dirstate_map::NodeData; | |
|
5 | 6 | use crate::dirstate_tree::dirstate_map::NodeRef; |
|
6 | 7 | use crate::dirstate_tree::on_disk::DirstateV2ParseError; |
|
8 | use crate::dirstate_tree::on_disk::Timestamp; | |
|
9 | use crate::dirstate_tree::path_with_basename::WithBasename; | |
|
7 | 10 | use crate::matchers::get_ignore_function; |
|
8 | 11 | use crate::matchers::Matcher; |
|
9 | 12 | use crate::utils::files::get_bytes_from_os_string; |
@@ -18,10 +21,12 b' use crate::StatusError;' | |||
|
18 | 21 | use crate::StatusOptions; |
|
19 | 22 | use micro_timer::timed; |
|
20 | 23 | use rayon::prelude::*; |
|
24 | use std::borrow::Cow; | |
|
21 | 25 | use std::io; |
|
22 | 26 | use std::path::Path; |
|
23 | 27 | use std::path::PathBuf; |
|
24 | 28 | use std::sync::Mutex; |
|
29 | use std::time::SystemTime; | |
|
25 | 30 | |
|
26 | 31 | /// Returns the status of the working directory compared to its parent |
|
27 | 32 | /// changeset. |
@@ -52,19 +57,45 b" pub fn status<'tree, 'on_disk: 'tree>(" | |||
|
52 | 57 | options, |
|
53 | 58 | matcher, |
|
54 | 59 | ignore_fn, |
|
55 |
outcome: |
|
|
60 | outcome: Default::default(), | |
|
61 | cached_directory_mtimes_to_add: Default::default(), | |
|
62 | filesystem_time_at_status_start: filesystem_now(&root_dir).ok(), | |
|
56 | 63 | }; |
|
57 | 64 | let is_at_repo_root = true; |
|
58 | 65 | let hg_path = &BorrowedPath::OnDisk(HgPath::new("")); |
|
59 | 66 | let has_ignored_ancestor = false; |
|
67 | let root_cached_mtime = None; | |
|
68 | let root_dir_metadata = None; | |
|
69 | // If the path we have for the repository root is a symlink, do follow it. | |
|
70 | // (As opposed to symlinks within the working directory which are not | |
|
71 | // followed, using `std::fs::symlink_metadata`.) | |
|
60 | 72 | common.traverse_fs_directory_and_dirstate( |
|
61 | 73 | has_ignored_ancestor, |
|
62 | 74 | dmap.root.as_ref(), |
|
63 | 75 | hg_path, |
|
64 | 76 | &root_dir, |
|
77 | root_dir_metadata, | |
|
78 | root_cached_mtime, | |
|
65 | 79 | is_at_repo_root, |
|
66 | 80 | )?; |
|
67 |
|
|
|
81 | let outcome = common.outcome.into_inner().unwrap(); | |
|
82 | let to_add = common.cached_directory_mtimes_to_add.into_inner().unwrap(); | |
|
83 | for (path, mtime) in &to_add { | |
|
84 | let node = DirstateMap::get_or_insert_node( | |
|
85 | dmap.on_disk, | |
|
86 | &mut dmap.root, | |
|
87 | path, | |
|
88 | WithBasename::to_cow_owned, | |
|
89 | |_| {}, | |
|
90 | )?; | |
|
91 | match &node.data { | |
|
92 | NodeData::Entry(_) => {} // Don’t overwrite an entry | |
|
93 | NodeData::CachedDirectory { .. } | NodeData::None => { | |
|
94 | node.data = NodeData::CachedDirectory { mtime: *mtime } | |
|
95 | } | |
|
96 | } | |
|
97 | } | |
|
98 | Ok((outcome, warnings)) | |
|
68 | 99 | } |
|
69 | 100 | |
|
70 | 101 | /// Bag of random things needed by various parts of the algorithm. Reduces the |
@@ -75,6 +106,12 b" struct StatusCommon<'a, 'tree, 'on_disk:" | |||
|
75 | 106 | matcher: &'a (dyn Matcher + Sync), |
|
76 | 107 | ignore_fn: IgnoreFnType<'a>, |
|
77 | 108 | outcome: Mutex<DirstateStatus<'on_disk>>, |
|
109 | cached_directory_mtimes_to_add: | |
|
110 | Mutex<Vec<(Cow<'on_disk, HgPath>, Timestamp)>>, | |
|
111 | ||
|
112 | /// The current time at the start of the `status()` algorithm, as measured | |
|
113 | /// and possibly truncated by the filesystem. | |
|
114 | filesystem_time_at_status_start: Option<SystemTime>, | |
|
78 | 115 | } |
|
79 | 116 | |
|
80 | 117 | impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> { |
@@ -97,18 +134,54 b" impl<'a, 'tree, 'on_disk> StatusCommon<'" | |||
|
97 | 134 | .push((hg_path.to_owned().into(), BadMatch::OsError(errno))) |
|
98 | 135 | } |
|
99 | 136 | |
|
137 | /// If this returns true, we can get accurate results by only using | |
|
138 | /// `symlink_metadata` for child nodes that exist in the dirstate and don’t | |
|
139 | /// need to call `read_dir`. | |
|
140 | fn can_skip_fs_readdir( | |
|
141 | &self, | |
|
142 | directory_metadata: Option<&std::fs::Metadata>, | |
|
143 | cached_directory_mtime: Option<&Timestamp>, | |
|
144 | ) -> bool { | |
|
145 | if !self.options.list_unknown && !self.options.list_ignored { | |
|
146 | // All states that we care about listing have corresponding | |
|
147 | // dirstate entries. | |
|
148 | // This happens for example with `hg status -mard`. | |
|
149 | return true; | |
|
150 | } | |
|
151 | if let Some(cached_mtime) = cached_directory_mtime { | |
|
152 | // The dirstate contains a cached mtime for this directory, set by | |
|
153 | // a previous run of the `status` algorithm which found this | |
|
154 | // directory eligible for `read_dir` caching. | |
|
155 | if let Some(meta) = directory_metadata { | |
|
156 | if let Ok(current_mtime) = meta.modified() { | |
|
157 | if current_mtime == cached_mtime.into() { | |
|
158 | // The mtime of that directory has not changed since | |
|
159 | // then, which means that the | |
|
160 | // results of `read_dir` should also | |
|
161 | // be unchanged. | |
|
162 | return true; | |
|
163 | } | |
|
164 | } | |
|
165 | } | |
|
166 | } | |
|
167 | false | |
|
168 | } | |
|
169 | ||
|
170 | /// Returns whether the filesystem directory was found to have any entry | |
|
171 | /// that does not have a corresponding dirstate tree node. | |
|
100 | 172 | fn traverse_fs_directory_and_dirstate( |
|
101 | 173 | &self, |
|
102 | 174 | has_ignored_ancestor: bool, |
|
103 | 175 | dirstate_nodes: ChildNodesRef<'tree, 'on_disk>, |
|
104 | 176 | directory_hg_path: &BorrowedPath<'tree, 'on_disk>, |
|
105 | 177 | directory_fs_path: &Path, |
|
178 | directory_metadata: Option<&std::fs::Metadata>, | |
|
179 | cached_directory_mtime: Option<&Timestamp>, | |
|
106 | 180 | is_at_repo_root: bool, |
|
107 |
) -> Result< |
|
|
108 | if !self.options.list_unknown && !self.options.list_ignored { | |
|
109 | // We only care about files in the dirstate, so we can skip listing | |
|
110 | // filesystem directories entirely. | |
|
111 | return dirstate_nodes | |
|
181 | ) -> Result<bool, DirstateV2ParseError> { | |
|
182 | if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime) | |
|
183 | { | |
|
184 | dirstate_nodes | |
|
112 | 185 | .par_iter() |
|
113 | 186 | .map(|dirstate_node| { |
|
114 | 187 | let fs_path = directory_fs_path.join(get_path_from_bytes( |
@@ -131,7 +204,13 b" impl<'a, 'tree, 'on_disk> StatusCommon<'" | |||
|
131 | 204 | } |
|
132 | 205 | } |
|
133 | 206 | }) |
|
134 | .collect(); | |
|
207 | .collect::<Result<_, _>>()?; | |
|
208 | ||
|
209 | // Conservatively don’t let the caller assume that there aren’t | |
|
210 | // any, since we don’t know. | |
|
211 | let directory_has_any_fs_only_entry = true; | |
|
212 | ||
|
213 | return Ok(directory_has_any_fs_only_entry); | |
|
135 | 214 | } |
|
136 | 215 | |
|
137 | 216 | let mut fs_entries = if let Ok(entries) = self.read_dir( |
@@ -174,6 +253,7 b" impl<'a, 'tree, 'on_disk> StatusCommon<'" | |||
|
174 | 253 | .par_bridge() |
|
175 | 254 | .map(|pair| { |
|
176 | 255 | use itertools::EitherOrBoth::*; |
|
256 | let is_fs_only = pair.is_right(); | |
|
177 | 257 | match pair { |
|
178 | 258 | Both(dirstate_node, fs_entry) => self |
|
179 | 259 | .traverse_fs_and_dirstate( |
@@ -181,18 +261,19 b" impl<'a, 'tree, 'on_disk> StatusCommon<'" | |||
|
181 | 261 | &fs_entry.metadata, |
|
182 | 262 | dirstate_node, |
|
183 | 263 | has_ignored_ancestor, |
|
184 | ), | |
|
264 | )?, | |
|
185 | 265 | Left(dirstate_node) => { |
|
186 | self.traverse_dirstate_only(dirstate_node) | |
|
266 | self.traverse_dirstate_only(dirstate_node)? | |
|
187 | 267 | } |
|
188 |
Right(fs_entry) => |
|
|
268 | Right(fs_entry) => self.traverse_fs_only( | |
|
189 | 269 | has_ignored_ancestor, |
|
190 | 270 | directory_hg_path, |
|
191 | 271 | fs_entry, |
|
192 |
) |
|
|
272 | ), | |
|
193 | 273 | } |
|
274 | Ok(is_fs_only) | |
|
194 | 275 | }) |
|
195 | .collect() | |
|
276 | .try_reduce(|| false, |a, b| Ok(a || b)) | |
|
196 | 277 | } |
|
197 | 278 | |
|
198 | 279 | fn traverse_fs_and_dirstate( |
@@ -224,12 +305,20 b" impl<'a, 'tree, 'on_disk> StatusCommon<'" | |||
|
224 | 305 | } |
|
225 | 306 | let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path); |
|
226 | 307 | let is_at_repo_root = false; |
|
227 | self.traverse_fs_directory_and_dirstate( | |
|
308 | let directory_has_any_fs_only_entry = self | |
|
309 | .traverse_fs_directory_and_dirstate( | |
|
228 | 310 | is_ignored, |
|
229 | 311 | dirstate_node.children(self.dmap.on_disk)?, |
|
230 | 312 | hg_path, |
|
231 | 313 | fs_path, |
|
314 | Some(fs_metadata), | |
|
315 | dirstate_node.cached_directory_mtime(), | |
|
232 | 316 | is_at_repo_root, |
|
317 | )?; | |
|
318 | self.maybe_save_directory_mtime( | |
|
319 | directory_has_any_fs_only_entry, | |
|
320 | fs_metadata, | |
|
321 | dirstate_node, | |
|
233 | 322 | )? |
|
234 | 323 | } else { |
|
235 | 324 | if file_or_symlink && self.matcher.matches(hg_path) { |
@@ -274,6 +363,75 b" impl<'a, 'tree, 'on_disk> StatusCommon<'" | |||
|
274 | 363 | Ok(()) |
|
275 | 364 | } |
|
276 | 365 | |
|
366 | fn maybe_save_directory_mtime( | |
|
367 | &self, | |
|
368 | directory_has_any_fs_only_entry: bool, | |
|
369 | directory_metadata: &std::fs::Metadata, | |
|
370 | dirstate_node: NodeRef<'tree, 'on_disk>, | |
|
371 | ) -> Result<(), DirstateV2ParseError> { | |
|
372 | if !directory_has_any_fs_only_entry { | |
|
373 | // All filesystem directory entries from `read_dir` have a | |
|
374 | // corresponding node in the dirstate, so we can reconstitute the | |
|
375 | // names of those entries without calling `read_dir` again. | |
|
376 | if let (Some(status_start), Ok(directory_mtime)) = ( | |
|
377 | &self.filesystem_time_at_status_start, | |
|
378 | directory_metadata.modified(), | |
|
379 | ) { | |
|
380 | // Although the Rust standard library’s `SystemTime` type | |
|
381 | // has nanosecond precision, the times reported for a | |
|
382 | // directory’s (or file’s) modified time may have lower | |
|
383 | // resolution based on the filesystem (for example ext3 | |
|
384 | // only stores integer seconds), kernel (see | |
|
385 | // https://stackoverflow.com/a/14393315/1162888), etc. | |
|
386 | if &directory_mtime >= status_start { | |
|
387 | // The directory was modified too recently, don’t cache its | |
|
388 | // `read_dir` results. | |
|
389 | // | |
|
390 | // A timeline like this is possible: | |
|
391 | // | |
|
392 | // 1. A change to this directory (direct child was | |
|
393 | // added or removed) cause its mtime to be set | |
|
394 | // (possibly truncated) to `directory_mtime` | |
|
395 | // 2. This `status` algorithm calls `read_dir` | |
|
396 | // 3. An other change is made to the same directory is | |
|
397 | // made so that calling `read_dir` agin would give | |
|
398 | // different results, but soon enough after 1. that | |
|
399 | // the mtime stays the same | |
|
400 | // | |
|
401 | // On a system where the time resolution poor, this | |
|
402 | // scenario is not unlikely if all three steps are caused | |
|
403 | // by the same script. | |
|
404 | } else { | |
|
405 | // We’ve observed (through `status_start`) that time has | |
|
406 | // “progressed” since `directory_mtime`, so any further | |
|
407 | // change to this directory is extremely likely to cause a | |
|
408 | // different mtime. | |
|
409 | // | |
|
410 | // Having the same mtime again is not entirely impossible | |
|
411 | // since the system clock is not monotonous. It could jump | |
|
412 | // backward to some point before `directory_mtime`, then a | |
|
413 | // directory change could potentially happen during exactly | |
|
414 | // the wrong tick. | |
|
415 | // | |
|
416 | // We deem this scenario (unlike the previous one) to be | |
|
417 | // unlikely enough in practice. | |
|
418 | let timestamp = directory_mtime.into(); | |
|
419 | let cached = dirstate_node.cached_directory_mtime(); | |
|
420 | if cached != Some(×tamp) { | |
|
421 | let hg_path = dirstate_node | |
|
422 | .full_path_borrowed(self.dmap.on_disk)? | |
|
423 | .detach_from_tree(); | |
|
424 | self.cached_directory_mtimes_to_add | |
|
425 | .lock() | |
|
426 | .unwrap() | |
|
427 | .push((hg_path, timestamp)) | |
|
428 | } | |
|
429 | } | |
|
430 | } | |
|
431 | } | |
|
432 | Ok(()) | |
|
433 | } | |
|
434 | ||
|
277 | 435 | /// A file with `EntryState::Normal` in the dirstate was found in the |
|
278 | 436 | /// filesystem |
|
279 | 437 | fn handle_normal_file( |
@@ -505,3 +663,22 b' impl DirEntry {' | |||
|
505 | 663 | Ok(results) |
|
506 | 664 | } |
|
507 | 665 | } |
|
666 | ||
|
667 | /// Return the `mtime` of a temporary file newly-created in the `.hg` directory | |
|
668 | /// of the given repository. | |
|
669 | /// | |
|
670 | /// This is similar to `SystemTime::now()`, with the result truncated to the | |
|
671 | /// same time resolution as other files’ modification times. Using `.hg` | |
|
672 | /// instead of the system’s default temporary directory (such as `/tmp`) makes | |
|
673 | /// it more likely the temporary file is in the same disk partition as contents | |
|
674 | /// of the working directory, which can matter since different filesystems may | |
|
675 | /// store timestamps with different resolutions. | |
|
676 | /// | |
|
677 | /// This may fail, typically if we lack write permissions. In that case we | |
|
678 | /// should continue the `status()` algorithm anyway and consider the current | |
|
679 | /// date/time to be unknown. | |
|
680 | fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> { | |
|
681 | tempfile::tempfile_in(repo_root.join(".hg"))? | |
|
682 | .metadata()? | |
|
683 | .modified() | |
|
684 | } |
General Comments 0
You need to be logged in to leave comments.
Login now