Show More
@@ -1,379 +1,385 b'' | |||
|
1 | 1 | use crate::dirstate::status::IgnoreFnType; |
|
2 | 2 | use crate::dirstate_tree::dirstate_map::ChildNodes; |
|
3 | 3 | use crate::dirstate_tree::dirstate_map::DirstateMap; |
|
4 | 4 | use crate::dirstate_tree::dirstate_map::Node; |
|
5 | 5 | use crate::matchers::get_ignore_function; |
|
6 | 6 | use crate::matchers::Matcher; |
|
7 | 7 | use crate::utils::files::get_bytes_from_os_string; |
|
8 | 8 | use crate::utils::hg_path::HgPath; |
|
9 | 9 | use crate::DirstateStatus; |
|
10 | 10 | use crate::EntryState; |
|
11 | 11 | use crate::HgPathBuf; |
|
12 | 12 | use crate::PatternFileWarning; |
|
13 | 13 | use crate::StatusError; |
|
14 | 14 | use crate::StatusOptions; |
|
15 | 15 | use std::borrow::Cow; |
|
16 | 16 | use std::io; |
|
17 | 17 | use std::path::Path; |
|
18 | 18 | use std::path::PathBuf; |
|
19 | 19 | |
|
20 | 20 | /// Returns the status of the working directory compared to its parent |
|
21 | 21 | /// changeset. |
|
22 | 22 | /// |
|
23 | 23 | /// This algorithm is based on traversing the filesystem tree (`fs` in function |
|
24 | 24 | /// and variable names) and dirstate tree at the same time. The core of this |
|
25 | 25 | /// traversal is the recursive `traverse_fs_directory_and_dirstate` function |
|
26 | 26 | /// and its use of `itertools::merge_join_by`. When reaching a path that only |
|
27 | 27 | /// exists in one of the two trees, depending on information requested by |
|
28 | 28 | /// `options` we may need to traverse the remaining subtree. |
|
29 | 29 | pub fn status<'tree>( |
|
30 | 30 | dmap: &'tree mut DirstateMap, |
|
31 | 31 | matcher: &(dyn Matcher + Sync), |
|
32 | 32 | root_dir: PathBuf, |
|
33 | 33 | ignore_files: Vec<PathBuf>, |
|
34 | 34 | options: StatusOptions, |
|
35 | 35 | ) -> Result<(DirstateStatus<'tree>, Vec<PatternFileWarning>), StatusError> { |
|
36 | 36 | let (ignore_fn, warnings): (IgnoreFnType, _) = |
|
37 | 37 | if options.list_ignored || options.list_unknown { |
|
38 | 38 | get_ignore_function(ignore_files, &root_dir)? |
|
39 | 39 | } else { |
|
40 | 40 | (Box::new(|&_| true), vec![]) |
|
41 | 41 | }; |
|
42 | 42 | |
|
43 | 43 | let mut common = StatusCommon { |
|
44 | 44 | options, |
|
45 | 45 | matcher, |
|
46 | 46 | ignore_fn, |
|
47 | 47 | outcome: DirstateStatus::default(), |
|
48 | 48 | }; |
|
49 | 49 | let is_at_repo_root = true; |
|
50 | 50 | let hg_path = HgPath::new(""); |
|
51 | 51 | let has_ignored_ancestor = false; |
|
52 | 52 | common.traverse_fs_directory_and_dirstate( |
|
53 | 53 | has_ignored_ancestor, |
|
54 | 54 | &mut dmap.root, |
|
55 | 55 | hg_path, |
|
56 | 56 | &root_dir, |
|
57 | 57 | is_at_repo_root, |
|
58 | 58 | ); |
|
59 | 59 | Ok((common.outcome, warnings)) |
|
60 | 60 | } |
|
61 | 61 | |
|
62 | 62 | /// Bag of random things needed by various parts of the algorithm. Reduces the |
|
63 | 63 | /// number of parameters passed to functions. |
|
64 | 64 | struct StatusCommon<'tree, 'a> { |
|
65 | 65 | options: StatusOptions, |
|
66 | 66 | matcher: &'a (dyn Matcher + Sync), |
|
67 | 67 | ignore_fn: IgnoreFnType<'a>, |
|
68 | 68 | outcome: DirstateStatus<'tree>, |
|
69 | 69 | } |
|
70 | 70 | |
|
71 | 71 | impl<'tree, 'a> StatusCommon<'tree, 'a> { |
|
72 | 72 | fn traverse_fs_directory_and_dirstate( |
|
73 | 73 | &mut self, |
|
74 | 74 | has_ignored_ancestor: bool, |
|
75 | 75 | dirstate_nodes: &'tree mut ChildNodes, |
|
76 | 76 | directory_hg_path: &HgPath, |
|
77 | 77 | fs_path: &Path, |
|
78 | 78 | is_at_repo_root: bool, |
|
79 | 79 | ) { |
|
80 | 80 | // TODO: handle I/O errors |
|
81 | 81 | let mut fs_entries = |
|
82 | 82 | DirEntry::read_dir(fs_path, is_at_repo_root).unwrap(); |
|
83 | 83 | |
|
84 | 84 | // `merge_join_by` requires both its input iterators to be sorted: |
|
85 | 85 | |
|
86 | 86 | // * `BTreeMap` iterates according to keys’ ordering by definition |
|
87 | 87 | |
|
88 | 88 | // `sort_unstable_by_key` doesn’t allow keys borrowing from the value: |
|
89 | 89 | // https://github.com/rust-lang/rust/issues/34162 |
|
90 | 90 | fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name)); |
|
91 | 91 | |
|
92 | 92 | for pair in itertools::merge_join_by( |
|
93 | 93 | dirstate_nodes, |
|
94 | 94 | &fs_entries, |
|
95 | 95 | |(full_path, _node), fs_entry| { |
|
96 | 96 | full_path.base_name().cmp(&fs_entry.base_name) |
|
97 | 97 | }, |
|
98 | 98 | ) { |
|
99 | 99 | use itertools::EitherOrBoth::*; |
|
100 | 100 | match pair { |
|
101 | 101 | Both((hg_path, dirstate_node), fs_entry) => { |
|
102 | 102 | self.traverse_fs_and_dirstate( |
|
103 | 103 | fs_entry, |
|
104 | 104 | hg_path.full_path(), |
|
105 | 105 | dirstate_node, |
|
106 | 106 | has_ignored_ancestor, |
|
107 | 107 | ); |
|
108 | 108 | } |
|
109 | 109 | Left((hg_path, dirstate_node)) => self.traverse_dirstate_only( |
|
110 | 110 | hg_path.full_path(), |
|
111 | 111 | dirstate_node, |
|
112 | 112 | ), |
|
113 | 113 | Right(fs_entry) => self.traverse_fs_only( |
|
114 | 114 | has_ignored_ancestor, |
|
115 | 115 | directory_hg_path, |
|
116 | 116 | fs_entry, |
|
117 | 117 | ), |
|
118 | 118 | } |
|
119 | 119 | } |
|
120 | 120 | } |
|
121 | 121 | |
|
122 | 122 | fn traverse_fs_and_dirstate( |
|
123 | 123 | &mut self, |
|
124 | 124 | fs_entry: &DirEntry, |
|
125 | 125 | hg_path: &'tree HgPath, |
|
126 | 126 | dirstate_node: &'tree mut Node, |
|
127 | 127 | has_ignored_ancestor: bool, |
|
128 | 128 | ) { |
|
129 |
|
|
|
130 | if self.options.collect_traversed_dirs { | |
|
131 | self.outcome.traversed.push(hg_path.into()) | |
|
132 | } | |
|
129 | let file_type = fs_entry.metadata.file_type(); | |
|
130 | let file_or_symlink = file_type.is_file() || file_type.is_symlink(); | |
|
131 | if !file_or_symlink { | |
|
133 | 132 | // If we previously had a file here, it was removed (with |
|
134 | 133 | // `hg rm` or similar) or deleted before it could be |
|
135 | // replaced by a directory. | |
|
134 | // replaced by a directory or something else. | |
|
136 | 135 | self.mark_removed_or_deleted_if_file( |
|
137 | 136 | hg_path, |
|
138 | 137 | dirstate_node.state(), |
|
139 | 138 | ); |
|
139 | } | |
|
140 | if file_type.is_dir() { | |
|
141 | if self.options.collect_traversed_dirs { | |
|
142 | self.outcome.traversed.push(hg_path.into()) | |
|
143 | } | |
|
140 | 144 | let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path); |
|
141 | 145 | let is_at_repo_root = false; |
|
142 | 146 | self.traverse_fs_directory_and_dirstate( |
|
143 | 147 | is_ignored, |
|
144 | 148 | &mut dirstate_node.children, |
|
145 | 149 | hg_path, |
|
146 | 150 | &fs_entry.full_path, |
|
147 | 151 | is_at_repo_root, |
|
148 | 152 | ); |
|
149 | 153 | } else { |
|
150 | if self.matcher.matches(hg_path) { | |
|
154 | if file_or_symlink && self.matcher.matches(hg_path) { | |
|
151 | 155 | let full_path = Cow::from(hg_path); |
|
152 | 156 | if let Some(entry) = &dirstate_node.entry { |
|
153 | 157 | match entry.state { |
|
154 | 158 | EntryState::Added => { |
|
155 | 159 | self.outcome.added.push(full_path) |
|
156 | 160 | } |
|
157 | 161 | EntryState::Removed => { |
|
158 | 162 | self.outcome.removed.push(full_path) |
|
159 | 163 | } |
|
160 | 164 | EntryState::Merged => { |
|
161 | 165 | self.outcome.modified.push(full_path) |
|
162 | 166 | } |
|
163 | 167 | EntryState::Normal => { |
|
164 | 168 | self.handle_normal_file( |
|
165 | 169 | full_path, |
|
166 | 170 | dirstate_node, |
|
167 | 171 | entry, |
|
168 | 172 | fs_entry, |
|
169 | 173 | ); |
|
170 | 174 | } |
|
171 | 175 | // This variant is not used in DirstateMap |
|
172 | 176 | // nodes |
|
173 | 177 | EntryState::Unknown => unreachable!(), |
|
174 | 178 | } |
|
175 | 179 | } else { |
|
176 | 180 | // `node.entry.is_none()` indicates a "directory" |
|
177 | 181 | // node, but the filesystem has a file |
|
178 | 182 | self.mark_unknown_or_ignored( |
|
179 | 183 | has_ignored_ancestor, |
|
180 | 184 | full_path, |
|
181 | 185 | ) |
|
182 | 186 | } |
|
183 | 187 | } |
|
184 | 188 | |
|
185 | 189 | for (child_hg_path, child_node) in &mut dirstate_node.children { |
|
186 | 190 | self.traverse_dirstate_only( |
|
187 | 191 | child_hg_path.full_path(), |
|
188 | 192 | child_node, |
|
189 | 193 | ) |
|
190 | 194 | } |
|
191 | 195 | } |
|
192 | 196 | } |
|
193 | 197 | |
|
194 | 198 | /// A file with `EntryState::Normal` in the dirstate was found in the |
|
195 | 199 | /// filesystem |
|
196 | 200 | fn handle_normal_file( |
|
197 | 201 | &mut self, |
|
198 | 202 | full_path: Cow<'tree, HgPath>, |
|
199 | 203 | dirstate_node: &Node, |
|
200 | 204 | entry: &crate::DirstateEntry, |
|
201 | 205 | fs_entry: &DirEntry, |
|
202 | 206 | ) { |
|
203 | 207 | // Keep the low 31 bits |
|
204 | 208 | fn truncate_u64(value: u64) -> i32 { |
|
205 | 209 | (value & 0x7FFF_FFFF) as i32 |
|
206 | 210 | } |
|
207 | 211 | fn truncate_i64(value: i64) -> i32 { |
|
208 | 212 | (value & 0x7FFF_FFFF) as i32 |
|
209 | 213 | } |
|
210 | 214 | |
|
211 | 215 | let mode_changed = || { |
|
212 | 216 | self.options.check_exec && entry.mode_changed(&fs_entry.metadata) |
|
213 | 217 | }; |
|
214 | 218 | let size_changed = entry.size != truncate_u64(fs_entry.metadata.len()); |
|
215 | 219 | if entry.size >= 0 |
|
216 | 220 | && size_changed |
|
217 | 221 | && fs_entry.metadata.file_type().is_symlink() |
|
218 | 222 | { |
|
219 | 223 | // issue6456: Size returned may be longer due to encryption |
|
220 | 224 | // on EXT-4 fscrypt. TODO maybe only do it on EXT4? |
|
221 | 225 | self.outcome.unsure.push(full_path) |
|
222 | 226 | } else if dirstate_node.copy_source.is_some() |
|
223 | 227 | || entry.is_from_other_parent() |
|
224 | 228 | || (entry.size >= 0 && (size_changed || mode_changed())) |
|
225 | 229 | { |
|
226 | 230 | self.outcome.modified.push(full_path) |
|
227 | 231 | } else { |
|
228 | 232 | let mtime = mtime_seconds(&fs_entry.metadata); |
|
229 | 233 | if truncate_i64(mtime) != entry.mtime |
|
230 | 234 | || mtime == self.options.last_normal_time |
|
231 | 235 | { |
|
232 | 236 | self.outcome.unsure.push(full_path) |
|
233 | 237 | } else if self.options.list_clean { |
|
234 | 238 | self.outcome.clean.push(full_path) |
|
235 | 239 | } |
|
236 | 240 | } |
|
237 | 241 | } |
|
238 | 242 | |
|
239 | 243 | /// A node in the dirstate tree has no corresponding filesystem entry |
|
240 | 244 | fn traverse_dirstate_only( |
|
241 | 245 | &mut self, |
|
242 | 246 | hg_path: &'tree HgPath, |
|
243 | 247 | dirstate_node: &'tree mut Node, |
|
244 | 248 | ) { |
|
245 | 249 | self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state()); |
|
246 | 250 | for (child_hg_path, child_node) in &mut dirstate_node.children { |
|
247 | 251 | self.traverse_dirstate_only(child_hg_path.full_path(), child_node) |
|
248 | 252 | } |
|
249 | 253 | } |
|
250 | 254 | |
|
251 | 255 | /// A node in the dirstate tree has no corresponding *file* on the |
|
252 | 256 | /// filesystem |
|
253 | 257 | /// |
|
254 | 258 | /// Does nothing on a "directory" node |
|
255 | 259 | fn mark_removed_or_deleted_if_file( |
|
256 | 260 | &mut self, |
|
257 | 261 | hg_path: &'tree HgPath, |
|
258 | 262 | dirstate_node_state: Option<EntryState>, |
|
259 | 263 | ) { |
|
260 | 264 | if let Some(state) = dirstate_node_state { |
|
261 | 265 | if self.matcher.matches(hg_path) { |
|
262 | 266 | if let EntryState::Removed = state { |
|
263 | 267 | self.outcome.removed.push(hg_path.into()) |
|
264 | 268 | } else { |
|
265 | 269 | self.outcome.deleted.push(hg_path.into()) |
|
266 | 270 | } |
|
267 | 271 | } |
|
268 | 272 | } |
|
269 | 273 | } |
|
270 | 274 | |
|
271 | 275 | /// Something in the filesystem has no corresponding dirstate node |
|
272 | 276 | fn traverse_fs_only( |
|
273 | 277 | &mut self, |
|
274 | 278 | has_ignored_ancestor: bool, |
|
275 | 279 | directory_hg_path: &HgPath, |
|
276 | 280 | fs_entry: &DirEntry, |
|
277 | 281 | ) { |
|
278 | 282 | let hg_path = directory_hg_path.join(&fs_entry.base_name); |
|
279 |
|
|
|
283 | let file_type = fs_entry.metadata.file_type(); | |
|
284 | let file_or_symlink = file_type.is_file() || file_type.is_symlink(); | |
|
285 | if file_type.is_dir() { | |
|
280 | 286 | let is_ignored = |
|
281 | 287 | has_ignored_ancestor || (self.ignore_fn)(&hg_path); |
|
282 | 288 | let traverse_children = if is_ignored { |
|
283 | 289 | // Descendants of an ignored directory are all ignored |
|
284 | 290 | self.options.list_ignored |
|
285 | 291 | } else { |
|
286 | 292 | // Descendants of an unknown directory may be either unknown or |
|
287 | 293 | // ignored |
|
288 | 294 | self.options.list_unknown || self.options.list_ignored |
|
289 | 295 | }; |
|
290 | 296 | if traverse_children { |
|
291 | 297 | let is_at_repo_root = false; |
|
292 | 298 | // TODO: handle I/O errors |
|
293 | 299 | let children_fs_entries = |
|
294 | 300 | DirEntry::read_dir(&fs_entry.full_path, is_at_repo_root) |
|
295 | 301 | .unwrap(); |
|
296 | 302 | for child_fs_entry in children_fs_entries { |
|
297 | 303 | self.traverse_fs_only( |
|
298 | 304 | is_ignored, |
|
299 | 305 | &hg_path, |
|
300 | 306 | &child_fs_entry, |
|
301 | 307 | ) |
|
302 | 308 | } |
|
303 | 309 | } |
|
304 | 310 | if self.options.collect_traversed_dirs { |
|
305 | 311 | self.outcome.traversed.push(hg_path.into()) |
|
306 | 312 | } |
|
307 | } else if self.matcher.matches(&hg_path) { | |
|
313 | } else if file_or_symlink && self.matcher.matches(&hg_path) { | |
|
308 | 314 | self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into()) |
|
309 | 315 | } |
|
310 | 316 | } |
|
311 | 317 | |
|
312 | 318 | fn mark_unknown_or_ignored( |
|
313 | 319 | &mut self, |
|
314 | 320 | has_ignored_ancestor: bool, |
|
315 | 321 | hg_path: Cow<'tree, HgPath>, |
|
316 | 322 | ) { |
|
317 | 323 | let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path); |
|
318 | 324 | if is_ignored { |
|
319 | 325 | if self.options.list_ignored { |
|
320 | 326 | self.outcome.ignored.push(hg_path) |
|
321 | 327 | } |
|
322 | 328 | } else { |
|
323 | 329 | if self.options.list_unknown { |
|
324 | 330 | self.outcome.unknown.push(hg_path) |
|
325 | 331 | } |
|
326 | 332 | } |
|
327 | 333 | } |
|
328 | 334 | } |
|
329 | 335 | |
|
/// Returns the value of the Unix-specific `MetadataExt::mtime` accessor for
/// `metadata`.
#[cfg(unix)] // TODO
fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
    // `Metadata::modified()` would be the portable route, but it goes to
    // the trouble of building a `SystemTime` with sub-second precision that
    // we would immediately discard.
    std::os::unix::fs::MetadataExt::mtime(metadata)
}
|
338 | 344 | |
|
339 | 345 | struct DirEntry { |
|
340 | 346 | base_name: HgPathBuf, |
|
341 | 347 | full_path: PathBuf, |
|
342 | 348 | metadata: std::fs::Metadata, |
|
343 | 349 | } |
|
344 | 350 | |
|
345 | 351 | impl DirEntry { |
|
346 | 352 | /// Returns **unsorted** entries in the given directory, with name and |
|
347 | 353 | /// metadata. |
|
348 | 354 | /// |
|
349 | 355 | /// If a `.hg` sub-directory is encountered: |
|
350 | 356 | /// |
|
351 | 357 | /// * At the repository root, ignore that sub-directory |
|
352 | 358 | /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty |
|
353 | 359 | /// list instead. |
|
354 | 360 | fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> { |
|
355 | 361 | let mut results = Vec::new(); |
|
356 | 362 | for entry in path.read_dir()? { |
|
357 | 363 | let entry = entry?; |
|
358 | 364 | let metadata = entry.metadata()?; |
|
359 | 365 | let name = get_bytes_from_os_string(entry.file_name()); |
|
360 | 366 | // FIXME don't do this when cached |
|
361 | 367 | if name == b".hg" { |
|
362 | 368 | if is_at_repo_root { |
|
363 | 369 | // Skip the repo’s own .hg (might be a symlink) |
|
364 | 370 | continue; |
|
365 | 371 | } else if metadata.is_dir() { |
|
366 | 372 | // A .hg sub-directory at another location means a subrepo, |
|
367 | 373 | // skip it entirely. |
|
368 | 374 | return Ok(Vec::new()); |
|
369 | 375 | } |
|
370 | 376 | } |
|
371 | 377 | results.push(DirEntry { |
|
372 | 378 | base_name: name.into(), |
|
373 | 379 | full_path: entry.path(), |
|
374 | 380 | metadata, |
|
375 | 381 | }) |
|
376 | 382 | } |
|
377 | 383 | Ok(results) |
|
378 | 384 | } |
|
379 | 385 | } |
General Comments 0
You need to be logged in to leave comments.
Login now