##// END OF EJS Templates
dirstate-v2: Apply SECOND_AMBIGUOUS to directory mtimes too...
Simon Sapin -
r49332:4afb9627 default
parent child Browse files
Show More
@@ -1,154 +1,154 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Rust implementation of dirstate.status (dirstate.py).
9 9 //! It is currently missing a lot of functionality compared to the Python one
10 10 //! and will only be triggered in narrow cases.
11 11
12 use crate::dirstate::entry::TruncatedTimestamp;
12 13 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
13
14 14 use crate::{
15 15 utils::hg_path::{HgPath, HgPathError},
16 16 PatternError,
17 17 };
18 18
19 19 use std::{borrow::Cow, fmt};
20 20
21 21 /// Wrong type of file from a `BadMatch`
22 22 /// Note: a lot of those don't exist on all platforms.
23 23 #[derive(Debug, Copy, Clone)]
24 24 pub enum BadType {
25 25 CharacterDevice,
26 26 BlockDevice,
27 27 FIFO,
28 28 Socket,
29 29 Directory,
30 30 Unknown,
31 31 }
32 32
33 33 impl fmt::Display for BadType {
34 34 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
35 35 f.write_str(match self {
36 36 BadType::CharacterDevice => "character device",
37 37 BadType::BlockDevice => "block device",
38 38 BadType::FIFO => "fifo",
39 39 BadType::Socket => "socket",
40 40 BadType::Directory => "directory",
41 41 BadType::Unknown => "unknown",
42 42 })
43 43 }
44 44 }
45 45
46 46 /// Was explicitly matched but cannot be found/accessed
47 47 #[derive(Debug, Copy, Clone)]
48 48 pub enum BadMatch {
49 49 OsError(i32),
50 50 BadType(BadType),
51 51 }
52 52
53 53 /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait + 'static>`, so add
54 54 /// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
55 55 pub type IgnoreFnType<'a> =
56 56 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
57 57
58 58 /// We have a good mix of owned (from directory traversal) and borrowed (from
59 59 /// the dirstate/explicit) paths, this comes up a lot.
60 60 pub type HgPathCow<'a> = Cow<'a, HgPath>;
61 61
62 62 #[derive(Debug, Copy, Clone)]
63 63 pub struct StatusOptions {
64 64 /// Whether we are on a filesystem with UNIX-like exec flags
65 65 pub check_exec: bool,
66 66 pub list_clean: bool,
67 67 pub list_unknown: bool,
68 68 pub list_ignored: bool,
69 69 /// Whether to populate `StatusPath::copy_source`
70 70 pub list_copies: bool,
71 71 /// Whether to collect traversed dirs for applying a callback later.
72 72 /// Used by `hg purge` for example.
73 73 pub collect_traversed_dirs: bool,
74 74 }
75 75
76 76 #[derive(Default)]
77 77 pub struct DirstateStatus<'a> {
78 78 /// The current time at the start of the `status()` algorithm, as measured
79 79 /// and possibly truncated by the filesystem.
80 pub filesystem_time_at_status_start: Option<std::time::SystemTime>,
80 pub filesystem_time_at_status_start: Option<TruncatedTimestamp>,
81 81
82 82 /// Tracked files whose contents have changed since the parent revision
83 83 pub modified: Vec<StatusPath<'a>>,
84 84
85 85 /// Newly-tracked files that were not present in the parent
86 86 pub added: Vec<StatusPath<'a>>,
87 87
88 88 /// Previously-tracked files that have been (re)moved with an hg command
89 89 pub removed: Vec<StatusPath<'a>>,
90 90
91 91 /// (Still) tracked files that are missing, (re)moved with a non-hg
92 92 /// command
93 93 pub deleted: Vec<StatusPath<'a>>,
94 94
95 95 /// Tracked files that are up to date with the parent.
96 96 /// Only populated if `StatusOptions::list_clean` is true.
97 97 pub clean: Vec<StatusPath<'a>>,
98 98
99 99 /// Files in the working directory that are ignored with `.hgignore`.
100 100 /// Only populated if `StatusOptions::list_ignored` is true.
101 101 pub ignored: Vec<StatusPath<'a>>,
102 102
103 103 /// Files in the working directory that are neither tracked nor ignored.
104 104 /// Only populated if `StatusOptions::list_unknown` is true.
105 105 pub unknown: Vec<StatusPath<'a>>,
106 106
107 107 /// Was explicitly matched but cannot be found/accessed
108 108 pub bad: Vec<(HgPathCow<'a>, BadMatch)>,
109 109
110 110 /// Either clean or modified, but we can’t tell from filesystem metadata
111 111 /// alone. The file contents need to be read and compared with that in
112 112 /// the parent.
113 113 pub unsure: Vec<StatusPath<'a>>,
114 114
115 115 /// Only filled if `collect_traversed_dirs` is `true`
116 116 pub traversed: Vec<HgPathCow<'a>>,
117 117
118 118 /// Whether `status()` made changes to the `DirstateMap` that should be
119 119 /// written back to disk
120 120 pub dirty: bool,
121 121 }
122 122
123 123 #[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
124 124 pub struct StatusPath<'a> {
125 125 pub path: HgPathCow<'a>,
126 126 pub copy_source: Option<HgPathCow<'a>>,
127 127 }
128 128
129 129 #[derive(Debug, derive_more::From)]
130 130 pub enum StatusError {
131 131 /// Generic IO error
132 132 IO(std::io::Error),
133 133 /// An invalid path that cannot be represented in Mercurial was found
134 134 Path(HgPathError),
135 135 /// An invalid "ignore" pattern was found
136 136 Pattern(PatternError),
137 137 /// Corrupted dirstate
138 138 DirstateV2ParseError(DirstateV2ParseError),
139 139 }
140 140
141 141 pub type StatusResult<T> = Result<T, StatusError>;
142 142
143 143 impl fmt::Display for StatusError {
144 144 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
145 145 match self {
146 146 StatusError::IO(error) => error.fmt(f),
147 147 StatusError::Path(error) => error.fmt(f),
148 148 StatusError::Pattern(error) => error.fmt(f),
149 149 StatusError::DirstateV2ParseError(_) => {
150 150 f.write_str("dirstate-v2 parse error")
151 151 }
152 152 }
153 153 }
154 154 }
@@ -1,837 +1,843 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::TruncatedTimestamp;
6 6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 7 use crate::dirstate_tree::path_with_basename::WithBasename;
8 8 use crate::errors::HgError;
9 9 use crate::utils::hg_path::HgPath;
10 10 use crate::DirstateEntry;
11 11 use crate::DirstateError;
12 12 use crate::DirstateParents;
13 13 use bitflags::bitflags;
14 14 use bytes_cast::unaligned::{U16Be, U32Be};
15 15 use bytes_cast::BytesCast;
16 16 use format_bytes::format_bytes;
17 17 use rand::Rng;
18 18 use std::borrow::Cow;
19 19 use std::convert::{TryFrom, TryInto};
20 20 use std::fmt::Write;
21 21
22 22 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
23 23 /// This is a redundant sanity check more than an actual "magic number" since
24 24 /// `.hg/requires` already governs which format should be used.
25 25 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
26 26
27 27 /// Keep space for 256-bit hashes
28 28 const STORED_NODE_ID_BYTES: usize = 32;
29 29
30 30 /// … even though only 160 bits are used for now, with SHA-1
31 31 const USED_NODE_ID_BYTES: usize = 20;
32 32
33 33 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
34 34 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
35 35
36 36 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
37 37 const TREE_METADATA_SIZE: usize = 44;
38 38 const NODE_SIZE: usize = 44;
39 39
40 40 /// Make sure that size-affecting changes are made knowingly
41 41 #[allow(unused)]
42 42 fn static_assert_size_of() {
43 43 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
44 44 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
45 45 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
46 46 }
47 47
48 48 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
49 49 #[derive(BytesCast)]
50 50 #[repr(C)]
51 51 struct DocketHeader {
52 52 marker: [u8; V2_FORMAT_MARKER.len()],
53 53 parent_1: [u8; STORED_NODE_ID_BYTES],
54 54 parent_2: [u8; STORED_NODE_ID_BYTES],
55 55
56 56 metadata: TreeMetadata,
57 57
58 58 /// Counted in bytes
59 59 data_size: Size,
60 60
61 61 uuid_size: u8,
62 62 }
63 63
64 64 pub struct Docket<'on_disk> {
65 65 header: &'on_disk DocketHeader,
66 66 pub uuid: &'on_disk [u8],
67 67 }
68 68
69 69 /// Fields are documented in the *Tree metadata in the docket file*
70 70 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
71 71 #[derive(BytesCast)]
72 72 #[repr(C)]
73 73 pub struct TreeMetadata {
74 74 root_nodes: ChildNodes,
75 75 nodes_with_entry_count: Size,
76 76 nodes_with_copy_source_count: Size,
77 77 unreachable_bytes: Size,
78 78 unused: [u8; 4],
79 79
80 80 /// See *Optional hash of ignore patterns* section of
81 81 /// `mercurial/helptext/internals/dirstate-v2.txt`
82 82 ignore_patterns_hash: IgnorePatternsHash,
83 83 }
84 84
85 85 /// Fields are documented in the *The data file format*
86 86 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
87 87 #[derive(BytesCast)]
88 88 #[repr(C)]
89 89 pub(super) struct Node {
90 90 full_path: PathSlice,
91 91
92 92 /// In bytes from `self.full_path.start`
93 93 base_name_start: PathSize,
94 94
95 95 copy_source: OptPathSlice,
96 96 children: ChildNodes,
97 97 pub(super) descendants_with_entry_count: Size,
98 98 pub(super) tracked_descendants_count: Size,
99 99 flags: U16Be,
100 100 size: U32Be,
101 101 mtime: PackedTruncatedTimestamp,
102 102 }
103 103
104 104 bitflags! {
105 105 #[repr(C)]
106 106 struct Flags: u16 {
107 107 const WDIR_TRACKED = 1 << 0;
108 108 const P1_TRACKED = 1 << 1;
109 109 const P2_INFO = 1 << 2;
110 110 const MODE_EXEC_PERM = 1 << 3;
111 111 const MODE_IS_SYMLINK = 1 << 4;
112 112 const HAS_FALLBACK_EXEC = 1 << 5;
113 113 const FALLBACK_EXEC = 1 << 6;
114 114 const HAS_FALLBACK_SYMLINK = 1 << 7;
115 115 const FALLBACK_SYMLINK = 1 << 8;
116 116 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
117 117 const HAS_MODE_AND_SIZE = 1 <<10;
118 118 const HAS_MTIME = 1 <<11;
119 119 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
120 120 const DIRECTORY = 1 <<13;
121 121 const ALL_UNKNOWN_RECORDED = 1 <<14;
122 122 const ALL_IGNORED_RECORDED = 1 <<15;
123 123 }
124 124 }
125 125
126 126 /// Duration since the Unix epoch
127 127 #[derive(BytesCast, Copy, Clone)]
128 128 #[repr(C)]
129 129 struct PackedTruncatedTimestamp {
130 130 truncated_seconds: U32Be,
131 131 nanoseconds: U32Be,
132 132 }
133 133
134 134 /// Counted in bytes from the start of the file
135 135 ///
136 136 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
137 137 type Offset = U32Be;
138 138
139 139 /// Counted in number of items
140 140 ///
141 141 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
142 142 type Size = U32Be;
143 143
144 144 /// Counted in bytes
145 145 ///
146 146 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
147 147 type PathSize = U16Be;
148 148
149 149 /// A contiguous sequence of `len` times `Node`, representing the child nodes
150 150 /// of either some other node or of the repository root.
151 151 ///
152 152 /// Always sorted by ascending `full_path`, to allow binary search.
153 153 /// Since nodes with the same parent nodes also have the same parent path,
154 154 /// only the `base_name`s need to be compared during binary search.
155 155 #[derive(BytesCast, Copy, Clone)]
156 156 #[repr(C)]
157 157 struct ChildNodes {
158 158 start: Offset,
159 159 len: Size,
160 160 }
161 161
162 162 /// A `HgPath` of `len` bytes
163 163 #[derive(BytesCast, Copy, Clone)]
164 164 #[repr(C)]
165 165 struct PathSlice {
166 166 start: Offset,
167 167 len: PathSize,
168 168 }
169 169
170 170 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
171 171 type OptPathSlice = PathSlice;
172 172
173 173 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
174 174 ///
175 175 /// This should only happen if Mercurial is buggy or a repository is corrupted.
176 176 #[derive(Debug)]
177 177 pub struct DirstateV2ParseError;
178 178
179 179 impl From<DirstateV2ParseError> for HgError {
180 180 fn from(_: DirstateV2ParseError) -> Self {
181 181 HgError::corrupted("dirstate-v2 parse error")
182 182 }
183 183 }
184 184
185 185 impl From<DirstateV2ParseError> for crate::DirstateError {
186 186 fn from(error: DirstateV2ParseError) -> Self {
187 187 HgError::from(error).into()
188 188 }
189 189 }
190 190
191 191 impl TreeMetadata {
192 192 pub fn as_bytes(&self) -> &[u8] {
193 193 BytesCast::as_bytes(self)
194 194 }
195 195 }
196 196
197 197 impl<'on_disk> Docket<'on_disk> {
198 198 /// Generate the identifier for a new data file
199 199 ///
200 200 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
201 201 /// See `mercurial/revlogutils/docket.py`
202 202 pub fn new_uid() -> String {
203 203 const ID_LENGTH: usize = 8;
204 204 let mut id = String::with_capacity(ID_LENGTH);
205 205 let mut rng = rand::thread_rng();
206 206 for _ in 0..ID_LENGTH {
207 207 // One random hexadecimal digit.
208 208 // `unwrap` never panics because `impl Write for String`
209 209 // never returns an error.
210 210 write!(&mut id, "{:x}", rng.gen_range(0, 16)).unwrap();
211 211 }
212 212 id
213 213 }
214 214
215 215 pub fn serialize(
216 216 parents: DirstateParents,
217 217 tree_metadata: TreeMetadata,
218 218 data_size: u64,
219 219 uuid: &[u8],
220 220 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
221 221 let header = DocketHeader {
222 222 marker: *V2_FORMAT_MARKER,
223 223 parent_1: parents.p1.pad_to_256_bits(),
224 224 parent_2: parents.p2.pad_to_256_bits(),
225 225 metadata: tree_metadata,
226 226 data_size: u32::try_from(data_size)?.into(),
227 227 uuid_size: uuid.len().try_into()?,
228 228 };
229 229 let header = header.as_bytes();
230 230 let mut docket = Vec::with_capacity(header.len() + uuid.len());
231 231 docket.extend_from_slice(header);
232 232 docket.extend_from_slice(uuid);
233 233 Ok(docket)
234 234 }
235 235
236 236 pub fn parents(&self) -> DirstateParents {
237 237 use crate::Node;
238 238 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
239 239 .unwrap()
240 240 .clone();
241 241 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
242 242 .unwrap()
243 243 .clone();
244 244 DirstateParents { p1, p2 }
245 245 }
246 246
247 247 pub fn tree_metadata(&self) -> &[u8] {
248 248 self.header.metadata.as_bytes()
249 249 }
250 250
251 251 pub fn data_size(&self) -> usize {
252 252 // This `unwrap` could only panic on a 16-bit CPU
253 253 self.header.data_size.get().try_into().unwrap()
254 254 }
255 255
256 256 pub fn data_filename(&self) -> String {
257 257 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
258 258 }
259 259 }
260 260
261 261 pub fn read_docket(
262 262 on_disk: &[u8],
263 263 ) -> Result<Docket<'_>, DirstateV2ParseError> {
264 264 let (header, uuid) =
265 265 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
266 266 let uuid_size = header.uuid_size as usize;
267 267 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
268 268 Ok(Docket { header, uuid })
269 269 } else {
270 270 Err(DirstateV2ParseError)
271 271 }
272 272 }
273 273
274 274 pub(super) fn read<'on_disk>(
275 275 on_disk: &'on_disk [u8],
276 276 metadata: &[u8],
277 277 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
278 278 if on_disk.is_empty() {
279 279 return Ok(DirstateMap::empty(on_disk));
280 280 }
281 281 let (meta, _) = TreeMetadata::from_bytes(metadata)
282 282 .map_err(|_| DirstateV2ParseError)?;
283 283 let dirstate_map = DirstateMap {
284 284 on_disk,
285 285 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
286 286 on_disk,
287 287 meta.root_nodes,
288 288 )?),
289 289 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
290 290 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
291 291 ignore_patterns_hash: meta.ignore_patterns_hash,
292 292 unreachable_bytes: meta.unreachable_bytes.get(),
293 293 };
294 294 Ok(dirstate_map)
295 295 }
296 296
297 297 impl Node {
298 298 pub(super) fn full_path<'on_disk>(
299 299 &self,
300 300 on_disk: &'on_disk [u8],
301 301 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
302 302 read_hg_path(on_disk, self.full_path)
303 303 }
304 304
305 305 pub(super) fn base_name_start<'on_disk>(
306 306 &self,
307 307 ) -> Result<usize, DirstateV2ParseError> {
308 308 let start = self.base_name_start.get();
309 309 if start < self.full_path.len.get() {
310 310 let start = usize::try_from(start)
311 311 // u16 -> usize, this conversion cannot actually fail
312 312 .expect("dirstate-v2 base_name_start out of bounds");
313 313 Ok(start)
314 314 } else {
315 315 Err(DirstateV2ParseError)
316 316 }
317 317 }
318 318
319 319 pub(super) fn base_name<'on_disk>(
320 320 &self,
321 321 on_disk: &'on_disk [u8],
322 322 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
323 323 let full_path = self.full_path(on_disk)?;
324 324 let base_name_start = self.base_name_start()?;
325 325 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
326 326 }
327 327
328 328 pub(super) fn path<'on_disk>(
329 329 &self,
330 330 on_disk: &'on_disk [u8],
331 331 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
332 332 Ok(WithBasename::from_raw_parts(
333 333 Cow::Borrowed(self.full_path(on_disk)?),
334 334 self.base_name_start()?,
335 335 ))
336 336 }
337 337
338 338 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
339 339 self.copy_source.start.get() != 0
340 340 }
341 341
342 342 pub(super) fn copy_source<'on_disk>(
343 343 &self,
344 344 on_disk: &'on_disk [u8],
345 345 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
346 346 Ok(if self.has_copy_source() {
347 347 Some(read_hg_path(on_disk, self.copy_source)?)
348 348 } else {
349 349 None
350 350 })
351 351 }
352 352
353 353 fn flags(&self) -> Flags {
354 354 Flags::from_bits_truncate(self.flags.get())
355 355 }
356 356
357 357 fn has_entry(&self) -> bool {
358 358 self.flags().intersects(
359 359 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
360 360 )
361 361 }
362 362
363 363 pub(super) fn node_data(
364 364 &self,
365 365 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
366 366 if self.has_entry() {
367 367 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
368 368 } else if let Some(mtime) = self.cached_directory_mtime()? {
369 369 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
370 370 } else {
371 371 Ok(dirstate_map::NodeData::None)
372 372 }
373 373 }
374 374
375 375 pub(super) fn cached_directory_mtime(
376 376 &self,
377 377 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
378 378 // For now we do not have code to handle the absence of
379 379 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
380 380 // unset.
381 381 if self.flags().contains(Flags::DIRECTORY)
382 382 && self.flags().contains(Flags::HAS_MTIME)
383 383 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
384 384 {
385 Ok(Some(self.mtime.try_into()?))
385 Ok(Some(self.mtime()?))
386 386 } else {
387 387 Ok(None)
388 388 }
389 389 }
390 390
391 391 fn synthesize_unix_mode(&self) -> u32 {
392 392 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
393 393 libc::S_IFLNK
394 394 } else {
395 395 libc::S_IFREG
396 396 };
397 397 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
398 398 0o755
399 399 } else {
400 400 0o644
401 401 };
402 402 file_type | permisions
403 403 }
404 404
405 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
406 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
407 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
408 m.second_ambiguous = true;
409 }
410 Ok(m)
411 }
412
405 413 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
406 414 // TODO: convert through raw bits instead?
407 415 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
408 416 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
409 417 let p2_info = self.flags().contains(Flags::P2_INFO);
410 418 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
411 419 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
412 420 {
413 421 Some((self.synthesize_unix_mode(), self.size.into()))
414 422 } else {
415 423 None
416 424 };
417 425 let mtime = if self.flags().contains(Flags::HAS_MTIME)
418 426 && !self.flags().contains(Flags::DIRECTORY)
419 427 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
420 428 {
421 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
422 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
423 m.second_ambiguous = true;
424 }
425 Some(m)
429 Some(self.mtime()?)
426 430 } else {
427 431 None
428 432 };
429 433 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
430 434 {
431 435 Some(self.flags().contains(Flags::FALLBACK_EXEC))
432 436 } else {
433 437 None
434 438 };
435 439 let fallback_symlink =
436 440 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
437 441 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
438 442 } else {
439 443 None
440 444 };
441 445 Ok(DirstateEntry::from_v2_data(
442 446 wdir_tracked,
443 447 p1_tracked,
444 448 p2_info,
445 449 mode_size,
446 450 mtime,
447 451 fallback_exec,
448 452 fallback_symlink,
449 453 ))
450 454 }
451 455
452 456 pub(super) fn entry(
453 457 &self,
454 458 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
455 459 if self.has_entry() {
456 460 Ok(Some(self.assume_entry()?))
457 461 } else {
458 462 Ok(None)
459 463 }
460 464 }
461 465
462 466 pub(super) fn children<'on_disk>(
463 467 &self,
464 468 on_disk: &'on_disk [u8],
465 469 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
466 470 read_nodes(on_disk, self.children)
467 471 }
468 472
469 473 pub(super) fn to_in_memory_node<'on_disk>(
470 474 &self,
471 475 on_disk: &'on_disk [u8],
472 476 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
473 477 Ok(dirstate_map::Node {
474 478 children: dirstate_map::ChildNodes::OnDisk(
475 479 self.children(on_disk)?,
476 480 ),
477 481 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
478 482 data: self.node_data()?,
479 483 descendants_with_entry_count: self
480 484 .descendants_with_entry_count
481 485 .get(),
482 486 tracked_descendants_count: self.tracked_descendants_count.get(),
483 487 })
484 488 }
485 489
486 490 fn from_dirstate_entry(
487 491 entry: &DirstateEntry,
488 492 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
489 493 let (
490 494 wdir_tracked,
491 495 p1_tracked,
492 496 p2_info,
493 497 mode_size_opt,
494 498 mtime_opt,
495 499 fallback_exec,
496 500 fallback_symlink,
497 501 ) = entry.v2_data();
498 502 // TODO: convert through raw flag bits instead?
499 503 let mut flags = Flags::empty();
500 504 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
501 505 flags.set(Flags::P1_TRACKED, p1_tracked);
502 506 flags.set(Flags::P2_INFO, p2_info);
503 507 let size = if let Some((m, s)) = mode_size_opt {
504 508 let exec_perm = m & libc::S_IXUSR != 0;
505 509 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
506 510 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
507 511 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
508 512 flags.insert(Flags::HAS_MODE_AND_SIZE);
509 513 s.into()
510 514 } else {
511 515 0.into()
512 516 };
513 517 let mtime = if let Some(m) = mtime_opt {
514 518 flags.insert(Flags::HAS_MTIME);
515 519 if m.second_ambiguous {
516 520 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
517 521 };
518 522 m.into()
519 523 } else {
520 524 PackedTruncatedTimestamp::null()
521 525 };
522 526 if let Some(f_exec) = fallback_exec {
523 527 flags.insert(Flags::HAS_FALLBACK_EXEC);
524 528 if f_exec {
525 529 flags.insert(Flags::FALLBACK_EXEC);
526 530 }
527 531 }
528 532 if let Some(f_symlink) = fallback_symlink {
529 533 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
530 534 if f_symlink {
531 535 flags.insert(Flags::FALLBACK_SYMLINK);
532 536 }
533 537 }
534 538 (flags, size, mtime)
535 539 }
536 540 }
537 541
538 542 fn read_hg_path(
539 543 on_disk: &[u8],
540 544 slice: PathSlice,
541 545 ) -> Result<&HgPath, DirstateV2ParseError> {
542 546 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
543 547 }
544 548
545 549 fn read_nodes(
546 550 on_disk: &[u8],
547 551 slice: ChildNodes,
548 552 ) -> Result<&[Node], DirstateV2ParseError> {
549 553 read_slice(on_disk, slice.start, slice.len.get())
550 554 }
551 555
552 556 fn read_slice<T, Len>(
553 557 on_disk: &[u8],
554 558 start: Offset,
555 559 len: Len,
556 560 ) -> Result<&[T], DirstateV2ParseError>
557 561 where
558 562 T: BytesCast,
559 563 Len: TryInto<usize>,
560 564 {
561 565 // Either `usize::MAX` would result in "out of bounds" error since a single
562 566 // `&[u8]` cannot occupy the entire address space.
563 567 let start = start.get().try_into().unwrap_or(std::usize::MAX);
564 568 let len = len.try_into().unwrap_or(std::usize::MAX);
565 569 on_disk
566 570 .get(start..)
567 571 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
568 572 .map(|(slice, _rest)| slice)
569 573 .ok_or_else(|| DirstateV2ParseError)
570 574 }
571 575
572 576 pub(crate) fn for_each_tracked_path<'on_disk>(
573 577 on_disk: &'on_disk [u8],
574 578 metadata: &[u8],
575 579 mut f: impl FnMut(&'on_disk HgPath),
576 580 ) -> Result<(), DirstateV2ParseError> {
577 581 let (meta, _) = TreeMetadata::from_bytes(metadata)
578 582 .map_err(|_| DirstateV2ParseError)?;
579 583 fn recur<'on_disk>(
580 584 on_disk: &'on_disk [u8],
581 585 nodes: ChildNodes,
582 586 f: &mut impl FnMut(&'on_disk HgPath),
583 587 ) -> Result<(), DirstateV2ParseError> {
584 588 for node in read_nodes(on_disk, nodes)? {
585 589 if let Some(entry) = node.entry()? {
586 590 if entry.state().is_tracked() {
587 591 f(node.full_path(on_disk)?)
588 592 }
589 593 }
590 594 recur(on_disk, node.children, f)?
591 595 }
592 596 Ok(())
593 597 }
594 598 recur(on_disk, meta.root_nodes, &mut f)
595 599 }
596 600
597 601 /// Returns new data and metadata, together with whether that data should be
598 602 /// appended to the existing data file whose content is at
599 603 /// `dirstate_map.on_disk` (true), instead of written to a new data file
600 604 /// (false).
601 605 pub(super) fn write(
602 606 dirstate_map: &DirstateMap,
603 607 can_append: bool,
604 608 ) -> Result<(Vec<u8>, TreeMetadata, bool), DirstateError> {
605 609 let append = can_append && dirstate_map.write_should_append();
606 610
607 611 // This ignores the space for paths, and for nodes without an entry.
608 612 // TODO: better estimate? Skip the `Vec` and write to a file directly?
609 613 let size_guess = std::mem::size_of::<Node>()
610 614 * dirstate_map.nodes_with_entry_count as usize;
611 615
612 616 let mut writer = Writer {
613 617 dirstate_map,
614 618 append,
615 619 out: Vec::with_capacity(size_guess),
616 620 };
617 621
618 622 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
619 623
620 624 let meta = TreeMetadata {
621 625 root_nodes,
622 626 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
623 627 nodes_with_copy_source_count: dirstate_map
624 628 .nodes_with_copy_source_count
625 629 .into(),
626 630 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
627 631 unused: [0; 4],
628 632 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
629 633 };
630 634 Ok((writer.out, meta, append))
631 635 }
632 636
633 637 struct Writer<'dmap, 'on_disk> {
634 638 dirstate_map: &'dmap DirstateMap<'on_disk>,
635 639 append: bool,
636 640 out: Vec<u8>,
637 641 }
638 642
639 643 impl Writer<'_, '_> {
640 644 fn write_nodes(
641 645 &mut self,
642 646 nodes: dirstate_map::ChildNodesRef,
643 647 ) -> Result<ChildNodes, DirstateError> {
644 648 // Reuse already-written nodes if possible
645 649 if self.append {
646 650 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
647 651 let start = self.on_disk_offset_of(nodes_slice).expect(
648 652 "dirstate-v2 OnDisk nodes not found within on_disk",
649 653 );
650 654 let len = child_nodes_len_from_usize(nodes_slice.len());
651 655 return Ok(ChildNodes { start, len });
652 656 }
653 657 }
654 658
655 659 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
656 660 // undefined iteration order. Sort to enable binary search in the
657 661 // written file.
658 662 let nodes = nodes.sorted();
659 663 let nodes_len = nodes.len();
660 664
661 665 // First accumulate serialized nodes in a `Vec`
662 666 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
663 667 for node in nodes {
664 668 let children =
665 669 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
666 670 let full_path = node.full_path(self.dirstate_map.on_disk)?;
667 671 let full_path = self.write_path(full_path.as_bytes());
668 672 let copy_source = if let Some(source) =
669 673 node.copy_source(self.dirstate_map.on_disk)?
670 674 {
671 675 self.write_path(source.as_bytes())
672 676 } else {
673 677 PathSlice {
674 678 start: 0.into(),
675 679 len: 0.into(),
676 680 }
677 681 };
678 682 on_disk_nodes.push(match node {
679 683 NodeRef::InMemory(path, node) => {
680 684 let (flags, size, mtime) = match &node.data {
681 685 dirstate_map::NodeData::Entry(entry) => {
682 686 Node::from_dirstate_entry(entry)
683 687 }
684 dirstate_map::NodeData::CachedDirectory { mtime } => (
688 dirstate_map::NodeData::CachedDirectory { mtime } => {
685 689 // we currently never set a mtime if unknown files
686 690 // are present.
687 691 // So if we have a mtime for a directory, we know
688 692 // there are no unknown
689 693 // files and we
690 694 // blindly set ALL_UNKNOWN_RECORDED.
691 695 //
692 696 // We never set ALL_IGNORED_RECORDED since we
693 697 // don't track that case
694 698 // currently.
695 Flags::DIRECTORY
699 let mut flags = Flags::DIRECTORY
696 700 | Flags::HAS_MTIME
697 | Flags::ALL_UNKNOWN_RECORDED,
698 0.into(),
699 (*mtime).into(),
700 ),
701 | Flags::ALL_UNKNOWN_RECORDED;
702 if mtime.second_ambiguous {
703 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
704 }
705 (flags, 0.into(), (*mtime).into())
706 }
701 707 dirstate_map::NodeData::None => (
702 708 Flags::DIRECTORY,
703 709 0.into(),
704 710 PackedTruncatedTimestamp::null(),
705 711 ),
706 712 };
707 713 Node {
708 714 children,
709 715 copy_source,
710 716 full_path,
711 717 base_name_start: u16::try_from(path.base_name_start())
712 718 // Could only panic for paths over 64 KiB
713 719 .expect("dirstate-v2 path length overflow")
714 720 .into(),
715 721 descendants_with_entry_count: node
716 722 .descendants_with_entry_count
717 723 .into(),
718 724 tracked_descendants_count: node
719 725 .tracked_descendants_count
720 726 .into(),
721 727 flags: flags.bits().into(),
722 728 size,
723 729 mtime,
724 730 }
725 731 }
726 732 NodeRef::OnDisk(node) => Node {
727 733 children,
728 734 copy_source,
729 735 full_path,
730 736 ..*node
731 737 },
732 738 })
733 739 }
734 740 // … so we can write them contiguously, after writing everything else
735 741 // they refer to.
736 742 let start = self.current_offset();
737 743 let len = child_nodes_len_from_usize(nodes_len);
738 744 self.out.extend(on_disk_nodes.as_bytes());
739 745 Ok(ChildNodes { start, len })
740 746 }
741 747
742 748 /// If the given slice of items is within `on_disk`, returns its offset
743 749 /// from the start of `on_disk`.
744 750 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
745 751 where
746 752 T: BytesCast,
747 753 {
748 754 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
749 755 let start = slice.as_ptr() as usize;
750 756 let end = start + slice.len();
751 757 start..=end
752 758 }
753 759 let slice_addresses = address_range(slice.as_bytes());
754 760 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
755 761 if on_disk_addresses.contains(slice_addresses.start())
756 762 && on_disk_addresses.contains(slice_addresses.end())
757 763 {
758 764 let offset = slice_addresses.start() - on_disk_addresses.start();
759 765 Some(offset_from_usize(offset))
760 766 } else {
761 767 None
762 768 }
763 769 }
764 770
765 771 fn current_offset(&mut self) -> Offset {
766 772 let mut offset = self.out.len();
767 773 if self.append {
768 774 offset += self.dirstate_map.on_disk.len()
769 775 }
770 776 offset_from_usize(offset)
771 777 }
772 778
773 779 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
774 780 let len = path_len_from_usize(slice.len());
775 781 // Reuse an already-written path if possible
776 782 if self.append {
777 783 if let Some(start) = self.on_disk_offset_of(slice) {
778 784 return PathSlice { start, len };
779 785 }
780 786 }
781 787 let start = self.current_offset();
782 788 self.out.extend(slice.as_bytes());
783 789 PathSlice { start, len }
784 790 }
785 791 }
786 792
787 793 fn offset_from_usize(x: usize) -> Offset {
788 794 u32::try_from(x)
789 795 // Could only panic for a dirstate file larger than 4 GiB
790 796 .expect("dirstate-v2 offset overflow")
791 797 .into()
792 798 }
793 799
794 800 fn child_nodes_len_from_usize(x: usize) -> Size {
795 801 u32::try_from(x)
796 802 // Could only panic with over 4 billion nodes
797 803 .expect("dirstate-v2 slice length overflow")
798 804 .into()
799 805 }
800 806
801 807 fn path_len_from_usize(x: usize) -> PathSize {
802 808 u16::try_from(x)
803 809 // Could only panic for paths over 64 KiB
804 810 .expect("dirstate-v2 path length overflow")
805 811 .into()
806 812 }
807 813
808 814 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
809 815 fn from(timestamp: TruncatedTimestamp) -> Self {
810 816 Self {
811 817 truncated_seconds: timestamp.truncated_seconds().into(),
812 818 nanoseconds: timestamp.nanoseconds().into(),
813 819 }
814 820 }
815 821 }
816 822
817 823 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
818 824 type Error = DirstateV2ParseError;
819 825
820 826 fn try_from(
821 827 timestamp: PackedTruncatedTimestamp,
822 828 ) -> Result<Self, Self::Error> {
823 829 Self::from_already_truncated(
824 830 timestamp.truncated_seconds.get(),
825 831 timestamp.nanoseconds.get(),
826 832 false,
827 833 )
828 834 }
829 835 }
830 836 impl PackedTruncatedTimestamp {
831 837 fn null() -> Self {
832 838 Self {
833 839 truncated_seconds: 0.into(),
834 840 nanoseconds: 0.into(),
835 841 }
836 842 }
837 843 }
@@ -1,782 +1,798 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate::status::StatusPath;
4 4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 7 use crate::dirstate_tree::dirstate_map::NodeData;
8 8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 10 use crate::matchers::get_ignore_function;
11 11 use crate::matchers::Matcher;
12 12 use crate::utils::files::get_bytes_from_os_string;
13 13 use crate::utils::files::get_path_from_bytes;
14 14 use crate::utils::hg_path::HgPath;
15 15 use crate::BadMatch;
16 16 use crate::DirstateStatus;
17 17 use crate::EntryState;
18 18 use crate::HgPathBuf;
19 19 use crate::HgPathCow;
20 20 use crate::PatternFileWarning;
21 21 use crate::StatusError;
22 22 use crate::StatusOptions;
23 23 use micro_timer::timed;
24 24 use rayon::prelude::*;
25 25 use sha1::{Digest, Sha1};
26 26 use std::borrow::Cow;
27 27 use std::io;
28 28 use std::path::Path;
29 29 use std::path::PathBuf;
30 30 use std::sync::Mutex;
31 31 use std::time::SystemTime;
32 32
33 33 /// Returns the status of the working directory compared to its parent
34 34 /// changeset.
35 35 ///
36 36 /// This algorithm is based on traversing the filesystem tree (`fs` in function
37 37 /// and variable names) and dirstate tree at the same time. The core of this
38 38 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
39 39 /// and its use of `itertools::merge_join_by`. When reaching a path that only
40 40 /// exists in one of the two trees, depending on information requested by
41 41 /// `options` we may need to traverse the remaining subtree.
42 42 #[timed]
43 43 pub fn status<'tree, 'on_disk: 'tree>(
44 44 dmap: &'tree mut DirstateMap<'on_disk>,
45 45 matcher: &(dyn Matcher + Sync),
46 46 root_dir: PathBuf,
47 47 ignore_files: Vec<PathBuf>,
48 48 options: StatusOptions,
49 49 ) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
50 50 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
51 51 if options.list_ignored || options.list_unknown {
52 52 let mut hasher = Sha1::new();
53 53 let (ignore_fn, warnings) = get_ignore_function(
54 54 ignore_files,
55 55 &root_dir,
56 56 &mut |pattern_bytes| hasher.update(pattern_bytes),
57 57 )?;
58 58 let new_hash = *hasher.finalize().as_ref();
59 59 let changed = new_hash != dmap.ignore_patterns_hash;
60 60 dmap.ignore_patterns_hash = new_hash;
61 61 (ignore_fn, warnings, Some(changed))
62 62 } else {
63 63 (Box::new(|&_| true), vec![], None)
64 64 };
65 65
66 let filesystem_time_at_status_start = filesystem_now(&root_dir).ok();
66 let filesystem_time_at_status_start =
67 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
67 68 let outcome = DirstateStatus {
68 69 filesystem_time_at_status_start,
69 70 ..Default::default()
70 71 };
71 72 let common = StatusCommon {
72 73 dmap,
73 74 options,
74 75 matcher,
75 76 ignore_fn,
76 77 outcome: Mutex::new(outcome),
77 78 ignore_patterns_have_changed: patterns_changed,
78 79 new_cachable_directories: Default::default(),
79 80 outated_cached_directories: Default::default(),
80 81 filesystem_time_at_status_start,
81 82 };
82 83 let is_at_repo_root = true;
83 84 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
84 85 let has_ignored_ancestor = false;
85 86 let root_cached_mtime = None;
86 87 let root_dir_metadata = None;
87 88 // If the path we have for the repository root is a symlink, do follow it.
88 89 // (As opposed to symlinks within the working directory which are not
89 90 // followed, using `std::fs::symlink_metadata`.)
90 91 common.traverse_fs_directory_and_dirstate(
91 92 has_ignored_ancestor,
92 93 dmap.root.as_ref(),
93 94 hg_path,
94 95 &root_dir,
95 96 root_dir_metadata,
96 97 root_cached_mtime,
97 98 is_at_repo_root,
98 99 )?;
99 100 let mut outcome = common.outcome.into_inner().unwrap();
100 101 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
101 102 let outdated = common.outated_cached_directories.into_inner().unwrap();
102 103
103 104 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
104 105 || !outdated.is_empty()
105 106 || !new_cachable.is_empty();
106 107
107 108 // Remove outdated mtimes before adding new mtimes, in case a given
108 109 // directory is both
109 110 for path in &outdated {
110 111 let node = dmap.get_or_insert(path)?;
111 112 if let NodeData::CachedDirectory { .. } = &node.data {
112 113 node.data = NodeData::None
113 114 }
114 115 }
115 116 for (path, mtime) in &new_cachable {
116 117 let node = dmap.get_or_insert(path)?;
117 118 match &node.data {
118 119 NodeData::Entry(_) => {} // Don’t overwrite an entry
119 120 NodeData::CachedDirectory { .. } | NodeData::None => {
120 121 node.data = NodeData::CachedDirectory { mtime: *mtime }
121 122 }
122 123 }
123 124 }
124 125
125 126 Ok((outcome, warnings))
126 127 }
127 128
128 129 /// Bag of random things needed by various parts of the algorithm. Reduces the
129 130 /// number of parameters passed to functions.
130 131 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
131 132 dmap: &'tree DirstateMap<'on_disk>,
132 133 options: StatusOptions,
133 134 matcher: &'a (dyn Matcher + Sync),
134 135 ignore_fn: IgnoreFnType<'a>,
135 136 outcome: Mutex<DirstateStatus<'on_disk>>,
136 137 new_cachable_directories:
137 138 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
138 139 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
139 140
140 141 /// Whether ignore files like `.hgignore` have changed since the previous
141 142 /// time a `status()` call wrote their hash to the dirstate. `None` means
142 143 /// we don’t know as this run doesn’t list either ignored or uknown files
143 144 /// and therefore isn’t reading `.hgignore`.
144 145 ignore_patterns_have_changed: Option<bool>,
145 146
146 147 /// The current time at the start of the `status()` algorithm, as measured
147 148 /// and possibly truncated by the filesystem.
148 filesystem_time_at_status_start: Option<SystemTime>,
149 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
149 150 }
150 151
/// Selects which list of `DirstateStatus` a path is pushed to by
/// `StatusCommon::push_outcome_common`. Each variant maps to the list of the
/// same (lowercase) name.
enum Outcome {
    /// Pushed to `modified`
    Modified,
    /// Pushed to `added`
    Added,
    /// Pushed to `removed`
    Removed,
    /// Pushed to `deleted`
    Deleted,
    /// Pushed to `clean`
    Clean,
    /// Pushed to `ignored`
    Ignored,
    /// Pushed to `unknown`
    Unknown,
    /// Pushed to `unsure`
    Unsure,
}
161 162
162 163 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
163 164 fn push_outcome(
164 165 &self,
165 166 which: Outcome,
166 167 dirstate_node: &NodeRef<'tree, 'on_disk>,
167 168 ) -> Result<(), DirstateV2ParseError> {
168 169 let path = dirstate_node
169 170 .full_path_borrowed(self.dmap.on_disk)?
170 171 .detach_from_tree();
171 172 let copy_source = if self.options.list_copies {
172 173 dirstate_node
173 174 .copy_source_borrowed(self.dmap.on_disk)?
174 175 .map(|source| source.detach_from_tree())
175 176 } else {
176 177 None
177 178 };
178 179 self.push_outcome_common(which, path, copy_source);
179 180 Ok(())
180 181 }
181 182
182 183 fn push_outcome_without_copy_source(
183 184 &self,
184 185 which: Outcome,
185 186 path: &BorrowedPath<'_, 'on_disk>,
186 187 ) {
187 188 self.push_outcome_common(which, path.detach_from_tree(), None)
188 189 }
189 190
190 191 fn push_outcome_common(
191 192 &self,
192 193 which: Outcome,
193 194 path: HgPathCow<'on_disk>,
194 195 copy_source: Option<HgPathCow<'on_disk>>,
195 196 ) {
196 197 let mut outcome = self.outcome.lock().unwrap();
197 198 let vec = match which {
198 199 Outcome::Modified => &mut outcome.modified,
199 200 Outcome::Added => &mut outcome.added,
200 201 Outcome::Removed => &mut outcome.removed,
201 202 Outcome::Deleted => &mut outcome.deleted,
202 203 Outcome::Clean => &mut outcome.clean,
203 204 Outcome::Ignored => &mut outcome.ignored,
204 205 Outcome::Unknown => &mut outcome.unknown,
205 206 Outcome::Unsure => &mut outcome.unsure,
206 207 };
207 208 vec.push(StatusPath { path, copy_source });
208 209 }
209 210
210 211 fn read_dir(
211 212 &self,
212 213 hg_path: &HgPath,
213 214 fs_path: &Path,
214 215 is_at_repo_root: bool,
215 216 ) -> Result<Vec<DirEntry>, ()> {
216 217 DirEntry::read_dir(fs_path, is_at_repo_root)
217 218 .map_err(|error| self.io_error(error, hg_path))
218 219 }
219 220
220 221 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
221 222 let errno = error.raw_os_error().expect("expected real OS error");
222 223 self.outcome
223 224 .lock()
224 225 .unwrap()
225 226 .bad
226 227 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
227 228 }
228 229
229 230 fn check_for_outdated_directory_cache(
230 231 &self,
231 232 dirstate_node: &NodeRef<'tree, 'on_disk>,
232 233 ) -> Result<(), DirstateV2ParseError> {
233 234 if self.ignore_patterns_have_changed == Some(true)
234 235 && dirstate_node.cached_directory_mtime()?.is_some()
235 236 {
236 237 self.outated_cached_directories.lock().unwrap().push(
237 238 dirstate_node
238 239 .full_path_borrowed(self.dmap.on_disk)?
239 240 .detach_from_tree(),
240 241 )
241 242 }
242 243 Ok(())
243 244 }
244 245
245 246 /// If this returns true, we can get accurate results by only using
246 247 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
247 248 /// need to call `read_dir`.
248 249 fn can_skip_fs_readdir(
249 250 &self,
250 251 directory_metadata: Option<&std::fs::Metadata>,
251 252 cached_directory_mtime: Option<TruncatedTimestamp>,
252 253 ) -> bool {
253 254 if !self.options.list_unknown && !self.options.list_ignored {
254 255 // All states that we care about listing have corresponding
255 256 // dirstate entries.
256 257 // This happens for example with `hg status -mard`.
257 258 return true;
258 259 }
259 260 if !self.options.list_ignored
260 261 && self.ignore_patterns_have_changed == Some(false)
261 262 {
262 263 if let Some(cached_mtime) = cached_directory_mtime {
263 264 // The dirstate contains a cached mtime for this directory, set
264 265 // by a previous run of the `status` algorithm which found this
265 266 // directory eligible for `read_dir` caching.
266 267 if let Some(meta) = directory_metadata {
267 268 if cached_mtime
268 269 .likely_equal_to_mtime_of(meta)
269 270 .unwrap_or(false)
270 271 {
271 272 // The mtime of that directory has not changed
272 273 // since then, which means that the results of
273 274 // `read_dir` should also be unchanged.
274 275 return true;
275 276 }
276 277 }
277 278 }
278 279 }
279 280 false
280 281 }
281 282
282 283 /// Returns whether all child entries of the filesystem directory have a
283 284 /// corresponding dirstate node or are ignored.
284 285 fn traverse_fs_directory_and_dirstate(
285 286 &self,
286 287 has_ignored_ancestor: bool,
287 288 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
288 289 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
289 290 directory_fs_path: &Path,
290 291 directory_metadata: Option<&std::fs::Metadata>,
291 292 cached_directory_mtime: Option<TruncatedTimestamp>,
292 293 is_at_repo_root: bool,
293 294 ) -> Result<bool, DirstateV2ParseError> {
294 295 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
295 296 {
296 297 dirstate_nodes
297 298 .par_iter()
298 299 .map(|dirstate_node| {
299 300 let fs_path = directory_fs_path.join(get_path_from_bytes(
300 301 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
301 302 ));
302 303 match std::fs::symlink_metadata(&fs_path) {
303 304 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
304 305 &fs_path,
305 306 &fs_metadata,
306 307 dirstate_node,
307 308 has_ignored_ancestor,
308 309 ),
309 310 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
310 311 self.traverse_dirstate_only(dirstate_node)
311 312 }
312 313 Err(error) => {
313 314 let hg_path =
314 315 dirstate_node.full_path(self.dmap.on_disk)?;
315 316 Ok(self.io_error(error, hg_path))
316 317 }
317 318 }
318 319 })
319 320 .collect::<Result<_, _>>()?;
320 321
321 322 // We don’t know, so conservatively say this isn’t the case
322 323 let children_all_have_dirstate_node_or_are_ignored = false;
323 324
324 325 return Ok(children_all_have_dirstate_node_or_are_ignored);
325 326 }
326 327
327 328 let mut fs_entries = if let Ok(entries) = self.read_dir(
328 329 directory_hg_path,
329 330 directory_fs_path,
330 331 is_at_repo_root,
331 332 ) {
332 333 entries
333 334 } else {
334 335 // Treat an unreadable directory (typically because of insufficient
335 336 // permissions) like an empty directory. `self.read_dir` has
336 337 // already called `self.io_error` so a warning will be emitted.
337 338 Vec::new()
338 339 };
339 340
340 341 // `merge_join_by` requires both its input iterators to be sorted:
341 342
342 343 let dirstate_nodes = dirstate_nodes.sorted();
343 344 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
344 345 // https://github.com/rust-lang/rust/issues/34162
345 346 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
346 347
347 348 // Propagate here any error that would happen inside the comparison
348 349 // callback below
349 350 for dirstate_node in &dirstate_nodes {
350 351 dirstate_node.base_name(self.dmap.on_disk)?;
351 352 }
352 353 itertools::merge_join_by(
353 354 dirstate_nodes,
354 355 &fs_entries,
355 356 |dirstate_node, fs_entry| {
356 357 // This `unwrap` never panics because we already propagated
357 358 // those errors above
358 359 dirstate_node
359 360 .base_name(self.dmap.on_disk)
360 361 .unwrap()
361 362 .cmp(&fs_entry.base_name)
362 363 },
363 364 )
364 365 .par_bridge()
365 366 .map(|pair| {
366 367 use itertools::EitherOrBoth::*;
367 368 let has_dirstate_node_or_is_ignored;
368 369 match pair {
369 370 Both(dirstate_node, fs_entry) => {
370 371 self.traverse_fs_and_dirstate(
371 372 &fs_entry.full_path,
372 373 &fs_entry.metadata,
373 374 dirstate_node,
374 375 has_ignored_ancestor,
375 376 )?;
376 377 has_dirstate_node_or_is_ignored = true
377 378 }
378 379 Left(dirstate_node) => {
379 380 self.traverse_dirstate_only(dirstate_node)?;
380 381 has_dirstate_node_or_is_ignored = true;
381 382 }
382 383 Right(fs_entry) => {
383 384 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
384 385 has_ignored_ancestor,
385 386 directory_hg_path,
386 387 fs_entry,
387 388 )
388 389 }
389 390 }
390 391 Ok(has_dirstate_node_or_is_ignored)
391 392 })
392 393 .try_reduce(|| true, |a, b| Ok(a && b))
393 394 }
394 395
395 396 fn traverse_fs_and_dirstate(
396 397 &self,
397 398 fs_path: &Path,
398 399 fs_metadata: &std::fs::Metadata,
399 400 dirstate_node: NodeRef<'tree, 'on_disk>,
400 401 has_ignored_ancestor: bool,
401 402 ) -> Result<(), DirstateV2ParseError> {
402 403 self.check_for_outdated_directory_cache(&dirstate_node)?;
403 404 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
404 405 let file_type = fs_metadata.file_type();
405 406 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
406 407 if !file_or_symlink {
407 408 // If we previously had a file here, it was removed (with
408 409 // `hg rm` or similar) or deleted before it could be
409 410 // replaced by a directory or something else.
410 411 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
411 412 }
412 413 if file_type.is_dir() {
413 414 if self.options.collect_traversed_dirs {
414 415 self.outcome
415 416 .lock()
416 417 .unwrap()
417 418 .traversed
418 419 .push(hg_path.detach_from_tree())
419 420 }
420 421 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
421 422 let is_at_repo_root = false;
422 423 let children_all_have_dirstate_node_or_are_ignored = self
423 424 .traverse_fs_directory_and_dirstate(
424 425 is_ignored,
425 426 dirstate_node.children(self.dmap.on_disk)?,
426 427 hg_path,
427 428 fs_path,
428 429 Some(fs_metadata),
429 430 dirstate_node.cached_directory_mtime()?,
430 431 is_at_repo_root,
431 432 )?;
432 433 self.maybe_save_directory_mtime(
433 434 children_all_have_dirstate_node_or_are_ignored,
434 435 fs_metadata,
435 436 dirstate_node,
436 437 )?
437 438 } else {
438 439 if file_or_symlink && self.matcher.matches(hg_path) {
439 440 if let Some(state) = dirstate_node.state()? {
440 441 match state {
441 442 EntryState::Added => {
442 443 self.push_outcome(Outcome::Added, &dirstate_node)?
443 444 }
444 445 EntryState::Removed => self
445 446 .push_outcome(Outcome::Removed, &dirstate_node)?,
446 447 EntryState::Merged => self
447 448 .push_outcome(Outcome::Modified, &dirstate_node)?,
448 449 EntryState::Normal => self
449 450 .handle_normal_file(&dirstate_node, fs_metadata)?,
450 451 }
451 452 } else {
452 453 // `node.entry.is_none()` indicates a "directory"
453 454 // node, but the filesystem has a file
454 455 self.mark_unknown_or_ignored(
455 456 has_ignored_ancestor,
456 457 hg_path,
457 458 );
458 459 }
459 460 }
460 461
461 462 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
462 463 {
463 464 self.traverse_dirstate_only(child_node)?
464 465 }
465 466 }
466 467 Ok(())
467 468 }
468 469
469 470 fn maybe_save_directory_mtime(
470 471 &self,
471 472 children_all_have_dirstate_node_or_are_ignored: bool,
472 473 directory_metadata: &std::fs::Metadata,
473 474 dirstate_node: NodeRef<'tree, 'on_disk>,
474 475 ) -> Result<(), DirstateV2ParseError> {
475 if children_all_have_dirstate_node_or_are_ignored {
476 // All filesystem directory entries from `read_dir` have a
477 // corresponding node in the dirstate, so we can reconstitute the
478 // names of those entries without calling `read_dir` again.
479 if let (Some(status_start), Ok(directory_mtime)) = (
480 &self.filesystem_time_at_status_start,
481 directory_metadata.modified(),
476 if !children_all_have_dirstate_node_or_are_ignored {
477 return Ok(());
478 }
479 // All filesystem directory entries from `read_dir` have a
480 // corresponding node in the dirstate, so we can reconstitute the
481 // names of those entries without calling `read_dir` again.
482
483 // TODO: use let-else here and below when available:
484 // https://github.com/rust-lang/rust/issues/87335
485 let status_start = if let Some(status_start) =
486 &self.filesystem_time_at_status_start
487 {
488 status_start
489 } else {
490 return Ok(());
491 };
492
493 // Although the Rust standard library’s `SystemTime` type
494 // has nanosecond precision, the times reported for a
495 // directory’s (or file’s) modified time may have lower
496 // resolution based on the filesystem (for example ext3
497 // only stores integer seconds), kernel (see
498 // https://stackoverflow.com/a/14393315/1162888), etc.
499 let directory_mtime = if let Ok(option) =
500 TruncatedTimestamp::for_reliable_mtime_of(
501 directory_metadata,
502 status_start,
482 503 ) {
483 // Although the Rust standard library’s `SystemTime` type
484 // has nanosecond precision, the times reported for a
485 // directory’s (or file’s) modified time may have lower
486 // resolution based on the filesystem (for example ext3
487 // only stores integer seconds), kernel (see
488 // https://stackoverflow.com/a/14393315/1162888), etc.
489 if &directory_mtime >= status_start {
490 // The directory was modified too recently, don’t cache its
491 // `read_dir` results.
492 //
493 // A timeline like this is possible:
494 //
495 // 1. A change to this directory (direct child was
496 // added or removed) cause its mtime to be set
497 // (possibly truncated) to `directory_mtime`
498 // 2. This `status` algorithm calls `read_dir`
499 // 3. An other change is made to the same directory is
500 // made so that calling `read_dir` agin would give
501 // different results, but soon enough after 1. that
502 // the mtime stays the same
503 //
504 // On a system where the time resolution poor, this
505 // scenario is not unlikely if all three steps are caused
506 // by the same script.
507 } else {
508 // We’ve observed (through `status_start`) that time has
509 // “progressed” since `directory_mtime`, so any further
510 // change to this directory is extremely likely to cause a
511 // different mtime.
512 //
513 // Having the same mtime again is not entirely impossible
514 // since the system clock is not monotonous. It could jump
515 // backward to some point before `directory_mtime`, then a
516 // directory change could potentially happen during exactly
517 // the wrong tick.
518 //
519 // We deem this scenario (unlike the previous one) to be
520 // unlikely enough in practice.
521 let truncated = TruncatedTimestamp::from(directory_mtime);
522 let is_up_to_date = if let Some(cached) =
523 dirstate_node.cached_directory_mtime()?
524 {
525 cached.likely_equal(truncated)
526 } else {
527 false
528 };
529 if !is_up_to_date {
530 let hg_path = dirstate_node
531 .full_path_borrowed(self.dmap.on_disk)?
532 .detach_from_tree();
533 self.new_cachable_directories
534 .lock()
535 .unwrap()
536 .push((hg_path, truncated))
537 }
538 }
504 if let Some(directory_mtime) = option {
505 directory_mtime
506 } else {
507 // The directory was modified too recently,
508 // don’t cache its `read_dir` results.
509 //
510 // 1. A change to this directory (direct child was
511 // added or removed) cause its mtime to be set
512 // (possibly truncated) to `directory_mtime`
513 // 2. This `status` algorithm calls `read_dir`
514 // 3. An other change is made to the same directory is
515 // made so that calling `read_dir` agin would give
516 // different results, but soon enough after 1. that
517 // the mtime stays the same
518 //
519 // On a system where the time resolution poor, this
520 // scenario is not unlikely if all three steps are caused
521 // by the same script.
522 return Ok(());
539 523 }
524 } else {
525 // OS/libc does not support mtime?
526 return Ok(());
527 };
528 // We’ve observed (through `status_start`) that time has
529 // “progressed” since `directory_mtime`, so any further
530 // change to this directory is extremely likely to cause a
531 // different mtime.
532 //
533 // Having the same mtime again is not entirely impossible
534 // since the system clock is not monotonous. It could jump
535 // backward to some point before `directory_mtime`, then a
536 // directory change could potentially happen during exactly
537 // the wrong tick.
538 //
539 // We deem this scenario (unlike the previous one) to be
540 // unlikely enough in practice.
541
542 let is_up_to_date =
543 if let Some(cached) = dirstate_node.cached_directory_mtime()? {
544 cached.likely_equal(directory_mtime)
545 } else {
546 false
547 };
548 if !is_up_to_date {
549 let hg_path = dirstate_node
550 .full_path_borrowed(self.dmap.on_disk)?
551 .detach_from_tree();
552 self.new_cachable_directories
553 .lock()
554 .unwrap()
555 .push((hg_path, directory_mtime))
540 556 }
541 557 Ok(())
542 558 }
543 559
544 560 /// A file with `EntryState::Normal` in the dirstate was found in the
545 561 /// filesystem
546 562 fn handle_normal_file(
547 563 &self,
548 564 dirstate_node: &NodeRef<'tree, 'on_disk>,
549 565 fs_metadata: &std::fs::Metadata,
550 566 ) -> Result<(), DirstateV2ParseError> {
551 567 // Keep the low 31 bits
552 568 fn truncate_u64(value: u64) -> i32 {
553 569 (value & 0x7FFF_FFFF) as i32
554 570 }
555 571
556 572 let entry = dirstate_node
557 573 .entry()?
558 574 .expect("handle_normal_file called with entry-less node");
559 575 let mode_changed =
560 576 || self.options.check_exec && entry.mode_changed(fs_metadata);
561 577 let size = entry.size();
562 578 let size_changed = size != truncate_u64(fs_metadata.len());
563 579 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
564 580 // issue6456: Size returned may be longer due to encryption
565 581 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
566 582 self.push_outcome(Outcome::Unsure, dirstate_node)?
567 583 } else if dirstate_node.has_copy_source()
568 584 || entry.is_from_other_parent()
569 585 || (size >= 0 && (size_changed || mode_changed()))
570 586 {
571 587 self.push_outcome(Outcome::Modified, dirstate_node)?
572 588 } else {
573 589 let mtime_looks_clean;
574 590 if let Some(dirstate_mtime) = entry.truncated_mtime() {
575 591 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
576 592 .expect("OS/libc does not support mtime?");
577 593 // There might be a change in the future if for example the
578 594 // internal clock become off while process run, but this is a
579 595 // case where the issues the user would face
580 596 // would be a lot worse and there is nothing we
581 597 // can really do.
582 598 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
583 599 } else {
584 600 // No mtime in the dirstate entry
585 601 mtime_looks_clean = false
586 602 };
587 603 if !mtime_looks_clean {
588 604 self.push_outcome(Outcome::Unsure, dirstate_node)?
589 605 } else if self.options.list_clean {
590 606 self.push_outcome(Outcome::Clean, dirstate_node)?
591 607 }
592 608 }
593 609 Ok(())
594 610 }
595 611
596 612 /// A node in the dirstate tree has no corresponding filesystem entry
597 613 fn traverse_dirstate_only(
598 614 &self,
599 615 dirstate_node: NodeRef<'tree, 'on_disk>,
600 616 ) -> Result<(), DirstateV2ParseError> {
601 617 self.check_for_outdated_directory_cache(&dirstate_node)?;
602 618 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
603 619 dirstate_node
604 620 .children(self.dmap.on_disk)?
605 621 .par_iter()
606 622 .map(|child_node| self.traverse_dirstate_only(child_node))
607 623 .collect()
608 624 }
609 625
610 626 /// A node in the dirstate tree has no corresponding *file* on the
611 627 /// filesystem
612 628 ///
613 629 /// Does nothing on a "directory" node
614 630 fn mark_removed_or_deleted_if_file(
615 631 &self,
616 632 dirstate_node: &NodeRef<'tree, 'on_disk>,
617 633 ) -> Result<(), DirstateV2ParseError> {
618 634 if let Some(state) = dirstate_node.state()? {
619 635 let path = dirstate_node.full_path(self.dmap.on_disk)?;
620 636 if self.matcher.matches(path) {
621 637 if let EntryState::Removed = state {
622 638 self.push_outcome(Outcome::Removed, dirstate_node)?
623 639 } else {
624 640 self.push_outcome(Outcome::Deleted, &dirstate_node)?
625 641 }
626 642 }
627 643 }
628 644 Ok(())
629 645 }
630 646
631 647 /// Something in the filesystem has no corresponding dirstate node
632 648 ///
633 649 /// Returns whether that path is ignored
634 650 fn traverse_fs_only(
635 651 &self,
636 652 has_ignored_ancestor: bool,
637 653 directory_hg_path: &HgPath,
638 654 fs_entry: &DirEntry,
639 655 ) -> bool {
640 656 let hg_path = directory_hg_path.join(&fs_entry.base_name);
641 657 let file_type = fs_entry.metadata.file_type();
642 658 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
643 659 if file_type.is_dir() {
644 660 let is_ignored =
645 661 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
646 662 let traverse_children = if is_ignored {
647 663 // Descendants of an ignored directory are all ignored
648 664 self.options.list_ignored
649 665 } else {
650 666 // Descendants of an unknown directory may be either unknown or
651 667 // ignored
652 668 self.options.list_unknown || self.options.list_ignored
653 669 };
654 670 if traverse_children {
655 671 let is_at_repo_root = false;
656 672 if let Ok(children_fs_entries) = self.read_dir(
657 673 &hg_path,
658 674 &fs_entry.full_path,
659 675 is_at_repo_root,
660 676 ) {
661 677 children_fs_entries.par_iter().for_each(|child_fs_entry| {
662 678 self.traverse_fs_only(
663 679 is_ignored,
664 680 &hg_path,
665 681 child_fs_entry,
666 682 );
667 683 })
668 684 }
669 685 }
670 686 if self.options.collect_traversed_dirs {
671 687 self.outcome.lock().unwrap().traversed.push(hg_path.into())
672 688 }
673 689 is_ignored
674 690 } else {
675 691 if file_or_symlink {
676 692 if self.matcher.matches(&hg_path) {
677 693 self.mark_unknown_or_ignored(
678 694 has_ignored_ancestor,
679 695 &BorrowedPath::InMemory(&hg_path),
680 696 )
681 697 } else {
682 698 // We haven’t computed whether this path is ignored. It
683 699 // might not be, and a future run of status might have a
684 700 // different matcher that matches it. So treat it as not
685 701 // ignored. That is, inhibit readdir caching of the parent
686 702 // directory.
687 703 false
688 704 }
689 705 } else {
690 706 // This is neither a directory, a plain file, or a symlink.
691 707 // Treat it like an ignored file.
692 708 true
693 709 }
694 710 }
695 711 }
696 712
697 713 /// Returns whether that path is ignored
698 714 fn mark_unknown_or_ignored(
699 715 &self,
700 716 has_ignored_ancestor: bool,
701 717 hg_path: &BorrowedPath<'_, 'on_disk>,
702 718 ) -> bool {
703 719 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
704 720 if is_ignored {
705 721 if self.options.list_ignored {
706 722 self.push_outcome_without_copy_source(
707 723 Outcome::Ignored,
708 724 hg_path,
709 725 )
710 726 }
711 727 } else {
712 728 if self.options.list_unknown {
713 729 self.push_outcome_without_copy_source(
714 730 Outcome::Unknown,
715 731 hg_path,
716 732 )
717 733 }
718 734 }
719 735 is_ignored
720 736 }
721 737 }
722 738
723 739 struct DirEntry {
724 740 base_name: HgPathBuf,
725 741 full_path: PathBuf,
726 742 metadata: std::fs::Metadata,
727 743 }
728 744
729 745 impl DirEntry {
730 746 /// Returns **unsorted** entries in the given directory, with name and
731 747 /// metadata.
732 748 ///
733 749 /// If a `.hg` sub-directory is encountered:
734 750 ///
735 751 /// * At the repository root, ignore that sub-directory
736 752 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
737 753 /// list instead.
738 754 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
739 755 let mut results = Vec::new();
740 756 for entry in path.read_dir()? {
741 757 let entry = entry?;
742 758 let metadata = entry.metadata()?;
743 759 let name = get_bytes_from_os_string(entry.file_name());
744 760 // FIXME don't do this when cached
745 761 if name == b".hg" {
746 762 if is_at_repo_root {
747 763 // Skip the repo’s own .hg (might be a symlink)
748 764 continue;
749 765 } else if metadata.is_dir() {
750 766 // A .hg sub-directory at another location means a subrepo,
751 767 // skip it entirely.
752 768 return Ok(Vec::new());
753 769 }
754 770 }
755 771 results.push(DirEntry {
756 772 base_name: name.into(),
757 773 full_path: entry.path(),
758 774 metadata,
759 775 })
760 776 }
761 777 Ok(results)
762 778 }
763 779 }
764 780
765 781 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
766 782 /// of the give repository.
767 783 ///
768 784 /// This is similar to `SystemTime::now()`, with the result truncated to the
769 785 /// same time resolution as other files’ modification times. Using `.hg`
770 786 /// instead of the system’s default temporary directory (such as `/tmp`) makes
771 787 /// it more likely the temporary file is in the same disk partition as contents
772 788 /// of the working directory, which can matter since different filesystems may
773 789 /// store timestamps with different resolutions.
774 790 ///
775 791 /// This may fail, typically if we lack write permissions. In that case we
776 792 /// should continue the `status()` algoritm anyway and consider the current
777 793 /// date/time to be unknown.
778 794 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
779 795 tempfile::tempfile_in(repo_root.join(".hg"))?
780 796 .metadata()?
781 797 .modified()
782 798 }
@@ -1,506 +1,505 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2020, Georges Racinet <georges.racinets@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::error::CommandError;
9 9 use crate::ui::Ui;
10 10 use crate::utils::path_utils::RelativizePaths;
11 11 use clap::{Arg, SubCommand};
12 12 use format_bytes::format_bytes;
13 13 use hg;
14 14 use hg::config::Config;
15 15 use hg::dirstate::has_exec_bit;
16 16 use hg::dirstate::status::StatusPath;
17 17 use hg::dirstate::TruncatedTimestamp;
18 18 use hg::dirstate::RANGE_MASK_31BIT;
19 19 use hg::errors::{HgError, IoResultExt};
20 20 use hg::lock::LockError;
21 21 use hg::manifest::Manifest;
22 22 use hg::matchers::AlwaysMatcher;
23 23 use hg::repo::Repo;
24 24 use hg::utils::files::get_bytes_from_os_string;
25 25 use hg::utils::files::get_path_from_bytes;
26 26 use hg::utils::hg_path::{hg_path_to_path_buf, HgPath};
27 27 use hg::StatusOptions;
28 28 use log::{info, warn};
29 29 use std::io;
30 30 use std::path::PathBuf;
31 31
/// Help text passed to `about` when building the `status` subcommand.
///
/// The text starts with an empty line and ends with a newline.
pub const HELP_TEXT: &str = concat!(
    "\n",
    "Show changed files in the working directory\n",
    "\n",
    "This is a pure Rust version of `hg status`.\n",
    "\n",
    "Some options might be missing, check the list below.\n",
);
39 39
40 40 pub fn args() -> clap::App<'static, 'static> {
41 41 SubCommand::with_name("status")
42 42 .alias("st")
43 43 .about(HELP_TEXT)
44 44 .arg(
45 45 Arg::with_name("all")
46 46 .help("show status of all files")
47 47 .short("-A")
48 48 .long("--all"),
49 49 )
50 50 .arg(
51 51 Arg::with_name("modified")
52 52 .help("show only modified files")
53 53 .short("-m")
54 54 .long("--modified"),
55 55 )
56 56 .arg(
57 57 Arg::with_name("added")
58 58 .help("show only added files")
59 59 .short("-a")
60 60 .long("--added"),
61 61 )
62 62 .arg(
63 63 Arg::with_name("removed")
64 64 .help("show only removed files")
65 65 .short("-r")
66 66 .long("--removed"),
67 67 )
68 68 .arg(
69 69 Arg::with_name("clean")
70 70 .help("show only clean files")
71 71 .short("-c")
72 72 .long("--clean"),
73 73 )
74 74 .arg(
75 75 Arg::with_name("deleted")
76 76 .help("show only deleted files")
77 77 .short("-d")
78 78 .long("--deleted"),
79 79 )
80 80 .arg(
81 81 Arg::with_name("unknown")
82 82 .help("show only unknown (not tracked) files")
83 83 .short("-u")
84 84 .long("--unknown"),
85 85 )
86 86 .arg(
87 87 Arg::with_name("ignored")
88 88 .help("show only ignored files")
89 89 .short("-i")
90 90 .long("--ignored"),
91 91 )
92 92 .arg(
93 93 Arg::with_name("copies")
94 94 .help("show source of copied files (DEFAULT: ui.statuscopies)")
95 95 .short("-C")
96 96 .long("--copies"),
97 97 )
98 98 .arg(
99 99 Arg::with_name("no-status")
100 100 .help("hide status prefix")
101 101 .short("-n")
102 102 .long("--no-status"),
103 103 )
104 104 }
105 105
/// Plain set of flags through which the caller selects which file states
/// get displayed.
#[derive(Debug, Clone, Copy)]
pub struct DisplayStates {
    /// Display modified files (`M` prefix).
    pub modified: bool,
    /// Display added files (`A` prefix).
    pub added: bool,
    /// Display removed files (`R` prefix).
    pub removed: bool,
    /// Display clean files (`C` prefix).
    pub clean: bool,
    /// Display deleted files (`!` prefix).
    pub deleted: bool,
    /// Display unknown (not tracked) files (`?` prefix).
    pub unknown: bool,
    /// Display ignored files (`I` prefix).
    pub ignored: bool,
}
117 117
118 118 pub const DEFAULT_DISPLAY_STATES: DisplayStates = DisplayStates {
119 119 modified: true,
120 120 added: true,
121 121 removed: true,
122 122 clean: false,
123 123 deleted: true,
124 124 unknown: true,
125 125 ignored: false,
126 126 };
127 127
128 128 pub const ALL_DISPLAY_STATES: DisplayStates = DisplayStates {
129 129 modified: true,
130 130 added: true,
131 131 removed: true,
132 132 clean: true,
133 133 deleted: true,
134 134 unknown: true,
135 135 ignored: true,
136 136 };
137 137
138 138 impl DisplayStates {
139 139 pub fn is_empty(&self) -> bool {
140 140 !(self.modified
141 141 || self.added
142 142 || self.removed
143 143 || self.clean
144 144 || self.deleted
145 145 || self.unknown
146 146 || self.ignored)
147 147 }
148 148 }
149 149
150 150 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
151 151 let status_enabled_default = false;
152 152 let status_enabled = invocation.config.get_option(b"rhg", b"status")?;
153 153 if !status_enabled.unwrap_or(status_enabled_default) {
154 154 return Err(CommandError::unsupported(
155 155 "status is experimental in rhg (enable it with 'rhg.status = true' \
156 156 or enable fallback with 'rhg.on-unsupported = fallback')"
157 157 ));
158 158 }
159 159
160 160 // TODO: lift these limitations
161 161 if invocation.config.get_bool(b"ui", b"tweakdefaults")? {
162 162 return Err(CommandError::unsupported(
163 163 "ui.tweakdefaults is not yet supported with rhg status",
164 164 ));
165 165 }
166 166 if invocation.config.get_bool(b"ui", b"statuscopies")? {
167 167 return Err(CommandError::unsupported(
168 168 "ui.statuscopies is not yet supported with rhg status",
169 169 ));
170 170 }
171 171 if invocation
172 172 .config
173 173 .get(b"commands", b"status.terse")
174 174 .is_some()
175 175 {
176 176 return Err(CommandError::unsupported(
177 177 "status.terse is not yet supported with rhg status",
178 178 ));
179 179 }
180 180
181 181 let ui = invocation.ui;
182 182 let config = invocation.config;
183 183 let args = invocation.subcommand_args;
184 184 let all = args.is_present("all");
185 185 let display_states = if all {
186 186 // TODO when implementing `--quiet`: it excludes clean files
187 187 // from `--all`
188 188 ALL_DISPLAY_STATES
189 189 } else {
190 190 let requested = DisplayStates {
191 191 modified: args.is_present("modified"),
192 192 added: args.is_present("added"),
193 193 removed: args.is_present("removed"),
194 194 clean: args.is_present("clean"),
195 195 deleted: args.is_present("deleted"),
196 196 unknown: args.is_present("unknown"),
197 197 ignored: args.is_present("ignored"),
198 198 };
199 199 if requested.is_empty() {
200 200 DEFAULT_DISPLAY_STATES
201 201 } else {
202 202 requested
203 203 }
204 204 };
205 205 let no_status = args.is_present("no-status");
206 206 let list_copies = all
207 207 || args.is_present("copies")
208 208 || config.get_bool(b"ui", b"statuscopies")?;
209 209
210 210 let repo = invocation.repo?;
211 211
212 212 if repo.has_sparse() || repo.has_narrow() {
213 213 return Err(CommandError::unsupported(
214 214 "rhg status is not supported for sparse checkouts or narrow clones yet"
215 215 ));
216 216 }
217 217
218 218 let mut dmap = repo.dirstate_map_mut()?;
219 219
220 220 let options = StatusOptions {
221 221 // we're currently supporting file systems with exec flags only
222 222 // anyway
223 223 check_exec: true,
224 224 list_clean: display_states.clean,
225 225 list_unknown: display_states.unknown,
226 226 list_ignored: display_states.ignored,
227 227 list_copies,
228 228 collect_traversed_dirs: false,
229 229 };
230 230 let (mut ds_status, pattern_warnings) = dmap.status(
231 231 &AlwaysMatcher,
232 232 repo.working_directory_path().to_owned(),
233 233 ignore_files(repo, config),
234 234 options,
235 235 )?;
236 236 if !pattern_warnings.is_empty() {
237 237 warn!("Pattern warnings: {:?}", &pattern_warnings);
238 238 }
239 239
240 240 for (path, error) in ds_status.bad {
241 241 let error = match error {
242 242 hg::BadMatch::OsError(code) => {
243 243 std::io::Error::from_raw_os_error(code).to_string()
244 244 }
245 245 hg::BadMatch::BadType(ty) => {
246 246 format!("unsupported file type (type is {})", ty)
247 247 }
248 248 };
249 249 ui.write_stderr(&format_bytes!(
250 250 b"{}: {}\n",
251 251 path.as_bytes(),
252 252 error.as_bytes()
253 253 ))?
254 254 }
255 255 if !ds_status.unsure.is_empty() {
256 256 info!(
257 257 "Files to be rechecked by retrieval from filelog: {:?}",
258 258 ds_status.unsure.iter().map(|s| &s.path).collect::<Vec<_>>()
259 259 );
260 260 }
261 261 let mut fixup = Vec::new();
262 262 if !ds_status.unsure.is_empty()
263 263 && (display_states.modified || display_states.clean)
264 264 {
265 265 let p1 = repo.dirstate_parents()?.p1;
266 266 let manifest = repo.manifest_for_node(p1).map_err(|e| {
267 267 CommandError::from((e, &*format!("{:x}", p1.short())))
268 268 })?;
269 269 for to_check in ds_status.unsure {
270 270 if unsure_is_modified(repo, &manifest, &to_check.path)? {
271 271 if display_states.modified {
272 272 ds_status.modified.push(to_check);
273 273 }
274 274 } else {
275 275 if display_states.clean {
276 276 ds_status.clean.push(to_check.clone());
277 277 }
278 278 fixup.push(to_check.path.into_owned())
279 279 }
280 280 }
281 281 }
282 282 let relative_paths = (!ui.plain())
283 283 && config
284 284 .get_option(b"commands", b"status.relative")?
285 285 .unwrap_or(config.get_bool(b"ui", b"relative-paths")?);
286 286 let output = DisplayStatusPaths {
287 287 ui,
288 288 no_status,
289 289 relativize: if relative_paths {
290 290 Some(RelativizePaths::new(repo)?)
291 291 } else {
292 292 None
293 293 },
294 294 };
295 295 if display_states.modified {
296 296 output.display(b"M", ds_status.modified)?;
297 297 }
298 298 if display_states.added {
299 299 output.display(b"A", ds_status.added)?;
300 300 }
301 301 if display_states.removed {
302 302 output.display(b"R", ds_status.removed)?;
303 303 }
304 304 if display_states.deleted {
305 305 output.display(b"!", ds_status.deleted)?;
306 306 }
307 307 if display_states.unknown {
308 308 output.display(b"?", ds_status.unknown)?;
309 309 }
310 310 if display_states.ignored {
311 311 output.display(b"I", ds_status.ignored)?;
312 312 }
313 313 if display_states.clean {
314 314 output.display(b"C", ds_status.clean)?;
315 315 }
316 316
317 317 let mut dirstate_write_needed = ds_status.dirty;
318 let filesystem_time_at_status_start = ds_status
319 .filesystem_time_at_status_start
320 .map(TruncatedTimestamp::from);
318 let filesystem_time_at_status_start =
319 ds_status.filesystem_time_at_status_start;
321 320
322 321 if (fixup.is_empty() || filesystem_time_at_status_start.is_none())
323 322 && !dirstate_write_needed
324 323 {
325 324 // Nothing to update
326 325 return Ok(());
327 326 }
328 327
329 328 // Update the dirstate on disk if we can
330 329 let with_lock_result =
331 330 repo.try_with_wlock_no_wait(|| -> Result<(), CommandError> {
332 331 if let Some(mtime_boundary) = filesystem_time_at_status_start {
333 332 for hg_path in fixup {
334 333 use std::os::unix::fs::MetadataExt;
335 334 let fs_path = hg_path_to_path_buf(&hg_path)
336 335 .expect("HgPath conversion");
337 336 // Specifically do not reuse `fs_metadata` from
338 337 // `unsure_is_clean` which was needed before reading
339 338 // contents. Here we access metadata again after reading
340 339 // content, in case it changed in the meantime.
341 340 let fs_metadata = repo
342 341 .working_directory_vfs()
343 342 .symlink_metadata(&fs_path)?;
344 343 if let Some(mtime) =
345 344 TruncatedTimestamp::for_reliable_mtime_of(
346 345 &fs_metadata,
347 346 &mtime_boundary,
348 347 )
349 348 .when_reading_file(&fs_path)?
350 349 {
351 350 let mode = fs_metadata.mode();
352 351 let size = fs_metadata.len() as u32 & RANGE_MASK_31BIT;
353 352 let mut entry = dmap
354 353 .get(&hg_path)?
355 354 .expect("ambiguous file not in dirstate");
356 355 entry.set_clean(mode, size, mtime);
357 356 dmap.add_file(&hg_path, entry)?;
358 357 dirstate_write_needed = true
359 358 }
360 359 }
361 360 }
362 361 drop(dmap); // Avoid "already mutably borrowed" RefCell panics
363 362 if dirstate_write_needed {
364 363 repo.write_dirstate()?
365 364 }
366 365 Ok(())
367 366 });
368 367 match with_lock_result {
369 368 Ok(closure_result) => closure_result?,
370 369 Err(LockError::AlreadyHeld) => {
371 370 // Not updating the dirstate is not ideal but not critical:
372 371 // don’t keep our caller waiting until some other Mercurial
373 372 // process releases the lock.
374 373 }
375 374 Err(LockError::Other(HgError::IoError { error, .. }))
376 375 if error.kind() == io::ErrorKind::PermissionDenied =>
377 376 {
378 377 // `hg status` on a read-only repository is fine
379 378 }
380 379 Err(LockError::Other(error)) => {
381 380 // Report other I/O errors
382 381 Err(error)?
383 382 }
384 383 }
385 384 Ok(())
386 385 }
387 386
388 387 fn ignore_files(repo: &Repo, config: &Config) -> Vec<PathBuf> {
389 388 let mut ignore_files = Vec::new();
390 389 let repo_ignore = repo.working_directory_vfs().join(".hgignore");
391 390 if repo_ignore.exists() {
392 391 ignore_files.push(repo_ignore)
393 392 }
394 393 for (key, value) in config.iter_section(b"ui") {
395 394 if key == b"ignore" || key.starts_with(b"ignore.") {
396 395 let path = get_path_from_bytes(value);
397 396 // TODO: expand "~/" and environment variable here, like Python
398 397 // does with `os.path.expanduser` and `os.path.expandvars`
399 398
400 399 let joined = repo.working_directory_path().join(path);
401 400 ignore_files.push(joined);
402 401 }
403 402 }
404 403 ignore_files
405 404 }
406 405
407 406 struct DisplayStatusPaths<'a> {
408 407 ui: &'a Ui,
409 408 no_status: bool,
410 409 relativize: Option<RelativizePaths>,
411 410 }
412 411
413 412 impl DisplayStatusPaths<'_> {
414 413 // Probably more elegant to use a Deref or Borrow trait rather than
415 414 // harcode HgPathBuf, but probably not really useful at this point
416 415 fn display(
417 416 &self,
418 417 status_prefix: &[u8],
419 418 mut paths: Vec<StatusPath<'_>>,
420 419 ) -> Result<(), CommandError> {
421 420 paths.sort_unstable();
422 421 for StatusPath { path, copy_source } in paths {
423 422 let relative;
424 423 let path = if let Some(relativize) = &self.relativize {
425 424 relative = relativize.relativize(&path);
426 425 &*relative
427 426 } else {
428 427 path.as_bytes()
429 428 };
430 429 // TODO optim, probably lots of unneeded copies here, especially
431 430 // if out stream is buffered
432 431 if self.no_status {
433 432 self.ui.write_stdout(&format_bytes!(b"{}\n", path))?
434 433 } else {
435 434 self.ui.write_stdout(&format_bytes!(
436 435 b"{} {}\n",
437 436 status_prefix,
438 437 path
439 438 ))?
440 439 }
441 440 if let Some(source) = copy_source {
442 441 self.ui.write_stdout(&format_bytes!(
443 442 b" {}\n",
444 443 source.as_bytes()
445 444 ))?
446 445 }
447 446 }
448 447 Ok(())
449 448 }
450 449 }
451 450
452 451 /// Check if a file is modified by comparing actual repo store and file system.
453 452 ///
454 453 /// This meant to be used for those that the dirstate cannot resolve, due
455 454 /// to time resolution limits.
456 455 fn unsure_is_modified(
457 456 repo: &Repo,
458 457 manifest: &Manifest,
459 458 hg_path: &HgPath,
460 459 ) -> Result<bool, HgError> {
461 460 let vfs = repo.working_directory_vfs();
462 461 let fs_path = hg_path_to_path_buf(hg_path).expect("HgPath conversion");
463 462 let fs_metadata = vfs.symlink_metadata(&fs_path)?;
464 463 let is_symlink = fs_metadata.file_type().is_symlink();
465 464 // TODO: Also account for `FALLBACK_SYMLINK` and `FALLBACK_EXEC` from the
466 465 // dirstate
467 466 let fs_flags = if is_symlink {
468 467 Some(b'l')
469 468 } else if has_exec_bit(&fs_metadata) {
470 469 Some(b'x')
471 470 } else {
472 471 None
473 472 };
474 473
475 474 let entry = manifest
476 475 .find_by_path(hg_path)?
477 476 .expect("ambgious file not in p1");
478 477 if entry.flags != fs_flags {
479 478 return Ok(true);
480 479 }
481 480 let filelog = repo.filelog(hg_path)?;
482 481 let fs_len = fs_metadata.len();
483 482 // TODO: check `fs_len` here like below, but based on
484 483 // `RevlogEntry::uncompressed_len` without decompressing the full filelog
485 484 // contents where possible. This is only valid if the revlog data does not
486 485 // contain metadata. See how Python’s `revlog.rawsize` calls
487 486 // `storageutil.filerevisioncopied`.
488 487 // (Maybe also check for content-modifying flags? See `revlog.size`.)
489 488 let filelog_entry =
490 489 filelog.data_for_node(entry.node_id()?).map_err(|_| {
491 490 HgError::corrupted("filelog missing node from manifest")
492 491 })?;
493 492 let contents_in_p1 = filelog_entry.data()?;
494 493 if contents_in_p1.len() as u64 != fs_len {
495 494 // No need to read the file contents:
496 495 // it cannot be equal if it has a different length.
497 496 return Ok(true);
498 497 }
499 498
500 499 let fs_contents = if is_symlink {
501 500 get_bytes_from_os_string(vfs.read_link(fs_path)?.into_os_string())
502 501 } else {
503 502 vfs.read(fs_path)?
504 503 };
505 504 Ok(contents_in_p1 != &*fs_contents)
506 505 }
General Comments 0
You need to be logged in to leave comments. Login now