##// END OF EJS Templates
status: Extend read_dir caching to directories with ignored files...
Simon Sapin -
r48269:94e38822 default
parent child Browse files
Show More
@@ -1,600 +1,603 b''
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! # File format
3 //! # File format
4 //!
4 //!
5 //! The file starts with a fixed-sized header, whose layout is defined by the
5 //! The file starts with a fixed-sized header, whose layout is defined by the
6 //! `Header` struct. Its `root` field contains the slice (offset and length) to
6 //! `Header` struct. Its `root` field contains the slice (offset and length) to
7 //! the nodes representing the files and directories at the root of the
7 //! the nodes representing the files and directories at the root of the
8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
9 //! Nodes in turn contain slices to variable-size paths, and to their own child
9 //! Nodes in turn contain slices to variable-size paths, and to their own child
10 //! nodes (if any) for nested files and directories.
10 //! nodes (if any) for nested files and directories.
11
11
12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
13 use crate::dirstate_tree::path_with_basename::WithBasename;
13 use crate::dirstate_tree::path_with_basename::WithBasename;
14 use crate::errors::HgError;
14 use crate::errors::HgError;
15 use crate::utils::hg_path::HgPath;
15 use crate::utils::hg_path::HgPath;
16 use crate::DirstateEntry;
16 use crate::DirstateEntry;
17 use crate::DirstateError;
17 use crate::DirstateError;
18 use crate::DirstateParents;
18 use crate::DirstateParents;
19 use crate::EntryState;
19 use crate::EntryState;
20 use bytes_cast::unaligned::{I32Be, I64Be, U32Be, U64Be};
20 use bytes_cast::unaligned::{I32Be, I64Be, U32Be, U64Be};
21 use bytes_cast::BytesCast;
21 use bytes_cast::BytesCast;
22 use std::borrow::Cow;
22 use std::borrow::Cow;
23 use std::convert::TryFrom;
23 use std::convert::TryFrom;
24 use std::time::{Duration, SystemTime, UNIX_EPOCH};
24 use std::time::{Duration, SystemTime, UNIX_EPOCH};
25
25
26 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
26 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
27 /// This a redundant sanity check more than an actual "magic number" since
27 /// This a redundant sanity check more than an actual "magic number" since
28 /// `.hg/requires` already governs which format should be used.
28 /// `.hg/requires` already governs which format should be used.
29 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
29 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
30
30
31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
33
33
34 #[derive(BytesCast)]
34 #[derive(BytesCast)]
35 #[repr(C)]
35 #[repr(C)]
36 struct Header {
36 struct Header {
37 marker: [u8; V2_FORMAT_MARKER.len()],
37 marker: [u8; V2_FORMAT_MARKER.len()],
38
38
39 /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
39 /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
40 /// `parents` field being at this offset, immediately after `marker`.
40 /// `parents` field being at this offset, immediately after `marker`.
41 parents: DirstateParents,
41 parents: DirstateParents,
42
42
43 root: ChildNodes,
43 root: ChildNodes,
44 nodes_with_entry_count: Size,
44 nodes_with_entry_count: Size,
45 nodes_with_copy_source_count: Size,
45 nodes_with_copy_source_count: Size,
46
46
47 /// If non-zero, a hash of ignore files that were used for some previous
47 /// If non-zero, a hash of ignore files that were used for some previous
48 /// run of the `status` algorithm.
48 /// run of the `status` algorithm.
49 ///
49 ///
50 /// We define:
50 /// We define:
51 ///
51 ///
52 /// * "Root" ignore files are `.hgignore` at the root of the repository if
52 /// * "Root" ignore files are `.hgignore` at the root of the repository if
53 /// it exists, and files from `ui.ignore.*` config. This set of files is
53 /// it exists, and files from `ui.ignore.*` config. This set of files is
54 /// then sorted by the string representation of their path.
54 /// then sorted by the string representation of their path.
55 /// * The "expanded contents" of an ignore files is the byte string made
55 /// * The "expanded contents" of an ignore files is the byte string made
56 /// by concatenating its contents with the "expanded contents" of other
56 /// by concatenating its contents with the "expanded contents" of other
57 /// files included with `include:` or `subinclude:` files, in inclusion
57 /// files included with `include:` or `subinclude:` files, in inclusion
58 /// order. This definition is recursive, as included files can
58 /// order. This definition is recursive, as included files can
59 /// themselves include more files.
59 /// themselves include more files.
60 ///
60 ///
61 /// This hash is defined as the SHA-1 of the concatenation (in sorted
61 /// This hash is defined as the SHA-1 of the concatenation (in sorted
62 /// order) of the "expanded contents" of each "root" ignore file.
62 /// order) of the "expanded contents" of each "root" ignore file.
63 /// (Note that computing this does not require actually concatenating byte
63 /// (Note that computing this does not require actually concatenating byte
64 /// strings into contiguous memory, instead SHA-1 hashing can be done
64 /// strings into contiguous memory, instead SHA-1 hashing can be done
65 /// incrementally.)
65 /// incrementally.)
66 ignore_patterns_hash: IgnorePatternsHash,
66 ignore_patterns_hash: IgnorePatternsHash,
67 }
67 }
68
68
69 #[derive(BytesCast)]
69 #[derive(BytesCast)]
70 #[repr(C)]
70 #[repr(C)]
71 pub(super) struct Node {
71 pub(super) struct Node {
72 full_path: PathSlice,
72 full_path: PathSlice,
73
73
74 /// In bytes from `self.full_path.start`
74 /// In bytes from `self.full_path.start`
75 base_name_start: Size,
75 base_name_start: Size,
76
76
77 copy_source: OptPathSlice,
77 copy_source: OptPathSlice,
78 children: ChildNodes,
78 children: ChildNodes,
79 pub(super) tracked_descendants_count: Size,
79 pub(super) tracked_descendants_count: Size,
80
80
81 /// Dependending on the value of `state`:
81 /// Dependending on the value of `state`:
82 ///
82 ///
83 /// * A null byte: `data` is not used.
83 /// * A null byte: `data` is not used.
84 ///
84 ///
85 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
85 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
86 /// represent a dirstate entry like in the v1 format.
86 /// represent a dirstate entry like in the v1 format.
87 ///
87 ///
88 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
88 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
89 /// as the `Timestamp` for the mtime of a cached directory.
89 /// as the `Timestamp` for the mtime of a cached directory.
90 ///
90 ///
91 /// The presence of this state means that at some point, this path in
91 /// The presence of this state means that at some point, this path in
92 /// the working directory was observed:
92 /// the working directory was observed:
93 ///
93 ///
94 /// - To be a directory
94 /// - To be a directory
95 /// - With the modification time as given by `Timestamp`
95 /// - With the modification time as given by `Timestamp`
96 /// - That timestamp was already strictly in the past when observed,
96 /// - That timestamp was already strictly in the past when observed,
97 /// meaning that later changes cannot happen in the same clock tick
97 /// meaning that later changes cannot happen in the same clock tick
98 /// and must cause a different modification time (unless the system
98 /// and must cause a different modification time (unless the system
99 /// clock jumps back and we get unlucky, which is not impossible but
99 /// clock jumps back and we get unlucky, which is not impossible but
100 /// but deemed unlikely enough).
100 /// but deemed unlikely enough).
101 /// - The directory did not contain any child entry that did not have a
101 /// - All direct children of this directory (as returned by
102 /// corresponding dirstate node.
102 /// `std::fs::read_dir`) either have a corresponding dirstate node, or
103 /// are ignored by ignore patterns whose hash is in
104 /// `Header::ignore_patterns_hash`.
103 ///
105 ///
104 /// This means that if `std::fs::symlink_metadata` later reports the
106 /// This means that if `std::fs::symlink_metadata` later reports the
105 /// same modification time, we don’t need to call `std::fs::read_dir`
107 /// same modification time and ignored patterns haven’t changed, a run
106 /// again for this directory and can iterate child dirstate nodes
108 /// of status that is not listing ignored files can skip calling
107 /// instead.
109 /// `std::fs::read_dir` again for this directory, iterate child
110 /// dirstate nodes instead.
108 state: u8,
111 state: u8,
109 data: Entry,
112 data: Entry,
110 }
113 }
111
114
112 #[derive(BytesCast, Copy, Clone)]
115 #[derive(BytesCast, Copy, Clone)]
113 #[repr(C)]
116 #[repr(C)]
114 struct Entry {
117 struct Entry {
115 mode: I32Be,
118 mode: I32Be,
116 mtime: I32Be,
119 mtime: I32Be,
117 size: I32Be,
120 size: I32Be,
118 }
121 }
119
122
120 /// Duration since the Unix epoch
123 /// Duration since the Unix epoch
121 #[derive(BytesCast, Copy, Clone, PartialEq)]
124 #[derive(BytesCast, Copy, Clone, PartialEq)]
122 #[repr(C)]
125 #[repr(C)]
123 pub(super) struct Timestamp {
126 pub(super) struct Timestamp {
124 seconds: I64Be,
127 seconds: I64Be,
125
128
126 /// In `0 .. 1_000_000_000`.
129 /// In `0 .. 1_000_000_000`.
127 ///
130 ///
128 /// This timestamp is later or earlier than `(seconds, 0)` by this many
131 /// This timestamp is later or earlier than `(seconds, 0)` by this many
129 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
132 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
130 nanoseconds: U32Be,
133 nanoseconds: U32Be,
131 }
134 }
132
135
133 /// Counted in bytes from the start of the file
136 /// Counted in bytes from the start of the file
134 ///
137 ///
135 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
138 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
136 /// we could save space by using `U32Be` instead.
139 /// we could save space by using `U32Be` instead.
137 type Offset = U64Be;
140 type Offset = U64Be;
138
141
139 /// Counted in number of items
142 /// Counted in number of items
140 ///
143 ///
141 /// NOTE: not supporting directories with more than 4 billion direct children,
144 /// NOTE: not supporting directories with more than 4 billion direct children,
142 /// or filenames more than 4 GiB.
145 /// or filenames more than 4 GiB.
143 type Size = U32Be;
146 type Size = U32Be;
144
147
145 /// Location of consecutive, fixed-size items.
148 /// Location of consecutive, fixed-size items.
146 ///
149 ///
147 /// An item can be a single byte for paths, or a struct with
150 /// An item can be a single byte for paths, or a struct with
148 /// `derive(BytesCast)`.
151 /// `derive(BytesCast)`.
149 #[derive(BytesCast, Copy, Clone)]
152 #[derive(BytesCast, Copy, Clone)]
150 #[repr(C)]
153 #[repr(C)]
151 struct Slice {
154 struct Slice {
152 start: Offset,
155 start: Offset,
153 len: Size,
156 len: Size,
154 }
157 }
155
158
156 /// A contiguous sequence of `len` times `Node`, representing the child nodes
159 /// A contiguous sequence of `len` times `Node`, representing the child nodes
157 /// of either some other node or of the repository root.
160 /// of either some other node or of the repository root.
158 ///
161 ///
159 /// Always sorted by ascending `full_path`, to allow binary search.
162 /// Always sorted by ascending `full_path`, to allow binary search.
160 /// Since nodes with the same parent nodes also have the same parent path,
163 /// Since nodes with the same parent nodes also have the same parent path,
161 /// only the `base_name`s need to be compared during binary search.
164 /// only the `base_name`s need to be compared during binary search.
162 type ChildNodes = Slice;
165 type ChildNodes = Slice;
163
166
164 /// A `HgPath` of `len` bytes
167 /// A `HgPath` of `len` bytes
165 type PathSlice = Slice;
168 type PathSlice = Slice;
166
169
167 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
170 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
168 type OptPathSlice = Slice;
171 type OptPathSlice = Slice;
169
172
170 /// Make sure that size-affecting changes are made knowingly
173 /// Make sure that size-affecting changes are made knowingly
171 fn _static_assert_size_of() {
174 fn _static_assert_size_of() {
172 let _ = std::mem::transmute::<Header, [u8; 92]>;
175 let _ = std::mem::transmute::<Header, [u8; 92]>;
173 let _ = std::mem::transmute::<Node, [u8; 57]>;
176 let _ = std::mem::transmute::<Node, [u8; 57]>;
174 }
177 }
175
178
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError;
181
184
182 impl From<DirstateV2ParseError> for HgError {
185 impl From<DirstateV2ParseError> for HgError {
183 fn from(_: DirstateV2ParseError) -> Self {
186 fn from(_: DirstateV2ParseError) -> Self {
184 HgError::corrupted("dirstate-v2 parse error")
187 HgError::corrupted("dirstate-v2 parse error")
185 }
188 }
186 }
189 }
187
190
188 impl From<DirstateV2ParseError> for crate::DirstateError {
191 impl From<DirstateV2ParseError> for crate::DirstateError {
189 fn from(error: DirstateV2ParseError) -> Self {
192 fn from(error: DirstateV2ParseError) -> Self {
190 HgError::from(error).into()
193 HgError::from(error).into()
191 }
194 }
192 }
195 }
193
196
194 fn read_header(on_disk: &[u8]) -> Result<&Header, DirstateV2ParseError> {
197 fn read_header(on_disk: &[u8]) -> Result<&Header, DirstateV2ParseError> {
195 let (header, _) =
198 let (header, _) =
196 Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
199 Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
197 if header.marker == *V2_FORMAT_MARKER {
200 if header.marker == *V2_FORMAT_MARKER {
198 Ok(header)
201 Ok(header)
199 } else {
202 } else {
200 Err(DirstateV2ParseError)
203 Err(DirstateV2ParseError)
201 }
204 }
202 }
205 }
203
206
204 pub(super) fn read<'on_disk>(
207 pub(super) fn read<'on_disk>(
205 on_disk: &'on_disk [u8],
208 on_disk: &'on_disk [u8],
206 ) -> Result<
209 ) -> Result<
207 (DirstateMap<'on_disk>, Option<DirstateParents>),
210 (DirstateMap<'on_disk>, Option<DirstateParents>),
208 DirstateV2ParseError,
211 DirstateV2ParseError,
209 > {
212 > {
210 if on_disk.is_empty() {
213 if on_disk.is_empty() {
211 return Ok((DirstateMap::empty(on_disk), None));
214 return Ok((DirstateMap::empty(on_disk), None));
212 }
215 }
213 let header = read_header(on_disk)?;
216 let header = read_header(on_disk)?;
214 let dirstate_map = DirstateMap {
217 let dirstate_map = DirstateMap {
215 on_disk,
218 on_disk,
216 root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
219 root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
217 on_disk,
220 on_disk,
218 header.root,
221 header.root,
219 )?),
222 )?),
220 nodes_with_entry_count: header.nodes_with_entry_count.get(),
223 nodes_with_entry_count: header.nodes_with_entry_count.get(),
221 nodes_with_copy_source_count: header
224 nodes_with_copy_source_count: header
222 .nodes_with_copy_source_count
225 .nodes_with_copy_source_count
223 .get(),
226 .get(),
224 ignore_patterns_hash: header.ignore_patterns_hash,
227 ignore_patterns_hash: header.ignore_patterns_hash,
225 };
228 };
226 let parents = Some(header.parents.clone());
229 let parents = Some(header.parents.clone());
227 Ok((dirstate_map, parents))
230 Ok((dirstate_map, parents))
228 }
231 }
229
232
230 impl Node {
233 impl Node {
231 pub(super) fn full_path<'on_disk>(
234 pub(super) fn full_path<'on_disk>(
232 &self,
235 &self,
233 on_disk: &'on_disk [u8],
236 on_disk: &'on_disk [u8],
234 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
237 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
235 read_hg_path(on_disk, self.full_path)
238 read_hg_path(on_disk, self.full_path)
236 }
239 }
237
240
238 pub(super) fn base_name_start<'on_disk>(
241 pub(super) fn base_name_start<'on_disk>(
239 &self,
242 &self,
240 ) -> Result<usize, DirstateV2ParseError> {
243 ) -> Result<usize, DirstateV2ParseError> {
241 let start = self.base_name_start.get();
244 let start = self.base_name_start.get();
242 if start < self.full_path.len.get() {
245 if start < self.full_path.len.get() {
243 let start = usize::try_from(start)
246 let start = usize::try_from(start)
244 // u32 -> usize, could only panic on a 16-bit CPU
247 // u32 -> usize, could only panic on a 16-bit CPU
245 .expect("dirstate-v2 base_name_start out of bounds");
248 .expect("dirstate-v2 base_name_start out of bounds");
246 Ok(start)
249 Ok(start)
247 } else {
250 } else {
248 Err(DirstateV2ParseError)
251 Err(DirstateV2ParseError)
249 }
252 }
250 }
253 }
251
254
252 pub(super) fn base_name<'on_disk>(
255 pub(super) fn base_name<'on_disk>(
253 &self,
256 &self,
254 on_disk: &'on_disk [u8],
257 on_disk: &'on_disk [u8],
255 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
258 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
256 let full_path = self.full_path(on_disk)?;
259 let full_path = self.full_path(on_disk)?;
257 let base_name_start = self.base_name_start()?;
260 let base_name_start = self.base_name_start()?;
258 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
261 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
259 }
262 }
260
263
261 pub(super) fn path<'on_disk>(
264 pub(super) fn path<'on_disk>(
262 &self,
265 &self,
263 on_disk: &'on_disk [u8],
266 on_disk: &'on_disk [u8],
264 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
267 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
265 Ok(WithBasename::from_raw_parts(
268 Ok(WithBasename::from_raw_parts(
266 Cow::Borrowed(self.full_path(on_disk)?),
269 Cow::Borrowed(self.full_path(on_disk)?),
267 self.base_name_start()?,
270 self.base_name_start()?,
268 ))
271 ))
269 }
272 }
270
273
271 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
274 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
272 self.copy_source.start.get() != 0
275 self.copy_source.start.get() != 0
273 }
276 }
274
277
275 pub(super) fn copy_source<'on_disk>(
278 pub(super) fn copy_source<'on_disk>(
276 &self,
279 &self,
277 on_disk: &'on_disk [u8],
280 on_disk: &'on_disk [u8],
278 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
281 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
279 Ok(if self.has_copy_source() {
282 Ok(if self.has_copy_source() {
280 Some(read_hg_path(on_disk, self.copy_source)?)
283 Some(read_hg_path(on_disk, self.copy_source)?)
281 } else {
284 } else {
282 None
285 None
283 })
286 })
284 }
287 }
285
288
286 pub(super) fn node_data(
289 pub(super) fn node_data(
287 &self,
290 &self,
288 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
291 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
289 let entry = |state| {
292 let entry = |state| {
290 dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
293 dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
291 };
294 };
292
295
293 match self.state {
296 match self.state {
294 b'\0' => Ok(dirstate_map::NodeData::None),
297 b'\0' => Ok(dirstate_map::NodeData::None),
295 b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
298 b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
296 mtime: *self.data.as_timestamp(),
299 mtime: *self.data.as_timestamp(),
297 }),
300 }),
298 b'n' => Ok(entry(EntryState::Normal)),
301 b'n' => Ok(entry(EntryState::Normal)),
299 b'a' => Ok(entry(EntryState::Added)),
302 b'a' => Ok(entry(EntryState::Added)),
300 b'r' => Ok(entry(EntryState::Removed)),
303 b'r' => Ok(entry(EntryState::Removed)),
301 b'm' => Ok(entry(EntryState::Merged)),
304 b'm' => Ok(entry(EntryState::Merged)),
302 _ => Err(DirstateV2ParseError),
305 _ => Err(DirstateV2ParseError),
303 }
306 }
304 }
307 }
305
308
306 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
309 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
307 if self.state == b'd' {
310 if self.state == b'd' {
308 Some(self.data.as_timestamp())
311 Some(self.data.as_timestamp())
309 } else {
312 } else {
310 None
313 None
311 }
314 }
312 }
315 }
313
316
314 pub(super) fn state(
317 pub(super) fn state(
315 &self,
318 &self,
316 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
319 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
317 match self.state {
320 match self.state {
318 b'\0' | b'd' => Ok(None),
321 b'\0' | b'd' => Ok(None),
319 b'n' => Ok(Some(EntryState::Normal)),
322 b'n' => Ok(Some(EntryState::Normal)),
320 b'a' => Ok(Some(EntryState::Added)),
323 b'a' => Ok(Some(EntryState::Added)),
321 b'r' => Ok(Some(EntryState::Removed)),
324 b'r' => Ok(Some(EntryState::Removed)),
322 b'm' => Ok(Some(EntryState::Merged)),
325 b'm' => Ok(Some(EntryState::Merged)),
323 _ => Err(DirstateV2ParseError),
326 _ => Err(DirstateV2ParseError),
324 }
327 }
325 }
328 }
326
329
327 fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
330 fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
328 DirstateEntry {
331 DirstateEntry {
329 state,
332 state,
330 mode: self.data.mode.get(),
333 mode: self.data.mode.get(),
331 mtime: self.data.mtime.get(),
334 mtime: self.data.mtime.get(),
332 size: self.data.size.get(),
335 size: self.data.size.get(),
333 }
336 }
334 }
337 }
335
338
336 pub(super) fn entry(
339 pub(super) fn entry(
337 &self,
340 &self,
338 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
341 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
339 Ok(self
342 Ok(self
340 .state()?
343 .state()?
341 .map(|state| self.entry_with_given_state(state)))
344 .map(|state| self.entry_with_given_state(state)))
342 }
345 }
343
346
344 pub(super) fn children<'on_disk>(
347 pub(super) fn children<'on_disk>(
345 &self,
348 &self,
346 on_disk: &'on_disk [u8],
349 on_disk: &'on_disk [u8],
347 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
350 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
348 read_slice::<Node>(on_disk, self.children)
351 read_slice::<Node>(on_disk, self.children)
349 }
352 }
350
353
351 pub(super) fn to_in_memory_node<'on_disk>(
354 pub(super) fn to_in_memory_node<'on_disk>(
352 &self,
355 &self,
353 on_disk: &'on_disk [u8],
356 on_disk: &'on_disk [u8],
354 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
357 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
355 Ok(dirstate_map::Node {
358 Ok(dirstate_map::Node {
356 children: dirstate_map::ChildNodes::OnDisk(
359 children: dirstate_map::ChildNodes::OnDisk(
357 self.children(on_disk)?,
360 self.children(on_disk)?,
358 ),
361 ),
359 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
362 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
360 data: self.node_data()?,
363 data: self.node_data()?,
361 tracked_descendants_count: self.tracked_descendants_count.get(),
364 tracked_descendants_count: self.tracked_descendants_count.get(),
362 })
365 })
363 }
366 }
364 }
367 }
365
368
366 impl Entry {
369 impl Entry {
367 fn from_timestamp(timestamp: Timestamp) -> Self {
370 fn from_timestamp(timestamp: Timestamp) -> Self {
368 // Safety: both types implement the `ByteCast` trait, so we could
371 // Safety: both types implement the `ByteCast` trait, so we could
369 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
372 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
370 // `transmute` instead makes the compiler check that the two types
373 // `transmute` instead makes the compiler check that the two types
371 // have the same size, which eliminates the error case of
374 // have the same size, which eliminates the error case of
372 // `from_bytes`.
375 // `from_bytes`.
373 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
376 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
374 }
377 }
375
378
376 fn as_timestamp(&self) -> &Timestamp {
379 fn as_timestamp(&self) -> &Timestamp {
377 // Safety: same as above in `from_timestamp`
380 // Safety: same as above in `from_timestamp`
378 unsafe { &*(self as *const Entry as *const Timestamp) }
381 unsafe { &*(self as *const Entry as *const Timestamp) }
379 }
382 }
380 }
383 }
381
384
382 impl Timestamp {
385 impl Timestamp {
383 pub fn seconds(&self) -> i64 {
386 pub fn seconds(&self) -> i64 {
384 self.seconds.get()
387 self.seconds.get()
385 }
388 }
386 }
389 }
387
390
388 impl From<SystemTime> for Timestamp {
391 impl From<SystemTime> for Timestamp {
389 fn from(system_time: SystemTime) -> Self {
392 fn from(system_time: SystemTime) -> Self {
390 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
393 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
391 Ok(duration) => {
394 Ok(duration) => {
392 (duration.as_secs() as i64, duration.subsec_nanos())
395 (duration.as_secs() as i64, duration.subsec_nanos())
393 }
396 }
394 Err(error) => {
397 Err(error) => {
395 let negative = error.duration();
398 let negative = error.duration();
396 (-(negative.as_secs() as i64), negative.subsec_nanos())
399 (-(negative.as_secs() as i64), negative.subsec_nanos())
397 }
400 }
398 };
401 };
399 Timestamp {
402 Timestamp {
400 seconds: secs.into(),
403 seconds: secs.into(),
401 nanoseconds: nanos.into(),
404 nanoseconds: nanos.into(),
402 }
405 }
403 }
406 }
404 }
407 }
405
408
406 impl From<&'_ Timestamp> for SystemTime {
409 impl From<&'_ Timestamp> for SystemTime {
407 fn from(timestamp: &'_ Timestamp) -> Self {
410 fn from(timestamp: &'_ Timestamp) -> Self {
408 let secs = timestamp.seconds.get();
411 let secs = timestamp.seconds.get();
409 let nanos = timestamp.nanoseconds.get();
412 let nanos = timestamp.nanoseconds.get();
410 if secs >= 0 {
413 if secs >= 0 {
411 UNIX_EPOCH + Duration::new(secs as u64, nanos)
414 UNIX_EPOCH + Duration::new(secs as u64, nanos)
412 } else {
415 } else {
413 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
416 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
414 }
417 }
415 }
418 }
416 }
419 }
417
420
418 fn read_hg_path(
421 fn read_hg_path(
419 on_disk: &[u8],
422 on_disk: &[u8],
420 slice: Slice,
423 slice: Slice,
421 ) -> Result<&HgPath, DirstateV2ParseError> {
424 ) -> Result<&HgPath, DirstateV2ParseError> {
422 let bytes = read_slice::<u8>(on_disk, slice)?;
425 let bytes = read_slice::<u8>(on_disk, slice)?;
423 Ok(HgPath::new(bytes))
426 Ok(HgPath::new(bytes))
424 }
427 }
425
428
426 fn read_slice<T>(
429 fn read_slice<T>(
427 on_disk: &[u8],
430 on_disk: &[u8],
428 slice: Slice,
431 slice: Slice,
429 ) -> Result<&[T], DirstateV2ParseError>
432 ) -> Result<&[T], DirstateV2ParseError>
430 where
433 where
431 T: BytesCast,
434 T: BytesCast,
432 {
435 {
433 // Either `usize::MAX` would result in "out of bounds" error since a single
436 // Either `usize::MAX` would result in "out of bounds" error since a single
434 // `&[u8]` cannot occupy the entire addess space.
437 // `&[u8]` cannot occupy the entire addess space.
435 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
438 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
436 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
439 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
437 on_disk
440 on_disk
438 .get(start..)
441 .get(start..)
439 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
442 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
440 .map(|(slice, _rest)| slice)
443 .map(|(slice, _rest)| slice)
441 .ok_or_else(|| DirstateV2ParseError)
444 .ok_or_else(|| DirstateV2ParseError)
442 }
445 }
443
446
444 pub(crate) fn parse_dirstate_parents(
447 pub(crate) fn parse_dirstate_parents(
445 on_disk: &[u8],
448 on_disk: &[u8],
446 ) -> Result<&DirstateParents, HgError> {
449 ) -> Result<&DirstateParents, HgError> {
447 Ok(&read_header(on_disk)?.parents)
450 Ok(&read_header(on_disk)?.parents)
448 }
451 }
449
452
450 pub(crate) fn for_each_tracked_path<'on_disk>(
453 pub(crate) fn for_each_tracked_path<'on_disk>(
451 on_disk: &'on_disk [u8],
454 on_disk: &'on_disk [u8],
452 mut f: impl FnMut(&'on_disk HgPath),
455 mut f: impl FnMut(&'on_disk HgPath),
453 ) -> Result<(), DirstateV2ParseError> {
456 ) -> Result<(), DirstateV2ParseError> {
454 let header = read_header(on_disk)?;
457 let header = read_header(on_disk)?;
455 fn recur<'on_disk>(
458 fn recur<'on_disk>(
456 on_disk: &'on_disk [u8],
459 on_disk: &'on_disk [u8],
457 nodes: Slice,
460 nodes: Slice,
458 f: &mut impl FnMut(&'on_disk HgPath),
461 f: &mut impl FnMut(&'on_disk HgPath),
459 ) -> Result<(), DirstateV2ParseError> {
462 ) -> Result<(), DirstateV2ParseError> {
460 for node in read_slice::<Node>(on_disk, nodes)? {
463 for node in read_slice::<Node>(on_disk, nodes)? {
461 if let Some(state) = node.state()? {
464 if let Some(state) = node.state()? {
462 if state.is_tracked() {
465 if state.is_tracked() {
463 f(node.full_path(on_disk)?)
466 f(node.full_path(on_disk)?)
464 }
467 }
465 }
468 }
466 recur(on_disk, node.children, f)?
469 recur(on_disk, node.children, f)?
467 }
470 }
468 Ok(())
471 Ok(())
469 }
472 }
470 recur(on_disk, header.root, &mut f)
473 recur(on_disk, header.root, &mut f)
471 }
474 }
472
475
473 pub(super) fn write(
476 pub(super) fn write(
474 dirstate_map: &mut DirstateMap,
477 dirstate_map: &mut DirstateMap,
475 parents: DirstateParents,
478 parents: DirstateParents,
476 ) -> Result<Vec<u8>, DirstateError> {
479 ) -> Result<Vec<u8>, DirstateError> {
477 let header_len = std::mem::size_of::<Header>();
480 let header_len = std::mem::size_of::<Header>();
478
481
479 // This ignores the space for paths, and for nodes without an entry.
482 // This ignores the space for paths, and for nodes without an entry.
480 // TODO: better estimate? Skip the `Vec` and write to a file directly?
483 // TODO: better estimate? Skip the `Vec` and write to a file directly?
481 let size_guess = header_len
484 let size_guess = header_len
482 + std::mem::size_of::<Node>()
485 + std::mem::size_of::<Node>()
483 * dirstate_map.nodes_with_entry_count as usize;
486 * dirstate_map.nodes_with_entry_count as usize;
484 let mut out = Vec::with_capacity(size_guess);
487 let mut out = Vec::with_capacity(size_guess);
485
488
486 // Keep space for the header. We’ll fill it out at the end when we know the
489 // Keep space for the header. We’ll fill it out at the end when we know the
487 // actual offset for the root nodes.
490 // actual offset for the root nodes.
488 out.resize(header_len, 0_u8);
491 out.resize(header_len, 0_u8);
489
492
490 let root =
493 let root =
491 write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
494 write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
492
495
493 let header = Header {
496 let header = Header {
494 marker: *V2_FORMAT_MARKER,
497 marker: *V2_FORMAT_MARKER,
495 parents: parents,
498 parents: parents,
496 root,
499 root,
497 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
500 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
498 nodes_with_copy_source_count: dirstate_map
501 nodes_with_copy_source_count: dirstate_map
499 .nodes_with_copy_source_count
502 .nodes_with_copy_source_count
500 .into(),
503 .into(),
501 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
504 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
502 };
505 };
503 out[..header_len].copy_from_slice(header.as_bytes());
506 out[..header_len].copy_from_slice(header.as_bytes());
504 Ok(out)
507 Ok(out)
505 }
508 }
506
509
507 fn write_nodes(
510 fn write_nodes(
508 dirstate_map: &DirstateMap,
511 dirstate_map: &DirstateMap,
509 nodes: dirstate_map::ChildNodesRef,
512 nodes: dirstate_map::ChildNodesRef,
510 out: &mut Vec<u8>,
513 out: &mut Vec<u8>,
511 ) -> Result<ChildNodes, DirstateError> {
514 ) -> Result<ChildNodes, DirstateError> {
512 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
515 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
513 // order. Sort to enable binary search in the written file.
516 // order. Sort to enable binary search in the written file.
514 let nodes = nodes.sorted();
517 let nodes = nodes.sorted();
515
518
516 // First accumulate serialized nodes in a `Vec`
519 // First accumulate serialized nodes in a `Vec`
517 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
520 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
518 for node in nodes {
521 for node in nodes {
519 let children = write_nodes(
522 let children = write_nodes(
520 dirstate_map,
523 dirstate_map,
521 node.children(dirstate_map.on_disk)?,
524 node.children(dirstate_map.on_disk)?,
522 out,
525 out,
523 )?;
526 )?;
524 let full_path = node.full_path(dirstate_map.on_disk)?;
527 let full_path = node.full_path(dirstate_map.on_disk)?;
525 let full_path = write_slice::<u8>(full_path.as_bytes(), out);
528 let full_path = write_slice::<u8>(full_path.as_bytes(), out);
526 let copy_source =
529 let copy_source =
527 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
530 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
528 write_slice::<u8>(source.as_bytes(), out)
531 write_slice::<u8>(source.as_bytes(), out)
529 } else {
532 } else {
530 Slice {
533 Slice {
531 start: 0.into(),
534 start: 0.into(),
532 len: 0.into(),
535 len: 0.into(),
533 }
536 }
534 };
537 };
535 on_disk_nodes.push(match node {
538 on_disk_nodes.push(match node {
536 NodeRef::InMemory(path, node) => {
539 NodeRef::InMemory(path, node) => {
537 let (state, data) = match &node.data {
540 let (state, data) = match &node.data {
538 dirstate_map::NodeData::Entry(entry) => (
541 dirstate_map::NodeData::Entry(entry) => (
539 entry.state.into(),
542 entry.state.into(),
540 Entry {
543 Entry {
541 mode: entry.mode.into(),
544 mode: entry.mode.into(),
542 mtime: entry.mtime.into(),
545 mtime: entry.mtime.into(),
543 size: entry.size.into(),
546 size: entry.size.into(),
544 },
547 },
545 ),
548 ),
546 dirstate_map::NodeData::CachedDirectory { mtime } => {
549 dirstate_map::NodeData::CachedDirectory { mtime } => {
547 (b'd', Entry::from_timestamp(*mtime))
550 (b'd', Entry::from_timestamp(*mtime))
548 }
551 }
549 dirstate_map::NodeData::None => (
552 dirstate_map::NodeData::None => (
550 b'\0',
553 b'\0',
551 Entry {
554 Entry {
552 mode: 0.into(),
555 mode: 0.into(),
553 mtime: 0.into(),
556 mtime: 0.into(),
554 size: 0.into(),
557 size: 0.into(),
555 },
558 },
556 ),
559 ),
557 };
560 };
558 Node {
561 Node {
559 children,
562 children,
560 copy_source,
563 copy_source,
561 full_path,
564 full_path,
562 base_name_start: u32::try_from(path.base_name_start())
565 base_name_start: u32::try_from(path.base_name_start())
563 // Could only panic for paths over 4 GiB
566 // Could only panic for paths over 4 GiB
564 .expect("dirstate-v2 offset overflow")
567 .expect("dirstate-v2 offset overflow")
565 .into(),
568 .into(),
566 tracked_descendants_count: node
569 tracked_descendants_count: node
567 .tracked_descendants_count
570 .tracked_descendants_count
568 .into(),
571 .into(),
569 state,
572 state,
570 data,
573 data,
571 }
574 }
572 }
575 }
573 NodeRef::OnDisk(node) => Node {
576 NodeRef::OnDisk(node) => Node {
574 children,
577 children,
575 copy_source,
578 copy_source,
576 full_path,
579 full_path,
577 ..*node
580 ..*node
578 },
581 },
579 })
582 })
580 }
583 }
581 // … so we can write them contiguously
584 // … so we can write them contiguously
582 Ok(write_slice::<Node>(&on_disk_nodes, out))
585 Ok(write_slice::<Node>(&on_disk_nodes, out))
583 }
586 }
584
587
585 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
588 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
586 where
589 where
587 T: BytesCast,
590 T: BytesCast,
588 {
591 {
589 let start = u64::try_from(out.len())
592 let start = u64::try_from(out.len())
590 // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
593 // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
591 .expect("dirstate-v2 offset overflow")
594 .expect("dirstate-v2 offset overflow")
592 .into();
595 .into();
593 let len = u32::try_from(slice.len())
596 let len = u32::try_from(slice.len())
594 // Could only panic for paths over 4 GiB or nodes with over 4 billions
597 // Could only panic for paths over 4 GiB or nodes with over 4 billions
595 // child nodes
598 // child nodes
596 .expect("dirstate-v2 offset overflow")
599 .expect("dirstate-v2 offset overflow")
597 .into();
600 .into();
598 out.extend(slice.as_bytes());
601 out.extend(slice.as_bytes());
599 Slice { start, len }
602 Slice { start, len }
600 }
603 }
@@ -1,728 +1,758 b''
1 use crate::dirstate::status::IgnoreFnType;
1 use crate::dirstate::status::IgnoreFnType;
2 use crate::dirstate_tree::dirstate_map::BorrowedPath;
2 use crate::dirstate_tree::dirstate_map::BorrowedPath;
3 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
3 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
4 use crate::dirstate_tree::dirstate_map::DirstateMap;
4 use crate::dirstate_tree::dirstate_map::DirstateMap;
5 use crate::dirstate_tree::dirstate_map::NodeData;
5 use crate::dirstate_tree::dirstate_map::NodeData;
6 use crate::dirstate_tree::dirstate_map::NodeRef;
6 use crate::dirstate_tree::dirstate_map::NodeRef;
7 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
7 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
8 use crate::dirstate_tree::on_disk::Timestamp;
8 use crate::dirstate_tree::on_disk::Timestamp;
9 use crate::matchers::get_ignore_function;
9 use crate::matchers::get_ignore_function;
10 use crate::matchers::Matcher;
10 use crate::matchers::Matcher;
11 use crate::utils::files::get_bytes_from_os_string;
11 use crate::utils::files::get_bytes_from_os_string;
12 use crate::utils::files::get_path_from_bytes;
12 use crate::utils::files::get_path_from_bytes;
13 use crate::utils::hg_path::HgPath;
13 use crate::utils::hg_path::HgPath;
14 use crate::BadMatch;
14 use crate::BadMatch;
15 use crate::DirstateStatus;
15 use crate::DirstateStatus;
16 use crate::EntryState;
16 use crate::EntryState;
17 use crate::HgPathBuf;
17 use crate::HgPathBuf;
18 use crate::PatternFileWarning;
18 use crate::PatternFileWarning;
19 use crate::StatusError;
19 use crate::StatusError;
20 use crate::StatusOptions;
20 use crate::StatusOptions;
21 use micro_timer::timed;
21 use micro_timer::timed;
22 use rayon::prelude::*;
22 use rayon::prelude::*;
23 use sha1::{Digest, Sha1};
23 use sha1::{Digest, Sha1};
24 use std::borrow::Cow;
24 use std::borrow::Cow;
25 use std::io;
25 use std::io;
26 use std::path::Path;
26 use std::path::Path;
27 use std::path::PathBuf;
27 use std::path::PathBuf;
28 use std::sync::Mutex;
28 use std::sync::Mutex;
29 use std::time::SystemTime;
29 use std::time::SystemTime;
30
30
31 /// Returns the status of the working directory compared to its parent
31 /// Returns the status of the working directory compared to its parent
32 /// changeset.
32 /// changeset.
33 ///
33 ///
34 /// This algorithm is based on traversing the filesystem tree (`fs` in function
34 /// This algorithm is based on traversing the filesystem tree (`fs` in function
35 /// and variable names) and dirstate tree at the same time. The core of this
35 /// and variable names) and dirstate tree at the same time. The core of this
36 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
36 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
37 /// and its use of `itertools::merge_join_by`. When reaching a path that only
37 /// and its use of `itertools::merge_join_by`. When reaching a path that only
38 /// exists in one of the two trees, depending on information requested by
38 /// exists in one of the two trees, depending on information requested by
39 /// `options` we may need to traverse the remaining subtree.
39 /// `options` we may need to traverse the remaining subtree.
40 #[timed]
40 #[timed]
41 pub fn status<'tree, 'on_disk: 'tree>(
41 pub fn status<'tree, 'on_disk: 'tree>(
42 dmap: &'tree mut DirstateMap<'on_disk>,
42 dmap: &'tree mut DirstateMap<'on_disk>,
43 matcher: &(dyn Matcher + Sync),
43 matcher: &(dyn Matcher + Sync),
44 root_dir: PathBuf,
44 root_dir: PathBuf,
45 ignore_files: Vec<PathBuf>,
45 ignore_files: Vec<PathBuf>,
46 options: StatusOptions,
46 options: StatusOptions,
47 ) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
47 ) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
48 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
48 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
49 if options.list_ignored || options.list_unknown {
49 if options.list_ignored || options.list_unknown {
50 let mut hasher = Sha1::new();
50 let mut hasher = Sha1::new();
51 let (ignore_fn, warnings) = get_ignore_function(
51 let (ignore_fn, warnings) = get_ignore_function(
52 ignore_files,
52 ignore_files,
53 &root_dir,
53 &root_dir,
54 &mut |pattern_bytes| hasher.update(pattern_bytes),
54 &mut |pattern_bytes| hasher.update(pattern_bytes),
55 )?;
55 )?;
56 let new_hash = *hasher.finalize().as_ref();
56 let new_hash = *hasher.finalize().as_ref();
57 let changed = new_hash != dmap.ignore_patterns_hash;
57 let changed = new_hash != dmap.ignore_patterns_hash;
58 dmap.ignore_patterns_hash = new_hash;
58 dmap.ignore_patterns_hash = new_hash;
59 (ignore_fn, warnings, Some(changed))
59 (ignore_fn, warnings, Some(changed))
60 } else {
60 } else {
61 (Box::new(|&_| true), vec![], None)
61 (Box::new(|&_| true), vec![], None)
62 };
62 };
63
63
64 let common = StatusCommon {
64 let common = StatusCommon {
65 dmap,
65 dmap,
66 options,
66 options,
67 matcher,
67 matcher,
68 ignore_fn,
68 ignore_fn,
69 outcome: Default::default(),
69 outcome: Default::default(),
70 ignore_patterns_have_changed: patterns_changed,
70 ignore_patterns_have_changed: patterns_changed,
71 new_cachable_directories: Default::default(),
71 new_cachable_directories: Default::default(),
72 outated_cached_directories: Default::default(),
72 outated_cached_directories: Default::default(),
73 filesystem_time_at_status_start: filesystem_now(&root_dir).ok(),
73 filesystem_time_at_status_start: filesystem_now(&root_dir).ok(),
74 };
74 };
75 let is_at_repo_root = true;
75 let is_at_repo_root = true;
76 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
76 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
77 let has_ignored_ancestor = false;
77 let has_ignored_ancestor = false;
78 let root_cached_mtime = None;
78 let root_cached_mtime = None;
79 let root_dir_metadata = None;
79 let root_dir_metadata = None;
80 // If the path we have for the repository root is a symlink, do follow it.
80 // If the path we have for the repository root is a symlink, do follow it.
81 // (As opposed to symlinks within the working directory which are not
81 // (As opposed to symlinks within the working directory which are not
82 // followed, using `std::fs::symlink_metadata`.)
82 // followed, using `std::fs::symlink_metadata`.)
83 common.traverse_fs_directory_and_dirstate(
83 common.traverse_fs_directory_and_dirstate(
84 has_ignored_ancestor,
84 has_ignored_ancestor,
85 dmap.root.as_ref(),
85 dmap.root.as_ref(),
86 hg_path,
86 hg_path,
87 &root_dir,
87 &root_dir,
88 root_dir_metadata,
88 root_dir_metadata,
89 root_cached_mtime,
89 root_cached_mtime,
90 is_at_repo_root,
90 is_at_repo_root,
91 )?;
91 )?;
92 let mut outcome = common.outcome.into_inner().unwrap();
92 let mut outcome = common.outcome.into_inner().unwrap();
93 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
93 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
94 let outdated = common.outated_cached_directories.into_inner().unwrap();
94 let outdated = common.outated_cached_directories.into_inner().unwrap();
95
95
96 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
96 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
97 || !outdated.is_empty()
97 || !outdated.is_empty()
98 || !new_cachable.is_empty();
98 || !new_cachable.is_empty();
99
99
100 // Remove outdated mtimes before adding new mtimes, in case a given
100 // Remove outdated mtimes before adding new mtimes, in case a given
101 // directory is both
101 // directory is both
102 for path in &outdated {
102 for path in &outdated {
103 let node = dmap.get_or_insert(path)?;
103 let node = dmap.get_or_insert(path)?;
104 if let NodeData::CachedDirectory { .. } = &node.data {
104 if let NodeData::CachedDirectory { .. } = &node.data {
105 node.data = NodeData::None
105 node.data = NodeData::None
106 }
106 }
107 }
107 }
108 for (path, mtime) in &new_cachable {
108 for (path, mtime) in &new_cachable {
109 let node = dmap.get_or_insert(path)?;
109 let node = dmap.get_or_insert(path)?;
110 match &node.data {
110 match &node.data {
111 NodeData::Entry(_) => {} // Don’t overwrite an entry
111 NodeData::Entry(_) => {} // Don’t overwrite an entry
112 NodeData::CachedDirectory { .. } | NodeData::None => {
112 NodeData::CachedDirectory { .. } | NodeData::None => {
113 node.data = NodeData::CachedDirectory { mtime: *mtime }
113 node.data = NodeData::CachedDirectory { mtime: *mtime }
114 }
114 }
115 }
115 }
116 }
116 }
117
117
118 Ok((outcome, warnings))
118 Ok((outcome, warnings))
119 }
119 }
120
120
121 /// Bag of random things needed by various parts of the algorithm. Reduces the
121 /// Bag of random things needed by various parts of the algorithm. Reduces the
122 /// number of parameters passed to functions.
122 /// number of parameters passed to functions.
123 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
123 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
124 dmap: &'tree DirstateMap<'on_disk>,
124 dmap: &'tree DirstateMap<'on_disk>,
125 options: StatusOptions,
125 options: StatusOptions,
126 matcher: &'a (dyn Matcher + Sync),
126 matcher: &'a (dyn Matcher + Sync),
127 ignore_fn: IgnoreFnType<'a>,
127 ignore_fn: IgnoreFnType<'a>,
128 outcome: Mutex<DirstateStatus<'on_disk>>,
128 outcome: Mutex<DirstateStatus<'on_disk>>,
129 new_cachable_directories: Mutex<Vec<(Cow<'on_disk, HgPath>, Timestamp)>>,
129 new_cachable_directories: Mutex<Vec<(Cow<'on_disk, HgPath>, Timestamp)>>,
130 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
130 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
131
131
132 /// Whether ignore files like `.hgignore` have changed since the previous
132 /// Whether ignore files like `.hgignore` have changed since the previous
133 /// time a `status()` call wrote their hash to the dirstate. `None` means
133 /// time a `status()` call wrote their hash to the dirstate. `None` means
134 /// we don’t know as this run doesn’t list either ignored or uknown files
134 /// we don’t know as this run doesn’t list either ignored or uknown files
135 /// and therefore isn’t reading `.hgignore`.
135 /// and therefore isn’t reading `.hgignore`.
136 ignore_patterns_have_changed: Option<bool>,
136 ignore_patterns_have_changed: Option<bool>,
137
137
138 /// The current time at the start of the `status()` algorithm, as measured
138 /// The current time at the start of the `status()` algorithm, as measured
139 /// and possibly truncated by the filesystem.
139 /// and possibly truncated by the filesystem.
140 filesystem_time_at_status_start: Option<SystemTime>,
140 filesystem_time_at_status_start: Option<SystemTime>,
141 }
141 }
142
142
143 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
143 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
144 fn read_dir(
144 fn read_dir(
145 &self,
145 &self,
146 hg_path: &HgPath,
146 hg_path: &HgPath,
147 fs_path: &Path,
147 fs_path: &Path,
148 is_at_repo_root: bool,
148 is_at_repo_root: bool,
149 ) -> Result<Vec<DirEntry>, ()> {
149 ) -> Result<Vec<DirEntry>, ()> {
150 DirEntry::read_dir(fs_path, is_at_repo_root)
150 DirEntry::read_dir(fs_path, is_at_repo_root)
151 .map_err(|error| self.io_error(error, hg_path))
151 .map_err(|error| self.io_error(error, hg_path))
152 }
152 }
153
153
154 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
154 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
155 let errno = error.raw_os_error().expect("expected real OS error");
155 let errno = error.raw_os_error().expect("expected real OS error");
156 self.outcome
156 self.outcome
157 .lock()
157 .lock()
158 .unwrap()
158 .unwrap()
159 .bad
159 .bad
160 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
160 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
161 }
161 }
162
162
163 fn check_for_outdated_directory_cache(
163 fn check_for_outdated_directory_cache(
164 &self,
164 &self,
165 dirstate_node: &NodeRef<'tree, 'on_disk>,
165 dirstate_node: &NodeRef<'tree, 'on_disk>,
166 ) -> Result<(), DirstateV2ParseError> {
166 ) -> Result<(), DirstateV2ParseError> {
167 if self.ignore_patterns_have_changed == Some(true)
167 if self.ignore_patterns_have_changed == Some(true)
168 && dirstate_node.cached_directory_mtime().is_some()
168 && dirstate_node.cached_directory_mtime().is_some()
169 {
169 {
170 self.outated_cached_directories.lock().unwrap().push(
170 self.outated_cached_directories.lock().unwrap().push(
171 dirstate_node
171 dirstate_node
172 .full_path_borrowed(self.dmap.on_disk)?
172 .full_path_borrowed(self.dmap.on_disk)?
173 .detach_from_tree(),
173 .detach_from_tree(),
174 )
174 )
175 }
175 }
176 Ok(())
176 Ok(())
177 }
177 }
178
178
179 /// If this returns true, we can get accurate results by only using
179 /// If this returns true, we can get accurate results by only using
180 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
180 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
181 /// need to call `read_dir`.
181 /// need to call `read_dir`.
182 fn can_skip_fs_readdir(
182 fn can_skip_fs_readdir(
183 &self,
183 &self,
184 directory_metadata: Option<&std::fs::Metadata>,
184 directory_metadata: Option<&std::fs::Metadata>,
185 cached_directory_mtime: Option<&Timestamp>,
185 cached_directory_mtime: Option<&Timestamp>,
186 ) -> bool {
186 ) -> bool {
187 if !self.options.list_unknown && !self.options.list_ignored {
187 if !self.options.list_unknown && !self.options.list_ignored {
188 // All states that we care about listing have corresponding
188 // All states that we care about listing have corresponding
189 // dirstate entries.
189 // dirstate entries.
190 // This happens for example with `hg status -mard`.
190 // This happens for example with `hg status -mard`.
191 return true;
191 return true;
192 }
192 }
193 if let Some(cached_mtime) = cached_directory_mtime {
193 if !self.options.list_ignored
194 // The dirstate contains a cached mtime for this directory, set by
194 && self.ignore_patterns_have_changed == Some(false)
195 // a previous run of the `status` algorithm which found this
195 {
196 // directory eligible for `read_dir` caching.
196 if let Some(cached_mtime) = cached_directory_mtime {
197 if let Some(meta) = directory_metadata {
197 // The dirstate contains a cached mtime for this directory, set
198 if let Ok(current_mtime) = meta.modified() {
198 // by a previous run of the `status` algorithm which found this
199 if current_mtime == cached_mtime.into() {
199 // directory eligible for `read_dir` caching.
200 // The mtime of that directory has not changed since
200 if let Some(meta) = directory_metadata {
201 // then, which means that the
201 if let Ok(current_mtime) = meta.modified() {
202 // results of `read_dir` should also
202 if current_mtime == cached_mtime.into() {
203 // be unchanged.
203 // The mtime of that directory has not changed
204 return true;
204 // since then, which means that the results of
205 // `read_dir` should also be unchanged.
206 return true;
207 }
205 }
208 }
206 }
209 }
207 }
210 }
208 }
211 }
209 false
212 false
210 }
213 }
211
214
212 /// Returns whether the filesystem directory was found to have any entry
215 /// Returns whether all child entries of the filesystem directory have a
213 /// that does not have a corresponding dirstate tree node.
216 /// corresponding dirstate node or are ignored.
214 fn traverse_fs_directory_and_dirstate(
217 fn traverse_fs_directory_and_dirstate(
215 &self,
218 &self,
216 has_ignored_ancestor: bool,
219 has_ignored_ancestor: bool,
217 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
220 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
218 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
221 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
219 directory_fs_path: &Path,
222 directory_fs_path: &Path,
220 directory_metadata: Option<&std::fs::Metadata>,
223 directory_metadata: Option<&std::fs::Metadata>,
221 cached_directory_mtime: Option<&Timestamp>,
224 cached_directory_mtime: Option<&Timestamp>,
222 is_at_repo_root: bool,
225 is_at_repo_root: bool,
223 ) -> Result<bool, DirstateV2ParseError> {
226 ) -> Result<bool, DirstateV2ParseError> {
224 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
227 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
225 {
228 {
226 dirstate_nodes
229 dirstate_nodes
227 .par_iter()
230 .par_iter()
228 .map(|dirstate_node| {
231 .map(|dirstate_node| {
229 let fs_path = directory_fs_path.join(get_path_from_bytes(
232 let fs_path = directory_fs_path.join(get_path_from_bytes(
230 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
233 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
231 ));
234 ));
232 match std::fs::symlink_metadata(&fs_path) {
235 match std::fs::symlink_metadata(&fs_path) {
233 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
236 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
234 &fs_path,
237 &fs_path,
235 &fs_metadata,
238 &fs_metadata,
236 dirstate_node,
239 dirstate_node,
237 has_ignored_ancestor,
240 has_ignored_ancestor,
238 ),
241 ),
239 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
242 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
240 self.traverse_dirstate_only(dirstate_node)
243 self.traverse_dirstate_only(dirstate_node)
241 }
244 }
242 Err(error) => {
245 Err(error) => {
243 let hg_path =
246 let hg_path =
244 dirstate_node.full_path(self.dmap.on_disk)?;
247 dirstate_node.full_path(self.dmap.on_disk)?;
245 Ok(self.io_error(error, hg_path))
248 Ok(self.io_error(error, hg_path))
246 }
249 }
247 }
250 }
248 })
251 })
249 .collect::<Result<_, _>>()?;
252 .collect::<Result<_, _>>()?;
250
253
251 // Conservatively don’t let the caller assume that there aren’t
254 // We don’t know, so conservatively say this isn’t the case
252 // any, since we don’t know.
255 let children_all_have_dirstate_node_or_are_ignored = false;
253 let directory_has_any_fs_only_entry = true;
254
256
255 return Ok(directory_has_any_fs_only_entry);
257 return Ok(children_all_have_dirstate_node_or_are_ignored);
256 }
258 }
257
259
258 let mut fs_entries = if let Ok(entries) = self.read_dir(
260 let mut fs_entries = if let Ok(entries) = self.read_dir(
259 directory_hg_path,
261 directory_hg_path,
260 directory_fs_path,
262 directory_fs_path,
261 is_at_repo_root,
263 is_at_repo_root,
262 ) {
264 ) {
263 entries
265 entries
264 } else {
266 } else {
265 // Treat an unreadable directory (typically because of insufficient
267 // Treat an unreadable directory (typically because of insufficient
266 // permissions) like an empty directory. `self.read_dir` has
268 // permissions) like an empty directory. `self.read_dir` has
267 // already called `self.io_error` so a warning will be emitted.
269 // already called `self.io_error` so a warning will be emitted.
268 Vec::new()
270 Vec::new()
269 };
271 };
270
272
271 // `merge_join_by` requires both its input iterators to be sorted:
273 // `merge_join_by` requires both its input iterators to be sorted:
272
274
273 let dirstate_nodes = dirstate_nodes.sorted();
275 let dirstate_nodes = dirstate_nodes.sorted();
274 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
276 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
275 // https://github.com/rust-lang/rust/issues/34162
277 // https://github.com/rust-lang/rust/issues/34162
276 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
278 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
277
279
278 // Propagate here any error that would happen inside the comparison
280 // Propagate here any error that would happen inside the comparison
279 // callback below
281 // callback below
280 for dirstate_node in &dirstate_nodes {
282 for dirstate_node in &dirstate_nodes {
281 dirstate_node.base_name(self.dmap.on_disk)?;
283 dirstate_node.base_name(self.dmap.on_disk)?;
282 }
284 }
283 itertools::merge_join_by(
285 itertools::merge_join_by(
284 dirstate_nodes,
286 dirstate_nodes,
285 &fs_entries,
287 &fs_entries,
286 |dirstate_node, fs_entry| {
288 |dirstate_node, fs_entry| {
287 // This `unwrap` never panics because we already propagated
289 // This `unwrap` never panics because we already propagated
288 // those errors above
290 // those errors above
289 dirstate_node
291 dirstate_node
290 .base_name(self.dmap.on_disk)
292 .base_name(self.dmap.on_disk)
291 .unwrap()
293 .unwrap()
292 .cmp(&fs_entry.base_name)
294 .cmp(&fs_entry.base_name)
293 },
295 },
294 )
296 )
295 .par_bridge()
297 .par_bridge()
296 .map(|pair| {
298 .map(|pair| {
297 use itertools::EitherOrBoth::*;
299 use itertools::EitherOrBoth::*;
298 let is_fs_only = pair.is_right();
300 let has_dirstate_node_or_is_ignored;
299 match pair {
301 match pair {
300 Both(dirstate_node, fs_entry) => self
302 Both(dirstate_node, fs_entry) => {
301 .traverse_fs_and_dirstate(
303 self.traverse_fs_and_dirstate(
302 &fs_entry.full_path,
304 &fs_entry.full_path,
303 &fs_entry.metadata,
305 &fs_entry.metadata,
304 dirstate_node,
306 dirstate_node,
305 has_ignored_ancestor,
307 has_ignored_ancestor,
306 )?,
308 )?;
309 has_dirstate_node_or_is_ignored = true
310 }
307 Left(dirstate_node) => {
311 Left(dirstate_node) => {
308 self.traverse_dirstate_only(dirstate_node)?
312 self.traverse_dirstate_only(dirstate_node)?;
313 has_dirstate_node_or_is_ignored = true;
309 }
314 }
310 Right(fs_entry) => self.traverse_fs_only(
315 Right(fs_entry) => {
311 has_ignored_ancestor,
316 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
312 directory_hg_path,
317 has_ignored_ancestor,
313 fs_entry,
318 directory_hg_path,
314 ),
319 fs_entry,
320 )
321 }
315 }
322 }
316 Ok(is_fs_only)
323 Ok(has_dirstate_node_or_is_ignored)
317 })
324 })
318 .try_reduce(|| false, |a, b| Ok(a || b))
325 .try_reduce(|| true, |a, b| Ok(a && b))
319 }
326 }
320
327
321 fn traverse_fs_and_dirstate(
328 fn traverse_fs_and_dirstate(
322 &self,
329 &self,
323 fs_path: &Path,
330 fs_path: &Path,
324 fs_metadata: &std::fs::Metadata,
331 fs_metadata: &std::fs::Metadata,
325 dirstate_node: NodeRef<'tree, 'on_disk>,
332 dirstate_node: NodeRef<'tree, 'on_disk>,
326 has_ignored_ancestor: bool,
333 has_ignored_ancestor: bool,
327 ) -> Result<(), DirstateV2ParseError> {
334 ) -> Result<(), DirstateV2ParseError> {
328 self.check_for_outdated_directory_cache(&dirstate_node)?;
335 self.check_for_outdated_directory_cache(&dirstate_node)?;
329 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
336 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
330 let file_type = fs_metadata.file_type();
337 let file_type = fs_metadata.file_type();
331 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
338 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
332 if !file_or_symlink {
339 if !file_or_symlink {
333 // If we previously had a file here, it was removed (with
340 // If we previously had a file here, it was removed (with
334 // `hg rm` or similar) or deleted before it could be
341 // `hg rm` or similar) or deleted before it could be
335 // replaced by a directory or something else.
342 // replaced by a directory or something else.
336 self.mark_removed_or_deleted_if_file(
343 self.mark_removed_or_deleted_if_file(
337 &hg_path,
344 &hg_path,
338 dirstate_node.state()?,
345 dirstate_node.state()?,
339 );
346 );
340 }
347 }
341 if file_type.is_dir() {
348 if file_type.is_dir() {
342 if self.options.collect_traversed_dirs {
349 if self.options.collect_traversed_dirs {
343 self.outcome
350 self.outcome
344 .lock()
351 .lock()
345 .unwrap()
352 .unwrap()
346 .traversed
353 .traversed
347 .push(hg_path.detach_from_tree())
354 .push(hg_path.detach_from_tree())
348 }
355 }
349 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
356 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
350 let is_at_repo_root = false;
357 let is_at_repo_root = false;
351 let directory_has_any_fs_only_entry = self
358 let children_all_have_dirstate_node_or_are_ignored = self
352 .traverse_fs_directory_and_dirstate(
359 .traverse_fs_directory_and_dirstate(
353 is_ignored,
360 is_ignored,
354 dirstate_node.children(self.dmap.on_disk)?,
361 dirstate_node.children(self.dmap.on_disk)?,
355 hg_path,
362 hg_path,
356 fs_path,
363 fs_path,
357 Some(fs_metadata),
364 Some(fs_metadata),
358 dirstate_node.cached_directory_mtime(),
365 dirstate_node.cached_directory_mtime(),
359 is_at_repo_root,
366 is_at_repo_root,
360 )?;
367 )?;
361 self.maybe_save_directory_mtime(
368 self.maybe_save_directory_mtime(
362 directory_has_any_fs_only_entry,
369 children_all_have_dirstate_node_or_are_ignored,
363 fs_metadata,
370 fs_metadata,
364 dirstate_node,
371 dirstate_node,
365 )?
372 )?
366 } else {
373 } else {
367 if file_or_symlink && self.matcher.matches(hg_path) {
374 if file_or_symlink && self.matcher.matches(hg_path) {
368 if let Some(state) = dirstate_node.state()? {
375 if let Some(state) = dirstate_node.state()? {
369 match state {
376 match state {
370 EntryState::Added => self
377 EntryState::Added => self
371 .outcome
378 .outcome
372 .lock()
379 .lock()
373 .unwrap()
380 .unwrap()
374 .added
381 .added
375 .push(hg_path.detach_from_tree()),
382 .push(hg_path.detach_from_tree()),
376 EntryState::Removed => self
383 EntryState::Removed => self
377 .outcome
384 .outcome
378 .lock()
385 .lock()
379 .unwrap()
386 .unwrap()
380 .removed
387 .removed
381 .push(hg_path.detach_from_tree()),
388 .push(hg_path.detach_from_tree()),
382 EntryState::Merged => self
389 EntryState::Merged => self
383 .outcome
390 .outcome
384 .lock()
391 .lock()
385 .unwrap()
392 .unwrap()
386 .modified
393 .modified
387 .push(hg_path.detach_from_tree()),
394 .push(hg_path.detach_from_tree()),
388 EntryState::Normal => self
395 EntryState::Normal => self
389 .handle_normal_file(&dirstate_node, fs_metadata)?,
396 .handle_normal_file(&dirstate_node, fs_metadata)?,
390 // This variant is not used in DirstateMap
397 // This variant is not used in DirstateMap
391 // nodes
398 // nodes
392 EntryState::Unknown => unreachable!(),
399 EntryState::Unknown => unreachable!(),
393 }
400 }
394 } else {
401 } else {
395 // `node.entry.is_none()` indicates a "directory"
402 // `node.entry.is_none()` indicates a "directory"
396 // node, but the filesystem has a file
403 // node, but the filesystem has a file
397 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path)
404 self.mark_unknown_or_ignored(
405 has_ignored_ancestor,
406 hg_path,
407 );
398 }
408 }
399 }
409 }
400
410
401 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
411 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
402 {
412 {
403 self.traverse_dirstate_only(child_node)?
413 self.traverse_dirstate_only(child_node)?
404 }
414 }
405 }
415 }
406 Ok(())
416 Ok(())
407 }
417 }
408
418
409 fn maybe_save_directory_mtime(
419 fn maybe_save_directory_mtime(
410 &self,
420 &self,
411 directory_has_any_fs_only_entry: bool,
421 children_all_have_dirstate_node_or_are_ignored: bool,
412 directory_metadata: &std::fs::Metadata,
422 directory_metadata: &std::fs::Metadata,
413 dirstate_node: NodeRef<'tree, 'on_disk>,
423 dirstate_node: NodeRef<'tree, 'on_disk>,
414 ) -> Result<(), DirstateV2ParseError> {
424 ) -> Result<(), DirstateV2ParseError> {
415 if !directory_has_any_fs_only_entry {
425 if children_all_have_dirstate_node_or_are_ignored {
416 // All filesystem directory entries from `read_dir` have a
426 // All filesystem directory entries from `read_dir` have a
417 // corresponding node in the dirstate, so we can reconstitute the
427 // corresponding node in the dirstate, so we can reconstitute the
418 // names of those entries without calling `read_dir` again.
428 // names of those entries without calling `read_dir` again.
419 if let (Some(status_start), Ok(directory_mtime)) = (
429 if let (Some(status_start), Ok(directory_mtime)) = (
420 &self.filesystem_time_at_status_start,
430 &self.filesystem_time_at_status_start,
421 directory_metadata.modified(),
431 directory_metadata.modified(),
422 ) {
432 ) {
423 // Although the Rust standard library’s `SystemTime` type
433 // Although the Rust standard library’s `SystemTime` type
424 // has nanosecond precision, the times reported for a
434 // has nanosecond precision, the times reported for a
425 // directory’s (or file’s) modified time may have lower
435 // directory’s (or file’s) modified time may have lower
426 // resolution based on the filesystem (for example ext3
436 // resolution based on the filesystem (for example ext3
427 // only stores integer seconds), kernel (see
437 // only stores integer seconds), kernel (see
428 // https://stackoverflow.com/a/14393315/1162888), etc.
438 // https://stackoverflow.com/a/14393315/1162888), etc.
429 if &directory_mtime >= status_start {
439 if &directory_mtime >= status_start {
430 // The directory was modified too recently, don’t cache its
440 // The directory was modified too recently, don’t cache its
431 // `read_dir` results.
441 // `read_dir` results.
432 //
442 //
433 // A timeline like this is possible:
443 // A timeline like this is possible:
434 //
444 //
435 // 1. A change to this directory (direct child was
445 // 1. A change to this directory (direct child was
436 // added or removed) cause its mtime to be set
446 // added or removed) cause its mtime to be set
437 // (possibly truncated) to `directory_mtime`
447 // (possibly truncated) to `directory_mtime`
438 // 2. This `status` algorithm calls `read_dir`
448 // 2. This `status` algorithm calls `read_dir`
439 // 3. An other change is made to the same directory is
449 // 3. An other change is made to the same directory is
440 // made so that calling `read_dir` agin would give
450 // made so that calling `read_dir` agin would give
441 // different results, but soon enough after 1. that
451 // different results, but soon enough after 1. that
442 // the mtime stays the same
452 // the mtime stays the same
443 //
453 //
444 // On a system where the time resolution poor, this
454 // On a system where the time resolution poor, this
445 // scenario is not unlikely if all three steps are caused
455 // scenario is not unlikely if all three steps are caused
446 // by the same script.
456 // by the same script.
447 } else {
457 } else {
448 // We’ve observed (through `status_start`) that time has
458 // We’ve observed (through `status_start`) that time has
449 // “progressed” since `directory_mtime`, so any further
459 // “progressed” since `directory_mtime`, so any further
450 // change to this directory is extremely likely to cause a
460 // change to this directory is extremely likely to cause a
451 // different mtime.
461 // different mtime.
452 //
462 //
453 // Having the same mtime again is not entirely impossible
463 // Having the same mtime again is not entirely impossible
454 // since the system clock is not monotonous. It could jump
464 // since the system clock is not monotonous. It could jump
455 // backward to some point before `directory_mtime`, then a
465 // backward to some point before `directory_mtime`, then a
456 // directory change could potentially happen during exactly
466 // directory change could potentially happen during exactly
457 // the wrong tick.
467 // the wrong tick.
458 //
468 //
459 // We deem this scenario (unlike the previous one) to be
469 // We deem this scenario (unlike the previous one) to be
460 // unlikely enough in practice.
470 // unlikely enough in practice.
461 let timestamp = directory_mtime.into();
471 let timestamp = directory_mtime.into();
462 let cached = dirstate_node.cached_directory_mtime();
472 let cached = dirstate_node.cached_directory_mtime();
463 if cached != Some(&timestamp) {
473 if cached != Some(&timestamp) {
464 let hg_path = dirstate_node
474 let hg_path = dirstate_node
465 .full_path_borrowed(self.dmap.on_disk)?
475 .full_path_borrowed(self.dmap.on_disk)?
466 .detach_from_tree();
476 .detach_from_tree();
467 self.new_cachable_directories
477 self.new_cachable_directories
468 .lock()
478 .lock()
469 .unwrap()
479 .unwrap()
470 .push((hg_path, timestamp))
480 .push((hg_path, timestamp))
471 }
481 }
472 }
482 }
473 }
483 }
474 }
484 }
475 Ok(())
485 Ok(())
476 }
486 }
477
487
478 /// A file with `EntryState::Normal` in the dirstate was found in the
488 /// A file with `EntryState::Normal` in the dirstate was found in the
479 /// filesystem
489 /// filesystem
480 fn handle_normal_file(
490 fn handle_normal_file(
481 &self,
491 &self,
482 dirstate_node: &NodeRef<'tree, 'on_disk>,
492 dirstate_node: &NodeRef<'tree, 'on_disk>,
483 fs_metadata: &std::fs::Metadata,
493 fs_metadata: &std::fs::Metadata,
484 ) -> Result<(), DirstateV2ParseError> {
494 ) -> Result<(), DirstateV2ParseError> {
485 // Keep the low 31 bits
495 // Keep the low 31 bits
486 fn truncate_u64(value: u64) -> i32 {
496 fn truncate_u64(value: u64) -> i32 {
487 (value & 0x7FFF_FFFF) as i32
497 (value & 0x7FFF_FFFF) as i32
488 }
498 }
489 fn truncate_i64(value: i64) -> i32 {
499 fn truncate_i64(value: i64) -> i32 {
490 (value & 0x7FFF_FFFF) as i32
500 (value & 0x7FFF_FFFF) as i32
491 }
501 }
492
502
493 let entry = dirstate_node
503 let entry = dirstate_node
494 .entry()?
504 .entry()?
495 .expect("handle_normal_file called with entry-less node");
505 .expect("handle_normal_file called with entry-less node");
496 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
506 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
497 let mode_changed =
507 let mode_changed =
498 || self.options.check_exec && entry.mode_changed(fs_metadata);
508 || self.options.check_exec && entry.mode_changed(fs_metadata);
499 let size_changed = entry.size != truncate_u64(fs_metadata.len());
509 let size_changed = entry.size != truncate_u64(fs_metadata.len());
500 if entry.size >= 0
510 if entry.size >= 0
501 && size_changed
511 && size_changed
502 && fs_metadata.file_type().is_symlink()
512 && fs_metadata.file_type().is_symlink()
503 {
513 {
504 // issue6456: Size returned may be longer due to encryption
514 // issue6456: Size returned may be longer due to encryption
505 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
515 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
506 self.outcome
516 self.outcome
507 .lock()
517 .lock()
508 .unwrap()
518 .unwrap()
509 .unsure
519 .unsure
510 .push(hg_path.detach_from_tree())
520 .push(hg_path.detach_from_tree())
511 } else if dirstate_node.has_copy_source()
521 } else if dirstate_node.has_copy_source()
512 || entry.is_from_other_parent()
522 || entry.is_from_other_parent()
513 || (entry.size >= 0 && (size_changed || mode_changed()))
523 || (entry.size >= 0 && (size_changed || mode_changed()))
514 {
524 {
515 self.outcome
525 self.outcome
516 .lock()
526 .lock()
517 .unwrap()
527 .unwrap()
518 .modified
528 .modified
519 .push(hg_path.detach_from_tree())
529 .push(hg_path.detach_from_tree())
520 } else {
530 } else {
521 let mtime = mtime_seconds(fs_metadata);
531 let mtime = mtime_seconds(fs_metadata);
522 if truncate_i64(mtime) != entry.mtime
532 if truncate_i64(mtime) != entry.mtime
523 || mtime == self.options.last_normal_time
533 || mtime == self.options.last_normal_time
524 {
534 {
525 self.outcome
535 self.outcome
526 .lock()
536 .lock()
527 .unwrap()
537 .unwrap()
528 .unsure
538 .unsure
529 .push(hg_path.detach_from_tree())
539 .push(hg_path.detach_from_tree())
530 } else if self.options.list_clean {
540 } else if self.options.list_clean {
531 self.outcome
541 self.outcome
532 .lock()
542 .lock()
533 .unwrap()
543 .unwrap()
534 .clean
544 .clean
535 .push(hg_path.detach_from_tree())
545 .push(hg_path.detach_from_tree())
536 }
546 }
537 }
547 }
538 Ok(())
548 Ok(())
539 }
549 }
540
550
541 /// A node in the dirstate tree has no corresponding filesystem entry
551 /// A node in the dirstate tree has no corresponding filesystem entry
542 fn traverse_dirstate_only(
552 fn traverse_dirstate_only(
543 &self,
553 &self,
544 dirstate_node: NodeRef<'tree, 'on_disk>,
554 dirstate_node: NodeRef<'tree, 'on_disk>,
545 ) -> Result<(), DirstateV2ParseError> {
555 ) -> Result<(), DirstateV2ParseError> {
546 self.check_for_outdated_directory_cache(&dirstate_node)?;
556 self.check_for_outdated_directory_cache(&dirstate_node)?;
547 self.mark_removed_or_deleted_if_file(
557 self.mark_removed_or_deleted_if_file(
548 &dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
558 &dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
549 dirstate_node.state()?,
559 dirstate_node.state()?,
550 );
560 );
551 dirstate_node
561 dirstate_node
552 .children(self.dmap.on_disk)?
562 .children(self.dmap.on_disk)?
553 .par_iter()
563 .par_iter()
554 .map(|child_node| self.traverse_dirstate_only(child_node))
564 .map(|child_node| self.traverse_dirstate_only(child_node))
555 .collect()
565 .collect()
556 }
566 }
557
567
558 /// A node in the dirstate tree has no corresponding *file* on the
568 /// A node in the dirstate tree has no corresponding *file* on the
559 /// filesystem
569 /// filesystem
560 ///
570 ///
561 /// Does nothing on a "directory" node
571 /// Does nothing on a "directory" node
562 fn mark_removed_or_deleted_if_file(
572 fn mark_removed_or_deleted_if_file(
563 &self,
573 &self,
564 hg_path: &BorrowedPath<'tree, 'on_disk>,
574 hg_path: &BorrowedPath<'tree, 'on_disk>,
565 dirstate_node_state: Option<EntryState>,
575 dirstate_node_state: Option<EntryState>,
566 ) {
576 ) {
567 if let Some(state) = dirstate_node_state {
577 if let Some(state) = dirstate_node_state {
568 if self.matcher.matches(hg_path) {
578 if self.matcher.matches(hg_path) {
569 if let EntryState::Removed = state {
579 if let EntryState::Removed = state {
570 self.outcome
580 self.outcome
571 .lock()
581 .lock()
572 .unwrap()
582 .unwrap()
573 .removed
583 .removed
574 .push(hg_path.detach_from_tree())
584 .push(hg_path.detach_from_tree())
575 } else {
585 } else {
576 self.outcome
586 self.outcome
577 .lock()
587 .lock()
578 .unwrap()
588 .unwrap()
579 .deleted
589 .deleted
580 .push(hg_path.detach_from_tree())
590 .push(hg_path.detach_from_tree())
581 }
591 }
582 }
592 }
583 }
593 }
584 }
594 }
585
595
586 /// Something in the filesystem has no corresponding dirstate node
596 /// Something in the filesystem has no corresponding dirstate node
597 ///
598 /// Returns whether that path is ignored
587 fn traverse_fs_only(
599 fn traverse_fs_only(
588 &self,
600 &self,
589 has_ignored_ancestor: bool,
601 has_ignored_ancestor: bool,
590 directory_hg_path: &HgPath,
602 directory_hg_path: &HgPath,
591 fs_entry: &DirEntry,
603 fs_entry: &DirEntry,
592 ) {
604 ) -> bool {
593 let hg_path = directory_hg_path.join(&fs_entry.base_name);
605 let hg_path = directory_hg_path.join(&fs_entry.base_name);
594 let file_type = fs_entry.metadata.file_type();
606 let file_type = fs_entry.metadata.file_type();
595 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
607 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
596 if file_type.is_dir() {
608 if file_type.is_dir() {
597 let is_ignored =
609 let is_ignored =
598 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
610 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
599 let traverse_children = if is_ignored {
611 let traverse_children = if is_ignored {
600 // Descendants of an ignored directory are all ignored
612 // Descendants of an ignored directory are all ignored
601 self.options.list_ignored
613 self.options.list_ignored
602 } else {
614 } else {
603 // Descendants of an unknown directory may be either unknown or
615 // Descendants of an unknown directory may be either unknown or
604 // ignored
616 // ignored
605 self.options.list_unknown || self.options.list_ignored
617 self.options.list_unknown || self.options.list_ignored
606 };
618 };
607 if traverse_children {
619 if traverse_children {
608 let is_at_repo_root = false;
620 let is_at_repo_root = false;
609 if let Ok(children_fs_entries) = self.read_dir(
621 if let Ok(children_fs_entries) = self.read_dir(
610 &hg_path,
622 &hg_path,
611 &fs_entry.full_path,
623 &fs_entry.full_path,
612 is_at_repo_root,
624 is_at_repo_root,
613 ) {
625 ) {
614 children_fs_entries.par_iter().for_each(|child_fs_entry| {
626 children_fs_entries.par_iter().for_each(|child_fs_entry| {
615 self.traverse_fs_only(
627 self.traverse_fs_only(
616 is_ignored,
628 is_ignored,
617 &hg_path,
629 &hg_path,
618 child_fs_entry,
630 child_fs_entry,
619 )
631 );
620 })
632 })
621 }
633 }
622 }
634 }
623 if self.options.collect_traversed_dirs {
635 if self.options.collect_traversed_dirs {
624 self.outcome.lock().unwrap().traversed.push(hg_path.into())
636 self.outcome.lock().unwrap().traversed.push(hg_path.into())
625 }
637 }
626 } else if file_or_symlink && self.matcher.matches(&hg_path) {
638 is_ignored
627 self.mark_unknown_or_ignored(
639 } else {
628 has_ignored_ancestor,
640 if file_or_symlink {
629 &BorrowedPath::InMemory(&hg_path),
641 if self.matcher.matches(&hg_path) {
630 )
642 self.mark_unknown_or_ignored(
643 has_ignored_ancestor,
644 &BorrowedPath::InMemory(&hg_path),
645 )
646 } else {
647 // We haven’t computed whether this path is ignored. It
648 // might not be, and a future run of status might have a
649 // different matcher that matches it. So treat it as not
650 // ignored. That is, inhibit readdir caching of the parent
651 // directory.
652 false
653 }
654 } else {
655 // This is neither a directory, a plain file, or a symlink.
656 // Treat it like an ignored file.
657 true
658 }
631 }
659 }
632 }
660 }
633
661
662 /// Returns whether that path is ignored
634 fn mark_unknown_or_ignored(
663 fn mark_unknown_or_ignored(
635 &self,
664 &self,
636 has_ignored_ancestor: bool,
665 has_ignored_ancestor: bool,
637 hg_path: &BorrowedPath<'_, 'on_disk>,
666 hg_path: &BorrowedPath<'_, 'on_disk>,
638 ) {
667 ) -> bool {
639 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
668 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
640 if is_ignored {
669 if is_ignored {
641 if self.options.list_ignored {
670 if self.options.list_ignored {
642 self.outcome
671 self.outcome
643 .lock()
672 .lock()
644 .unwrap()
673 .unwrap()
645 .ignored
674 .ignored
646 .push(hg_path.detach_from_tree())
675 .push(hg_path.detach_from_tree())
647 }
676 }
648 } else {
677 } else {
649 if self.options.list_unknown {
678 if self.options.list_unknown {
650 self.outcome
679 self.outcome
651 .lock()
680 .lock()
652 .unwrap()
681 .unwrap()
653 .unknown
682 .unknown
654 .push(hg_path.detach_from_tree())
683 .push(hg_path.detach_from_tree())
655 }
684 }
656 }
685 }
686 is_ignored
657 }
687 }
658 }
688 }
659
689
660 #[cfg(unix)] // TODO
690 #[cfg(unix)] // TODO
661 fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
691 fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
662 // Going through `Metadata::modified()` would be portable, but would take
692 // Going through `Metadata::modified()` would be portable, but would take
663 // care to construct a `SystemTime` value with sub-second precision just
693 // care to construct a `SystemTime` value with sub-second precision just
664 // for us to throw that away here.
694 // for us to throw that away here.
665 use std::os::unix::fs::MetadataExt;
695 use std::os::unix::fs::MetadataExt;
666 metadata.mtime()
696 metadata.mtime()
667 }
697 }
668
698
669 struct DirEntry {
699 struct DirEntry {
670 base_name: HgPathBuf,
700 base_name: HgPathBuf,
671 full_path: PathBuf,
701 full_path: PathBuf,
672 metadata: std::fs::Metadata,
702 metadata: std::fs::Metadata,
673 }
703 }
674
704
675 impl DirEntry {
705 impl DirEntry {
676 /// Returns **unsorted** entries in the given directory, with name and
706 /// Returns **unsorted** entries in the given directory, with name and
677 /// metadata.
707 /// metadata.
678 ///
708 ///
679 /// If a `.hg` sub-directory is encountered:
709 /// If a `.hg` sub-directory is encountered:
680 ///
710 ///
681 /// * At the repository root, ignore that sub-directory
711 /// * At the repository root, ignore that sub-directory
682 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
712 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
683 /// list instead.
713 /// list instead.
684 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
714 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
685 let mut results = Vec::new();
715 let mut results = Vec::new();
686 for entry in path.read_dir()? {
716 for entry in path.read_dir()? {
687 let entry = entry?;
717 let entry = entry?;
688 let metadata = entry.metadata()?;
718 let metadata = entry.metadata()?;
689 let name = get_bytes_from_os_string(entry.file_name());
719 let name = get_bytes_from_os_string(entry.file_name());
690 // FIXME don't do this when cached
720 // FIXME don't do this when cached
691 if name == b".hg" {
721 if name == b".hg" {
692 if is_at_repo_root {
722 if is_at_repo_root {
693 // Skip the repo’s own .hg (might be a symlink)
723 // Skip the repo’s own .hg (might be a symlink)
694 continue;
724 continue;
695 } else if metadata.is_dir() {
725 } else if metadata.is_dir() {
696 // A .hg sub-directory at another location means a subrepo,
726 // A .hg sub-directory at another location means a subrepo,
697 // skip it entirely.
727 // skip it entirely.
698 return Ok(Vec::new());
728 return Ok(Vec::new());
699 }
729 }
700 }
730 }
701 results.push(DirEntry {
731 results.push(DirEntry {
702 base_name: name.into(),
732 base_name: name.into(),
703 full_path: entry.path(),
733 full_path: entry.path(),
704 metadata,
734 metadata,
705 })
735 })
706 }
736 }
707 Ok(results)
737 Ok(results)
708 }
738 }
709 }
739 }
710
740
711 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
741 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
712 /// of the give repository.
742 /// of the give repository.
713 ///
743 ///
714 /// This is similar to `SystemTime::now()`, with the result truncated to the
744 /// This is similar to `SystemTime::now()`, with the result truncated to the
715 /// same time resolution as other files’ modification times. Using `.hg`
745 /// same time resolution as other files’ modification times. Using `.hg`
716 /// instead of the system’s default temporary directory (such as `/tmp`) makes
746 /// instead of the system’s default temporary directory (such as `/tmp`) makes
717 /// it more likely the temporary file is in the same disk partition as contents
747 /// it more likely the temporary file is in the same disk partition as contents
718 /// of the working directory, which can matter since different filesystems may
748 /// of the working directory, which can matter since different filesystems may
719 /// store timestamps with different resolutions.
749 /// store timestamps with different resolutions.
720 ///
750 ///
721 /// This may fail, typically if we lack write permissions. In that case we
751 /// This may fail, typically if we lack write permissions. In that case we
722 /// should continue the `status()` algoritm anyway and consider the current
752 /// should continue the `status()` algoritm anyway and consider the current
723 /// date/time to be unknown.
753 /// date/time to be unknown.
724 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
754 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
725 tempfile::tempfile_in(repo_root.join(".hg"))?
755 tempfile::tempfile_in(repo_root.join(".hg"))?
726 .metadata()?
756 .metadata()?
727 .modified()
757 .modified()
728 }
758 }
General Comments 0
You need to be logged in to leave comments. Login now