rust-status: don't trigger dirstate v1 rewrite when only v2 data is changed...
Raphaël Gomès
r50232:6cd24955 stable
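The hunk below adds a `DirstateVersion` enum and a `dirstate_version` field to `DirstateMap`, recording which on-disk format the map was loaded from. As a rough illustration of the intent stated in the commit title, here is a minimal caller-side sketch, not taken from the changeset: the `PendingWrite` struct, its `only_v2_data_changed` flag, and the `needs_v1_rewrite` helper are illustrative assumptions; only the `DirstateVersion` enum mirrors what the diff actually adds.

    /// Hypothetical sketch: decide whether a dirstate write needs to touch
    /// the v1 `.hg/dirstate` file at all, based on the format the map was
    /// loaded from. Everything except `DirstateVersion` is assumed here
    /// purely for illustration.
    #[derive(Debug, PartialEq, Eq)]
    pub enum DirstateVersion {
        V1,
        V2,
    }

    struct PendingWrite {
        version: DirstateVersion,
        /// True when only dirstate-v2 payload (for example cached directory
        /// mtimes) changed, with no v1-visible content to persist.
        only_v2_data_changed: bool,
    }

    impl PendingWrite {
        /// Only rewrite the v1 file when the map actually came from a v1
        /// file and something v1-relevant changed.
        fn needs_v1_rewrite(&self) -> bool {
            self.version == DirstateVersion::V1 && !self.only_v2_data_changed
        }
    }

    fn main() {
        let write = PendingWrite {
            version: DirstateVersion::V2,
            only_v2_data_changed: true,
        };
        // A v2 map whose only changes are v2-side data does not force a
        // v1 rewrite, which is the behaviour this changeset is after.
        assert!(!write.needs_v1_rewrite());
    }
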
@@ -1,1203 +1,1213 @@
1 use bytes_cast::BytesCast;
1 use bytes_cast::BytesCast;
2 use micro_timer::timed;
2 use micro_timer::timed;
3 use std::borrow::Cow;
3 use std::borrow::Cow;
4 use std::path::PathBuf;
4 use std::path::PathBuf;
5
5
6 use super::on_disk;
6 use super::on_disk;
7 use super::on_disk::DirstateV2ParseError;
7 use super::on_disk::DirstateV2ParseError;
8 use super::owning::OwningDirstateMap;
8 use super::owning::OwningDirstateMap;
9 use super::path_with_basename::WithBasename;
9 use super::path_with_basename::WithBasename;
10 use crate::dirstate::parsers::pack_entry;
10 use crate::dirstate::parsers::pack_entry;
11 use crate::dirstate::parsers::packed_entry_size;
11 use crate::dirstate::parsers::packed_entry_size;
12 use crate::dirstate::parsers::parse_dirstate_entries;
12 use crate::dirstate::parsers::parse_dirstate_entries;
13 use crate::dirstate::CopyMapIter;
13 use crate::dirstate::CopyMapIter;
14 use crate::dirstate::StateMapIter;
14 use crate::dirstate::StateMapIter;
15 use crate::dirstate::TruncatedTimestamp;
15 use crate::dirstate::TruncatedTimestamp;
16 use crate::dirstate::SIZE_FROM_OTHER_PARENT;
16 use crate::dirstate::SIZE_FROM_OTHER_PARENT;
17 use crate::dirstate::SIZE_NON_NORMAL;
17 use crate::dirstate::SIZE_NON_NORMAL;
18 use crate::matchers::Matcher;
18 use crate::matchers::Matcher;
19 use crate::utils::hg_path::{HgPath, HgPathBuf};
19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 use crate::DirstateEntry;
20 use crate::DirstateEntry;
21 use crate::DirstateError;
21 use crate::DirstateError;
22 use crate::DirstateParents;
22 use crate::DirstateParents;
23 use crate::DirstateStatus;
23 use crate::DirstateStatus;
24 use crate::EntryState;
24 use crate::EntryState;
25 use crate::FastHashMap;
25 use crate::FastHashMap;
26 use crate::PatternFileWarning;
26 use crate::PatternFileWarning;
27 use crate::StatusError;
27 use crate::StatusError;
28 use crate::StatusOptions;
28 use crate::StatusOptions;
29
29
30 /// Append to an existing data file if the amount of unreachable data (not used
30 /// Append to an existing data file if the amount of unreachable data (not used
31 /// anymore) is less than this fraction of the total amount of existing data.
31 /// anymore) is less than this fraction of the total amount of existing data.
32 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
32 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
33
33
34 #[derive(Debug, PartialEq, Eq)]
35 /// Version of the on-disk format
36 pub enum DirstateVersion {
37 V1,
38 V2,
39 }
40
34 pub struct DirstateMap<'on_disk> {
41 pub struct DirstateMap<'on_disk> {
35 /// Contents of the `.hg/dirstate` file
42 /// Contents of the `.hg/dirstate` file
36 pub(super) on_disk: &'on_disk [u8],
43 pub(super) on_disk: &'on_disk [u8],
37
44
38 pub(super) root: ChildNodes<'on_disk>,
45 pub(super) root: ChildNodes<'on_disk>,
39
46
40 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
47 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
41 pub(super) nodes_with_entry_count: u32,
48 pub(super) nodes_with_entry_count: u32,
42
49
43 /// Number of nodes anywhere in the tree that have
50 /// Number of nodes anywhere in the tree that have
44 /// `.copy_source.is_some()`.
51 /// `.copy_source.is_some()`.
45 pub(super) nodes_with_copy_source_count: u32,
52 pub(super) nodes_with_copy_source_count: u32,
46
53
47 /// See on_disk::Header
54 /// See on_disk::Header
48 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
55 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
49
56
50 /// How many bytes of `on_disk` are not used anymore
57 /// How many bytes of `on_disk` are not used anymore
51 pub(super) unreachable_bytes: u32,
58 pub(super) unreachable_bytes: u32,
52
59
53 /// Size of the data used to first load this `DirstateMap`. Used in case
60 /// Size of the data used to first load this `DirstateMap`. Used in case
54 /// we need to write some new metadata, but no new data on disk.
61 /// we need to write some new metadata, but no new data on disk.
55 pub(super) old_data_size: usize,
62 pub(super) old_data_size: usize,
63
64 pub(super) dirstate_version: DirstateVersion,
56 }
65 }
57
66
58 /// Using a plain `HgPathBuf` of the full path from the repository root as a
67 /// Using a plain `HgPathBuf` of the full path from the repository root as a
59 /// map key would also work: all paths in a given map have the same parent
68 /// map key would also work: all paths in a given map have the same parent
60 /// path, so comparing full paths gives the same result as comparing base
69 /// path, so comparing full paths gives the same result as comparing base
61 /// names. However `HashMap` would waste time always re-hashing the same
70 /// names. However `HashMap` would waste time always re-hashing the same
62 /// string prefix.
71 /// string prefix.
63 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
72 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
64
73
65 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
74 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
66 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
75 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
67 pub(super) enum BorrowedPath<'tree, 'on_disk> {
76 pub(super) enum BorrowedPath<'tree, 'on_disk> {
68 InMemory(&'tree HgPathBuf),
77 InMemory(&'tree HgPathBuf),
69 OnDisk(&'on_disk HgPath),
78 OnDisk(&'on_disk HgPath),
70 }
79 }
71
80
72 pub(super) enum ChildNodes<'on_disk> {
81 pub(super) enum ChildNodes<'on_disk> {
73 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
82 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
74 OnDisk(&'on_disk [on_disk::Node]),
83 OnDisk(&'on_disk [on_disk::Node]),
75 }
84 }
76
85
77 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
86 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
78 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
87 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
79 OnDisk(&'on_disk [on_disk::Node]),
88 OnDisk(&'on_disk [on_disk::Node]),
80 }
89 }
81
90
82 pub(super) enum NodeRef<'tree, 'on_disk> {
91 pub(super) enum NodeRef<'tree, 'on_disk> {
83 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
92 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
84 OnDisk(&'on_disk on_disk::Node),
93 OnDisk(&'on_disk on_disk::Node),
85 }
94 }
86
95
87 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
96 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
88 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
97 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
89 match *self {
98 match *self {
90 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
99 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
91 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
100 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
92 }
101 }
93 }
102 }
94 }
103 }
95
104
96 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
105 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
97 type Target = HgPath;
106 type Target = HgPath;
98
107
99 fn deref(&self) -> &HgPath {
108 fn deref(&self) -> &HgPath {
100 match *self {
109 match *self {
101 BorrowedPath::InMemory(in_memory) => in_memory,
110 BorrowedPath::InMemory(in_memory) => in_memory,
102 BorrowedPath::OnDisk(on_disk) => on_disk,
111 BorrowedPath::OnDisk(on_disk) => on_disk,
103 }
112 }
104 }
113 }
105 }
114 }
106
115
107 impl Default for ChildNodes<'_> {
116 impl Default for ChildNodes<'_> {
108 fn default() -> Self {
117 fn default() -> Self {
109 ChildNodes::InMemory(Default::default())
118 ChildNodes::InMemory(Default::default())
110 }
119 }
111 }
120 }
112
121
113 impl<'on_disk> ChildNodes<'on_disk> {
122 impl<'on_disk> ChildNodes<'on_disk> {
114 pub(super) fn as_ref<'tree>(
123 pub(super) fn as_ref<'tree>(
115 &'tree self,
124 &'tree self,
116 ) -> ChildNodesRef<'tree, 'on_disk> {
125 ) -> ChildNodesRef<'tree, 'on_disk> {
117 match self {
126 match self {
118 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
127 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
119 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
128 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
120 }
129 }
121 }
130 }
122
131
123 pub(super) fn is_empty(&self) -> bool {
132 pub(super) fn is_empty(&self) -> bool {
124 match self {
133 match self {
125 ChildNodes::InMemory(nodes) => nodes.is_empty(),
134 ChildNodes::InMemory(nodes) => nodes.is_empty(),
126 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
135 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
127 }
136 }
128 }
137 }
129
138
130 fn make_mut(
139 fn make_mut(
131 &mut self,
140 &mut self,
132 on_disk: &'on_disk [u8],
141 on_disk: &'on_disk [u8],
133 unreachable_bytes: &mut u32,
142 unreachable_bytes: &mut u32,
134 ) -> Result<
143 ) -> Result<
135 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
144 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
136 DirstateV2ParseError,
145 DirstateV2ParseError,
137 > {
146 > {
138 match self {
147 match self {
139 ChildNodes::InMemory(nodes) => Ok(nodes),
148 ChildNodes::InMemory(nodes) => Ok(nodes),
140 ChildNodes::OnDisk(nodes) => {
149 ChildNodes::OnDisk(nodes) => {
141 *unreachable_bytes +=
150 *unreachable_bytes +=
142 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
151 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
143 let nodes = nodes
152 let nodes = nodes
144 .iter()
153 .iter()
145 .map(|node| {
154 .map(|node| {
146 Ok((
155 Ok((
147 node.path(on_disk)?,
156 node.path(on_disk)?,
148 node.to_in_memory_node(on_disk)?,
157 node.to_in_memory_node(on_disk)?,
149 ))
158 ))
150 })
159 })
151 .collect::<Result<_, _>>()?;
160 .collect::<Result<_, _>>()?;
152 *self = ChildNodes::InMemory(nodes);
161 *self = ChildNodes::InMemory(nodes);
153 match self {
162 match self {
154 ChildNodes::InMemory(nodes) => Ok(nodes),
163 ChildNodes::InMemory(nodes) => Ok(nodes),
155 ChildNodes::OnDisk(_) => unreachable!(),
164 ChildNodes::OnDisk(_) => unreachable!(),
156 }
165 }
157 }
166 }
158 }
167 }
159 }
168 }
160 }
169 }
161
170
162 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
171 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
163 pub(super) fn get(
172 pub(super) fn get(
164 &self,
173 &self,
165 base_name: &HgPath,
174 base_name: &HgPath,
166 on_disk: &'on_disk [u8],
175 on_disk: &'on_disk [u8],
167 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
176 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
168 match self {
177 match self {
169 ChildNodesRef::InMemory(nodes) => Ok(nodes
178 ChildNodesRef::InMemory(nodes) => Ok(nodes
170 .get_key_value(base_name)
179 .get_key_value(base_name)
171 .map(|(k, v)| NodeRef::InMemory(k, v))),
180 .map(|(k, v)| NodeRef::InMemory(k, v))),
172 ChildNodesRef::OnDisk(nodes) => {
181 ChildNodesRef::OnDisk(nodes) => {
173 let mut parse_result = Ok(());
182 let mut parse_result = Ok(());
174 let search_result = nodes.binary_search_by(|node| {
183 let search_result = nodes.binary_search_by(|node| {
175 match node.base_name(on_disk) {
184 match node.base_name(on_disk) {
176 Ok(node_base_name) => node_base_name.cmp(base_name),
185 Ok(node_base_name) => node_base_name.cmp(base_name),
177 Err(e) => {
186 Err(e) => {
178 parse_result = Err(e);
187 parse_result = Err(e);
179 // Dummy comparison result, `search_result` won’t
188 // Dummy comparison result, `search_result` won’t
180 // be used since `parse_result` is an error
189 // be used since `parse_result` is an error
181 std::cmp::Ordering::Equal
190 std::cmp::Ordering::Equal
182 }
191 }
183 }
192 }
184 });
193 });
185 parse_result.map(|()| {
194 parse_result.map(|()| {
186 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
195 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
187 })
196 })
188 }
197 }
189 }
198 }
190 }
199 }
191
200
192 /// Iterate in undefined order
201 /// Iterate in undefined order
193 pub(super) fn iter(
202 pub(super) fn iter(
194 &self,
203 &self,
195 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
204 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
196 match self {
205 match self {
197 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
206 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
198 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
207 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
199 ),
208 ),
200 ChildNodesRef::OnDisk(nodes) => {
209 ChildNodesRef::OnDisk(nodes) => {
201 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
210 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
202 }
211 }
203 }
212 }
204 }
213 }
205
214
206 /// Iterate in parallel in undefined order
215 /// Iterate in parallel in undefined order
207 pub(super) fn par_iter(
216 pub(super) fn par_iter(
208 &self,
217 &self,
209 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
218 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
210 {
219 {
211 use rayon::prelude::*;
220 use rayon::prelude::*;
212 match self {
221 match self {
213 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
222 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
214 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
223 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
215 ),
224 ),
216 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
225 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
217 nodes.par_iter().map(NodeRef::OnDisk),
226 nodes.par_iter().map(NodeRef::OnDisk),
218 ),
227 ),
219 }
228 }
220 }
229 }
221
230
222 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
231 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
223 match self {
232 match self {
224 ChildNodesRef::InMemory(nodes) => {
233 ChildNodesRef::InMemory(nodes) => {
225 let mut vec: Vec<_> = nodes
234 let mut vec: Vec<_> = nodes
226 .iter()
235 .iter()
227 .map(|(k, v)| NodeRef::InMemory(k, v))
236 .map(|(k, v)| NodeRef::InMemory(k, v))
228 .collect();
237 .collect();
229 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
238 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
230 match node {
239 match node {
231 NodeRef::InMemory(path, _node) => path.base_name(),
240 NodeRef::InMemory(path, _node) => path.base_name(),
232 NodeRef::OnDisk(_) => unreachable!(),
241 NodeRef::OnDisk(_) => unreachable!(),
233 }
242 }
234 }
243 }
235 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
244 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
236 // value: https://github.com/rust-lang/rust/issues/34162
245 // value: https://github.com/rust-lang/rust/issues/34162
237 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
246 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
238 vec
247 vec
239 }
248 }
240 ChildNodesRef::OnDisk(nodes) => {
249 ChildNodesRef::OnDisk(nodes) => {
241 // Nodes on disk are already sorted
250 // Nodes on disk are already sorted
242 nodes.iter().map(NodeRef::OnDisk).collect()
251 nodes.iter().map(NodeRef::OnDisk).collect()
243 }
252 }
244 }
253 }
245 }
254 }
246 }
255 }
247
256
248 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
257 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
249 pub(super) fn full_path(
258 pub(super) fn full_path(
250 &self,
259 &self,
251 on_disk: &'on_disk [u8],
260 on_disk: &'on_disk [u8],
252 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
261 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
253 match self {
262 match self {
254 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
263 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
255 NodeRef::OnDisk(node) => node.full_path(on_disk),
264 NodeRef::OnDisk(node) => node.full_path(on_disk),
256 }
265 }
257 }
266 }
258
267
259 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
268 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
260 /// HgPath>` detached from `'tree`
269 /// HgPath>` detached from `'tree`
261 pub(super) fn full_path_borrowed(
270 pub(super) fn full_path_borrowed(
262 &self,
271 &self,
263 on_disk: &'on_disk [u8],
272 on_disk: &'on_disk [u8],
264 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
273 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
265 match self {
274 match self {
266 NodeRef::InMemory(path, _node) => match path.full_path() {
275 NodeRef::InMemory(path, _node) => match path.full_path() {
267 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
276 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
268 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
277 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
269 },
278 },
270 NodeRef::OnDisk(node) => {
279 NodeRef::OnDisk(node) => {
271 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
280 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
272 }
281 }
273 }
282 }
274 }
283 }
275
284
276 pub(super) fn base_name(
285 pub(super) fn base_name(
277 &self,
286 &self,
278 on_disk: &'on_disk [u8],
287 on_disk: &'on_disk [u8],
279 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
288 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
280 match self {
289 match self {
281 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
290 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
282 NodeRef::OnDisk(node) => node.base_name(on_disk),
291 NodeRef::OnDisk(node) => node.base_name(on_disk),
283 }
292 }
284 }
293 }
285
294
286 pub(super) fn children(
295 pub(super) fn children(
287 &self,
296 &self,
288 on_disk: &'on_disk [u8],
297 on_disk: &'on_disk [u8],
289 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
298 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
290 match self {
299 match self {
291 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
300 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
292 NodeRef::OnDisk(node) => {
301 NodeRef::OnDisk(node) => {
293 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
302 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
294 }
303 }
295 }
304 }
296 }
305 }
297
306
298 pub(super) fn has_copy_source(&self) -> bool {
307 pub(super) fn has_copy_source(&self) -> bool {
299 match self {
308 match self {
300 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
309 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
301 NodeRef::OnDisk(node) => node.has_copy_source(),
310 NodeRef::OnDisk(node) => node.has_copy_source(),
302 }
311 }
303 }
312 }
304
313
305 pub(super) fn copy_source(
314 pub(super) fn copy_source(
306 &self,
315 &self,
307 on_disk: &'on_disk [u8],
316 on_disk: &'on_disk [u8],
308 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
317 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
309 match self {
318 match self {
310 NodeRef::InMemory(_path, node) => {
319 NodeRef::InMemory(_path, node) => {
311 Ok(node.copy_source.as_ref().map(|s| &**s))
320 Ok(node.copy_source.as_ref().map(|s| &**s))
312 }
321 }
313 NodeRef::OnDisk(node) => node.copy_source(on_disk),
322 NodeRef::OnDisk(node) => node.copy_source(on_disk),
314 }
323 }
315 }
324 }
316 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
325 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
317 /// HgPath>` detached from `'tree`
326 /// HgPath>` detached from `'tree`
318 pub(super) fn copy_source_borrowed(
327 pub(super) fn copy_source_borrowed(
319 &self,
328 &self,
320 on_disk: &'on_disk [u8],
329 on_disk: &'on_disk [u8],
321 ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
330 ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
322 {
331 {
323 Ok(match self {
332 Ok(match self {
324 NodeRef::InMemory(_path, node) => {
333 NodeRef::InMemory(_path, node) => {
325 node.copy_source.as_ref().map(|source| match source {
334 node.copy_source.as_ref().map(|source| match source {
326 Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
335 Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
327 Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
336 Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
328 })
337 })
329 }
338 }
330 NodeRef::OnDisk(node) => node
339 NodeRef::OnDisk(node) => node
331 .copy_source(on_disk)?
340 .copy_source(on_disk)?
332 .map(|source| BorrowedPath::OnDisk(source)),
341 .map(|source| BorrowedPath::OnDisk(source)),
333 })
342 })
334 }
343 }
335
344
336 pub(super) fn entry(
345 pub(super) fn entry(
337 &self,
346 &self,
338 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
347 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
339 match self {
348 match self {
340 NodeRef::InMemory(_path, node) => {
349 NodeRef::InMemory(_path, node) => {
341 Ok(node.data.as_entry().copied())
350 Ok(node.data.as_entry().copied())
342 }
351 }
343 NodeRef::OnDisk(node) => node.entry(),
352 NodeRef::OnDisk(node) => node.entry(),
344 }
353 }
345 }
354 }
346
355
347 pub(super) fn state(
356 pub(super) fn state(
348 &self,
357 &self,
349 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
358 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
350 Ok(self.entry()?.map(|e| e.state()))
359 Ok(self.entry()?.map(|e| e.state()))
351 }
360 }
352
361
353 pub(super) fn cached_directory_mtime(
362 pub(super) fn cached_directory_mtime(
354 &self,
363 &self,
355 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
364 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
356 match self {
365 match self {
357 NodeRef::InMemory(_path, node) => Ok(match node.data {
366 NodeRef::InMemory(_path, node) => Ok(match node.data {
358 NodeData::CachedDirectory { mtime } => Some(mtime),
367 NodeData::CachedDirectory { mtime } => Some(mtime),
359 _ => None,
368 _ => None,
360 }),
369 }),
361 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
370 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
362 }
371 }
363 }
372 }
364
373
365 pub(super) fn descendants_with_entry_count(&self) -> u32 {
374 pub(super) fn descendants_with_entry_count(&self) -> u32 {
366 match self {
375 match self {
367 NodeRef::InMemory(_path, node) => {
376 NodeRef::InMemory(_path, node) => {
368 node.descendants_with_entry_count
377 node.descendants_with_entry_count
369 }
378 }
370 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
379 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
371 }
380 }
372 }
381 }
373
382
374 pub(super) fn tracked_descendants_count(&self) -> u32 {
383 pub(super) fn tracked_descendants_count(&self) -> u32 {
375 match self {
384 match self {
376 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
385 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
377 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
386 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
378 }
387 }
379 }
388 }
380 }
389 }
381
390
382 /// Represents a file or a directory
391 /// Represents a file or a directory
383 #[derive(Default)]
392 #[derive(Default)]
384 pub(super) struct Node<'on_disk> {
393 pub(super) struct Node<'on_disk> {
385 pub(super) data: NodeData,
394 pub(super) data: NodeData,
386
395
387 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
396 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
388
397
389 pub(super) children: ChildNodes<'on_disk>,
398 pub(super) children: ChildNodes<'on_disk>,
390
399
391 /// How many (non-inclusive) descendants of this node have an entry.
400 /// How many (non-inclusive) descendants of this node have an entry.
392 pub(super) descendants_with_entry_count: u32,
401 pub(super) descendants_with_entry_count: u32,
393
402
394 /// How many (non-inclusive) descendants of this node have an entry whose
403 /// How many (non-inclusive) descendants of this node have an entry whose
395 /// state is "tracked".
404 /// state is "tracked".
396 pub(super) tracked_descendants_count: u32,
405 pub(super) tracked_descendants_count: u32,
397 }
406 }
398
407
399 pub(super) enum NodeData {
408 pub(super) enum NodeData {
400 Entry(DirstateEntry),
409 Entry(DirstateEntry),
401 CachedDirectory { mtime: TruncatedTimestamp },
410 CachedDirectory { mtime: TruncatedTimestamp },
402 None,
411 None,
403 }
412 }
404
413
405 impl Default for NodeData {
414 impl Default for NodeData {
406 fn default() -> Self {
415 fn default() -> Self {
407 NodeData::None
416 NodeData::None
408 }
417 }
409 }
418 }
410
419
411 impl NodeData {
420 impl NodeData {
412 fn has_entry(&self) -> bool {
421 fn has_entry(&self) -> bool {
413 match self {
422 match self {
414 NodeData::Entry(_) => true,
423 NodeData::Entry(_) => true,
415 _ => false,
424 _ => false,
416 }
425 }
417 }
426 }
418
427
419 fn as_entry(&self) -> Option<&DirstateEntry> {
428 fn as_entry(&self) -> Option<&DirstateEntry> {
420 match self {
429 match self {
421 NodeData::Entry(entry) => Some(entry),
430 NodeData::Entry(entry) => Some(entry),
422 _ => None,
431 _ => None,
423 }
432 }
424 }
433 }
425 }
434 }
426
435
427 impl<'on_disk> DirstateMap<'on_disk> {
436 impl<'on_disk> DirstateMap<'on_disk> {
428 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
437 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
429 Self {
438 Self {
430 on_disk,
439 on_disk,
431 root: ChildNodes::default(),
440 root: ChildNodes::default(),
432 nodes_with_entry_count: 0,
441 nodes_with_entry_count: 0,
433 nodes_with_copy_source_count: 0,
442 nodes_with_copy_source_count: 0,
434 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
443 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
435 unreachable_bytes: 0,
444 unreachable_bytes: 0,
436 old_data_size: 0,
445 old_data_size: 0,
446 dirstate_version: DirstateVersion::V1,
437 }
447 }
438 }
448 }
439
449
440 #[timed]
450 #[timed]
441 pub fn new_v2(
451 pub fn new_v2(
442 on_disk: &'on_disk [u8],
452 on_disk: &'on_disk [u8],
443 data_size: usize,
453 data_size: usize,
444 metadata: &[u8],
454 metadata: &[u8],
445 ) -> Result<Self, DirstateError> {
455 ) -> Result<Self, DirstateError> {
446 if let Some(data) = on_disk.get(..data_size) {
456 if let Some(data) = on_disk.get(..data_size) {
447 Ok(on_disk::read(data, metadata)?)
457 Ok(on_disk::read(data, metadata)?)
448 } else {
458 } else {
449 Err(DirstateV2ParseError.into())
459 Err(DirstateV2ParseError.into())
450 }
460 }
451 }
461 }
452
462
453 #[timed]
463 #[timed]
454 pub fn new_v1(
464 pub fn new_v1(
455 on_disk: &'on_disk [u8],
465 on_disk: &'on_disk [u8],
456 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
466 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
457 let mut map = Self::empty(on_disk);
467 let mut map = Self::empty(on_disk);
458 if map.on_disk.is_empty() {
468 if map.on_disk.is_empty() {
459 return Ok((map, None));
469 return Ok((map, None));
460 }
470 }
461
471
462 let parents = parse_dirstate_entries(
472 let parents = parse_dirstate_entries(
463 map.on_disk,
473 map.on_disk,
464 |path, entry, copy_source| {
474 |path, entry, copy_source| {
465 let tracked = entry.state().is_tracked();
475 let tracked = entry.state().is_tracked();
466 let node = Self::get_or_insert_node(
476 let node = Self::get_or_insert_node(
467 map.on_disk,
477 map.on_disk,
468 &mut map.unreachable_bytes,
478 &mut map.unreachable_bytes,
469 &mut map.root,
479 &mut map.root,
470 path,
480 path,
471 WithBasename::to_cow_borrowed,
481 WithBasename::to_cow_borrowed,
472 |ancestor| {
482 |ancestor| {
473 if tracked {
483 if tracked {
474 ancestor.tracked_descendants_count += 1
484 ancestor.tracked_descendants_count += 1
475 }
485 }
476 ancestor.descendants_with_entry_count += 1
486 ancestor.descendants_with_entry_count += 1
477 },
487 },
478 )?;
488 )?;
479 assert!(
489 assert!(
480 !node.data.has_entry(),
490 !node.data.has_entry(),
481 "duplicate dirstate entry in read"
491 "duplicate dirstate entry in read"
482 );
492 );
483 assert!(
493 assert!(
484 node.copy_source.is_none(),
494 node.copy_source.is_none(),
485 "duplicate dirstate entry in read"
495 "duplicate dirstate entry in read"
486 );
496 );
487 node.data = NodeData::Entry(*entry);
497 node.data = NodeData::Entry(*entry);
488 node.copy_source = copy_source.map(Cow::Borrowed);
498 node.copy_source = copy_source.map(Cow::Borrowed);
489 map.nodes_with_entry_count += 1;
499 map.nodes_with_entry_count += 1;
490 if copy_source.is_some() {
500 if copy_source.is_some() {
491 map.nodes_with_copy_source_count += 1
501 map.nodes_with_copy_source_count += 1
492 }
502 }
493 Ok(())
503 Ok(())
494 },
504 },
495 )?;
505 )?;
496 let parents = Some(parents.clone());
506 let parents = Some(parents.clone());
497
507
498 Ok((map, parents))
508 Ok((map, parents))
499 }
509 }
500
510
501 /// Assuming dirstate-v2 format, returns whether the next write should
511 /// Assuming dirstate-v2 format, returns whether the next write should
502 /// append to the existing data file that contains `self.on_disk` (true),
512 /// append to the existing data file that contains `self.on_disk` (true),
503 /// or create a new data file from scratch (false).
513 /// or create a new data file from scratch (false).
504 pub(super) fn write_should_append(&self) -> bool {
514 pub(super) fn write_should_append(&self) -> bool {
505 let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
515 let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
506 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
516 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
507 }
517 }
508
518
509 fn get_node<'tree>(
519 fn get_node<'tree>(
510 &'tree self,
520 &'tree self,
511 path: &HgPath,
521 path: &HgPath,
512 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
522 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
513 let mut children = self.root.as_ref();
523 let mut children = self.root.as_ref();
514 let mut components = path.components();
524 let mut components = path.components();
515 let mut component =
525 let mut component =
516 components.next().expect("expected at least one components");
526 components.next().expect("expected at least one components");
517 loop {
527 loop {
518 if let Some(child) = children.get(component, self.on_disk)? {
528 if let Some(child) = children.get(component, self.on_disk)? {
519 if let Some(next_component) = components.next() {
529 if let Some(next_component) = components.next() {
520 component = next_component;
530 component = next_component;
521 children = child.children(self.on_disk)?;
531 children = child.children(self.on_disk)?;
522 } else {
532 } else {
523 return Ok(Some(child));
533 return Ok(Some(child));
524 }
534 }
525 } else {
535 } else {
526 return Ok(None);
536 return Ok(None);
527 }
537 }
528 }
538 }
529 }
539 }
530
540
531 /// Returns a mutable reference to the node at `path` if it exists
541 /// Returns a mutable reference to the node at `path` if it exists
532 ///
542 ///
533 /// This takes `root` instead of `&mut self` so that callers can mutate
543 /// This takes `root` instead of `&mut self` so that callers can mutate
534 /// other fields while the returned borrow is still valid
544 /// other fields while the returned borrow is still valid
535 fn get_node_mut<'tree>(
545 fn get_node_mut<'tree>(
536 on_disk: &'on_disk [u8],
546 on_disk: &'on_disk [u8],
537 unreachable_bytes: &mut u32,
547 unreachable_bytes: &mut u32,
538 root: &'tree mut ChildNodes<'on_disk>,
548 root: &'tree mut ChildNodes<'on_disk>,
539 path: &HgPath,
549 path: &HgPath,
540 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
550 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
541 let mut children = root;
551 let mut children = root;
542 let mut components = path.components();
552 let mut components = path.components();
543 let mut component =
553 let mut component =
544 components.next().expect("expected at least one components");
554 components.next().expect("expected at least one components");
545 loop {
555 loop {
546 if let Some(child) = children
556 if let Some(child) = children
547 .make_mut(on_disk, unreachable_bytes)?
557 .make_mut(on_disk, unreachable_bytes)?
548 .get_mut(component)
558 .get_mut(component)
549 {
559 {
550 if let Some(next_component) = components.next() {
560 if let Some(next_component) = components.next() {
551 component = next_component;
561 component = next_component;
552 children = &mut child.children;
562 children = &mut child.children;
553 } else {
563 } else {
554 return Ok(Some(child));
564 return Ok(Some(child));
555 }
565 }
556 } else {
566 } else {
557 return Ok(None);
567 return Ok(None);
558 }
568 }
559 }
569 }
560 }
570 }
561
571
562 pub(super) fn get_or_insert<'tree, 'path>(
572 pub(super) fn get_or_insert<'tree, 'path>(
563 &'tree mut self,
573 &'tree mut self,
564 path: &HgPath,
574 path: &HgPath,
565 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
575 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
566 Self::get_or_insert_node(
576 Self::get_or_insert_node(
567 self.on_disk,
577 self.on_disk,
568 &mut self.unreachable_bytes,
578 &mut self.unreachable_bytes,
569 &mut self.root,
579 &mut self.root,
570 path,
580 path,
571 WithBasename::to_cow_owned,
581 WithBasename::to_cow_owned,
572 |_| {},
582 |_| {},
573 )
583 )
574 }
584 }
575
585
576 fn get_or_insert_node<'tree, 'path>(
586 fn get_or_insert_node<'tree, 'path>(
577 on_disk: &'on_disk [u8],
587 on_disk: &'on_disk [u8],
578 unreachable_bytes: &mut u32,
588 unreachable_bytes: &mut u32,
579 root: &'tree mut ChildNodes<'on_disk>,
589 root: &'tree mut ChildNodes<'on_disk>,
580 path: &'path HgPath,
590 path: &'path HgPath,
581 to_cow: impl Fn(
591 to_cow: impl Fn(
582 WithBasename<&'path HgPath>,
592 WithBasename<&'path HgPath>,
583 ) -> WithBasename<Cow<'on_disk, HgPath>>,
593 ) -> WithBasename<Cow<'on_disk, HgPath>>,
584 mut each_ancestor: impl FnMut(&mut Node),
594 mut each_ancestor: impl FnMut(&mut Node),
585 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
595 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
586 let mut child_nodes = root;
596 let mut child_nodes = root;
587 let mut inclusive_ancestor_paths =
597 let mut inclusive_ancestor_paths =
588 WithBasename::inclusive_ancestors_of(path);
598 WithBasename::inclusive_ancestors_of(path);
589 let mut ancestor_path = inclusive_ancestor_paths
599 let mut ancestor_path = inclusive_ancestor_paths
590 .next()
600 .next()
591 .expect("expected at least one inclusive ancestor");
601 .expect("expected at least one inclusive ancestor");
592 loop {
602 loop {
593 // TODO: can we avoid allocating an owned key in cases where the
603 // TODO: can we avoid allocating an owned key in cases where the
594 // map already contains that key, without introducing double
604 // map already contains that key, without introducing double
595 // lookup?
605 // lookup?
596 let child_node = child_nodes
606 let child_node = child_nodes
597 .make_mut(on_disk, unreachable_bytes)?
607 .make_mut(on_disk, unreachable_bytes)?
598 .entry(to_cow(ancestor_path))
608 .entry(to_cow(ancestor_path))
599 .or_default();
609 .or_default();
600 if let Some(next) = inclusive_ancestor_paths.next() {
610 if let Some(next) = inclusive_ancestor_paths.next() {
601 each_ancestor(child_node);
611 each_ancestor(child_node);
602 ancestor_path = next;
612 ancestor_path = next;
603 child_nodes = &mut child_node.children;
613 child_nodes = &mut child_node.children;
604 } else {
614 } else {
605 return Ok(child_node);
615 return Ok(child_node);
606 }
616 }
607 }
617 }
608 }
618 }
609
619
610 fn add_or_remove_file(
620 fn add_or_remove_file(
611 &mut self,
621 &mut self,
612 path: &HgPath,
622 path: &HgPath,
613 old_state: Option<EntryState>,
623 old_state: Option<EntryState>,
614 new_entry: DirstateEntry,
624 new_entry: DirstateEntry,
615 ) -> Result<(), DirstateV2ParseError> {
625 ) -> Result<(), DirstateV2ParseError> {
616 let had_entry = old_state.is_some();
626 let had_entry = old_state.is_some();
617 let was_tracked = old_state.map_or(false, |s| s.is_tracked());
627 let was_tracked = old_state.map_or(false, |s| s.is_tracked());
618 let tracked_count_increment =
628 let tracked_count_increment =
619 match (was_tracked, new_entry.state().is_tracked()) {
629 match (was_tracked, new_entry.state().is_tracked()) {
620 (false, true) => 1,
630 (false, true) => 1,
621 (true, false) => -1,
631 (true, false) => -1,
622 _ => 0,
632 _ => 0,
623 };
633 };
624
634
625 let node = Self::get_or_insert_node(
635 let node = Self::get_or_insert_node(
626 self.on_disk,
636 self.on_disk,
627 &mut self.unreachable_bytes,
637 &mut self.unreachable_bytes,
628 &mut self.root,
638 &mut self.root,
629 path,
639 path,
630 WithBasename::to_cow_owned,
640 WithBasename::to_cow_owned,
631 |ancestor| {
641 |ancestor| {
632 if !had_entry {
642 if !had_entry {
633 ancestor.descendants_with_entry_count += 1;
643 ancestor.descendants_with_entry_count += 1;
634 }
644 }
635
645
636 // We can’t use `+= increment` because the counter is unsigned,
646 // We can’t use `+= increment` because the counter is unsigned,
637 // and we want debug builds to detect accidental underflow
647 // and we want debug builds to detect accidental underflow
638 // through zero
648 // through zero
639 match tracked_count_increment {
649 match tracked_count_increment {
640 1 => ancestor.tracked_descendants_count += 1,
650 1 => ancestor.tracked_descendants_count += 1,
641 -1 => ancestor.tracked_descendants_count -= 1,
651 -1 => ancestor.tracked_descendants_count -= 1,
642 _ => {}
652 _ => {}
643 }
653 }
644 },
654 },
645 )?;
655 )?;
646 if !had_entry {
656 if !had_entry {
647 self.nodes_with_entry_count += 1
657 self.nodes_with_entry_count += 1
648 }
658 }
649 node.data = NodeData::Entry(new_entry);
659 node.data = NodeData::Entry(new_entry);
650 Ok(())
660 Ok(())
651 }
661 }
652
662
653 fn iter_nodes<'tree>(
663 fn iter_nodes<'tree>(
654 &'tree self,
664 &'tree self,
655 ) -> impl Iterator<
665 ) -> impl Iterator<
656 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
666 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
657 > + 'tree {
667 > + 'tree {
658 // Depth first tree traversal.
668 // Depth first tree traversal.
659 //
669 //
660 // If we could afford internal iteration and recursion,
670 // If we could afford internal iteration and recursion,
661 // this would look like:
671 // this would look like:
662 //
672 //
663 // ```
673 // ```
664 // fn traverse_children(
674 // fn traverse_children(
665 // children: &ChildNodes,
675 // children: &ChildNodes,
666 // each: &mut impl FnMut(&Node),
676 // each: &mut impl FnMut(&Node),
667 // ) {
677 // ) {
668 // for child in children.values() {
678 // for child in children.values() {
669 // traverse_children(&child.children, each);
679 // traverse_children(&child.children, each);
670 // each(child);
680 // each(child);
671 // }
681 // }
672 // }
682 // }
673 // ```
683 // ```
674 //
684 //
675 // However we want an external iterator and therefore can’t use the
685 // However we want an external iterator and therefore can’t use the
676 // call stack. Use an explicit stack instead:
686 // call stack. Use an explicit stack instead:
677 let mut stack = Vec::new();
687 let mut stack = Vec::new();
678 let mut iter = self.root.as_ref().iter();
688 let mut iter = self.root.as_ref().iter();
679 std::iter::from_fn(move || {
689 std::iter::from_fn(move || {
680 while let Some(child_node) = iter.next() {
690 while let Some(child_node) = iter.next() {
681 let children = match child_node.children(self.on_disk) {
691 let children = match child_node.children(self.on_disk) {
682 Ok(children) => children,
692 Ok(children) => children,
683 Err(error) => return Some(Err(error)),
693 Err(error) => return Some(Err(error)),
684 };
694 };
685 // Pseudo-recursion
695 // Pseudo-recursion
686 let new_iter = children.iter();
696 let new_iter = children.iter();
687 let old_iter = std::mem::replace(&mut iter, new_iter);
697 let old_iter = std::mem::replace(&mut iter, new_iter);
688 stack.push((child_node, old_iter));
698 stack.push((child_node, old_iter));
689 }
699 }
690 // Found the end of a `children.iter()` iterator.
700 // Found the end of a `children.iter()` iterator.
691 if let Some((child_node, next_iter)) = stack.pop() {
701 if let Some((child_node, next_iter)) = stack.pop() {
692 // "Return" from pseudo-recursion by restoring state from the
702 // "Return" from pseudo-recursion by restoring state from the
693 // explicit stack
703 // explicit stack
694 iter = next_iter;
704 iter = next_iter;
695
705
696 Some(Ok(child_node))
706 Some(Ok(child_node))
697 } else {
707 } else {
698 // Reached the bottom of the stack, we’re done
708 // Reached the bottom of the stack, we’re done
699 None
709 None
700 }
710 }
701 })
711 })
702 }
712 }
703
713
704 fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
714 fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
705 if let Cow::Borrowed(path) = path {
715 if let Cow::Borrowed(path) = path {
706 *unreachable_bytes += path.len() as u32
716 *unreachable_bytes += path.len() as u32
707 }
717 }
708 }
718 }
709 }
719 }
710
720
711 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
721 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
712 ///
722 ///
713 /// The callback is only called for incoming `Ok` values. Errors are passed
723 /// The callback is only called for incoming `Ok` values. Errors are passed
714 /// through as-is. In order to let it use the `?` operator the callback is
724 /// through as-is. In order to let it use the `?` operator the callback is
715 /// expected to return a `Result` of `Option`, instead of an `Option` of
725 /// expected to return a `Result` of `Option`, instead of an `Option` of
716 /// `Result`.
726 /// `Result`.
717 fn filter_map_results<'a, I, F, A, B, E>(
727 fn filter_map_results<'a, I, F, A, B, E>(
718 iter: I,
728 iter: I,
719 f: F,
729 f: F,
720 ) -> impl Iterator<Item = Result<B, E>> + 'a
730 ) -> impl Iterator<Item = Result<B, E>> + 'a
721 where
731 where
722 I: Iterator<Item = Result<A, E>> + 'a,
732 I: Iterator<Item = Result<A, E>> + 'a,
723 F: Fn(A) -> Result<Option<B>, E> + 'a,
733 F: Fn(A) -> Result<Option<B>, E> + 'a,
724 {
734 {
725 iter.filter_map(move |result| match result {
735 iter.filter_map(move |result| match result {
726 Ok(node) => f(node).transpose(),
736 Ok(node) => f(node).transpose(),
727 Err(e) => Some(Err(e)),
737 Err(e) => Some(Err(e)),
728 })
738 })
729 }
739 }
730
740
731 impl OwningDirstateMap {
741 impl OwningDirstateMap {
732 pub fn clear(&mut self) {
742 pub fn clear(&mut self) {
733 self.with_dmap_mut(|map| {
743 self.with_dmap_mut(|map| {
734 map.root = Default::default();
744 map.root = Default::default();
735 map.nodes_with_entry_count = 0;
745 map.nodes_with_entry_count = 0;
736 map.nodes_with_copy_source_count = 0;
746 map.nodes_with_copy_source_count = 0;
737 });
747 });
738 }
748 }
739
749
740 pub fn set_entry(
750 pub fn set_entry(
741 &mut self,
751 &mut self,
742 filename: &HgPath,
752 filename: &HgPath,
743 entry: DirstateEntry,
753 entry: DirstateEntry,
744 ) -> Result<(), DirstateV2ParseError> {
754 ) -> Result<(), DirstateV2ParseError> {
745 self.with_dmap_mut(|map| {
755 self.with_dmap_mut(|map| {
746 map.get_or_insert(&filename)?.data = NodeData::Entry(entry);
756 map.get_or_insert(&filename)?.data = NodeData::Entry(entry);
747 Ok(())
757 Ok(())
748 })
758 })
749 }
759 }
750
760
751 pub fn add_file(
761 pub fn add_file(
752 &mut self,
762 &mut self,
753 filename: &HgPath,
763 filename: &HgPath,
754 entry: DirstateEntry,
764 entry: DirstateEntry,
755 ) -> Result<(), DirstateError> {
765 ) -> Result<(), DirstateError> {
756 let old_state = self.get(filename)?.map(|e| e.state());
766 let old_state = self.get(filename)?.map(|e| e.state());
757 self.with_dmap_mut(|map| {
767 self.with_dmap_mut(|map| {
758 Ok(map.add_or_remove_file(filename, old_state, entry)?)
768 Ok(map.add_or_remove_file(filename, old_state, entry)?)
759 })
769 })
760 }
770 }
761
771
762 pub fn remove_file(
772 pub fn remove_file(
763 &mut self,
773 &mut self,
764 filename: &HgPath,
774 filename: &HgPath,
765 in_merge: bool,
775 in_merge: bool,
766 ) -> Result<(), DirstateError> {
776 ) -> Result<(), DirstateError> {
767 let old_entry_opt = self.get(filename)?;
777 let old_entry_opt = self.get(filename)?;
768 let old_state = old_entry_opt.map(|e| e.state());
778 let old_state = old_entry_opt.map(|e| e.state());
769 let mut size = 0;
779 let mut size = 0;
770 if in_merge {
780 if in_merge {
771 // XXX we should not be able to have 'm' state and 'FROM_P2' if not
781 // XXX we should not be able to have 'm' state and 'FROM_P2' if not
772 // during a merge. So I (marmoute) am not sure we need the
782 // during a merge. So I (marmoute) am not sure we need the
773 // conditional at all. Adding double checking this with assert
783 // conditional at all. Adding double checking this with assert
774 // would be nice.
784 // would be nice.
775 if let Some(old_entry) = old_entry_opt {
785 if let Some(old_entry) = old_entry_opt {
776 // backup the previous state
786 // backup the previous state
777 if old_entry.state() == EntryState::Merged {
787 if old_entry.state() == EntryState::Merged {
778 size = SIZE_NON_NORMAL;
788 size = SIZE_NON_NORMAL;
779 } else if old_entry.state() == EntryState::Normal
789 } else if old_entry.state() == EntryState::Normal
780 && old_entry.size() == SIZE_FROM_OTHER_PARENT
790 && old_entry.size() == SIZE_FROM_OTHER_PARENT
781 {
791 {
782 // other parent
792 // other parent
783 size = SIZE_FROM_OTHER_PARENT;
793 size = SIZE_FROM_OTHER_PARENT;
784 }
794 }
785 }
795 }
786 }
796 }
787 if size == 0 {
797 if size == 0 {
788 self.copy_map_remove(filename)?;
798 self.copy_map_remove(filename)?;
789 }
799 }
790 self.with_dmap_mut(|map| {
800 self.with_dmap_mut(|map| {
791 let entry = DirstateEntry::new_removed(size);
801 let entry = DirstateEntry::new_removed(size);
792 Ok(map.add_or_remove_file(filename, old_state, entry)?)
802 Ok(map.add_or_remove_file(filename, old_state, entry)?)
793 })
803 })
794 }
804 }
795
805
796 pub fn drop_entry_and_copy_source(
806 pub fn drop_entry_and_copy_source(
797 &mut self,
807 &mut self,
798 filename: &HgPath,
808 filename: &HgPath,
799 ) -> Result<(), DirstateError> {
809 ) -> Result<(), DirstateError> {
800 let was_tracked = self
810 let was_tracked = self
801 .get(filename)?
811 .get(filename)?
802 .map_or(false, |e| e.state().is_tracked());
812 .map_or(false, |e| e.state().is_tracked());
803 struct Dropped {
813 struct Dropped {
804 was_tracked: bool,
814 was_tracked: bool,
805 had_entry: bool,
815 had_entry: bool,
806 had_copy_source: bool,
816 had_copy_source: bool,
807 }
817 }
808
818
809 /// If this returns `Ok(Some((dropped, removed)))`, then
819 /// If this returns `Ok(Some((dropped, removed)))`, then
810 ///
820 ///
811 /// * `dropped` is about the leaf node that was at `filename`
821 /// * `dropped` is about the leaf node that was at `filename`
812 /// * `removed` is whether this particular level of recursion just
822 /// * `removed` is whether this particular level of recursion just
813 /// removed a node in `nodes`.
823 /// removed a node in `nodes`.
814 fn recur<'on_disk>(
824 fn recur<'on_disk>(
815 on_disk: &'on_disk [u8],
825 on_disk: &'on_disk [u8],
816 unreachable_bytes: &mut u32,
826 unreachable_bytes: &mut u32,
817 nodes: &mut ChildNodes<'on_disk>,
827 nodes: &mut ChildNodes<'on_disk>,
818 path: &HgPath,
828 path: &HgPath,
819 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
829 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
820 let (first_path_component, rest_of_path) =
830 let (first_path_component, rest_of_path) =
821 path.split_first_component();
831 path.split_first_component();
822 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
832 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
823 let node = if let Some(node) = nodes.get_mut(first_path_component)
833 let node = if let Some(node) = nodes.get_mut(first_path_component)
824 {
834 {
825 node
835 node
826 } else {
836 } else {
827 return Ok(None);
837 return Ok(None);
828 };
838 };
829 let dropped;
839 let dropped;
830 if let Some(rest) = rest_of_path {
840 if let Some(rest) = rest_of_path {
831 if let Some((d, removed)) = recur(
841 if let Some((d, removed)) = recur(
832 on_disk,
842 on_disk,
833 unreachable_bytes,
843 unreachable_bytes,
834 &mut node.children,
844 &mut node.children,
835 rest,
845 rest,
836 )? {
846 )? {
837 dropped = d;
847 dropped = d;
838 if dropped.had_entry {
848 if dropped.had_entry {
839 node.descendants_with_entry_count = node
849 node.descendants_with_entry_count = node
840 .descendants_with_entry_count
850 .descendants_with_entry_count
841 .checked_sub(1)
851 .checked_sub(1)
842 .expect(
852 .expect(
843 "descendants_with_entry_count should be >= 0",
853 "descendants_with_entry_count should be >= 0",
844 );
854 );
845 }
855 }
846 if dropped.was_tracked {
856 if dropped.was_tracked {
847 node.tracked_descendants_count = node
857 node.tracked_descendants_count = node
848 .tracked_descendants_count
858 .tracked_descendants_count
849 .checked_sub(1)
859 .checked_sub(1)
850 .expect(
860 .expect(
851 "tracked_descendants_count should be >= 0",
861 "tracked_descendants_count should be >= 0",
852 );
862 );
853 }
863 }
854
864
855 // Directory caches must be invalidated when removing a
865 // Directory caches must be invalidated when removing a
856 // child node
866 // child node
857 if removed {
867 if removed {
858 if let NodeData::CachedDirectory { .. } = &node.data {
868 if let NodeData::CachedDirectory { .. } = &node.data {
859 node.data = NodeData::None
869 node.data = NodeData::None
860 }
870 }
861 }
871 }
862 } else {
872 } else {
863 return Ok(None);
873 return Ok(None);
864 }
874 }
865 } else {
875 } else {
866 let entry = node.data.as_entry();
876 let entry = node.data.as_entry();
867 let was_tracked = entry.map_or(false, |entry| entry.tracked());
877 let was_tracked = entry.map_or(false, |entry| entry.tracked());
868 let had_entry = entry.is_some();
878 let had_entry = entry.is_some();
869 if had_entry {
879 if had_entry {
870 node.data = NodeData::None
880 node.data = NodeData::None
871 }
881 }
872 let mut had_copy_source = false;
882 let mut had_copy_source = false;
873 if let Some(source) = &node.copy_source {
883 if let Some(source) = &node.copy_source {
874 DirstateMap::count_dropped_path(unreachable_bytes, source);
884 DirstateMap::count_dropped_path(unreachable_bytes, source);
875 had_copy_source = true;
885 had_copy_source = true;
876 node.copy_source = None
886 node.copy_source = None
877 }
887 }
878 dropped = Dropped {
888 dropped = Dropped {
879 was_tracked,
889 was_tracked,
880 had_entry,
890 had_entry,
881 had_copy_source,
891 had_copy_source,
882 };
892 };
883 }
893 }
884 // After recursion, for both leaf (rest_of_path is None) nodes and
894 // After recursion, for both leaf (rest_of_path is None) nodes and
885 // parent nodes, remove a node if it just became empty.
895 // parent nodes, remove a node if it just became empty.
886 let remove = !node.data.has_entry()
896 let remove = !node.data.has_entry()
887 && node.copy_source.is_none()
897 && node.copy_source.is_none()
888 && node.children.is_empty();
898 && node.children.is_empty();
889 if remove {
899 if remove {
890 let (key, _) =
900 let (key, _) =
891 nodes.remove_entry(first_path_component).unwrap();
901 nodes.remove_entry(first_path_component).unwrap();
892 DirstateMap::count_dropped_path(
902 DirstateMap::count_dropped_path(
893 unreachable_bytes,
903 unreachable_bytes,
894 key.full_path(),
904 key.full_path(),
895 )
905 )
896 }
906 }
897 Ok(Some((dropped, remove)))
907 Ok(Some((dropped, remove)))
898 }
908 }
899
909
900 self.with_dmap_mut(|map| {
910 self.with_dmap_mut(|map| {
901 if let Some((dropped, _removed)) = recur(
911 if let Some((dropped, _removed)) = recur(
902 map.on_disk,
912 map.on_disk,
903 &mut map.unreachable_bytes,
913 &mut map.unreachable_bytes,
904 &mut map.root,
914 &mut map.root,
905 filename,
915 filename,
906 )? {
916 )? {
907 if dropped.had_entry {
917 if dropped.had_entry {
908 map.nodes_with_entry_count = map
918 map.nodes_with_entry_count = map
909 .nodes_with_entry_count
919 .nodes_with_entry_count
910 .checked_sub(1)
920 .checked_sub(1)
911 .expect("nodes_with_entry_count should be >= 0");
921 .expect("nodes_with_entry_count should be >= 0");
912 }
922 }
913 if dropped.had_copy_source {
923 if dropped.had_copy_source {
914 map.nodes_with_copy_source_count = map
924 map.nodes_with_copy_source_count = map
915 .nodes_with_copy_source_count
925 .nodes_with_copy_source_count
916 .checked_sub(1)
926 .checked_sub(1)
917 .expect("nodes_with_copy_source_count should be >= 0");
927 .expect("nodes_with_copy_source_count should be >= 0");
918 }
928 }
919 } else {
929 } else {
920 debug_assert!(!was_tracked);
930 debug_assert!(!was_tracked);
921 }
931 }
922 Ok(())
932 Ok(())
923 })
933 })
924 }
934 }
925
935
926 pub fn has_tracked_dir(
936 pub fn has_tracked_dir(
927 &mut self,
937 &mut self,
928 directory: &HgPath,
938 directory: &HgPath,
929 ) -> Result<bool, DirstateError> {
939 ) -> Result<bool, DirstateError> {
930 self.with_dmap_mut(|map| {
940 self.with_dmap_mut(|map| {
931 if let Some(node) = map.get_node(directory)? {
941 if let Some(node) = map.get_node(directory)? {
932 // A node without a `DirstateEntry` was created to hold child
942 // A node without a `DirstateEntry` was created to hold child
933 // nodes, and is therefore a directory.
943 // nodes, and is therefore a directory.
934 let state = node.state()?;
944 let state = node.state()?;
935 Ok(state.is_none() && node.tracked_descendants_count() > 0)
945 Ok(state.is_none() && node.tracked_descendants_count() > 0)
936 } else {
946 } else {
937 Ok(false)
947 Ok(false)
938 }
948 }
939 })
949 })
940 }
950 }
941
951
942 pub fn has_dir(
952 pub fn has_dir(
943 &mut self,
953 &mut self,
944 directory: &HgPath,
954 directory: &HgPath,
945 ) -> Result<bool, DirstateError> {
955 ) -> Result<bool, DirstateError> {
946 self.with_dmap_mut(|map| {
956 self.with_dmap_mut(|map| {
947 if let Some(node) = map.get_node(directory)? {
957 if let Some(node) = map.get_node(directory)? {
948 // A node without a `DirstateEntry` was created to hold child
958 // A node without a `DirstateEntry` was created to hold child
949 // nodes, and is therefore a directory.
959 // nodes, and is therefore a directory.
950 let state = node.state()?;
960 let state = node.state()?;
951 Ok(state.is_none() && node.descendants_with_entry_count() > 0)
961 Ok(state.is_none() && node.descendants_with_entry_count() > 0)
952 } else {
962 } else {
953 Ok(false)
963 Ok(false)
954 }
964 }
955 })
965 })
956 }
966 }
957
967
958 #[timed]
968 #[timed]
959 pub fn pack_v1(
969 pub fn pack_v1(
960 &self,
970 &self,
961 parents: DirstateParents,
971 parents: DirstateParents,
962 ) -> Result<Vec<u8>, DirstateError> {
972 ) -> Result<Vec<u8>, DirstateError> {
963 let map = self.get_map();
973 let map = self.get_map();
964 // Optimization (to be measured?): pre-compute size to avoid `Vec`
974 // Optimization (to be measured?): pre-compute size to avoid `Vec`
965 // reallocations
975 // reallocations
966 let mut size = parents.as_bytes().len();
976 let mut size = parents.as_bytes().len();
967 for node in map.iter_nodes() {
977 for node in map.iter_nodes() {
968 let node = node?;
978 let node = node?;
969 if node.entry()?.is_some() {
979 if node.entry()?.is_some() {
970 size += packed_entry_size(
980 size += packed_entry_size(
971 node.full_path(map.on_disk)?,
981 node.full_path(map.on_disk)?,
972 node.copy_source(map.on_disk)?,
982 node.copy_source(map.on_disk)?,
973 );
983 );
974 }
984 }
975 }
985 }
976
986
977 let mut packed = Vec::with_capacity(size);
987 let mut packed = Vec::with_capacity(size);
978 packed.extend(parents.as_bytes());
988 packed.extend(parents.as_bytes());
979
989
980 for node in map.iter_nodes() {
990 for node in map.iter_nodes() {
981 let node = node?;
991 let node = node?;
982 if let Some(entry) = node.entry()? {
992 if let Some(entry) = node.entry()? {
983 pack_entry(
993 pack_entry(
984 node.full_path(map.on_disk)?,
994 node.full_path(map.on_disk)?,
985 &entry,
995 &entry,
986 node.copy_source(map.on_disk)?,
996 node.copy_source(map.on_disk)?,
987 &mut packed,
997 &mut packed,
988 );
998 );
989 }
999 }
990 }
1000 }
991 Ok(packed)
1001 Ok(packed)
992 }
1002 }
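
`pack_v1` serializes in two passes: first sum the exact byte size, then fill a `Vec` allocated once with that capacity so no reallocation happens while packing. A standalone sketch of the same pattern, with a made-up length-prefixed record layout (not the real v1 entry format):

fn packed_len(records: &[&str]) -> usize {
    records.iter().map(|r| 4 + r.len()).sum()
}

fn pack(records: &[&str]) -> Vec<u8> {
    // Single allocation: the capacity was computed exactly by the first pass.
    let mut out = Vec::with_capacity(packed_len(records));
    for r in records {
        out.extend_from_slice(&(r.len() as u32).to_be_bytes());
        out.extend_from_slice(r.as_bytes());
    }
    out
}

fn main() {
    let records = ["a/b", "c"];
    let packed = pack(&records);
    assert_eq!(packed.len(), packed_len(&records));
}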
993
1003
994 /// Returns new data and metadata together with whether that data should be
1004 /// Returns new data and metadata together with whether that data should be
995 /// appended to the existing data file whose content is at
1005 /// appended to the existing data file whose content is at
996 /// `map.on_disk` (true), instead of written to a new data file
1006 /// `map.on_disk` (true), instead of written to a new data file
997 /// (false), and the previous size of data on disk.
1007 /// (false), and the previous size of data on disk.
998 #[timed]
1008 #[timed]
999 pub fn pack_v2(
1009 pub fn pack_v2(
1000 &self,
1010 &self,
1001 can_append: bool,
1011 can_append: bool,
1002 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
1012 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
1003 {
1013 {
1004 let map = self.get_map();
1014 let map = self.get_map();
1005 on_disk::write(map, can_append)
1015 on_disk::write(map, can_append)
1006 }
1016 }
1007
1017
1008 /// `callback` allows the caller to process and do something with the
1018 /// `callback` allows the caller to process and do something with the
1009 /// results of the status. This is needed to do so efficiently (i.e.
1019 /// results of the status. This is needed to do so efficiently (i.e.
1010 /// without cloning the `DirstateStatus` object with its paths) because
1020 /// without cloning the `DirstateStatus` object with its paths) because
1011 /// we need to borrow from `Self`.
1021 /// we need to borrow from `Self`.
1012 pub fn with_status<R>(
1022 pub fn with_status<R>(
1013 &mut self,
1023 &mut self,
1014 matcher: &(dyn Matcher + Sync),
1024 matcher: &(dyn Matcher + Sync),
1015 root_dir: PathBuf,
1025 root_dir: PathBuf,
1016 ignore_files: Vec<PathBuf>,
1026 ignore_files: Vec<PathBuf>,
1017 options: StatusOptions,
1027 options: StatusOptions,
1018 callback: impl for<'r> FnOnce(
1028 callback: impl for<'r> FnOnce(
1019 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
1029 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
1020 ) -> R,
1030 ) -> R,
1021 ) -> R {
1031 ) -> R {
1022 self.with_dmap_mut(|map| {
1032 self.with_dmap_mut(|map| {
1023 callback(super::status::status(
1033 callback(super::status::status(
1024 map,
1034 map,
1025 matcher,
1035 matcher,
1026 root_dir,
1036 root_dir,
1027 ignore_files,
1037 ignore_files,
1028 options,
1038 options,
1029 ))
1039 ))
1030 })
1040 })
1031 }
1041 }
1032
1042
1033 pub fn copy_map_len(&self) -> usize {
1043 pub fn copy_map_len(&self) -> usize {
1034 let map = self.get_map();
1044 let map = self.get_map();
1035 map.nodes_with_copy_source_count as usize
1045 map.nodes_with_copy_source_count as usize
1036 }
1046 }
1037
1047
1038 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1048 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1039 let map = self.get_map();
1049 let map = self.get_map();
1040 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1050 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1041 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1051 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1042 Some((node.full_path(map.on_disk)?, source))
1052 Some((node.full_path(map.on_disk)?, source))
1043 } else {
1053 } else {
1044 None
1054 None
1045 })
1055 })
1046 }))
1056 }))
1047 }
1057 }
1048
1058
1049 pub fn copy_map_contains_key(
1059 pub fn copy_map_contains_key(
1050 &self,
1060 &self,
1051 key: &HgPath,
1061 key: &HgPath,
1052 ) -> Result<bool, DirstateV2ParseError> {
1062 ) -> Result<bool, DirstateV2ParseError> {
1053 let map = self.get_map();
1063 let map = self.get_map();
1054 Ok(if let Some(node) = map.get_node(key)? {
1064 Ok(if let Some(node) = map.get_node(key)? {
1055 node.has_copy_source()
1065 node.has_copy_source()
1056 } else {
1066 } else {
1057 false
1067 false
1058 })
1068 })
1059 }
1069 }
1060
1070
1061 pub fn copy_map_get(
1071 pub fn copy_map_get(
1062 &self,
1072 &self,
1063 key: &HgPath,
1073 key: &HgPath,
1064 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1074 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1065 let map = self.get_map();
1075 let map = self.get_map();
1066 if let Some(node) = map.get_node(key)? {
1076 if let Some(node) = map.get_node(key)? {
1067 if let Some(source) = node.copy_source(map.on_disk)? {
1077 if let Some(source) = node.copy_source(map.on_disk)? {
1068 return Ok(Some(source));
1078 return Ok(Some(source));
1069 }
1079 }
1070 }
1080 }
1071 Ok(None)
1081 Ok(None)
1072 }
1082 }
1073
1083
1074 pub fn copy_map_remove(
1084 pub fn copy_map_remove(
1075 &mut self,
1085 &mut self,
1076 key: &HgPath,
1086 key: &HgPath,
1077 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1087 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1078 self.with_dmap_mut(|map| {
1088 self.with_dmap_mut(|map| {
1079 let count = &mut map.nodes_with_copy_source_count;
1089 let count = &mut map.nodes_with_copy_source_count;
1080 let unreachable_bytes = &mut map.unreachable_bytes;
1090 let unreachable_bytes = &mut map.unreachable_bytes;
1081 Ok(DirstateMap::get_node_mut(
1091 Ok(DirstateMap::get_node_mut(
1082 map.on_disk,
1092 map.on_disk,
1083 unreachable_bytes,
1093 unreachable_bytes,
1084 &mut map.root,
1094 &mut map.root,
1085 key,
1095 key,
1086 )?
1096 )?
1087 .and_then(|node| {
1097 .and_then(|node| {
1088 if let Some(source) = &node.copy_source {
1098 if let Some(source) = &node.copy_source {
1089 *count -= 1;
1099 *count -= 1;
1090 DirstateMap::count_dropped_path(unreachable_bytes, source);
1100 DirstateMap::count_dropped_path(unreachable_bytes, source);
1091 }
1101 }
1092 node.copy_source.take().map(Cow::into_owned)
1102 node.copy_source.take().map(Cow::into_owned)
1093 }))
1103 }))
1094 })
1104 })
1095 }
1105 }
1096
1106
1097 pub fn copy_map_insert(
1107 pub fn copy_map_insert(
1098 &mut self,
1108 &mut self,
1099 key: HgPathBuf,
1109 key: HgPathBuf,
1100 value: HgPathBuf,
1110 value: HgPathBuf,
1101 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1111 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1102 self.with_dmap_mut(|map| {
1112 self.with_dmap_mut(|map| {
1103 let node = DirstateMap::get_or_insert_node(
1113 let node = DirstateMap::get_or_insert_node(
1104 map.on_disk,
1114 map.on_disk,
1105 &mut map.unreachable_bytes,
1115 &mut map.unreachable_bytes,
1106 &mut map.root,
1116 &mut map.root,
1107 &key,
1117 &key,
1108 WithBasename::to_cow_owned,
1118 WithBasename::to_cow_owned,
1109 |_ancestor| {},
1119 |_ancestor| {},
1110 )?;
1120 )?;
1111 if node.copy_source.is_none() {
1121 if node.copy_source.is_none() {
1112 map.nodes_with_copy_source_count += 1
1122 map.nodes_with_copy_source_count += 1
1113 }
1123 }
1114 Ok(node.copy_source.replace(value.into()).map(Cow::into_owned))
1124 Ok(node.copy_source.replace(value.into()).map(Cow::into_owned))
1115 })
1125 })
1116 }
1126 }
1117
1127
1118 pub fn len(&self) -> usize {
1128 pub fn len(&self) -> usize {
1119 let map = self.get_map();
1129 let map = self.get_map();
1120 map.nodes_with_entry_count as usize
1130 map.nodes_with_entry_count as usize
1121 }
1131 }
1122
1132
1123 pub fn contains_key(
1133 pub fn contains_key(
1124 &self,
1134 &self,
1125 key: &HgPath,
1135 key: &HgPath,
1126 ) -> Result<bool, DirstateV2ParseError> {
1136 ) -> Result<bool, DirstateV2ParseError> {
1127 Ok(self.get(key)?.is_some())
1137 Ok(self.get(key)?.is_some())
1128 }
1138 }
1129
1139
1130 pub fn get(
1140 pub fn get(
1131 &self,
1141 &self,
1132 key: &HgPath,
1142 key: &HgPath,
1133 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1143 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1134 let map = self.get_map();
1144 let map = self.get_map();
1135 Ok(if let Some(node) = map.get_node(key)? {
1145 Ok(if let Some(node) = map.get_node(key)? {
1136 node.entry()?
1146 node.entry()?
1137 } else {
1147 } else {
1138 None
1148 None
1139 })
1149 })
1140 }
1150 }
1141
1151
1142 pub fn iter(&self) -> StateMapIter<'_> {
1152 pub fn iter(&self) -> StateMapIter<'_> {
1143 let map = self.get_map();
1153 let map = self.get_map();
1144 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1154 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1145 Ok(if let Some(entry) = node.entry()? {
1155 Ok(if let Some(entry) = node.entry()? {
1146 Some((node.full_path(map.on_disk)?, entry))
1156 Some((node.full_path(map.on_disk)?, entry))
1147 } else {
1157 } else {
1148 None
1158 None
1149 })
1159 })
1150 }))
1160 }))
1151 }
1161 }
1152
1162
1153 pub fn iter_tracked_dirs(
1163 pub fn iter_tracked_dirs(
1154 &mut self,
1164 &mut self,
1155 ) -> Result<
1165 ) -> Result<
1156 Box<
1166 Box<
1157 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1167 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1158 + Send
1168 + Send
1159 + '_,
1169 + '_,
1160 >,
1170 >,
1161 DirstateError,
1171 DirstateError,
1162 > {
1172 > {
1163 let map = self.get_map();
1173 let map = self.get_map();
1164 let on_disk = map.on_disk;
1174 let on_disk = map.on_disk;
1165 Ok(Box::new(filter_map_results(
1175 Ok(Box::new(filter_map_results(
1166 map.iter_nodes(),
1176 map.iter_nodes(),
1167 move |node| {
1177 move |node| {
1168 Ok(if node.tracked_descendants_count() > 0 {
1178 Ok(if node.tracked_descendants_count() > 0 {
1169 Some(node.full_path(on_disk)?)
1179 Some(node.full_path(on_disk)?)
1170 } else {
1180 } else {
1171 None
1181 None
1172 })
1182 })
1173 },
1183 },
1174 )))
1184 )))
1175 }
1185 }
1176
1186
1177 pub fn debug_iter(
1187 pub fn debug_iter(
1178 &self,
1188 &self,
1179 all: bool,
1189 all: bool,
1180 ) -> Box<
1190 ) -> Box<
1181 dyn Iterator<
1191 dyn Iterator<
1182 Item = Result<
1192 Item = Result<
1183 (&HgPath, (u8, i32, i32, i32)),
1193 (&HgPath, (u8, i32, i32, i32)),
1184 DirstateV2ParseError,
1194 DirstateV2ParseError,
1185 >,
1195 >,
1186 > + Send
1196 > + Send
1187 + '_,
1197 + '_,
1188 > {
1198 > {
1189 let map = self.get_map();
1199 let map = self.get_map();
1190 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1200 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1191 let debug_tuple = if let Some(entry) = node.entry()? {
1201 let debug_tuple = if let Some(entry) = node.entry()? {
1192 entry.debug_tuple()
1202 entry.debug_tuple()
1193 } else if !all {
1203 } else if !all {
1194 return Ok(None);
1204 return Ok(None);
1195 } else if let Some(mtime) = node.cached_directory_mtime()? {
1205 } else if let Some(mtime) = node.cached_directory_mtime()? {
1196 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1206 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1197 } else {
1207 } else {
1198 (b' ', 0, -1, -1)
1208 (b' ', 0, -1, -1)
1199 };
1209 };
1200 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1210 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1201 }))
1211 }))
1202 }
1212 }
1203 }
1213 }
@@ -1,849 +1,853 b''
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4
4
5 use crate::dirstate::TruncatedTimestamp;
5 use crate::dirstate::TruncatedTimestamp;
6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 use crate::dirstate_tree::path_with_basename::WithBasename;
8 use crate::dirstate_tree::path_with_basename::WithBasename;
8 use crate::errors::HgError;
9 use crate::errors::HgError;
9 use crate::utils::hg_path::HgPath;
10 use crate::utils::hg_path::HgPath;
10 use crate::DirstateEntry;
11 use crate::DirstateEntry;
11 use crate::DirstateError;
12 use crate::DirstateError;
12 use crate::DirstateParents;
13 use crate::DirstateParents;
13 use bitflags::bitflags;
14 use bitflags::bitflags;
14 use bytes_cast::unaligned::{U16Be, U32Be};
15 use bytes_cast::unaligned::{U16Be, U32Be};
15 use bytes_cast::BytesCast;
16 use bytes_cast::BytesCast;
16 use format_bytes::format_bytes;
17 use format_bytes::format_bytes;
17 use rand::Rng;
18 use rand::Rng;
18 use std::borrow::Cow;
19 use std::borrow::Cow;
19 use std::convert::{TryFrom, TryInto};
20 use std::convert::{TryFrom, TryInto};
20 use std::fmt::Write;
21 use std::fmt::Write;
21
22
22 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
23 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
23 /// This is a redundant sanity check more than an actual "magic number" since
24 /// This is a redundant sanity check more than an actual "magic number" since
24 /// `.hg/requires` already governs which format should be used.
25 /// `.hg/requires` already governs which format should be used.
25 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
26 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
26
27
27 /// Keep space for 256-bit hashes
28 /// Keep space for 256-bit hashes
28 const STORED_NODE_ID_BYTES: usize = 32;
29 const STORED_NODE_ID_BYTES: usize = 32;
29
30
30 /// … even though only 160 bits are used for now, with SHA-1
31 /// … even though only 160 bits are used for now, with SHA-1
31 const USED_NODE_ID_BYTES: usize = 20;
32 const USED_NODE_ID_BYTES: usize = 20;
32
33
33 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
34 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
34 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
35 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
35
36
36 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
37 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
37 const TREE_METADATA_SIZE: usize = 44;
38 const TREE_METADATA_SIZE: usize = 44;
38 const NODE_SIZE: usize = 44;
39 const NODE_SIZE: usize = 44;
39
40
40 /// Make sure that size-affecting changes are made knowingly
41 /// Make sure that size-affecting changes are made knowingly
41 #[allow(unused)]
42 #[allow(unused)]
42 fn static_assert_size_of() {
43 fn static_assert_size_of() {
43 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
44 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
44 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
45 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
45 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
46 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
46 }
47 }
47
48
48 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
49 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
49 #[derive(BytesCast)]
50 #[derive(BytesCast)]
50 #[repr(C)]
51 #[repr(C)]
51 struct DocketHeader {
52 struct DocketHeader {
52 marker: [u8; V2_FORMAT_MARKER.len()],
53 marker: [u8; V2_FORMAT_MARKER.len()],
53 parent_1: [u8; STORED_NODE_ID_BYTES],
54 parent_1: [u8; STORED_NODE_ID_BYTES],
54 parent_2: [u8; STORED_NODE_ID_BYTES],
55 parent_2: [u8; STORED_NODE_ID_BYTES],
55
56
56 metadata: TreeMetadata,
57 metadata: TreeMetadata,
57
58
58 /// Counted in bytes
59 /// Counted in bytes
59 data_size: Size,
60 data_size: Size,
60
61
61 uuid_size: u8,
62 uuid_size: u8,
62 }
63 }
63
64
64 pub struct Docket<'on_disk> {
65 pub struct Docket<'on_disk> {
65 header: &'on_disk DocketHeader,
66 header: &'on_disk DocketHeader,
66 pub uuid: &'on_disk [u8],
67 pub uuid: &'on_disk [u8],
67 }
68 }
68
69
69 /// Fields are documented in the *Tree metadata in the docket file*
70 /// Fields are documented in the *Tree metadata in the docket file*
70 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
71 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
71 #[derive(BytesCast)]
72 #[derive(BytesCast)]
72 #[repr(C)]
73 #[repr(C)]
73 pub struct TreeMetadata {
74 pub struct TreeMetadata {
74 root_nodes: ChildNodes,
75 root_nodes: ChildNodes,
75 nodes_with_entry_count: Size,
76 nodes_with_entry_count: Size,
76 nodes_with_copy_source_count: Size,
77 nodes_with_copy_source_count: Size,
77 unreachable_bytes: Size,
78 unreachable_bytes: Size,
78 unused: [u8; 4],
79 unused: [u8; 4],
79
80
80 /// See *Optional hash of ignore patterns* section of
81 /// See *Optional hash of ignore patterns* section of
81 /// `mercurial/helptext/internals/dirstate-v2.txt`
82 /// `mercurial/helptext/internals/dirstate-v2.txt`
82 ignore_patterns_hash: IgnorePatternsHash,
83 ignore_patterns_hash: IgnorePatternsHash,
83 }
84 }
84
85
85 /// Fields are documented in the *The data file format*
86 /// Fields are documented in the *The data file format*
86 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
87 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
87 #[derive(BytesCast)]
88 #[derive(BytesCast)]
88 #[repr(C)]
89 #[repr(C)]
89 pub(super) struct Node {
90 pub(super) struct Node {
90 full_path: PathSlice,
91 full_path: PathSlice,
91
92
92 /// In bytes from `self.full_path.start`
93 /// In bytes from `self.full_path.start`
93 base_name_start: PathSize,
94 base_name_start: PathSize,
94
95
95 copy_source: OptPathSlice,
96 copy_source: OptPathSlice,
96 children: ChildNodes,
97 children: ChildNodes,
97 pub(super) descendants_with_entry_count: Size,
98 pub(super) descendants_with_entry_count: Size,
98 pub(super) tracked_descendants_count: Size,
99 pub(super) tracked_descendants_count: Size,
99 flags: U16Be,
100 flags: U16Be,
100 size: U32Be,
101 size: U32Be,
101 mtime: PackedTruncatedTimestamp,
102 mtime: PackedTruncatedTimestamp,
102 }
103 }
103
104
104 bitflags! {
105 bitflags! {
105 #[repr(C)]
106 #[repr(C)]
106 struct Flags: u16 {
107 struct Flags: u16 {
107 const WDIR_TRACKED = 1 << 0;
108 const WDIR_TRACKED = 1 << 0;
108 const P1_TRACKED = 1 << 1;
109 const P1_TRACKED = 1 << 1;
109 const P2_INFO = 1 << 2;
110 const P2_INFO = 1 << 2;
110 const MODE_EXEC_PERM = 1 << 3;
111 const MODE_EXEC_PERM = 1 << 3;
111 const MODE_IS_SYMLINK = 1 << 4;
112 const MODE_IS_SYMLINK = 1 << 4;
112 const HAS_FALLBACK_EXEC = 1 << 5;
113 const HAS_FALLBACK_EXEC = 1 << 5;
113 const FALLBACK_EXEC = 1 << 6;
114 const FALLBACK_EXEC = 1 << 6;
114 const HAS_FALLBACK_SYMLINK = 1 << 7;
115 const HAS_FALLBACK_SYMLINK = 1 << 7;
115 const FALLBACK_SYMLINK = 1 << 8;
116 const FALLBACK_SYMLINK = 1 << 8;
116 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
117 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
117 const HAS_MODE_AND_SIZE = 1 <<10;
118 const HAS_MODE_AND_SIZE = 1 <<10;
118 const HAS_MTIME = 1 <<11;
119 const HAS_MTIME = 1 <<11;
119 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
120 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
120 const DIRECTORY = 1 <<13;
121 const DIRECTORY = 1 <<13;
121 const ALL_UNKNOWN_RECORDED = 1 <<14;
122 const ALL_UNKNOWN_RECORDED = 1 <<14;
122 const ALL_IGNORED_RECORDED = 1 <<15;
123 const ALL_IGNORED_RECORDED = 1 <<15;
123 }
124 }
124 }
125 }
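
Each `HAS_FALLBACK_*` / `FALLBACK_*` pair above encodes an `Option<bool>` in two bits: the first bit says whether a value is present, the second bit is the value itself. A dependency-free sketch of that encoding (plain bit operations stand in for the `bitflags` crate):

const HAS_FALLBACK_EXEC: u16 = 1 << 5;
const FALLBACK_EXEC: u16 = 1 << 6;

fn encode(fallback_exec: Option<bool>, flags: &mut u16) {
    if let Some(value) = fallback_exec {
        *flags |= HAS_FALLBACK_EXEC;
        if value {
            *flags |= FALLBACK_EXEC;
        }
    }
}

fn decode(flags: u16) -> Option<bool> {
    if flags & HAS_FALLBACK_EXEC != 0 {
        Some(flags & FALLBACK_EXEC != 0)
    } else {
        None
    }
}

fn main() {
    for v in [None, Some(false), Some(true)] {
        let mut flags = 0;
        encode(v, &mut flags);
        assert_eq!(decode(flags), v);
    }
}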
125
126
126 /// Duration since the Unix epoch
127 /// Duration since the Unix epoch
127 #[derive(BytesCast, Copy, Clone)]
128 #[derive(BytesCast, Copy, Clone)]
128 #[repr(C)]
129 #[repr(C)]
129 struct PackedTruncatedTimestamp {
130 struct PackedTruncatedTimestamp {
130 truncated_seconds: U32Be,
131 truncated_seconds: U32Be,
131 nanoseconds: U32Be,
132 nanoseconds: U32Be,
132 }
133 }
133
134
134 /// Counted in bytes from the start of the file
135 /// Counted in bytes from the start of the file
135 ///
136 ///
136 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
137 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
137 type Offset = U32Be;
138 type Offset = U32Be;
138
139
139 /// Counted in number of items
140 /// Counted in number of items
140 ///
141 ///
141 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
142 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
142 type Size = U32Be;
143 type Size = U32Be;
143
144
144 /// Counted in bytes
145 /// Counted in bytes
145 ///
146 ///
146 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
147 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
147 type PathSize = U16Be;
148 type PathSize = U16Be;
148
149
149 /// A contiguous sequence of `len` times `Node`, representing the child nodes
150 /// A contiguous sequence of `len` times `Node`, representing the child nodes
150 /// of either some other node or of the repository root.
151 /// of either some other node or of the repository root.
151 ///
152 ///
152 /// Always sorted by ascending `full_path`, to allow binary search.
153 /// Always sorted by ascending `full_path`, to allow binary search.
153 /// Since nodes with the same parent node also have the same parent path,
154 /// Since nodes with the same parent node also have the same parent path,
154 /// only the `base_name`s need to be compared during binary search.
155 /// only the `base_name`s need to be compared during binary search.
155 #[derive(BytesCast, Copy, Clone)]
156 #[derive(BytesCast, Copy, Clone)]
156 #[repr(C)]
157 #[repr(C)]
157 struct ChildNodes {
158 struct ChildNodes {
158 start: Offset,
159 start: Offset,
159 len: Size,
160 len: Size,
160 }
161 }
161
162
162 /// A `HgPath` of `len` bytes
163 /// A `HgPath` of `len` bytes
163 #[derive(BytesCast, Copy, Clone)]
164 #[derive(BytesCast, Copy, Clone)]
164 #[repr(C)]
165 #[repr(C)]
165 struct PathSlice {
166 struct PathSlice {
166 start: Offset,
167 start: Offset,
167 len: PathSize,
168 len: PathSize,
168 }
169 }
169
170
170 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
171 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
171 type OptPathSlice = PathSlice;
172 type OptPathSlice = PathSlice;
172
173
173 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
174 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
174 ///
175 ///
175 /// This should only happen if Mercurial is buggy or a repository is corrupted.
176 /// This should only happen if Mercurial is buggy or a repository is corrupted.
176 #[derive(Debug)]
177 #[derive(Debug)]
177 pub struct DirstateV2ParseError;
178 pub struct DirstateV2ParseError;
178
179
179 impl From<DirstateV2ParseError> for HgError {
180 impl From<DirstateV2ParseError> for HgError {
180 fn from(_: DirstateV2ParseError) -> Self {
181 fn from(_: DirstateV2ParseError) -> Self {
181 HgError::corrupted("dirstate-v2 parse error")
182 HgError::corrupted("dirstate-v2 parse error")
182 }
183 }
183 }
184 }
184
185
185 impl From<DirstateV2ParseError> for crate::DirstateError {
186 impl From<DirstateV2ParseError> for crate::DirstateError {
186 fn from(error: DirstateV2ParseError) -> Self {
187 fn from(error: DirstateV2ParseError) -> Self {
187 HgError::from(error).into()
188 HgError::from(error).into()
188 }
189 }
189 }
190 }
190
191
191 impl TreeMetadata {
192 impl TreeMetadata {
192 pub fn as_bytes(&self) -> &[u8] {
193 pub fn as_bytes(&self) -> &[u8] {
193 BytesCast::as_bytes(self)
194 BytesCast::as_bytes(self)
194 }
195 }
195 }
196 }
196
197
197 impl<'on_disk> Docket<'on_disk> {
198 impl<'on_disk> Docket<'on_disk> {
198 /// Generate the identifier for a new data file
199 /// Generate the identifier for a new data file
199 ///
200 ///
200 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
201 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
201 /// See `mercurial/revlogutils/docket.py`
202 /// See `mercurial/revlogutils/docket.py`
202 pub fn new_uid() -> String {
203 pub fn new_uid() -> String {
203 const ID_LENGTH: usize = 8;
204 const ID_LENGTH: usize = 8;
204 let mut id = String::with_capacity(ID_LENGTH);
205 let mut id = String::with_capacity(ID_LENGTH);
205 let mut rng = rand::thread_rng();
206 let mut rng = rand::thread_rng();
206 for _ in 0..ID_LENGTH {
207 for _ in 0..ID_LENGTH {
207 // One random hexadecimal digit.
208 // One random hexadecimal digit.
208 // `unwrap` never panics because `impl Write for String`
209 // `unwrap` never panics because `impl Write for String`
209 // never returns an error.
210 // never returns an error.
210 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
211 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
211 }
212 }
212 id
213 id
213 }
214 }
214
215
215 pub fn serialize(
216 pub fn serialize(
216 parents: DirstateParents,
217 parents: DirstateParents,
217 tree_metadata: TreeMetadata,
218 tree_metadata: TreeMetadata,
218 data_size: u64,
219 data_size: u64,
219 uuid: &[u8],
220 uuid: &[u8],
220 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
221 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
221 let header = DocketHeader {
222 let header = DocketHeader {
222 marker: *V2_FORMAT_MARKER,
223 marker: *V2_FORMAT_MARKER,
223 parent_1: parents.p1.pad_to_256_bits(),
224 parent_1: parents.p1.pad_to_256_bits(),
224 parent_2: parents.p2.pad_to_256_bits(),
225 parent_2: parents.p2.pad_to_256_bits(),
225 metadata: tree_metadata,
226 metadata: tree_metadata,
226 data_size: u32::try_from(data_size)?.into(),
227 data_size: u32::try_from(data_size)?.into(),
227 uuid_size: uuid.len().try_into()?,
228 uuid_size: uuid.len().try_into()?,
228 };
229 };
229 let header = header.as_bytes();
230 let header = header.as_bytes();
230 let mut docket = Vec::with_capacity(header.len() + uuid.len());
231 let mut docket = Vec::with_capacity(header.len() + uuid.len());
231 docket.extend_from_slice(header);
232 docket.extend_from_slice(header);
232 docket.extend_from_slice(uuid);
233 docket.extend_from_slice(uuid);
233 Ok(docket)
234 Ok(docket)
234 }
235 }
235
236
236 pub fn parents(&self) -> DirstateParents {
237 pub fn parents(&self) -> DirstateParents {
237 use crate::Node;
238 use crate::Node;
238 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
239 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
239 .unwrap()
240 .unwrap()
240 .clone();
241 .clone();
241 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
242 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
242 .unwrap()
243 .unwrap()
243 .clone();
244 .clone();
244 DirstateParents { p1, p2 }
245 DirstateParents { p1, p2 }
245 }
246 }
246
247
247 pub fn tree_metadata(&self) -> &[u8] {
248 pub fn tree_metadata(&self) -> &[u8] {
248 self.header.metadata.as_bytes()
249 self.header.metadata.as_bytes()
249 }
250 }
250
251
251 pub fn data_size(&self) -> usize {
252 pub fn data_size(&self) -> usize {
252 // This `unwrap` could only panic on a 16-bit CPU
253 // This `unwrap` could only panic on a 16-bit CPU
253 self.header.data_size.get().try_into().unwrap()
254 self.header.data_size.get().try_into().unwrap()
254 }
255 }
255
256
256 pub fn data_filename(&self) -> String {
257 pub fn data_filename(&self) -> String {
257 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
258 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
258 }
259 }
259 }
260 }
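
A short sketch of how a docket's uid turns into the data file name: eight random hexadecimal digits appended to a fixed `dirstate.` prefix. It uses the `rand` crate; `new_uid` below mirrors the method above, while `main` is only illustrative:

use rand::Rng;
use std::fmt::Write;

fn new_uid() -> String {
    const ID_LENGTH: usize = 8;
    let mut id = String::with_capacity(ID_LENGTH);
    let mut rng = rand::thread_rng();
    for _ in 0..ID_LENGTH {
        // One random hexadecimal digit; writing to a `String` cannot fail.
        write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
    }
    id
}

fn main() {
    let uid = new_uid();
    let data_filename = format!("dirstate.{}", uid);
    assert_eq!(data_filename.len(), "dirstate.".len() + 8);
    println!("{}", data_filename);
}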
260
261
261 pub fn read_docket(
262 pub fn read_docket(
262 on_disk: &[u8],
263 on_disk: &[u8],
263 ) -> Result<Docket<'_>, DirstateV2ParseError> {
264 ) -> Result<Docket<'_>, DirstateV2ParseError> {
264 let (header, uuid) =
265 let (header, uuid) =
265 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
266 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
266 let uuid_size = header.uuid_size as usize;
267 let uuid_size = header.uuid_size as usize;
267 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
268 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
268 Ok(Docket { header, uuid })
269 Ok(Docket { header, uuid })
269 } else {
270 } else {
270 Err(DirstateV2ParseError)
271 Err(DirstateV2ParseError)
271 }
272 }
272 }
273 }
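
A simplified, standalone sketch of the validation order in `read_docket`: check the marker, then check that exactly `uuid_size` bytes follow the fixed-size header. The three-field header layout below is invented for illustration and uses plain slicing instead of `bytes_cast`:

use std::convert::TryInto;

const MARKER: &[u8; 12] = b"dirstate-v2\n";

#[derive(Debug)]
struct ParseError;

fn read_docket(on_disk: &[u8]) -> Result<(u32, &[u8]), ParseError> {
    // marker (12 bytes) + data_size (4 bytes, big-endian) + uuid_size (1 byte)
    const HEADER_LEN: usize = 12 + 4 + 1;
    if on_disk.len() < HEADER_LEN || &on_disk[..12] != &MARKER[..] {
        return Err(ParseError);
    }
    let data_size = u32::from_be_bytes(on_disk[12..16].try_into().unwrap());
    let uuid_size = on_disk[16] as usize;
    let uuid = &on_disk[HEADER_LEN..];
    if uuid.len() != uuid_size {
        return Err(ParseError);
    }
    Ok((data_size, uuid))
}

fn main() {
    let mut docket = Vec::new();
    docket.extend_from_slice(MARKER);
    docket.extend_from_slice(&42u32.to_be_bytes());
    docket.push(4); // uuid_size
    docket.extend_from_slice(b"abcd");
    let (data_size, uuid) = read_docket(&docket).unwrap();
    assert_eq!((data_size, uuid), (42, &b"abcd"[..]));
}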
273
274
274 pub(super) fn read<'on_disk>(
275 pub(super) fn read<'on_disk>(
275 on_disk: &'on_disk [u8],
276 on_disk: &'on_disk [u8],
276 metadata: &[u8],
277 metadata: &[u8],
277 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
278 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
278 if on_disk.is_empty() {
279 if on_disk.is_empty() {
279 return Ok(DirstateMap::empty(on_disk));
280 let mut map = DirstateMap::empty(on_disk);
281 map.dirstate_version = DirstateVersion::V2;
282 return Ok(map);
280 }
283 }
281 let (meta, _) = TreeMetadata::from_bytes(metadata)
284 let (meta, _) = TreeMetadata::from_bytes(metadata)
282 .map_err(|_| DirstateV2ParseError)?;
285 .map_err(|_| DirstateV2ParseError)?;
283 let dirstate_map = DirstateMap {
286 let dirstate_map = DirstateMap {
284 on_disk,
287 on_disk,
285 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
288 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
286 on_disk,
289 on_disk,
287 meta.root_nodes,
290 meta.root_nodes,
288 )?),
291 )?),
289 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
292 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
290 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
293 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
291 ignore_patterns_hash: meta.ignore_patterns_hash,
294 ignore_patterns_hash: meta.ignore_patterns_hash,
292 unreachable_bytes: meta.unreachable_bytes.get(),
295 unreachable_bytes: meta.unreachable_bytes.get(),
293 old_data_size: on_disk.len(),
296 old_data_size: on_disk.len(),
297 dirstate_version: DirstateVersion::V2,
294 };
298 };
295 Ok(dirstate_map)
299 Ok(dirstate_map)
296 }
300 }
297
301
298 impl Node {
302 impl Node {
299 pub(super) fn full_path<'on_disk>(
303 pub(super) fn full_path<'on_disk>(
300 &self,
304 &self,
301 on_disk: &'on_disk [u8],
305 on_disk: &'on_disk [u8],
302 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
306 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
303 read_hg_path(on_disk, self.full_path)
307 read_hg_path(on_disk, self.full_path)
304 }
308 }
305
309
306 pub(super) fn base_name_start<'on_disk>(
310 pub(super) fn base_name_start<'on_disk>(
307 &self,
311 &self,
308 ) -> Result<usize, DirstateV2ParseError> {
312 ) -> Result<usize, DirstateV2ParseError> {
309 let start = self.base_name_start.get();
313 let start = self.base_name_start.get();
310 if start < self.full_path.len.get() {
314 if start < self.full_path.len.get() {
311 let start = usize::try_from(start)
315 let start = usize::try_from(start)
312 // u32 -> usize, could only panic on a 16-bit CPU
316 // u32 -> usize, could only panic on a 16-bit CPU
313 .expect("dirstate-v2 base_name_start out of bounds");
317 .expect("dirstate-v2 base_name_start out of bounds");
314 Ok(start)
318 Ok(start)
315 } else {
319 } else {
316 Err(DirstateV2ParseError)
320 Err(DirstateV2ParseError)
317 }
321 }
318 }
322 }
319
323
320 pub(super) fn base_name<'on_disk>(
324 pub(super) fn base_name<'on_disk>(
321 &self,
325 &self,
322 on_disk: &'on_disk [u8],
326 on_disk: &'on_disk [u8],
323 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
327 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
324 let full_path = self.full_path(on_disk)?;
328 let full_path = self.full_path(on_disk)?;
325 let base_name_start = self.base_name_start()?;
329 let base_name_start = self.base_name_start()?;
326 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
330 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
327 }
331 }
328
332
329 pub(super) fn path<'on_disk>(
333 pub(super) fn path<'on_disk>(
330 &self,
334 &self,
331 on_disk: &'on_disk [u8],
335 on_disk: &'on_disk [u8],
332 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
336 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
333 Ok(WithBasename::from_raw_parts(
337 Ok(WithBasename::from_raw_parts(
334 Cow::Borrowed(self.full_path(on_disk)?),
338 Cow::Borrowed(self.full_path(on_disk)?),
335 self.base_name_start()?,
339 self.base_name_start()?,
336 ))
340 ))
337 }
341 }
338
342
339 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
343 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
340 self.copy_source.start.get() != 0
344 self.copy_source.start.get() != 0
341 }
345 }
342
346
343 pub(super) fn copy_source<'on_disk>(
347 pub(super) fn copy_source<'on_disk>(
344 &self,
348 &self,
345 on_disk: &'on_disk [u8],
349 on_disk: &'on_disk [u8],
346 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
350 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
347 Ok(if self.has_copy_source() {
351 Ok(if self.has_copy_source() {
348 Some(read_hg_path(on_disk, self.copy_source)?)
352 Some(read_hg_path(on_disk, self.copy_source)?)
349 } else {
353 } else {
350 None
354 None
351 })
355 })
352 }
356 }
353
357
354 fn flags(&self) -> Flags {
358 fn flags(&self) -> Flags {
355 Flags::from_bits_truncate(self.flags.get())
359 Flags::from_bits_truncate(self.flags.get())
356 }
360 }
357
361
358 fn has_entry(&self) -> bool {
362 fn has_entry(&self) -> bool {
359 self.flags().intersects(
363 self.flags().intersects(
360 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
364 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
361 )
365 )
362 }
366 }
363
367
364 pub(super) fn node_data(
368 pub(super) fn node_data(
365 &self,
369 &self,
366 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
370 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
367 if self.has_entry() {
371 if self.has_entry() {
368 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
372 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
369 } else if let Some(mtime) = self.cached_directory_mtime()? {
373 } else if let Some(mtime) = self.cached_directory_mtime()? {
370 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
374 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
371 } else {
375 } else {
372 Ok(dirstate_map::NodeData::None)
376 Ok(dirstate_map::NodeData::None)
373 }
377 }
374 }
378 }
375
379
376 pub(super) fn cached_directory_mtime(
380 pub(super) fn cached_directory_mtime(
377 &self,
381 &self,
378 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
382 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
379 // For now we do not have code to handle the absence of
383 // For now we do not have code to handle the absence of
380 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
384 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
381 // unset.
385 // unset.
382 if self.flags().contains(Flags::DIRECTORY)
386 if self.flags().contains(Flags::DIRECTORY)
383 && self.flags().contains(Flags::HAS_MTIME)
387 && self.flags().contains(Flags::HAS_MTIME)
384 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
388 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
385 {
389 {
386 Ok(Some(self.mtime()?))
390 Ok(Some(self.mtime()?))
387 } else {
391 } else {
388 Ok(None)
392 Ok(None)
389 }
393 }
390 }
394 }
391
395
392 fn synthesize_unix_mode(&self) -> u32 {
396 fn synthesize_unix_mode(&self) -> u32 {
393 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
397 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
394 libc::S_IFLNK
398 libc::S_IFLNK
395 } else {
399 } else {
396 libc::S_IFREG
400 libc::S_IFREG
397 };
401 };
398 let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
402 let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
399 0o755
403 0o755
400 } else {
404 } else {
401 0o644
405 0o644
402 };
406 };
403 (file_type | permissions).into()
407 (file_type | permissions).into()
404 }
408 }
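
Only two bits of mode information are stored, so the full Unix mode has to be rebuilt from them. A dependency-free sketch of the synthesis above (octal constants stand in for the `libc` ones):

const S_IFLNK: u32 = 0o120000;
const S_IFREG: u32 = 0o100000;

fn synthesize_unix_mode(is_symlink: bool, exec_perm: bool) -> u32 {
    // File type from the symlink bit, permissions from the exec bit.
    let file_type = if is_symlink { S_IFLNK } else { S_IFREG };
    let permissions = if exec_perm { 0o755 } else { 0o644 };
    file_type | permissions
}

fn main() {
    assert_eq!(synthesize_unix_mode(false, false), 0o100644);
    assert_eq!(synthesize_unix_mode(false, true), 0o100755);
    assert_eq!(synthesize_unix_mode(true, false), 0o120644);
}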
405
409
406 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
410 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
407 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
411 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
408 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
412 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
409 m.second_ambiguous = true;
413 m.second_ambiguous = true;
410 }
414 }
411 Ok(m)
415 Ok(m)
412 }
416 }
413
417
414 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
418 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
415 // TODO: convert through raw bits instead?
419 // TODO: convert through raw bits instead?
416 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
420 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
417 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
421 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
418 let p2_info = self.flags().contains(Flags::P2_INFO);
422 let p2_info = self.flags().contains(Flags::P2_INFO);
419 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
423 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
420 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
424 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
421 {
425 {
422 Some((self.synthesize_unix_mode(), self.size.into()))
426 Some((self.synthesize_unix_mode(), self.size.into()))
423 } else {
427 } else {
424 None
428 None
425 };
429 };
426 let mtime = if self.flags().contains(Flags::HAS_MTIME)
430 let mtime = if self.flags().contains(Flags::HAS_MTIME)
427 && !self.flags().contains(Flags::DIRECTORY)
431 && !self.flags().contains(Flags::DIRECTORY)
428 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
432 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
429 {
433 {
430 Some(self.mtime()?)
434 Some(self.mtime()?)
431 } else {
435 } else {
432 None
436 None
433 };
437 };
434 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
438 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
435 {
439 {
436 Some(self.flags().contains(Flags::FALLBACK_EXEC))
440 Some(self.flags().contains(Flags::FALLBACK_EXEC))
437 } else {
441 } else {
438 None
442 None
439 };
443 };
440 let fallback_symlink =
444 let fallback_symlink =
441 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
445 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
442 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
446 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
443 } else {
447 } else {
444 None
448 None
445 };
449 };
446 Ok(DirstateEntry::from_v2_data(
450 Ok(DirstateEntry::from_v2_data(
447 wdir_tracked,
451 wdir_tracked,
448 p1_tracked,
452 p1_tracked,
449 p2_info,
453 p2_info,
450 mode_size,
454 mode_size,
451 mtime,
455 mtime,
452 fallback_exec,
456 fallback_exec,
453 fallback_symlink,
457 fallback_symlink,
454 ))
458 ))
455 }
459 }
456
460
457 pub(super) fn entry(
461 pub(super) fn entry(
458 &self,
462 &self,
459 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
463 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
460 if self.has_entry() {
464 if self.has_entry() {
461 Ok(Some(self.assume_entry()?))
465 Ok(Some(self.assume_entry()?))
462 } else {
466 } else {
463 Ok(None)
467 Ok(None)
464 }
468 }
465 }
469 }
466
470
467 pub(super) fn children<'on_disk>(
471 pub(super) fn children<'on_disk>(
468 &self,
472 &self,
469 on_disk: &'on_disk [u8],
473 on_disk: &'on_disk [u8],
470 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
474 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
471 read_nodes(on_disk, self.children)
475 read_nodes(on_disk, self.children)
472 }
476 }
473
477
474 pub(super) fn to_in_memory_node<'on_disk>(
478 pub(super) fn to_in_memory_node<'on_disk>(
475 &self,
479 &self,
476 on_disk: &'on_disk [u8],
480 on_disk: &'on_disk [u8],
477 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
481 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
478 Ok(dirstate_map::Node {
482 Ok(dirstate_map::Node {
479 children: dirstate_map::ChildNodes::OnDisk(
483 children: dirstate_map::ChildNodes::OnDisk(
480 self.children(on_disk)?,
484 self.children(on_disk)?,
481 ),
485 ),
482 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
486 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
483 data: self.node_data()?,
487 data: self.node_data()?,
484 descendants_with_entry_count: self
488 descendants_with_entry_count: self
485 .descendants_with_entry_count
489 .descendants_with_entry_count
486 .get(),
490 .get(),
487 tracked_descendants_count: self.tracked_descendants_count.get(),
491 tracked_descendants_count: self.tracked_descendants_count.get(),
488 })
492 })
489 }
493 }
490
494
491 fn from_dirstate_entry(
495 fn from_dirstate_entry(
492 entry: &DirstateEntry,
496 entry: &DirstateEntry,
493 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
497 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
494 let (
498 let (
495 wdir_tracked,
499 wdir_tracked,
496 p1_tracked,
500 p1_tracked,
497 p2_info,
501 p2_info,
498 mode_size_opt,
502 mode_size_opt,
499 mtime_opt,
503 mtime_opt,
500 fallback_exec,
504 fallback_exec,
501 fallback_symlink,
505 fallback_symlink,
502 ) = entry.v2_data();
506 ) = entry.v2_data();
503 // TODO: convert through raw flag bits instead?
507 // TODO: convert through raw flag bits instead?
504 let mut flags = Flags::empty();
508 let mut flags = Flags::empty();
505 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
509 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
506 flags.set(Flags::P1_TRACKED, p1_tracked);
510 flags.set(Flags::P1_TRACKED, p1_tracked);
507 flags.set(Flags::P2_INFO, p2_info);
511 flags.set(Flags::P2_INFO, p2_info);
508 let size = if let Some((m, s)) = mode_size_opt {
512 let size = if let Some((m, s)) = mode_size_opt {
509 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
513 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
510 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
514 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
511 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
515 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
512 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
516 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
513 flags.insert(Flags::HAS_MODE_AND_SIZE);
517 flags.insert(Flags::HAS_MODE_AND_SIZE);
514 s.into()
518 s.into()
515 } else {
519 } else {
516 0.into()
520 0.into()
517 };
521 };
518 let mtime = if let Some(m) = mtime_opt {
522 let mtime = if let Some(m) = mtime_opt {
519 flags.insert(Flags::HAS_MTIME);
523 flags.insert(Flags::HAS_MTIME);
520 if m.second_ambiguous {
524 if m.second_ambiguous {
521 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
525 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
522 };
526 };
523 m.into()
527 m.into()
524 } else {
528 } else {
525 PackedTruncatedTimestamp::null()
529 PackedTruncatedTimestamp::null()
526 };
530 };
527 if let Some(f_exec) = fallback_exec {
531 if let Some(f_exec) = fallback_exec {
528 flags.insert(Flags::HAS_FALLBACK_EXEC);
532 flags.insert(Flags::HAS_FALLBACK_EXEC);
529 if f_exec {
533 if f_exec {
530 flags.insert(Flags::FALLBACK_EXEC);
534 flags.insert(Flags::FALLBACK_EXEC);
531 }
535 }
532 }
536 }
533 if let Some(f_symlink) = fallback_symlink {
537 if let Some(f_symlink) = fallback_symlink {
534 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
538 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
535 if f_symlink {
539 if f_symlink {
536 flags.insert(Flags::FALLBACK_SYMLINK);
540 flags.insert(Flags::FALLBACK_SYMLINK);
537 }
541 }
538 }
542 }
539 (flags, size, mtime)
543 (flags, size, mtime)
540 }
544 }
541 }
545 }
542
546
543 fn read_hg_path(
547 fn read_hg_path(
544 on_disk: &[u8],
548 on_disk: &[u8],
545 slice: PathSlice,
549 slice: PathSlice,
546 ) -> Result<&HgPath, DirstateV2ParseError> {
550 ) -> Result<&HgPath, DirstateV2ParseError> {
547 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
551 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
548 }
552 }
549
553
550 fn read_nodes(
554 fn read_nodes(
551 on_disk: &[u8],
555 on_disk: &[u8],
552 slice: ChildNodes,
556 slice: ChildNodes,
553 ) -> Result<&[Node], DirstateV2ParseError> {
557 ) -> Result<&[Node], DirstateV2ParseError> {
554 read_slice(on_disk, slice.start, slice.len.get())
558 read_slice(on_disk, slice.start, slice.len.get())
555 }
559 }
556
560
557 fn read_slice<T, Len>(
561 fn read_slice<T, Len>(
558 on_disk: &[u8],
562 on_disk: &[u8],
559 start: Offset,
563 start: Offset,
560 len: Len,
564 len: Len,
561 ) -> Result<&[T], DirstateV2ParseError>
565 ) -> Result<&[T], DirstateV2ParseError>
562 where
566 where
563 T: BytesCast,
567 T: BytesCast,
564 Len: TryInto<usize>,
568 Len: TryInto<usize>,
565 {
569 {
566 // Either conversion falling back to `usize::MAX` would result in an "out of
570 // Either conversion falling back to `usize::MAX` would result in an "out of
567 // bounds" error, since a single `&[u8]` cannot occupy the entire address space.
571 // bounds" error, since a single `&[u8]` cannot occupy the entire address space.
568 let start = start.get().try_into().unwrap_or(std::usize::MAX);
572 let start = start.get().try_into().unwrap_or(std::usize::MAX);
569 let len = len.try_into().unwrap_or(std::usize::MAX);
573 let len = len.try_into().unwrap_or(std::usize::MAX);
570 on_disk
574 on_disk
571 .get(start..)
575 .get(start..)
572 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
576 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
573 .map(|(slice, _rest)| slice)
577 .map(|(slice, _rest)| slice)
574 .ok_or_else(|| DirstateV2ParseError)
578 .ok_or_else(|| DirstateV2ParseError)
575 }
579 }
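
A standalone sketch of the bounds-checking strategy used by `read_slice`: saturate the integer conversions, then let `get` plus a length check turn any out-of-range request into an error instead of a panic. Fixed-size big-endian `u32` records stand in here for the `BytesCast` types:

use std::convert::{TryFrom, TryInto};

fn read_u32s(on_disk: &[u8], start: u32, len: u32) -> Option<Vec<u32>> {
    // Saturating to `usize::MAX` simply makes the later bounds check fail.
    let start = usize::try_from(start).unwrap_or(usize::MAX);
    let len = usize::try_from(len).unwrap_or(usize::MAX);
    let byte_len = len.checked_mul(4)?;
    let bytes = on_disk.get(start..)?.get(..byte_len)?;
    Some(
        bytes
            .chunks_exact(4)
            .map(|c| u32::from_be_bytes(c.try_into().unwrap()))
            .collect(),
    )
}

fn main() {
    let data = [0u8, 0, 0, 7, 0, 0, 0, 9];
    assert_eq!(read_u32s(&data, 0, 2), Some(vec![7, 9]));
    assert_eq!(read_u32s(&data, 4, 2), None); // would run past the end
}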
576
580
577 pub(crate) fn for_each_tracked_path<'on_disk>(
581 pub(crate) fn for_each_tracked_path<'on_disk>(
578 on_disk: &'on_disk [u8],
582 on_disk: &'on_disk [u8],
579 metadata: &[u8],
583 metadata: &[u8],
580 mut f: impl FnMut(&'on_disk HgPath),
584 mut f: impl FnMut(&'on_disk HgPath),
581 ) -> Result<(), DirstateV2ParseError> {
585 ) -> Result<(), DirstateV2ParseError> {
582 let (meta, _) = TreeMetadata::from_bytes(metadata)
586 let (meta, _) = TreeMetadata::from_bytes(metadata)
583 .map_err(|_| DirstateV2ParseError)?;
587 .map_err(|_| DirstateV2ParseError)?;
584 fn recur<'on_disk>(
588 fn recur<'on_disk>(
585 on_disk: &'on_disk [u8],
589 on_disk: &'on_disk [u8],
586 nodes: ChildNodes,
590 nodes: ChildNodes,
587 f: &mut impl FnMut(&'on_disk HgPath),
591 f: &mut impl FnMut(&'on_disk HgPath),
588 ) -> Result<(), DirstateV2ParseError> {
592 ) -> Result<(), DirstateV2ParseError> {
589 for node in read_nodes(on_disk, nodes)? {
593 for node in read_nodes(on_disk, nodes)? {
590 if let Some(entry) = node.entry()? {
594 if let Some(entry) = node.entry()? {
591 if entry.state().is_tracked() {
595 if entry.state().is_tracked() {
592 f(node.full_path(on_disk)?)
596 f(node.full_path(on_disk)?)
593 }
597 }
594 }
598 }
595 recur(on_disk, node.children, f)?
599 recur(on_disk, node.children, f)?
596 }
600 }
597 Ok(())
601 Ok(())
598 }
602 }
599 recur(on_disk, meta.root_nodes, &mut f)
603 recur(on_disk, meta.root_nodes, &mut f)
600 }
604 }
601
605
602 /// Returns new data and metadata, together with whether that data should be
606 /// Returns new data and metadata, together with whether that data should be
603 /// appended to the existing data file whose content is at
607 /// appended to the existing data file whose content is at
604 /// `dirstate_map.on_disk` (true), instead of written to a new data file
608 /// `dirstate_map.on_disk` (true), instead of written to a new data file
605 /// (false), and the previous size of data on disk.
609 /// (false), and the previous size of data on disk.
606 pub(super) fn write(
610 pub(super) fn write(
607 dirstate_map: &DirstateMap,
611 dirstate_map: &DirstateMap,
608 can_append: bool,
612 can_append: bool,
609 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
613 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
610 let append = can_append && dirstate_map.write_should_append();
614 let append = can_append && dirstate_map.write_should_append();
611
615
612 // This estimate ignores the space needed for paths and for nodes without an entry.
616 // This estimate ignores the space needed for paths and for nodes without an entry.
613 // TODO: better estimate? Skip the `Vec` and write to a file directly?
617 // TODO: better estimate? Skip the `Vec` and write to a file directly?
614 let size_guess = std::mem::size_of::<Node>()
618 let size_guess = std::mem::size_of::<Node>()
615 * dirstate_map.nodes_with_entry_count as usize;
619 * dirstate_map.nodes_with_entry_count as usize;
616
620
617 let mut writer = Writer {
621 let mut writer = Writer {
618 dirstate_map,
622 dirstate_map,
619 append,
623 append,
620 out: Vec::with_capacity(size_guess),
624 out: Vec::with_capacity(size_guess),
621 };
625 };
622
626
623 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
627 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
624
628
625 let unreachable_bytes = if append {
629 let unreachable_bytes = if append {
626 dirstate_map.unreachable_bytes
630 dirstate_map.unreachable_bytes
627 } else {
631 } else {
628 0
632 0
629 };
633 };
630 let meta = TreeMetadata {
634 let meta = TreeMetadata {
631 root_nodes,
635 root_nodes,
632 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
636 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
633 nodes_with_copy_source_count: dirstate_map
637 nodes_with_copy_source_count: dirstate_map
634 .nodes_with_copy_source_count
638 .nodes_with_copy_source_count
635 .into(),
639 .into(),
636 unreachable_bytes: unreachable_bytes.into(),
640 unreachable_bytes: unreachable_bytes.into(),
637 unused: [0; 4],
641 unused: [0; 4],
638 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
642 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
639 };
643 };
640 Ok((writer.out, meta, append, dirstate_map.old_data_size))
644 Ok((writer.out, meta, append, dirstate_map.old_data_size))
641 }
645 }
642
646
643 struct Writer<'dmap, 'on_disk> {
647 struct Writer<'dmap, 'on_disk> {
644 dirstate_map: &'dmap DirstateMap<'on_disk>,
648 dirstate_map: &'dmap DirstateMap<'on_disk>,
645 append: bool,
649 append: bool,
646 out: Vec<u8>,
650 out: Vec<u8>,
647 }
651 }
648
652
649 impl Writer<'_, '_> {
653 impl Writer<'_, '_> {
650 fn write_nodes(
654 fn write_nodes(
651 &mut self,
655 &mut self,
652 nodes: dirstate_map::ChildNodesRef,
656 nodes: dirstate_map::ChildNodesRef,
653 ) -> Result<ChildNodes, DirstateError> {
657 ) -> Result<ChildNodes, DirstateError> {
654 // Reuse already-written nodes if possible
658 // Reuse already-written nodes if possible
655 if self.append {
659 if self.append {
656 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
660 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
657 let start = self.on_disk_offset_of(nodes_slice).expect(
661 let start = self.on_disk_offset_of(nodes_slice).expect(
658 "dirstate-v2 OnDisk nodes not found within on_disk",
662 "dirstate-v2 OnDisk nodes not found within on_disk",
659 );
663 );
660 let len = child_nodes_len_from_usize(nodes_slice.len());
664 let len = child_nodes_len_from_usize(nodes_slice.len());
661 return Ok(ChildNodes { start, len });
665 return Ok(ChildNodes { start, len });
662 }
666 }
663 }
667 }
664
668
665 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
669 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
666 // undefined iteration order. Sort to enable binary search in the
670 // undefined iteration order. Sort to enable binary search in the
667 // written file.
671 // written file.
668 let nodes = nodes.sorted();
672 let nodes = nodes.sorted();
669 let nodes_len = nodes.len();
673 let nodes_len = nodes.len();
670
674
671 // First accumulate serialized nodes in a `Vec`
675 // First accumulate serialized nodes in a `Vec`
672 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
676 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
673 for node in nodes {
677 for node in nodes {
674 let children =
678 let children =
675 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
679 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
676 let full_path = node.full_path(self.dirstate_map.on_disk)?;
680 let full_path = node.full_path(self.dirstate_map.on_disk)?;
677 let full_path = self.write_path(full_path.as_bytes());
681 let full_path = self.write_path(full_path.as_bytes());
678 let copy_source = if let Some(source) =
682 let copy_source = if let Some(source) =
679 node.copy_source(self.dirstate_map.on_disk)?
683 node.copy_source(self.dirstate_map.on_disk)?
680 {
684 {
681 self.write_path(source.as_bytes())
685 self.write_path(source.as_bytes())
682 } else {
686 } else {
683 PathSlice {
687 PathSlice {
684 start: 0.into(),
688 start: 0.into(),
685 len: 0.into(),
689 len: 0.into(),
686 }
690 }
687 };
691 };
688 on_disk_nodes.push(match node {
692 on_disk_nodes.push(match node {
689 NodeRef::InMemory(path, node) => {
693 NodeRef::InMemory(path, node) => {
690 let (flags, size, mtime) = match &node.data {
694 let (flags, size, mtime) = match &node.data {
691 dirstate_map::NodeData::Entry(entry) => {
695 dirstate_map::NodeData::Entry(entry) => {
692 Node::from_dirstate_entry(entry)
696 Node::from_dirstate_entry(entry)
693 }
697 }
694 dirstate_map::NodeData::CachedDirectory { mtime } => {
698 dirstate_map::NodeData::CachedDirectory { mtime } => {
695 // we currently never set an mtime if unknown files
699 // we currently never set an mtime if unknown files
696 // are present.
700 // are present.
697 // So if we have an mtime for a directory, we know
701 // So if we have an mtime for a directory, we know
698 // there are no unknown
702 // there are no unknown
699 // files and we
703 // files and we
700 // blindly set ALL_UNKNOWN_RECORDED.
704 // blindly set ALL_UNKNOWN_RECORDED.
701 //
705 //
702 // We never set ALL_IGNORED_RECORDED since we
706 // We never set ALL_IGNORED_RECORDED since we
703 // don't track that case
707 // don't track that case
704 // currently.
708 // currently.
705 let mut flags = Flags::DIRECTORY
709 let mut flags = Flags::DIRECTORY
706 | Flags::HAS_MTIME
710 | Flags::HAS_MTIME
707 | Flags::ALL_UNKNOWN_RECORDED;
711 | Flags::ALL_UNKNOWN_RECORDED;
708 if mtime.second_ambiguous {
712 if mtime.second_ambiguous {
709 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
713 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
710 }
714 }
711 (flags, 0.into(), (*mtime).into())
715 (flags, 0.into(), (*mtime).into())
712 }
716 }
713 dirstate_map::NodeData::None => (
717 dirstate_map::NodeData::None => (
714 Flags::DIRECTORY,
718 Flags::DIRECTORY,
715 0.into(),
719 0.into(),
716 PackedTruncatedTimestamp::null(),
720 PackedTruncatedTimestamp::null(),
717 ),
721 ),
718 };
722 };
719 Node {
723 Node {
720 children,
724 children,
721 copy_source,
725 copy_source,
722 full_path,
726 full_path,
723 base_name_start: u16::try_from(path.base_name_start())
727 base_name_start: u16::try_from(path.base_name_start())
724 // Could only panic for paths over 64 KiB
728 // Could only panic for paths over 64 KiB
725 .expect("dirstate-v2 path length overflow")
729 .expect("dirstate-v2 path length overflow")
726 .into(),
730 .into(),
727 descendants_with_entry_count: node
731 descendants_with_entry_count: node
728 .descendants_with_entry_count
732 .descendants_with_entry_count
729 .into(),
733 .into(),
730 tracked_descendants_count: node
734 tracked_descendants_count: node
731 .tracked_descendants_count
735 .tracked_descendants_count
732 .into(),
736 .into(),
733 flags: flags.bits().into(),
737 flags: flags.bits().into(),
734 size,
738 size,
735 mtime,
739 mtime,
736 }
740 }
737 }
741 }
738 NodeRef::OnDisk(node) => Node {
742 NodeRef::OnDisk(node) => Node {
739 children,
743 children,
740 copy_source,
744 copy_source,
741 full_path,
745 full_path,
742 ..*node
746 ..*node
743 },
747 },
744 })
748 })
745 }
749 }
746 // … so we can write them contiguously, after writing everything else
750 // … so we can write them contiguously, after writing everything else
747 // they refer to.
751 // they refer to.
748 let start = self.current_offset();
752 let start = self.current_offset();
749 let len = child_nodes_len_from_usize(nodes_len);
753 let len = child_nodes_len_from_usize(nodes_len);
750 self.out.extend(on_disk_nodes.as_bytes());
754 self.out.extend(on_disk_nodes.as_bytes());
751 Ok(ChildNodes { start, len })
755 Ok(ChildNodes { start, len })
752 }
756 }
753
757
754 /// If the given slice of items is within `on_disk`, returns its offset
758 /// If the given slice of items is within `on_disk`, returns its offset
755 /// from the start of `on_disk`.
759 /// from the start of `on_disk`.
756 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
760 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
757 where
761 where
758 T: BytesCast,
762 T: BytesCast,
759 {
763 {
760 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
764 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
761 let start = slice.as_ptr() as usize;
765 let start = slice.as_ptr() as usize;
762 let end = start + slice.len();
766 let end = start + slice.len();
763 start..=end
767 start..=end
764 }
768 }
765 let slice_addresses = address_range(slice.as_bytes());
769 let slice_addresses = address_range(slice.as_bytes());
766 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
770 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
767 if on_disk_addresses.contains(slice_addresses.start())
771 if on_disk_addresses.contains(slice_addresses.start())
768 && on_disk_addresses.contains(slice_addresses.end())
772 && on_disk_addresses.contains(slice_addresses.end())
769 {
773 {
770 let offset = slice_addresses.start() - on_disk_addresses.start();
774 let offset = slice_addresses.start() - on_disk_addresses.start();
771 Some(offset_from_usize(offset))
775 Some(offset_from_usize(offset))
772 } else {
776 } else {
773 None
777 None
774 }
778 }
775 }
779 }
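// A minimal standalone sketch (not part of the original change) of the
// pointer-range check used by `on_disk_offset_of` above, reduced to plain byte
// slices: a sub-slice that borrows from a parent buffer lies inside the
// parent's address range, so its offset can be recovered with pointer
// arithmetic. Names are illustrative only.
fn offset_within(parent: &[u8], slice: &[u8]) -> Option<usize> {
    let parent_start = parent.as_ptr() as usize;
    let parent_end = parent_start + parent.len();
    let slice_start = slice.as_ptr() as usize;
    let slice_end = slice_start + slice.len();
    if slice_start >= parent_start && slice_end <= parent_end {
        // `slice` points into `parent`: return its offset from the start.
        Some(slice_start - parent_start)
    } else {
        // `slice` was allocated elsewhere (e.g. in memory, not on disk).
        None
    }
}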
776
780
777 fn current_offset(&mut self) -> Offset {
781 fn current_offset(&mut self) -> Offset {
778 let mut offset = self.out.len();
782 let mut offset = self.out.len();
779 if self.append {
783 if self.append {
780 offset += self.dirstate_map.on_disk.len()
784 offset += self.dirstate_map.on_disk.len()
781 }
785 }
782 offset_from_usize(offset)
786 offset_from_usize(offset)
783 }
787 }
784
788
785 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
789 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
786 let len = path_len_from_usize(slice.len());
790 let len = path_len_from_usize(slice.len());
787 // Reuse an already-written path if possible
791 // Reuse an already-written path if possible
788 if self.append {
792 if self.append {
789 if let Some(start) = self.on_disk_offset_of(slice) {
793 if let Some(start) = self.on_disk_offset_of(slice) {
790 return PathSlice { start, len };
794 return PathSlice { start, len };
791 }
795 }
792 }
796 }
793 let start = self.current_offset();
797 let start = self.current_offset();
794 self.out.extend(slice.as_bytes());
798 self.out.extend(slice.as_bytes());
795 PathSlice { start, len }
799 PathSlice { start, len }
796 }
800 }
797 }
801 }
798
802
799 fn offset_from_usize(x: usize) -> Offset {
803 fn offset_from_usize(x: usize) -> Offset {
800 u32::try_from(x)
804 u32::try_from(x)
801 // Could only panic for a dirstate file larger than 4 GiB
805 // Could only panic for a dirstate file larger than 4 GiB
802 .expect("dirstate-v2 offset overflow")
806 .expect("dirstate-v2 offset overflow")
803 .into()
807 .into()
804 }
808 }
805
809
806 fn child_nodes_len_from_usize(x: usize) -> Size {
810 fn child_nodes_len_from_usize(x: usize) -> Size {
807 u32::try_from(x)
811 u32::try_from(x)
808 // Could only panic with over 4 billion nodes
812 // Could only panic with over 4 billion nodes
809 .expect("dirstate-v2 slice length overflow")
813 .expect("dirstate-v2 slice length overflow")
810 .into()
814 .into()
811 }
815 }
812
816
813 fn path_len_from_usize(x: usize) -> PathSize {
817 fn path_len_from_usize(x: usize) -> PathSize {
814 u16::try_from(x)
818 u16::try_from(x)
815 // Could only panic for paths over 64 KiB
819 // Could only panic for paths over 64 KiB
816 .expect("dirstate-v2 path length overflow")
820 .expect("dirstate-v2 path length overflow")
817 .into()
821 .into()
818 }
822 }
819
823
820 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
824 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
821 fn from(timestamp: TruncatedTimestamp) -> Self {
825 fn from(timestamp: TruncatedTimestamp) -> Self {
822 Self {
826 Self {
823 truncated_seconds: timestamp.truncated_seconds().into(),
827 truncated_seconds: timestamp.truncated_seconds().into(),
824 nanoseconds: timestamp.nanoseconds().into(),
828 nanoseconds: timestamp.nanoseconds().into(),
825 }
829 }
826 }
830 }
827 }
831 }
828
832
829 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
833 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
830 type Error = DirstateV2ParseError;
834 type Error = DirstateV2ParseError;
831
835
832 fn try_from(
836 fn try_from(
833 timestamp: PackedTruncatedTimestamp,
837 timestamp: PackedTruncatedTimestamp,
834 ) -> Result<Self, Self::Error> {
838 ) -> Result<Self, Self::Error> {
835 Self::from_already_truncated(
839 Self::from_already_truncated(
836 timestamp.truncated_seconds.get(),
840 timestamp.truncated_seconds.get(),
837 timestamp.nanoseconds.get(),
841 timestamp.nanoseconds.get(),
838 false,
842 false,
839 )
843 )
840 }
844 }
841 }
845 }
842 impl PackedTruncatedTimestamp {
846 impl PackedTruncatedTimestamp {
843 fn null() -> Self {
847 fn null() -> Self {
844 Self {
848 Self {
845 truncated_seconds: 0.into(),
849 truncated_seconds: 0.into(),
846 nanoseconds: 0.into(),
850 nanoseconds: 0.into(),
847 }
851 }
848 }
852 }
849 }
853 }
@@ -1,849 +1,864 b''
1 use crate::dirstate::entry::TruncatedTimestamp;
1 use crate::dirstate::entry::TruncatedTimestamp;
2 use crate::dirstate::status::IgnoreFnType;
2 use crate::dirstate::status::IgnoreFnType;
3 use crate::dirstate::status::StatusPath;
3 use crate::dirstate::status::StatusPath;
4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 use crate::dirstate_tree::dirstate_map::DirstateMap;
6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 use crate::dirstate_tree::dirstate_map::NodeData;
8 use crate::dirstate_tree::dirstate_map::NodeData;
8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 use crate::dirstate_tree::dirstate_map::NodeRef;
9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 use crate::matchers::get_ignore_function;
11 use crate::matchers::get_ignore_function;
11 use crate::matchers::Matcher;
12 use crate::matchers::Matcher;
12 use crate::utils::files::get_bytes_from_os_string;
13 use crate::utils::files::get_bytes_from_os_string;
13 use crate::utils::files::get_path_from_bytes;
14 use crate::utils::files::get_path_from_bytes;
14 use crate::utils::hg_path::HgPath;
15 use crate::utils::hg_path::HgPath;
15 use crate::BadMatch;
16 use crate::BadMatch;
16 use crate::DirstateStatus;
17 use crate::DirstateStatus;
17 use crate::EntryState;
18 use crate::EntryState;
18 use crate::HgPathBuf;
19 use crate::HgPathBuf;
19 use crate::HgPathCow;
20 use crate::HgPathCow;
20 use crate::PatternFileWarning;
21 use crate::PatternFileWarning;
21 use crate::StatusError;
22 use crate::StatusError;
22 use crate::StatusOptions;
23 use crate::StatusOptions;
23 use micro_timer::timed;
24 use micro_timer::timed;
24 use rayon::prelude::*;
25 use rayon::prelude::*;
25 use sha1::{Digest, Sha1};
26 use sha1::{Digest, Sha1};
26 use std::borrow::Cow;
27 use std::borrow::Cow;
27 use std::io;
28 use std::io;
28 use std::path::Path;
29 use std::path::Path;
29 use std::path::PathBuf;
30 use std::path::PathBuf;
30 use std::sync::Mutex;
31 use std::sync::Mutex;
31 use std::time::SystemTime;
32 use std::time::SystemTime;
32
33
33 /// Returns the status of the working directory compared to its parent
34 /// Returns the status of the working directory compared to its parent
34 /// changeset.
35 /// changeset.
35 ///
36 ///
36 /// This algorithm is based on traversing the filesystem tree (`fs` in function
37 /// This algorithm is based on traversing the filesystem tree (`fs` in function
37 /// and variable names) and dirstate tree at the same time. The core of this
38 /// and variable names) and dirstate tree at the same time. The core of this
38 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
39 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
39 /// and its use of `itertools::merge_join_by`. When reaching a path that only
40 /// and its use of `itertools::merge_join_by`. When reaching a path that only
40 /// exists in one of the two trees, depending on information requested by
41 /// exists in one of the two trees, depending on information requested by
41 /// `options` we may need to traverse the remaining subtree.
42 /// `options` we may need to traverse the remaining subtree.
42 #[timed]
43 #[timed]
43 pub fn status<'dirstate>(
44 pub fn status<'dirstate>(
44 dmap: &'dirstate mut DirstateMap,
45 dmap: &'dirstate mut DirstateMap,
45 matcher: &(dyn Matcher + Sync),
46 matcher: &(dyn Matcher + Sync),
46 root_dir: PathBuf,
47 root_dir: PathBuf,
47 ignore_files: Vec<PathBuf>,
48 ignore_files: Vec<PathBuf>,
48 options: StatusOptions,
49 options: StatusOptions,
49 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
50 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
50 {
51 {
51 // Force the global rayon threadpool to not exceed 16 concurrent threads.
52 // Force the global rayon threadpool to not exceed 16 concurrent threads.
52 // This is a stop-gap measure until we figure out why using more than 16
53 // This is a stop-gap measure until we figure out why using more than 16
53 // threads makes `status` slower for each additional thread.
54 // threads makes `status` slower for each additional thread.
54 // We use `ok()` in case the global threadpool has already been
55 // We use `ok()` in case the global threadpool has already been
55 // instantiated in `rhg` or some other caller.
56 // instantiated in `rhg` or some other caller.
56 // TODO find the underlying cause and fix it, then remove this.
57 // TODO find the underlying cause and fix it, then remove this.
57 rayon::ThreadPoolBuilder::new()
58 rayon::ThreadPoolBuilder::new()
58 .num_threads(16)
59 .num_threads(16)
59 .build_global()
60 .build_global()
60 .ok();
61 .ok();
61
62
62 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
63 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
63 if options.list_ignored || options.list_unknown {
64 if options.list_ignored || options.list_unknown {
64 let mut hasher = Sha1::new();
65 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
65 let (ignore_fn, warnings) = get_ignore_function(
66 DirstateVersion::V1 => {
66 ignore_files,
67 let (ignore_fn, warnings) = get_ignore_function(
67 &root_dir,
68 ignore_files,
68 &mut |pattern_bytes| hasher.update(pattern_bytes),
69 &root_dir,
69 )?;
70 &mut |_pattern_bytes| {},
70 let new_hash = *hasher.finalize().as_ref();
71 )?;
71 let changed = new_hash != dmap.ignore_patterns_hash;
72 (ignore_fn, warnings, None)
72 dmap.ignore_patterns_hash = new_hash;
73 }
73 (ignore_fn, warnings, Some(changed))
74 DirstateVersion::V2 => {
75 let mut hasher = Sha1::new();
76 let (ignore_fn, warnings) = get_ignore_function(
77 ignore_files,
78 &root_dir,
79 &mut |pattern_bytes| hasher.update(pattern_bytes),
80 )?;
81 let new_hash = *hasher.finalize().as_ref();
82 let changed = new_hash != dmap.ignore_patterns_hash;
83 dmap.ignore_patterns_hash = new_hash;
84 (ignore_fn, warnings, Some(changed))
85 }
86 };
87 (ignore_fn, warnings, changed)
74 } else {
88 } else {
75 (Box::new(|&_| true), vec![], None)
89 (Box::new(|&_| true), vec![], None)
76 };
90 };
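// A minimal sketch (not part of the original change) of how the SHA-1
// fingerprint of the ignore patterns is computed in the dirstate-v2 branch
// above. `pattern_files` and `stored_hash` are hypothetical inputs; the real
// code streams pattern bytes through the callback given to
// `get_ignore_function` and stores the hash in `dmap.ignore_patterns_hash`.
fn ignore_patterns_changed(pattern_files: &[Vec<u8>], stored_hash: [u8; 20]) -> bool {
    use sha1::{Digest, Sha1};
    let mut hasher = Sha1::new();
    for bytes in pattern_files {
        // Feed every pattern file's bytes into the hash, in order.
        hasher.update(bytes);
    }
    let new_hash: [u8; 20] = hasher.finalize().into();
    // The hash only needs to be rewritten when this fingerprint changed.
    new_hash != stored_hash
}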
77
91
78 let filesystem_time_at_status_start =
92 let filesystem_time_at_status_start =
79 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
93 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
80
94
81 // If the repository is under the current directory, prefer using a
95 // If the repository is under the current directory, prefer using a
82 // relative path, so the kernel needs to traverse fewer directories in every
96 // relative path, so the kernel needs to traverse fewer directories in every
83 // call to `read_dir` or `symlink_metadata`.
97 // call to `read_dir` or `symlink_metadata`.
84 // This is effective in the common case where the current directory is the
98 // This is effective in the common case where the current directory is the
85 // repository root.
99 // repository root.
86
100
87 // TODO: Better yet would be to use libc functions like `openat` and
101 // TODO: Better yet would be to use libc functions like `openat` and
88 // `fstatat` to remove such repeated traversals entirely, but the standard
102 // `fstatat` to remove such repeated traversals entirely, but the standard
89 // library does not provide APIs based on those.
103 // library does not provide APIs based on those.
90 // Maybe with a crate like https://crates.io/crates/openat instead?
104 // Maybe with a crate like https://crates.io/crates/openat instead?
91 let root_dir = if let Some(relative) = std::env::current_dir()
105 let root_dir = if let Some(relative) = std::env::current_dir()
92 .ok()
106 .ok()
93 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
107 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
94 {
108 {
95 relative
109 relative
96 } else {
110 } else {
97 &root_dir
111 &root_dir
98 };
112 };
99
113
100 let outcome = DirstateStatus {
114 let outcome = DirstateStatus {
101 filesystem_time_at_status_start,
115 filesystem_time_at_status_start,
102 ..Default::default()
116 ..Default::default()
103 };
117 };
104 let common = StatusCommon {
118 let common = StatusCommon {
105 dmap,
119 dmap,
106 options,
120 options,
107 matcher,
121 matcher,
108 ignore_fn,
122 ignore_fn,
109 outcome: Mutex::new(outcome),
123 outcome: Mutex::new(outcome),
110 ignore_patterns_have_changed: patterns_changed,
124 ignore_patterns_have_changed: patterns_changed,
111 new_cachable_directories: Default::default(),
125 new_cachable_directories: Default::default(),
112 outated_cached_directories: Default::default(),
126 outated_cached_directories: Default::default(),
113 filesystem_time_at_status_start,
127 filesystem_time_at_status_start,
114 };
128 };
115 let is_at_repo_root = true;
129 let is_at_repo_root = true;
116 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
130 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
117 let has_ignored_ancestor = false;
131 let has_ignored_ancestor = false;
118 let root_cached_mtime = None;
132 let root_cached_mtime = None;
119 let root_dir_metadata = None;
133 let root_dir_metadata = None;
120 // If the path we have for the repository root is a symlink, do follow it.
134 // If the path we have for the repository root is a symlink, do follow it.
121 // (As opposed to symlinks within the working directory which are not
135 // (As opposed to symlinks within the working directory which are not
122 // followed, using `std::fs::symlink_metadata`.)
136 // followed, using `std::fs::symlink_metadata`.)
123 common.traverse_fs_directory_and_dirstate(
137 common.traverse_fs_directory_and_dirstate(
124 has_ignored_ancestor,
138 has_ignored_ancestor,
125 dmap.root.as_ref(),
139 dmap.root.as_ref(),
126 hg_path,
140 hg_path,
127 &root_dir,
141 &root_dir,
128 root_dir_metadata,
142 root_dir_metadata,
129 root_cached_mtime,
143 root_cached_mtime,
130 is_at_repo_root,
144 is_at_repo_root,
131 )?;
145 )?;
132 let mut outcome = common.outcome.into_inner().unwrap();
146 let mut outcome = common.outcome.into_inner().unwrap();
133 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
147 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
134 let outdated = common.outated_cached_directories.into_inner().unwrap();
148 let outdated = common.outated_cached_directories.into_inner().unwrap();
135
149
136 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
150 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
137 || !outdated.is_empty()
151 || !outdated.is_empty()
138 || !new_cachable.is_empty();
152 || (!new_cachable.is_empty()
153 && dmap.dirstate_version == DirstateVersion::V2);
139
154
140 // Remove outdated mtimes before adding new mtimes, in case a given
155 // Remove outdated mtimes before adding new mtimes, in case a given
141 // directory is in both lists
156 // directory is in both lists
142 for path in &outdated {
157 for path in &outdated {
143 let node = dmap.get_or_insert(path)?;
158 let node = dmap.get_or_insert(path)?;
144 if let NodeData::CachedDirectory { .. } = &node.data {
159 if let NodeData::CachedDirectory { .. } = &node.data {
145 node.data = NodeData::None
160 node.data = NodeData::None
146 }
161 }
147 }
162 }
148 for (path, mtime) in &new_cachable {
163 for (path, mtime) in &new_cachable {
149 let node = dmap.get_or_insert(path)?;
164 let node = dmap.get_or_insert(path)?;
150 match &node.data {
165 match &node.data {
151 NodeData::Entry(_) => {} // Don’t overwrite an entry
166 NodeData::Entry(_) => {} // Don’t overwrite an entry
152 NodeData::CachedDirectory { .. } | NodeData::None => {
167 NodeData::CachedDirectory { .. } | NodeData::None => {
153 node.data = NodeData::CachedDirectory { mtime: *mtime }
168 node.data = NodeData::CachedDirectory { mtime: *mtime }
154 }
169 }
155 }
170 }
156 }
171 }
157
172
158 Ok((outcome, warnings))
173 Ok((outcome, warnings))
159 }
174 }
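// A minimal standalone sketch (not part of the original change) of the
// `itertools::merge_join_by` pattern described in the doc comment of `status`
// above: walk two *sorted* sequences in lockstep and classify every name as
// present in both trees, dirstate-only, or filesystem-only. Plain `&str` names
// stand in for the real dirstate node and `DirEntry` types.
fn classify_sorted<'a>(
    dirstate_names: &[&'a str], // assumed sorted
    fs_names: &[&'a str],       // assumed sorted
) -> Vec<(Option<&'a str>, Option<&'a str>)> {
    use itertools::EitherOrBoth::*;
    itertools::merge_join_by(dirstate_names, fs_names, |d, f| d.cmp(f))
        .map(|pair| match pair {
            // Exists in the dirstate and on disk: compare metadata (`Both` above).
            Both(d, f) => (Some(*d), Some(*f)),
            // Dirstate-only: the file was removed or deleted (`Left` above).
            Left(d) => (Some(*d), None),
            // Filesystem-only: the path is unknown or ignored (`Right` above).
            Right(f) => (None, Some(*f)),
        })
        .collect()
}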
160
175
161 /// Bag of random things needed by various parts of the algorithm. Reduces the
176 /// Bag of random things needed by various parts of the algorithm. Reduces the
162 /// number of parameters passed to functions.
177 /// number of parameters passed to functions.
163 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
178 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
164 dmap: &'tree DirstateMap<'on_disk>,
179 dmap: &'tree DirstateMap<'on_disk>,
165 options: StatusOptions,
180 options: StatusOptions,
166 matcher: &'a (dyn Matcher + Sync),
181 matcher: &'a (dyn Matcher + Sync),
167 ignore_fn: IgnoreFnType<'a>,
182 ignore_fn: IgnoreFnType<'a>,
168 outcome: Mutex<DirstateStatus<'on_disk>>,
183 outcome: Mutex<DirstateStatus<'on_disk>>,
169 new_cachable_directories:
184 new_cachable_directories:
170 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
185 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
171 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
186 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
172
187
173 /// Whether ignore files like `.hgignore` have changed since the previous
188 /// Whether ignore files like `.hgignore` have changed since the previous
174 /// time a `status()` call wrote their hash to the dirstate. `None` means
189 /// time a `status()` call wrote their hash to the dirstate. `None` means
175 /// we don’t know, as this run doesn’t list either ignored or unknown files
190 /// we don’t know, as this run doesn’t list either ignored or unknown files
176 /// and therefore isn’t reading `.hgignore`.
191 /// and therefore isn’t reading `.hgignore`.
177 ignore_patterns_have_changed: Option<bool>,
192 ignore_patterns_have_changed: Option<bool>,
178
193
179 /// The current time at the start of the `status()` algorithm, as measured
194 /// The current time at the start of the `status()` algorithm, as measured
180 /// and possibly truncated by the filesystem.
195 /// and possibly truncated by the filesystem.
181 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
196 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
182 }
197 }
183
198
184 enum Outcome {
199 enum Outcome {
185 Modified,
200 Modified,
186 Added,
201 Added,
187 Removed,
202 Removed,
188 Deleted,
203 Deleted,
189 Clean,
204 Clean,
190 Ignored,
205 Ignored,
191 Unknown,
206 Unknown,
192 Unsure,
207 Unsure,
193 }
208 }
194
209
195 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
210 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
196 fn push_outcome(
211 fn push_outcome(
197 &self,
212 &self,
198 which: Outcome,
213 which: Outcome,
199 dirstate_node: &NodeRef<'tree, 'on_disk>,
214 dirstate_node: &NodeRef<'tree, 'on_disk>,
200 ) -> Result<(), DirstateV2ParseError> {
215 ) -> Result<(), DirstateV2ParseError> {
201 let path = dirstate_node
216 let path = dirstate_node
202 .full_path_borrowed(self.dmap.on_disk)?
217 .full_path_borrowed(self.dmap.on_disk)?
203 .detach_from_tree();
218 .detach_from_tree();
204 let copy_source = if self.options.list_copies {
219 let copy_source = if self.options.list_copies {
205 dirstate_node
220 dirstate_node
206 .copy_source_borrowed(self.dmap.on_disk)?
221 .copy_source_borrowed(self.dmap.on_disk)?
207 .map(|source| source.detach_from_tree())
222 .map(|source| source.detach_from_tree())
208 } else {
223 } else {
209 None
224 None
210 };
225 };
211 self.push_outcome_common(which, path, copy_source);
226 self.push_outcome_common(which, path, copy_source);
212 Ok(())
227 Ok(())
213 }
228 }
214
229
215 fn push_outcome_without_copy_source(
230 fn push_outcome_without_copy_source(
216 &self,
231 &self,
217 which: Outcome,
232 which: Outcome,
218 path: &BorrowedPath<'_, 'on_disk>,
233 path: &BorrowedPath<'_, 'on_disk>,
219 ) {
234 ) {
220 self.push_outcome_common(which, path.detach_from_tree(), None)
235 self.push_outcome_common(which, path.detach_from_tree(), None)
221 }
236 }
222
237
223 fn push_outcome_common(
238 fn push_outcome_common(
224 &self,
239 &self,
225 which: Outcome,
240 which: Outcome,
226 path: HgPathCow<'on_disk>,
241 path: HgPathCow<'on_disk>,
227 copy_source: Option<HgPathCow<'on_disk>>,
242 copy_source: Option<HgPathCow<'on_disk>>,
228 ) {
243 ) {
229 let mut outcome = self.outcome.lock().unwrap();
244 let mut outcome = self.outcome.lock().unwrap();
230 let vec = match which {
245 let vec = match which {
231 Outcome::Modified => &mut outcome.modified,
246 Outcome::Modified => &mut outcome.modified,
232 Outcome::Added => &mut outcome.added,
247 Outcome::Added => &mut outcome.added,
233 Outcome::Removed => &mut outcome.removed,
248 Outcome::Removed => &mut outcome.removed,
234 Outcome::Deleted => &mut outcome.deleted,
249 Outcome::Deleted => &mut outcome.deleted,
235 Outcome::Clean => &mut outcome.clean,
250 Outcome::Clean => &mut outcome.clean,
236 Outcome::Ignored => &mut outcome.ignored,
251 Outcome::Ignored => &mut outcome.ignored,
237 Outcome::Unknown => &mut outcome.unknown,
252 Outcome::Unknown => &mut outcome.unknown,
238 Outcome::Unsure => &mut outcome.unsure,
253 Outcome::Unsure => &mut outcome.unsure,
239 };
254 };
240 vec.push(StatusPath { path, copy_source });
255 vec.push(StatusPath { path, copy_source });
241 }
256 }
242
257
243 fn read_dir(
258 fn read_dir(
244 &self,
259 &self,
245 hg_path: &HgPath,
260 hg_path: &HgPath,
246 fs_path: &Path,
261 fs_path: &Path,
247 is_at_repo_root: bool,
262 is_at_repo_root: bool,
248 ) -> Result<Vec<DirEntry>, ()> {
263 ) -> Result<Vec<DirEntry>, ()> {
249 DirEntry::read_dir(fs_path, is_at_repo_root)
264 DirEntry::read_dir(fs_path, is_at_repo_root)
250 .map_err(|error| self.io_error(error, hg_path))
265 .map_err(|error| self.io_error(error, hg_path))
251 }
266 }
252
267
253 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
268 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
254 let errno = error.raw_os_error().expect("expected real OS error");
269 let errno = error.raw_os_error().expect("expected real OS error");
255 self.outcome
270 self.outcome
256 .lock()
271 .lock()
257 .unwrap()
272 .unwrap()
258 .bad
273 .bad
259 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
274 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
260 }
275 }
261
276
262 fn check_for_outdated_directory_cache(
277 fn check_for_outdated_directory_cache(
263 &self,
278 &self,
264 dirstate_node: &NodeRef<'tree, 'on_disk>,
279 dirstate_node: &NodeRef<'tree, 'on_disk>,
265 ) -> Result<(), DirstateV2ParseError> {
280 ) -> Result<(), DirstateV2ParseError> {
266 if self.ignore_patterns_have_changed == Some(true)
281 if self.ignore_patterns_have_changed == Some(true)
267 && dirstate_node.cached_directory_mtime()?.is_some()
282 && dirstate_node.cached_directory_mtime()?.is_some()
268 {
283 {
269 self.outated_cached_directories.lock().unwrap().push(
284 self.outated_cached_directories.lock().unwrap().push(
270 dirstate_node
285 dirstate_node
271 .full_path_borrowed(self.dmap.on_disk)?
286 .full_path_borrowed(self.dmap.on_disk)?
272 .detach_from_tree(),
287 .detach_from_tree(),
273 )
288 )
274 }
289 }
275 Ok(())
290 Ok(())
276 }
291 }
277
292
278 /// If this returns true, we can get accurate results by only using
293 /// If this returns true, we can get accurate results by only using
279 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
294 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
280 /// need to call `read_dir`.
295 /// need to call `read_dir`.
281 fn can_skip_fs_readdir(
296 fn can_skip_fs_readdir(
282 &self,
297 &self,
283 directory_metadata: Option<&std::fs::Metadata>,
298 directory_metadata: Option<&std::fs::Metadata>,
284 cached_directory_mtime: Option<TruncatedTimestamp>,
299 cached_directory_mtime: Option<TruncatedTimestamp>,
285 ) -> bool {
300 ) -> bool {
286 if !self.options.list_unknown && !self.options.list_ignored {
301 if !self.options.list_unknown && !self.options.list_ignored {
287 // All states that we care about listing have corresponding
302 // All states that we care about listing have corresponding
288 // dirstate entries.
303 // dirstate entries.
289 // This happens for example with `hg status -mard`.
304 // This happens for example with `hg status -mard`.
290 return true;
305 return true;
291 }
306 }
292 if !self.options.list_ignored
307 if !self.options.list_ignored
293 && self.ignore_patterns_have_changed == Some(false)
308 && self.ignore_patterns_have_changed == Some(false)
294 {
309 {
295 if let Some(cached_mtime) = cached_directory_mtime {
310 if let Some(cached_mtime) = cached_directory_mtime {
296 // The dirstate contains a cached mtime for this directory, set
311 // The dirstate contains a cached mtime for this directory, set
297 // by a previous run of the `status` algorithm which found this
312 // by a previous run of the `status` algorithm which found this
298 // directory eligible for `read_dir` caching.
313 // directory eligible for `read_dir` caching.
299 if let Some(meta) = directory_metadata {
314 if let Some(meta) = directory_metadata {
300 if cached_mtime
315 if cached_mtime
301 .likely_equal_to_mtime_of(meta)
316 .likely_equal_to_mtime_of(meta)
302 .unwrap_or(false)
317 .unwrap_or(false)
303 {
318 {
304 // The mtime of that directory has not changed
319 // The mtime of that directory has not changed
305 // since then, which means that the results of
320 // since then, which means that the results of
306 // `read_dir` should also be unchanged.
321 // `read_dir` should also be unchanged.
307 return true;
322 return true;
308 }
323 }
309 }
324 }
310 }
325 }
311 }
326 }
312 false
327 false
313 }
328 }
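// Condensed sketch (illustrative only, with booleans standing in for the real
// metadata and timestamp types) of the two early exits taken by
// `can_skip_fs_readdir` above.
fn can_skip_readdir_sketch(
    list_unknown: bool,
    list_ignored: bool,
    ignore_patterns_unchanged: bool,
    cached_directory_mtime_matches: bool,
) -> bool {
    if !list_unknown && !list_ignored {
        // Everything we need to report has a dirstate entry
        // (e.g. `hg status -mard`).
        return true;
    }
    // Otherwise `read_dir` can only be skipped when ignored files are not
    // requested, the ignore patterns are known to be unchanged, and the cached
    // directory mtime still matches the one on disk.
    !list_ignored && ignore_patterns_unchanged && cached_directory_mtime_matches
}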
314
329
315 /// Returns whether all child entries of the filesystem directory have a
330 /// Returns whether all child entries of the filesystem directory have a
316 /// corresponding dirstate node or are ignored.
331 /// corresponding dirstate node or are ignored.
317 fn traverse_fs_directory_and_dirstate(
332 fn traverse_fs_directory_and_dirstate(
318 &self,
333 &self,
319 has_ignored_ancestor: bool,
334 has_ignored_ancestor: bool,
320 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
335 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
321 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
336 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
322 directory_fs_path: &Path,
337 directory_fs_path: &Path,
323 directory_metadata: Option<&std::fs::Metadata>,
338 directory_metadata: Option<&std::fs::Metadata>,
324 cached_directory_mtime: Option<TruncatedTimestamp>,
339 cached_directory_mtime: Option<TruncatedTimestamp>,
325 is_at_repo_root: bool,
340 is_at_repo_root: bool,
326 ) -> Result<bool, DirstateV2ParseError> {
341 ) -> Result<bool, DirstateV2ParseError> {
327 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
342 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
328 {
343 {
329 dirstate_nodes
344 dirstate_nodes
330 .par_iter()
345 .par_iter()
331 .map(|dirstate_node| {
346 .map(|dirstate_node| {
332 let fs_path = directory_fs_path.join(get_path_from_bytes(
347 let fs_path = directory_fs_path.join(get_path_from_bytes(
333 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
348 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
334 ));
349 ));
335 match std::fs::symlink_metadata(&fs_path) {
350 match std::fs::symlink_metadata(&fs_path) {
336 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
351 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
337 &fs_path,
352 &fs_path,
338 &fs_metadata,
353 &fs_metadata,
339 dirstate_node,
354 dirstate_node,
340 has_ignored_ancestor,
355 has_ignored_ancestor,
341 ),
356 ),
342 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
357 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
343 self.traverse_dirstate_only(dirstate_node)
358 self.traverse_dirstate_only(dirstate_node)
344 }
359 }
345 Err(error) => {
360 Err(error) => {
346 let hg_path =
361 let hg_path =
347 dirstate_node.full_path(self.dmap.on_disk)?;
362 dirstate_node.full_path(self.dmap.on_disk)?;
348 Ok(self.io_error(error, hg_path))
363 Ok(self.io_error(error, hg_path))
349 }
364 }
350 }
365 }
351 })
366 })
352 .collect::<Result<_, _>>()?;
367 .collect::<Result<_, _>>()?;
353
368
354 // We don’t know, so conservatively say this isn’t the case
369 // We don’t know, so conservatively say this isn’t the case
355 let children_all_have_dirstate_node_or_are_ignored = false;
370 let children_all_have_dirstate_node_or_are_ignored = false;
356
371
357 return Ok(children_all_have_dirstate_node_or_are_ignored);
372 return Ok(children_all_have_dirstate_node_or_are_ignored);
358 }
373 }
359
374
360 let mut fs_entries = if let Ok(entries) = self.read_dir(
375 let mut fs_entries = if let Ok(entries) = self.read_dir(
361 directory_hg_path,
376 directory_hg_path,
362 directory_fs_path,
377 directory_fs_path,
363 is_at_repo_root,
378 is_at_repo_root,
364 ) {
379 ) {
365 entries
380 entries
366 } else {
381 } else {
367 // Treat an unreadable directory (typically because of insufficient
382 // Treat an unreadable directory (typically because of insufficient
368 // permissions) like an empty directory. `self.read_dir` has
383 // permissions) like an empty directory. `self.read_dir` has
369 // already called `self.io_error` so a warning will be emitted.
384 // already called `self.io_error` so a warning will be emitted.
370 Vec::new()
385 Vec::new()
371 };
386 };
372
387
373 // `merge_join_by` requires both its input iterators to be sorted:
388 // `merge_join_by` requires both its input iterators to be sorted:
374
389
375 let dirstate_nodes = dirstate_nodes.sorted();
390 let dirstate_nodes = dirstate_nodes.sorted();
376 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
391 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
377 // https://github.com/rust-lang/rust/issues/34162
392 // https://github.com/rust-lang/rust/issues/34162
378 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
393 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
379
394
380 // Propagate here any error that would happen inside the comparison
395 // Propagate here any error that would happen inside the comparison
381 // callback below
396 // callback below
382 for dirstate_node in &dirstate_nodes {
397 for dirstate_node in &dirstate_nodes {
383 dirstate_node.base_name(self.dmap.on_disk)?;
398 dirstate_node.base_name(self.dmap.on_disk)?;
384 }
399 }
385 itertools::merge_join_by(
400 itertools::merge_join_by(
386 dirstate_nodes,
401 dirstate_nodes,
387 &fs_entries,
402 &fs_entries,
388 |dirstate_node, fs_entry| {
403 |dirstate_node, fs_entry| {
389 // This `unwrap` never panics because we already propagated
404 // This `unwrap` never panics because we already propagated
390 // those errors above
405 // those errors above
391 dirstate_node
406 dirstate_node
392 .base_name(self.dmap.on_disk)
407 .base_name(self.dmap.on_disk)
393 .unwrap()
408 .unwrap()
394 .cmp(&fs_entry.base_name)
409 .cmp(&fs_entry.base_name)
395 },
410 },
396 )
411 )
397 .par_bridge()
412 .par_bridge()
398 .map(|pair| {
413 .map(|pair| {
399 use itertools::EitherOrBoth::*;
414 use itertools::EitherOrBoth::*;
400 let has_dirstate_node_or_is_ignored;
415 let has_dirstate_node_or_is_ignored;
401 match pair {
416 match pair {
402 Both(dirstate_node, fs_entry) => {
417 Both(dirstate_node, fs_entry) => {
403 self.traverse_fs_and_dirstate(
418 self.traverse_fs_and_dirstate(
404 &fs_entry.full_path,
419 &fs_entry.full_path,
405 &fs_entry.metadata,
420 &fs_entry.metadata,
406 dirstate_node,
421 dirstate_node,
407 has_ignored_ancestor,
422 has_ignored_ancestor,
408 )?;
423 )?;
409 has_dirstate_node_or_is_ignored = true
424 has_dirstate_node_or_is_ignored = true
410 }
425 }
411 Left(dirstate_node) => {
426 Left(dirstate_node) => {
412 self.traverse_dirstate_only(dirstate_node)?;
427 self.traverse_dirstate_only(dirstate_node)?;
413 has_dirstate_node_or_is_ignored = true;
428 has_dirstate_node_or_is_ignored = true;
414 }
429 }
415 Right(fs_entry) => {
430 Right(fs_entry) => {
416 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
431 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
417 has_ignored_ancestor,
432 has_ignored_ancestor,
418 directory_hg_path,
433 directory_hg_path,
419 fs_entry,
434 fs_entry,
420 )
435 )
421 }
436 }
422 }
437 }
423 Ok(has_dirstate_node_or_is_ignored)
438 Ok(has_dirstate_node_or_is_ignored)
424 })
439 })
425 .try_reduce(|| true, |a, b| Ok(a && b))
440 .try_reduce(|| true, |a, b| Ok(a && b))
426 }
441 }
427
442
428 fn traverse_fs_and_dirstate(
443 fn traverse_fs_and_dirstate(
429 &self,
444 &self,
430 fs_path: &Path,
445 fs_path: &Path,
431 fs_metadata: &std::fs::Metadata,
446 fs_metadata: &std::fs::Metadata,
432 dirstate_node: NodeRef<'tree, 'on_disk>,
447 dirstate_node: NodeRef<'tree, 'on_disk>,
433 has_ignored_ancestor: bool,
448 has_ignored_ancestor: bool,
434 ) -> Result<(), DirstateV2ParseError> {
449 ) -> Result<(), DirstateV2ParseError> {
435 self.check_for_outdated_directory_cache(&dirstate_node)?;
450 self.check_for_outdated_directory_cache(&dirstate_node)?;
436 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
451 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
437 let file_type = fs_metadata.file_type();
452 let file_type = fs_metadata.file_type();
438 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
453 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
439 if !file_or_symlink {
454 if !file_or_symlink {
440 // If we previously had a file here, it was removed (with
455 // If we previously had a file here, it was removed (with
441 // `hg rm` or similar) or deleted before it could be
456 // `hg rm` or similar) or deleted before it could be
442 // replaced by a directory or something else.
457 // replaced by a directory or something else.
443 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
458 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
444 }
459 }
445 if file_type.is_dir() {
460 if file_type.is_dir() {
446 if self.options.collect_traversed_dirs {
461 if self.options.collect_traversed_dirs {
447 self.outcome
462 self.outcome
448 .lock()
463 .lock()
449 .unwrap()
464 .unwrap()
450 .traversed
465 .traversed
451 .push(hg_path.detach_from_tree())
466 .push(hg_path.detach_from_tree())
452 }
467 }
453 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
468 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
454 let is_at_repo_root = false;
469 let is_at_repo_root = false;
455 let children_all_have_dirstate_node_or_are_ignored = self
470 let children_all_have_dirstate_node_or_are_ignored = self
456 .traverse_fs_directory_and_dirstate(
471 .traverse_fs_directory_and_dirstate(
457 is_ignored,
472 is_ignored,
458 dirstate_node.children(self.dmap.on_disk)?,
473 dirstate_node.children(self.dmap.on_disk)?,
459 hg_path,
474 hg_path,
460 fs_path,
475 fs_path,
461 Some(fs_metadata),
476 Some(fs_metadata),
462 dirstate_node.cached_directory_mtime()?,
477 dirstate_node.cached_directory_mtime()?,
463 is_at_repo_root,
478 is_at_repo_root,
464 )?;
479 )?;
465 self.maybe_save_directory_mtime(
480 self.maybe_save_directory_mtime(
466 children_all_have_dirstate_node_or_are_ignored,
481 children_all_have_dirstate_node_or_are_ignored,
467 fs_metadata,
482 fs_metadata,
468 dirstate_node,
483 dirstate_node,
469 )?
484 )?
470 } else {
485 } else {
471 if file_or_symlink && self.matcher.matches(hg_path) {
486 if file_or_symlink && self.matcher.matches(hg_path) {
472 if let Some(state) = dirstate_node.state()? {
487 if let Some(state) = dirstate_node.state()? {
473 match state {
488 match state {
474 EntryState::Added => {
489 EntryState::Added => {
475 self.push_outcome(Outcome::Added, &dirstate_node)?
490 self.push_outcome(Outcome::Added, &dirstate_node)?
476 }
491 }
477 EntryState::Removed => self
492 EntryState::Removed => self
478 .push_outcome(Outcome::Removed, &dirstate_node)?,
493 .push_outcome(Outcome::Removed, &dirstate_node)?,
479 EntryState::Merged => self
494 EntryState::Merged => self
480 .push_outcome(Outcome::Modified, &dirstate_node)?,
495 .push_outcome(Outcome::Modified, &dirstate_node)?,
481 EntryState::Normal => self
496 EntryState::Normal => self
482 .handle_normal_file(&dirstate_node, fs_metadata)?,
497 .handle_normal_file(&dirstate_node, fs_metadata)?,
483 }
498 }
484 } else {
499 } else {
485 // `node.entry.is_none()` indicates a "directory"
500 // `node.entry.is_none()` indicates a "directory"
486 // node, but the filesystem has a file
501 // node, but the filesystem has a file
487 self.mark_unknown_or_ignored(
502 self.mark_unknown_or_ignored(
488 has_ignored_ancestor,
503 has_ignored_ancestor,
489 hg_path,
504 hg_path,
490 );
505 );
491 }
506 }
492 }
507 }
493
508
494 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
509 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
495 {
510 {
496 self.traverse_dirstate_only(child_node)?
511 self.traverse_dirstate_only(child_node)?
497 }
512 }
498 }
513 }
499 Ok(())
514 Ok(())
500 }
515 }
501
516
502 fn maybe_save_directory_mtime(
517 fn maybe_save_directory_mtime(
503 &self,
518 &self,
504 children_all_have_dirstate_node_or_are_ignored: bool,
519 children_all_have_dirstate_node_or_are_ignored: bool,
505 directory_metadata: &std::fs::Metadata,
520 directory_metadata: &std::fs::Metadata,
506 dirstate_node: NodeRef<'tree, 'on_disk>,
521 dirstate_node: NodeRef<'tree, 'on_disk>,
507 ) -> Result<(), DirstateV2ParseError> {
522 ) -> Result<(), DirstateV2ParseError> {
508 if !children_all_have_dirstate_node_or_are_ignored {
523 if !children_all_have_dirstate_node_or_are_ignored {
509 return Ok(());
524 return Ok(());
510 }
525 }
511 // All filesystem directory entries from `read_dir` have a
526 // All filesystem directory entries from `read_dir` have a
512 // corresponding node in the dirstate, so we can reconstitute the
527 // corresponding node in the dirstate, so we can reconstitute the
513 // names of those entries without calling `read_dir` again.
528 // names of those entries without calling `read_dir` again.
514
529
515 // TODO: use let-else here and below when available:
530 // TODO: use let-else here and below when available:
516 // https://github.com/rust-lang/rust/issues/87335
531 // https://github.com/rust-lang/rust/issues/87335
517 let status_start = if let Some(status_start) =
532 let status_start = if let Some(status_start) =
518 &self.filesystem_time_at_status_start
533 &self.filesystem_time_at_status_start
519 {
534 {
520 status_start
535 status_start
521 } else {
536 } else {
522 return Ok(());
537 return Ok(());
523 };
538 };
524
539
525 // Although the Rust standard library’s `SystemTime` type
540 // Although the Rust standard library’s `SystemTime` type
526 // has nanosecond precision, the times reported for a
541 // has nanosecond precision, the times reported for a
527 // directory’s (or file’s) modified time may have lower
542 // directory’s (or file’s) modified time may have lower
528 // resolution based on the filesystem (for example ext3
543 // resolution based on the filesystem (for example ext3
529 // only stores integer seconds), kernel (see
544 // only stores integer seconds), kernel (see
530 // https://stackoverflow.com/a/14393315/1162888), etc.
545 // https://stackoverflow.com/a/14393315/1162888), etc.
531 let directory_mtime = if let Ok(option) =
546 let directory_mtime = if let Ok(option) =
532 TruncatedTimestamp::for_reliable_mtime_of(
547 TruncatedTimestamp::for_reliable_mtime_of(
533 directory_metadata,
548 directory_metadata,
534 status_start,
549 status_start,
535 ) {
550 ) {
536 if let Some(directory_mtime) = option {
551 if let Some(directory_mtime) = option {
537 directory_mtime
552 directory_mtime
538 } else {
553 } else {
539 // The directory was modified too recently,
554 // The directory was modified too recently,
540 // don’t cache its `read_dir` results.
555 // don’t cache its `read_dir` results.
541 //
556 //
542 // 1. A change to this directory (direct child was
557 // 1. A change to this directory (direct child was
543 // added or removed) causes its mtime to be set
558 // added or removed) causes its mtime to be set
544 // (possibly truncated) to `directory_mtime`
559 // (possibly truncated) to `directory_mtime`
545 // 2. This `status` algorithm calls `read_dir`
560 // 2. This `status` algorithm calls `read_dir`
546 // 3. Another change is made to the same directory
561 // 3. Another change is made to the same directory
547 // so that calling `read_dir` again would give
562 // so that calling `read_dir` again would give
548 // different results, but soon enough after 1. that
563 // different results, but soon enough after 1. that
549 // the mtime stays the same
564 // the mtime stays the same
550 //
565 //
551 // On a system where the time resolution is poor, this
566 // On a system where the time resolution is poor, this
552 // scenario is not unlikely if all three steps are caused
567 // scenario is not unlikely if all three steps are caused
553 // by the same script.
568 // by the same script.
554 return Ok(());
569 return Ok(());
555 }
570 }
556 } else {
571 } else {
557 // OS/libc does not support mtime?
572 // OS/libc does not support mtime?
558 return Ok(());
573 return Ok(());
559 };
574 };
560 // We’ve observed (through `status_start`) that time has
575 // We’ve observed (through `status_start`) that time has
561 // “progressed” since `directory_mtime`, so any further
576 // “progressed” since `directory_mtime`, so any further
562 // change to this directory is extremely likely to cause a
577 // change to this directory is extremely likely to cause a
563 // different mtime.
578 // different mtime.
564 //
579 //
565 // Having the same mtime again is not entirely impossible
580 // Having the same mtime again is not entirely impossible
566 // since the system clock is not monotonic. It could jump
581 // since the system clock is not monotonic. It could jump
567 // backward to some point before `directory_mtime`, then a
582 // backward to some point before `directory_mtime`, then a
568 // directory change could potentially happen during exactly
583 // directory change could potentially happen during exactly
569 // the wrong tick.
584 // the wrong tick.
570 //
585 //
571 // We deem this scenario (unlike the previous one) to be
586 // We deem this scenario (unlike the previous one) to be
572 // unlikely enough in practice.
587 // unlikely enough in practice.
573
588
574 let is_up_to_date =
589 let is_up_to_date =
575 if let Some(cached) = dirstate_node.cached_directory_mtime()? {
590 if let Some(cached) = dirstate_node.cached_directory_mtime()? {
576 cached.likely_equal(directory_mtime)
591 cached.likely_equal(directory_mtime)
577 } else {
592 } else {
578 false
593 false
579 };
594 };
580 if !is_up_to_date {
595 if !is_up_to_date {
581 let hg_path = dirstate_node
596 let hg_path = dirstate_node
582 .full_path_borrowed(self.dmap.on_disk)?
597 .full_path_borrowed(self.dmap.on_disk)?
583 .detach_from_tree();
598 .detach_from_tree();
584 self.new_cachable_directories
599 self.new_cachable_directories
585 .lock()
600 .lock()
586 .unwrap()
601 .unwrap()
587 .push((hg_path, directory_mtime))
602 .push((hg_path, directory_mtime))
588 }
603 }
589 Ok(())
604 Ok(())
590 }
605 }
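// A std-only sketch (not the real implementation, which goes through
// `TruncatedTimestamp::for_reliable_mtime_of`) of the guard applied above: a
// directory mtime is only cached when it is strictly older than the filesystem
// time observed at the start of `status`, so that a later change is extremely
// likely to produce a different mtime.
fn reliable_directory_mtime(
    metadata: &std::fs::Metadata,
    status_start: std::time::SystemTime,
) -> Option<std::time::SystemTime> {
    // `modified()` can fail on platforms without mtime support.
    let mtime = metadata.modified().ok()?;
    if mtime < status_start {
        // Time has visibly progressed since the directory changed: safe to cache.
        Some(mtime)
    } else {
        // Modified "too recently": another change within the same clock tick
        // could go unnoticed, so don't cache this directory's `read_dir` results.
        None
    }
}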
591
606
592 /// A file with `EntryState::Normal` in the dirstate was found in the
607 /// A file with `EntryState::Normal` in the dirstate was found in the
593 /// filesystem
608 /// filesystem
594 fn handle_normal_file(
609 fn handle_normal_file(
595 &self,
610 &self,
596 dirstate_node: &NodeRef<'tree, 'on_disk>,
611 dirstate_node: &NodeRef<'tree, 'on_disk>,
597 fs_metadata: &std::fs::Metadata,
612 fs_metadata: &std::fs::Metadata,
598 ) -> Result<(), DirstateV2ParseError> {
613 ) -> Result<(), DirstateV2ParseError> {
599 // Keep the low 31 bits
614 // Keep the low 31 bits
600 fn truncate_u64(value: u64) -> i32 {
615 fn truncate_u64(value: u64) -> i32 {
601 (value & 0x7FFF_FFFF) as i32
616 (value & 0x7FFF_FFFF) as i32
602 }
617 }
603
618
604 let entry = dirstate_node
619 let entry = dirstate_node
605 .entry()?
620 .entry()?
606 .expect("handle_normal_file called with entry-less node");
621 .expect("handle_normal_file called with entry-less node");
607 let mode_changed =
622 let mode_changed =
608 || self.options.check_exec && entry.mode_changed(fs_metadata);
623 || self.options.check_exec && entry.mode_changed(fs_metadata);
609 let size = entry.size();
624 let size = entry.size();
610 let size_changed = size != truncate_u64(fs_metadata.len());
625 let size_changed = size != truncate_u64(fs_metadata.len());
611 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
626 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
612 // issue6456: Size returned may be longer due to encryption
627 // issue6456: Size returned may be longer due to encryption
613 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
628 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
614 self.push_outcome(Outcome::Unsure, dirstate_node)?
629 self.push_outcome(Outcome::Unsure, dirstate_node)?
615 } else if dirstate_node.has_copy_source()
630 } else if dirstate_node.has_copy_source()
616 || entry.is_from_other_parent()
631 || entry.is_from_other_parent()
617 || (size >= 0 && (size_changed || mode_changed()))
632 || (size >= 0 && (size_changed || mode_changed()))
618 {
633 {
619 self.push_outcome(Outcome::Modified, dirstate_node)?
634 self.push_outcome(Outcome::Modified, dirstate_node)?
620 } else {
635 } else {
621 let mtime_looks_clean;
636 let mtime_looks_clean;
622 if let Some(dirstate_mtime) = entry.truncated_mtime() {
637 if let Some(dirstate_mtime) = entry.truncated_mtime() {
623 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
638 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
624 .expect("OS/libc does not support mtime?");
639 .expect("OS/libc does not support mtime?");
625 // There might be a change in the future if for example the
640 // There might be a change in the future if for example the
626 // internal clock drifts while the process runs, but this is a
641 // internal clock drifts while the process runs, but this is a
627 // case where the issues the user would face
642 // case where the issues the user would face
628 // would be a lot worse and there is nothing we
643 // would be a lot worse and there is nothing we
629 // can really do.
644 // can really do.
630 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
645 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
631 } else {
646 } else {
632 // No mtime in the dirstate entry
647 // No mtime in the dirstate entry
633 mtime_looks_clean = false
648 mtime_looks_clean = false
634 };
649 };
635 if !mtime_looks_clean {
650 if !mtime_looks_clean {
636 self.push_outcome(Outcome::Unsure, dirstate_node)?
651 self.push_outcome(Outcome::Unsure, dirstate_node)?
637 } else if self.options.list_clean {
652 } else if self.options.list_clean {
638 self.push_outcome(Outcome::Clean, dirstate_node)?
653 self.push_outcome(Outcome::Clean, dirstate_node)?
639 }
654 }
640 }
655 }
641 Ok(())
656 Ok(())
642 }
657 }
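// A small sketch (not part of the original file) of the size comparison done in
// `handle_normal_file` above: dirstate entries keep only the low 31 bits of the
// file size, so the on-disk length must be truncated the same way before
// comparing. Negative dirstate sizes are sentinels (for example "from other
// parent") and are handled separately in the real code.
fn size_looks_changed(dirstate_size: i32, fs_len: u64) -> bool {
    // Keep the low 31 bits, as `truncate_u64` does above.
    let truncated = (fs_len & 0x7FFF_FFFF) as i32;
    dirstate_size >= 0 && dirstate_size != truncated
}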
643
658
644 /// A node in the dirstate tree has no corresponding filesystem entry
659 /// A node in the dirstate tree has no corresponding filesystem entry
645 fn traverse_dirstate_only(
660 fn traverse_dirstate_only(
646 &self,
661 &self,
647 dirstate_node: NodeRef<'tree, 'on_disk>,
662 dirstate_node: NodeRef<'tree, 'on_disk>,
648 ) -> Result<(), DirstateV2ParseError> {
663 ) -> Result<(), DirstateV2ParseError> {
649 self.check_for_outdated_directory_cache(&dirstate_node)?;
664 self.check_for_outdated_directory_cache(&dirstate_node)?;
650 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
665 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
651 dirstate_node
666 dirstate_node
652 .children(self.dmap.on_disk)?
667 .children(self.dmap.on_disk)?
653 .par_iter()
668 .par_iter()
654 .map(|child_node| self.traverse_dirstate_only(child_node))
669 .map(|child_node| self.traverse_dirstate_only(child_node))
655 .collect()
670 .collect()
656 }
671 }
657
672
658 /// A node in the dirstate tree has no corresponding *file* on the
673 /// A node in the dirstate tree has no corresponding *file* on the
659 /// filesystem
674 /// filesystem
660 ///
675 ///
661 /// Does nothing on a "directory" node
676 /// Does nothing on a "directory" node
662 fn mark_removed_or_deleted_if_file(
677 fn mark_removed_or_deleted_if_file(
663 &self,
678 &self,
664 dirstate_node: &NodeRef<'tree, 'on_disk>,
679 dirstate_node: &NodeRef<'tree, 'on_disk>,
665 ) -> Result<(), DirstateV2ParseError> {
680 ) -> Result<(), DirstateV2ParseError> {
666 if let Some(state) = dirstate_node.state()? {
681 if let Some(state) = dirstate_node.state()? {
667 let path = dirstate_node.full_path(self.dmap.on_disk)?;
682 let path = dirstate_node.full_path(self.dmap.on_disk)?;
668 if self.matcher.matches(path) {
683 if self.matcher.matches(path) {
669 if let EntryState::Removed = state {
684 if let EntryState::Removed = state {
670 self.push_outcome(Outcome::Removed, dirstate_node)?
685 self.push_outcome(Outcome::Removed, dirstate_node)?
671 } else {
686 } else {
672 self.push_outcome(Outcome::Deleted, &dirstate_node)?
687 self.push_outcome(Outcome::Deleted, &dirstate_node)?
673 }
688 }
674 }
689 }
675 }
690 }
676 Ok(())
691 Ok(())
677 }
692 }
678
693
679 /// Something in the filesystem has no corresponding dirstate node
694 /// Something in the filesystem has no corresponding dirstate node
680 ///
695 ///
681 /// Returns whether that path is ignored
696 /// Returns whether that path is ignored
682 fn traverse_fs_only(
697 fn traverse_fs_only(
683 &self,
698 &self,
684 has_ignored_ancestor: bool,
699 has_ignored_ancestor: bool,
685 directory_hg_path: &HgPath,
700 directory_hg_path: &HgPath,
686 fs_entry: &DirEntry,
701 fs_entry: &DirEntry,
687 ) -> bool {
702 ) -> bool {
688 let hg_path = directory_hg_path.join(&fs_entry.base_name);
703 let hg_path = directory_hg_path.join(&fs_entry.base_name);
689 let file_type = fs_entry.metadata.file_type();
704 let file_type = fs_entry.metadata.file_type();
690 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
705 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
691 if file_type.is_dir() {
706 if file_type.is_dir() {
692 let is_ignored =
707 let is_ignored =
693 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
708 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
694 let traverse_children = if is_ignored {
709 let traverse_children = if is_ignored {
695 // Descendants of an ignored directory are all ignored
710 // Descendants of an ignored directory are all ignored
696 self.options.list_ignored
711 self.options.list_ignored
697 } else {
712 } else {
698 // Descendants of an unknown directory may be either unknown or
713 // Descendants of an unknown directory may be either unknown or
699 // ignored
714 // ignored
700 self.options.list_unknown || self.options.list_ignored
715 self.options.list_unknown || self.options.list_ignored
701 };
716 };
702 if traverse_children {
717 if traverse_children {
703 let is_at_repo_root = false;
718 let is_at_repo_root = false;
704 if let Ok(children_fs_entries) = self.read_dir(
719 if let Ok(children_fs_entries) = self.read_dir(
705 &hg_path,
720 &hg_path,
706 &fs_entry.full_path,
721 &fs_entry.full_path,
707 is_at_repo_root,
722 is_at_repo_root,
708 ) {
723 ) {
709 children_fs_entries.par_iter().for_each(|child_fs_entry| {
724 children_fs_entries.par_iter().for_each(|child_fs_entry| {
710 self.traverse_fs_only(
725 self.traverse_fs_only(
711 is_ignored,
726 is_ignored,
712 &hg_path,
727 &hg_path,
713 child_fs_entry,
728 child_fs_entry,
714 );
729 );
715 })
730 })
716 }
731 }
717 }
732 }
718 if self.options.collect_traversed_dirs {
733 if self.options.collect_traversed_dirs {
719 self.outcome.lock().unwrap().traversed.push(hg_path.into())
734 self.outcome.lock().unwrap().traversed.push(hg_path.into())
720 }
735 }
721 is_ignored
736 is_ignored
722 } else {
737 } else {
723 if file_or_symlink {
738 if file_or_symlink {
724 if self.matcher.matches(&hg_path) {
739 if self.matcher.matches(&hg_path) {
725 self.mark_unknown_or_ignored(
740 self.mark_unknown_or_ignored(
726 has_ignored_ancestor,
741 has_ignored_ancestor,
727 &BorrowedPath::InMemory(&hg_path),
742 &BorrowedPath::InMemory(&hg_path),
728 )
743 )
729 } else {
744 } else {
730 // We haven’t computed whether this path is ignored. It
745 // We haven’t computed whether this path is ignored. It
731 // might not be, and a future run of status might have a
746 // might not be, and a future run of status might have a
732 // different matcher that matches it. So treat it as not
747 // different matcher that matches it. So treat it as not
733 // ignored. That is, inhibit readdir caching of the parent
748 // ignored. That is, inhibit readdir caching of the parent
734 // directory.
749 // directory.
735 false
750 false
736 }
751 }
737 } else {
752 } else {
738 // This is neither a directory, a plain file, nor a symlink.
753 // This is neither a directory, a plain file, nor a symlink.
739 // Treat it like an ignored file.
754 // Treat it like an ignored file.
740 true
755 true
741 }
756 }
742 }
757 }
743 }
758 }
744
759
745 /// Returns whether that path is ignored
760 /// Returns whether that path is ignored
746 fn mark_unknown_or_ignored(
761 fn mark_unknown_or_ignored(
747 &self,
762 &self,
748 has_ignored_ancestor: bool,
763 has_ignored_ancestor: bool,
749 hg_path: &BorrowedPath<'_, 'on_disk>,
764 hg_path: &BorrowedPath<'_, 'on_disk>,
750 ) -> bool {
765 ) -> bool {
751 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
766 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
752 if is_ignored {
767 if is_ignored {
753 if self.options.list_ignored {
768 if self.options.list_ignored {
754 self.push_outcome_without_copy_source(
769 self.push_outcome_without_copy_source(
755 Outcome::Ignored,
770 Outcome::Ignored,
756 hg_path,
771 hg_path,
757 )
772 )
758 }
773 }
759 } else {
774 } else {
760 if self.options.list_unknown {
775 if self.options.list_unknown {
761 self.push_outcome_without_copy_source(
776 self.push_outcome_without_copy_source(
762 Outcome::Unknown,
777 Outcome::Unknown,
763 hg_path,
778 hg_path,
764 )
779 )
765 }
780 }
766 }
781 }
767 is_ignored
782 is_ignored
768 }
783 }
769 }
784 }

struct DirEntry {
    base_name: HgPathBuf,
    full_path: PathBuf,
    metadata: std::fs::Metadata,
}

impl DirEntry {
    /// Returns **unsorted** entries in the given directory, with name and
    /// metadata.
    ///
    /// If a `.hg` sub-directory is encountered:
    ///
    /// * At the repository root, ignore that sub-directory
    /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
    ///   list instead.
    fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
        // `read_dir` returns a "not found" error for the empty path
        let at_cwd = path == Path::new("");
        let read_dir_path = if at_cwd { Path::new(".") } else { path };
        let mut results = Vec::new();
        for entry in read_dir_path.read_dir()? {
            let entry = entry?;
            let metadata = match entry.metadata() {
                Ok(v) => v,
                Err(e) => {
                    // race with file deletion?
                    if e.kind() == std::io::ErrorKind::NotFound {
                        continue;
                    } else {
                        return Err(e);
                    }
                }
            };
            let file_name = entry.file_name();
            // FIXME don't do this when cached
            if file_name == ".hg" {
                if is_at_repo_root {
                    // Skip the repo’s own .hg (might be a symlink)
                    continue;
                } else if metadata.is_dir() {
                    // A .hg sub-directory at another location means a subrepo,
                    // skip it entirely.
                    return Ok(Vec::new());
                }
            }
            let full_path = if at_cwd {
                file_name.clone().into()
            } else {
                entry.path()
            };
            let base_name = get_bytes_from_os_string(file_name).into();
            results.push(DirEntry {
                base_name,
                full_path,
                metadata,
            })
        }
        Ok(results)
    }
}
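
// Illustrative sketch (not part of the original source): `read_dir` above
// deliberately returns entries unsorted, so a caller that merges them with
// sorted dirstate children would typically sort by `base_name` first. The
// helper name `sorted_read_dir` is hypothetical.
#[allow(dead_code)]
fn sorted_read_dir(
    path: &Path,
    is_at_repo_root: bool,
) -> io::Result<Vec<DirEntry>> {
    let mut entries = DirEntry::read_dir(path, is_at_repo_root)?;
    // Byte-wise comparison of the base names.
    entries.sort_unstable_by(|a, b| {
        a.base_name.as_bytes().cmp(b.base_name.as_bytes())
    });
    Ok(entries)
}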

/// Return the `mtime` of a temporary file newly-created in the `.hg` directory
/// of the given repository.
///
/// This is similar to `SystemTime::now()`, with the result truncated to the
/// same time resolution as other files’ modification times. Using `.hg`
/// instead of the system’s default temporary directory (such as `/tmp`) makes
/// it more likely the temporary file is in the same disk partition as contents
/// of the working directory, which can matter since different filesystems may
/// store timestamps with different resolutions.
///
/// This may fail, typically if we lack write permissions. In that case we
/// should continue the `status()` algorithm anyway and consider the current
/// date/time to be unknown.
fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
    tempfile::tempfile_in(repo_root.join(".hg"))?
        .metadata()?
        .modified()
}