##// END OF EJS Templates
dirstate-v2: Add heuristic for when to create a new data file...
Simon Sapin -
r48481:d9411836 default
parent child Browse files
Show More
@@ -1,1232 +1,1270 b''
1 use bytes_cast::BytesCast;
1 use bytes_cast::BytesCast;
2 use micro_timer::timed;
2 use micro_timer::timed;
3 use std::borrow::Cow;
3 use std::borrow::Cow;
4 use std::convert::TryInto;
4 use std::convert::TryInto;
5 use std::path::PathBuf;
5 use std::path::PathBuf;
6
6
7 use super::on_disk;
7 use super::on_disk;
8 use super::on_disk::DirstateV2ParseError;
8 use super::on_disk::DirstateV2ParseError;
9 use super::path_with_basename::WithBasename;
9 use super::path_with_basename::WithBasename;
10 use crate::dirstate::parsers::pack_entry;
10 use crate::dirstate::parsers::pack_entry;
11 use crate::dirstate::parsers::packed_entry_size;
11 use crate::dirstate::parsers::packed_entry_size;
12 use crate::dirstate::parsers::parse_dirstate_entries;
12 use crate::dirstate::parsers::parse_dirstate_entries;
13 use crate::dirstate::parsers::Timestamp;
13 use crate::dirstate::parsers::Timestamp;
14 use crate::dirstate::MTIME_UNSET;
14 use crate::dirstate::MTIME_UNSET;
15 use crate::dirstate::SIZE_FROM_OTHER_PARENT;
15 use crate::dirstate::SIZE_FROM_OTHER_PARENT;
16 use crate::dirstate::SIZE_NON_NORMAL;
16 use crate::dirstate::SIZE_NON_NORMAL;
17 use crate::dirstate::V1_RANGEMASK;
17 use crate::dirstate::V1_RANGEMASK;
18 use crate::matchers::Matcher;
18 use crate::matchers::Matcher;
19 use crate::utils::hg_path::{HgPath, HgPathBuf};
19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 use crate::CopyMapIter;
20 use crate::CopyMapIter;
21 use crate::DirstateEntry;
21 use crate::DirstateEntry;
22 use crate::DirstateError;
22 use crate::DirstateError;
23 use crate::DirstateParents;
23 use crate::DirstateParents;
24 use crate::DirstateStatus;
24 use crate::DirstateStatus;
25 use crate::EntryState;
25 use crate::EntryState;
26 use crate::FastHashMap;
26 use crate::FastHashMap;
27 use crate::PatternFileWarning;
27 use crate::PatternFileWarning;
28 use crate::StateMapIter;
28 use crate::StateMapIter;
29 use crate::StatusError;
29 use crate::StatusError;
30 use crate::StatusOptions;
30 use crate::StatusOptions;
31
31
32 /// Append to an existing data file if the amount of unreachable data (not used
33 /// anymore) is less than this fraction of the total amount of existing data.
34 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
35
32 pub struct DirstateMap<'on_disk> {
36 pub struct DirstateMap<'on_disk> {
33 /// Contents of the `.hg/dirstate` file
37 /// Contents of the `.hg/dirstate` file
34 pub(super) on_disk: &'on_disk [u8],
38 pub(super) on_disk: &'on_disk [u8],
35
39
36 pub(super) root: ChildNodes<'on_disk>,
40 pub(super) root: ChildNodes<'on_disk>,
37
41
38 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
42 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
39 pub(super) nodes_with_entry_count: u32,
43 pub(super) nodes_with_entry_count: u32,
40
44
41 /// Number of nodes anywhere in the tree that have
45 /// Number of nodes anywhere in the tree that have
42 /// `.copy_source.is_some()`.
46 /// `.copy_source.is_some()`.
43 pub(super) nodes_with_copy_source_count: u32,
47 pub(super) nodes_with_copy_source_count: u32,
44
48
45 /// See on_disk::Header
49 /// See on_disk::Header
46 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
50 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
51
52 /// How many bytes of `on_disk` are not used anymore
53 pub(super) unreachable_bytes: u32,
47 }
54 }
48
55
49 /// Using a plain `HgPathBuf` of the full path from the repository root as a
56 /// Using a plain `HgPathBuf` of the full path from the repository root as a
50 /// map key would also work: all paths in a given map have the same parent
57 /// map key would also work: all paths in a given map have the same parent
51 /// path, so comparing full paths gives the same result as comparing base
58 /// path, so comparing full paths gives the same result as comparing base
52 /// names. However `HashMap` would waste time always re-hashing the same
59 /// names. However `HashMap` would waste time always re-hashing the same
53 /// string prefix.
60 /// string prefix.
54 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
61 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
55
62
56 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
63 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
57 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
64 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
58 pub(super) enum BorrowedPath<'tree, 'on_disk> {
65 pub(super) enum BorrowedPath<'tree, 'on_disk> {
59 InMemory(&'tree HgPathBuf),
66 InMemory(&'tree HgPathBuf),
60 OnDisk(&'on_disk HgPath),
67 OnDisk(&'on_disk HgPath),
61 }
68 }
62
69
63 pub(super) enum ChildNodes<'on_disk> {
70 pub(super) enum ChildNodes<'on_disk> {
64 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
71 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
65 OnDisk(&'on_disk [on_disk::Node]),
72 OnDisk(&'on_disk [on_disk::Node]),
66 }
73 }
67
74
68 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
75 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
69 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
76 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
70 OnDisk(&'on_disk [on_disk::Node]),
77 OnDisk(&'on_disk [on_disk::Node]),
71 }
78 }
72
79
73 pub(super) enum NodeRef<'tree, 'on_disk> {
80 pub(super) enum NodeRef<'tree, 'on_disk> {
74 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
81 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
75 OnDisk(&'on_disk on_disk::Node),
82 OnDisk(&'on_disk on_disk::Node),
76 }
83 }
77
84
78 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
85 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
79 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
86 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
80 match *self {
87 match *self {
81 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
88 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
82 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
89 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
83 }
90 }
84 }
91 }
85 }
92 }
86
93
87 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
94 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
88 type Target = HgPath;
95 type Target = HgPath;
89
96
90 fn deref(&self) -> &HgPath {
97 fn deref(&self) -> &HgPath {
91 match *self {
98 match *self {
92 BorrowedPath::InMemory(in_memory) => in_memory,
99 BorrowedPath::InMemory(in_memory) => in_memory,
93 BorrowedPath::OnDisk(on_disk) => on_disk,
100 BorrowedPath::OnDisk(on_disk) => on_disk,
94 }
101 }
95 }
102 }
96 }
103 }
97
104
98 impl Default for ChildNodes<'_> {
105 impl Default for ChildNodes<'_> {
99 fn default() -> Self {
106 fn default() -> Self {
100 ChildNodes::InMemory(Default::default())
107 ChildNodes::InMemory(Default::default())
101 }
108 }
102 }
109 }
103
110
104 impl<'on_disk> ChildNodes<'on_disk> {
111 impl<'on_disk> ChildNodes<'on_disk> {
105 pub(super) fn as_ref<'tree>(
112 pub(super) fn as_ref<'tree>(
106 &'tree self,
113 &'tree self,
107 ) -> ChildNodesRef<'tree, 'on_disk> {
114 ) -> ChildNodesRef<'tree, 'on_disk> {
108 match self {
115 match self {
109 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
116 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
110 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
117 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
111 }
118 }
112 }
119 }
113
120
114 pub(super) fn is_empty(&self) -> bool {
121 pub(super) fn is_empty(&self) -> bool {
115 match self {
122 match self {
116 ChildNodes::InMemory(nodes) => nodes.is_empty(),
123 ChildNodes::InMemory(nodes) => nodes.is_empty(),
117 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
124 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
118 }
125 }
119 }
126 }
120
127
121 pub(super) fn make_mut(
128 fn make_mut(
122 &mut self,
129 &mut self,
123 on_disk: &'on_disk [u8],
130 on_disk: &'on_disk [u8],
131 unreachable_bytes: &mut u32,
124 ) -> Result<
132 ) -> Result<
125 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
133 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
126 DirstateV2ParseError,
134 DirstateV2ParseError,
127 > {
135 > {
128 match self {
136 match self {
129 ChildNodes::InMemory(nodes) => Ok(nodes),
137 ChildNodes::InMemory(nodes) => Ok(nodes),
130 ChildNodes::OnDisk(nodes) => {
138 ChildNodes::OnDisk(nodes) => {
139 *unreachable_bytes +=
140 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
131 let nodes = nodes
141 let nodes = nodes
132 .iter()
142 .iter()
133 .map(|node| {
143 .map(|node| {
134 Ok((
144 Ok((
135 node.path(on_disk)?,
145 node.path(on_disk)?,
136 node.to_in_memory_node(on_disk)?,
146 node.to_in_memory_node(on_disk)?,
137 ))
147 ))
138 })
148 })
139 .collect::<Result<_, _>>()?;
149 .collect::<Result<_, _>>()?;
140 *self = ChildNodes::InMemory(nodes);
150 *self = ChildNodes::InMemory(nodes);
141 match self {
151 match self {
142 ChildNodes::InMemory(nodes) => Ok(nodes),
152 ChildNodes::InMemory(nodes) => Ok(nodes),
143 ChildNodes::OnDisk(_) => unreachable!(),
153 ChildNodes::OnDisk(_) => unreachable!(),
144 }
154 }
145 }
155 }
146 }
156 }
147 }
157 }
148 }
158 }
149
159
150 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
160 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
151 pub(super) fn get(
161 pub(super) fn get(
152 &self,
162 &self,
153 base_name: &HgPath,
163 base_name: &HgPath,
154 on_disk: &'on_disk [u8],
164 on_disk: &'on_disk [u8],
155 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
165 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
156 match self {
166 match self {
157 ChildNodesRef::InMemory(nodes) => Ok(nodes
167 ChildNodesRef::InMemory(nodes) => Ok(nodes
158 .get_key_value(base_name)
168 .get_key_value(base_name)
159 .map(|(k, v)| NodeRef::InMemory(k, v))),
169 .map(|(k, v)| NodeRef::InMemory(k, v))),
160 ChildNodesRef::OnDisk(nodes) => {
170 ChildNodesRef::OnDisk(nodes) => {
161 let mut parse_result = Ok(());
171 let mut parse_result = Ok(());
162 let search_result = nodes.binary_search_by(|node| {
172 let search_result = nodes.binary_search_by(|node| {
163 match node.base_name(on_disk) {
173 match node.base_name(on_disk) {
164 Ok(node_base_name) => node_base_name.cmp(base_name),
174 Ok(node_base_name) => node_base_name.cmp(base_name),
165 Err(e) => {
175 Err(e) => {
166 parse_result = Err(e);
176 parse_result = Err(e);
167 // Dummy comparison result, `search_result` won’t
177 // Dummy comparison result, `search_result` won’t
168 // be used since `parse_result` is an error
178 // be used since `parse_result` is an error
169 std::cmp::Ordering::Equal
179 std::cmp::Ordering::Equal
170 }
180 }
171 }
181 }
172 });
182 });
173 parse_result.map(|()| {
183 parse_result.map(|()| {
174 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
184 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
175 })
185 })
176 }
186 }
177 }
187 }
178 }
188 }
179
189
180 /// Iterate in undefined order
190 /// Iterate in undefined order
181 pub(super) fn iter(
191 pub(super) fn iter(
182 &self,
192 &self,
183 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
193 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
184 match self {
194 match self {
185 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
195 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
186 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
196 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
187 ),
197 ),
188 ChildNodesRef::OnDisk(nodes) => {
198 ChildNodesRef::OnDisk(nodes) => {
189 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
199 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
190 }
200 }
191 }
201 }
192 }
202 }
193
203
194 /// Iterate in parallel in undefined order
204 /// Iterate in parallel in undefined order
195 pub(super) fn par_iter(
205 pub(super) fn par_iter(
196 &self,
206 &self,
197 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
207 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
198 {
208 {
199 use rayon::prelude::*;
209 use rayon::prelude::*;
200 match self {
210 match self {
201 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
211 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
202 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
212 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
203 ),
213 ),
204 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
214 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
205 nodes.par_iter().map(NodeRef::OnDisk),
215 nodes.par_iter().map(NodeRef::OnDisk),
206 ),
216 ),
207 }
217 }
208 }
218 }
209
219
210 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
220 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
211 match self {
221 match self {
212 ChildNodesRef::InMemory(nodes) => {
222 ChildNodesRef::InMemory(nodes) => {
213 let mut vec: Vec<_> = nodes
223 let mut vec: Vec<_> = nodes
214 .iter()
224 .iter()
215 .map(|(k, v)| NodeRef::InMemory(k, v))
225 .map(|(k, v)| NodeRef::InMemory(k, v))
216 .collect();
226 .collect();
217 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
227 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
218 match node {
228 match node {
219 NodeRef::InMemory(path, _node) => path.base_name(),
229 NodeRef::InMemory(path, _node) => path.base_name(),
220 NodeRef::OnDisk(_) => unreachable!(),
230 NodeRef::OnDisk(_) => unreachable!(),
221 }
231 }
222 }
232 }
223 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
233 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
224 // value: https://github.com/rust-lang/rust/issues/34162
234 // value: https://github.com/rust-lang/rust/issues/34162
225 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
235 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
226 vec
236 vec
227 }
237 }
228 ChildNodesRef::OnDisk(nodes) => {
238 ChildNodesRef::OnDisk(nodes) => {
229 // Nodes on disk are already sorted
239 // Nodes on disk are already sorted
230 nodes.iter().map(NodeRef::OnDisk).collect()
240 nodes.iter().map(NodeRef::OnDisk).collect()
231 }
241 }
232 }
242 }
233 }
243 }
234 }
244 }
235
245
236 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
246 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
237 pub(super) fn full_path(
247 pub(super) fn full_path(
238 &self,
248 &self,
239 on_disk: &'on_disk [u8],
249 on_disk: &'on_disk [u8],
240 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
250 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
241 match self {
251 match self {
242 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
252 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
243 NodeRef::OnDisk(node) => node.full_path(on_disk),
253 NodeRef::OnDisk(node) => node.full_path(on_disk),
244 }
254 }
245 }
255 }
246
256
247 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
257 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
248 /// HgPath>` detached from `'tree`
258 /// HgPath>` detached from `'tree`
249 pub(super) fn full_path_borrowed(
259 pub(super) fn full_path_borrowed(
250 &self,
260 &self,
251 on_disk: &'on_disk [u8],
261 on_disk: &'on_disk [u8],
252 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
262 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
253 match self {
263 match self {
254 NodeRef::InMemory(path, _node) => match path.full_path() {
264 NodeRef::InMemory(path, _node) => match path.full_path() {
255 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
265 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
256 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
266 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
257 },
267 },
258 NodeRef::OnDisk(node) => {
268 NodeRef::OnDisk(node) => {
259 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
269 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
260 }
270 }
261 }
271 }
262 }
272 }
263
273
264 pub(super) fn base_name(
274 pub(super) fn base_name(
265 &self,
275 &self,
266 on_disk: &'on_disk [u8],
276 on_disk: &'on_disk [u8],
267 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
277 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
268 match self {
278 match self {
269 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
279 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
270 NodeRef::OnDisk(node) => node.base_name(on_disk),
280 NodeRef::OnDisk(node) => node.base_name(on_disk),
271 }
281 }
272 }
282 }
273
283
274 pub(super) fn children(
284 pub(super) fn children(
275 &self,
285 &self,
276 on_disk: &'on_disk [u8],
286 on_disk: &'on_disk [u8],
277 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
287 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
278 match self {
288 match self {
279 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
289 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
280 NodeRef::OnDisk(node) => {
290 NodeRef::OnDisk(node) => {
281 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
291 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
282 }
292 }
283 }
293 }
284 }
294 }
285
295
286 pub(super) fn has_copy_source(&self) -> bool {
296 pub(super) fn has_copy_source(&self) -> bool {
287 match self {
297 match self {
288 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
298 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
289 NodeRef::OnDisk(node) => node.has_copy_source(),
299 NodeRef::OnDisk(node) => node.has_copy_source(),
290 }
300 }
291 }
301 }
292
302
293 pub(super) fn copy_source(
303 pub(super) fn copy_source(
294 &self,
304 &self,
295 on_disk: &'on_disk [u8],
305 on_disk: &'on_disk [u8],
296 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
306 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
297 match self {
307 match self {
298 NodeRef::InMemory(_path, node) => {
308 NodeRef::InMemory(_path, node) => {
299 Ok(node.copy_source.as_ref().map(|s| &**s))
309 Ok(node.copy_source.as_ref().map(|s| &**s))
300 }
310 }
301 NodeRef::OnDisk(node) => node.copy_source(on_disk),
311 NodeRef::OnDisk(node) => node.copy_source(on_disk),
302 }
312 }
303 }
313 }
304
314
305 pub(super) fn entry(
315 pub(super) fn entry(
306 &self,
316 &self,
307 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
317 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
308 match self {
318 match self {
309 NodeRef::InMemory(_path, node) => {
319 NodeRef::InMemory(_path, node) => {
310 Ok(node.data.as_entry().copied())
320 Ok(node.data.as_entry().copied())
311 }
321 }
312 NodeRef::OnDisk(node) => node.entry(),
322 NodeRef::OnDisk(node) => node.entry(),
313 }
323 }
314 }
324 }
315
325
316 pub(super) fn state(
326 pub(super) fn state(
317 &self,
327 &self,
318 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
328 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
319 match self {
329 match self {
320 NodeRef::InMemory(_path, node) => {
330 NodeRef::InMemory(_path, node) => {
321 Ok(node.data.as_entry().map(|entry| entry.state))
331 Ok(node.data.as_entry().map(|entry| entry.state))
322 }
332 }
323 NodeRef::OnDisk(node) => node.state(),
333 NodeRef::OnDisk(node) => node.state(),
324 }
334 }
325 }
335 }
326
336
327 pub(super) fn cached_directory_mtime(
337 pub(super) fn cached_directory_mtime(
328 &self,
338 &self,
329 ) -> Option<&'tree on_disk::Timestamp> {
339 ) -> Option<&'tree on_disk::Timestamp> {
330 match self {
340 match self {
331 NodeRef::InMemory(_path, node) => match &node.data {
341 NodeRef::InMemory(_path, node) => match &node.data {
332 NodeData::CachedDirectory { mtime } => Some(mtime),
342 NodeData::CachedDirectory { mtime } => Some(mtime),
333 _ => None,
343 _ => None,
334 },
344 },
335 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
345 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
336 }
346 }
337 }
347 }
338
348
339 pub(super) fn descendants_with_entry_count(&self) -> u32 {
349 pub(super) fn descendants_with_entry_count(&self) -> u32 {
340 match self {
350 match self {
341 NodeRef::InMemory(_path, node) => {
351 NodeRef::InMemory(_path, node) => {
342 node.descendants_with_entry_count
352 node.descendants_with_entry_count
343 }
353 }
344 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
354 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
345 }
355 }
346 }
356 }
347
357
348 pub(super) fn tracked_descendants_count(&self) -> u32 {
358 pub(super) fn tracked_descendants_count(&self) -> u32 {
349 match self {
359 match self {
350 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
360 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
351 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
361 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
352 }
362 }
353 }
363 }
354 }
364 }
355
365
356 /// Represents a file or a directory
366 /// Represents a file or a directory
357 #[derive(Default)]
367 #[derive(Default)]
358 pub(super) struct Node<'on_disk> {
368 pub(super) struct Node<'on_disk> {
359 pub(super) data: NodeData,
369 pub(super) data: NodeData,
360
370
361 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
371 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
362
372
363 pub(super) children: ChildNodes<'on_disk>,
373 pub(super) children: ChildNodes<'on_disk>,
364
374
365 /// How many (non-inclusive) descendants of this node have an entry.
375 /// How many (non-inclusive) descendants of this node have an entry.
366 pub(super) descendants_with_entry_count: u32,
376 pub(super) descendants_with_entry_count: u32,
367
377
368 /// How many (non-inclusive) descendants of this node have an entry whose
378 /// How many (non-inclusive) descendants of this node have an entry whose
369 /// state is "tracked".
379 /// state is "tracked".
370 pub(super) tracked_descendants_count: u32,
380 pub(super) tracked_descendants_count: u32,
371 }
381 }
372
382
373 pub(super) enum NodeData {
383 pub(super) enum NodeData {
374 Entry(DirstateEntry),
384 Entry(DirstateEntry),
375 CachedDirectory { mtime: on_disk::Timestamp },
385 CachedDirectory { mtime: on_disk::Timestamp },
376 None,
386 None,
377 }
387 }
378
388
379 impl Default for NodeData {
389 impl Default for NodeData {
380 fn default() -> Self {
390 fn default() -> Self {
381 NodeData::None
391 NodeData::None
382 }
392 }
383 }
393 }
384
394
385 impl NodeData {
395 impl NodeData {
386 fn has_entry(&self) -> bool {
396 fn has_entry(&self) -> bool {
387 match self {
397 match self {
388 NodeData::Entry(_) => true,
398 NodeData::Entry(_) => true,
389 _ => false,
399 _ => false,
390 }
400 }
391 }
401 }
392
402
393 fn as_entry(&self) -> Option<&DirstateEntry> {
403 fn as_entry(&self) -> Option<&DirstateEntry> {
394 match self {
404 match self {
395 NodeData::Entry(entry) => Some(entry),
405 NodeData::Entry(entry) => Some(entry),
396 _ => None,
406 _ => None,
397 }
407 }
398 }
408 }
399 }
409 }
400
410
401 impl<'on_disk> DirstateMap<'on_disk> {
411 impl<'on_disk> DirstateMap<'on_disk> {
402 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
412 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
403 Self {
413 Self {
404 on_disk,
414 on_disk,
405 root: ChildNodes::default(),
415 root: ChildNodes::default(),
406 nodes_with_entry_count: 0,
416 nodes_with_entry_count: 0,
407 nodes_with_copy_source_count: 0,
417 nodes_with_copy_source_count: 0,
408 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
418 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
419 unreachable_bytes: 0,
409 }
420 }
410 }
421 }
411
422
412 #[timed]
423 #[timed]
413 pub fn new_v2(
424 pub fn new_v2(
414 on_disk: &'on_disk [u8],
425 on_disk: &'on_disk [u8],
415 data_size: usize,
426 data_size: usize,
416 ) -> Result<Self, DirstateError> {
427 ) -> Result<Self, DirstateError> {
417 if let Some(data) = on_disk.get(..data_size) {
428 if let Some(data) = on_disk.get(..data_size) {
418 Ok(on_disk::read(data)?)
429 Ok(on_disk::read(data)?)
419 } else {
430 } else {
420 Err(DirstateV2ParseError.into())
431 Err(DirstateV2ParseError.into())
421 }
432 }
422 }
433 }
423
434
424 #[timed]
435 #[timed]
425 pub fn new_v1(
436 pub fn new_v1(
426 on_disk: &'on_disk [u8],
437 on_disk: &'on_disk [u8],
427 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
438 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
428 let mut map = Self::empty(on_disk);
439 let mut map = Self::empty(on_disk);
429 if map.on_disk.is_empty() {
440 if map.on_disk.is_empty() {
430 return Ok((map, None));
441 return Ok((map, None));
431 }
442 }
432
443
433 let parents = parse_dirstate_entries(
444 let parents = parse_dirstate_entries(
434 map.on_disk,
445 map.on_disk,
435 |path, entry, copy_source| {
446 |path, entry, copy_source| {
436 let tracked = entry.state.is_tracked();
447 let tracked = entry.state.is_tracked();
437 let node = Self::get_or_insert_node(
448 let node = Self::get_or_insert_node(
438 map.on_disk,
449 map.on_disk,
450 &mut map.unreachable_bytes,
439 &mut map.root,
451 &mut map.root,
440 path,
452 path,
441 WithBasename::to_cow_borrowed,
453 WithBasename::to_cow_borrowed,
442 |ancestor| {
454 |ancestor| {
443 if tracked {
455 if tracked {
444 ancestor.tracked_descendants_count += 1
456 ancestor.tracked_descendants_count += 1
445 }
457 }
446 ancestor.descendants_with_entry_count += 1
458 ancestor.descendants_with_entry_count += 1
447 },
459 },
448 )?;
460 )?;
449 assert!(
461 assert!(
450 !node.data.has_entry(),
462 !node.data.has_entry(),
451 "duplicate dirstate entry in read"
463 "duplicate dirstate entry in read"
452 );
464 );
453 assert!(
465 assert!(
454 node.copy_source.is_none(),
466 node.copy_source.is_none(),
455 "duplicate dirstate entry in read"
467 "duplicate dirstate entry in read"
456 );
468 );
457 node.data = NodeData::Entry(*entry);
469 node.data = NodeData::Entry(*entry);
458 node.copy_source = copy_source.map(Cow::Borrowed);
470 node.copy_source = copy_source.map(Cow::Borrowed);
459 map.nodes_with_entry_count += 1;
471 map.nodes_with_entry_count += 1;
460 if copy_source.is_some() {
472 if copy_source.is_some() {
461 map.nodes_with_copy_source_count += 1
473 map.nodes_with_copy_source_count += 1
462 }
474 }
463 Ok(())
475 Ok(())
464 },
476 },
465 )?;
477 )?;
466 let parents = Some(parents.clone());
478 let parents = Some(parents.clone());
467
479
468 Ok((map, parents))
480 Ok((map, parents))
469 }
481 }
470
482
471 /// Assuming dirstate-v2 format, returns whether the next write should
483 /// Assuming dirstate-v2 format, returns whether the next write should
472 /// append to the existing data file that contains `self.on_disk` (true),
484 /// append to the existing data file that contains `self.on_disk` (true),
473 /// or create a new data file from scratch (false).
485 /// or create a new data file from scratch (false).
474 pub(super) fn write_should_append(&self) -> bool {
486 pub(super) fn write_should_append(&self) -> bool {
475 // Soon this will be a heuristic based on the amount of unreachable
487 let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
476 // data. For now it’s pseudo-random in order to make tests exercise
488 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
477 // both code paths.
478
479 fn bad_rng() -> u32 {
480 std::time::SystemTime::now()
481 .duration_since(std::time::UNIX_EPOCH)
482 .unwrap()
483 .subsec_millis()
484 }
485
486 bad_rng() % 2 == 0
487 }
489 }
488
490
489 fn get_node<'tree>(
491 fn get_node<'tree>(
490 &'tree self,
492 &'tree self,
491 path: &HgPath,
493 path: &HgPath,
492 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
494 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
493 let mut children = self.root.as_ref();
495 let mut children = self.root.as_ref();
494 let mut components = path.components();
496 let mut components = path.components();
495 let mut component =
497 let mut component =
496 components.next().expect("expected at least one components");
498 components.next().expect("expected at least one components");
497 loop {
499 loop {
498 if let Some(child) = children.get(component, self.on_disk)? {
500 if let Some(child) = children.get(component, self.on_disk)? {
499 if let Some(next_component) = components.next() {
501 if let Some(next_component) = components.next() {
500 component = next_component;
502 component = next_component;
501 children = child.children(self.on_disk)?;
503 children = child.children(self.on_disk)?;
502 } else {
504 } else {
503 return Ok(Some(child));
505 return Ok(Some(child));
504 }
506 }
505 } else {
507 } else {
506 return Ok(None);
508 return Ok(None);
507 }
509 }
508 }
510 }
509 }
511 }
510
512
511 /// Returns a mutable reference to the node at `path` if it exists
513 /// Returns a mutable reference to the node at `path` if it exists
512 ///
514 ///
513 /// This takes `root` instead of `&mut self` so that callers can mutate
515 /// This takes `root` instead of `&mut self` so that callers can mutate
514 /// other fields while the returned borrow is still valid
516 /// other fields while the returned borrow is still valid
515 fn get_node_mut<'tree>(
517 fn get_node_mut<'tree>(
516 on_disk: &'on_disk [u8],
518 on_disk: &'on_disk [u8],
519 unreachable_bytes: &mut u32,
517 root: &'tree mut ChildNodes<'on_disk>,
520 root: &'tree mut ChildNodes<'on_disk>,
518 path: &HgPath,
521 path: &HgPath,
519 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
522 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
520 let mut children = root;
523 let mut children = root;
521 let mut components = path.components();
524 let mut components = path.components();
522 let mut component =
525 let mut component =
523 components.next().expect("expected at least one components");
526 components.next().expect("expected at least one components");
524 loop {
527 loop {
525 if let Some(child) = children.make_mut(on_disk)?.get_mut(component)
528 if let Some(child) = children
529 .make_mut(on_disk, unreachable_bytes)?
530 .get_mut(component)
526 {
531 {
527 if let Some(next_component) = components.next() {
532 if let Some(next_component) = components.next() {
528 component = next_component;
533 component = next_component;
529 children = &mut child.children;
534 children = &mut child.children;
530 } else {
535 } else {
531 return Ok(Some(child));
536 return Ok(Some(child));
532 }
537 }
533 } else {
538 } else {
534 return Ok(None);
539 return Ok(None);
535 }
540 }
536 }
541 }
537 }
542 }
538
543
539 pub(super) fn get_or_insert<'tree, 'path>(
544 pub(super) fn get_or_insert<'tree, 'path>(
540 &'tree mut self,
545 &'tree mut self,
541 path: &HgPath,
546 path: &HgPath,
542 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
547 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
543 Self::get_or_insert_node(
548 Self::get_or_insert_node(
544 self.on_disk,
549 self.on_disk,
550 &mut self.unreachable_bytes,
545 &mut self.root,
551 &mut self.root,
546 path,
552 path,
547 WithBasename::to_cow_owned,
553 WithBasename::to_cow_owned,
548 |_| {},
554 |_| {},
549 )
555 )
550 }
556 }
551
557
552 pub(super) fn get_or_insert_node<'tree, 'path>(
558 fn get_or_insert_node<'tree, 'path>(
553 on_disk: &'on_disk [u8],
559 on_disk: &'on_disk [u8],
560 unreachable_bytes: &mut u32,
554 root: &'tree mut ChildNodes<'on_disk>,
561 root: &'tree mut ChildNodes<'on_disk>,
555 path: &'path HgPath,
562 path: &'path HgPath,
556 to_cow: impl Fn(
563 to_cow: impl Fn(
557 WithBasename<&'path HgPath>,
564 WithBasename<&'path HgPath>,
558 ) -> WithBasename<Cow<'on_disk, HgPath>>,
565 ) -> WithBasename<Cow<'on_disk, HgPath>>,
559 mut each_ancestor: impl FnMut(&mut Node),
566 mut each_ancestor: impl FnMut(&mut Node),
560 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
567 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
561 let mut child_nodes = root;
568 let mut child_nodes = root;
562 let mut inclusive_ancestor_paths =
569 let mut inclusive_ancestor_paths =
563 WithBasename::inclusive_ancestors_of(path);
570 WithBasename::inclusive_ancestors_of(path);
564 let mut ancestor_path = inclusive_ancestor_paths
571 let mut ancestor_path = inclusive_ancestor_paths
565 .next()
572 .next()
566 .expect("expected at least one inclusive ancestor");
573 .expect("expected at least one inclusive ancestor");
567 loop {
574 loop {
568 // TODO: can we avoid allocating an owned key in cases where the
575 // TODO: can we avoid allocating an owned key in cases where the
569 // map already contains that key, without introducing double
576 // map already contains that key, without introducing double
570 // lookup?
577 // lookup?
571 let child_node = child_nodes
578 let child_node = child_nodes
572 .make_mut(on_disk)?
579 .make_mut(on_disk, unreachable_bytes)?
573 .entry(to_cow(ancestor_path))
580 .entry(to_cow(ancestor_path))
574 .or_default();
581 .or_default();
575 if let Some(next) = inclusive_ancestor_paths.next() {
582 if let Some(next) = inclusive_ancestor_paths.next() {
576 each_ancestor(child_node);
583 each_ancestor(child_node);
577 ancestor_path = next;
584 ancestor_path = next;
578 child_nodes = &mut child_node.children;
585 child_nodes = &mut child_node.children;
579 } else {
586 } else {
580 return Ok(child_node);
587 return Ok(child_node);
581 }
588 }
582 }
589 }
583 }
590 }
584
591
585 fn add_or_remove_file(
592 fn add_or_remove_file(
586 &mut self,
593 &mut self,
587 path: &HgPath,
594 path: &HgPath,
588 old_state: EntryState,
595 old_state: EntryState,
589 new_entry: DirstateEntry,
596 new_entry: DirstateEntry,
590 ) -> Result<(), DirstateV2ParseError> {
597 ) -> Result<(), DirstateV2ParseError> {
591 let had_entry = old_state != EntryState::Unknown;
598 let had_entry = old_state != EntryState::Unknown;
592 let tracked_count_increment =
599 let tracked_count_increment =
593 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
600 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
594 (false, true) => 1,
601 (false, true) => 1,
595 (true, false) => -1,
602 (true, false) => -1,
596 _ => 0,
603 _ => 0,
597 };
604 };
598
605
599 let node = Self::get_or_insert_node(
606 let node = Self::get_or_insert_node(
600 self.on_disk,
607 self.on_disk,
608 &mut self.unreachable_bytes,
601 &mut self.root,
609 &mut self.root,
602 path,
610 path,
603 WithBasename::to_cow_owned,
611 WithBasename::to_cow_owned,
604 |ancestor| {
612 |ancestor| {
605 if !had_entry {
613 if !had_entry {
606 ancestor.descendants_with_entry_count += 1;
614 ancestor.descendants_with_entry_count += 1;
607 }
615 }
608
616
609 // We can’t use `+= increment` because the counter is unsigned,
617 // We can’t use `+= increment` because the counter is unsigned,
610 // and we want debug builds to detect accidental underflow
618 // and we want debug builds to detect accidental underflow
611 // through zero
619 // through zero
612 match tracked_count_increment {
620 match tracked_count_increment {
613 1 => ancestor.tracked_descendants_count += 1,
621 1 => ancestor.tracked_descendants_count += 1,
614 -1 => ancestor.tracked_descendants_count -= 1,
622 -1 => ancestor.tracked_descendants_count -= 1,
615 _ => {}
623 _ => {}
616 }
624 }
617 },
625 },
618 )?;
626 )?;
619 if !had_entry {
627 if !had_entry {
620 self.nodes_with_entry_count += 1
628 self.nodes_with_entry_count += 1
621 }
629 }
622 node.data = NodeData::Entry(new_entry);
630 node.data = NodeData::Entry(new_entry);
623 Ok(())
631 Ok(())
624 }
632 }
625
633
626 fn iter_nodes<'tree>(
634 fn iter_nodes<'tree>(
627 &'tree self,
635 &'tree self,
628 ) -> impl Iterator<
636 ) -> impl Iterator<
629 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
637 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
630 > + 'tree {
638 > + 'tree {
631 // Depth first tree traversal.
639 // Depth first tree traversal.
632 //
640 //
633 // If we could afford internal iteration and recursion,
641 // If we could afford internal iteration and recursion,
634 // this would look like:
642 // this would look like:
635 //
643 //
636 // ```
644 // ```
637 // fn traverse_children(
645 // fn traverse_children(
638 // children: &ChildNodes,
646 // children: &ChildNodes,
639 // each: &mut impl FnMut(&Node),
647 // each: &mut impl FnMut(&Node),
640 // ) {
648 // ) {
641 // for child in children.values() {
649 // for child in children.values() {
642 // traverse_children(&child.children, each);
650 // traverse_children(&child.children, each);
643 // each(child);
651 // each(child);
644 // }
652 // }
645 // }
653 // }
646 // ```
654 // ```
647 //
655 //
648 // However we want an external iterator and therefore can’t use the
656 // However we want an external iterator and therefore can’t use the
649 // call stack. Use an explicit stack instead:
657 // call stack. Use an explicit stack instead:
650 let mut stack = Vec::new();
658 let mut stack = Vec::new();
651 let mut iter = self.root.as_ref().iter();
659 let mut iter = self.root.as_ref().iter();
652 std::iter::from_fn(move || {
660 std::iter::from_fn(move || {
653 while let Some(child_node) = iter.next() {
661 while let Some(child_node) = iter.next() {
654 let children = match child_node.children(self.on_disk) {
662 let children = match child_node.children(self.on_disk) {
655 Ok(children) => children,
663 Ok(children) => children,
656 Err(error) => return Some(Err(error)),
664 Err(error) => return Some(Err(error)),
657 };
665 };
658 // Pseudo-recursion
666 // Pseudo-recursion
659 let new_iter = children.iter();
667 let new_iter = children.iter();
660 let old_iter = std::mem::replace(&mut iter, new_iter);
668 let old_iter = std::mem::replace(&mut iter, new_iter);
661 stack.push((child_node, old_iter));
669 stack.push((child_node, old_iter));
662 }
670 }
663 // Found the end of a `children.iter()` iterator.
671 // Found the end of a `children.iter()` iterator.
664 if let Some((child_node, next_iter)) = stack.pop() {
672 if let Some((child_node, next_iter)) = stack.pop() {
665 // "Return" from pseudo-recursion by restoring state from the
673 // "Return" from pseudo-recursion by restoring state from the
666 // explicit stack
674 // explicit stack
667 iter = next_iter;
675 iter = next_iter;
668
676
669 Some(Ok(child_node))
677 Some(Ok(child_node))
670 } else {
678 } else {
671 // Reached the bottom of the stack, we’re done
679 // Reached the bottom of the stack, we’re done
672 None
680 None
673 }
681 }
674 })
682 })
675 }
683 }
676
684
677 fn clear_known_ambiguous_mtimes(
685 fn clear_known_ambiguous_mtimes(
678 &mut self,
686 &mut self,
679 paths: &[impl AsRef<HgPath>],
687 paths: &[impl AsRef<HgPath>],
680 ) -> Result<(), DirstateV2ParseError> {
688 ) -> Result<(), DirstateV2ParseError> {
681 for path in paths {
689 for path in paths {
682 if let Some(node) = Self::get_node_mut(
690 if let Some(node) = Self::get_node_mut(
683 self.on_disk,
691 self.on_disk,
692 &mut self.unreachable_bytes,
684 &mut self.root,
693 &mut self.root,
685 path.as_ref(),
694 path.as_ref(),
686 )? {
695 )? {
687 if let NodeData::Entry(entry) = &mut node.data {
696 if let NodeData::Entry(entry) = &mut node.data {
688 entry.clear_mtime();
697 entry.clear_mtime();
689 }
698 }
690 }
699 }
691 }
700 }
692 Ok(())
701 Ok(())
693 }
702 }
694
703
695 /// Return a faillilble iterator of full paths of nodes that have an
704 /// Return a faillilble iterator of full paths of nodes that have an
696 /// `entry` for which the given `predicate` returns true.
705 /// `entry` for which the given `predicate` returns true.
697 ///
706 ///
698 /// Fallibility means that each iterator item is a `Result`, which may
707 /// Fallibility means that each iterator item is a `Result`, which may
699 /// indicate a parse error of the on-disk dirstate-v2 format. Such errors
708 /// indicate a parse error of the on-disk dirstate-v2 format. Such errors
700 /// should only happen if Mercurial is buggy or a repository is corrupted.
709 /// should only happen if Mercurial is buggy or a repository is corrupted.
701 fn filter_full_paths<'tree>(
710 fn filter_full_paths<'tree>(
702 &'tree self,
711 &'tree self,
703 predicate: impl Fn(&DirstateEntry) -> bool + 'tree,
712 predicate: impl Fn(&DirstateEntry) -> bool + 'tree,
704 ) -> impl Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + 'tree
713 ) -> impl Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + 'tree
705 {
714 {
706 filter_map_results(self.iter_nodes(), move |node| {
715 filter_map_results(self.iter_nodes(), move |node| {
707 if let Some(entry) = node.entry()? {
716 if let Some(entry) = node.entry()? {
708 if predicate(&entry) {
717 if predicate(&entry) {
709 return Ok(Some(node.full_path(self.on_disk)?));
718 return Ok(Some(node.full_path(self.on_disk)?));
710 }
719 }
711 }
720 }
712 Ok(None)
721 Ok(None)
713 })
722 })
714 }
723 }
724
725 fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
726 if let Cow::Borrowed(path) = path {
727 *unreachable_bytes += path.len() as u32
728 }
729 }
715 }
730 }
716
731
717 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
732 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
718 ///
733 ///
719 /// The callback is only called for incoming `Ok` values. Errors are passed
734 /// The callback is only called for incoming `Ok` values. Errors are passed
720 /// through as-is. In order to let it use the `?` operator the callback is
735 /// through as-is. In order to let it use the `?` operator the callback is
721 /// expected to return a `Result` of `Option`, instead of an `Option` of
736 /// expected to return a `Result` of `Option`, instead of an `Option` of
722 /// `Result`.
737 /// `Result`.
723 fn filter_map_results<'a, I, F, A, B, E>(
738 fn filter_map_results<'a, I, F, A, B, E>(
724 iter: I,
739 iter: I,
725 f: F,
740 f: F,
726 ) -> impl Iterator<Item = Result<B, E>> + 'a
741 ) -> impl Iterator<Item = Result<B, E>> + 'a
727 where
742 where
728 I: Iterator<Item = Result<A, E>> + 'a,
743 I: Iterator<Item = Result<A, E>> + 'a,
729 F: Fn(A) -> Result<Option<B>, E> + 'a,
744 F: Fn(A) -> Result<Option<B>, E> + 'a,
730 {
745 {
731 iter.filter_map(move |result| match result {
746 iter.filter_map(move |result| match result {
732 Ok(node) => f(node).transpose(),
747 Ok(node) => f(node).transpose(),
733 Err(e) => Some(Err(e)),
748 Err(e) => Some(Err(e)),
734 })
749 })
735 }
750 }
736
751
737 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
752 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
738 fn clear(&mut self) {
753 fn clear(&mut self) {
739 self.root = Default::default();
754 self.root = Default::default();
740 self.nodes_with_entry_count = 0;
755 self.nodes_with_entry_count = 0;
741 self.nodes_with_copy_source_count = 0;
756 self.nodes_with_copy_source_count = 0;
742 }
757 }
743
758
744 fn add_file(
759 fn add_file(
745 &mut self,
760 &mut self,
746 filename: &HgPath,
761 filename: &HgPath,
747 entry: DirstateEntry,
762 entry: DirstateEntry,
748 added: bool,
763 added: bool,
749 merged: bool,
764 merged: bool,
750 from_p2: bool,
765 from_p2: bool,
751 possibly_dirty: bool,
766 possibly_dirty: bool,
752 ) -> Result<(), DirstateError> {
767 ) -> Result<(), DirstateError> {
753 let mut entry = entry;
768 let mut entry = entry;
754 if added {
769 if added {
755 assert!(!possibly_dirty);
770 assert!(!possibly_dirty);
756 assert!(!from_p2);
771 assert!(!from_p2);
757 entry.state = EntryState::Added;
772 entry.state = EntryState::Added;
758 entry.size = SIZE_NON_NORMAL;
773 entry.size = SIZE_NON_NORMAL;
759 entry.mtime = MTIME_UNSET;
774 entry.mtime = MTIME_UNSET;
760 } else if merged {
775 } else if merged {
761 assert!(!possibly_dirty);
776 assert!(!possibly_dirty);
762 assert!(!from_p2);
777 assert!(!from_p2);
763 entry.state = EntryState::Merged;
778 entry.state = EntryState::Merged;
764 entry.size = SIZE_FROM_OTHER_PARENT;
779 entry.size = SIZE_FROM_OTHER_PARENT;
765 entry.mtime = MTIME_UNSET;
780 entry.mtime = MTIME_UNSET;
766 } else if from_p2 {
781 } else if from_p2 {
767 assert!(!possibly_dirty);
782 assert!(!possibly_dirty);
768 entry.state = EntryState::Normal;
783 entry.state = EntryState::Normal;
769 entry.size = SIZE_FROM_OTHER_PARENT;
784 entry.size = SIZE_FROM_OTHER_PARENT;
770 entry.mtime = MTIME_UNSET;
785 entry.mtime = MTIME_UNSET;
771 } else if possibly_dirty {
786 } else if possibly_dirty {
772 entry.state = EntryState::Normal;
787 entry.state = EntryState::Normal;
773 entry.size = SIZE_NON_NORMAL;
788 entry.size = SIZE_NON_NORMAL;
774 entry.mtime = MTIME_UNSET;
789 entry.mtime = MTIME_UNSET;
775 } else {
790 } else {
776 entry.state = EntryState::Normal;
791 entry.state = EntryState::Normal;
777 entry.size = entry.size & V1_RANGEMASK;
792 entry.size = entry.size & V1_RANGEMASK;
778 entry.mtime = entry.mtime & V1_RANGEMASK;
793 entry.mtime = entry.mtime & V1_RANGEMASK;
779 }
794 }
780
795
781 let old_state = match self.get(filename)? {
796 let old_state = match self.get(filename)? {
782 Some(e) => e.state,
797 Some(e) => e.state,
783 None => EntryState::Unknown,
798 None => EntryState::Unknown,
784 };
799 };
785
800
786 Ok(self.add_or_remove_file(filename, old_state, entry)?)
801 Ok(self.add_or_remove_file(filename, old_state, entry)?)
787 }
802 }
788
803
789 fn remove_file(
804 fn remove_file(
790 &mut self,
805 &mut self,
791 filename: &HgPath,
806 filename: &HgPath,
792 in_merge: bool,
807 in_merge: bool,
793 ) -> Result<(), DirstateError> {
808 ) -> Result<(), DirstateError> {
794 let old_entry_opt = self.get(filename)?;
809 let old_entry_opt = self.get(filename)?;
795 let old_state = match old_entry_opt {
810 let old_state = match old_entry_opt {
796 Some(e) => e.state,
811 Some(e) => e.state,
797 None => EntryState::Unknown,
812 None => EntryState::Unknown,
798 };
813 };
799 let mut size = 0;
814 let mut size = 0;
800 if in_merge {
815 if in_merge {
801 // XXX we should not be able to have 'm' state and 'FROM_P2' if not
816 // XXX we should not be able to have 'm' state and 'FROM_P2' if not
802 // during a merge. So I (marmoute) am not sure we need the
817 // during a merge. So I (marmoute) am not sure we need the
803 // conditionnal at all. Adding double checking this with assert
818 // conditionnal at all. Adding double checking this with assert
804 // would be nice.
819 // would be nice.
805 if let Some(old_entry) = old_entry_opt {
820 if let Some(old_entry) = old_entry_opt {
806 // backup the previous state
821 // backup the previous state
807 if old_entry.state == EntryState::Merged {
822 if old_entry.state == EntryState::Merged {
808 size = SIZE_NON_NORMAL;
823 size = SIZE_NON_NORMAL;
809 } else if old_entry.state == EntryState::Normal
824 } else if old_entry.state == EntryState::Normal
810 && old_entry.size == SIZE_FROM_OTHER_PARENT
825 && old_entry.size == SIZE_FROM_OTHER_PARENT
811 {
826 {
812 // other parent
827 // other parent
813 size = SIZE_FROM_OTHER_PARENT;
828 size = SIZE_FROM_OTHER_PARENT;
814 }
829 }
815 }
830 }
816 }
831 }
817 if size == 0 {
832 if size == 0 {
818 self.copy_map_remove(filename)?;
833 self.copy_map_remove(filename)?;
819 }
834 }
820 let entry = DirstateEntry {
835 let entry = DirstateEntry {
821 state: EntryState::Removed,
836 state: EntryState::Removed,
822 mode: 0,
837 mode: 0,
823 size,
838 size,
824 mtime: 0,
839 mtime: 0,
825 };
840 };
826 Ok(self.add_or_remove_file(filename, old_state, entry)?)
841 Ok(self.add_or_remove_file(filename, old_state, entry)?)
827 }
842 }
828
843
829 fn drop_file(&mut self, filename: &HgPath) -> Result<bool, DirstateError> {
844 fn drop_file(&mut self, filename: &HgPath) -> Result<bool, DirstateError> {
830 let old_state = match self.get(filename)? {
845 let old_state = match self.get(filename)? {
831 Some(e) => e.state,
846 Some(e) => e.state,
832 None => EntryState::Unknown,
847 None => EntryState::Unknown,
833 };
848 };
834 struct Dropped {
849 struct Dropped {
835 was_tracked: bool,
850 was_tracked: bool,
836 had_entry: bool,
851 had_entry: bool,
837 had_copy_source: bool,
852 had_copy_source: bool,
838 }
853 }
839
854
840 /// If this returns `Ok(Some((dropped, removed)))`, then
855 /// If this returns `Ok(Some((dropped, removed)))`, then
841 ///
856 ///
842 /// * `dropped` is about the leaf node that was at `filename`
857 /// * `dropped` is about the leaf node that was at `filename`
843 /// * `removed` is whether this particular level of recursion just
858 /// * `removed` is whether this particular level of recursion just
844 /// removed a node in `nodes`.
859 /// removed a node in `nodes`.
845 fn recur<'on_disk>(
860 fn recur<'on_disk>(
846 on_disk: &'on_disk [u8],
861 on_disk: &'on_disk [u8],
862 unreachable_bytes: &mut u32,
847 nodes: &mut ChildNodes<'on_disk>,
863 nodes: &mut ChildNodes<'on_disk>,
848 path: &HgPath,
864 path: &HgPath,
849 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
865 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
850 let (first_path_component, rest_of_path) =
866 let (first_path_component, rest_of_path) =
851 path.split_first_component();
867 path.split_first_component();
852 let node = if let Some(node) =
868 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
853 nodes.make_mut(on_disk)?.get_mut(first_path_component)
869 let node = if let Some(node) = nodes.get_mut(first_path_component)
854 {
870 {
855 node
871 node
856 } else {
872 } else {
857 return Ok(None);
873 return Ok(None);
858 };
874 };
859 let dropped;
875 let dropped;
860 if let Some(rest) = rest_of_path {
876 if let Some(rest) = rest_of_path {
861 if let Some((d, removed)) =
877 if let Some((d, removed)) = recur(
862 recur(on_disk, &mut node.children, rest)?
878 on_disk,
863 {
879 unreachable_bytes,
880 &mut node.children,
881 rest,
882 )? {
864 dropped = d;
883 dropped = d;
865 if dropped.had_entry {
884 if dropped.had_entry {
866 node.descendants_with_entry_count -= 1;
885 node.descendants_with_entry_count -= 1;
867 }
886 }
868 if dropped.was_tracked {
887 if dropped.was_tracked {
869 node.tracked_descendants_count -= 1;
888 node.tracked_descendants_count -= 1;
870 }
889 }
871
890
872 // Directory caches must be invalidated when removing a
891 // Directory caches must be invalidated when removing a
873 // child node
892 // child node
874 if removed {
893 if removed {
875 if let NodeData::CachedDirectory { .. } = &node.data {
894 if let NodeData::CachedDirectory { .. } = &node.data {
876 node.data = NodeData::None
895 node.data = NodeData::None
877 }
896 }
878 }
897 }
879 } else {
898 } else {
880 return Ok(None);
899 return Ok(None);
881 }
900 }
882 } else {
901 } else {
883 let had_entry = node.data.has_entry();
902 let had_entry = node.data.has_entry();
884 if had_entry {
903 if had_entry {
885 node.data = NodeData::None
904 node.data = NodeData::None
886 }
905 }
906 if let Some(source) = &node.copy_source {
907 DirstateMap::count_dropped_path(unreachable_bytes, source)
908 }
887 dropped = Dropped {
909 dropped = Dropped {
888 was_tracked: node
910 was_tracked: node
889 .data
911 .data
890 .as_entry()
912 .as_entry()
891 .map_or(false, |entry| entry.state.is_tracked()),
913 .map_or(false, |entry| entry.state.is_tracked()),
892 had_entry,
914 had_entry,
893 had_copy_source: node.copy_source.take().is_some(),
915 had_copy_source: node.copy_source.take().is_some(),
894 };
916 };
895 }
917 }
896 // After recursion, for both leaf (rest_of_path is None) nodes and
918 // After recursion, for both leaf (rest_of_path is None) nodes and
897 // parent nodes, remove a node if it just became empty.
919 // parent nodes, remove a node if it just became empty.
898 let remove = !node.data.has_entry()
920 let remove = !node.data.has_entry()
899 && node.copy_source.is_none()
921 && node.copy_source.is_none()
900 && node.children.is_empty();
922 && node.children.is_empty();
901 if remove {
923 if remove {
902 nodes.make_mut(on_disk)?.remove(first_path_component);
924 let (key, _) =
925 nodes.remove_entry(first_path_component).unwrap();
926 DirstateMap::count_dropped_path(
927 unreachable_bytes,
928 key.full_path(),
929 )
903 }
930 }
904 Ok(Some((dropped, remove)))
931 Ok(Some((dropped, remove)))
905 }
932 }
906
933
907 if let Some((dropped, _removed)) =
934 if let Some((dropped, _removed)) = recur(
908 recur(self.on_disk, &mut self.root, filename)?
935 self.on_disk,
909 {
936 &mut self.unreachable_bytes,
937 &mut self.root,
938 filename,
939 )? {
910 if dropped.had_entry {
940 if dropped.had_entry {
911 self.nodes_with_entry_count -= 1
941 self.nodes_with_entry_count -= 1
912 }
942 }
913 if dropped.had_copy_source {
943 if dropped.had_copy_source {
914 self.nodes_with_copy_source_count -= 1
944 self.nodes_with_copy_source_count -= 1
915 }
945 }
916 Ok(dropped.had_entry)
946 Ok(dropped.had_entry)
917 } else {
947 } else {
918 debug_assert!(!old_state.is_tracked());
948 debug_assert!(!old_state.is_tracked());
919 Ok(false)
949 Ok(false)
920 }
950 }
921 }
951 }
922
952
923 fn clear_ambiguous_times(
953 fn clear_ambiguous_times(
924 &mut self,
954 &mut self,
925 filenames: Vec<HgPathBuf>,
955 filenames: Vec<HgPathBuf>,
926 now: i32,
956 now: i32,
927 ) -> Result<(), DirstateV2ParseError> {
957 ) -> Result<(), DirstateV2ParseError> {
928 for filename in filenames {
958 for filename in filenames {
929 if let Some(node) =
959 if let Some(node) = Self::get_node_mut(
930 Self::get_node_mut(self.on_disk, &mut self.root, &filename)?
960 self.on_disk,
931 {
961 &mut self.unreachable_bytes,
962 &mut self.root,
963 &filename,
964 )? {
932 if let NodeData::Entry(entry) = &mut node.data {
965 if let NodeData::Entry(entry) = &mut node.data {
933 entry.clear_ambiguous_mtime(now);
966 entry.clear_ambiguous_mtime(now);
934 }
967 }
935 }
968 }
936 }
969 }
937 Ok(())
970 Ok(())
938 }
971 }
939
972
940 fn non_normal_entries_contains(
973 fn non_normal_entries_contains(
941 &mut self,
974 &mut self,
942 key: &HgPath,
975 key: &HgPath,
943 ) -> Result<bool, DirstateV2ParseError> {
976 ) -> Result<bool, DirstateV2ParseError> {
944 Ok(if let Some(node) = self.get_node(key)? {
977 Ok(if let Some(node) = self.get_node(key)? {
945 node.entry()?.map_or(false, |entry| entry.is_non_normal())
978 node.entry()?.map_or(false, |entry| entry.is_non_normal())
946 } else {
979 } else {
947 false
980 false
948 })
981 })
949 }
982 }
950
983
951 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
984 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
952 // Do nothing, this `DirstateMap` does not have a separate "non normal
985 // Do nothing, this `DirstateMap` does not have a separate "non normal
953 // entries" set that need to be kept up to date
986 // entries" set that need to be kept up to date
954 }
987 }
955
988
956 fn non_normal_or_other_parent_paths(
989 fn non_normal_or_other_parent_paths(
957 &mut self,
990 &mut self,
958 ) -> Box<dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + '_>
991 ) -> Box<dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + '_>
959 {
992 {
960 Box::new(self.filter_full_paths(|entry| {
993 Box::new(self.filter_full_paths(|entry| {
961 entry.is_non_normal() || entry.is_from_other_parent()
994 entry.is_non_normal() || entry.is_from_other_parent()
962 }))
995 }))
963 }
996 }
964
997
965 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
998 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
966 // Do nothing, this `DirstateMap` does not have a separate "non normal
999 // Do nothing, this `DirstateMap` does not have a separate "non normal
967 // entries" and "from other parent" sets that need to be recomputed
1000 // entries" and "from other parent" sets that need to be recomputed
968 }
1001 }
969
1002
970 fn iter_non_normal_paths(
1003 fn iter_non_normal_paths(
971 &mut self,
1004 &mut self,
972 ) -> Box<
1005 ) -> Box<
973 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
1006 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
974 > {
1007 > {
975 self.iter_non_normal_paths_panic()
1008 self.iter_non_normal_paths_panic()
976 }
1009 }
977
1010
978 fn iter_non_normal_paths_panic(
1011 fn iter_non_normal_paths_panic(
979 &self,
1012 &self,
980 ) -> Box<
1013 ) -> Box<
981 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
1014 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
982 > {
1015 > {
983 Box::new(self.filter_full_paths(|entry| entry.is_non_normal()))
1016 Box::new(self.filter_full_paths(|entry| entry.is_non_normal()))
984 }
1017 }
985
1018
986 fn iter_other_parent_paths(
1019 fn iter_other_parent_paths(
987 &mut self,
1020 &mut self,
988 ) -> Box<
1021 ) -> Box<
989 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
1022 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
990 > {
1023 > {
991 Box::new(self.filter_full_paths(|entry| entry.is_from_other_parent()))
1024 Box::new(self.filter_full_paths(|entry| entry.is_from_other_parent()))
992 }
1025 }
993
1026
994 fn has_tracked_dir(
1027 fn has_tracked_dir(
995 &mut self,
1028 &mut self,
996 directory: &HgPath,
1029 directory: &HgPath,
997 ) -> Result<bool, DirstateError> {
1030 ) -> Result<bool, DirstateError> {
998 if let Some(node) = self.get_node(directory)? {
1031 if let Some(node) = self.get_node(directory)? {
999 // A node without a `DirstateEntry` was created to hold child
1032 // A node without a `DirstateEntry` was created to hold child
1000 // nodes, and is therefore a directory.
1033 // nodes, and is therefore a directory.
1001 let state = node.state()?;
1034 let state = node.state()?;
1002 Ok(state.is_none() && node.tracked_descendants_count() > 0)
1035 Ok(state.is_none() && node.tracked_descendants_count() > 0)
1003 } else {
1036 } else {
1004 Ok(false)
1037 Ok(false)
1005 }
1038 }
1006 }
1039 }
1007
1040
1008 fn has_dir(&mut self, directory: &HgPath) -> Result<bool, DirstateError> {
1041 fn has_dir(&mut self, directory: &HgPath) -> Result<bool, DirstateError> {
1009 if let Some(node) = self.get_node(directory)? {
1042 if let Some(node) = self.get_node(directory)? {
1010 // A node without a `DirstateEntry` was created to hold child
1043 // A node without a `DirstateEntry` was created to hold child
1011 // nodes, and is therefore a directory.
1044 // nodes, and is therefore a directory.
1012 let state = node.state()?;
1045 let state = node.state()?;
1013 Ok(state.is_none() && node.descendants_with_entry_count() > 0)
1046 Ok(state.is_none() && node.descendants_with_entry_count() > 0)
1014 } else {
1047 } else {
1015 Ok(false)
1048 Ok(false)
1016 }
1049 }
1017 }
1050 }
1018
1051
1019 #[timed]
1052 #[timed]
1020 fn pack_v1(
1053 fn pack_v1(
1021 &mut self,
1054 &mut self,
1022 parents: DirstateParents,
1055 parents: DirstateParents,
1023 now: Timestamp,
1056 now: Timestamp,
1024 ) -> Result<Vec<u8>, DirstateError> {
1057 ) -> Result<Vec<u8>, DirstateError> {
1025 let now: i32 = now.0.try_into().expect("time overflow");
1058 let now: i32 = now.0.try_into().expect("time overflow");
1026 let mut ambiguous_mtimes = Vec::new();
1059 let mut ambiguous_mtimes = Vec::new();
1027 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
1060 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
1028 // reallocations
1061 // reallocations
1029 let mut size = parents.as_bytes().len();
1062 let mut size = parents.as_bytes().len();
1030 for node in self.iter_nodes() {
1063 for node in self.iter_nodes() {
1031 let node = node?;
1064 let node = node?;
1032 if let Some(entry) = node.entry()? {
1065 if let Some(entry) = node.entry()? {
1033 size += packed_entry_size(
1066 size += packed_entry_size(
1034 node.full_path(self.on_disk)?,
1067 node.full_path(self.on_disk)?,
1035 node.copy_source(self.on_disk)?,
1068 node.copy_source(self.on_disk)?,
1036 );
1069 );
1037 if entry.mtime_is_ambiguous(now) {
1070 if entry.mtime_is_ambiguous(now) {
1038 ambiguous_mtimes.push(
1071 ambiguous_mtimes.push(
1039 node.full_path_borrowed(self.on_disk)?
1072 node.full_path_borrowed(self.on_disk)?
1040 .detach_from_tree(),
1073 .detach_from_tree(),
1041 )
1074 )
1042 }
1075 }
1043 }
1076 }
1044 }
1077 }
1045 self.clear_known_ambiguous_mtimes(&ambiguous_mtimes)?;
1078 self.clear_known_ambiguous_mtimes(&ambiguous_mtimes)?;
1046
1079
1047 let mut packed = Vec::with_capacity(size);
1080 let mut packed = Vec::with_capacity(size);
1048 packed.extend(parents.as_bytes());
1081 packed.extend(parents.as_bytes());
1049
1082
1050 for node in self.iter_nodes() {
1083 for node in self.iter_nodes() {
1051 let node = node?;
1084 let node = node?;
1052 if let Some(entry) = node.entry()? {
1085 if let Some(entry) = node.entry()? {
1053 pack_entry(
1086 pack_entry(
1054 node.full_path(self.on_disk)?,
1087 node.full_path(self.on_disk)?,
1055 &entry,
1088 &entry,
1056 node.copy_source(self.on_disk)?,
1089 node.copy_source(self.on_disk)?,
1057 &mut packed,
1090 &mut packed,
1058 );
1091 );
1059 }
1092 }
1060 }
1093 }
1061 Ok(packed)
1094 Ok(packed)
1062 }
1095 }
1063
1096
1064 /// Returns new data together with whether that data should be appended to
1097 /// Returns new data together with whether that data should be appended to
1065 /// the existing data file whose content is at `self.on_disk` (true),
1098 /// the existing data file whose content is at `self.on_disk` (true),
1066 /// instead of written to a new data file (false).
1099 /// instead of written to a new data file (false).
1067 #[timed]
1100 #[timed]
1068 fn pack_v2(
1101 fn pack_v2(
1069 &mut self,
1102 &mut self,
1070 now: Timestamp,
1103 now: Timestamp,
1071 can_append: bool,
1104 can_append: bool,
1072 ) -> Result<(Vec<u8>, bool), DirstateError> {
1105 ) -> Result<(Vec<u8>, bool), DirstateError> {
1073 // TODO: how do we want to handle this in 2038?
1106 // TODO: how do we want to handle this in 2038?
1074 let now: i32 = now.0.try_into().expect("time overflow");
1107 let now: i32 = now.0.try_into().expect("time overflow");
1075 let mut paths = Vec::new();
1108 let mut paths = Vec::new();
1076 for node in self.iter_nodes() {
1109 for node in self.iter_nodes() {
1077 let node = node?;
1110 let node = node?;
1078 if let Some(entry) = node.entry()? {
1111 if let Some(entry) = node.entry()? {
1079 if entry.mtime_is_ambiguous(now) {
1112 if entry.mtime_is_ambiguous(now) {
1080 paths.push(
1113 paths.push(
1081 node.full_path_borrowed(self.on_disk)?
1114 node.full_path_borrowed(self.on_disk)?
1082 .detach_from_tree(),
1115 .detach_from_tree(),
1083 )
1116 )
1084 }
1117 }
1085 }
1118 }
1086 }
1119 }
1087 // Borrow of `self` ends here since we collect cloned paths
1120 // Borrow of `self` ends here since we collect cloned paths
1088
1121
1089 self.clear_known_ambiguous_mtimes(&paths)?;
1122 self.clear_known_ambiguous_mtimes(&paths)?;
1090
1123
1091 on_disk::write(self, can_append)
1124 on_disk::write(self, can_append)
1092 }
1125 }
1093
1126
1094 fn status<'a>(
1127 fn status<'a>(
1095 &'a mut self,
1128 &'a mut self,
1096 matcher: &'a (dyn Matcher + Sync),
1129 matcher: &'a (dyn Matcher + Sync),
1097 root_dir: PathBuf,
1130 root_dir: PathBuf,
1098 ignore_files: Vec<PathBuf>,
1131 ignore_files: Vec<PathBuf>,
1099 options: StatusOptions,
1132 options: StatusOptions,
1100 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
1133 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
1101 {
1134 {
1102 super::status::status(self, matcher, root_dir, ignore_files, options)
1135 super::status::status(self, matcher, root_dir, ignore_files, options)
1103 }
1136 }
1104
1137
1105 fn copy_map_len(&self) -> usize {
1138 fn copy_map_len(&self) -> usize {
1106 self.nodes_with_copy_source_count as usize
1139 self.nodes_with_copy_source_count as usize
1107 }
1140 }
1108
1141
1109 fn copy_map_iter(&self) -> CopyMapIter<'_> {
1142 fn copy_map_iter(&self) -> CopyMapIter<'_> {
1110 Box::new(filter_map_results(self.iter_nodes(), move |node| {
1143 Box::new(filter_map_results(self.iter_nodes(), move |node| {
1111 Ok(if let Some(source) = node.copy_source(self.on_disk)? {
1144 Ok(if let Some(source) = node.copy_source(self.on_disk)? {
1112 Some((node.full_path(self.on_disk)?, source))
1145 Some((node.full_path(self.on_disk)?, source))
1113 } else {
1146 } else {
1114 None
1147 None
1115 })
1148 })
1116 }))
1149 }))
1117 }
1150 }
1118
1151
1119 fn copy_map_contains_key(
1152 fn copy_map_contains_key(
1120 &self,
1153 &self,
1121 key: &HgPath,
1154 key: &HgPath,
1122 ) -> Result<bool, DirstateV2ParseError> {
1155 ) -> Result<bool, DirstateV2ParseError> {
1123 Ok(if let Some(node) = self.get_node(key)? {
1156 Ok(if let Some(node) = self.get_node(key)? {
1124 node.has_copy_source()
1157 node.has_copy_source()
1125 } else {
1158 } else {
1126 false
1159 false
1127 })
1160 })
1128 }
1161 }
1129
1162
1130 fn copy_map_get(
1163 fn copy_map_get(
1131 &self,
1164 &self,
1132 key: &HgPath,
1165 key: &HgPath,
1133 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1166 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1134 if let Some(node) = self.get_node(key)? {
1167 if let Some(node) = self.get_node(key)? {
1135 if let Some(source) = node.copy_source(self.on_disk)? {
1168 if let Some(source) = node.copy_source(self.on_disk)? {
1136 return Ok(Some(source));
1169 return Ok(Some(source));
1137 }
1170 }
1138 }
1171 }
1139 Ok(None)
1172 Ok(None)
1140 }
1173 }
1141
1174
1142 fn copy_map_remove(
1175 fn copy_map_remove(
1143 &mut self,
1176 &mut self,
1144 key: &HgPath,
1177 key: &HgPath,
1145 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1178 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1146 let count = &mut self.nodes_with_copy_source_count;
1179 let count = &mut self.nodes_with_copy_source_count;
1147 Ok(
1180 let unreachable_bytes = &mut self.unreachable_bytes;
1148 Self::get_node_mut(self.on_disk, &mut self.root, key)?.and_then(
1181 Ok(Self::get_node_mut(
1149 |node| {
1182 self.on_disk,
1150 if node.copy_source.is_some() {
1183 unreachable_bytes,
1151 *count -= 1
1184 &mut self.root,
1152 }
1185 key,
1153 node.copy_source.take().map(Cow::into_owned)
1186 )?
1154 },
1187 .and_then(|node| {
1155 ),
1188 if let Some(source) = &node.copy_source {
1156 )
1189 *count -= 1;
1190 Self::count_dropped_path(unreachable_bytes, source);
1191 }
1192 node.copy_source.take().map(Cow::into_owned)
1193 }))
1157 }
1194 }
1158
1195
1159 fn copy_map_insert(
1196 fn copy_map_insert(
1160 &mut self,
1197 &mut self,
1161 key: HgPathBuf,
1198 key: HgPathBuf,
1162 value: HgPathBuf,
1199 value: HgPathBuf,
1163 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1200 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1164 let node = Self::get_or_insert_node(
1201 let node = Self::get_or_insert_node(
1165 self.on_disk,
1202 self.on_disk,
1203 &mut self.unreachable_bytes,
1166 &mut self.root,
1204 &mut self.root,
1167 &key,
1205 &key,
1168 WithBasename::to_cow_owned,
1206 WithBasename::to_cow_owned,
1169 |_ancestor| {},
1207 |_ancestor| {},
1170 )?;
1208 )?;
1171 if node.copy_source.is_none() {
1209 if node.copy_source.is_none() {
1172 self.nodes_with_copy_source_count += 1
1210 self.nodes_with_copy_source_count += 1
1173 }
1211 }
1174 Ok(node.copy_source.replace(value.into()).map(Cow::into_owned))
1212 Ok(node.copy_source.replace(value.into()).map(Cow::into_owned))
1175 }
1213 }
1176
1214
1177 fn len(&self) -> usize {
1215 fn len(&self) -> usize {
1178 self.nodes_with_entry_count as usize
1216 self.nodes_with_entry_count as usize
1179 }
1217 }
1180
1218
1181 fn contains_key(
1219 fn contains_key(
1182 &self,
1220 &self,
1183 key: &HgPath,
1221 key: &HgPath,
1184 ) -> Result<bool, DirstateV2ParseError> {
1222 ) -> Result<bool, DirstateV2ParseError> {
1185 Ok(self.get(key)?.is_some())
1223 Ok(self.get(key)?.is_some())
1186 }
1224 }
1187
1225
1188 fn get(
1226 fn get(
1189 &self,
1227 &self,
1190 key: &HgPath,
1228 key: &HgPath,
1191 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1229 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1192 Ok(if let Some(node) = self.get_node(key)? {
1230 Ok(if let Some(node) = self.get_node(key)? {
1193 node.entry()?
1231 node.entry()?
1194 } else {
1232 } else {
1195 None
1233 None
1196 })
1234 })
1197 }
1235 }
1198
1236
1199 fn iter(&self) -> StateMapIter<'_> {
1237 fn iter(&self) -> StateMapIter<'_> {
1200 Box::new(filter_map_results(self.iter_nodes(), move |node| {
1238 Box::new(filter_map_results(self.iter_nodes(), move |node| {
1201 Ok(if let Some(entry) = node.entry()? {
1239 Ok(if let Some(entry) = node.entry()? {
1202 Some((node.full_path(self.on_disk)?, entry))
1240 Some((node.full_path(self.on_disk)?, entry))
1203 } else {
1241 } else {
1204 None
1242 None
1205 })
1243 })
1206 }))
1244 }))
1207 }
1245 }
1208
1246
1209 fn iter_directories(
1247 fn iter_directories(
1210 &self,
1248 &self,
1211 ) -> Box<
1249 ) -> Box<
1212 dyn Iterator<
1250 dyn Iterator<
1213 Item = Result<
1251 Item = Result<
1214 (&HgPath, Option<Timestamp>),
1252 (&HgPath, Option<Timestamp>),
1215 DirstateV2ParseError,
1253 DirstateV2ParseError,
1216 >,
1254 >,
1217 > + Send
1255 > + Send
1218 + '_,
1256 + '_,
1219 > {
1257 > {
1220 Box::new(filter_map_results(self.iter_nodes(), move |node| {
1258 Box::new(filter_map_results(self.iter_nodes(), move |node| {
1221 Ok(if node.state()?.is_none() {
1259 Ok(if node.state()?.is_none() {
1222 Some((
1260 Some((
1223 node.full_path(self.on_disk)?,
1261 node.full_path(self.on_disk)?,
1224 node.cached_directory_mtime()
1262 node.cached_directory_mtime()
1225 .map(|mtime| Timestamp(mtime.seconds())),
1263 .map(|mtime| Timestamp(mtime.seconds())),
1226 ))
1264 ))
1227 } else {
1265 } else {
1228 None
1266 None
1229 })
1267 })
1230 }))
1268 }))
1231 }
1269 }
1232 }
1270 }
@@ -1,748 +1,756 b''
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! # File format
3 //! # File format
4 //!
4 //!
5 //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket that starts
5 //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket that starts
6 //! with a fixed-sized header whose layout is defined by the `DocketHeader`
6 //! with a fixed-sized header whose layout is defined by the `DocketHeader`
7 //! struct, followed by the data file identifier.
7 //! struct, followed by the data file identifier.
8 //!
8 //!
9 //! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
9 //! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
10 //! file may be longer than the size given in the docket, but not shorter. Only
10 //! file may be longer than the size given in the docket, but not shorter. Only
11 //! the start of the data file up to the given size is considered. The
11 //! the start of the data file up to the given size is considered. The
12 //! fixed-size "root" of the dirstate tree whose layout is defined by the
12 //! fixed-size "root" of the dirstate tree whose layout is defined by the
13 //! `Root` struct is found at the end of that slice of data.
13 //! `Root` struct is found at the end of that slice of data.
14 //!
14 //!
15 //! Its `root_nodes` field contains the slice (offset and length) to
15 //! Its `root_nodes` field contains the slice (offset and length) to
16 //! the nodes representing the files and directories at the root of the
16 //! the nodes representing the files and directories at the root of the
17 //! repository. Each node is also fixed-size, defined by the `Node` struct.
17 //! repository. Each node is also fixed-size, defined by the `Node` struct.
18 //! Nodes in turn contain slices to variable-size paths, and to their own child
18 //! Nodes in turn contain slices to variable-size paths, and to their own child
19 //! nodes (if any) for nested files and directories.
19 //! nodes (if any) for nested files and directories.
20
20
21 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
21 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
22 use crate::dirstate_tree::path_with_basename::WithBasename;
22 use crate::dirstate_tree::path_with_basename::WithBasename;
23 use crate::errors::HgError;
23 use crate::errors::HgError;
24 use crate::utils::hg_path::HgPath;
24 use crate::utils::hg_path::HgPath;
25 use crate::DirstateEntry;
25 use crate::DirstateEntry;
26 use crate::DirstateError;
26 use crate::DirstateError;
27 use crate::DirstateParents;
27 use crate::DirstateParents;
28 use crate::EntryState;
28 use crate::EntryState;
29 use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
29 use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
30 use bytes_cast::BytesCast;
30 use bytes_cast::BytesCast;
31 use format_bytes::format_bytes;
31 use format_bytes::format_bytes;
32 use std::borrow::Cow;
32 use std::borrow::Cow;
33 use std::convert::{TryFrom, TryInto};
33 use std::convert::{TryFrom, TryInto};
34 use std::time::{Duration, SystemTime, UNIX_EPOCH};
34 use std::time::{Duration, SystemTime, UNIX_EPOCH};
35
35
36 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
36 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
37 /// This a redundant sanity check more than an actual "magic number" since
37 /// This a redundant sanity check more than an actual "magic number" since
38 /// `.hg/requires` already governs which format should be used.
38 /// `.hg/requires` already governs which format should be used.
39 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
39 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
40
40
41 /// Keep space for 256-bit hashes
41 /// Keep space for 256-bit hashes
42 const STORED_NODE_ID_BYTES: usize = 32;
42 const STORED_NODE_ID_BYTES: usize = 32;
43
43
44 /// … even though only 160 bits are used for now, with SHA-1
44 /// … even though only 160 bits are used for now, with SHA-1
45 const USED_NODE_ID_BYTES: usize = 20;
45 const USED_NODE_ID_BYTES: usize = 20;
46
46
47 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
47 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
48 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
48 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
49
49
50 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
50 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
51 #[derive(BytesCast)]
51 #[derive(BytesCast)]
52 #[repr(C)]
52 #[repr(C)]
53 struct DocketHeader {
53 struct DocketHeader {
54 marker: [u8; V2_FORMAT_MARKER.len()],
54 marker: [u8; V2_FORMAT_MARKER.len()],
55 parent_1: [u8; STORED_NODE_ID_BYTES],
55 parent_1: [u8; STORED_NODE_ID_BYTES],
56 parent_2: [u8; STORED_NODE_ID_BYTES],
56 parent_2: [u8; STORED_NODE_ID_BYTES],
57
57
58 /// Counted in bytes
58 /// Counted in bytes
59 data_size: Size,
59 data_size: Size,
60
60
61 uuid_size: u8,
61 uuid_size: u8,
62 }
62 }
63
63
64 pub struct Docket<'on_disk> {
64 pub struct Docket<'on_disk> {
65 header: &'on_disk DocketHeader,
65 header: &'on_disk DocketHeader,
66 uuid: &'on_disk [u8],
66 uuid: &'on_disk [u8],
67 }
67 }
68
68
69 #[derive(BytesCast)]
69 #[derive(BytesCast)]
70 #[repr(C)]
70 #[repr(C)]
71 struct Root {
71 struct Root {
72 root_nodes: ChildNodes,
72 root_nodes: ChildNodes,
73 nodes_with_entry_count: Size,
73 nodes_with_entry_count: Size,
74 nodes_with_copy_source_count: Size,
74 nodes_with_copy_source_count: Size,
75
75
76 /// How many bytes of this data file are not used anymore
77 unreachable_bytes: Size,
78
76 /// If non-zero, a hash of ignore files that were used for some previous
79 /// If non-zero, a hash of ignore files that were used for some previous
77 /// run of the `status` algorithm.
80 /// run of the `status` algorithm.
78 ///
81 ///
79 /// We define:
82 /// We define:
80 ///
83 ///
81 /// * "Root" ignore files are `.hgignore` at the root of the repository if
84 /// * "Root" ignore files are `.hgignore` at the root of the repository if
82 /// it exists, and files from `ui.ignore.*` config. This set of files is
85 /// it exists, and files from `ui.ignore.*` config. This set of files is
83 /// then sorted by the string representation of their path.
86 /// then sorted by the string representation of their path.
84 /// * The "expanded contents" of an ignore files is the byte string made
87 /// * The "expanded contents" of an ignore files is the byte string made
85 /// by concatenating its contents with the "expanded contents" of other
88 /// by concatenating its contents with the "expanded contents" of other
86 /// files included with `include:` or `subinclude:` files, in inclusion
89 /// files included with `include:` or `subinclude:` files, in inclusion
87 /// order. This definition is recursive, as included files can
90 /// order. This definition is recursive, as included files can
88 /// themselves include more files.
91 /// themselves include more files.
89 ///
92 ///
90 /// This hash is defined as the SHA-1 of the concatenation (in sorted
93 /// This hash is defined as the SHA-1 of the concatenation (in sorted
91 /// order) of the "expanded contents" of each "root" ignore file.
94 /// order) of the "expanded contents" of each "root" ignore file.
92 /// (Note that computing this does not require actually concatenating byte
95 /// (Note that computing this does not require actually concatenating byte
93 /// strings into contiguous memory, instead SHA-1 hashing can be done
96 /// strings into contiguous memory, instead SHA-1 hashing can be done
94 /// incrementally.)
97 /// incrementally.)
95 ignore_patterns_hash: IgnorePatternsHash,
98 ignore_patterns_hash: IgnorePatternsHash,
96 }
99 }
97
100
98 #[derive(BytesCast)]
101 #[derive(BytesCast)]
99 #[repr(C)]
102 #[repr(C)]
100 pub(super) struct Node {
103 pub(super) struct Node {
101 full_path: PathSlice,
104 full_path: PathSlice,
102
105
103 /// In bytes from `self.full_path.start`
106 /// In bytes from `self.full_path.start`
104 base_name_start: PathSize,
107 base_name_start: PathSize,
105
108
106 copy_source: OptPathSlice,
109 copy_source: OptPathSlice,
107 children: ChildNodes,
110 children: ChildNodes,
108 pub(super) descendants_with_entry_count: Size,
111 pub(super) descendants_with_entry_count: Size,
109 pub(super) tracked_descendants_count: Size,
112 pub(super) tracked_descendants_count: Size,
110
113
111 /// Depending on the value of `state`:
114 /// Depending on the value of `state`:
112 ///
115 ///
113 /// * A null byte: `data` is not used.
116 /// * A null byte: `data` is not used.
114 ///
117 ///
115 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
118 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
116 /// represent a dirstate entry like in the v1 format.
119 /// represent a dirstate entry like in the v1 format.
117 ///
120 ///
118 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
121 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
119 /// as the `Timestamp` for the mtime of a cached directory.
122 /// as the `Timestamp` for the mtime of a cached directory.
120 ///
123 ///
121 /// The presence of this state means that at some point, this path in
124 /// The presence of this state means that at some point, this path in
122 /// the working directory was observed:
125 /// the working directory was observed:
123 ///
126 ///
124 /// - To be a directory
127 /// - To be a directory
125 /// - With the modification time as given by `Timestamp`
128 /// - With the modification time as given by `Timestamp`
126 /// - That timestamp was already strictly in the past when observed,
129 /// - That timestamp was already strictly in the past when observed,
127 /// meaning that later changes cannot happen in the same clock tick
130 /// meaning that later changes cannot happen in the same clock tick
128 /// and must cause a different modification time (unless the system
131 /// and must cause a different modification time (unless the system
129 /// clock jumps back and we get unlucky, which is not impossible but
132 /// clock jumps back and we get unlucky, which is not impossible but
130 /// but deemed unlikely enough).
133 /// but deemed unlikely enough).
131 /// - All direct children of this directory (as returned by
134 /// - All direct children of this directory (as returned by
132 /// `std::fs::read_dir`) either have a corresponding dirstate node, or
135 /// `std::fs::read_dir`) either have a corresponding dirstate node, or
133 /// are ignored by ignore patterns whose hash is in
136 /// are ignored by ignore patterns whose hash is in
134 /// `Root::ignore_patterns_hash`.
137 /// `Root::ignore_patterns_hash`.
135 ///
138 ///
136 /// This means that if `std::fs::symlink_metadata` later reports the
139 /// This means that if `std::fs::symlink_metadata` later reports the
137 /// same modification time and ignored patterns haven’t changed, a run
140 /// same modification time and ignored patterns haven’t changed, a run
138 /// of status that is not listing ignored files can skip calling
141 /// of status that is not listing ignored files can skip calling
139 /// `std::fs::read_dir` again for this directory, iterate child
142 /// `std::fs::read_dir` again for this directory, iterate child
140 /// dirstate nodes instead.
143 /// dirstate nodes instead.
141 state: u8,
144 state: u8,
142 data: Entry,
145 data: Entry,
143 }
146 }
144
147
145 #[derive(BytesCast, Copy, Clone)]
148 #[derive(BytesCast, Copy, Clone)]
146 #[repr(C)]
149 #[repr(C)]
147 struct Entry {
150 struct Entry {
148 mode: I32Be,
151 mode: I32Be,
149 mtime: I32Be,
152 mtime: I32Be,
150 size: I32Be,
153 size: I32Be,
151 }
154 }
152
155
153 /// Duration since the Unix epoch
156 /// Duration since the Unix epoch
154 #[derive(BytesCast, Copy, Clone, PartialEq)]
157 #[derive(BytesCast, Copy, Clone, PartialEq)]
155 #[repr(C)]
158 #[repr(C)]
156 pub(super) struct Timestamp {
159 pub(super) struct Timestamp {
157 seconds: I64Be,
160 seconds: I64Be,
158
161
159 /// In `0 .. 1_000_000_000`.
162 /// In `0 .. 1_000_000_000`.
160 ///
163 ///
161 /// This timestamp is later or earlier than `(seconds, 0)` by this many
164 /// This timestamp is later or earlier than `(seconds, 0)` by this many
162 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
165 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
163 nanoseconds: U32Be,
166 nanoseconds: U32Be,
164 }
167 }
165
168
166 /// Counted in bytes from the start of the file
169 /// Counted in bytes from the start of the file
167 ///
170 ///
168 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
171 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
169 type Offset = U32Be;
172 type Offset = U32Be;
170
173
171 /// Counted in number of items
174 /// Counted in number of items
172 ///
175 ///
173 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
176 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
174 type Size = U32Be;
177 type Size = U32Be;
175
178
176 /// Counted in bytes
179 /// Counted in bytes
177 ///
180 ///
178 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
181 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
179 type PathSize = U16Be;
182 type PathSize = U16Be;
180
183
181 /// A contiguous sequence of `len` times `Node`, representing the child nodes
184 /// A contiguous sequence of `len` times `Node`, representing the child nodes
182 /// of either some other node or of the repository root.
185 /// of either some other node or of the repository root.
183 ///
186 ///
184 /// Always sorted by ascending `full_path`, to allow binary search.
187 /// Always sorted by ascending `full_path`, to allow binary search.
185 /// Since nodes with the same parent nodes also have the same parent path,
188 /// Since nodes with the same parent nodes also have the same parent path,
186 /// only the `base_name`s need to be compared during binary search.
189 /// only the `base_name`s need to be compared during binary search.
187 #[derive(BytesCast, Copy, Clone)]
190 #[derive(BytesCast, Copy, Clone)]
188 #[repr(C)]
191 #[repr(C)]
189 struct ChildNodes {
192 struct ChildNodes {
190 start: Offset,
193 start: Offset,
191 len: Size,
194 len: Size,
192 }
195 }
193
196
194 /// A `HgPath` of `len` bytes
197 /// A `HgPath` of `len` bytes
195 #[derive(BytesCast, Copy, Clone)]
198 #[derive(BytesCast, Copy, Clone)]
196 #[repr(C)]
199 #[repr(C)]
197 struct PathSlice {
200 struct PathSlice {
198 start: Offset,
201 start: Offset,
199 len: PathSize,
202 len: PathSize,
200 }
203 }
201
204
202 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
205 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
203 type OptPathSlice = PathSlice;
206 type OptPathSlice = PathSlice;
204
207
205 /// Make sure that size-affecting changes are made knowingly
208 /// Make sure that size-affecting changes are made knowingly
206 fn _static_assert_size_of() {
209 fn _static_assert_size_of() {
207 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
210 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
208 let _ = std::mem::transmute::<Root, [u8; 36]>;
211 let _ = std::mem::transmute::<Root, [u8; 40]>;
209 let _ = std::mem::transmute::<Node, [u8; 43]>;
212 let _ = std::mem::transmute::<Node, [u8; 43]>;
210 }
213 }
211
214
212 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
215 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
213 ///
216 ///
214 /// This should only happen if Mercurial is buggy or a repository is corrupted.
217 /// This should only happen if Mercurial is buggy or a repository is corrupted.
215 #[derive(Debug)]
218 #[derive(Debug)]
216 pub struct DirstateV2ParseError;
219 pub struct DirstateV2ParseError;
217
220
218 impl From<DirstateV2ParseError> for HgError {
221 impl From<DirstateV2ParseError> for HgError {
219 fn from(_: DirstateV2ParseError) -> Self {
222 fn from(_: DirstateV2ParseError) -> Self {
220 HgError::corrupted("dirstate-v2 parse error")
223 HgError::corrupted("dirstate-v2 parse error")
221 }
224 }
222 }
225 }
223
226
224 impl From<DirstateV2ParseError> for crate::DirstateError {
227 impl From<DirstateV2ParseError> for crate::DirstateError {
225 fn from(error: DirstateV2ParseError) -> Self {
228 fn from(error: DirstateV2ParseError) -> Self {
226 HgError::from(error).into()
229 HgError::from(error).into()
227 }
230 }
228 }
231 }
229
232
230 impl<'on_disk> Docket<'on_disk> {
233 impl<'on_disk> Docket<'on_disk> {
231 pub fn parents(&self) -> DirstateParents {
234 pub fn parents(&self) -> DirstateParents {
232 use crate::Node;
235 use crate::Node;
233 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
236 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
234 .unwrap()
237 .unwrap()
235 .clone();
238 .clone();
236 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
239 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
237 .unwrap()
240 .unwrap()
238 .clone();
241 .clone();
239 DirstateParents { p1, p2 }
242 DirstateParents { p1, p2 }
240 }
243 }
241
244
242 pub fn data_size(&self) -> usize {
245 pub fn data_size(&self) -> usize {
243 // This `unwrap` could only panic on a 16-bit CPU
246 // This `unwrap` could only panic on a 16-bit CPU
244 self.header.data_size.get().try_into().unwrap()
247 self.header.data_size.get().try_into().unwrap()
245 }
248 }
246
249
247 pub fn data_filename(&self) -> String {
250 pub fn data_filename(&self) -> String {
248 String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
251 String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
249 }
252 }
250 }
253 }
251
254
252 pub fn read_docket(
255 pub fn read_docket(
253 on_disk: &[u8],
256 on_disk: &[u8],
254 ) -> Result<Docket<'_>, DirstateV2ParseError> {
257 ) -> Result<Docket<'_>, DirstateV2ParseError> {
255 let (header, uuid) =
258 let (header, uuid) =
256 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
259 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
257 let uuid_size = header.uuid_size as usize;
260 let uuid_size = header.uuid_size as usize;
258 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
261 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
259 Ok(Docket { header, uuid })
262 Ok(Docket { header, uuid })
260 } else {
263 } else {
261 Err(DirstateV2ParseError)
264 Err(DirstateV2ParseError)
262 }
265 }
263 }
266 }
264
267
265 fn read_root<'on_disk>(
268 fn read_root<'on_disk>(
266 on_disk: &'on_disk [u8],
269 on_disk: &'on_disk [u8],
267 ) -> Result<&'on_disk Root, DirstateV2ParseError> {
270 ) -> Result<&'on_disk Root, DirstateV2ParseError> {
268 // Find the `Root` at the end of the given slice
271 // Find the `Root` at the end of the given slice
269 let root_offset = on_disk
272 let root_offset = on_disk
270 .len()
273 .len()
271 .checked_sub(std::mem::size_of::<Root>())
274 .checked_sub(std::mem::size_of::<Root>())
272 // A non-empty slice too short is an error
275 // A non-empty slice too short is an error
273 .ok_or(DirstateV2ParseError)?;
276 .ok_or(DirstateV2ParseError)?;
274 let (root, _) = Root::from_bytes(&on_disk[root_offset..])
277 let (root, _) = Root::from_bytes(&on_disk[root_offset..])
275 .map_err(|_| DirstateV2ParseError)?;
278 .map_err(|_| DirstateV2ParseError)?;
276 Ok(root)
279 Ok(root)
277 }
280 }
278
281
279 pub(super) fn read<'on_disk>(
282 pub(super) fn read<'on_disk>(
280 on_disk: &'on_disk [u8],
283 on_disk: &'on_disk [u8],
281 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
284 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
282 if on_disk.is_empty() {
285 if on_disk.is_empty() {
283 return Ok(DirstateMap::empty(on_disk));
286 return Ok(DirstateMap::empty(on_disk));
284 }
287 }
285 let root = read_root(on_disk)?;
288 let root = read_root(on_disk)?;
289 let mut unreachable_bytes = root.unreachable_bytes.get();
290 // Each append writes a new `Root`, so it’s never reused
291 unreachable_bytes += std::mem::size_of::<Root>() as u32;
286 let dirstate_map = DirstateMap {
292 let dirstate_map = DirstateMap {
287 on_disk,
293 on_disk,
288 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
294 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
289 on_disk,
295 on_disk,
290 root.root_nodes,
296 root.root_nodes,
291 )?),
297 )?),
292 nodes_with_entry_count: root.nodes_with_entry_count.get(),
298 nodes_with_entry_count: root.nodes_with_entry_count.get(),
293 nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
299 nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
294 ignore_patterns_hash: root.ignore_patterns_hash,
300 ignore_patterns_hash: root.ignore_patterns_hash,
301 unreachable_bytes,
295 };
302 };
296 Ok(dirstate_map)
303 Ok(dirstate_map)
297 }
304 }
298
305
299 impl Node {
306 impl Node {
300 pub(super) fn full_path<'on_disk>(
307 pub(super) fn full_path<'on_disk>(
301 &self,
308 &self,
302 on_disk: &'on_disk [u8],
309 on_disk: &'on_disk [u8],
303 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
310 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
304 read_hg_path(on_disk, self.full_path)
311 read_hg_path(on_disk, self.full_path)
305 }
312 }
306
313
307 pub(super) fn base_name_start<'on_disk>(
314 pub(super) fn base_name_start<'on_disk>(
308 &self,
315 &self,
309 ) -> Result<usize, DirstateV2ParseError> {
316 ) -> Result<usize, DirstateV2ParseError> {
310 let start = self.base_name_start.get();
317 let start = self.base_name_start.get();
311 if start < self.full_path.len.get() {
318 if start < self.full_path.len.get() {
312 let start = usize::try_from(start)
319 let start = usize::try_from(start)
313 // u32 -> usize, could only panic on a 16-bit CPU
320 // u32 -> usize, could only panic on a 16-bit CPU
314 .expect("dirstate-v2 base_name_start out of bounds");
321 .expect("dirstate-v2 base_name_start out of bounds");
315 Ok(start)
322 Ok(start)
316 } else {
323 } else {
317 Err(DirstateV2ParseError)
324 Err(DirstateV2ParseError)
318 }
325 }
319 }
326 }
320
327
321 pub(super) fn base_name<'on_disk>(
328 pub(super) fn base_name<'on_disk>(
322 &self,
329 &self,
323 on_disk: &'on_disk [u8],
330 on_disk: &'on_disk [u8],
324 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
331 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
325 let full_path = self.full_path(on_disk)?;
332 let full_path = self.full_path(on_disk)?;
326 let base_name_start = self.base_name_start()?;
333 let base_name_start = self.base_name_start()?;
327 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
334 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
328 }
335 }
329
336
330 pub(super) fn path<'on_disk>(
337 pub(super) fn path<'on_disk>(
331 &self,
338 &self,
332 on_disk: &'on_disk [u8],
339 on_disk: &'on_disk [u8],
333 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
340 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
334 Ok(WithBasename::from_raw_parts(
341 Ok(WithBasename::from_raw_parts(
335 Cow::Borrowed(self.full_path(on_disk)?),
342 Cow::Borrowed(self.full_path(on_disk)?),
336 self.base_name_start()?,
343 self.base_name_start()?,
337 ))
344 ))
338 }
345 }
339
346
340 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
347 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
341 self.copy_source.start.get() != 0
348 self.copy_source.start.get() != 0
342 }
349 }
343
350
344 pub(super) fn copy_source<'on_disk>(
351 pub(super) fn copy_source<'on_disk>(
345 &self,
352 &self,
346 on_disk: &'on_disk [u8],
353 on_disk: &'on_disk [u8],
347 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
354 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
348 Ok(if self.has_copy_source() {
355 Ok(if self.has_copy_source() {
349 Some(read_hg_path(on_disk, self.copy_source)?)
356 Some(read_hg_path(on_disk, self.copy_source)?)
350 } else {
357 } else {
351 None
358 None
352 })
359 })
353 }
360 }
354
361
355 pub(super) fn node_data(
362 pub(super) fn node_data(
356 &self,
363 &self,
357 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
364 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
358 let entry = |state| {
365 let entry = |state| {
359 dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
366 dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
360 };
367 };
361
368
362 match self.state {
369 match self.state {
363 b'\0' => Ok(dirstate_map::NodeData::None),
370 b'\0' => Ok(dirstate_map::NodeData::None),
364 b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
371 b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
365 mtime: *self.data.as_timestamp(),
372 mtime: *self.data.as_timestamp(),
366 }),
373 }),
367 b'n' => Ok(entry(EntryState::Normal)),
374 b'n' => Ok(entry(EntryState::Normal)),
368 b'a' => Ok(entry(EntryState::Added)),
375 b'a' => Ok(entry(EntryState::Added)),
369 b'r' => Ok(entry(EntryState::Removed)),
376 b'r' => Ok(entry(EntryState::Removed)),
370 b'm' => Ok(entry(EntryState::Merged)),
377 b'm' => Ok(entry(EntryState::Merged)),
371 _ => Err(DirstateV2ParseError),
378 _ => Err(DirstateV2ParseError),
372 }
379 }
373 }
380 }
374
381
375 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
382 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
376 if self.state == b'd' {
383 if self.state == b'd' {
377 Some(self.data.as_timestamp())
384 Some(self.data.as_timestamp())
378 } else {
385 } else {
379 None
386 None
380 }
387 }
381 }
388 }
382
389
383 pub(super) fn state(
390 pub(super) fn state(
384 &self,
391 &self,
385 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
392 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
386 match self.state {
393 match self.state {
387 b'\0' | b'd' => Ok(None),
394 b'\0' | b'd' => Ok(None),
388 b'n' => Ok(Some(EntryState::Normal)),
395 b'n' => Ok(Some(EntryState::Normal)),
389 b'a' => Ok(Some(EntryState::Added)),
396 b'a' => Ok(Some(EntryState::Added)),
390 b'r' => Ok(Some(EntryState::Removed)),
397 b'r' => Ok(Some(EntryState::Removed)),
391 b'm' => Ok(Some(EntryState::Merged)),
398 b'm' => Ok(Some(EntryState::Merged)),
392 _ => Err(DirstateV2ParseError),
399 _ => Err(DirstateV2ParseError),
393 }
400 }
394 }
401 }
395
402
396 fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
403 fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
397 DirstateEntry {
404 DirstateEntry {
398 state,
405 state,
399 mode: self.data.mode.get(),
406 mode: self.data.mode.get(),
400 mtime: self.data.mtime.get(),
407 mtime: self.data.mtime.get(),
401 size: self.data.size.get(),
408 size: self.data.size.get(),
402 }
409 }
403 }
410 }
404
411
405 pub(super) fn entry(
412 pub(super) fn entry(
406 &self,
413 &self,
407 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
414 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
408 Ok(self
415 Ok(self
409 .state()?
416 .state()?
410 .map(|state| self.entry_with_given_state(state)))
417 .map(|state| self.entry_with_given_state(state)))
411 }
418 }
412
419
413 pub(super) fn children<'on_disk>(
420 pub(super) fn children<'on_disk>(
414 &self,
421 &self,
415 on_disk: &'on_disk [u8],
422 on_disk: &'on_disk [u8],
416 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
423 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
417 read_nodes(on_disk, self.children)
424 read_nodes(on_disk, self.children)
418 }
425 }
419
426
420 pub(super) fn to_in_memory_node<'on_disk>(
427 pub(super) fn to_in_memory_node<'on_disk>(
421 &self,
428 &self,
422 on_disk: &'on_disk [u8],
429 on_disk: &'on_disk [u8],
423 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
430 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
424 Ok(dirstate_map::Node {
431 Ok(dirstate_map::Node {
425 children: dirstate_map::ChildNodes::OnDisk(
432 children: dirstate_map::ChildNodes::OnDisk(
426 self.children(on_disk)?,
433 self.children(on_disk)?,
427 ),
434 ),
428 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
435 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
429 data: self.node_data()?,
436 data: self.node_data()?,
430 descendants_with_entry_count: self
437 descendants_with_entry_count: self
431 .descendants_with_entry_count
438 .descendants_with_entry_count
432 .get(),
439 .get(),
433 tracked_descendants_count: self.tracked_descendants_count.get(),
440 tracked_descendants_count: self.tracked_descendants_count.get(),
434 })
441 })
435 }
442 }
436 }
443 }
437
444
438 impl Entry {
445 impl Entry {
439 fn from_timestamp(timestamp: Timestamp) -> Self {
446 fn from_timestamp(timestamp: Timestamp) -> Self {
440 // Safety: both types implement the `ByteCast` trait, so we could
447 // Safety: both types implement the `ByteCast` trait, so we could
441 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
448 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
442 // `transmute` instead makes the compiler check that the two types
449 // `transmute` instead makes the compiler check that the two types
443 // have the same size, which eliminates the error case of
450 // have the same size, which eliminates the error case of
444 // `from_bytes`.
451 // `from_bytes`.
445 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
452 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
446 }
453 }
447
454
448 fn as_timestamp(&self) -> &Timestamp {
455 fn as_timestamp(&self) -> &Timestamp {
449 // Safety: same as above in `from_timestamp`
456 // Safety: same as above in `from_timestamp`
450 unsafe { &*(self as *const Entry as *const Timestamp) }
457 unsafe { &*(self as *const Entry as *const Timestamp) }
451 }
458 }
452 }
459 }
453
460
454 impl Timestamp {
461 impl Timestamp {
455 pub fn seconds(&self) -> i64 {
462 pub fn seconds(&self) -> i64 {
456 self.seconds.get()
463 self.seconds.get()
457 }
464 }
458 }
465 }
459
466
460 impl From<SystemTime> for Timestamp {
467 impl From<SystemTime> for Timestamp {
461 fn from(system_time: SystemTime) -> Self {
468 fn from(system_time: SystemTime) -> Self {
462 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
469 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
463 Ok(duration) => {
470 Ok(duration) => {
464 (duration.as_secs() as i64, duration.subsec_nanos())
471 (duration.as_secs() as i64, duration.subsec_nanos())
465 }
472 }
466 Err(error) => {
473 Err(error) => {
467 let negative = error.duration();
474 let negative = error.duration();
468 (-(negative.as_secs() as i64), negative.subsec_nanos())
475 (-(negative.as_secs() as i64), negative.subsec_nanos())
469 }
476 }
470 };
477 };
471 Timestamp {
478 Timestamp {
472 seconds: secs.into(),
479 seconds: secs.into(),
473 nanoseconds: nanos.into(),
480 nanoseconds: nanos.into(),
474 }
481 }
475 }
482 }
476 }
483 }
477
484
478 impl From<&'_ Timestamp> for SystemTime {
485 impl From<&'_ Timestamp> for SystemTime {
479 fn from(timestamp: &'_ Timestamp) -> Self {
486 fn from(timestamp: &'_ Timestamp) -> Self {
480 let secs = timestamp.seconds.get();
487 let secs = timestamp.seconds.get();
481 let nanos = timestamp.nanoseconds.get();
488 let nanos = timestamp.nanoseconds.get();
482 if secs >= 0 {
489 if secs >= 0 {
483 UNIX_EPOCH + Duration::new(secs as u64, nanos)
490 UNIX_EPOCH + Duration::new(secs as u64, nanos)
484 } else {
491 } else {
485 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
492 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
486 }
493 }
487 }
494 }
488 }
495 }
489
496
490 fn read_hg_path(
497 fn read_hg_path(
491 on_disk: &[u8],
498 on_disk: &[u8],
492 slice: PathSlice,
499 slice: PathSlice,
493 ) -> Result<&HgPath, DirstateV2ParseError> {
500 ) -> Result<&HgPath, DirstateV2ParseError> {
494 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
501 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
495 }
502 }
496
503
497 fn read_nodes(
504 fn read_nodes(
498 on_disk: &[u8],
505 on_disk: &[u8],
499 slice: ChildNodes,
506 slice: ChildNodes,
500 ) -> Result<&[Node], DirstateV2ParseError> {
507 ) -> Result<&[Node], DirstateV2ParseError> {
501 read_slice(on_disk, slice.start, slice.len.get())
508 read_slice(on_disk, slice.start, slice.len.get())
502 }
509 }
503
510
504 fn read_slice<T, Len>(
511 fn read_slice<T, Len>(
505 on_disk: &[u8],
512 on_disk: &[u8],
506 start: Offset,
513 start: Offset,
507 len: Len,
514 len: Len,
508 ) -> Result<&[T], DirstateV2ParseError>
515 ) -> Result<&[T], DirstateV2ParseError>
509 where
516 where
510 T: BytesCast,
517 T: BytesCast,
511 Len: TryInto<usize>,
518 Len: TryInto<usize>,
512 {
519 {
513 // Either `usize::MAX` would result in "out of bounds" error since a single
520 // Either `usize::MAX` would result in "out of bounds" error since a single
514 // `&[u8]` cannot occupy the entire addess space.
521 // `&[u8]` cannot occupy the entire addess space.
515 let start = start.get().try_into().unwrap_or(std::usize::MAX);
522 let start = start.get().try_into().unwrap_or(std::usize::MAX);
516 let len = len.try_into().unwrap_or(std::usize::MAX);
523 let len = len.try_into().unwrap_or(std::usize::MAX);
517 on_disk
524 on_disk
518 .get(start..)
525 .get(start..)
519 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
526 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
520 .map(|(slice, _rest)| slice)
527 .map(|(slice, _rest)| slice)
521 .ok_or_else(|| DirstateV2ParseError)
528 .ok_or_else(|| DirstateV2ParseError)
522 }
529 }
523
530
524 pub(crate) fn for_each_tracked_path<'on_disk>(
531 pub(crate) fn for_each_tracked_path<'on_disk>(
525 on_disk: &'on_disk [u8],
532 on_disk: &'on_disk [u8],
526 mut f: impl FnMut(&'on_disk HgPath),
533 mut f: impl FnMut(&'on_disk HgPath),
527 ) -> Result<(), DirstateV2ParseError> {
534 ) -> Result<(), DirstateV2ParseError> {
528 let root = read_root(on_disk)?;
535 let root = read_root(on_disk)?;
529 fn recur<'on_disk>(
536 fn recur<'on_disk>(
530 on_disk: &'on_disk [u8],
537 on_disk: &'on_disk [u8],
531 nodes: ChildNodes,
538 nodes: ChildNodes,
532 f: &mut impl FnMut(&'on_disk HgPath),
539 f: &mut impl FnMut(&'on_disk HgPath),
533 ) -> Result<(), DirstateV2ParseError> {
540 ) -> Result<(), DirstateV2ParseError> {
534 for node in read_nodes(on_disk, nodes)? {
541 for node in read_nodes(on_disk, nodes)? {
535 if let Some(state) = node.state()? {
542 if let Some(state) = node.state()? {
536 if state.is_tracked() {
543 if state.is_tracked() {
537 f(node.full_path(on_disk)?)
544 f(node.full_path(on_disk)?)
538 }
545 }
539 }
546 }
540 recur(on_disk, node.children, f)?
547 recur(on_disk, node.children, f)?
541 }
548 }
542 Ok(())
549 Ok(())
543 }
550 }
544 recur(on_disk, root.root_nodes, &mut f)
551 recur(on_disk, root.root_nodes, &mut f)
545 }
552 }
546
553
547 /// Returns new data together with whether that data should be appended to the
554 /// Returns new data together with whether that data should be appended to the
548 /// existing data file whose content is at `dirstate_map.on_disk` (true),
555 /// existing data file whose content is at `dirstate_map.on_disk` (true),
549 /// instead of written to a new data file (false).
556 /// instead of written to a new data file (false).
550 pub(super) fn write(
557 pub(super) fn write(
551 dirstate_map: &mut DirstateMap,
558 dirstate_map: &mut DirstateMap,
552 can_append: bool,
559 can_append: bool,
553 ) -> Result<(Vec<u8>, bool), DirstateError> {
560 ) -> Result<(Vec<u8>, bool), DirstateError> {
554 let append = can_append && dirstate_map.write_should_append();
561 let append = can_append && dirstate_map.write_should_append();
555
562
556 // This ignores the space for paths, and for nodes without an entry.
563 // This ignores the space for paths, and for nodes without an entry.
557 // TODO: better estimate? Skip the `Vec` and write to a file directly?
564 // TODO: better estimate? Skip the `Vec` and write to a file directly?
558 let size_guess = std::mem::size_of::<Root>()
565 let size_guess = std::mem::size_of::<Root>()
559 + std::mem::size_of::<Node>()
566 + std::mem::size_of::<Node>()
560 * dirstate_map.nodes_with_entry_count as usize;
567 * dirstate_map.nodes_with_entry_count as usize;
561
568
562 let mut writer = Writer {
569 let mut writer = Writer {
563 dirstate_map,
570 dirstate_map,
564 append,
571 append,
565 out: Vec::with_capacity(size_guess),
572 out: Vec::with_capacity(size_guess),
566 };
573 };
567
574
568 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
575 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
569
576
570 let root = Root {
577 let root = Root {
571 root_nodes,
578 root_nodes,
572 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
579 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
573 nodes_with_copy_source_count: dirstate_map
580 nodes_with_copy_source_count: dirstate_map
574 .nodes_with_copy_source_count
581 .nodes_with_copy_source_count
575 .into(),
582 .into(),
583 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
576 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
584 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
577 };
585 };
578 writer.out.extend(root.as_bytes());
586 writer.out.extend(root.as_bytes());
579 Ok((writer.out, append))
587 Ok((writer.out, append))
580 }
588 }
581
589
582 struct Writer<'dmap, 'on_disk> {
590 struct Writer<'dmap, 'on_disk> {
583 dirstate_map: &'dmap DirstateMap<'on_disk>,
591 dirstate_map: &'dmap DirstateMap<'on_disk>,
584 append: bool,
592 append: bool,
585 out: Vec<u8>,
593 out: Vec<u8>,
586 }
594 }
587
595
588 impl Writer<'_, '_> {
596 impl Writer<'_, '_> {
589 fn write_nodes(
597 fn write_nodes(
590 &mut self,
598 &mut self,
591 nodes: dirstate_map::ChildNodesRef,
599 nodes: dirstate_map::ChildNodesRef,
592 ) -> Result<ChildNodes, DirstateError> {
600 ) -> Result<ChildNodes, DirstateError> {
593 // Reuse already-written nodes if possible
601 // Reuse already-written nodes if possible
594 if self.append {
602 if self.append {
595 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
603 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
596 let start = self.on_disk_offset_of(nodes_slice).expect(
604 let start = self.on_disk_offset_of(nodes_slice).expect(
597 "dirstate-v2 OnDisk nodes not found within on_disk",
605 "dirstate-v2 OnDisk nodes not found within on_disk",
598 );
606 );
599 let len = child_nodes_len_from_usize(nodes_slice.len());
607 let len = child_nodes_len_from_usize(nodes_slice.len());
600 return Ok(ChildNodes { start, len });
608 return Ok(ChildNodes { start, len });
601 }
609 }
602 }
610 }
603
611
604 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
612 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
605 // undefined iteration order. Sort to enable binary search in the
613 // undefined iteration order. Sort to enable binary search in the
606 // written file.
614 // written file.
607 let nodes = nodes.sorted();
615 let nodes = nodes.sorted();
608 let nodes_len = nodes.len();
616 let nodes_len = nodes.len();
609
617
610 // First accumulate serialized nodes in a `Vec`
618 // First accumulate serialized nodes in a `Vec`
611 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
619 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
612 for node in nodes {
620 for node in nodes {
613 let children =
621 let children =
614 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
622 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
615 let full_path = node.full_path(self.dirstate_map.on_disk)?;
623 let full_path = node.full_path(self.dirstate_map.on_disk)?;
616 let full_path = self.write_path(full_path.as_bytes());
624 let full_path = self.write_path(full_path.as_bytes());
617 let copy_source = if let Some(source) =
625 let copy_source = if let Some(source) =
618 node.copy_source(self.dirstate_map.on_disk)?
626 node.copy_source(self.dirstate_map.on_disk)?
619 {
627 {
620 self.write_path(source.as_bytes())
628 self.write_path(source.as_bytes())
621 } else {
629 } else {
622 PathSlice {
630 PathSlice {
623 start: 0.into(),
631 start: 0.into(),
624 len: 0.into(),
632 len: 0.into(),
625 }
633 }
626 };
634 };
627 on_disk_nodes.push(match node {
635 on_disk_nodes.push(match node {
628 NodeRef::InMemory(path, node) => {
636 NodeRef::InMemory(path, node) => {
629 let (state, data) = match &node.data {
637 let (state, data) = match &node.data {
630 dirstate_map::NodeData::Entry(entry) => (
638 dirstate_map::NodeData::Entry(entry) => (
631 entry.state.into(),
639 entry.state.into(),
632 Entry {
640 Entry {
633 mode: entry.mode.into(),
641 mode: entry.mode.into(),
634 mtime: entry.mtime.into(),
642 mtime: entry.mtime.into(),
635 size: entry.size.into(),
643 size: entry.size.into(),
636 },
644 },
637 ),
645 ),
638 dirstate_map::NodeData::CachedDirectory { mtime } => {
646 dirstate_map::NodeData::CachedDirectory { mtime } => {
639 (b'd', Entry::from_timestamp(*mtime))
647 (b'd', Entry::from_timestamp(*mtime))
640 }
648 }
641 dirstate_map::NodeData::None => (
649 dirstate_map::NodeData::None => (
642 b'\0',
650 b'\0',
643 Entry {
651 Entry {
644 mode: 0.into(),
652 mode: 0.into(),
645 mtime: 0.into(),
653 mtime: 0.into(),
646 size: 0.into(),
654 size: 0.into(),
647 },
655 },
648 ),
656 ),
649 };
657 };
650 Node {
658 Node {
651 children,
659 children,
652 copy_source,
660 copy_source,
653 full_path,
661 full_path,
654 base_name_start: u16::try_from(path.base_name_start())
662 base_name_start: u16::try_from(path.base_name_start())
655 // Could only panic for paths over 64 KiB
663 // Could only panic for paths over 64 KiB
656 .expect("dirstate-v2 path length overflow")
664 .expect("dirstate-v2 path length overflow")
657 .into(),
665 .into(),
658 descendants_with_entry_count: node
666 descendants_with_entry_count: node
659 .descendants_with_entry_count
667 .descendants_with_entry_count
660 .into(),
668 .into(),
661 tracked_descendants_count: node
669 tracked_descendants_count: node
662 .tracked_descendants_count
670 .tracked_descendants_count
663 .into(),
671 .into(),
664 state,
672 state,
665 data,
673 data,
666 }
674 }
667 }
675 }
668 NodeRef::OnDisk(node) => Node {
676 NodeRef::OnDisk(node) => Node {
669 children,
677 children,
670 copy_source,
678 copy_source,
671 full_path,
679 full_path,
672 ..*node
680 ..*node
673 },
681 },
674 })
682 })
675 }
683 }
676 // … so we can write them contiguously, after writing everything else
684 // … so we can write them contiguously, after writing everything else
677 // they refer to.
685 // they refer to.
678 let start = self.current_offset();
686 let start = self.current_offset();
679 let len = child_nodes_len_from_usize(nodes_len);
687 let len = child_nodes_len_from_usize(nodes_len);
680 self.out.extend(on_disk_nodes.as_bytes());
688 self.out.extend(on_disk_nodes.as_bytes());
681 Ok(ChildNodes { start, len })
689 Ok(ChildNodes { start, len })
682 }
690 }
683
691
684 /// If the given slice of items is within `on_disk`, returns its offset
692 /// If the given slice of items is within `on_disk`, returns its offset
685 /// from the start of `on_disk`.
693 /// from the start of `on_disk`.
686 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
694 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
687 where
695 where
688 T: BytesCast,
696 T: BytesCast,
689 {
697 {
690 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
698 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
691 let start = slice.as_ptr() as usize;
699 let start = slice.as_ptr() as usize;
692 let end = start + slice.len();
700 let end = start + slice.len();
693 start..=end
701 start..=end
694 }
702 }
695 let slice_addresses = address_range(slice.as_bytes());
703 let slice_addresses = address_range(slice.as_bytes());
696 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
704 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
697 if on_disk_addresses.contains(slice_addresses.start())
705 if on_disk_addresses.contains(slice_addresses.start())
698 && on_disk_addresses.contains(slice_addresses.end())
706 && on_disk_addresses.contains(slice_addresses.end())
699 {
707 {
700 let offset = slice_addresses.start() - on_disk_addresses.start();
708 let offset = slice_addresses.start() - on_disk_addresses.start();
701 Some(offset_from_usize(offset))
709 Some(offset_from_usize(offset))
702 } else {
710 } else {
703 None
711 None
704 }
712 }
705 }
713 }
706
714
707 fn current_offset(&mut self) -> Offset {
715 fn current_offset(&mut self) -> Offset {
708 let mut offset = self.out.len();
716 let mut offset = self.out.len();
709 if self.append {
717 if self.append {
710 offset += self.dirstate_map.on_disk.len()
718 offset += self.dirstate_map.on_disk.len()
711 }
719 }
712 offset_from_usize(offset)
720 offset_from_usize(offset)
713 }
721 }
714
722
715 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
723 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
716 let len = path_len_from_usize(slice.len());
724 let len = path_len_from_usize(slice.len());
717 // Reuse an already-written path if possible
725 // Reuse an already-written path if possible
718 if self.append {
726 if self.append {
719 if let Some(start) = self.on_disk_offset_of(slice) {
727 if let Some(start) = self.on_disk_offset_of(slice) {
720 return PathSlice { start, len };
728 return PathSlice { start, len };
721 }
729 }
722 }
730 }
723 let start = self.current_offset();
731 let start = self.current_offset();
724 self.out.extend(slice.as_bytes());
732 self.out.extend(slice.as_bytes());
725 PathSlice { start, len }
733 PathSlice { start, len }
726 }
734 }
727 }
735 }
728
736
729 fn offset_from_usize(x: usize) -> Offset {
737 fn offset_from_usize(x: usize) -> Offset {
730 u32::try_from(x)
738 u32::try_from(x)
731 // Could only panic for a dirstate file larger than 4 GiB
739 // Could only panic for a dirstate file larger than 4 GiB
732 .expect("dirstate-v2 offset overflow")
740 .expect("dirstate-v2 offset overflow")
733 .into()
741 .into()
734 }
742 }
735
743
736 fn child_nodes_len_from_usize(x: usize) -> Size {
744 fn child_nodes_len_from_usize(x: usize) -> Size {
737 u32::try_from(x)
745 u32::try_from(x)
738 // Could only panic with over 4 billion nodes
746 // Could only panic with over 4 billion nodes
739 .expect("dirstate-v2 slice length overflow")
747 .expect("dirstate-v2 slice length overflow")
740 .into()
748 .into()
741 }
749 }
742
750
743 fn path_len_from_usize(x: usize) -> PathSize {
751 fn path_len_from_usize(x: usize) -> PathSize {
744 u16::try_from(x)
752 u16::try_from(x)
745 // Could only panic for paths over 64 KiB
753 // Could only panic for paths over 64 KiB
746 .expect("dirstate-v2 path length overflow")
754 .expect("dirstate-v2 path length overflow")
747 .into()
755 .into()
748 }
756 }
General Comments 0
You need to be logged in to leave comments. Login now