##// END OF EJS Templates
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
Simon Sapin -
r48128:0654b3b3 default
parent child Browse files
Show More
@@ -1,932 +1,995 b''
1 use bytes_cast::BytesCast;
1 use bytes_cast::BytesCast;
2 use micro_timer::timed;
2 use micro_timer::timed;
3 use std::borrow::Cow;
3 use std::borrow::Cow;
4 use std::convert::TryInto;
4 use std::convert::TryInto;
5 use std::path::PathBuf;
5 use std::path::PathBuf;
6
6
7 use super::on_disk;
7 use super::on_disk;
8 use super::on_disk::DirstateV2ParseError;
8 use super::on_disk::DirstateV2ParseError;
9 use super::path_with_basename::WithBasename;
9 use super::path_with_basename::WithBasename;
10 use crate::dirstate::parsers::pack_entry;
10 use crate::dirstate::parsers::pack_entry;
11 use crate::dirstate::parsers::packed_entry_size;
11 use crate::dirstate::parsers::packed_entry_size;
12 use crate::dirstate::parsers::parse_dirstate_entries;
12 use crate::dirstate::parsers::parse_dirstate_entries;
13 use crate::dirstate::parsers::Timestamp;
13 use crate::dirstate::parsers::Timestamp;
14 use crate::matchers::Matcher;
14 use crate::matchers::Matcher;
15 use crate::utils::hg_path::{HgPath, HgPathBuf};
15 use crate::utils::hg_path::{HgPath, HgPathBuf};
16 use crate::CopyMapIter;
16 use crate::CopyMapIter;
17 use crate::DirstateEntry;
17 use crate::DirstateEntry;
18 use crate::DirstateError;
18 use crate::DirstateError;
19 use crate::DirstateParents;
19 use crate::DirstateParents;
20 use crate::DirstateStatus;
20 use crate::DirstateStatus;
21 use crate::EntryState;
21 use crate::EntryState;
22 use crate::FastHashMap;
22 use crate::FastHashMap;
23 use crate::PatternFileWarning;
23 use crate::PatternFileWarning;
24 use crate::StateMapIter;
24 use crate::StateMapIter;
25 use crate::StatusError;
25 use crate::StatusError;
26 use crate::StatusOptions;
26 use crate::StatusOptions;
27
27
28 pub struct DirstateMap<'on_disk> {
28 pub struct DirstateMap<'on_disk> {
29 /// Contents of the `.hg/dirstate` file
29 /// Contents of the `.hg/dirstate` file
30 pub(super) on_disk: &'on_disk [u8],
30 pub(super) on_disk: &'on_disk [u8],
31
31
32 pub(super) root: ChildNodes<'on_disk>,
32 pub(super) root: ChildNodes<'on_disk>,
33
33
34 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
34 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
35 pub(super) nodes_with_entry_count: u32,
35 pub(super) nodes_with_entry_count: u32,
36
36
37 /// Number of nodes anywhere in the tree that have
37 /// Number of nodes anywhere in the tree that have
38 /// `.copy_source.is_some()`.
38 /// `.copy_source.is_some()`.
39 pub(super) nodes_with_copy_source_count: u32,
39 pub(super) nodes_with_copy_source_count: u32,
40 }
40 }
41
41
42 /// Using a plain `HgPathBuf` of the full path from the repository root as a
42 /// Using a plain `HgPathBuf` of the full path from the repository root as a
43 /// map key would also work: all paths in a given map have the same parent
43 /// map key would also work: all paths in a given map have the same parent
44 /// path, so comparing full paths gives the same result as comparing base
44 /// path, so comparing full paths gives the same result as comparing base
45 /// names. However `HashMap` would waste time always re-hashing the same
45 /// names. However `HashMap` would waste time always re-hashing the same
46 /// string prefix.
46 /// string prefix.
47 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
47 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
48
48
49 pub(super) enum ChildNodes<'on_disk> {
49 pub(super) enum ChildNodes<'on_disk> {
50 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
50 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
51 OnDisk(&'on_disk [on_disk::Node]),
51 }
52 }
52
53
53 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
54 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
54 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
55 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
56 OnDisk(&'on_disk [on_disk::Node]),
55 }
57 }
56
58
57 pub(super) enum NodeRef<'tree, 'on_disk> {
59 pub(super) enum NodeRef<'tree, 'on_disk> {
58 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
60 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
61 OnDisk(&'on_disk on_disk::Node),
59 }
62 }
60
63
61 impl Default for ChildNodes<'_> {
64 impl Default for ChildNodes<'_> {
62 fn default() -> Self {
65 fn default() -> Self {
63 ChildNodes::InMemory(Default::default())
66 ChildNodes::InMemory(Default::default())
64 }
67 }
65 }
68 }
66
69
67 impl<'on_disk> ChildNodes<'on_disk> {
70 impl<'on_disk> ChildNodes<'on_disk> {
68 pub(super) fn as_ref<'tree>(
71 pub(super) fn as_ref<'tree>(
69 &'tree self,
72 &'tree self,
70 ) -> ChildNodesRef<'tree, 'on_disk> {
73 ) -> ChildNodesRef<'tree, 'on_disk> {
71 match self {
74 match self {
72 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
75 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
76 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
73 }
77 }
74 }
78 }
75
79
76 pub(super) fn is_empty(&self) -> bool {
80 pub(super) fn is_empty(&self) -> bool {
77 match self {
81 match self {
78 ChildNodes::InMemory(nodes) => nodes.is_empty(),
82 ChildNodes::InMemory(nodes) => nodes.is_empty(),
83 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
79 }
84 }
80 }
85 }
81
86
82 pub(super) fn make_mut(
87 pub(super) fn make_mut(
83 &mut self,
88 &mut self,
84 _on_disk: &'on_disk [u8],
89 on_disk: &'on_disk [u8],
85 ) -> Result<
90 ) -> Result<
86 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
91 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
87 DirstateV2ParseError,
92 DirstateV2ParseError,
88 > {
93 > {
89 match self {
94 match self {
90 ChildNodes::InMemory(nodes) => Ok(nodes),
95 ChildNodes::InMemory(nodes) => Ok(nodes),
96 ChildNodes::OnDisk(nodes) => {
97 let nodes = nodes
98 .iter()
99 .map(|node| {
100 Ok((
101 node.path(on_disk)?,
102 node.to_in_memory_node(on_disk)?,
103 ))
104 })
105 .collect::<Result<_, _>>()?;
106 *self = ChildNodes::InMemory(nodes);
107 match self {
108 ChildNodes::InMemory(nodes) => Ok(nodes),
109 ChildNodes::OnDisk(_) => unreachable!(),
110 }
111 }
91 }
112 }
92 }
113 }
93 }
114 }
94
115
95 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
116 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
96 pub(super) fn get(
117 pub(super) fn get(
97 &self,
118 &self,
98 base_name: &HgPath,
119 base_name: &HgPath,
99 _on_disk: &'on_disk [u8],
120 on_disk: &'on_disk [u8],
100 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
121 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
101 match self {
122 match self {
102 ChildNodesRef::InMemory(nodes) => Ok(nodes
123 ChildNodesRef::InMemory(nodes) => Ok(nodes
103 .get_key_value(base_name)
124 .get_key_value(base_name)
104 .map(|(k, v)| NodeRef::InMemory(k, v))),
125 .map(|(k, v)| NodeRef::InMemory(k, v))),
126 ChildNodesRef::OnDisk(nodes) => {
127 let mut parse_result = Ok(());
128 let search_result = nodes.binary_search_by(|node| {
129 match node.base_name(on_disk) {
130 Ok(node_base_name) => node_base_name.cmp(base_name),
131 Err(e) => {
132 parse_result = Err(e);
133 // Dummy comparison result, `search_result` won’t
134 // be used since `parse_result` is an error
135 std::cmp::Ordering::Equal
136 }
137 }
138 });
139 parse_result.map(|()| {
140 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
141 })
142 }
105 }
143 }
106 }
144 }
107
145
108 /// Iterate in undefined order
146 /// Iterate in undefined order
109 pub(super) fn iter(
147 pub(super) fn iter(
110 &self,
148 &self,
111 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
149 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
112 match self {
150 match self {
113 ChildNodesRef::InMemory(nodes) => {
151 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
114 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v))
152 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
153 ),
154 ChildNodesRef::OnDisk(nodes) => {
155 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
115 }
156 }
116 }
157 }
117 }
158 }
118
159
119 /// Iterate in parallel in undefined order
160 /// Iterate in parallel in undefined order
120 pub(super) fn par_iter(
161 pub(super) fn par_iter(
121 &self,
162 &self,
122 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
163 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
123 {
164 {
124 use rayon::prelude::*;
165 use rayon::prelude::*;
125 match self {
166 match self {
126 ChildNodesRef::InMemory(nodes) => {
167 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
127 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v))
168 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
128 }
169 ),
170 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
171 nodes.par_iter().map(NodeRef::OnDisk),
172 ),
129 }
173 }
130 }
174 }
131
175
132 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
176 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
133 match self {
177 match self {
134 ChildNodesRef::InMemory(nodes) => {
178 ChildNodesRef::InMemory(nodes) => {
135 let mut vec: Vec<_> = nodes
179 let mut vec: Vec<_> = nodes
136 .iter()
180 .iter()
137 .map(|(k, v)| NodeRef::InMemory(k, v))
181 .map(|(k, v)| NodeRef::InMemory(k, v))
138 .collect();
182 .collect();
139 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
183 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
140 match node {
184 match node {
141 NodeRef::InMemory(path, _node) => path.base_name(),
185 NodeRef::InMemory(path, _node) => path.base_name(),
186 NodeRef::OnDisk(_) => unreachable!(),
142 }
187 }
143 }
188 }
144 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
189 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
145 // value: https://github.com/rust-lang/rust/issues/34162
190 // value: https://github.com/rust-lang/rust/issues/34162
146 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
191 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
147 vec
192 vec
148 }
193 }
194 ChildNodesRef::OnDisk(nodes) => {
195 // Nodes on disk are already sorted
196 nodes.iter().map(NodeRef::OnDisk).collect()
197 }
149 }
198 }
150 }
199 }
151 }
200 }
152
201
153 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
202 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
154 pub(super) fn full_path(
203 pub(super) fn full_path(
155 &self,
204 &self,
156 _on_disk: &'on_disk [u8],
205 on_disk: &'on_disk [u8],
157 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
206 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
158 match self {
207 match self {
159 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
208 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
209 NodeRef::OnDisk(node) => node.full_path(on_disk),
160 }
210 }
161 }
211 }
162
212
163 /// Returns a `Cow` that can borrow 'on_disk but is detached from 'tree
213 /// Returns a `Cow` that can borrow 'on_disk but is detached from 'tree
164 pub(super) fn full_path_cow(
214 pub(super) fn full_path_cow(
165 &self,
215 &self,
166 _on_disk: &'on_disk [u8],
216 on_disk: &'on_disk [u8],
167 ) -> Result<Cow<'on_disk, HgPath>, DirstateV2ParseError> {
217 ) -> Result<Cow<'on_disk, HgPath>, DirstateV2ParseError> {
168 match self {
218 match self {
169 NodeRef::InMemory(path, _node) => Ok(path.full_path().clone()),
219 NodeRef::InMemory(path, _node) => Ok(path.full_path().clone()),
220 NodeRef::OnDisk(node) => {
221 Ok(Cow::Borrowed(node.full_path(on_disk)?))
222 }
170 }
223 }
171 }
224 }
172
225
173 pub(super) fn base_name(
226 pub(super) fn base_name(
174 &self,
227 &self,
175 _on_disk: &'on_disk [u8],
228 on_disk: &'on_disk [u8],
176 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
229 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
177 match self {
230 match self {
178 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
231 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
232 NodeRef::OnDisk(node) => node.base_name(on_disk),
179 }
233 }
180 }
234 }
181
235
182 pub(super) fn children(
236 pub(super) fn children(
183 &self,
237 &self,
184 _on_disk: &'on_disk [u8],
238 on_disk: &'on_disk [u8],
185 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
239 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
186 match self {
240 match self {
187 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
241 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
242 NodeRef::OnDisk(node) => {
243 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
244 }
188 }
245 }
189 }
246 }
190
247
191 pub(super) fn has_copy_source(&self) -> bool {
248 pub(super) fn has_copy_source(&self) -> bool {
192 match self {
249 match self {
193 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
250 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
251 NodeRef::OnDisk(node) => node.has_copy_source(),
194 }
252 }
195 }
253 }
196
254
197 pub(super) fn copy_source(
255 pub(super) fn copy_source(
198 &self,
256 &self,
199 _on_disk: &'on_disk [u8],
257 on_disk: &'on_disk [u8],
200 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
258 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
201 match self {
259 match self {
202 NodeRef::InMemory(_path, node) => {
260 NodeRef::InMemory(_path, node) => {
203 Ok(node.copy_source.as_ref().map(|s| &**s))
261 Ok(node.copy_source.as_ref().map(|s| &**s))
204 }
262 }
263 NodeRef::OnDisk(node) => node.copy_source(on_disk),
205 }
264 }
206 }
265 }
207
266
208 pub(super) fn has_entry(&self) -> bool {
267 pub(super) fn has_entry(&self) -> bool {
209 match self {
268 match self {
210 NodeRef::InMemory(_path, node) => node.entry.is_some(),
269 NodeRef::InMemory(_path, node) => node.entry.is_some(),
270 NodeRef::OnDisk(node) => node.has_entry(),
211 }
271 }
212 }
272 }
213
273
214 pub(super) fn entry(
274 pub(super) fn entry(
215 &self,
275 &self,
216 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
276 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
217 match self {
277 match self {
218 NodeRef::InMemory(_path, node) => Ok(node.entry),
278 NodeRef::InMemory(_path, node) => Ok(node.entry),
279 NodeRef::OnDisk(node) => node.entry(),
219 }
280 }
220 }
281 }
221
282
222 pub(super) fn state(
283 pub(super) fn state(
223 &self,
284 &self,
224 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
285 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
225 match self {
286 match self {
226 NodeRef::InMemory(_path, node) => {
287 NodeRef::InMemory(_path, node) => {
227 Ok(node.entry.as_ref().map(|entry| entry.state))
288 Ok(node.entry.as_ref().map(|entry| entry.state))
228 }
289 }
290 NodeRef::OnDisk(node) => node.state(),
229 }
291 }
230 }
292 }
231
293
232 pub(super) fn tracked_descendants_count(&self) -> u32 {
294 pub(super) fn tracked_descendants_count(&self) -> u32 {
233 match self {
295 match self {
234 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
296 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
297 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
235 }
298 }
236 }
299 }
237 }
300 }
238
301
239 /// Represents a file or a directory
302 /// Represents a file or a directory
240 #[derive(Default)]
303 #[derive(Default)]
241 pub(super) struct Node<'on_disk> {
304 pub(super) struct Node<'on_disk> {
242 /// `None` for directories
305 /// `None` for directories
243 pub(super) entry: Option<DirstateEntry>,
306 pub(super) entry: Option<DirstateEntry>,
244
307
245 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
308 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
246
309
247 pub(super) children: ChildNodes<'on_disk>,
310 pub(super) children: ChildNodes<'on_disk>,
248
311
249 /// How many (non-inclusive) descendants of this node are tracked files
312 /// How many (non-inclusive) descendants of this node are tracked files
250 pub(super) tracked_descendants_count: u32,
313 pub(super) tracked_descendants_count: u32,
251 }
314 }
252
315
253 impl<'on_disk> DirstateMap<'on_disk> {
316 impl<'on_disk> DirstateMap<'on_disk> {
254 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
317 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
255 Self {
318 Self {
256 on_disk,
319 on_disk,
257 root: ChildNodes::default(),
320 root: ChildNodes::default(),
258 nodes_with_entry_count: 0,
321 nodes_with_entry_count: 0,
259 nodes_with_copy_source_count: 0,
322 nodes_with_copy_source_count: 0,
260 }
323 }
261 }
324 }
262
325
263 #[timed]
326 #[timed]
264 pub fn new_v2(
327 pub fn new_v2(
265 on_disk: &'on_disk [u8],
328 on_disk: &'on_disk [u8],
266 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
329 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
267 Ok(on_disk::read(on_disk)?)
330 Ok(on_disk::read(on_disk)?)
268 }
331 }
269
332
270 #[timed]
333 #[timed]
271 pub fn new_v1(
334 pub fn new_v1(
272 on_disk: &'on_disk [u8],
335 on_disk: &'on_disk [u8],
273 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
336 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
274 let mut map = Self::empty(on_disk);
337 let mut map = Self::empty(on_disk);
275 if map.on_disk.is_empty() {
338 if map.on_disk.is_empty() {
276 return Ok((map, None));
339 return Ok((map, None));
277 }
340 }
278
341
279 let parents = parse_dirstate_entries(
342 let parents = parse_dirstate_entries(
280 map.on_disk,
343 map.on_disk,
281 |path, entry, copy_source| {
344 |path, entry, copy_source| {
282 let tracked = entry.state.is_tracked();
345 let tracked = entry.state.is_tracked();
283 let node = Self::get_or_insert_node(
346 let node = Self::get_or_insert_node(
284 map.on_disk,
347 map.on_disk,
285 &mut map.root,
348 &mut map.root,
286 path,
349 path,
287 WithBasename::to_cow_borrowed,
350 WithBasename::to_cow_borrowed,
288 |ancestor| {
351 |ancestor| {
289 if tracked {
352 if tracked {
290 ancestor.tracked_descendants_count += 1
353 ancestor.tracked_descendants_count += 1
291 }
354 }
292 },
355 },
293 )?;
356 )?;
294 assert!(
357 assert!(
295 node.entry.is_none(),
358 node.entry.is_none(),
296 "duplicate dirstate entry in read"
359 "duplicate dirstate entry in read"
297 );
360 );
298 assert!(
361 assert!(
299 node.copy_source.is_none(),
362 node.copy_source.is_none(),
300 "duplicate dirstate entry in read"
363 "duplicate dirstate entry in read"
301 );
364 );
302 node.entry = Some(*entry);
365 node.entry = Some(*entry);
303 node.copy_source = copy_source.map(Cow::Borrowed);
366 node.copy_source = copy_source.map(Cow::Borrowed);
304 map.nodes_with_entry_count += 1;
367 map.nodes_with_entry_count += 1;
305 if copy_source.is_some() {
368 if copy_source.is_some() {
306 map.nodes_with_copy_source_count += 1
369 map.nodes_with_copy_source_count += 1
307 }
370 }
308 Ok(())
371 Ok(())
309 },
372 },
310 )?;
373 )?;
311 let parents = Some(parents.clone());
374 let parents = Some(parents.clone());
312
375
313 Ok((map, parents))
376 Ok((map, parents))
314 }
377 }
315
378
316 fn get_node<'tree>(
379 fn get_node<'tree>(
317 &'tree self,
380 &'tree self,
318 path: &HgPath,
381 path: &HgPath,
319 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
382 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
320 let mut children = self.root.as_ref();
383 let mut children = self.root.as_ref();
321 let mut components = path.components();
384 let mut components = path.components();
322 let mut component =
385 let mut component =
323 components.next().expect("expected at least one components");
386 components.next().expect("expected at least one components");
324 loop {
387 loop {
325 if let Some(child) = children.get(component, self.on_disk)? {
388 if let Some(child) = children.get(component, self.on_disk)? {
326 if let Some(next_component) = components.next() {
389 if let Some(next_component) = components.next() {
327 component = next_component;
390 component = next_component;
328 children = child.children(self.on_disk)?;
391 children = child.children(self.on_disk)?;
329 } else {
392 } else {
330 return Ok(Some(child));
393 return Ok(Some(child));
331 }
394 }
332 } else {
395 } else {
333 return Ok(None);
396 return Ok(None);
334 }
397 }
335 }
398 }
336 }
399 }
337
400
338 /// Returns a mutable reference to the node at `path` if it exists
401 /// Returns a mutable reference to the node at `path` if it exists
339 ///
402 ///
340 /// This takes `root` instead of `&mut self` so that callers can mutate
403 /// This takes `root` instead of `&mut self` so that callers can mutate
341 /// other fields while the returned borrow is still valid
404 /// other fields while the returned borrow is still valid
342 fn get_node_mut<'tree>(
405 fn get_node_mut<'tree>(
343 on_disk: &'on_disk [u8],
406 on_disk: &'on_disk [u8],
344 root: &'tree mut ChildNodes<'on_disk>,
407 root: &'tree mut ChildNodes<'on_disk>,
345 path: &HgPath,
408 path: &HgPath,
346 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
409 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
347 let mut children = root;
410 let mut children = root;
348 let mut components = path.components();
411 let mut components = path.components();
349 let mut component =
412 let mut component =
350 components.next().expect("expected at least one components");
413 components.next().expect("expected at least one components");
351 loop {
414 loop {
352 if let Some(child) = children.make_mut(on_disk)?.get_mut(component)
415 if let Some(child) = children.make_mut(on_disk)?.get_mut(component)
353 {
416 {
354 if let Some(next_component) = components.next() {
417 if let Some(next_component) = components.next() {
355 component = next_component;
418 component = next_component;
356 children = &mut child.children;
419 children = &mut child.children;
357 } else {
420 } else {
358 return Ok(Some(child));
421 return Ok(Some(child));
359 }
422 }
360 } else {
423 } else {
361 return Ok(None);
424 return Ok(None);
362 }
425 }
363 }
426 }
364 }
427 }
365
428
366 fn get_or_insert_node<'tree, 'path>(
429 fn get_or_insert_node<'tree, 'path>(
367 on_disk: &'on_disk [u8],
430 on_disk: &'on_disk [u8],
368 root: &'tree mut ChildNodes<'on_disk>,
431 root: &'tree mut ChildNodes<'on_disk>,
369 path: &'path HgPath,
432 path: &'path HgPath,
370 to_cow: impl Fn(
433 to_cow: impl Fn(
371 WithBasename<&'path HgPath>,
434 WithBasename<&'path HgPath>,
372 ) -> WithBasename<Cow<'on_disk, HgPath>>,
435 ) -> WithBasename<Cow<'on_disk, HgPath>>,
373 mut each_ancestor: impl FnMut(&mut Node),
436 mut each_ancestor: impl FnMut(&mut Node),
374 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
437 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
375 let mut child_nodes = root;
438 let mut child_nodes = root;
376 let mut inclusive_ancestor_paths =
439 let mut inclusive_ancestor_paths =
377 WithBasename::inclusive_ancestors_of(path);
440 WithBasename::inclusive_ancestors_of(path);
378 let mut ancestor_path = inclusive_ancestor_paths
441 let mut ancestor_path = inclusive_ancestor_paths
379 .next()
442 .next()
380 .expect("expected at least one inclusive ancestor");
443 .expect("expected at least one inclusive ancestor");
381 loop {
444 loop {
382 // TODO: can we avoid allocating an owned key in cases where the
445 // TODO: can we avoid allocating an owned key in cases where the
383 // map already contains that key, without introducing double
446 // map already contains that key, without introducing double
384 // lookup?
447 // lookup?
385 let child_node = child_nodes
448 let child_node = child_nodes
386 .make_mut(on_disk)?
449 .make_mut(on_disk)?
387 .entry(to_cow(ancestor_path))
450 .entry(to_cow(ancestor_path))
388 .or_default();
451 .or_default();
389 if let Some(next) = inclusive_ancestor_paths.next() {
452 if let Some(next) = inclusive_ancestor_paths.next() {
390 each_ancestor(child_node);
453 each_ancestor(child_node);
391 ancestor_path = next;
454 ancestor_path = next;
392 child_nodes = &mut child_node.children;
455 child_nodes = &mut child_node.children;
393 } else {
456 } else {
394 return Ok(child_node);
457 return Ok(child_node);
395 }
458 }
396 }
459 }
397 }
460 }
398
461
399 fn add_or_remove_file(
462 fn add_or_remove_file(
400 &mut self,
463 &mut self,
401 path: &HgPath,
464 path: &HgPath,
402 old_state: EntryState,
465 old_state: EntryState,
403 new_entry: DirstateEntry,
466 new_entry: DirstateEntry,
404 ) -> Result<(), DirstateV2ParseError> {
467 ) -> Result<(), DirstateV2ParseError> {
405 let tracked_count_increment =
468 let tracked_count_increment =
406 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
469 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
407 (false, true) => 1,
470 (false, true) => 1,
408 (true, false) => -1,
471 (true, false) => -1,
409 _ => 0,
472 _ => 0,
410 };
473 };
411
474
412 let node = Self::get_or_insert_node(
475 let node = Self::get_or_insert_node(
413 self.on_disk,
476 self.on_disk,
414 &mut self.root,
477 &mut self.root,
415 path,
478 path,
416 WithBasename::to_cow_owned,
479 WithBasename::to_cow_owned,
417 |ancestor| {
480 |ancestor| {
418 // We can’t use `+= increment` because the counter is unsigned,
481 // We can’t use `+= increment` because the counter is unsigned,
419 // and we want debug builds to detect accidental underflow
482 // and we want debug builds to detect accidental underflow
420 // through zero
483 // through zero
421 match tracked_count_increment {
484 match tracked_count_increment {
422 1 => ancestor.tracked_descendants_count += 1,
485 1 => ancestor.tracked_descendants_count += 1,
423 -1 => ancestor.tracked_descendants_count -= 1,
486 -1 => ancestor.tracked_descendants_count -= 1,
424 _ => {}
487 _ => {}
425 }
488 }
426 },
489 },
427 )?;
490 )?;
428 if node.entry.is_none() {
491 if node.entry.is_none() {
429 self.nodes_with_entry_count += 1
492 self.nodes_with_entry_count += 1
430 }
493 }
431 node.entry = Some(new_entry);
494 node.entry = Some(new_entry);
432 Ok(())
495 Ok(())
433 }
496 }
434
497
435 fn iter_nodes<'tree>(
498 fn iter_nodes<'tree>(
436 &'tree self,
499 &'tree self,
437 ) -> impl Iterator<
500 ) -> impl Iterator<
438 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
501 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
439 > + 'tree {
502 > + 'tree {
440 // Depth first tree traversal.
503 // Depth first tree traversal.
441 //
504 //
442 // If we could afford internal iteration and recursion,
505 // If we could afford internal iteration and recursion,
443 // this would look like:
506 // this would look like:
444 //
507 //
445 // ```
508 // ```
446 // fn traverse_children(
509 // fn traverse_children(
447 // children: &ChildNodes,
510 // children: &ChildNodes,
448 // each: &mut impl FnMut(&Node),
511 // each: &mut impl FnMut(&Node),
449 // ) {
512 // ) {
450 // for child in children.values() {
513 // for child in children.values() {
451 // traverse_children(&child.children, each);
514 // traverse_children(&child.children, each);
452 // each(child);
515 // each(child);
453 // }
516 // }
454 // }
517 // }
455 // ```
518 // ```
456 //
519 //
457 // However we want an external iterator and therefore can’t use the
520 // However we want an external iterator and therefore can’t use the
458 // call stack. Use an explicit stack instead:
521 // call stack. Use an explicit stack instead:
459 let mut stack = Vec::new();
522 let mut stack = Vec::new();
460 let mut iter = self.root.as_ref().iter();
523 let mut iter = self.root.as_ref().iter();
461 std::iter::from_fn(move || {
524 std::iter::from_fn(move || {
462 while let Some(child_node) = iter.next() {
525 while let Some(child_node) = iter.next() {
463 let children = match child_node.children(self.on_disk) {
526 let children = match child_node.children(self.on_disk) {
464 Ok(children) => children,
527 Ok(children) => children,
465 Err(error) => return Some(Err(error)),
528 Err(error) => return Some(Err(error)),
466 };
529 };
467 // Pseudo-recursion
530 // Pseudo-recursion
468 let new_iter = children.iter();
531 let new_iter = children.iter();
469 let old_iter = std::mem::replace(&mut iter, new_iter);
532 let old_iter = std::mem::replace(&mut iter, new_iter);
470 stack.push((child_node, old_iter));
533 stack.push((child_node, old_iter));
471 }
534 }
472 // Found the end of a `children.iter()` iterator.
535 // Found the end of a `children.iter()` iterator.
473 if let Some((child_node, next_iter)) = stack.pop() {
536 if let Some((child_node, next_iter)) = stack.pop() {
474 // "Return" from pseudo-recursion by restoring state from the
537 // "Return" from pseudo-recursion by restoring state from the
475 // explicit stack
538 // explicit stack
476 iter = next_iter;
539 iter = next_iter;
477
540
478 Some(Ok(child_node))
541 Some(Ok(child_node))
479 } else {
542 } else {
480 // Reached the bottom of the stack, we’re done
543 // Reached the bottom of the stack, we’re done
481 None
544 None
482 }
545 }
483 })
546 })
484 }
547 }
485
548
486 fn clear_known_ambiguous_mtimes(
549 fn clear_known_ambiguous_mtimes(
487 &mut self,
550 &mut self,
488 paths: &[impl AsRef<HgPath>],
551 paths: &[impl AsRef<HgPath>],
489 ) -> Result<(), DirstateV2ParseError> {
552 ) -> Result<(), DirstateV2ParseError> {
490 for path in paths {
553 for path in paths {
491 if let Some(node) = Self::get_node_mut(
554 if let Some(node) = Self::get_node_mut(
492 self.on_disk,
555 self.on_disk,
493 &mut self.root,
556 &mut self.root,
494 path.as_ref(),
557 path.as_ref(),
495 )? {
558 )? {
496 if let Some(entry) = node.entry.as_mut() {
559 if let Some(entry) = node.entry.as_mut() {
497 entry.clear_mtime();
560 entry.clear_mtime();
498 }
561 }
499 }
562 }
500 }
563 }
501 Ok(())
564 Ok(())
502 }
565 }
503
566
504 /// Return a faillilble iterator of full paths of nodes that have an
567 /// Return a faillilble iterator of full paths of nodes that have an
505 /// `entry` for which the given `predicate` returns true.
568 /// `entry` for which the given `predicate` returns true.
506 ///
569 ///
507 /// Fallibility means that each iterator item is a `Result`, which may
570 /// Fallibility means that each iterator item is a `Result`, which may
508 /// indicate a parse error of the on-disk dirstate-v2 format. Such errors
571 /// indicate a parse error of the on-disk dirstate-v2 format. Such errors
509 /// should only happen if Mercurial is buggy or a repository is corrupted.
572 /// should only happen if Mercurial is buggy or a repository is corrupted.
510 fn filter_full_paths<'tree>(
573 fn filter_full_paths<'tree>(
511 &'tree self,
574 &'tree self,
512 predicate: impl Fn(&DirstateEntry) -> bool + 'tree,
575 predicate: impl Fn(&DirstateEntry) -> bool + 'tree,
513 ) -> impl Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + 'tree
576 ) -> impl Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + 'tree
514 {
577 {
515 filter_map_results(self.iter_nodes(), move |node| {
578 filter_map_results(self.iter_nodes(), move |node| {
516 if let Some(entry) = node.entry()? {
579 if let Some(entry) = node.entry()? {
517 if predicate(&entry) {
580 if predicate(&entry) {
518 return Ok(Some(node.full_path(self.on_disk)?));
581 return Ok(Some(node.full_path(self.on_disk)?));
519 }
582 }
520 }
583 }
521 Ok(None)
584 Ok(None)
522 })
585 })
523 }
586 }
524 }
587 }
525
588
526 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
589 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
527 ///
590 ///
528 /// The callback is only called for incoming `Ok` values. Errors are passed
591 /// The callback is only called for incoming `Ok` values. Errors are passed
529 /// through as-is. In order to let it use the `?` operator the callback is
592 /// through as-is. In order to let it use the `?` operator the callback is
530 /// expected to return a `Result` of `Option`, instead of an `Option` of
593 /// expected to return a `Result` of `Option`, instead of an `Option` of
531 /// `Result`.
594 /// `Result`.
532 fn filter_map_results<'a, I, F, A, B, E>(
595 fn filter_map_results<'a, I, F, A, B, E>(
533 iter: I,
596 iter: I,
534 f: F,
597 f: F,
535 ) -> impl Iterator<Item = Result<B, E>> + 'a
598 ) -> impl Iterator<Item = Result<B, E>> + 'a
536 where
599 where
537 I: Iterator<Item = Result<A, E>> + 'a,
600 I: Iterator<Item = Result<A, E>> + 'a,
538 F: Fn(A) -> Result<Option<B>, E> + 'a,
601 F: Fn(A) -> Result<Option<B>, E> + 'a,
539 {
602 {
540 iter.filter_map(move |result| match result {
603 iter.filter_map(move |result| match result {
541 Ok(node) => f(node).transpose(),
604 Ok(node) => f(node).transpose(),
542 Err(e) => Some(Err(e)),
605 Err(e) => Some(Err(e)),
543 })
606 })
544 }
607 }
545
608
546 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
609 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
547 fn clear(&mut self) {
610 fn clear(&mut self) {
548 self.root = Default::default();
611 self.root = Default::default();
549 self.nodes_with_entry_count = 0;
612 self.nodes_with_entry_count = 0;
550 self.nodes_with_copy_source_count = 0;
613 self.nodes_with_copy_source_count = 0;
551 }
614 }
552
615
553 fn add_file(
616 fn add_file(
554 &mut self,
617 &mut self,
555 filename: &HgPath,
618 filename: &HgPath,
556 old_state: EntryState,
619 old_state: EntryState,
557 entry: DirstateEntry,
620 entry: DirstateEntry,
558 ) -> Result<(), DirstateError> {
621 ) -> Result<(), DirstateError> {
559 Ok(self.add_or_remove_file(filename, old_state, entry)?)
622 Ok(self.add_or_remove_file(filename, old_state, entry)?)
560 }
623 }
561
624
562 fn remove_file(
625 fn remove_file(
563 &mut self,
626 &mut self,
564 filename: &HgPath,
627 filename: &HgPath,
565 old_state: EntryState,
628 old_state: EntryState,
566 size: i32,
629 size: i32,
567 ) -> Result<(), DirstateError> {
630 ) -> Result<(), DirstateError> {
568 let entry = DirstateEntry {
631 let entry = DirstateEntry {
569 state: EntryState::Removed,
632 state: EntryState::Removed,
570 mode: 0,
633 mode: 0,
571 size,
634 size,
572 mtime: 0,
635 mtime: 0,
573 };
636 };
574 Ok(self.add_or_remove_file(filename, old_state, entry)?)
637 Ok(self.add_or_remove_file(filename, old_state, entry)?)
575 }
638 }
576
639
577 fn drop_file(
640 fn drop_file(
578 &mut self,
641 &mut self,
579 filename: &HgPath,
642 filename: &HgPath,
580 old_state: EntryState,
643 old_state: EntryState,
581 ) -> Result<bool, DirstateError> {
644 ) -> Result<bool, DirstateError> {
582 struct Dropped {
645 struct Dropped {
583 was_tracked: bool,
646 was_tracked: bool,
584 had_entry: bool,
647 had_entry: bool,
585 had_copy_source: bool,
648 had_copy_source: bool,
586 }
649 }
587 fn recur<'on_disk>(
650 fn recur<'on_disk>(
588 on_disk: &'on_disk [u8],
651 on_disk: &'on_disk [u8],
589 nodes: &mut ChildNodes<'on_disk>,
652 nodes: &mut ChildNodes<'on_disk>,
590 path: &HgPath,
653 path: &HgPath,
591 ) -> Result<Option<Dropped>, DirstateV2ParseError> {
654 ) -> Result<Option<Dropped>, DirstateV2ParseError> {
592 let (first_path_component, rest_of_path) =
655 let (first_path_component, rest_of_path) =
593 path.split_first_component();
656 path.split_first_component();
594 let node = if let Some(node) =
657 let node = if let Some(node) =
595 nodes.make_mut(on_disk)?.get_mut(first_path_component)
658 nodes.make_mut(on_disk)?.get_mut(first_path_component)
596 {
659 {
597 node
660 node
598 } else {
661 } else {
599 return Ok(None);
662 return Ok(None);
600 };
663 };
601 let dropped;
664 let dropped;
602 if let Some(rest) = rest_of_path {
665 if let Some(rest) = rest_of_path {
603 if let Some(d) = recur(on_disk, &mut node.children, rest)? {
666 if let Some(d) = recur(on_disk, &mut node.children, rest)? {
604 dropped = d;
667 dropped = d;
605 if dropped.was_tracked {
668 if dropped.was_tracked {
606 node.tracked_descendants_count -= 1;
669 node.tracked_descendants_count -= 1;
607 }
670 }
608 } else {
671 } else {
609 return Ok(None);
672 return Ok(None);
610 }
673 }
611 } else {
674 } else {
612 dropped = Dropped {
675 dropped = Dropped {
613 was_tracked: node
676 was_tracked: node
614 .entry
677 .entry
615 .as_ref()
678 .as_ref()
616 .map_or(false, |entry| entry.state.is_tracked()),
679 .map_or(false, |entry| entry.state.is_tracked()),
617 had_entry: node.entry.take().is_some(),
680 had_entry: node.entry.take().is_some(),
618 had_copy_source: node.copy_source.take().is_some(),
681 had_copy_source: node.copy_source.take().is_some(),
619 };
682 };
620 }
683 }
621 // After recursion, for both leaf (rest_of_path is None) nodes and
684 // After recursion, for both leaf (rest_of_path is None) nodes and
622 // parent nodes, remove a node if it just became empty.
685 // parent nodes, remove a node if it just became empty.
623 if node.entry.is_none()
686 if node.entry.is_none()
624 && node.copy_source.is_none()
687 && node.copy_source.is_none()
625 && node.children.is_empty()
688 && node.children.is_empty()
626 {
689 {
627 nodes.make_mut(on_disk)?.remove(first_path_component);
690 nodes.make_mut(on_disk)?.remove(first_path_component);
628 }
691 }
629 Ok(Some(dropped))
692 Ok(Some(dropped))
630 }
693 }
631
694
632 if let Some(dropped) = recur(self.on_disk, &mut self.root, filename)? {
695 if let Some(dropped) = recur(self.on_disk, &mut self.root, filename)? {
633 if dropped.had_entry {
696 if dropped.had_entry {
634 self.nodes_with_entry_count -= 1
697 self.nodes_with_entry_count -= 1
635 }
698 }
636 if dropped.had_copy_source {
699 if dropped.had_copy_source {
637 self.nodes_with_copy_source_count -= 1
700 self.nodes_with_copy_source_count -= 1
638 }
701 }
639 Ok(dropped.had_entry)
702 Ok(dropped.had_entry)
640 } else {
703 } else {
641 debug_assert!(!old_state.is_tracked());
704 debug_assert!(!old_state.is_tracked());
642 Ok(false)
705 Ok(false)
643 }
706 }
644 }
707 }
645
708
646 fn clear_ambiguous_times(
709 fn clear_ambiguous_times(
647 &mut self,
710 &mut self,
648 filenames: Vec<HgPathBuf>,
711 filenames: Vec<HgPathBuf>,
649 now: i32,
712 now: i32,
650 ) -> Result<(), DirstateV2ParseError> {
713 ) -> Result<(), DirstateV2ParseError> {
651 for filename in filenames {
714 for filename in filenames {
652 if let Some(node) =
715 if let Some(node) =
653 Self::get_node_mut(self.on_disk, &mut self.root, &filename)?
716 Self::get_node_mut(self.on_disk, &mut self.root, &filename)?
654 {
717 {
655 if let Some(entry) = node.entry.as_mut() {
718 if let Some(entry) = node.entry.as_mut() {
656 entry.clear_ambiguous_mtime(now);
719 entry.clear_ambiguous_mtime(now);
657 }
720 }
658 }
721 }
659 }
722 }
660 Ok(())
723 Ok(())
661 }
724 }
662
725
663 fn non_normal_entries_contains(
726 fn non_normal_entries_contains(
664 &mut self,
727 &mut self,
665 key: &HgPath,
728 key: &HgPath,
666 ) -> Result<bool, DirstateV2ParseError> {
729 ) -> Result<bool, DirstateV2ParseError> {
667 Ok(if let Some(node) = self.get_node(key)? {
730 Ok(if let Some(node) = self.get_node(key)? {
668 node.entry()?.map_or(false, |entry| entry.is_non_normal())
731 node.entry()?.map_or(false, |entry| entry.is_non_normal())
669 } else {
732 } else {
670 false
733 false
671 })
734 })
672 }
735 }
673
736
674 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
737 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
675 // Do nothing, this `DirstateMap` does not have a separate "non normal
738 // Do nothing, this `DirstateMap` does not have a separate "non normal
676 // entries" set that need to be kept up to date
739 // entries" set that need to be kept up to date
677 }
740 }
678
741
679 fn non_normal_or_other_parent_paths(
742 fn non_normal_or_other_parent_paths(
680 &mut self,
743 &mut self,
681 ) -> Box<dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + '_>
744 ) -> Box<dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + '_>
682 {
745 {
683 Box::new(self.filter_full_paths(|entry| {
746 Box::new(self.filter_full_paths(|entry| {
684 entry.is_non_normal() || entry.is_from_other_parent()
747 entry.is_non_normal() || entry.is_from_other_parent()
685 }))
748 }))
686 }
749 }
687
750
688 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
751 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
689 // Do nothing, this `DirstateMap` does not have a separate "non normal
752 // Do nothing, this `DirstateMap` does not have a separate "non normal
690 // entries" and "from other parent" sets that need to be recomputed
753 // entries" and "from other parent" sets that need to be recomputed
691 }
754 }
692
755
693 fn iter_non_normal_paths(
756 fn iter_non_normal_paths(
694 &mut self,
757 &mut self,
695 ) -> Box<
758 ) -> Box<
696 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
759 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
697 > {
760 > {
698 self.iter_non_normal_paths_panic()
761 self.iter_non_normal_paths_panic()
699 }
762 }
700
763
701 fn iter_non_normal_paths_panic(
764 fn iter_non_normal_paths_panic(
702 &self,
765 &self,
703 ) -> Box<
766 ) -> Box<
704 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
767 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
705 > {
768 > {
706 Box::new(self.filter_full_paths(|entry| entry.is_non_normal()))
769 Box::new(self.filter_full_paths(|entry| entry.is_non_normal()))
707 }
770 }
708
771
709 fn iter_other_parent_paths(
772 fn iter_other_parent_paths(
710 &mut self,
773 &mut self,
711 ) -> Box<
774 ) -> Box<
712 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
775 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
713 > {
776 > {
714 Box::new(self.filter_full_paths(|entry| entry.is_from_other_parent()))
777 Box::new(self.filter_full_paths(|entry| entry.is_from_other_parent()))
715 }
778 }
716
779
717 fn has_tracked_dir(
780 fn has_tracked_dir(
718 &mut self,
781 &mut self,
719 directory: &HgPath,
782 directory: &HgPath,
720 ) -> Result<bool, DirstateError> {
783 ) -> Result<bool, DirstateError> {
721 if let Some(node) = self.get_node(directory)? {
784 if let Some(node) = self.get_node(directory)? {
722 // A node without a `DirstateEntry` was created to hold child
785 // A node without a `DirstateEntry` was created to hold child
723 // nodes, and is therefore a directory.
786 // nodes, and is therefore a directory.
724 Ok(!node.has_entry() && node.tracked_descendants_count() > 0)
787 Ok(!node.has_entry() && node.tracked_descendants_count() > 0)
725 } else {
788 } else {
726 Ok(false)
789 Ok(false)
727 }
790 }
728 }
791 }
729
792
730 fn has_dir(&mut self, directory: &HgPath) -> Result<bool, DirstateError> {
793 fn has_dir(&mut self, directory: &HgPath) -> Result<bool, DirstateError> {
731 if let Some(node) = self.get_node(directory)? {
794 if let Some(node) = self.get_node(directory)? {
732 // A node without a `DirstateEntry` was created to hold child
795 // A node without a `DirstateEntry` was created to hold child
733 // nodes, and is therefore a directory.
796 // nodes, and is therefore a directory.
734 Ok(!node.has_entry())
797 Ok(!node.has_entry())
735 } else {
798 } else {
736 Ok(false)
799 Ok(false)
737 }
800 }
738 }
801 }
739
802
740 #[timed]
803 #[timed]
741 fn pack_v1(
804 fn pack_v1(
742 &mut self,
805 &mut self,
743 parents: DirstateParents,
806 parents: DirstateParents,
744 now: Timestamp,
807 now: Timestamp,
745 ) -> Result<Vec<u8>, DirstateError> {
808 ) -> Result<Vec<u8>, DirstateError> {
746 let now: i32 = now.0.try_into().expect("time overflow");
809 let now: i32 = now.0.try_into().expect("time overflow");
747 let mut ambiguous_mtimes = Vec::new();
810 let mut ambiguous_mtimes = Vec::new();
748 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
811 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
749 // reallocations
812 // reallocations
750 let mut size = parents.as_bytes().len();
813 let mut size = parents.as_bytes().len();
751 for node in self.iter_nodes() {
814 for node in self.iter_nodes() {
752 let node = node?;
815 let node = node?;
753 if let Some(entry) = node.entry()? {
816 if let Some(entry) = node.entry()? {
754 size += packed_entry_size(
817 size += packed_entry_size(
755 node.full_path(self.on_disk)?,
818 node.full_path(self.on_disk)?,
756 node.copy_source(self.on_disk)?,
819 node.copy_source(self.on_disk)?,
757 );
820 );
758 if entry.mtime_is_ambiguous(now) {
821 if entry.mtime_is_ambiguous(now) {
759 ambiguous_mtimes.push(node.full_path_cow(self.on_disk)?)
822 ambiguous_mtimes.push(node.full_path_cow(self.on_disk)?)
760 }
823 }
761 }
824 }
762 }
825 }
763 self.clear_known_ambiguous_mtimes(&ambiguous_mtimes)?;
826 self.clear_known_ambiguous_mtimes(&ambiguous_mtimes)?;
764
827
765 let mut packed = Vec::with_capacity(size);
828 let mut packed = Vec::with_capacity(size);
766 packed.extend(parents.as_bytes());
829 packed.extend(parents.as_bytes());
767
830
768 for node in self.iter_nodes() {
831 for node in self.iter_nodes() {
769 let node = node?;
832 let node = node?;
770 if let Some(entry) = node.entry()? {
833 if let Some(entry) = node.entry()? {
771 pack_entry(
834 pack_entry(
772 node.full_path(self.on_disk)?,
835 node.full_path(self.on_disk)?,
773 &entry,
836 &entry,
774 node.copy_source(self.on_disk)?,
837 node.copy_source(self.on_disk)?,
775 &mut packed,
838 &mut packed,
776 );
839 );
777 }
840 }
778 }
841 }
779 Ok(packed)
842 Ok(packed)
780 }
843 }
781
844
782 #[timed]
845 #[timed]
783 fn pack_v2(
846 fn pack_v2(
784 &mut self,
847 &mut self,
785 parents: DirstateParents,
848 parents: DirstateParents,
786 now: Timestamp,
849 now: Timestamp,
787 ) -> Result<Vec<u8>, DirstateError> {
850 ) -> Result<Vec<u8>, DirstateError> {
788 // TODO: how do we want to handle this in 2038?
851 // TODO: how do we want to handle this in 2038?
789 let now: i32 = now.0.try_into().expect("time overflow");
852 let now: i32 = now.0.try_into().expect("time overflow");
790 let mut paths = Vec::new();
853 let mut paths = Vec::new();
791 for node in self.iter_nodes() {
854 for node in self.iter_nodes() {
792 let node = node?;
855 let node = node?;
793 if let Some(entry) = node.entry()? {
856 if let Some(entry) = node.entry()? {
794 if entry.mtime_is_ambiguous(now) {
857 if entry.mtime_is_ambiguous(now) {
795 paths.push(node.full_path_cow(self.on_disk)?)
858 paths.push(node.full_path_cow(self.on_disk)?)
796 }
859 }
797 }
860 }
798 }
861 }
799 // Borrow of `self` ends here since we collect cloned paths
862 // Borrow of `self` ends here since we collect cloned paths
800
863
801 self.clear_known_ambiguous_mtimes(&paths)?;
864 self.clear_known_ambiguous_mtimes(&paths)?;
802
865
803 on_disk::write(self, parents)
866 on_disk::write(self, parents)
804 }
867 }
805
868
806 fn set_all_dirs(&mut self) -> Result<(), DirstateError> {
869 fn set_all_dirs(&mut self) -> Result<(), DirstateError> {
807 // Do nothing, this `DirstateMap` does not a separate `all_dirs` that
870 // Do nothing, this `DirstateMap` does not a separate `all_dirs` that
808 // needs to be recomputed
871 // needs to be recomputed
809 Ok(())
872 Ok(())
810 }
873 }
811
874
812 fn set_dirs(&mut self) -> Result<(), DirstateError> {
875 fn set_dirs(&mut self) -> Result<(), DirstateError> {
813 // Do nothing, this `DirstateMap` does not a separate `dirs` that needs
876 // Do nothing, this `DirstateMap` does not a separate `dirs` that needs
814 // to be recomputed
877 // to be recomputed
815 Ok(())
878 Ok(())
816 }
879 }
817
880
818 fn status<'a>(
881 fn status<'a>(
819 &'a mut self,
882 &'a mut self,
820 matcher: &'a (dyn Matcher + Sync),
883 matcher: &'a (dyn Matcher + Sync),
821 root_dir: PathBuf,
884 root_dir: PathBuf,
822 ignore_files: Vec<PathBuf>,
885 ignore_files: Vec<PathBuf>,
823 options: StatusOptions,
886 options: StatusOptions,
824 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
887 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
825 {
888 {
826 super::status::status(self, matcher, root_dir, ignore_files, options)
889 super::status::status(self, matcher, root_dir, ignore_files, options)
827 }
890 }
828
891
829 fn copy_map_len(&self) -> usize {
892 fn copy_map_len(&self) -> usize {
830 self.nodes_with_copy_source_count as usize
893 self.nodes_with_copy_source_count as usize
831 }
894 }
832
895
833 fn copy_map_iter(&self) -> CopyMapIter<'_> {
896 fn copy_map_iter(&self) -> CopyMapIter<'_> {
834 Box::new(filter_map_results(self.iter_nodes(), move |node| {
897 Box::new(filter_map_results(self.iter_nodes(), move |node| {
835 Ok(if let Some(source) = node.copy_source(self.on_disk)? {
898 Ok(if let Some(source) = node.copy_source(self.on_disk)? {
836 Some((node.full_path(self.on_disk)?, source))
899 Some((node.full_path(self.on_disk)?, source))
837 } else {
900 } else {
838 None
901 None
839 })
902 })
840 }))
903 }))
841 }
904 }
842
905
843 fn copy_map_contains_key(
906 fn copy_map_contains_key(
844 &self,
907 &self,
845 key: &HgPath,
908 key: &HgPath,
846 ) -> Result<bool, DirstateV2ParseError> {
909 ) -> Result<bool, DirstateV2ParseError> {
847 Ok(if let Some(node) = self.get_node(key)? {
910 Ok(if let Some(node) = self.get_node(key)? {
848 node.has_copy_source()
911 node.has_copy_source()
849 } else {
912 } else {
850 false
913 false
851 })
914 })
852 }
915 }
853
916
854 fn copy_map_get(
917 fn copy_map_get(
855 &self,
918 &self,
856 key: &HgPath,
919 key: &HgPath,
857 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
920 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
858 if let Some(node) = self.get_node(key)? {
921 if let Some(node) = self.get_node(key)? {
859 if let Some(source) = node.copy_source(self.on_disk)? {
922 if let Some(source) = node.copy_source(self.on_disk)? {
860 return Ok(Some(source));
923 return Ok(Some(source));
861 }
924 }
862 }
925 }
863 Ok(None)
926 Ok(None)
864 }
927 }
865
928
866 fn copy_map_remove(
929 fn copy_map_remove(
867 &mut self,
930 &mut self,
868 key: &HgPath,
931 key: &HgPath,
869 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
932 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
870 let count = &mut self.nodes_with_copy_source_count;
933 let count = &mut self.nodes_with_copy_source_count;
871 Ok(
934 Ok(
872 Self::get_node_mut(self.on_disk, &mut self.root, key)?.and_then(
935 Self::get_node_mut(self.on_disk, &mut self.root, key)?.and_then(
873 |node| {
936 |node| {
874 if node.copy_source.is_some() {
937 if node.copy_source.is_some() {
875 *count -= 1
938 *count -= 1
876 }
939 }
877 node.copy_source.take().map(Cow::into_owned)
940 node.copy_source.take().map(Cow::into_owned)
878 },
941 },
879 ),
942 ),
880 )
943 )
881 }
944 }
882
945
883 fn copy_map_insert(
946 fn copy_map_insert(
884 &mut self,
947 &mut self,
885 key: HgPathBuf,
948 key: HgPathBuf,
886 value: HgPathBuf,
949 value: HgPathBuf,
887 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
950 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
888 let node = Self::get_or_insert_node(
951 let node = Self::get_or_insert_node(
889 self.on_disk,
952 self.on_disk,
890 &mut self.root,
953 &mut self.root,
891 &key,
954 &key,
892 WithBasename::to_cow_owned,
955 WithBasename::to_cow_owned,
893 |_ancestor| {},
956 |_ancestor| {},
894 )?;
957 )?;
895 if node.copy_source.is_none() {
958 if node.copy_source.is_none() {
896 self.nodes_with_copy_source_count += 1
959 self.nodes_with_copy_source_count += 1
897 }
960 }
898 Ok(node.copy_source.replace(value.into()).map(Cow::into_owned))
961 Ok(node.copy_source.replace(value.into()).map(Cow::into_owned))
899 }
962 }
900
963
901 fn len(&self) -> usize {
964 fn len(&self) -> usize {
902 self.nodes_with_entry_count as usize
965 self.nodes_with_entry_count as usize
903 }
966 }
904
967
905 fn contains_key(
968 fn contains_key(
906 &self,
969 &self,
907 key: &HgPath,
970 key: &HgPath,
908 ) -> Result<bool, DirstateV2ParseError> {
971 ) -> Result<bool, DirstateV2ParseError> {
909 Ok(self.get(key)?.is_some())
972 Ok(self.get(key)?.is_some())
910 }
973 }
911
974
912 fn get(
975 fn get(
913 &self,
976 &self,
914 key: &HgPath,
977 key: &HgPath,
915 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
978 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
916 Ok(if let Some(node) = self.get_node(key)? {
979 Ok(if let Some(node) = self.get_node(key)? {
917 node.entry()?
980 node.entry()?
918 } else {
981 } else {
919 None
982 None
920 })
983 })
921 }
984 }
922
985
923 fn iter(&self) -> StateMapIter<'_> {
986 fn iter(&self) -> StateMapIter<'_> {
924 Box::new(filter_map_results(self.iter_nodes(), move |node| {
987 Box::new(filter_map_results(self.iter_nodes(), move |node| {
925 Ok(if let Some(entry) = node.entry()? {
988 Ok(if let Some(entry) = node.entry()? {
926 Some((node.full_path(self.on_disk)?, entry))
989 Some((node.full_path(self.on_disk)?, entry))
927 } else {
990 } else {
928 None
991 None
929 })
992 })
930 }))
993 }))
931 }
994 }
932 }
995 }
@@ -1,365 +1,413 b''
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! # File format
3 //! # File format
4 //!
4 //!
5 //! The file starts with a fixed-sized header, whose layout is defined by the
5 //! The file starts with a fixed-sized header, whose layout is defined by the
6 //! `Header` struct. Its `root` field contains the slice (offset and length) to
6 //! `Header` struct. Its `root` field contains the slice (offset and length) to
7 //! the nodes representing the files and directories at the root of the
7 //! the nodes representing the files and directories at the root of the
8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
9 //! Nodes in turn contain slices to variable-size paths, and to their own child
9 //! Nodes in turn contain slices to variable-size paths, and to their own child
10 //! nodes (if any) for nested files and directories.
10 //! nodes (if any) for nested files and directories.
11
11
12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
13 use crate::dirstate_tree::path_with_basename::WithBasename;
13 use crate::dirstate_tree::path_with_basename::WithBasename;
14 use crate::errors::HgError;
14 use crate::errors::HgError;
15 use crate::utils::hg_path::HgPath;
15 use crate::utils::hg_path::HgPath;
16 use crate::DirstateEntry;
16 use crate::DirstateEntry;
17 use crate::DirstateError;
17 use crate::DirstateError;
18 use crate::DirstateParents;
18 use crate::DirstateParents;
19 use crate::EntryState;
19 use bytes_cast::unaligned::{I32Be, U32Be, U64Be};
20 use bytes_cast::unaligned::{I32Be, U32Be, U64Be};
20 use bytes_cast::BytesCast;
21 use bytes_cast::BytesCast;
21 use std::borrow::Cow;
22 use std::borrow::Cow;
22 use std::convert::{TryFrom, TryInto};
23 use std::convert::{TryFrom, TryInto};
23
24
24 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
25 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
25 /// This a redundant sanity check more than an actual "magic number" since
26 /// This a redundant sanity check more than an actual "magic number" since
26 /// `.hg/requires` already governs which format should be used.
27 /// `.hg/requires` already governs which format should be used.
27 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
28 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
28
29
29 #[derive(BytesCast)]
30 #[derive(BytesCast)]
30 #[repr(C)]
31 #[repr(C)]
31 struct Header {
32 struct Header {
32 marker: [u8; V2_FORMAT_MARKER.len()],
33 marker: [u8; V2_FORMAT_MARKER.len()],
33
34
34 /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
35 /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
35 /// `parents` field being at this offset, immediately after `marker`.
36 /// `parents` field being at this offset, immediately after `marker`.
36 parents: DirstateParents,
37 parents: DirstateParents,
37
38
38 root: ChildNodes,
39 root: ChildNodes,
39 nodes_with_entry_count: Size,
40 nodes_with_entry_count: Size,
40 nodes_with_copy_source_count: Size,
41 nodes_with_copy_source_count: Size,
41 }
42 }
42
43
43 #[derive(BytesCast)]
44 #[derive(BytesCast)]
44 #[repr(C)]
45 #[repr(C)]
45 struct Node {
46 pub(super) struct Node {
46 full_path: PathSlice,
47 full_path: PathSlice,
47
48
48 /// In bytes from `self.full_path.start`
49 /// In bytes from `self.full_path.start`
49 base_name_start: Size,
50 base_name_start: Size,
50
51
51 copy_source: OptPathSlice,
52 copy_source: OptPathSlice,
52 entry: OptEntry,
53 entry: OptEntry,
53 children: ChildNodes,
54 children: ChildNodes,
54 tracked_descendants_count: Size,
55 pub(super) tracked_descendants_count: Size,
55 }
56 }
56
57
57 /// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1
58 /// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1
58 /// format
59 /// format
59 #[derive(BytesCast)]
60 #[derive(BytesCast, Copy, Clone)]
60 #[repr(C)]
61 #[repr(C)]
61 struct OptEntry {
62 struct OptEntry {
62 state: u8,
63 state: u8,
63 mode: I32Be,
64 mode: I32Be,
64 mtime: I32Be,
65 mtime: I32Be,
65 size: I32Be,
66 size: I32Be,
66 }
67 }
67
68
68 /// Counted in bytes from the start of the file
69 /// Counted in bytes from the start of the file
69 ///
70 ///
70 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
71 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
71 /// we could save space by using `U32Be` instead.
72 /// we could save space by using `U32Be` instead.
72 type Offset = U64Be;
73 type Offset = U64Be;
73
74
74 /// Counted in number of items
75 /// Counted in number of items
75 ///
76 ///
76 /// NOTE: not supporting directories with more than 4 billion direct children,
77 /// NOTE: not supporting directories with more than 4 billion direct children,
77 /// or filenames more than 4 GiB.
78 /// or filenames more than 4 GiB.
78 type Size = U32Be;
79 type Size = U32Be;
79
80
80 /// Location of consecutive, fixed-size items.
81 /// Location of consecutive, fixed-size items.
81 ///
82 ///
82 /// An item can be a single byte for paths, or a struct with
83 /// An item can be a single byte for paths, or a struct with
83 /// `derive(BytesCast)`.
84 /// `derive(BytesCast)`.
84 #[derive(BytesCast, Copy, Clone)]
85 #[derive(BytesCast, Copy, Clone)]
85 #[repr(C)]
86 #[repr(C)]
86 struct Slice {
87 struct Slice {
87 start: Offset,
88 start: Offset,
88 len: Size,
89 len: Size,
89 }
90 }
90
91
91 /// A contiguous sequence of `len` times `Node`, representing the child nodes
92 /// A contiguous sequence of `len` times `Node`, representing the child nodes
92 /// of either some other node or of the repository root.
93 /// of either some other node or of the repository root.
93 ///
94 ///
94 /// Always sorted by ascending `full_path`, to allow binary search.
95 /// Always sorted by ascending `full_path`, to allow binary search.
95 /// Since nodes with the same parent nodes also have the same parent path,
96 /// Since nodes with the same parent nodes also have the same parent path,
96 /// only the `base_name`s need to be compared during binary search.
97 /// only the `base_name`s need to be compared during binary search.
97 type ChildNodes = Slice;
98 type ChildNodes = Slice;
98
99
99 /// A `HgPath` of `len` bytes
100 /// A `HgPath` of `len` bytes
100 type PathSlice = Slice;
101 type PathSlice = Slice;
101
102
102 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
103 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
103 type OptPathSlice = Slice;
104 type OptPathSlice = Slice;
104
105
105 /// Make sure that size-affecting changes are made knowingly
106 /// Make sure that size-affecting changes are made knowingly
106 fn _static_assert_size_of() {
107 fn _static_assert_size_of() {
107 let _ = std::mem::transmute::<Header, [u8; 72]>;
108 let _ = std::mem::transmute::<Header, [u8; 72]>;
108 let _ = std::mem::transmute::<Node, [u8; 57]>;
109 let _ = std::mem::transmute::<Node, [u8; 57]>;
109 }
110 }
110
111
111 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
112 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
112 ///
113 ///
113 /// This should only happen if Mercurial is buggy or a repository is corrupted.
114 /// This should only happen if Mercurial is buggy or a repository is corrupted.
114 #[derive(Debug)]
115 #[derive(Debug)]
115 pub struct DirstateV2ParseError;
116 pub struct DirstateV2ParseError;
116
117
117 impl From<DirstateV2ParseError> for HgError {
118 impl From<DirstateV2ParseError> for HgError {
118 fn from(_: DirstateV2ParseError) -> Self {
119 fn from(_: DirstateV2ParseError) -> Self {
119 HgError::corrupted("dirstate-v2 parse error")
120 HgError::corrupted("dirstate-v2 parse error")
120 }
121 }
121 }
122 }
122
123
123 impl From<DirstateV2ParseError> for crate::DirstateError {
124 impl From<DirstateV2ParseError> for crate::DirstateError {
124 fn from(error: DirstateV2ParseError) -> Self {
125 fn from(error: DirstateV2ParseError) -> Self {
125 HgError::from(error).into()
126 HgError::from(error).into()
126 }
127 }
127 }
128 }
128
129
129 pub(super) fn read<'on_disk>(
130 pub(super) fn read<'on_disk>(
130 on_disk: &'on_disk [u8],
131 on_disk: &'on_disk [u8],
131 ) -> Result<
132 ) -> Result<
132 (DirstateMap<'on_disk>, Option<DirstateParents>),
133 (DirstateMap<'on_disk>, Option<DirstateParents>),
133 DirstateV2ParseError,
134 DirstateV2ParseError,
134 > {
135 > {
135 if on_disk.is_empty() {
136 if on_disk.is_empty() {
136 return Ok((DirstateMap::empty(on_disk), None));
137 return Ok((DirstateMap::empty(on_disk), None));
137 }
138 }
138 let (header, _) =
139 let (header, _) =
139 Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
140 Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
140 let Header {
141 let Header {
141 marker,
142 marker,
142 parents,
143 parents,
143 root,
144 root,
144 nodes_with_entry_count,
145 nodes_with_entry_count,
145 nodes_with_copy_source_count,
146 nodes_with_copy_source_count,
146 } = header;
147 } = header;
147 if marker != V2_FORMAT_MARKER {
148 if marker != V2_FORMAT_MARKER {
148 return Err(DirstateV2ParseError);
149 return Err(DirstateV2ParseError);
149 }
150 }
150 let dirstate_map = DirstateMap {
151 let dirstate_map = DirstateMap {
151 on_disk,
152 on_disk,
152 root: read_nodes(on_disk, *root)?,
153 root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
154 on_disk, *root,
155 )?),
153 nodes_with_entry_count: nodes_with_entry_count.get(),
156 nodes_with_entry_count: nodes_with_entry_count.get(),
154 nodes_with_copy_source_count: nodes_with_copy_source_count.get(),
157 nodes_with_copy_source_count: nodes_with_copy_source_count.get(),
155 };
158 };
156 let parents = Some(parents.clone());
159 let parents = Some(parents.clone());
157 Ok((dirstate_map, parents))
160 Ok((dirstate_map, parents))
158 }
161 }
159
162
160 impl Node {
163 impl Node {
161 pub(super) fn path<'on_disk>(
164 pub(super) fn full_path<'on_disk>(
162 &self,
165 &self,
163 on_disk: &'on_disk [u8],
166 on_disk: &'on_disk [u8],
164 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
167 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
165 let full_path = read_hg_path(on_disk, self.full_path)?;
168 read_hg_path(on_disk, self.full_path)
166 let base_name_start = usize::try_from(self.base_name_start.get())
169 }
167 // u32 -> usize, could only panic on a 16-bit CPU
170
168 .expect("dirstate-v2 base_name_start out of bounds");
171 pub(super) fn base_name_start<'on_disk>(
169 if base_name_start < full_path.len() {
172 &self,
170 Ok(WithBasename::from_raw_parts(full_path, base_name_start))
173 ) -> Result<usize, DirstateV2ParseError> {
174 let start = self.base_name_start.get();
175 if start < self.full_path.len.get() {
176 let start = usize::try_from(start)
177 // u32 -> usize, could only panic on a 16-bit CPU
178 .expect("dirstate-v2 base_name_start out of bounds");
179 Ok(start)
171 } else {
180 } else {
172 Err(DirstateV2ParseError)
181 Err(DirstateV2ParseError)
173 }
182 }
174 }
183 }
175
184
185 pub(super) fn base_name<'on_disk>(
186 &self,
187 on_disk: &'on_disk [u8],
188 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
189 let full_path = self.full_path(on_disk)?;
190 let base_name_start = self.base_name_start()?;
191 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
192 }
193
194 pub(super) fn path<'on_disk>(
195 &self,
196 on_disk: &'on_disk [u8],
197 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
198 Ok(WithBasename::from_raw_parts(
199 Cow::Borrowed(self.full_path(on_disk)?),
200 self.base_name_start()?,
201 ))
202 }
203
204 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
205 self.copy_source.start.get() != 0
206 }
207
176 pub(super) fn copy_source<'on_disk>(
208 pub(super) fn copy_source<'on_disk>(
177 &self,
209 &self,
178 on_disk: &'on_disk [u8],
210 on_disk: &'on_disk [u8],
179 ) -> Result<Option<Cow<'on_disk, HgPath>>, DirstateV2ParseError> {
211 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
180 Ok(if self.copy_source.start.get() != 0 {
212 Ok(if self.has_copy_source() {
181 Some(read_hg_path(on_disk, self.copy_source)?)
213 Some(read_hg_path(on_disk, self.copy_source)?)
182 } else {
214 } else {
183 None
215 None
184 })
216 })
185 }
217 }
186
218
219 pub(super) fn has_entry(&self) -> bool {
220 self.entry.state != b'\0'
221 }
222
223 pub(super) fn state(
224 &self,
225 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
226 Ok(if self.has_entry() {
227 Some(
228 self.entry
229 .state
230 .try_into()
231 .map_err(|_| DirstateV2ParseError)?,
232 )
233 } else {
234 None
235 })
236 }
237
187 pub(super) fn entry(
238 pub(super) fn entry(
188 &self,
239 &self,
189 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
240 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
190 Ok(if self.entry.state != b'\0' {
241 Ok(self.state()?.map(|state| DirstateEntry {
191 Some(DirstateEntry {
242 state,
192 state: self
243 mode: self.entry.mode.get(),
193 .entry
244 mtime: self.entry.mtime.get(),
194 .state
245 size: self.entry.size.get(),
195 .try_into()
246 }))
196 .map_err(|_| DirstateV2ParseError)?,
247 }
197 mode: self.entry.mode.get(),
248
198 mtime: self.entry.mtime.get(),
249 pub(super) fn children<'on_disk>(
199 size: self.entry.size.get(),
250 &self,
200 })
251 on_disk: &'on_disk [u8],
201 } else {
252 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
202 None
253 read_slice::<Node>(on_disk, self.children)
203 })
204 }
254 }
205
255
206 pub(super) fn to_in_memory_node<'on_disk>(
256 pub(super) fn to_in_memory_node<'on_disk>(
207 &self,
257 &self,
208 on_disk: &'on_disk [u8],
258 on_disk: &'on_disk [u8],
209 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
259 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
210 Ok(dirstate_map::Node {
260 Ok(dirstate_map::Node {
211 children: read_nodes(on_disk, self.children)?,
261 children: dirstate_map::ChildNodes::OnDisk(
212 copy_source: self.copy_source(on_disk)?,
262 self.children(on_disk)?,
263 ),
264 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
213 entry: self.entry()?,
265 entry: self.entry()?,
214 tracked_descendants_count: self.tracked_descendants_count.get(),
266 tracked_descendants_count: self.tracked_descendants_count.get(),
215 })
267 })
216 }
268 }
217 }
269 }
218
270
219 fn read_nodes(
220 on_disk: &[u8],
221 slice: ChildNodes,
222 ) -> Result<dirstate_map::ChildNodes, DirstateV2ParseError> {
223 read_slice::<Node>(on_disk, slice)?
224 .iter()
225 .map(|node| {
226 Ok((node.path(on_disk)?, node.to_in_memory_node(on_disk)?))
227 })
228 .collect::<Result<_, _>>()
229 .map(dirstate_map::ChildNodes::InMemory)
230 }
231
232 fn read_hg_path(
271 fn read_hg_path(
233 on_disk: &[u8],
272 on_disk: &[u8],
234 slice: Slice,
273 slice: Slice,
235 ) -> Result<Cow<HgPath>, DirstateV2ParseError> {
274 ) -> Result<&HgPath, DirstateV2ParseError> {
236 let bytes = read_slice::<u8>(on_disk, slice)?;
275 let bytes = read_slice::<u8>(on_disk, slice)?;
237 Ok(Cow::Borrowed(HgPath::new(bytes)))
276 Ok(HgPath::new(bytes))
238 }
277 }
239
278
240 fn read_slice<T>(
279 fn read_slice<T>(
241 on_disk: &[u8],
280 on_disk: &[u8],
242 slice: Slice,
281 slice: Slice,
243 ) -> Result<&[T], DirstateV2ParseError>
282 ) -> Result<&[T], DirstateV2ParseError>
244 where
283 where
245 T: BytesCast,
284 T: BytesCast,
246 {
285 {
247 // Either `usize::MAX` would result in "out of bounds" error since a single
286 // Either `usize::MAX` would result in "out of bounds" error since a single
248 // `&[u8]` cannot occupy the entire addess space.
287 // `&[u8]` cannot occupy the entire addess space.
249 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
288 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
250 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
289 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
251 on_disk
290 on_disk
252 .get(start..)
291 .get(start..)
253 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
292 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
254 .map(|(slice, _rest)| slice)
293 .map(|(slice, _rest)| slice)
255 .ok_or_else(|| DirstateV2ParseError)
294 .ok_or_else(|| DirstateV2ParseError)
256 }
295 }
257
296
258 pub(super) fn write(
297 pub(super) fn write(
259 dirstate_map: &mut DirstateMap,
298 dirstate_map: &mut DirstateMap,
260 parents: DirstateParents,
299 parents: DirstateParents,
261 ) -> Result<Vec<u8>, DirstateError> {
300 ) -> Result<Vec<u8>, DirstateError> {
262 let header_len = std::mem::size_of::<Header>();
301 let header_len = std::mem::size_of::<Header>();
263
302
264 // This ignores the space for paths, and for nodes without an entry.
303 // This ignores the space for paths, and for nodes without an entry.
265 // TODO: better estimate? Skip the `Vec` and write to a file directly?
304 // TODO: better estimate? Skip the `Vec` and write to a file directly?
266 let size_guess = header_len
305 let size_guess = header_len
267 + std::mem::size_of::<Node>()
306 + std::mem::size_of::<Node>()
268 * dirstate_map.nodes_with_entry_count as usize;
307 * dirstate_map.nodes_with_entry_count as usize;
269 let mut out = Vec::with_capacity(size_guess);
308 let mut out = Vec::with_capacity(size_guess);
270
309
271 // Keep space for the header. We’ll fill it out at the end when we know the
310 // Keep space for the header. We’ll fill it out at the end when we know the
272 // actual offset for the root nodes.
311 // actual offset for the root nodes.
273 out.resize(header_len, 0_u8);
312 out.resize(header_len, 0_u8);
274
313
275 let root =
314 let root =
276 write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
315 write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
277
316
278 let header = Header {
317 let header = Header {
279 marker: *V2_FORMAT_MARKER,
318 marker: *V2_FORMAT_MARKER,
280 parents: parents,
319 parents: parents,
281 root,
320 root,
282 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
321 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
283 nodes_with_copy_source_count: dirstate_map
322 nodes_with_copy_source_count: dirstate_map
284 .nodes_with_copy_source_count
323 .nodes_with_copy_source_count
285 .into(),
324 .into(),
286 };
325 };
287 out[..header_len].copy_from_slice(header.as_bytes());
326 out[..header_len].copy_from_slice(header.as_bytes());
288 Ok(out)
327 Ok(out)
289 }
328 }
290
329
291 fn write_nodes(
330 fn write_nodes(
292 dirstate_map: &DirstateMap,
331 dirstate_map: &DirstateMap,
293 nodes: dirstate_map::ChildNodesRef,
332 nodes: dirstate_map::ChildNodesRef,
294 out: &mut Vec<u8>,
333 out: &mut Vec<u8>,
295 ) -> Result<ChildNodes, DirstateError> {
334 ) -> Result<ChildNodes, DirstateError> {
296 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
335 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
297 // order. Sort to enable binary search in the written file.
336 // order. Sort to enable binary search in the written file.
298 let nodes = nodes.sorted();
337 let nodes = nodes.sorted();
299
338
300 // First accumulate serialized nodes in a `Vec`
339 // First accumulate serialized nodes in a `Vec`
301 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
340 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
302 for node in nodes {
341 for node in nodes {
303 let children = node.children(dirstate_map.on_disk)?;
342 let children = write_nodes(
304 let children = write_nodes(dirstate_map, children, out)?;
343 dirstate_map,
344 node.children(dirstate_map.on_disk)?,
345 out,
346 )?;
305 let full_path = node.full_path(dirstate_map.on_disk)?;
347 let full_path = node.full_path(dirstate_map.on_disk)?;
306 let full_path = write_slice::<u8>(full_path.as_bytes(), out);
348 let full_path = write_slice::<u8>(full_path.as_bytes(), out);
307 let copy_source =
349 let copy_source =
308 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
350 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
309 write_slice::<u8>(source.as_bytes(), out)
351 write_slice::<u8>(source.as_bytes(), out)
310 } else {
352 } else {
311 Slice {
353 Slice {
312 start: 0.into(),
354 start: 0.into(),
313 len: 0.into(),
355 len: 0.into(),
314 }
356 }
315 };
357 };
316 on_disk_nodes.push(match node {
358 on_disk_nodes.push(match node {
317 NodeRef::InMemory(path, node) => Node {
359 NodeRef::InMemory(path, node) => Node {
318 children,
360 children,
319 copy_source,
361 copy_source,
320 full_path,
362 full_path,
321 base_name_start: u32::try_from(path.base_name_start())
363 base_name_start: u32::try_from(path.base_name_start())
322 // Could only panic for paths over 4 GiB
364 // Could only panic for paths over 4 GiB
323 .expect("dirstate-v2 offset overflow")
365 .expect("dirstate-v2 offset overflow")
324 .into(),
366 .into(),
325 tracked_descendants_count: node
367 tracked_descendants_count: node
326 .tracked_descendants_count
368 .tracked_descendants_count
327 .into(),
369 .into(),
328 entry: if let Some(entry) = &node.entry {
370 entry: if let Some(entry) = &node.entry {
329 OptEntry {
371 OptEntry {
330 state: entry.state.into(),
372 state: entry.state.into(),
331 mode: entry.mode.into(),
373 mode: entry.mode.into(),
332 mtime: entry.mtime.into(),
374 mtime: entry.mtime.into(),
333 size: entry.size.into(),
375 size: entry.size.into(),
334 }
376 }
335 } else {
377 } else {
336 OptEntry {
378 OptEntry {
337 state: b'\0',
379 state: b'\0',
338 mode: 0.into(),
380 mode: 0.into(),
339 mtime: 0.into(),
381 mtime: 0.into(),
340 size: 0.into(),
382 size: 0.into(),
341 }
383 }
342 },
384 },
343 },
385 },
386 NodeRef::OnDisk(node) => Node {
387 children,
388 copy_source,
389 full_path,
390 ..*node
391 },
344 })
392 })
345 }
393 }
346 // … so we can write them contiguously
394 // … so we can write them contiguously
347 Ok(write_slice::<Node>(&on_disk_nodes, out))
395 Ok(write_slice::<Node>(&on_disk_nodes, out))
348 }
396 }
349
397
350 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
398 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
351 where
399 where
352 T: BytesCast,
400 T: BytesCast,
353 {
401 {
354 let start = u64::try_from(out.len())
402 let start = u64::try_from(out.len())
355 // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
403 // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
356 .expect("dirstate-v2 offset overflow")
404 .expect("dirstate-v2 offset overflow")
357 .into();
405 .into();
358 let len = u32::try_from(slice.len())
406 let len = u32::try_from(slice.len())
359 // Could only panic for paths over 4 GiB or nodes with over 4 billions
407 // Could only panic for paths over 4 GiB or nodes with over 4 billions
360 // child nodes
408 // child nodes
361 .expect("dirstate-v2 offset overflow")
409 .expect("dirstate-v2 offset overflow")
362 .into();
410 .into();
363 out.extend(slice.as_bytes());
411 out.extend(slice.as_bytes());
364 Slice { start, len }
412 Slice { start, len }
365 }
413 }
General Comments 0
You need to be logged in to leave comments. Login now