##// END OF EJS Templates
dirstate-tree: Borrow copy source paths from the "on disk" bytes...
Simon Sapin -
r47895:9be61845 default
parent child Browse files
Show More
@@ -1,618 +1,620 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 use std::borrow::Cow;
3 4 use std::convert::TryInto;
4 5 use std::path::PathBuf;
5 6
6 7 use super::path_with_basename::WithBasename;
7 8 use crate::dirstate::parsers::clear_ambiguous_mtime;
8 9 use crate::dirstate::parsers::pack_entry;
9 10 use crate::dirstate::parsers::packed_entry_size;
10 11 use crate::dirstate::parsers::parse_dirstate_entries;
11 12 use crate::dirstate::parsers::Timestamp;
12 13 use crate::matchers::Matcher;
13 14 use crate::utils::hg_path::{HgPath, HgPathBuf};
14 15 use crate::CopyMapIter;
15 16 use crate::DirstateEntry;
16 17 use crate::DirstateError;
17 18 use crate::DirstateMapError;
18 19 use crate::DirstateParents;
19 20 use crate::DirstateStatus;
20 21 use crate::EntryState;
21 22 use crate::FastHashMap;
22 23 use crate::PatternFileWarning;
23 24 use crate::StateMapIter;
24 25 use crate::StatusError;
25 26 use crate::StatusOptions;
26 27
27 28 pub struct DirstateMap<'on_disk> {
28 29 /// Contents of the `.hg/dirstate` file
29 30 on_disk: &'on_disk [u8],
30 31
31 pub(super) root: ChildNodes,
32 pub(super) root: ChildNodes<'on_disk>,
32 33
33 34 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
34 35 nodes_with_entry_count: usize,
35 36
36 37 /// Number of nodes anywhere in the tree that have
37 38 /// `.copy_source.is_some()`.
38 39 nodes_with_copy_source_count: usize,
39 40 }
40 41
41 42 /// Using a plain `HgPathBuf` of the full path from the repository root as a
42 43 /// map key would also work: all paths in a given map have the same parent
43 44 /// path, so comparing full paths gives the same result as comparing base
44 45 /// names. However `BTreeMap` would waste time always re-comparing the same
45 46 /// string prefix.
46 pub(super) type ChildNodes = FastHashMap<WithBasename<HgPathBuf>, Node>;
47 pub(super) type ChildNodes<'on_disk> =
48 FastHashMap<WithBasename<HgPathBuf>, Node<'on_disk>>;
47 49
48 50 /// Represents a file or a directory
49 51 #[derive(Default)]
50 pub(super) struct Node {
52 pub(super) struct Node<'on_disk> {
51 53 /// `None` for directories
52 54 pub(super) entry: Option<DirstateEntry>,
53 55
54 pub(super) copy_source: Option<HgPathBuf>,
56 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
55 57
56 pub(super) children: ChildNodes,
58 pub(super) children: ChildNodes<'on_disk>,
57 59
58 60 /// How many (non-inclusive) descendants of this node are tracked files
59 61 tracked_descendants_count: usize,
60 62 }
61 63
62 impl Node {
64 impl Node<'_> {
63 65 pub(super) fn state(&self) -> Option<EntryState> {
64 66 self.entry.as_ref().map(|entry| entry.state)
65 67 }
66 68 }
67 69
68 70 /// `(full_path, entry, copy_source)`
69 type NodeDataMut<'a> = (
70 &'a HgPath,
71 &'a mut Option<DirstateEntry>,
72 &'a mut Option<HgPathBuf>,
71 type NodeDataMut<'tree, 'on_disk> = (
72 &'tree HgPath,
73 &'tree mut Option<DirstateEntry>,
74 &'tree mut Option<Cow<'on_disk, HgPath>>,
73 75 );
74 76
75 77 impl<'on_disk> DirstateMap<'on_disk> {
76 78 pub fn new(
77 79 on_disk: &'on_disk [u8],
78 80 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
79 81 let mut map = Self {
80 82 on_disk,
81 83 root: ChildNodes::default(),
82 84 nodes_with_entry_count: 0,
83 85 nodes_with_copy_source_count: 0,
84 86 };
85 87 let parents = map.read()?;
86 88 Ok((map, parents))
87 89 }
88 90
89 91 /// Should only be called in `new`
90 92 #[timed]
91 93 fn read(&mut self) -> Result<Option<DirstateParents>, DirstateError> {
92 94 if self.on_disk.is_empty() {
93 95 return Ok(None);
94 96 }
95 97
96 98 let parents = parse_dirstate_entries(
97 99 self.on_disk,
98 100 |path, entry, copy_source| {
99 101 let tracked = entry.state.is_tracked();
100 102 let node = Self::get_or_insert_node_tracing_ancestors(
101 103 &mut self.root,
102 104 path,
103 105 |ancestor| {
104 106 if tracked {
105 107 ancestor.tracked_descendants_count += 1
106 108 }
107 109 },
108 110 );
109 111 assert!(
110 112 node.entry.is_none(),
111 113 "duplicate dirstate entry in read"
112 114 );
113 115 assert!(
114 116 node.copy_source.is_none(),
115 117 "duplicate dirstate entry in read"
116 118 );
117 119 node.entry = Some(*entry);
118 node.copy_source = copy_source.map(HgPath::to_owned);
120 node.copy_source = copy_source.map(Cow::Borrowed);
119 121 self.nodes_with_entry_count += 1;
120 122 if copy_source.is_some() {
121 123 self.nodes_with_copy_source_count += 1
122 124 }
123 125 },
124 126 )?;
125 127
126 128 Ok(Some(parents.clone()))
127 129 }
128 130
129 131 fn get_node(&self, path: &HgPath) -> Option<&Node> {
130 132 let mut children = &self.root;
131 133 let mut components = path.components();
132 134 let mut component =
133 135 components.next().expect("expected at least one components");
134 136 loop {
135 137 let child = children.get(component)?;
136 138 if let Some(next_component) = components.next() {
137 139 component = next_component;
138 140 children = &child.children;
139 141 } else {
140 142 return Some(child);
141 143 }
142 144 }
143 145 }
144 146
145 147 /// Returns a mutable reference to the node at `path` if it exists
146 148 ///
147 149 /// This takes `root` instead of `&mut self` so that callers can mutate
148 150 /// other fields while the returned borrow is still valid
149 151 fn get_node_mut<'tree>(
150 root: &'tree mut ChildNodes,
152 root: &'tree mut ChildNodes<'on_disk>,
151 153 path: &HgPath,
152 ) -> Option<&'tree mut Node> {
154 ) -> Option<&'tree mut Node<'on_disk>> {
153 155 Self::get_node_mut_tracing_ancestors(root, path, |_| {})
154 156 }
155 157
156 158 /// Same as `get_node_mut`, and calls `each_ancestor` for each ancestor of
157 159 /// the node.
158 160 ///
159 161 /// Note that `each_ancestor` may be called (with what would be ancestors)
160 162 /// even if it turns out there is no node at `path`.
161 163 fn get_node_mut_tracing_ancestors<'tree>(
162 root: &'tree mut ChildNodes,
164 root: &'tree mut ChildNodes<'on_disk>,
163 165 path: &HgPath,
164 166 mut each_ancestor: impl FnMut(&mut Node),
165 ) -> Option<&'tree mut Node> {
167 ) -> Option<&'tree mut Node<'on_disk>> {
166 168 let mut children = root;
167 169 let mut components = path.components();
168 170 let mut component =
169 171 components.next().expect("expected at least one components");
170 172 loop {
171 173 let child = children.get_mut(component)?;
172 174 if let Some(next_component) = components.next() {
173 175 each_ancestor(child);
174 176 component = next_component;
175 177 children = &mut child.children;
176 178 } else {
177 179 return Some(child);
178 180 }
179 181 }
180 182 }
181 183
182 184 fn get_or_insert_node<'tree>(
183 root: &'tree mut ChildNodes,
185 root: &'tree mut ChildNodes<'on_disk>,
184 186 path: &HgPath,
185 ) -> &'tree mut Node {
187 ) -> &'tree mut Node<'on_disk> {
186 188 Self::get_or_insert_node_tracing_ancestors(root, path, |_| {})
187 189 }
188 190
189 191 fn get_or_insert_node_tracing_ancestors<'tree>(
190 root: &'tree mut ChildNodes,
192 root: &'tree mut ChildNodes<'on_disk>,
191 193 path: &HgPath,
192 194 mut each_ancestor: impl FnMut(&mut Node),
193 ) -> &'tree mut Node {
195 ) -> &'tree mut Node<'on_disk> {
194 196 let mut child_nodes = root;
195 197 let mut inclusive_ancestor_paths =
196 198 WithBasename::inclusive_ancestors_of(path);
197 199 let mut ancestor_path = inclusive_ancestor_paths
198 200 .next()
199 201 .expect("expected at least one inclusive ancestor");
200 202 loop {
201 203 // TODO: can we avoid allocating an owned key in cases where the
202 204 // map already contains that key, without introducing double
203 205 // lookup?
204 206 let child_node =
205 207 child_nodes.entry(ancestor_path.to_owned()).or_default();
206 208 if let Some(next) = inclusive_ancestor_paths.next() {
207 209 each_ancestor(child_node);
208 210 ancestor_path = next;
209 211 child_nodes = &mut child_node.children;
210 212 } else {
211 213 return child_node;
212 214 }
213 215 }
214 216 }
215 217
216 218 fn add_or_remove_file(
217 219 &mut self,
218 220 path: &HgPath,
219 221 old_state: EntryState,
220 222 new_entry: DirstateEntry,
221 223 ) {
222 224 let tracked_count_increment =
223 225 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
224 226 (false, true) => 1,
225 227 (true, false) => -1,
226 228 _ => 0,
227 229 };
228 230
229 231 let node = Self::get_or_insert_node_tracing_ancestors(
230 232 &mut self.root,
231 233 path,
232 234 |ancestor| {
233 235 // We can’t use `+= increment` because the counter is unsigned,
234 236 // and we want debug builds to detect accidental underflow
235 237 // through zero
236 238 match tracked_count_increment {
237 239 1 => ancestor.tracked_descendants_count += 1,
238 240 -1 => ancestor.tracked_descendants_count -= 1,
239 241 _ => {}
240 242 }
241 243 },
242 244 );
243 245 if node.entry.is_none() {
244 246 self.nodes_with_entry_count += 1
245 247 }
246 248 node.entry = Some(new_entry)
247 249 }
248 250
249 251 fn iter_nodes<'a>(
250 252 &'a self,
251 253 ) -> impl Iterator<Item = (&'a HgPath, &'a Node)> + 'a {
252 254 // Depth first tree traversal.
253 255 //
254 256 // If we could afford internal iteration and recursion,
255 257 // this would look like:
256 258 //
257 259 // ```
258 260 // fn traverse_children(
259 261 // children: &ChildNodes,
260 262 // each: &mut impl FnMut(&Node),
261 263 // ) {
262 264 // for child in children.values() {
263 265 // traverse_children(&child.children, each);
264 266 // each(child);
265 267 // }
266 268 // }
267 269 // ```
268 270 //
269 271 // However we want an external iterator and therefore can’t use the
270 272 // call stack. Use an explicit stack instead:
271 273 let mut stack = Vec::new();
272 274 let mut iter = self.root.iter();
273 275 std::iter::from_fn(move || {
274 276 while let Some((key, child_node)) = iter.next() {
275 277 // Pseudo-recursion
276 278 let new_iter = child_node.children.iter();
277 279 let old_iter = std::mem::replace(&mut iter, new_iter);
278 280 let key = &**key.full_path();
279 281 stack.push((key, child_node, old_iter));
280 282 }
281 283 // Found the end of a `children.iter()` iterator.
282 284 if let Some((key, child_node, next_iter)) = stack.pop() {
283 285 // "Return" from pseudo-recursion by restoring state from the
284 286 // explicit stack
285 287 iter = next_iter;
286 288
287 289 Some((key, child_node))
288 290 } else {
289 291 // Reached the bottom of the stack, we’re done
290 292 None
291 293 }
292 294 })
293 295 }
294 296
295 297 /// Mutable iterator for the `(entry, copy source)` of each node.
296 298 ///
297 299 /// It would not be safe to yield mutable references to nodes themeselves
298 300 /// with `-> impl Iterator<Item = &mut Node>` since child nodes are
299 301 /// reachable from their ancestor nodes, potentially creating multiple
300 302 /// `&mut` references to a given node.
301 fn iter_node_data_mut<'a>(
302 &'a mut self,
303 ) -> impl Iterator<Item = NodeDataMut<'a>> + 'a {
303 fn iter_node_data_mut<'tree>(
304 &'tree mut self,
305 ) -> impl Iterator<Item = NodeDataMut<'tree, 'on_disk>> + 'tree {
304 306 // Explict stack for pseudo-recursion, see `iter_nodes` above.
305 307 let mut stack = Vec::new();
306 308 let mut iter = self.root.iter_mut();
307 309 std::iter::from_fn(move || {
308 310 while let Some((key, child_node)) = iter.next() {
309 311 // Pseudo-recursion
310 312 let data = (
311 313 &**key.full_path(),
312 314 &mut child_node.entry,
313 315 &mut child_node.copy_source,
314 316 );
315 317 let new_iter = child_node.children.iter_mut();
316 318 let old_iter = std::mem::replace(&mut iter, new_iter);
317 319 stack.push((data, old_iter));
318 320 }
319 321 // Found the end of a `children.values_mut()` iterator.
320 322 if let Some((data, next_iter)) = stack.pop() {
321 323 // "Return" from pseudo-recursion by restoring state from the
322 324 // explicit stack
323 325 iter = next_iter;
324 326
325 327 Some(data)
326 328 } else {
327 329 // Reached the bottom of the stack, we’re done
328 330 None
329 331 }
330 332 })
331 333 }
332 334 }
333 335
334 336 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
335 337 fn clear(&mut self) {
336 338 self.root.clear();
337 339 self.nodes_with_entry_count = 0;
338 340 self.nodes_with_copy_source_count = 0;
339 341 }
340 342
341 343 fn add_file(
342 344 &mut self,
343 345 filename: &HgPath,
344 346 old_state: EntryState,
345 347 entry: DirstateEntry,
346 348 ) -> Result<(), DirstateMapError> {
347 349 self.add_or_remove_file(filename, old_state, entry);
348 350 Ok(())
349 351 }
350 352
351 353 fn remove_file(
352 354 &mut self,
353 355 filename: &HgPath,
354 356 old_state: EntryState,
355 357 size: i32,
356 358 ) -> Result<(), DirstateMapError> {
357 359 let entry = DirstateEntry {
358 360 state: EntryState::Removed,
359 361 mode: 0,
360 362 size,
361 363 mtime: 0,
362 364 };
363 365 self.add_or_remove_file(filename, old_state, entry);
364 366 Ok(())
365 367 }
366 368
367 369 fn drop_file(
368 370 &mut self,
369 371 filename: &HgPath,
370 372 old_state: EntryState,
371 373 ) -> Result<bool, DirstateMapError> {
372 374 let was_tracked = old_state.is_tracked();
373 375 if let Some(node) = Self::get_node_mut_tracing_ancestors(
374 376 &mut self.root,
375 377 filename,
376 378 |ancestor| {
377 379 if was_tracked {
378 380 ancestor.tracked_descendants_count -= 1
379 381 }
380 382 },
381 383 ) {
382 384 let had_entry = node.entry.is_some();
383 385 let had_copy_source = node.copy_source.is_some();
384 386
385 387 // TODO: this leaves in the tree a "non-file" node. Should we
386 388 // remove the node instead, together with ancestor nodes for
387 389 // directories that become empty?
388 390 node.entry = None;
389 391 node.copy_source = None;
390 392
391 393 if had_entry {
392 394 self.nodes_with_entry_count -= 1
393 395 }
394 396 if had_copy_source {
395 397 self.nodes_with_copy_source_count -= 1
396 398 }
397 399 Ok(had_entry)
398 400 } else {
399 401 assert!(!was_tracked);
400 402 Ok(false)
401 403 }
402 404 }
403 405
404 406 fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) {
405 407 for filename in filenames {
406 408 if let Some(node) = Self::get_node_mut(&mut self.root, &filename) {
407 409 if let Some(entry) = node.entry.as_mut() {
408 410 clear_ambiguous_mtime(entry, now);
409 411 }
410 412 }
411 413 }
412 414 }
413 415
414 416 fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool {
415 417 self.get_node(key)
416 418 .and_then(|node| node.entry.as_ref())
417 419 .map_or(false, DirstateEntry::is_non_normal)
418 420 }
419 421
420 422 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
421 423 // Do nothing, this `DirstateMap` does not have a separate "non normal
422 424 // entries" set that need to be kept up to date
423 425 }
424 426
425 427 fn non_normal_or_other_parent_paths(
426 428 &mut self,
427 429 ) -> Box<dyn Iterator<Item = &HgPath> + '_> {
428 430 Box::new(self.iter_nodes().filter_map(|(path, node)| {
429 431 node.entry
430 432 .as_ref()
431 433 .filter(|entry| {
432 434 entry.is_non_normal() || entry.is_from_other_parent()
433 435 })
434 436 .map(|_| path)
435 437 }))
436 438 }
437 439
438 440 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
439 441 // Do nothing, this `DirstateMap` does not have a separate "non normal
440 442 // entries" and "from other parent" sets that need to be recomputed
441 443 }
442 444
443 445 fn iter_non_normal_paths(
444 446 &mut self,
445 447 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
446 448 self.iter_non_normal_paths_panic()
447 449 }
448 450
449 451 fn iter_non_normal_paths_panic(
450 452 &self,
451 453 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
452 454 Box::new(self.iter_nodes().filter_map(|(path, node)| {
453 455 node.entry
454 456 .as_ref()
455 457 .filter(|entry| entry.is_non_normal())
456 458 .map(|_| path)
457 459 }))
458 460 }
459 461
460 462 fn iter_other_parent_paths(
461 463 &mut self,
462 464 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
463 465 Box::new(self.iter_nodes().filter_map(|(path, node)| {
464 466 node.entry
465 467 .as_ref()
466 468 .filter(|entry| entry.is_from_other_parent())
467 469 .map(|_| path)
468 470 }))
469 471 }
470 472
471 473 fn has_tracked_dir(
472 474 &mut self,
473 475 directory: &HgPath,
474 476 ) -> Result<bool, DirstateMapError> {
475 477 if let Some(node) = self.get_node(directory) {
476 478 // A node without a `DirstateEntry` was created to hold child
477 479 // nodes, and is therefore a directory.
478 480 Ok(node.entry.is_none() && node.tracked_descendants_count > 0)
479 481 } else {
480 482 Ok(false)
481 483 }
482 484 }
483 485
484 486 fn has_dir(
485 487 &mut self,
486 488 directory: &HgPath,
487 489 ) -> Result<bool, DirstateMapError> {
488 490 if let Some(node) = self.get_node(directory) {
489 491 // A node without a `DirstateEntry` was created to hold child
490 492 // nodes, and is therefore a directory.
491 493 Ok(node.entry.is_none())
492 494 } else {
493 495 Ok(false)
494 496 }
495 497 }
496 498
497 499 fn pack(
498 500 &mut self,
499 501 parents: DirstateParents,
500 502 now: Timestamp,
501 503 ) -> Result<Vec<u8>, DirstateError> {
502 504 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
503 505 // reallocations
504 506 let mut size = parents.as_bytes().len();
505 507 for (path, node) in self.iter_nodes() {
506 508 if node.entry.is_some() {
507 509 size += packed_entry_size(
508 510 path,
509 511 node.copy_source.as_ref().map(|p| &**p),
510 512 )
511 513 }
512 514 }
513 515
514 516 let mut packed = Vec::with_capacity(size);
515 517 packed.extend(parents.as_bytes());
516 518
517 519 let now: i32 = now.0.try_into().expect("time overflow");
518 520 for (path, opt_entry, copy_source) in self.iter_node_data_mut() {
519 521 if let Some(entry) = opt_entry {
520 522 clear_ambiguous_mtime(entry, now);
521 523 pack_entry(
522 524 path,
523 525 entry,
524 526 copy_source.as_ref().map(|p| &**p),
525 527 &mut packed,
526 528 );
527 529 }
528 530 }
529 531 Ok(packed)
530 532 }
531 533
532 534 fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
533 535 // Do nothing, this `DirstateMap` does not a separate `all_dirs` that
534 536 // needs to be recomputed
535 537 Ok(())
536 538 }
537 539
538 540 fn set_dirs(&mut self) -> Result<(), DirstateMapError> {
539 541 // Do nothing, this `DirstateMap` does not a separate `dirs` that needs
540 542 // to be recomputed
541 543 Ok(())
542 544 }
543 545
544 546 fn status<'a>(
545 547 &'a mut self,
546 548 matcher: &'a (dyn Matcher + Sync),
547 549 root_dir: PathBuf,
548 550 ignore_files: Vec<PathBuf>,
549 551 options: StatusOptions,
550 552 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
551 553 {
552 554 super::status::status(self, matcher, root_dir, ignore_files, options)
553 555 }
554 556
555 557 fn copy_map_len(&self) -> usize {
556 558 self.nodes_with_copy_source_count
557 559 }
558 560
559 561 fn copy_map_iter(&self) -> CopyMapIter<'_> {
560 562 Box::new(self.iter_nodes().filter_map(|(path, node)| {
561 563 node.copy_source
562 564 .as_ref()
563 565 .map(|copy_source| (path, &**copy_source))
564 566 }))
565 567 }
566 568
567 569 fn copy_map_contains_key(&self, key: &HgPath) -> bool {
568 570 if let Some(node) = self.get_node(key) {
569 571 node.copy_source.is_some()
570 572 } else {
571 573 false
572 574 }
573 575 }
574 576
575 577 fn copy_map_get(&self, key: &HgPath) -> Option<&HgPath> {
576 578 self.get_node(key)?.copy_source.as_ref().map(|p| &**p)
577 579 }
578 580
579 581 fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf> {
580 582 let count = &mut self.nodes_with_copy_source_count;
581 583 Self::get_node_mut(&mut self.root, key).and_then(|node| {
582 584 if node.copy_source.is_some() {
583 585 *count -= 1
584 586 }
585 node.copy_source.take()
587 node.copy_source.take().map(Cow::into_owned)
586 588 })
587 589 }
588 590
589 591 fn copy_map_insert(
590 592 &mut self,
591 593 key: HgPathBuf,
592 594 value: HgPathBuf,
593 595 ) -> Option<HgPathBuf> {
594 596 let node = Self::get_or_insert_node(&mut self.root, &key);
595 597 if node.copy_source.is_none() {
596 598 self.nodes_with_copy_source_count += 1
597 599 }
598 node.copy_source.replace(value)
600 node.copy_source.replace(value.into()).map(Cow::into_owned)
599 601 }
600 602
601 603 fn len(&self) -> usize {
602 604 self.nodes_with_entry_count
603 605 }
604 606
605 607 fn contains_key(&self, key: &HgPath) -> bool {
606 608 self.get(key).is_some()
607 609 }
608 610
609 611 fn get(&self, key: &HgPath) -> Option<&DirstateEntry> {
610 612 self.get_node(key)?.entry.as_ref()
611 613 }
612 614
613 615 fn iter(&self) -> StateMapIter<'_> {
614 616 Box::new(self.iter_nodes().filter_map(|(path, node)| {
615 617 node.entry.as_ref().map(|entry| (path, entry))
616 618 }))
617 619 }
618 620 }
General Comments 0
You need to be logged in to leave comments. Login now