Show More
@@ -3,7 +3,6 b' use crate::utils::hg_path::HgPathBuf;' | |||
|
3 | 3 | use crate::Revision; |
|
4 | 4 | use crate::NULL_REVISION; |
|
5 | 5 | |
|
6 | use im_rc::ordmap::DiffItem; | |
|
7 | 6 | use im_rc::ordmap::Entry; |
|
8 | 7 | use im_rc::ordmap::OrdMap; |
|
9 | 8 | use im_rc::OrdSet; |
@@ -624,210 +623,40 b' fn add_one_copy(' | |||
|
624 | 623 | fn merge_copies_dict( |
|
625 | 624 | path_map: &TwoWayPathMap, |
|
626 | 625 | current_merge: Revision, |
|
627 |
|
|
|
628 |
|
|
|
626 | minor: InternalPathCopies, | |
|
627 | major: InternalPathCopies, | |
|
629 | 628 | changes: &ChangedFiles, |
|
630 | 629 | ) -> InternalPathCopies { |
|
631 | // This closure exist as temporary help while multiple developper are | |
|
632 | // actively working on this code. Feel free to re-inline it once this | |
|
633 | // code is more settled. | |
|
634 | let cmp_value = | |
|
635 | |dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| { | |
|
636 |
c |
|
|
637 |
|
|
|
630 | use crate::utils::{ordmap_union_with_merge, MergeResult}; | |
|
631 | ||
|
632 | ordmap_union_with_merge(minor, major, |dest, src_minor, src_major| { | |
|
633 | let (pick, overwrite) = compare_value( | |
|
634 | path_map, | |
|
635 | current_merge, | |
|
636 | changes, | |
|
637 | dest, | |
|
638 | src_minor, | |
|
639 | src_major, | |
|
640 | ); | |
|
641 | if overwrite { | |
|
642 | let (winner, loser) = match pick { | |
|
643 | MergePick::Major | MergePick::Any => (src_major, src_minor), | |
|
644 | MergePick::Minor => (src_minor, src_major), | |
|
645 | }; | |
|
646 | MergeResult::UseNewValue(CopySource::new_from_merge( | |
|
638 | 647 | current_merge, |
|
639 |
|
|
|
640 |
|
|
|
641 |
|
|
|
642 | src_major, | |
|
643 |
|
|
|
644 | }; | |
|
645 | if minor.is_empty() { | |
|
646 | major | |
|
647 | } else if major.is_empty() { | |
|
648 | minor | |
|
649 | } else if minor.len() * 2 < major.len() { | |
|
650 | // Lets says we are merging two InternalPathCopies instance A and B. | |
|
651 | // | |
|
652 | // If A contains N items, the merge result will never contains more | |
|
653 | // than N values differents than the one in A | |
|
654 | // | |
|
655 | // If B contains M items, with M > N, the merge result will always | |
|
656 | // result in a minimum of M - N value differents than the on in | |
|
657 | // A | |
|
658 | // | |
|
659 | // As a result, if N < (M-N), we know that simply iterating over A will | |
|
660 | // yield less difference than iterating over the difference | |
|
661 | // between A and B. | |
|
662 | // | |
|
663 | // This help performance a lot in case were a tiny | |
|
664 | // InternalPathCopies is merged with a much larger one. | |
|
665 | for (dest, src_minor) in minor { | |
|
666 | let src_major = major.get(&dest); | |
|
667 | match src_major { | |
|
668 | None => { | |
|
669 | major.insert(dest, src_minor); | |
|
670 | } | |
|
671 | Some(src_major) => { | |
|
672 | let (pick, overwrite) = | |
|
673 | cmp_value(&dest, &src_minor, src_major); | |
|
674 | if overwrite { | |
|
675 | let src = match pick { | |
|
676 | MergePick::Major => CopySource::new_from_merge( | |
|
677 | current_merge, | |
|
678 | src_major, | |
|
679 | &src_minor, | |
|
680 | ), | |
|
681 | MergePick::Minor => CopySource::new_from_merge( | |
|
682 | current_merge, | |
|
683 | &src_minor, | |
|
684 | src_major, | |
|
685 | ), | |
|
686 | MergePick::Any => CopySource::new_from_merge( | |
|
687 | current_merge, | |
|
688 | src_major, | |
|
689 | &src_minor, | |
|
690 | ), | |
|
691 | }; | |
|
692 | major.insert(dest, src); | |
|
693 | } else { | |
|
694 | match pick { | |
|
695 | MergePick::Any | MergePick::Major => None, | |
|
696 | MergePick::Minor => major.insert(dest, src_minor), | |
|
697 | }; | |
|
698 | } | |
|
699 | } | |
|
700 | }; | |
|
701 | } | |
|
702 | major | |
|
703 | } else if major.len() * 2 < minor.len() { | |
|
704 | // This use the same rational than the previous block. | |
|
705 | // (Check previous block documentation for details.) | |
|
706 | for (dest, src_major) in major { | |
|
707 | let src_minor = minor.get(&dest); | |
|
708 | match src_minor { | |
|
709 | None => { | |
|
710 | minor.insert(dest, src_major); | |
|
648 | winner, | |
|
649 | loser, | |
|
650 | )) | |
|
651 | } else { | |
|
652 | match pick { | |
|
653 | MergePick::Any | MergePick::Major => { | |
|
654 | MergeResult::UseRightValue | |
|
711 | 655 | } |
|
712 | Some(src_minor) => { | |
|
713 | let (pick, overwrite) = | |
|
714 | cmp_value(&dest, src_minor, &src_major); | |
|
715 | if overwrite { | |
|
716 | let src = match pick { | |
|
717 | MergePick::Major => CopySource::new_from_merge( | |
|
718 | current_merge, | |
|
719 | &src_major, | |
|
720 | src_minor, | |
|
721 | ), | |
|
722 | MergePick::Minor => CopySource::new_from_merge( | |
|
723 | current_merge, | |
|
724 | src_minor, | |
|
725 | &src_major, | |
|
726 | ), | |
|
727 | MergePick::Any => CopySource::new_from_merge( | |
|
728 | current_merge, | |
|
729 | &src_major, | |
|
730 | src_minor, | |
|
731 | ), | |
|
732 | }; | |
|
733 | minor.insert(dest, src); | |
|
734 | } else { | |
|
735 | match pick { | |
|
736 | MergePick::Any | MergePick::Minor => None, | |
|
737 | MergePick::Major => minor.insert(dest, src_major), | |
|
738 | }; | |
|
739 | } | |
|
740 | } | |
|
741 | }; | |
|
742 | } | |
|
743 | minor | |
|
744 | } else { | |
|
745 | let mut override_minor = Vec::new(); | |
|
746 | let mut override_major = Vec::new(); | |
|
747 | ||
|
748 | let mut to_major = |k: &PathToken, v: &CopySource| { | |
|
749 | override_major.push((k.clone(), v.clone())) | |
|
750 | }; | |
|
751 | let mut to_minor = |k: &PathToken, v: &CopySource| { | |
|
752 | override_minor.push((k.clone(), v.clone())) | |
|
753 | }; | |
|
754 | ||
|
755 | // The diff function leverage detection of the identical subpart if | |
|
756 | // minor and major has some common ancestors. This make it very | |
|
757 | // fast is most case. | |
|
758 | // | |
|
759 | // In case where the two map are vastly different in size, the current | |
|
760 | // approach is still slowish because the iteration will iterate over | |
|
761 | // all the "exclusive" content of the larger on. This situation can be | |
|
762 | // frequent when the subgraph of revision we are processing has a lot | |
|
763 | // of roots. Each roots adding they own fully new map to the mix (and | |
|
764 | // likely a small map, if the path from the root to the "main path" is | |
|
765 | // small. | |
|
766 | // | |
|
767 | // We could do better by detecting such situation and processing them | |
|
768 | // differently. | |
|
769 | for d in minor.diff(&major) { | |
|
770 | match d { | |
|
771 | DiffItem::Add(k, v) => to_minor(k, v), | |
|
772 | DiffItem::Remove(k, v) => to_major(k, v), | |
|
773 | DiffItem::Update { old, new } => { | |
|
774 | let (dest, src_major) = new; | |
|
775 | let (_, src_minor) = old; | |
|
776 | let (pick, overwrite) = | |
|
777 | cmp_value(dest, src_minor, src_major); | |
|
778 | if overwrite { | |
|
779 | let src = match pick { | |
|
780 | MergePick::Major => CopySource::new_from_merge( | |
|
781 | current_merge, | |
|
782 | src_major, | |
|
783 | src_minor, | |
|
784 | ), | |
|
785 | MergePick::Minor => CopySource::new_from_merge( | |
|
786 | current_merge, | |
|
787 | src_minor, | |
|
788 | src_major, | |
|
789 | ), | |
|
790 | MergePick::Any => CopySource::new_from_merge( | |
|
791 | current_merge, | |
|
792 | src_major, | |
|
793 | src_minor, | |
|
794 | ), | |
|
795 | }; | |
|
796 | to_minor(dest, &src); | |
|
797 | to_major(dest, &src); | |
|
798 | } else { | |
|
799 | match pick { | |
|
800 | MergePick::Major => to_minor(dest, src_major), | |
|
801 | MergePick::Minor => to_major(dest, src_minor), | |
|
802 | // If the two entry are identical, no need to do | |
|
803 | // anything (but diff should not have yield them) | |
|
804 | MergePick::Any => unreachable!(), | |
|
805 | } | |
|
806 | } | |
|
807 | } | |
|
808 | }; | |
|
809 | } | |
|
810 | ||
|
811 | let updates; | |
|
812 | let mut result; | |
|
813 | if override_major.is_empty() { | |
|
814 | result = major | |
|
815 | } else if override_minor.is_empty() { | |
|
816 | result = minor | |
|
817 | } else { | |
|
818 | if override_minor.len() < override_major.len() { | |
|
819 | updates = override_minor; | |
|
820 | result = minor; | |
|
821 | } else { | |
|
822 | updates = override_major; | |
|
823 | result = major; | |
|
824 | } | |
|
825 | for (k, v) in updates { | |
|
826 | result.insert(k, v); | |
|
656 | MergePick::Minor => MergeResult::UseLeftValue, | |
|
827 | 657 | } |
|
828 | 658 | } |
|
829 | result | |
|
830 | } | |
|
659 | }) | |
|
831 | 660 | } |
|
832 | 661 | |
|
833 | 662 | /// represent the side that should prevail when merging two |
@@ -9,6 +9,8 b'' | |||
|
9 | 9 | |
|
10 | 10 | use crate::errors::{HgError, IoErrorContext}; |
|
11 | 11 | use crate::utils::hg_path::HgPath; |
|
12 | use im_rc::ordmap::DiffItem; | |
|
13 | use im_rc::ordmap::OrdMap; | |
|
12 | 14 | use std::{io::Write, ops::Deref}; |
|
13 | 15 | |
|
14 | 16 | pub mod files; |
@@ -199,3 +201,151 b' pub fn current_exe() -> Result<std::path' | |||
|
199 | 201 | context: IoErrorContext::CurrentExe, |
|
200 | 202 | }) |
|
201 | 203 | } |
|
204 | ||
|
205 | pub(crate) enum MergeResult<V> { | |
|
206 | UseLeftValue, | |
|
207 | UseRightValue, | |
|
208 | UseNewValue(V), | |
|
209 | } | |
|
210 | ||
|
211 | /// Return the union of the two given maps, | |
|
212 | /// calling `merge(key, left_value, right_value)` to resolve keys that exist in | |
|
213 | /// both. | |
|
214 | /// | |
|
215 | /// CC https://github.com/bodil/im-rs/issues/166 | |
|
216 | pub(crate) fn ordmap_union_with_merge<K, V>( | |
|
217 | left: OrdMap<K, V>, | |
|
218 | right: OrdMap<K, V>, | |
|
219 | mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, | |
|
220 | ) -> OrdMap<K, V> | |
|
221 | where | |
|
222 | K: Clone + Ord, | |
|
223 | V: Clone + PartialEq, | |
|
224 | { | |
|
225 | if left.ptr_eq(&right) { | |
|
226 | // One of the two maps is an unmodified clone of the other | |
|
227 | left | |
|
228 | } else if left.len() / 2 > right.len() { | |
|
229 | // When two maps have different sizes, | |
|
230 | // their size difference is a lower bound on | |
|
231 | // how many keys of the larger map are not also in the smaller map. | |
|
232 | // This in turn is a lower bound on the number of differences in | |
|
233 | // `OrdMap::diff` and the "amount of work" that would be done | |
|
234 | // by `ordmap_union_with_merge_by_diff`. | |
|
235 | // | |
|
236 | // Here `left` is more than twice the size of `right`, | |
|
237 | // so the number of differences is more than the total size of | |
|
238 | // `right`. Therefore an algorithm based on iterating `right` | |
|
239 | // is more efficient. | |
|
240 | // | |
|
241 | // This helps a lot when a tiny (or empty) map is merged | |
|
242 | // with a large one. | |
|
243 | ordmap_union_with_merge_by_iter(left, right, merge) | |
|
244 | } else if left.len() < right.len() / 2 { | |
|
245 | // Same as above but with `left` and `right` swapped | |
|
246 | ordmap_union_with_merge_by_iter(right, left, |key, a, b| { | |
|
247 | // Also swapped in `merge` arguments: | |
|
248 | match merge(key, b, a) { | |
|
249 | MergeResult::UseNewValue(v) => MergeResult::UseNewValue(v), | |
|
250 | // … and swap back in `merge` result: | |
|
251 | MergeResult::UseLeftValue => MergeResult::UseRightValue, | |
|
252 | MergeResult::UseRightValue => MergeResult::UseLeftValue, | |
|
253 | } | |
|
254 | }) | |
|
255 | } else { | |
|
256 | // For maps of similar size, use the algorithm based on `OrdMap::diff` | |
|
257 | ordmap_union_with_merge_by_diff(left, right, merge) | |
|
258 | } | |
|
259 | } | |
|
260 | ||
|
261 | /// Efficient if `right` is much smaller than `left` | |
|
262 | fn ordmap_union_with_merge_by_iter<K, V>( | |
|
263 | mut left: OrdMap<K, V>, | |
|
264 | right: OrdMap<K, V>, | |
|
265 | mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, | |
|
266 | ) -> OrdMap<K, V> | |
|
267 | where | |
|
268 | K: Clone + Ord, | |
|
269 | V: Clone, | |
|
270 | { | |
|
271 | for (key, right_value) in right { | |
|
272 | match left.get(&key) { | |
|
273 | None => { | |
|
274 | left.insert(key, right_value); | |
|
275 | } | |
|
276 | Some(left_value) => match merge(&key, left_value, &right_value) { | |
|
277 | MergeResult::UseLeftValue => {} | |
|
278 | MergeResult::UseRightValue => { | |
|
279 | left.insert(key, right_value); | |
|
280 | } | |
|
281 | MergeResult::UseNewValue(new_value) => { | |
|
282 | left.insert(key, new_value); | |
|
283 | } | |
|
284 | }, | |
|
285 | } | |
|
286 | } | |
|
287 | left | |
|
288 | } | |
|
289 | ||
|
290 | /// Fallback when both maps are of similar size | |
|
291 | fn ordmap_union_with_merge_by_diff<K, V>( | |
|
292 | mut left: OrdMap<K, V>, | |
|
293 | mut right: OrdMap<K, V>, | |
|
294 | mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, | |
|
295 | ) -> OrdMap<K, V> | |
|
296 | where | |
|
297 | K: Clone + Ord, | |
|
298 | V: Clone + PartialEq, | |
|
299 | { | |
|
300 | // (key, value) pairs that would need to be inserted in either map | |
|
301 | // in order to turn it into the union. | |
|
302 | // | |
|
303 | // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted, | |
|
304 | // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>` | |
|
305 | // with `left_updates` only borrowing from `right` and `right_updates` from | |
|
306 | // `left`, and with `Cow::Owned` used for `MergeResult::UseNewValue`. | |
|
307 | // | |
|
308 | // This would allow moving all `.clone()` calls to after we’ve decided | |
|
309 | // which of `right_updates` or `left_updates` to use | |
|
310 | // (value ones becoming `Cow::into_owned`), | |
|
311 | // and avoid making clones we don’t end up using. | |
|
312 | let mut left_updates = Vec::new(); | |
|
313 | let mut right_updates = Vec::new(); | |
|
314 | ||
|
315 | for difference in left.diff(&right) { | |
|
316 | match difference { | |
|
317 | DiffItem::Add(key, value) => { | |
|
318 | left_updates.push((key.clone(), value.clone())) | |
|
319 | } | |
|
320 | DiffItem::Remove(key, value) => { | |
|
321 | right_updates.push((key.clone(), value.clone())) | |
|
322 | } | |
|
323 | DiffItem::Update { | |
|
324 | old: (key, left_value), | |
|
325 | new: (_, right_value), | |
|
326 | } => match merge(key, left_value, right_value) { | |
|
327 | MergeResult::UseLeftValue => { | |
|
328 | right_updates.push((key.clone(), left_value.clone())) | |
|
329 | } | |
|
330 | MergeResult::UseRightValue => { | |
|
331 | left_updates.push((key.clone(), right_value.clone())) | |
|
332 | } | |
|
333 | MergeResult::UseNewValue(new_value) => { | |
|
334 | left_updates.push((key.clone(), new_value.clone())); | |
|
335 | right_updates.push((key.clone(), new_value)) | |
|
336 | } | |
|
337 | }, | |
|
338 | } | |
|
339 | } | |
|
340 | if left_updates.len() < right_updates.len() { | |
|
341 | for (key, value) in left_updates { | |
|
342 | left.insert(key, value); | |
|
343 | } | |
|
344 | left | |
|
345 | } else { | |
|
346 | for (key, value) in right_updates { | |
|
347 | right.insert(key, value); | |
|
348 | } | |
|
349 | right | |
|
350 | } | |
|
351 | } |
General Comments 0
You need to be logged in to leave comments.
Login now