##// END OF EJS Templates
copies-rust: extract generic map merge logic from merge_copies_dict...
Simon Sapin -
r47328:435d9fc7 default
parent child Browse files
Show More
@@ -3,7 +3,6 b' use crate::utils::hg_path::HgPathBuf;'
3 use crate::Revision;
3 use crate::Revision;
4 use crate::NULL_REVISION;
4 use crate::NULL_REVISION;
5
5
6 use im_rc::ordmap::DiffItem;
7 use im_rc::ordmap::Entry;
6 use im_rc::ordmap::Entry;
8 use im_rc::ordmap::OrdMap;
7 use im_rc::ordmap::OrdMap;
9 use im_rc::OrdSet;
8 use im_rc::OrdSet;
@@ -624,210 +623,40 b' fn add_one_copy('
624 fn merge_copies_dict(
623 fn merge_copies_dict(
625 path_map: &TwoWayPathMap,
624 path_map: &TwoWayPathMap,
626 current_merge: Revision,
625 current_merge: Revision,
627 mut minor: InternalPathCopies,
626 minor: InternalPathCopies,
628 mut major: InternalPathCopies,
627 major: InternalPathCopies,
629 changes: &ChangedFiles,
628 changes: &ChangedFiles,
630 ) -> InternalPathCopies {
629 ) -> InternalPathCopies {
631 // This closure exist as temporary help while multiple developper are
630 use crate::utils::{ordmap_union_with_merge, MergeResult};
632 // actively working on this code. Feel free to re-inline it once this
631
633 // code is more settled.
632 ordmap_union_with_merge(minor, major, |dest, src_minor, src_major| {
634 let cmp_value =
633 let (pick, overwrite) = compare_value(
635 |dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| {
634 path_map,
636 compare_value(
635 current_merge,
637 path_map,
636 changes,
637 dest,
638 src_minor,
639 src_major,
640 );
641 if overwrite {
642 let (winner, loser) = match pick {
643 MergePick::Major | MergePick::Any => (src_major, src_minor),
644 MergePick::Minor => (src_minor, src_major),
645 };
646 MergeResult::UseNewValue(CopySource::new_from_merge(
638 current_merge,
647 current_merge,
639 changes,
648 winner,
640 dest,
649 loser,
641 src_minor,
650 ))
642 src_major,
651 } else {
643 )
652 match pick {
644 };
653 MergePick::Any | MergePick::Major => {
645 if minor.is_empty() {
654 MergeResult::UseRightValue
646 major
647 } else if major.is_empty() {
648 minor
649 } else if minor.len() * 2 < major.len() {
650 // Lets says we are merging two InternalPathCopies instance A and B.
651 //
652 // If A contains N items, the merge result will never contains more
653 // than N values differents than the one in A
654 //
655 // If B contains M items, with M > N, the merge result will always
656 // result in a minimum of M - N value differents than the on in
657 // A
658 //
659 // As a result, if N < (M-N), we know that simply iterating over A will
660 // yield less difference than iterating over the difference
661 // between A and B.
662 //
663 // This help performance a lot in case were a tiny
664 // InternalPathCopies is merged with a much larger one.
665 for (dest, src_minor) in minor {
666 let src_major = major.get(&dest);
667 match src_major {
668 None => {
669 major.insert(dest, src_minor);
670 }
671 Some(src_major) => {
672 let (pick, overwrite) =
673 cmp_value(&dest, &src_minor, src_major);
674 if overwrite {
675 let src = match pick {
676 MergePick::Major => CopySource::new_from_merge(
677 current_merge,
678 src_major,
679 &src_minor,
680 ),
681 MergePick::Minor => CopySource::new_from_merge(
682 current_merge,
683 &src_minor,
684 src_major,
685 ),
686 MergePick::Any => CopySource::new_from_merge(
687 current_merge,
688 src_major,
689 &src_minor,
690 ),
691 };
692 major.insert(dest, src);
693 } else {
694 match pick {
695 MergePick::Any | MergePick::Major => None,
696 MergePick::Minor => major.insert(dest, src_minor),
697 };
698 }
699 }
700 };
701 }
702 major
703 } else if major.len() * 2 < minor.len() {
704 // This use the same rational than the previous block.
705 // (Check previous block documentation for details.)
706 for (dest, src_major) in major {
707 let src_minor = minor.get(&dest);
708 match src_minor {
709 None => {
710 minor.insert(dest, src_major);
711 }
655 }
712 Some(src_minor) => {
656 MergePick::Minor => MergeResult::UseLeftValue,
713 let (pick, overwrite) =
714 cmp_value(&dest, src_minor, &src_major);
715 if overwrite {
716 let src = match pick {
717 MergePick::Major => CopySource::new_from_merge(
718 current_merge,
719 &src_major,
720 src_minor,
721 ),
722 MergePick::Minor => CopySource::new_from_merge(
723 current_merge,
724 src_minor,
725 &src_major,
726 ),
727 MergePick::Any => CopySource::new_from_merge(
728 current_merge,
729 &src_major,
730 src_minor,
731 ),
732 };
733 minor.insert(dest, src);
734 } else {
735 match pick {
736 MergePick::Any | MergePick::Minor => None,
737 MergePick::Major => minor.insert(dest, src_major),
738 };
739 }
740 }
741 };
742 }
743 minor
744 } else {
745 let mut override_minor = Vec::new();
746 let mut override_major = Vec::new();
747
748 let mut to_major = |k: &PathToken, v: &CopySource| {
749 override_major.push((k.clone(), v.clone()))
750 };
751 let mut to_minor = |k: &PathToken, v: &CopySource| {
752 override_minor.push((k.clone(), v.clone()))
753 };
754
755 // The diff function leverage detection of the identical subpart if
756 // minor and major has some common ancestors. This make it very
757 // fast is most case.
758 //
759 // In case where the two map are vastly different in size, the current
760 // approach is still slowish because the iteration will iterate over
761 // all the "exclusive" content of the larger on. This situation can be
762 // frequent when the subgraph of revision we are processing has a lot
763 // of roots. Each roots adding they own fully new map to the mix (and
764 // likely a small map, if the path from the root to the "main path" is
765 // small.
766 //
767 // We could do better by detecting such situation and processing them
768 // differently.
769 for d in minor.diff(&major) {
770 match d {
771 DiffItem::Add(k, v) => to_minor(k, v),
772 DiffItem::Remove(k, v) => to_major(k, v),
773 DiffItem::Update { old, new } => {
774 let (dest, src_major) = new;
775 let (_, src_minor) = old;
776 let (pick, overwrite) =
777 cmp_value(dest, src_minor, src_major);
778 if overwrite {
779 let src = match pick {
780 MergePick::Major => CopySource::new_from_merge(
781 current_merge,
782 src_major,
783 src_minor,
784 ),
785 MergePick::Minor => CopySource::new_from_merge(
786 current_merge,
787 src_minor,
788 src_major,
789 ),
790 MergePick::Any => CopySource::new_from_merge(
791 current_merge,
792 src_major,
793 src_minor,
794 ),
795 };
796 to_minor(dest, &src);
797 to_major(dest, &src);
798 } else {
799 match pick {
800 MergePick::Major => to_minor(dest, src_major),
801 MergePick::Minor => to_major(dest, src_minor),
802 // If the two entry are identical, no need to do
803 // anything (but diff should not have yield them)
804 MergePick::Any => unreachable!(),
805 }
806 }
807 }
808 };
809 }
810
811 let updates;
812 let mut result;
813 if override_major.is_empty() {
814 result = major
815 } else if override_minor.is_empty() {
816 result = minor
817 } else {
818 if override_minor.len() < override_major.len() {
819 updates = override_minor;
820 result = minor;
821 } else {
822 updates = override_major;
823 result = major;
824 }
825 for (k, v) in updates {
826 result.insert(k, v);
827 }
657 }
828 }
658 }
829 result
659 })
830 }
831 }
660 }
832
661
833 /// represent the side that should prevail when merging two
662 /// represent the side that should prevail when merging two
@@ -9,6 +9,8 b''
9
9
10 use crate::errors::{HgError, IoErrorContext};
10 use crate::errors::{HgError, IoErrorContext};
11 use crate::utils::hg_path::HgPath;
11 use crate::utils::hg_path::HgPath;
12 use im_rc::ordmap::DiffItem;
13 use im_rc::ordmap::OrdMap;
12 use std::{io::Write, ops::Deref};
14 use std::{io::Write, ops::Deref};
13
15
14 pub mod files;
16 pub mod files;
@@ -199,3 +201,151 b' pub fn current_exe() -> Result<std::path'
199 context: IoErrorContext::CurrentExe,
201 context: IoErrorContext::CurrentExe,
200 })
202 })
201 }
203 }
204
205 pub(crate) enum MergeResult<V> {
206 UseLeftValue,
207 UseRightValue,
208 UseNewValue(V),
209 }
210
211 /// Return the union of the two given maps,
212 /// calling `merge(key, left_value, right_value)` to resolve keys that exist in
213 /// both.
214 ///
215 /// CC https://github.com/bodil/im-rs/issues/166
216 pub(crate) fn ordmap_union_with_merge<K, V>(
217 left: OrdMap<K, V>,
218 right: OrdMap<K, V>,
219 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
220 ) -> OrdMap<K, V>
221 where
222 K: Clone + Ord,
223 V: Clone + PartialEq,
224 {
225 if left.ptr_eq(&right) {
226 // One of the two maps is an unmodified clone of the other
227 left
228 } else if left.len() / 2 > right.len() {
229 // When two maps have different sizes,
230 // their size difference is a lower bound on
231 // how many keys of the larger map are not also in the smaller map.
232 // This in turn is a lower bound on the number of differences in
233 // `OrdMap::diff` and the "amount of work" that would be done
234 // by `ordmap_union_with_merge_by_diff`.
235 //
236 // Here `left` is more than twice the size of `right`,
237 // so the number of differences is more than the total size of
238 // `right`. Therefore an algorithm based on iterating `right`
239 // is more efficient.
240 //
241 // This helps a lot when a tiny (or empty) map is merged
242 // with a large one.
243 ordmap_union_with_merge_by_iter(left, right, merge)
244 } else if left.len() < right.len() / 2 {
245 // Same as above but with `left` and `right` swapped
246 ordmap_union_with_merge_by_iter(right, left, |key, a, b| {
247 // Also swapped in `merge` arguments:
248 match merge(key, b, a) {
249 MergeResult::UseNewValue(v) => MergeResult::UseNewValue(v),
250 // … and swap back in `merge` result:
251 MergeResult::UseLeftValue => MergeResult::UseRightValue,
252 MergeResult::UseRightValue => MergeResult::UseLeftValue,
253 }
254 })
255 } else {
256 // For maps of similar size, use the algorithm based on `OrdMap::diff`
257 ordmap_union_with_merge_by_diff(left, right, merge)
258 }
259 }
260
261 /// Efficient if `right` is much smaller than `left`
262 fn ordmap_union_with_merge_by_iter<K, V>(
263 mut left: OrdMap<K, V>,
264 right: OrdMap<K, V>,
265 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
266 ) -> OrdMap<K, V>
267 where
268 K: Clone + Ord,
269 V: Clone,
270 {
271 for (key, right_value) in right {
272 match left.get(&key) {
273 None => {
274 left.insert(key, right_value);
275 }
276 Some(left_value) => match merge(&key, left_value, &right_value) {
277 MergeResult::UseLeftValue => {}
278 MergeResult::UseRightValue => {
279 left.insert(key, right_value);
280 }
281 MergeResult::UseNewValue(new_value) => {
282 left.insert(key, new_value);
283 }
284 },
285 }
286 }
287 left
288 }
289
290 /// Fallback when both maps are of similar size
291 fn ordmap_union_with_merge_by_diff<K, V>(
292 mut left: OrdMap<K, V>,
293 mut right: OrdMap<K, V>,
294 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
295 ) -> OrdMap<K, V>
296 where
297 K: Clone + Ord,
298 V: Clone + PartialEq,
299 {
300 // (key, value) pairs that would need to be inserted in either map
301 // in order to turn it into the union.
302 //
303 // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted,
304 // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>`
305 // with `left_updates` only borrowing from `right` and `right_updates` from
306 // `left`, and with `Cow::Owned` used for `MergeResult::UseNewValue`.
307 //
308 // This would allow moving all `.clone()` calls to after we’ve decided
309 // which of `right_updates` or `left_updates` to use
310 // (value ones becoming `Cow::into_owned`),
311 // and avoid making clones we don’t end up using.
312 let mut left_updates = Vec::new();
313 let mut right_updates = Vec::new();
314
315 for difference in left.diff(&right) {
316 match difference {
317 DiffItem::Add(key, value) => {
318 left_updates.push((key.clone(), value.clone()))
319 }
320 DiffItem::Remove(key, value) => {
321 right_updates.push((key.clone(), value.clone()))
322 }
323 DiffItem::Update {
324 old: (key, left_value),
325 new: (_, right_value),
326 } => match merge(key, left_value, right_value) {
327 MergeResult::UseLeftValue => {
328 right_updates.push((key.clone(), left_value.clone()))
329 }
330 MergeResult::UseRightValue => {
331 left_updates.push((key.clone(), right_value.clone()))
332 }
333 MergeResult::UseNewValue(new_value) => {
334 left_updates.push((key.clone(), new_value.clone()));
335 right_updates.push((key.clone(), new_value))
336 }
337 },
338 }
339 }
340 if left_updates.len() < right_updates.len() {
341 for (key, value) in left_updates {
342 left.insert(key, value);
343 }
344 left
345 } else {
346 for (key, value) in right_updates {
347 right.insert(key, value);
348 }
349 right
350 }
351 }
General Comments 0
You need to be logged in to leave comments. Login now