# HG changeset patch # User Simon Sapin # Date 2020-12-23 10:48:16 # Node ID 435d9fc72646c164dac7079dbdba9ba245c01740 # Parent 60b2b7ecf9cbd30d14ee8fd26f15ae3f942566b2 copies-rust: extract generic map merge logic from merge_copies_dict This deduplicates the copy-tracing-specific logic Differential Revision: https://phab.mercurial-scm.org/D9682 diff --git a/rust/hg-core/src/copy_tracing.rs b/rust/hg-core/src/copy_tracing.rs --- a/rust/hg-core/src/copy_tracing.rs +++ b/rust/hg-core/src/copy_tracing.rs @@ -3,7 +3,6 @@ use crate::utils::hg_path::HgPathBuf; use crate::Revision; use crate::NULL_REVISION; -use im_rc::ordmap::DiffItem; use im_rc::ordmap::Entry; use im_rc::ordmap::OrdMap; use im_rc::OrdSet; @@ -624,210 +623,40 @@ fn add_one_copy( fn merge_copies_dict( path_map: &TwoWayPathMap, current_merge: Revision, - mut minor: InternalPathCopies, - mut major: InternalPathCopies, + minor: InternalPathCopies, + major: InternalPathCopies, changes: &ChangedFiles, ) -> InternalPathCopies { - // This closure exist as temporary help while multiple developper are - // actively working on this code. Feel free to re-inline it once this - // code is more settled. - let cmp_value = - |dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| { - compare_value( - path_map, + use crate::utils::{ordmap_union_with_merge, MergeResult}; + + ordmap_union_with_merge(minor, major, |dest, src_minor, src_major| { + let (pick, overwrite) = compare_value( + path_map, + current_merge, + changes, + dest, + src_minor, + src_major, + ); + if overwrite { + let (winner, loser) = match pick { + MergePick::Major | MergePick::Any => (src_major, src_minor), + MergePick::Minor => (src_minor, src_major), + }; + MergeResult::UseNewValue(CopySource::new_from_merge( current_merge, - changes, - dest, - src_minor, - src_major, - ) - }; - if minor.is_empty() { - major - } else if major.is_empty() { - minor - } else if minor.len() * 2 < major.len() { - // Lets says we are merging two InternalPathCopies instance A and B. - // - // If A contains N items, the merge result will never contains more - // than N values differents than the one in A - // - // If B contains M items, with M > N, the merge result will always - // result in a minimum of M - N value differents than the on in - // A - // - // As a result, if N < (M-N), we know that simply iterating over A will - // yield less difference than iterating over the difference - // between A and B. - // - // This help performance a lot in case were a tiny - // InternalPathCopies is merged with a much larger one. - for (dest, src_minor) in minor { - let src_major = major.get(&dest); - match src_major { - None => { - major.insert(dest, src_minor); - } - Some(src_major) => { - let (pick, overwrite) = - cmp_value(&dest, &src_minor, src_major); - if overwrite { - let src = match pick { - MergePick::Major => CopySource::new_from_merge( - current_merge, - src_major, - &src_minor, - ), - MergePick::Minor => CopySource::new_from_merge( - current_merge, - &src_minor, - src_major, - ), - MergePick::Any => CopySource::new_from_merge( - current_merge, - src_major, - &src_minor, - ), - }; - major.insert(dest, src); - } else { - match pick { - MergePick::Any | MergePick::Major => None, - MergePick::Minor => major.insert(dest, src_minor), - }; - } - } - }; - } - major - } else if major.len() * 2 < minor.len() { - // This use the same rational than the previous block. - // (Check previous block documentation for details.) - for (dest, src_major) in major { - let src_minor = minor.get(&dest); - match src_minor { - None => { - minor.insert(dest, src_major); + winner, + loser, + )) + } else { + match pick { + MergePick::Any | MergePick::Major => { + MergeResult::UseRightValue } - Some(src_minor) => { - let (pick, overwrite) = - cmp_value(&dest, src_minor, &src_major); - if overwrite { - let src = match pick { - MergePick::Major => CopySource::new_from_merge( - current_merge, - &src_major, - src_minor, - ), - MergePick::Minor => CopySource::new_from_merge( - current_merge, - src_minor, - &src_major, - ), - MergePick::Any => CopySource::new_from_merge( - current_merge, - &src_major, - src_minor, - ), - }; - minor.insert(dest, src); - } else { - match pick { - MergePick::Any | MergePick::Minor => None, - MergePick::Major => minor.insert(dest, src_major), - }; - } - } - }; - } - minor - } else { - let mut override_minor = Vec::new(); - let mut override_major = Vec::new(); - - let mut to_major = |k: &PathToken, v: &CopySource| { - override_major.push((k.clone(), v.clone())) - }; - let mut to_minor = |k: &PathToken, v: &CopySource| { - override_minor.push((k.clone(), v.clone())) - }; - - // The diff function leverage detection of the identical subpart if - // minor and major has some common ancestors. This make it very - // fast is most case. - // - // In case where the two map are vastly different in size, the current - // approach is still slowish because the iteration will iterate over - // all the "exclusive" content of the larger on. This situation can be - // frequent when the subgraph of revision we are processing has a lot - // of roots. Each roots adding they own fully new map to the mix (and - // likely a small map, if the path from the root to the "main path" is - // small. - // - // We could do better by detecting such situation and processing them - // differently. - for d in minor.diff(&major) { - match d { - DiffItem::Add(k, v) => to_minor(k, v), - DiffItem::Remove(k, v) => to_major(k, v), - DiffItem::Update { old, new } => { - let (dest, src_major) = new; - let (_, src_minor) = old; - let (pick, overwrite) = - cmp_value(dest, src_minor, src_major); - if overwrite { - let src = match pick { - MergePick::Major => CopySource::new_from_merge( - current_merge, - src_major, - src_minor, - ), - MergePick::Minor => CopySource::new_from_merge( - current_merge, - src_minor, - src_major, - ), - MergePick::Any => CopySource::new_from_merge( - current_merge, - src_major, - src_minor, - ), - }; - to_minor(dest, &src); - to_major(dest, &src); - } else { - match pick { - MergePick::Major => to_minor(dest, src_major), - MergePick::Minor => to_major(dest, src_minor), - // If the two entry are identical, no need to do - // anything (but diff should not have yield them) - MergePick::Any => unreachable!(), - } - } - } - }; - } - - let updates; - let mut result; - if override_major.is_empty() { - result = major - } else if override_minor.is_empty() { - result = minor - } else { - if override_minor.len() < override_major.len() { - updates = override_minor; - result = minor; - } else { - updates = override_major; - result = major; - } - for (k, v) in updates { - result.insert(k, v); + MergePick::Minor => MergeResult::UseLeftValue, } } - result - } + }) } /// represent the side that should prevail when merging two diff --git a/rust/hg-core/src/utils.rs b/rust/hg-core/src/utils.rs --- a/rust/hg-core/src/utils.rs +++ b/rust/hg-core/src/utils.rs @@ -9,6 +9,8 @@ use crate::errors::{HgError, IoErrorContext}; use crate::utils::hg_path::HgPath; +use im_rc::ordmap::DiffItem; +use im_rc::ordmap::OrdMap; use std::{io::Write, ops::Deref}; pub mod files; @@ -199,3 +201,151 @@ pub fn current_exe() -> Result { + UseLeftValue, + UseRightValue, + UseNewValue(V), +} + +/// Return the union of the two given maps, +/// calling `merge(key, left_value, right_value)` to resolve keys that exist in +/// both. +/// +/// CC https://github.com/bodil/im-rs/issues/166 +pub(crate) fn ordmap_union_with_merge( + left: OrdMap, + right: OrdMap, + mut merge: impl FnMut(&K, &V, &V) -> MergeResult, +) -> OrdMap +where + K: Clone + Ord, + V: Clone + PartialEq, +{ + if left.ptr_eq(&right) { + // One of the two maps is an unmodified clone of the other + left + } else if left.len() / 2 > right.len() { + // When two maps have different sizes, + // their size difference is a lower bound on + // how many keys of the larger map are not also in the smaller map. + // This in turn is a lower bound on the number of differences in + // `OrdMap::diff` and the "amount of work" that would be done + // by `ordmap_union_with_merge_by_diff`. + // + // Here `left` is more than twice the size of `right`, + // so the number of differences is more than the total size of + // `right`. Therefore an algorithm based on iterating `right` + // is more efficient. + // + // This helps a lot when a tiny (or empty) map is merged + // with a large one. + ordmap_union_with_merge_by_iter(left, right, merge) + } else if left.len() < right.len() / 2 { + // Same as above but with `left` and `right` swapped + ordmap_union_with_merge_by_iter(right, left, |key, a, b| { + // Also swapped in `merge` arguments: + match merge(key, b, a) { + MergeResult::UseNewValue(v) => MergeResult::UseNewValue(v), + // … and swap back in `merge` result: + MergeResult::UseLeftValue => MergeResult::UseRightValue, + MergeResult::UseRightValue => MergeResult::UseLeftValue, + } + }) + } else { + // For maps of similar size, use the algorithm based on `OrdMap::diff` + ordmap_union_with_merge_by_diff(left, right, merge) + } +} + +/// Efficient if `right` is much smaller than `left` +fn ordmap_union_with_merge_by_iter( + mut left: OrdMap, + right: OrdMap, + mut merge: impl FnMut(&K, &V, &V) -> MergeResult, +) -> OrdMap +where + K: Clone + Ord, + V: Clone, +{ + for (key, right_value) in right { + match left.get(&key) { + None => { + left.insert(key, right_value); + } + Some(left_value) => match merge(&key, left_value, &right_value) { + MergeResult::UseLeftValue => {} + MergeResult::UseRightValue => { + left.insert(key, right_value); + } + MergeResult::UseNewValue(new_value) => { + left.insert(key, new_value); + } + }, + } + } + left +} + +/// Fallback when both maps are of similar size +fn ordmap_union_with_merge_by_diff( + mut left: OrdMap, + mut right: OrdMap, + mut merge: impl FnMut(&K, &V, &V) -> MergeResult, +) -> OrdMap +where + K: Clone + Ord, + V: Clone + PartialEq, +{ + // (key, value) pairs that would need to be inserted in either map + // in order to turn it into the union. + // + // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted, + // change these from `Vec<(K, V)>` to `Vec<(&K, Cow)>` + // with `left_updates` only borrowing from `right` and `right_updates` from + // `left`, and with `Cow::Owned` used for `MergeResult::UseNewValue`. + // + // This would allow moving all `.clone()` calls to after we’ve decided + // which of `right_updates` or `left_updates` to use + // (value ones becoming `Cow::into_owned`), + // and avoid making clones we don’t end up using. + let mut left_updates = Vec::new(); + let mut right_updates = Vec::new(); + + for difference in left.diff(&right) { + match difference { + DiffItem::Add(key, value) => { + left_updates.push((key.clone(), value.clone())) + } + DiffItem::Remove(key, value) => { + right_updates.push((key.clone(), value.clone())) + } + DiffItem::Update { + old: (key, left_value), + new: (_, right_value), + } => match merge(key, left_value, right_value) { + MergeResult::UseLeftValue => { + right_updates.push((key.clone(), left_value.clone())) + } + MergeResult::UseRightValue => { + left_updates.push((key.clone(), right_value.clone())) + } + MergeResult::UseNewValue(new_value) => { + left_updates.push((key.clone(), new_value.clone())); + right_updates.push((key.clone(), new_value)) + } + }, + } + } + if left_updates.len() < right_updates.len() { + for (key, value) in left_updates { + left.insert(key, value); + } + left + } else { + for (key, value) in right_updates { + right.insert(key, value); + } + right + } +}