copies-rust: extract generic map merge logic from merge_copies_dict...
Simon Sapin
r47328:435d9fc7 default
@@ -1,933 +1,762 @@
1 use crate::utils::hg_path::HgPath;
1 use crate::utils::hg_path::HgPath;
2 use crate::utils::hg_path::HgPathBuf;
2 use crate::utils::hg_path::HgPathBuf;
3 use crate::Revision;
3 use crate::Revision;
4 use crate::NULL_REVISION;
4 use crate::NULL_REVISION;
5
5
6 use im_rc::ordmap::DiffItem;
7 use im_rc::ordmap::Entry;
6 use im_rc::ordmap::Entry;
8 use im_rc::ordmap::OrdMap;
7 use im_rc::ordmap::OrdMap;
9 use im_rc::OrdSet;
8 use im_rc::OrdSet;
10
9
11 use std::cmp::Ordering;
10 use std::cmp::Ordering;
12 use std::collections::HashMap;
11 use std::collections::HashMap;
13 use std::convert::TryInto;
12 use std::convert::TryInto;
14
13
15 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
14 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
16
15
17 type PathToken = usize;
16 type PathToken = usize;
18
17
19 #[derive(Clone, Debug)]
18 #[derive(Clone, Debug)]
20 struct CopySource {
19 struct CopySource {
21 /// revision at which the copy information was added
20 /// revision at which the copy information was added
22 rev: Revision,
21 rev: Revision,
23 /// the copy source, (Set to None in case of deletion of the associated
22 /// the copy source, (Set to None in case of deletion of the associated
24 /// key)
23 /// key)
25 path: Option<PathToken>,
24 path: Option<PathToken>,
26 /// a set of previous `CopySource.rev` value directly or indirectly
25 /// a set of previous `CopySource.rev` value directly or indirectly
27 /// overwritten by this one.
26 /// overwritten by this one.
28 overwritten: OrdSet<Revision>,
27 overwritten: OrdSet<Revision>,
29 }
28 }
30
29
31 impl CopySource {
30 impl CopySource {
32 /// create a new CopySource
31 /// create a new CopySource
33 ///
32 ///
34 /// Use this when no previous copy source existed.
33 /// Use this when no previous copy source existed.
35 fn new(rev: Revision, path: Option<PathToken>) -> Self {
34 fn new(rev: Revision, path: Option<PathToken>) -> Self {
36 Self {
35 Self {
37 rev,
36 rev,
38 path,
37 path,
39 overwritten: OrdSet::new(),
38 overwritten: OrdSet::new(),
40 }
39 }
41 }
40 }
42
41
43 /// create a new CopySource from merging two others
42 /// create a new CopySource from merging two others
44 ///
43 ///
45 /// Use this when merging two InternalPathCopies requires active merging of
44 /// Use this when merging two InternalPathCopies requires active merging of
46 /// some entries.
45 /// some entries.
47 fn new_from_merge(rev: Revision, winner: &Self, loser: &Self) -> Self {
46 fn new_from_merge(rev: Revision, winner: &Self, loser: &Self) -> Self {
48 let mut overwritten = OrdSet::new();
47 let mut overwritten = OrdSet::new();
49 overwritten.extend(winner.overwritten.iter().copied());
48 overwritten.extend(winner.overwritten.iter().copied());
50 overwritten.extend(loser.overwritten.iter().copied());
49 overwritten.extend(loser.overwritten.iter().copied());
51 overwritten.insert(winner.rev);
50 overwritten.insert(winner.rev);
52 overwritten.insert(loser.rev);
51 overwritten.insert(loser.rev);
53 Self {
52 Self {
54 rev,
53 rev,
55 path: winner.path,
54 path: winner.path,
56 overwritten: overwritten,
55 overwritten: overwritten,
57 }
56 }
58 }
57 }
59
58
60 /// Update the value of a pre-existing CopySource
59 /// Update the value of a pre-existing CopySource
61 ///
60 ///
62 /// Use this when recording copy information from parent β†’ child edges
61 /// Use this when recording copy information from parent β†’ child edges
63 fn overwrite(&mut self, rev: Revision, path: Option<PathToken>) {
62 fn overwrite(&mut self, rev: Revision, path: Option<PathToken>) {
64 self.overwritten.insert(self.rev);
63 self.overwritten.insert(self.rev);
65 self.rev = rev;
64 self.rev = rev;
66 self.path = path;
65 self.path = path;
67 }
66 }
68
67
69 /// Mark pre-existing copy information as "dropped" by a file deletion
68 /// Mark pre-existing copy information as "dropped" by a file deletion
70 ///
69 ///
71 /// Use this when recording copy information from parent β†’ child edges
70 /// Use this when recording copy information from parent β†’ child edges
72 fn mark_delete(&mut self, rev: Revision) {
71 fn mark_delete(&mut self, rev: Revision) {
73 self.overwritten.insert(self.rev);
72 self.overwritten.insert(self.rev);
74 self.rev = rev;
73 self.rev = rev;
75 self.path = None;
74 self.path = None;
76 }
75 }
77
76
78 /// Mark pre-existing copy information as "dropped" by a file deletion
77 /// Mark pre-existing copy information as "dropped" by a file deletion
79 ///
78 ///
80 /// Use this when recording copy information from parent β†’ child edges
79 /// Use this when recording copy information from parent β†’ child edges
81 fn mark_delete_with_pair(&mut self, rev: Revision, other: &Self) {
80 fn mark_delete_with_pair(&mut self, rev: Revision, other: &Self) {
82 self.overwritten.insert(self.rev);
81 self.overwritten.insert(self.rev);
83 if other.rev != rev {
82 if other.rev != rev {
84 self.overwritten.insert(other.rev);
83 self.overwritten.insert(other.rev);
85 }
84 }
86 self.overwritten.extend(other.overwritten.iter().copied());
85 self.overwritten.extend(other.overwritten.iter().copied());
87 self.rev = rev;
86 self.rev = rev;
88 self.path = None;
87 self.path = None;
89 }
88 }
90
89
91 fn is_overwritten_by(&self, other: &Self) -> bool {
90 fn is_overwritten_by(&self, other: &Self) -> bool {
92 other.overwritten.contains(&self.rev)
91 other.overwritten.contains(&self.rev)
93 }
92 }
94 }
93 }
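
To make the `overwritten` bookkeeping concrete: `overwrite` records the revision being replaced, and `is_overwritten_by` later checks for it. A minimal illustration, as it could appear in a unit test inside this module (not part of this changeset; the revision numbers and path tokens are arbitrary):

    // Record a copy source at rev 3, then overwrite it at rev 5: the rev-3
    // value is now considered superseded by the rev-5 one, but not vice versa.
    let mut src = CopySource::new(3, Some(0));
    let before = src.clone();
    src.overwrite(5, Some(1));
    assert!(before.is_overwritten_by(&src));
    assert!(!src.is_overwritten_by(&before));
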
95
94
96 // For the same "dest", content generated for a given revision will always be
95 // For the same "dest", content generated for a given revision will always be
97 // the same.
96 // the same.
98 impl PartialEq for CopySource {
97 impl PartialEq for CopySource {
99 fn eq(&self, other: &Self) -> bool {
98 fn eq(&self, other: &Self) -> bool {
100 #[cfg(debug_assertions)]
99 #[cfg(debug_assertions)]
101 {
100 {
102 if self.rev == other.rev {
101 if self.rev == other.rev {
103 debug_assert!(self.path == other.path);
102 debug_assert!(self.path == other.path);
104 debug_assert!(self.overwritten == other.overwritten);
103 debug_assert!(self.overwritten == other.overwritten);
105 }
104 }
106 }
105 }
107 self.rev == other.rev
106 self.rev == other.rev
108 }
107 }
109 }
108 }
110
109
111 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
110 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
112 type InternalPathCopies = OrdMap<PathToken, CopySource>;
111 type InternalPathCopies = OrdMap<PathToken, CopySource>;
113
112
114 /// hold parent 1, parent 2 and relevant files actions.
113 /// hold parent 1, parent 2 and relevant files actions.
115 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
114 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
116
115
117 /// represent the files affected by a changesets
116 /// represent the files affected by a changesets
118 ///
117 ///
119 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
118 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
120 /// all the data categories tracked by it.
119 /// all the data categories tracked by it.
121 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
120 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
122 /// all the data categories tracked by it.
121 /// all the data categories tracked by it.
123 pub struct ChangedFiles<'a> {
122 pub struct ChangedFiles<'a> {
124 nb_items: u32,
123 nb_items: u32,
125 index: &'a [u8],
124 index: &'a [u8],
126 data: &'a [u8],
125 data: &'a [u8],
127 }
126 }
128
127
129 /// Represent active changes that affect the copy tracing.
128 /// Represent active changes that affect the copy tracing.
130 enum Action<'a> {
129 enum Action<'a> {
131 /// The parent ? children edge is removing a file
130 /// The parent ? children edge is removing a file
132 ///
131 ///
133 /// (actually, this could be the edge from the other parent, but it does
132 /// (actually, this could be the edge from the other parent, but it does
134 /// not matters)
133 /// not matters)
135 Removed(&'a HgPath),
134 Removed(&'a HgPath),
136 /// The parent ? children edge introduce copy information between (dest,
135 /// The parent ? children edge introduce copy information between (dest,
137 /// source)
136 /// source)
138 CopiedFromP1(&'a HgPath, &'a HgPath),
137 CopiedFromP1(&'a HgPath, &'a HgPath),
139 CopiedFromP2(&'a HgPath, &'a HgPath),
138 CopiedFromP2(&'a HgPath, &'a HgPath),
140 }
139 }
141
140
142 /// This express the possible "special" case we can get in a merge
141 /// This express the possible "special" case we can get in a merge
143 ///
142 ///
144 /// See mercurial/metadata.py for details on these values.
143 /// See mercurial/metadata.py for details on these values.
145 #[derive(PartialEq)]
144 #[derive(PartialEq)]
146 enum MergeCase {
145 enum MergeCase {
147 /// Merged: file had history on both side that needed to be merged
146 /// Merged: file had history on both side that needed to be merged
148 Merged,
147 Merged,
149 /// Salvaged: file was candidate for deletion, but survived the merge
148 /// Salvaged: file was candidate for deletion, but survived the merge
150 Salvaged,
149 Salvaged,
151 /// Normal: Not one of the two cases above
150 /// Normal: Not one of the two cases above
152 Normal,
151 Normal,
153 }
152 }
154
153
155 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
154 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
156
155
157 const EMPTY: &[u8] = b"";
156 const EMPTY: &[u8] = b"";
158 const COPY_MASK: u8 = 3;
157 const COPY_MASK: u8 = 3;
159 const P1_COPY: u8 = 2;
158 const P1_COPY: u8 = 2;
160 const P2_COPY: u8 = 3;
159 const P2_COPY: u8 = 3;
161 const ACTION_MASK: u8 = 28;
160 const ACTION_MASK: u8 = 28;
162 const REMOVED: u8 = 12;
161 const REMOVED: u8 = 12;
163 const MERGED: u8 = 8;
162 const MERGED: u8 = 8;
164 const SALVAGED: u8 = 16;
163 const SALVAGED: u8 = 16;
165
164
166 impl<'a> ChangedFiles<'a> {
165 impl<'a> ChangedFiles<'a> {
167 const INDEX_START: usize = 4;
166 const INDEX_START: usize = 4;
168 const ENTRY_SIZE: u32 = 9;
167 const ENTRY_SIZE: u32 = 9;
169 const FILENAME_START: u32 = 1;
168 const FILENAME_START: u32 = 1;
170 const COPY_SOURCE_START: u32 = 5;
169 const COPY_SOURCE_START: u32 = 5;
171
170
172 pub fn new(data: &'a [u8]) -> Self {
171 pub fn new(data: &'a [u8]) -> Self {
173 assert!(
172 assert!(
174 data.len() >= 4,
173 data.len() >= 4,
175 "data size ({}) is too small to contain the header (4)",
174 "data size ({}) is too small to contain the header (4)",
176 data.len()
175 data.len()
177 );
176 );
178 let nb_items_raw: [u8; 4] = (&data[0..=3])
177 let nb_items_raw: [u8; 4] = (&data[0..=3])
179 .try_into()
178 .try_into()
180 .expect("failed to turn 4 bytes into 4 bytes");
179 .expect("failed to turn 4 bytes into 4 bytes");
181 let nb_items = u32::from_be_bytes(nb_items_raw);
180 let nb_items = u32::from_be_bytes(nb_items_raw);
182
181
183 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
182 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
184 let index_end = Self::INDEX_START + index_size;
183 let index_end = Self::INDEX_START + index_size;
185
184
186 assert!(
185 assert!(
187 data.len() >= index_end,
186 data.len() >= index_end,
188 "data size ({}) is too small to fit the index_data ({})",
187 "data size ({}) is too small to fit the index_data ({})",
189 data.len(),
188 data.len(),
190 index_end
189 index_end
191 );
190 );
192
191
193 let ret = ChangedFiles {
192 let ret = ChangedFiles {
194 nb_items,
193 nb_items,
195 index: &data[Self::INDEX_START..index_end],
194 index: &data[Self::INDEX_START..index_end],
196 data: &data[index_end..],
195 data: &data[index_end..],
197 };
196 };
198 let max_data = ret.filename_end(nb_items - 1) as usize;
197 let max_data = ret.filename_end(nb_items - 1) as usize;
199 assert!(
198 assert!(
200 ret.data.len() >= max_data,
199 ret.data.len() >= max_data,
201 "data size ({}) is too small to fit all data ({})",
200 "data size ({}) is too small to fit all data ({})",
202 data.len(),
201 data.len(),
203 index_end + max_data
202 index_end + max_data
204 );
203 );
205 ret
204 ret
206 }
205 }
207
206
208 pub fn new_empty() -> Self {
207 pub fn new_empty() -> Self {
209 ChangedFiles {
208 ChangedFiles {
210 nb_items: 0,
209 nb_items: 0,
211 index: EMPTY,
210 index: EMPTY,
212 data: EMPTY,
211 data: EMPTY,
213 }
212 }
214 }
213 }
215
214
216 /// internal function to return an individual entry at a given index
215 /// internal function to return an individual entry at a given index
217 fn entry(&'a self, idx: u32) -> FileChange<'a> {
216 fn entry(&'a self, idx: u32) -> FileChange<'a> {
218 if idx >= self.nb_items {
217 if idx >= self.nb_items {
219 panic!(
218 panic!(
220 "index for entry is higher that the number of file {} >= {}",
219 "index for entry is higher that the number of file {} >= {}",
221 idx, self.nb_items
220 idx, self.nb_items
222 )
221 )
223 }
222 }
224 let flags = self.flags(idx);
223 let flags = self.flags(idx);
225 let filename = self.filename(idx);
224 let filename = self.filename(idx);
226 let copy_idx = self.copy_idx(idx);
225 let copy_idx = self.copy_idx(idx);
227 let copy_source = self.filename(copy_idx);
226 let copy_source = self.filename(copy_idx);
228 (flags, filename, copy_source)
227 (flags, filename, copy_source)
229 }
228 }
230
229
231 /// internal function to return the filename of the entry at a given index
230 /// internal function to return the filename of the entry at a given index
232 fn filename(&self, idx: u32) -> &HgPath {
231 fn filename(&self, idx: u32) -> &HgPath {
233 let filename_start;
232 let filename_start;
234 if idx == 0 {
233 if idx == 0 {
235 filename_start = 0;
234 filename_start = 0;
236 } else {
235 } else {
237 filename_start = self.filename_end(idx - 1)
236 filename_start = self.filename_end(idx - 1)
238 }
237 }
239 let filename_end = self.filename_end(idx);
238 let filename_end = self.filename_end(idx);
240 let filename_start = filename_start as usize;
239 let filename_start = filename_start as usize;
241 let filename_end = filename_end as usize;
240 let filename_end = filename_end as usize;
242 HgPath::new(&self.data[filename_start..filename_end])
241 HgPath::new(&self.data[filename_start..filename_end])
243 }
242 }
244
243
245 /// internal function to return the flag field of the entry at a given
244 /// internal function to return the flag field of the entry at a given
246 /// index
245 /// index
247 fn flags(&self, idx: u32) -> u8 {
246 fn flags(&self, idx: u32) -> u8 {
248 let idx = idx as usize;
247 let idx = idx as usize;
249 self.index[idx * (Self::ENTRY_SIZE as usize)]
248 self.index[idx * (Self::ENTRY_SIZE as usize)]
250 }
249 }
251
250
252 /// internal function to return the end of a filename part at a given index
251 /// internal function to return the end of a filename part at a given index
253 fn filename_end(&self, idx: u32) -> u32 {
252 fn filename_end(&self, idx: u32) -> u32 {
254 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
253 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
255 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
254 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
256 let start = start as usize;
255 let start = start as usize;
257 let end = end as usize;
256 let end = end as usize;
258 let raw = (&self.index[start..end])
257 let raw = (&self.index[start..end])
259 .try_into()
258 .try_into()
260 .expect("failed to turn 4 bytes into 4 bytes");
259 .expect("failed to turn 4 bytes into 4 bytes");
261 u32::from_be_bytes(raw)
260 u32::from_be_bytes(raw)
262 }
261 }
263
262
264 /// internal function to return index of the copy source of the entry at a
263 /// internal function to return index of the copy source of the entry at a
265 /// given index
264 /// given index
266 fn copy_idx(&self, idx: u32) -> u32 {
265 fn copy_idx(&self, idx: u32) -> u32 {
267 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
266 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
268 let end = (idx + 1) * Self::ENTRY_SIZE;
267 let end = (idx + 1) * Self::ENTRY_SIZE;
269 let start = start as usize;
268 let start = start as usize;
270 let end = end as usize;
269 let end = end as usize;
271 let raw = (&self.index[start..end])
270 let raw = (&self.index[start..end])
272 .try_into()
271 .try_into()
273 .expect("failed to turn 4 bytes into 4 bytes");
272 .expect("failed to turn 4 bytes into 4 bytes");
274 u32::from_be_bytes(raw)
273 u32::from_be_bytes(raw)
275 }
274 }
276
275
277 /// Return an iterator over all the `Action` in this instance.
276 /// Return an iterator over all the `Action` in this instance.
278 fn iter_actions(&self) -> ActionsIterator {
277 fn iter_actions(&self) -> ActionsIterator {
279 ActionsIterator {
278 ActionsIterator {
280 changes: &self,
279 changes: &self,
281 current: 0,
280 current: 0,
282 }
281 }
283 }
282 }
284
283
285 /// return the MergeCase value associated with a filename
284 /// return the MergeCase value associated with a filename
286 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
285 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
287 if self.nb_items == 0 {
286 if self.nb_items == 0 {
288 return MergeCase::Normal;
287 return MergeCase::Normal;
289 }
288 }
290 let mut low_part = 0;
289 let mut low_part = 0;
291 let mut high_part = self.nb_items;
290 let mut high_part = self.nb_items;
292
291
293 while low_part < high_part {
292 while low_part < high_part {
294 let cursor = (low_part + high_part - 1) / 2;
293 let cursor = (low_part + high_part - 1) / 2;
295 let (flags, filename, _source) = self.entry(cursor);
294 let (flags, filename, _source) = self.entry(cursor);
296 match path.cmp(filename) {
295 match path.cmp(filename) {
297 Ordering::Less => low_part = cursor + 1,
296 Ordering::Less => low_part = cursor + 1,
298 Ordering::Greater => high_part = cursor,
297 Ordering::Greater => high_part = cursor,
299 Ordering::Equal => {
298 Ordering::Equal => {
300 return match flags & ACTION_MASK {
299 return match flags & ACTION_MASK {
301 MERGED => MergeCase::Merged,
300 MERGED => MergeCase::Merged,
302 SALVAGED => MergeCase::Salvaged,
301 SALVAGED => MergeCase::Salvaged,
303 _ => MergeCase::Normal,
302 _ => MergeCase::Normal,
304 };
303 };
305 }
304 }
306 }
305 }
307 }
306 }
308 MergeCase::Normal
307 MergeCase::Normal
309 }
308 }
310 }
309 }
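
The accessors above imply a simple binary layout. As a rough sketch, a hypothetical builder (illustration only; `encode_changed_files` is not part of hg-core or of this changeset) would assemble a blob that `ChangedFiles::new` accepts like this:

    // Layout: u32 BE entry count, then one 9-byte index record per entry
    // (1 flag byte, u32 BE end offset of the filename within the data block,
    // u32 BE index of the copy-source entry), then the concatenated filenames.
    fn encode_changed_files(entries: &[(u8, &[u8], u32)]) -> Vec<u8> {
        let mut index = Vec::new();
        let mut names = Vec::new();
        for (flags, filename, copy_source_idx) in entries {
            names.extend_from_slice(filename);
            index.push(*flags);
            index.extend_from_slice(&(names.len() as u32).to_be_bytes());
            index.extend_from_slice(&copy_source_idx.to_be_bytes());
        }
        let mut blob = (entries.len() as u32).to_be_bytes().to_vec();
        blob.extend_from_slice(&index);
        blob.extend_from_slice(&names);
        blob
    }
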
311
310
312 struct ActionsIterator<'a> {
311 struct ActionsIterator<'a> {
313 changes: &'a ChangedFiles<'a>,
312 changes: &'a ChangedFiles<'a>,
314 current: u32,
313 current: u32,
315 }
314 }
316
315
317 impl<'a> Iterator for ActionsIterator<'a> {
316 impl<'a> Iterator for ActionsIterator<'a> {
318 type Item = Action<'a>;
317 type Item = Action<'a>;
319
318
320 fn next(&mut self) -> Option<Action<'a>> {
319 fn next(&mut self) -> Option<Action<'a>> {
321 while self.current < self.changes.nb_items {
320 while self.current < self.changes.nb_items {
322 let (flags, file, source) = self.changes.entry(self.current);
321 let (flags, file, source) = self.changes.entry(self.current);
323 self.current += 1;
322 self.current += 1;
324 if (flags & ACTION_MASK) == REMOVED {
323 if (flags & ACTION_MASK) == REMOVED {
325 return Some(Action::Removed(file));
324 return Some(Action::Removed(file));
326 }
325 }
327 let copy = flags & COPY_MASK;
326 let copy = flags & COPY_MASK;
328 if copy == P1_COPY {
327 if copy == P1_COPY {
329 return Some(Action::CopiedFromP1(file, source));
328 return Some(Action::CopiedFromP1(file, source));
330 } else if copy == P2_COPY {
329 } else if copy == P2_COPY {
331 return Some(Action::CopiedFromP2(file, source));
330 return Some(Action::CopiedFromP2(file, source));
332 }
331 }
333 }
332 }
334 return None;
333 return None;
335 }
334 }
336 }
335 }
337
336
338 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
337 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
339 /// ChangedFiles
338 /// ChangedFiles
340 ///
339 ///
341 /// It is passed to the RevInfoMaker callback who can assign any necessary
340 /// It is passed to the RevInfoMaker callback who can assign any necessary
342 /// content to the `data` attribute. The copy tracing code is responsible for
341 /// content to the `data` attribute. The copy tracing code is responsible for
343 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
342 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
344 pub struct DataHolder<D> {
343 pub struct DataHolder<D> {
345 /// RevInfoMaker callback should assign data referenced by the
344 /// RevInfoMaker callback should assign data referenced by the
346 /// ChangedFiles struct it return to this attribute. The DataHolder
345 /// ChangedFiles struct it return to this attribute. The DataHolder
347 /// lifetime will be at least as long as the ChangedFiles one.
346 /// lifetime will be at least as long as the ChangedFiles one.
348 pub data: Option<D>,
347 pub data: Option<D>,
349 }
348 }
350
349
351 pub type RevInfoMaker<'a, D> =
350 pub type RevInfoMaker<'a, D> =
352 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
351 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
353
352
354 /// A small "tokenizer" responsible of turning full HgPath into lighter
353 /// A small "tokenizer" responsible of turning full HgPath into lighter
355 /// PathToken
354 /// PathToken
356 ///
355 ///
357 /// Dealing with small object, like integer is much faster, so HgPath input are
356 /// Dealing with small object, like integer is much faster, so HgPath input are
358 /// turned into integer "PathToken" and converted back in the end.
357 /// turned into integer "PathToken" and converted back in the end.
359 #[derive(Clone, Debug, Default)]
358 #[derive(Clone, Debug, Default)]
360 struct TwoWayPathMap {
359 struct TwoWayPathMap {
361 token: HashMap<HgPathBuf, PathToken>,
360 token: HashMap<HgPathBuf, PathToken>,
362 path: Vec<HgPathBuf>,
361 path: Vec<HgPathBuf>,
363 }
362 }
364
363
365 impl TwoWayPathMap {
364 impl TwoWayPathMap {
366 fn tokenize(&mut self, path: &HgPath) -> PathToken {
365 fn tokenize(&mut self, path: &HgPath) -> PathToken {
367 match self.token.get(path) {
366 match self.token.get(path) {
368 Some(a) => *a,
367 Some(a) => *a,
369 None => {
368 None => {
370 let a = self.token.len();
369 let a = self.token.len();
371 let buf = path.to_owned();
370 let buf = path.to_owned();
372 self.path.push(buf.clone());
371 self.path.push(buf.clone());
373 self.token.insert(buf, a);
372 self.token.insert(buf, a);
374 a
373 a
375 }
374 }
376 }
375 }
377 }
376 }
378
377
379 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
378 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
380 assert!(token < self.path.len(), format!("Unknown token: {}", token));
379 assert!(token < self.path.len(), format!("Unknown token: {}", token));
381 &self.path[token]
380 &self.path[token]
382 }
381 }
383 }
382 }
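
A quick round-trip illustration (not from this changeset): tokenizing the same path twice yields the same token, and `untokenize` gives the path back.

    let mut map = TwoWayPathMap::default();
    let a = map.tokenize(HgPath::new(b"dir/file"));
    let b = map.tokenize(HgPath::new(b"dir/file"));
    assert_eq!(a, b);
    assert_eq!(map.untokenize(a), &HgPath::new(b"dir/file").to_owned());
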
384
383
385 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
384 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
386 ///
385 ///
387 /// Arguments are:
386 /// Arguments are:
388 ///
387 ///
389 /// revs: all revisions to be considered
388 /// revs: all revisions to be considered
390 /// children: a {parent ? [childrens]} mapping
389 /// children: a {parent ? [childrens]} mapping
391 /// target_rev: the final revision we are combining copies to
390 /// target_rev: the final revision we are combining copies to
392 /// rev_info(rev): callback to get revision information:
391 /// rev_info(rev): callback to get revision information:
393 /// * first parent
392 /// * first parent
394 /// * second parent
393 /// * second parent
395 /// * ChangedFiles
394 /// * ChangedFiles
396 /// isancestors(low_rev, high_rev): callback to check if a revision is an
395 /// isancestors(low_rev, high_rev): callback to check if a revision is an
397 /// ancestor of another
396 /// ancestor of another
398 pub fn combine_changeset_copies<D>(
397 pub fn combine_changeset_copies<D>(
399 revs: Vec<Revision>,
398 revs: Vec<Revision>,
400 mut children_count: HashMap<Revision, usize>,
399 mut children_count: HashMap<Revision, usize>,
401 target_rev: Revision,
400 target_rev: Revision,
402 rev_info: RevInfoMaker<D>,
401 rev_info: RevInfoMaker<D>,
403 ) -> PathCopies {
402 ) -> PathCopies {
404 let mut all_copies = HashMap::new();
403 let mut all_copies = HashMap::new();
405
404
406 let mut path_map = TwoWayPathMap::default();
405 let mut path_map = TwoWayPathMap::default();
407
406
408 for rev in revs {
407 for rev in revs {
409 let mut d: DataHolder<D> = DataHolder { data: None };
408 let mut d: DataHolder<D> = DataHolder { data: None };
410 let (p1, p2, changes) = rev_info(rev, &mut d);
409 let (p1, p2, changes) = rev_info(rev, &mut d);
411
410
412 // We will chain the copies information accumulated for the parent with
411 // We will chain the copies information accumulated for the parent with
413 // the individual copies information the curent revision. Creating a
412 // the individual copies information the curent revision. Creating a
414 // new TimeStampedPath for each `rev` β†’ `children` vertex.
413 // new TimeStampedPath for each `rev` β†’ `children` vertex.
415 // Retrieve data computed in a previous iteration
414 // Retrieve data computed in a previous iteration
416 let p1_copies = match p1 {
415 let p1_copies = match p1 {
417 NULL_REVISION => None,
416 NULL_REVISION => None,
418 _ => get_and_clean_parent_copies(
417 _ => get_and_clean_parent_copies(
419 &mut all_copies,
418 &mut all_copies,
420 &mut children_count,
419 &mut children_count,
421 p1,
420 p1,
422 ), // will be None if the vertex is not to be traversed
421 ), // will be None if the vertex is not to be traversed
423 };
422 };
424 let p2_copies = match p2 {
423 let p2_copies = match p2 {
425 NULL_REVISION => None,
424 NULL_REVISION => None,
426 _ => get_and_clean_parent_copies(
425 _ => get_and_clean_parent_copies(
427 &mut all_copies,
426 &mut all_copies,
428 &mut children_count,
427 &mut children_count,
429 p2,
428 p2,
430 ), // will be None if the vertex is not to be traversed
429 ), // will be None if the vertex is not to be traversed
431 };
430 };
432 // combine it with data for that revision
431 // combine it with data for that revision
433 let (p1_copies, p2_copies) =
432 let (p1_copies, p2_copies) =
434 chain_changes(&mut path_map, p1_copies, p2_copies, &changes, rev);
433 chain_changes(&mut path_map, p1_copies, p2_copies, &changes, rev);
435 let copies = match (p1_copies, p2_copies) {
434 let copies = match (p1_copies, p2_copies) {
436 (None, None) => None,
435 (None, None) => None,
437 (c, None) => c,
436 (c, None) => c,
438 (None, c) => c,
437 (None, c) => c,
439 (Some(p1_copies), Some(p2_copies)) => Some(merge_copies_dict(
438 (Some(p1_copies), Some(p2_copies)) => Some(merge_copies_dict(
440 &path_map, rev, p2_copies, p1_copies, &changes,
439 &path_map, rev, p2_copies, p1_copies, &changes,
441 )),
440 )),
442 };
441 };
443 if let Some(c) = copies {
442 if let Some(c) = copies {
444 all_copies.insert(rev, c);
443 all_copies.insert(rev, c);
445 }
444 }
446 }
445 }
447
446
448 // Drop internal information (like the timestamp) and return the final
447 // Drop internal information (like the timestamp) and return the final
449 // mapping.
448 // mapping.
450 let tt_result = all_copies
449 let tt_result = all_copies
451 .remove(&target_rev)
450 .remove(&target_rev)
452 .expect("target revision was not processed");
451 .expect("target revision was not processed");
453 let mut result = PathCopies::default();
452 let mut result = PathCopies::default();
454 for (dest, tt_source) in tt_result {
453 for (dest, tt_source) in tt_result {
455 if let Some(path) = tt_source.path {
454 if let Some(path) = tt_source.path {
456 let path_dest = path_map.untokenize(dest).to_owned();
455 let path_dest = path_map.untokenize(dest).to_owned();
457 let path_path = path_map.untokenize(path).to_owned();
456 let path_path = path_map.untokenize(path).to_owned();
458 result.insert(path_dest, path_path);
457 result.insert(path_dest, path_path);
459 }
458 }
460 }
459 }
461 result
460 result
462 }
461 }
463
462
464 /// fetch previous computed information
463 /// fetch previous computed information
465 ///
464 ///
466 /// If no other children are expected to need this information, we drop it from
465 /// If no other children are expected to need this information, we drop it from
467 /// the cache.
466 /// the cache.
468 ///
467 ///
469 /// If parent is not part of the set we are expected to walk, return None.
468 /// If parent is not part of the set we are expected to walk, return None.
470 fn get_and_clean_parent_copies(
469 fn get_and_clean_parent_copies(
471 all_copies: &mut HashMap<Revision, InternalPathCopies>,
470 all_copies: &mut HashMap<Revision, InternalPathCopies>,
472 children_count: &mut HashMap<Revision, usize>,
471 children_count: &mut HashMap<Revision, usize>,
473 parent_rev: Revision,
472 parent_rev: Revision,
474 ) -> Option<InternalPathCopies> {
473 ) -> Option<InternalPathCopies> {
475 let count = children_count.get_mut(&parent_rev)?;
474 let count = children_count.get_mut(&parent_rev)?;
476 *count -= 1;
475 *count -= 1;
477 if *count == 0 {
476 if *count == 0 {
478 match all_copies.remove(&parent_rev) {
477 match all_copies.remove(&parent_rev) {
479 Some(c) => Some(c),
478 Some(c) => Some(c),
480 None => Some(InternalPathCopies::default()),
479 None => Some(InternalPathCopies::default()),
481 }
480 }
482 } else {
481 } else {
483 match all_copies.get(&parent_rev) {
482 match all_copies.get(&parent_rev) {
484 Some(c) => Some(c.clone()),
483 Some(c) => Some(c.clone()),
485 None => Some(InternalPathCopies::default()),
484 None => Some(InternalPathCopies::default()),
486 }
485 }
487 }
486 }
488 }
487 }
489
488
490 /// Combine ChangedFiles with some existing PathCopies information and return
489 /// Combine ChangedFiles with some existing PathCopies information and return
491 /// the result
490 /// the result
492 fn chain_changes(
491 fn chain_changes(
493 path_map: &mut TwoWayPathMap,
492 path_map: &mut TwoWayPathMap,
494 base_p1_copies: Option<InternalPathCopies>,
493 base_p1_copies: Option<InternalPathCopies>,
495 base_p2_copies: Option<InternalPathCopies>,
494 base_p2_copies: Option<InternalPathCopies>,
496 changes: &ChangedFiles,
495 changes: &ChangedFiles,
497 current_rev: Revision,
496 current_rev: Revision,
498 ) -> (Option<InternalPathCopies>, Option<InternalPathCopies>) {
497 ) -> (Option<InternalPathCopies>, Option<InternalPathCopies>) {
499 // Fast path the "nothing to do" case.
498 // Fast path the "nothing to do" case.
500 if let (None, None) = (&base_p1_copies, &base_p2_copies) {
499 if let (None, None) = (&base_p1_copies, &base_p2_copies) {
501 return (None, None);
500 return (None, None);
502 }
501 }
503
502
504 let mut p1_copies = base_p1_copies.clone();
503 let mut p1_copies = base_p1_copies.clone();
505 let mut p2_copies = base_p2_copies.clone();
504 let mut p2_copies = base_p2_copies.clone();
506 for action in changes.iter_actions() {
505 for action in changes.iter_actions() {
507 match action {
506 match action {
508 Action::CopiedFromP1(path_dest, path_source) => {
507 Action::CopiedFromP1(path_dest, path_source) => {
509 match &mut p1_copies {
508 match &mut p1_copies {
510 None => (), // This is not a vertex we should proceed.
509 None => (), // This is not a vertex we should proceed.
511 Some(copies) => add_one_copy(
510 Some(copies) => add_one_copy(
512 current_rev,
511 current_rev,
513 path_map,
512 path_map,
514 copies,
513 copies,
515 base_p1_copies.as_ref().unwrap(),
514 base_p1_copies.as_ref().unwrap(),
516 path_dest,
515 path_dest,
517 path_source,
516 path_source,
518 ),
517 ),
519 }
518 }
520 }
519 }
521 Action::CopiedFromP2(path_dest, path_source) => {
520 Action::CopiedFromP2(path_dest, path_source) => {
522 match &mut p2_copies {
521 match &mut p2_copies {
523 None => (), // This is not a vertex we should proceed.
522 None => (), // This is not a vertex we should proceed.
524 Some(copies) => add_one_copy(
523 Some(copies) => add_one_copy(
525 current_rev,
524 current_rev,
526 path_map,
525 path_map,
527 copies,
526 copies,
528 base_p2_copies.as_ref().unwrap(),
527 base_p2_copies.as_ref().unwrap(),
529 path_dest,
528 path_dest,
530 path_source,
529 path_source,
531 ),
530 ),
532 }
531 }
533 }
532 }
534 Action::Removed(deleted_path) => {
533 Action::Removed(deleted_path) => {
535 // We must drop copy information for removed file.
534 // We must drop copy information for removed file.
536 //
535 //
537 // We need to explicitly record them as dropped to
536 // We need to explicitly record them as dropped to
538 // propagate this information when merging two
537 // propagate this information when merging two
539 // InternalPathCopies object.
538 // InternalPathCopies object.
540 let deleted = path_map.tokenize(deleted_path);
539 let deleted = path_map.tokenize(deleted_path);
541
540
542 let p1_entry = match &mut p1_copies {
541 let p1_entry = match &mut p1_copies {
543 None => None,
542 None => None,
544 Some(copies) => match copies.entry(deleted) {
543 Some(copies) => match copies.entry(deleted) {
545 Entry::Occupied(e) => Some(e),
544 Entry::Occupied(e) => Some(e),
546 Entry::Vacant(_) => None,
545 Entry::Vacant(_) => None,
547 },
546 },
548 };
547 };
549 let p2_entry = match &mut p2_copies {
548 let p2_entry = match &mut p2_copies {
550 None => None,
549 None => None,
551 Some(copies) => match copies.entry(deleted) {
550 Some(copies) => match copies.entry(deleted) {
552 Entry::Occupied(e) => Some(e),
551 Entry::Occupied(e) => Some(e),
553 Entry::Vacant(_) => None,
552 Entry::Vacant(_) => None,
554 },
553 },
555 };
554 };
556
555
557 match (p1_entry, p2_entry) {
556 match (p1_entry, p2_entry) {
558 (None, None) => (),
557 (None, None) => (),
559 (Some(mut e), None) => {
558 (Some(mut e), None) => {
560 e.get_mut().mark_delete(current_rev)
559 e.get_mut().mark_delete(current_rev)
561 }
560 }
562 (None, Some(mut e)) => {
561 (None, Some(mut e)) => {
563 e.get_mut().mark_delete(current_rev)
562 e.get_mut().mark_delete(current_rev)
564 }
563 }
565 (Some(mut e1), Some(mut e2)) => {
564 (Some(mut e1), Some(mut e2)) => {
566 let cs1 = e1.get_mut();
565 let cs1 = e1.get_mut();
567 let cs2 = e2.get();
566 let cs2 = e2.get();
568 if cs1 == cs2 {
567 if cs1 == cs2 {
569 cs1.mark_delete(current_rev);
568 cs1.mark_delete(current_rev);
570 } else {
569 } else {
571 cs1.mark_delete_with_pair(current_rev, &cs2);
570 cs1.mark_delete_with_pair(current_rev, &cs2);
572 }
571 }
573 e2.insert(cs1.clone());
572 e2.insert(cs1.clone());
574 }
573 }
575 }
574 }
576 }
575 }
577 }
576 }
578 }
577 }
579 (p1_copies, p2_copies)
578 (p1_copies, p2_copies)
580 }
579 }
581
580
582 // insert one new copy information in an InternalPathCopies
581 // insert one new copy information in an InternalPathCopies
583 //
582 //
584 // This deal with chaining and overwrite.
583 // This deal with chaining and overwrite.
585 fn add_one_copy(
584 fn add_one_copy(
586 current_rev: Revision,
585 current_rev: Revision,
587 path_map: &mut TwoWayPathMap,
586 path_map: &mut TwoWayPathMap,
588 copies: &mut InternalPathCopies,
587 copies: &mut InternalPathCopies,
589 base_copies: &InternalPathCopies,
588 base_copies: &InternalPathCopies,
590 path_dest: &HgPath,
589 path_dest: &HgPath,
591 path_source: &HgPath,
590 path_source: &HgPath,
592 ) {
591 ) {
593 let dest = path_map.tokenize(path_dest);
592 let dest = path_map.tokenize(path_dest);
594 let source = path_map.tokenize(path_source);
593 let source = path_map.tokenize(path_source);
595 let entry;
594 let entry;
596 if let Some(v) = base_copies.get(&source) {
595 if let Some(v) = base_copies.get(&source) {
597 entry = match &v.path {
596 entry = match &v.path {
598 Some(path) => Some((*(path)).to_owned()),
597 Some(path) => Some((*(path)).to_owned()),
599 None => Some(source.to_owned()),
598 None => Some(source.to_owned()),
600 }
599 }
601 } else {
600 } else {
602 entry = Some(source.to_owned());
601 entry = Some(source.to_owned());
603 }
602 }
604 // Each new entry is introduced by the children, we
603 // Each new entry is introduced by the children, we
605 // record this information as we will need it to take
604 // record this information as we will need it to take
606 // the right decision when merging conflicting copy
605 // the right decision when merging conflicting copy
607 // information. See merge_copies_dict for details.
606 // information. See merge_copies_dict for details.
608 match copies.entry(dest) {
607 match copies.entry(dest) {
609 Entry::Vacant(slot) => {
608 Entry::Vacant(slot) => {
610 let ttpc = CopySource::new(current_rev, entry);
609 let ttpc = CopySource::new(current_rev, entry);
611 slot.insert(ttpc);
610 slot.insert(ttpc);
612 }
611 }
613 Entry::Occupied(mut slot) => {
612 Entry::Occupied(mut slot) => {
614 let ttpc = slot.get_mut();
613 let ttpc = slot.get_mut();
615 ttpc.overwrite(current_rev, entry);
614 ttpc.overwrite(current_rev, entry);
616 }
615 }
617 }
616 }
618 }
617 }
619
618
620 /// merge two copies-mapping together, minor and major
619 /// merge two copies-mapping together, minor and major
621 ///
620 ///
622 /// In case of conflict, value from "major" will be picked, unless in some
621 /// In case of conflict, value from "major" will be picked, unless in some
623 /// cases. See inline documentation for details.
622 /// cases. See inline documentation for details.
624 fn merge_copies_dict(
623 fn merge_copies_dict(
625 path_map: &TwoWayPathMap,
624 path_map: &TwoWayPathMap,
626 current_merge: Revision,
625 current_merge: Revision,
627 mut minor: InternalPathCopies,
626 minor: InternalPathCopies,
628 mut major: InternalPathCopies,
627 major: InternalPathCopies,
629 changes: &ChangedFiles,
628 changes: &ChangedFiles,
630 ) -> InternalPathCopies {
629 ) -> InternalPathCopies {
631 // This closure exist as temporary help while multiple developper are
630 use crate::utils::{ordmap_union_with_merge, MergeResult};
632 // actively working on this code. Feel free to re-inline it once this
631
633 // code is more settled.
632 ordmap_union_with_merge(minor, major, |dest, src_minor, src_major| {
634 let cmp_value =
633 let (pick, overwrite) = compare_value(
635 |dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| {
634 path_map,
636 compare_value(
635 current_merge,
637 path_map,
636 changes,
637 dest,
638 src_minor,
639 src_major,
640 );
641 if overwrite {
642 let (winner, loser) = match pick {
643 MergePick::Major | MergePick::Any => (src_major, src_minor),
644 MergePick::Minor => (src_minor, src_major),
645 };
646 MergeResult::UseNewValue(CopySource::new_from_merge(
638 current_merge,
647 current_merge,
639 changes,
648 winner,
640 dest,
649 loser,
641 src_minor,
650 ))
642 src_major,
651 } else {
643 )
652 match pick {
644 };
653 MergePick::Any | MergePick::Major => {
645 if minor.is_empty() {
654 MergeResult::UseRightValue
646 major
647 } else if major.is_empty() {
648 minor
649 } else if minor.len() * 2 < major.len() {
650 // Lets says we are merging two InternalPathCopies instance A and B.
651 //
652 // If A contains N items, the merge result will never contains more
653 // than N values differents than the one in A
654 //
655 // If B contains M items, with M > N, the merge result will always
656 // result in a minimum of M - N value differents than the on in
657 // A
658 //
659 // As a result, if N < (M-N), we know that simply iterating over A will
660 // yield less difference than iterating over the difference
661 // between A and B.
662 //
663 // This help performance a lot in case were a tiny
664 // InternalPathCopies is merged with a much larger one.
665 for (dest, src_minor) in minor {
666 let src_major = major.get(&dest);
667 match src_major {
668 None => {
669 major.insert(dest, src_minor);
670 }
671 Some(src_major) => {
672 let (pick, overwrite) =
673 cmp_value(&dest, &src_minor, src_major);
674 if overwrite {
675 let src = match pick {
676 MergePick::Major => CopySource::new_from_merge(
677 current_merge,
678 src_major,
679 &src_minor,
680 ),
681 MergePick::Minor => CopySource::new_from_merge(
682 current_merge,
683 &src_minor,
684 src_major,
685 ),
686 MergePick::Any => CopySource::new_from_merge(
687 current_merge,
688 src_major,
689 &src_minor,
690 ),
691 };
692 major.insert(dest, src);
693 } else {
694 match pick {
695 MergePick::Any | MergePick::Major => None,
696 MergePick::Minor => major.insert(dest, src_minor),
697 };
698 }
699 }
700 };
701 }
702 major
703 } else if major.len() * 2 < minor.len() {
704 // This use the same rational than the previous block.
705 // (Check previous block documentation for details.)
706 for (dest, src_major) in major {
707 let src_minor = minor.get(&dest);
708 match src_minor {
709 None => {
710 minor.insert(dest, src_major);
711 }
655 }
712 Some(src_minor) => {
656 MergePick::Minor => MergeResult::UseLeftValue,
713 let (pick, overwrite) =
714 cmp_value(&dest, src_minor, &src_major);
715 if overwrite {
716 let src = match pick {
717 MergePick::Major => CopySource::new_from_merge(
718 current_merge,
719 &src_major,
720 src_minor,
721 ),
722 MergePick::Minor => CopySource::new_from_merge(
723 current_merge,
724 src_minor,
725 &src_major,
726 ),
727 MergePick::Any => CopySource::new_from_merge(
728 current_merge,
729 &src_major,
730 src_minor,
731 ),
732 };
733 minor.insert(dest, src);
734 } else {
735 match pick {
736 MergePick::Any | MergePick::Minor => None,
737 MergePick::Major => minor.insert(dest, src_major),
738 };
739 }
740 }
741 };
742 }
743 minor
744 } else {
745 let mut override_minor = Vec::new();
746 let mut override_major = Vec::new();
747
748 let mut to_major = |k: &PathToken, v: &CopySource| {
749 override_major.push((k.clone(), v.clone()))
750 };
751 let mut to_minor = |k: &PathToken, v: &CopySource| {
752 override_minor.push((k.clone(), v.clone()))
753 };
754
755 // The diff function leverage detection of the identical subpart if
756 // minor and major has some common ancestors. This make it very
757 // fast is most case.
758 //
759 // In case where the two map are vastly different in size, the current
760 // approach is still slowish because the iteration will iterate over
761 // all the "exclusive" content of the larger on. This situation can be
762 // frequent when the subgraph of revision we are processing has a lot
763 // of roots. Each roots adding they own fully new map to the mix (and
764 // likely a small map, if the path from the root to the "main path" is
765 // small.
766 //
767 // We could do better by detecting such situation and processing them
768 // differently.
769 for d in minor.diff(&major) {
770 match d {
771 DiffItem::Add(k, v) => to_minor(k, v),
772 DiffItem::Remove(k, v) => to_major(k, v),
773 DiffItem::Update { old, new } => {
774 let (dest, src_major) = new;
775 let (_, src_minor) = old;
776 let (pick, overwrite) =
777 cmp_value(dest, src_minor, src_major);
778 if overwrite {
779 let src = match pick {
780 MergePick::Major => CopySource::new_from_merge(
781 current_merge,
782 src_major,
783 src_minor,
784 ),
785 MergePick::Minor => CopySource::new_from_merge(
786 current_merge,
787 src_minor,
788 src_major,
789 ),
790 MergePick::Any => CopySource::new_from_merge(
791 current_merge,
792 src_major,
793 src_minor,
794 ),
795 };
796 to_minor(dest, &src);
797 to_major(dest, &src);
798 } else {
799 match pick {
800 MergePick::Major => to_minor(dest, src_major),
801 MergePick::Minor => to_major(dest, src_minor),
802 // If the two entry are identical, no need to do
803 // anything (but diff should not have yield them)
804 MergePick::Any => unreachable!(),
805 }
806 }
807 }
808 };
809 }
810
811 let updates;
812 let mut result;
813 if override_major.is_empty() {
814 result = major
815 } else if override_minor.is_empty() {
816 result = minor
817 } else {
818 if override_minor.len() < override_major.len() {
819 updates = override_minor;
820 result = minor;
821 } else {
822 updates = override_major;
823 result = major;
824 }
825 for (k, v) in updates {
826 result.insert(k, v);
827 }
657 }
828 }
658 }
829 result
659 })
830 }
831 }
660 }
832
661
833 /// represent the side that should prevail when merging two
662 /// represent the side that should prevail when merging two
834 /// InternalPathCopies
663 /// InternalPathCopies
835 enum MergePick {
664 enum MergePick {
836 /// The "major" (p1) side prevails
665 /// The "major" (p1) side prevails
837 Major,
666 Major,
838 /// The "minor" (p2) side prevails
667 /// The "minor" (p2) side prevails
839 Minor,
668 Minor,
840 /// Any side could be used (because they are the same)
669 /// Any side could be used (because they are the same)
841 Any,
670 Any,
842 }
671 }
843
672
844 /// decide which side prevails in case of conflicting values
673 /// decide which side prevails in case of conflicting values
845 #[allow(clippy::if_same_then_else)]
674 #[allow(clippy::if_same_then_else)]
846 fn compare_value(
675 fn compare_value(
847 path_map: &TwoWayPathMap,
676 path_map: &TwoWayPathMap,
848 current_merge: Revision,
677 current_merge: Revision,
849 changes: &ChangedFiles,
678 changes: &ChangedFiles,
850 dest: &PathToken,
679 dest: &PathToken,
851 src_minor: &CopySource,
680 src_minor: &CopySource,
852 src_major: &CopySource,
681 src_major: &CopySource,
853 ) -> (MergePick, bool) {
682 ) -> (MergePick, bool) {
854 if src_major == src_minor {
683 if src_major == src_minor {
855 (MergePick::Any, false)
684 (MergePick::Any, false)
856 } else if src_major.rev == current_merge {
685 } else if src_major.rev == current_merge {
857 // minor is different according to per minor == major check earlier
686 // minor is different according to per minor == major check earlier
858 debug_assert!(src_minor.rev != current_merge);
687 debug_assert!(src_minor.rev != current_merge);
859
688
860 // The last value comes the current merge, this value -will- win
689 // The last value comes the current merge, this value -will- win
861 // eventually.
690 // eventually.
862 (MergePick::Major, true)
691 (MergePick::Major, true)
863 } else if src_minor.rev == current_merge {
692 } else if src_minor.rev == current_merge {
864 // The last value comes the current merge, this value -will- win
693 // The last value comes the current merge, this value -will- win
865 // eventually.
694 // eventually.
866 (MergePick::Minor, true)
695 (MergePick::Minor, true)
867 } else if src_major.path == src_minor.path {
696 } else if src_major.path == src_minor.path {
868 debug_assert!(src_major.rev != src_major.rev);
697 debug_assert!(src_major.rev != src_major.rev);
869 // we have the same value, but from other source;
698 // we have the same value, but from other source;
870 if src_major.is_overwritten_by(src_minor) {
699 if src_major.is_overwritten_by(src_minor) {
871 (MergePick::Minor, false)
700 (MergePick::Minor, false)
872 } else if src_minor.is_overwritten_by(src_major) {
701 } else if src_minor.is_overwritten_by(src_major) {
873 (MergePick::Major, false)
702 (MergePick::Major, false)
874 } else {
703 } else {
875 (MergePick::Any, true)
704 (MergePick::Any, true)
876 }
705 }
877 } else {
706 } else {
878 debug_assert!(src_major.rev != src_major.rev);
707 debug_assert!(src_major.rev != src_major.rev);
879 let dest_path = path_map.untokenize(*dest);
708 let dest_path = path_map.untokenize(*dest);
880 let action = changes.get_merge_case(dest_path);
709 let action = changes.get_merge_case(dest_path);
881 if src_minor.path.is_some()
710 if src_minor.path.is_some()
882 && src_major.path.is_none()
711 && src_major.path.is_none()
883 && action == MergeCase::Salvaged
712 && action == MergeCase::Salvaged
884 {
713 {
885 // If the file is "deleted" in the major side but was
714 // If the file is "deleted" in the major side but was
886 // salvaged by the merge, we keep the minor side alive
715 // salvaged by the merge, we keep the minor side alive
887 (MergePick::Minor, true)
716 (MergePick::Minor, true)
888 } else if src_major.path.is_some()
717 } else if src_major.path.is_some()
889 && src_minor.path.is_none()
718 && src_minor.path.is_none()
890 && action == MergeCase::Salvaged
719 && action == MergeCase::Salvaged
891 {
720 {
892 // If the file is "deleted" in the minor side but was
721 // If the file is "deleted" in the minor side but was
893 // salvaged by the merge, unconditionnaly preserve the
722 // salvaged by the merge, unconditionnaly preserve the
894 // major side.
723 // major side.
895 (MergePick::Major, true)
724 (MergePick::Major, true)
896 } else if src_minor.is_overwritten_by(src_major) {
725 } else if src_minor.is_overwritten_by(src_major) {
897 // The information from the minor version are strictly older than
726 // The information from the minor version are strictly older than
898 // the major version
727 // the major version
899 if action == MergeCase::Merged {
728 if action == MergeCase::Merged {
900 // If the file was actively merged, its means some non-copy
729 // If the file was actively merged, its means some non-copy
901 // activity happened on the other branch. It
730 // activity happened on the other branch. It
902 // mean the older copy information are still relevant.
731 // mean the older copy information are still relevant.
903 //
732 //
904 // The major side wins such conflict.
733 // The major side wins such conflict.
905 (MergePick::Major, true)
734 (MergePick::Major, true)
906 } else {
735 } else {
907 // No activity on the minor branch, pick the newer one.
736 // No activity on the minor branch, pick the newer one.
908 (MergePick::Major, false)
737 (MergePick::Major, false)
909 }
738 }
910 } else if src_major.is_overwritten_by(src_minor) {
739 } else if src_major.is_overwritten_by(src_minor) {
911 if action == MergeCase::Merged {
740 if action == MergeCase::Merged {
912 // If the file was actively merged, its means some non-copy
741 // If the file was actively merged, its means some non-copy
913 // activity happened on the other branch. It
742 // activity happened on the other branch. It
914 // mean the older copy information are still relevant.
743 // mean the older copy information are still relevant.
915 //
744 //
916 // The major side wins such conflict.
745 // The major side wins such conflict.
917 (MergePick::Major, true)
746 (MergePick::Major, true)
918 } else {
747 } else {
919 // No activity on the minor branch, pick the newer one.
748 // No activity on the minor branch, pick the newer one.
920 (MergePick::Minor, false)
749 (MergePick::Minor, false)
921 }
750 }
922 } else if src_minor.path.is_none() {
751 } else if src_minor.path.is_none() {
923 // the minor side has no relevant information, pick the alive one
752 // the minor side has no relevant information, pick the alive one
924 (MergePick::Major, true)
753 (MergePick::Major, true)
925 } else if src_major.path.is_none() {
754 } else if src_major.path.is_none() {
926 // the major side has no relevant information, pick the alive one
755 // the major side has no relevant information, pick the alive one
927 (MergePick::Minor, true)
756 (MergePick::Minor, true)
928 } else {
757 } else {
929 // by default the major side wins
758 // by default the major side wins
930 (MergePick::Major, true)
759 (MergePick::Major, true)
931 }
760 }
932 }
761 }
933 }
762 }
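
The utils.rs hunk below only shows the new `im_rc` imports; the extracted helper itself (`ordmap_union_with_merge` and the `MergeResult` type used by the new `merge_copies_dict` above) lives further down in that file and is cut off from this excerpt. One plausible shape, inferred purely from the call site, is sketched here; the actual implementation in this changeset presumably also keeps the size-based fast paths that were dropped from `merge_copies_dict`, so treat this strictly as an illustration:

    use im_rc::ordmap::DiffItem;
    use im_rc::OrdMap;

    /// What the merge callback asks the union to do for a conflicting key.
    pub enum MergeResult<V> {
        UseLeftValue,
        UseRightValue,
        UseNewValue(V),
    }

    /// Sketch only: union of two maps, calling `merge` for keys present in
    /// both sides with different values (inferred from the call site above,
    /// not copied from hg-core).
    pub fn ordmap_union_with_merge<K, V>(
        left: OrdMap<K, V>,
        right: OrdMap<K, V>,
        mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
    ) -> OrdMap<K, V>
    where
        K: Clone + Ord,
        V: Clone + PartialEq,
    {
        // Start from `right` and patch it with what `diff` reports; `diff`
        // is cheap when the two maps share structure, which is the common
        // case for copy tracing.
        let mut result = right.clone();
        for item in left.diff(&right) {
            match item {
                // Key only present in `left`: carry it over.
                DiffItem::Remove(key, value) => {
                    result.insert(key.clone(), value.clone());
                }
                // Key only present in `right`: already in `result`.
                DiffItem::Add(..) => {}
                // Key present on both sides with different values.
                DiffItem::Update { old, new } => {
                    let (key, left_value) = old;
                    let (_, right_value) = new;
                    match merge(key, left_value, right_value) {
                        MergeResult::UseRightValue => {}
                        MergeResult::UseLeftValue => {
                            result.insert(key.clone(), left_value.clone());
                        }
                        MergeResult::UseNewValue(value) => {
                            result.insert(key.clone(), value);
                        }
                    }
                }
            }
        }
        result
    }

With this split, `merge_copies_dict` only has to express the copy-tracing policy (via `compare_value` and `CopySource::new_from_merge`) and leaves the map-walking mechanics to the generic helper.
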
@@ -1,201 +1,351 @@
1 // utils module
1 // utils module
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Contains useful functions, traits, structs, etc. for use in core.
8 //! Contains useful functions, traits, structs, etc. for use in core.
9
9
10 use crate::errors::{HgError, IoErrorContext};
10 use crate::errors::{HgError, IoErrorContext};
11 use crate::utils::hg_path::HgPath;
11 use crate::utils::hg_path::HgPath;
12 use im_rc::ordmap::DiffItem;
13 use im_rc::ordmap::OrdMap;
12 use std::{io::Write, ops::Deref};
14 use std::{io::Write, ops::Deref};
13
15
14 pub mod files;
16 pub mod files;
15 pub mod hg_path;
17 pub mod hg_path;
16 pub mod path_auditor;
18 pub mod path_auditor;
17
19
18 /// Useful until rust/issues/56345 is stable
20 /// Useful until rust/issues/56345 is stable
19 ///
21 ///
20 /// # Examples
22 /// # Examples
21 ///
23 ///
22 /// ```
24 /// ```
23 /// use crate::hg::utils::find_slice_in_slice;
25 /// use crate::hg::utils::find_slice_in_slice;
24 ///
26 ///
25 /// let haystack = b"This is the haystack".to_vec();
27 /// let haystack = b"This is the haystack".to_vec();
26 /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
28 /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
27 /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
29 /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
28 /// ```
30 /// ```
29 pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
31 pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
30 where
32 where
31 for<'a> &'a [T]: PartialEq,
33 for<'a> &'a [T]: PartialEq,
32 {
34 {
33 slice
35 slice
34 .windows(needle.len())
36 .windows(needle.len())
35 .position(|window| window == needle)
37 .position(|window| window == needle)
36 }
38 }
37
39
38 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
40 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
39 ///
41 ///
40 /// # Examples
42 /// # Examples
41 ///
43 ///
42 /// ```
44 /// ```
43 /// use crate::hg::utils::replace_slice;
45 /// use crate::hg::utils::replace_slice;
44 /// let mut line = b"I hate writing tests!".to_vec();
46 /// let mut line = b"I hate writing tests!".to_vec();
45 /// replace_slice(&mut line, b"hate", b"love");
47 /// replace_slice(&mut line, b"hate", b"love");
46 /// assert_eq!(
48 /// assert_eq!(
47 /// line,
49 /// line,
48 /// b"I love writing tests!".to_vec()
50 /// b"I love writing tests!".to_vec()
49 /// );
51 /// );
50 /// ```
52 /// ```
51 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
53 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
52 where
54 where
53 T: Clone + PartialEq,
55 T: Clone + PartialEq,
54 {
56 {
55 if buf.len() < from.len() || from.len() != to.len() {
57 if buf.len() < from.len() || from.len() != to.len() {
56 return;
58 return;
57 }
59 }
58 for i in 0..=buf.len() - from.len() {
60 for i in 0..=buf.len() - from.len() {
59 if buf[i..].starts_with(from) {
61 if buf[i..].starts_with(from) {
60 buf[i..(i + from.len())].clone_from_slice(to);
62 buf[i..(i + from.len())].clone_from_slice(to);
61 }
63 }
62 }
64 }
63 }
65 }
64
66
65 pub trait SliceExt {
67 pub trait SliceExt {
66 fn trim_end(&self) -> &Self;
68 fn trim_end(&self) -> &Self;
67 fn trim_start(&self) -> &Self;
69 fn trim_start(&self) -> &Self;
68 fn trim(&self) -> &Self;
70 fn trim(&self) -> &Self;
69 fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
71 fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
70 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
72 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
71 }
73 }
72
74
73 #[allow(clippy::trivially_copy_pass_by_ref)]
75 #[allow(clippy::trivially_copy_pass_by_ref)]
74 fn is_not_whitespace(c: &u8) -> bool {
76 fn is_not_whitespace(c: &u8) -> bool {
75 !(*c as char).is_whitespace()
77 !(*c as char).is_whitespace()
76 }
78 }
77
79
78 impl SliceExt for [u8] {
80 impl SliceExt for [u8] {
79 fn trim_end(&self) -> &[u8] {
81 fn trim_end(&self) -> &[u8] {
80 if let Some(last) = self.iter().rposition(is_not_whitespace) {
82 if let Some(last) = self.iter().rposition(is_not_whitespace) {
81 &self[..=last]
83 &self[..=last]
82 } else {
84 } else {
83 &[]
85 &[]
84 }
86 }
85 }
87 }
86 fn trim_start(&self) -> &[u8] {
88 fn trim_start(&self) -> &[u8] {
87 if let Some(first) = self.iter().position(is_not_whitespace) {
89 if let Some(first) = self.iter().position(is_not_whitespace) {
88 &self[first..]
90 &self[first..]
89 } else {
91 } else {
90 &[]
92 &[]
91 }
93 }
92 }
94 }
93
95
94 /// ```
96 /// ```
95 /// use hg::utils::SliceExt;
97 /// use hg::utils::SliceExt;
96 /// assert_eq!(
98 /// assert_eq!(
97 /// b" to trim ".trim(),
99 /// b" to trim ".trim(),
98 /// b"to trim"
100 /// b"to trim"
99 /// );
101 /// );
100 /// assert_eq!(
102 /// assert_eq!(
101 /// b"to trim ".trim(),
103 /// b"to trim ".trim(),
102 /// b"to trim"
104 /// b"to trim"
103 /// );
105 /// );
104 /// assert_eq!(
106 /// assert_eq!(
105 /// b" to trim".trim(),
107 /// b" to trim".trim(),
106 /// b"to trim"
108 /// b"to trim"
107 /// );
109 /// );
108 /// ```
110 /// ```
109 fn trim(&self) -> &[u8] {
111 fn trim(&self) -> &[u8] {
110 self.trim_start().trim_end()
112 self.trim_start().trim_end()
111 }
113 }
112
114
113 fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
115 fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
114 if self.starts_with(needle) {
116 if self.starts_with(needle) {
115 Some(&self[needle.len()..])
117 Some(&self[needle.len()..])
116 } else {
118 } else {
117 None
119 None
118 }
120 }
119 }
121 }
120
122
121 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
123 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
122 let mut iter = self.splitn(2, |&byte| byte == separator);
124 let mut iter = self.splitn(2, |&byte| byte == separator);
123 let a = iter.next()?;
125 let a = iter.next()?;
124 let b = iter.next()?;
126 let b = iter.next()?;
125 Some((a, b))
127 Some((a, b))
126 }
128 }
127 }
129 }
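`drop_prefix` and `split_2` have no doctest above, so here is a small usage sketch with made-up inputs (hypothetical test, not part of this changeset):

#[cfg(test)]
mod slice_ext_example {
    use super::SliceExt;

    #[test]
    fn drop_prefix_and_split_2() {
        // `drop_prefix` only strips the needle when it is actually a prefix.
        assert_eq!(b"linkrev:42".drop_prefix(b"linkrev:"), Some(&b"42"[..]));
        assert_eq!(b"rev:42".drop_prefix(b"linkrev:"), None);
        // `split_2` requires the separator to be present, otherwise it bails.
        assert_eq!(
            b"key=value".split_2(b'='),
            Some((&b"key"[..], &b"value"[..]))
        );
        assert_eq!(b"no separator".split_2(b'='), None);
    }
}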
128
130
129 pub trait Escaped {
131 pub trait Escaped {
130 /// Return bytes escaped for display to the user
132 /// Return bytes escaped for display to the user
131 fn escaped_bytes(&self) -> Vec<u8>;
133 fn escaped_bytes(&self) -> Vec<u8>;
132 }
134 }
133
135
134 impl Escaped for u8 {
136 impl Escaped for u8 {
135 fn escaped_bytes(&self) -> Vec<u8> {
137 fn escaped_bytes(&self) -> Vec<u8> {
136 let mut acc = vec![];
138 let mut acc = vec![];
137 match self {
139 match self {
138 c @ b'\'' | c @ b'\\' => {
140 c @ b'\'' | c @ b'\\' => {
139 acc.push(b'\\');
141 acc.push(b'\\');
140 acc.push(*c);
142 acc.push(*c);
141 }
143 }
142 b'\t' => {
144 b'\t' => {
143 acc.extend(br"\\t");
145 acc.extend(br"\\t");
144 }
146 }
145 b'\n' => {
147 b'\n' => {
146 acc.extend(br"\\n");
148 acc.extend(br"\\n");
147 }
149 }
148 b'\r' => {
150 b'\r' => {
149 acc.extend(br"\\r");
151 acc.extend(br"\\r");
150 }
152 }
151 c if (*c < b' ' || *c >= 127) => {
153 c if (*c < b' ' || *c >= 127) => {
152 write!(acc, "\\x{:x}", self).unwrap();
154 write!(acc, "\\x{:x}", self).unwrap();
153 }
155 }
154 c => {
156 c => {
155 acc.push(*c);
157 acc.push(*c);
156 }
158 }
157 }
159 }
158 acc
160 acc
159 }
161 }
160 }
162 }
161
163
162 impl<'a, T: Escaped> Escaped for &'a [T] {
164 impl<'a, T: Escaped> Escaped for &'a [T] {
163 fn escaped_bytes(&self) -> Vec<u8> {
165 fn escaped_bytes(&self) -> Vec<u8> {
164 self.iter().flat_map(Escaped::escaped_bytes).collect()
166 self.iter().flat_map(Escaped::escaped_bytes).collect()
165 }
167 }
166 }
168 }
167
169
168 impl<T: Escaped> Escaped for Vec<T> {
170 impl<T: Escaped> Escaped for Vec<T> {
169 fn escaped_bytes(&self) -> Vec<u8> {
171 fn escaped_bytes(&self) -> Vec<u8> {
170 self.deref().escaped_bytes()
172 self.deref().escaped_bytes()
171 }
173 }
172 }
174 }
173
175
174 impl<'a> Escaped for &'a HgPath {
176 impl<'a> Escaped for &'a HgPath {
175 fn escaped_bytes(&self) -> Vec<u8> {
177 fn escaped_bytes(&self) -> Vec<u8> {
176 self.as_bytes().escaped_bytes()
178 self.as_bytes().escaped_bytes()
177 }
179 }
178 }
180 }
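The `Escaped` impls compose: escaping a slice escapes each byte and concatenates the results. A sketch of the expected output for a couple of the match arms above (hypothetical test, not part of this changeset):

#[cfg(test)]
mod escaped_example {
    use super::Escaped;

    #[test]
    fn escapes_quotes_and_newlines() {
        let bytes: &[u8] = b"it's\n";
        // Per the match arms in `Escaped for u8`: b'\'' gains a single
        // backslash, and b'\n' becomes the literal bytes of r"\\n".
        assert_eq!(bytes.escaped_bytes(), b"it\\'s\\\\n".to_vec());
    }
}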
179
181
180 // TODO: use the str method when we require Rust 1.45
182 // TODO: use the str method when we require Rust 1.45
181 pub(crate) fn strip_suffix<'a>(s: &'a str, suffix: &str) -> Option<&'a str> {
183 pub(crate) fn strip_suffix<'a>(s: &'a str, suffix: &str) -> Option<&'a str> {
182 if s.ends_with(suffix) {
184 if s.ends_with(suffix) {
183 Some(&s[..s.len() - suffix.len()])
185 Some(&s[..s.len() - suffix.len()])
184 } else {
186 } else {
185 None
187 None
186 }
188 }
187 }
189 }
188
190
189 pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
191 pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
190 std::env::current_dir().map_err(|error| HgError::IoError {
192 std::env::current_dir().map_err(|error| HgError::IoError {
191 error,
193 error,
192 context: IoErrorContext::CurrentDir,
194 context: IoErrorContext::CurrentDir,
193 })
195 })
194 }
196 }
195
197
196 pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
198 pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
197 std::env::current_exe().map_err(|error| HgError::IoError {
199 std::env::current_exe().map_err(|error| HgError::IoError {
198 error,
200 error,
199 context: IoErrorContext::CurrentExe,
201 context: IoErrorContext::CurrentExe,
200 })
202 })
201 }
203 }
204
205 pub(crate) enum MergeResult<V> {
206 UseLeftValue,
207 UseRightValue,
208 UseNewValue(V),
209 }
210
211 /// Return the union of the two given maps,
212 /// calling `merge(key, left_value, right_value)` to resolve keys that exist in
213 /// both.
214 ///
215 /// CC https://github.com/bodil/im-rs/issues/166
216 pub(crate) fn ordmap_union_with_merge<K, V>(
217 left: OrdMap<K, V>,
218 right: OrdMap<K, V>,
219 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
220 ) -> OrdMap<K, V>
221 where
222 K: Clone + Ord,
223 V: Clone + PartialEq,
224 {
225 if left.ptr_eq(&right) {
226 // One of the two maps is an unmodified clone of the other
227 left
228 } else if left.len() / 2 > right.len() {
229 // When two maps have different sizes,
230 // their size difference is a lower bound on
231 // how many keys of the larger map are not also in the smaller map.
232 // This in turn is a lower bound on the number of differences in
233 // `OrdMap::diff` and the "amount of work" that would be done
234 // by `ordmap_union_with_merge_by_diff`.
235 //
236 // Here `left` is more than twice the size of `right`,
237 // so the number of differences is more than the total size of
238 // `right`. Therefore an algorithm based on iterating `right`
239 // is more efficient.
240 //
241 // This helps a lot when a tiny (or empty) map is merged
242 // with a large one.
243 ordmap_union_with_merge_by_iter(left, right, merge)
244 } else if left.len() < right.len() / 2 {
245 // Same as above but with `left` and `right` swapped
246 ordmap_union_with_merge_by_iter(right, left, |key, a, b| {
247 // Also swapped in `merge` arguments:
248 match merge(key, b, a) {
249 MergeResult::UseNewValue(v) => MergeResult::UseNewValue(v),
250 // … and swap back in `merge` result:
251 MergeResult::UseLeftValue => MergeResult::UseRightValue,
252 MergeResult::UseRightValue => MergeResult::UseLeftValue,
253 }
254 })
255 } else {
256 // For maps of similar size, use the algorithm based on `OrdMap::diff`
257 ordmap_union_with_merge_by_diff(left, right, merge)
258 }
259 }
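A usage sketch for the public entry point: the caller only sees the resulting union, and the closure decides what happens on conflicting keys. The keys and values below are made up for illustration; in the copy-tracing caller the values are `CopySource` entries. Not part of this changeset:

#[cfg(test)]
mod union_with_merge_example {
    use super::{ordmap_union_with_merge, MergeResult};
    use im_rc::ordmap::OrdMap;

    #[test]
    fn conflicts_go_through_the_callback() {
        let left: OrdMap<&str, u32> =
            [("a", 1), ("b", 2)].iter().cloned().collect();
        let right: OrdMap<&str, u32> =
            [("b", 20), ("c", 30)].iter().cloned().collect();
        let merged = ordmap_union_with_merge(left, right, |_key, l, r| {
            // Keep the larger value. `UseLeftValue`/`UseRightValue` avoid a
            // clone; `UseNewValue` would insert a freshly computed value.
            if l >= r {
                MergeResult::UseLeftValue
            } else {
                MergeResult::UseRightValue
            }
        });
        assert_eq!(merged.get("a"), Some(&1));
        assert_eq!(merged.get("b"), Some(&20)); // resolved by the callback
        assert_eq!(merged.get("c"), Some(&30));
    }
}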
260
261 /// Efficient if `right` is much smaller than `left`
262 fn ordmap_union_with_merge_by_iter<K, V>(
263 mut left: OrdMap<K, V>,
264 right: OrdMap<K, V>,
265 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
266 ) -> OrdMap<K, V>
267 where
268 K: Clone + Ord,
269 V: Clone,
270 {
271 for (key, right_value) in right {
272 match left.get(&key) {
273 None => {
274 left.insert(key, right_value);
275 }
276 Some(left_value) => match merge(&key, left_value, &right_value) {
277 MergeResult::UseLeftValue => {}
278 MergeResult::UseRightValue => {
279 left.insert(key, right_value);
280 }
281 MergeResult::UseNewValue(new_value) => {
282 left.insert(key, new_value);
283 }
284 },
285 }
286 }
287 left
288 }
289
290 /// Fallback when both maps are of similar size
291 fn ordmap_union_with_merge_by_diff<K, V>(
292 mut left: OrdMap<K, V>,
293 mut right: OrdMap<K, V>,
294 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
295 ) -> OrdMap<K, V>
296 where
297 K: Clone + Ord,
298 V: Clone + PartialEq,
299 {
300 // (key, value) pairs that would need to be inserted in either map
301 // in order to turn it into the union.
302 //
303 // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted,
304 // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>`
305 // with `left_updates` only borrowing from `right` and `right_updates` from
306 // `left`, and with `Cow::Owned` used for `MergeResult::UseNewValue`.
307 //
308 // This would allow moving all `.clone()` calls to after we’ve decided
309 // which of `right_updates` or `left_updates` to use
310 // (value ones becoming `Cow::into_owned`),
311 // and avoid making clones we don’t end up using.
312 let mut left_updates = Vec::new();
313 let mut right_updates = Vec::new();
314
315 for difference in left.diff(&right) {
316 match difference {
317 DiffItem::Add(key, value) => {
318 left_updates.push((key.clone(), value.clone()))
319 }
320 DiffItem::Remove(key, value) => {
321 right_updates.push((key.clone(), value.clone()))
322 }
323 DiffItem::Update {
324 old: (key, left_value),
325 new: (_, right_value),
326 } => match merge(key, left_value, right_value) {
327 MergeResult::UseLeftValue => {
328 right_updates.push((key.clone(), left_value.clone()))
329 }
330 MergeResult::UseRightValue => {
331 left_updates.push((key.clone(), right_value.clone()))
332 }
333 MergeResult::UseNewValue(new_value) => {
334 left_updates.push((key.clone(), new_value.clone()));
335 right_updates.push((key.clone(), new_value))
336 }
337 },
338 }
339 }
340 if left_updates.len() < right_updates.len() {
341 for (key, value) in left_updates {
342 left.insert(key, value);
343 }
344 left
345 } else {
346 for (key, value) in right_updates {
347 right.insert(key, value);
348 }
349 right
350 }
351 }
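Since the size heuristic in `ordmap_union_with_merge` only picks between two implementations of the same union, the two helpers must agree on any input. A sketch of that invariant with made-up data (hypothetical test, not part of this changeset):

#[cfg(test)]
mod union_strategies_agree_example {
    use super::{
        ordmap_union_with_merge_by_diff, ordmap_union_with_merge_by_iter,
        MergeResult,
    };
    use im_rc::ordmap::OrdMap;

    #[test]
    fn iter_and_diff_strategies_give_the_same_union() {
        let left: OrdMap<u32, u32> = (0..100).map(|i| (i, i)).collect();
        let right: OrdMap<u32, u32> = (50..150).map(|i| (i, i * 10)).collect();
        // Resolve the overlapping keys by computing a new value, so that
        // `UseNewValue` is exercised on both code paths.
        let by_iter = ordmap_union_with_merge_by_iter(
            left.clone(),
            right.clone(),
            |_k, a, b| MergeResult::UseNewValue(a + b),
        );
        let by_diff = ordmap_union_with_merge_by_diff(left, right, |_k, a, b| {
            MergeResult::UseNewValue(a + b)
        });
        assert_eq!(by_iter, by_diff);
    }
}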