upstream/mercurial-mirror Commit - r47328:435d9fc7

copies-rust: extract generic map merge logic from merge_copies_dict...

Simon Sapin -

r47328:435d9fc7 default

parent child

rust/hg-core/src/copy_tracing.rs

0 +20 -191

              use crate::utils::hg_path::HgPath;
              use crate::utils::hg_path::HgPathBuf;
              use crate::Revision;
              use crate::NULL_REVISION;
-             use im_rc::ordmap::DiffItem;
              use im_rc::ordmap::Entry;
              use im_rc::ordmap::OrdMap;
              use im_rc::OrdSet;
              use std::cmp::Ordering;
              use std::collections::HashMap;
              use std::convert::TryInto;
              /// Final result of copy tracing: maps a copy destination path to its
              /// source path.
              pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
              /// Small integer standing in for a full path while tracing (see
              /// `TwoWayPathMap`).
              type PathToken = usize;
              /// Copy information recorded for one destination path.
              #[derive(Clone, Debug)]
              struct CopySource {
                  /// Revision at which this copy information was added.
                  rev: Revision,
                  /// The copy source (set to `None` when the associated key was
                  /// deleted).
                  path: Option<PathToken>,
                  /// Set of previous `CopySource.rev` values directly or indirectly
                  /// overwritten by this one.
                  overwritten: OrdSet<Revision>,
              }
              impl CopySource {
                  /// Create a new CopySource with an empty overwrite history.
                  ///
                  /// Use this when no previous copy source existed.
                  fn new(rev: Revision, path: Option<PathToken>) -> Self {
                      Self {
                          rev,
                          path,
                          overwritten: OrdSet::new(),
                      }
                  }
                  /// Create a new CopySource from merging two others.
                  ///
                  /// The result is stamped with `rev`, keeps the `winner` path, and
                  /// records as overwritten both inputs' revisions plus everything
                  /// either of them had already overwritten.
                  ///
                  /// Use this when merging two InternalPathCopies requires active merging of
                  /// some entries.
                  fn new_from_merge(rev: Revision, winner: &Self, loser: &Self) -> Self {
                      let mut overwritten = OrdSet::new();
                      overwritten.extend(winner.overwritten.iter().copied());
                      overwritten.extend(loser.overwritten.iter().copied());
                      overwritten.insert(winner.rev);
                      overwritten.insert(loser.rev);
                      Self {
                          rev,
                          path: winner.path,
                          overwritten,
                      }
                  }
                  /// Update the value of a pre-existing CopySource.
                  ///
                  /// The revision being replaced is added to the overwrite history.
                  ///
                  /// Use this when recording copy information from parent → child edges.
                  fn overwrite(&mut self, rev: Revision, path: Option<PathToken>) {
                      self.overwritten.insert(self.rev);
                      self.rev = rev;
                      self.path = path;
                  }
                  /// Mark pre-existing copy information as "dropped" by a file deletion.
                  ///
                  /// This is an overwrite whose new source is `None`.
                  ///
                  /// Use this when recording copy information from parent → child edges.
                  fn mark_delete(&mut self, rev: Revision) {
                      self.overwrite(rev, None);
                  }
                  /// Mark pre-existing copy information as "dropped" by a file deletion,
                  /// also absorbing the history of `other`.
                  ///
                  /// In addition to what `mark_delete` records, the overwrite history
                  /// gains everything `other` had overwritten, plus `other.rev` itself
                  /// unless it is the deleting revision `rev`.
                  ///
                  /// Use this when recording copy information from parent → child edges
                  /// and both parents carry a (different) entry for the deleted file.
                  fn mark_delete_with_pair(&mut self, rev: Revision, other: &Self) {
                      self.overwritten.insert(self.rev);
                      if other.rev != rev {
                          self.overwritten.insert(other.rev);
                      }
                      self.overwritten.extend(other.overwritten.iter().copied());
                      self.rev = rev;
                      self.path = None;
                  }
                  /// Return true if `other` has (directly or indirectly) overwritten the
                  /// revision of `self`.
                  fn is_overwritten_by(&self, other: &Self) -> bool {
                      other.overwritten.contains(&self.rev)
                  }
              }
              // Equality only looks at the revision: for the same "dest", the content
              // generated for a given revision is always the same.
              impl PartialEq for CopySource {
                  fn eq(&self, other: &Self) -> bool {
                      let same_rev = self.rev == other.rev;
                      if same_rev {
                          // Debug-build sanity checks of the invariant stated above.
                          debug_assert!(self.path == other.path);
                          debug_assert!(self.overwritten == other.overwritten);
                      }
                      same_rev
                  }
              }
              /// Maps a copy destination to its `CopySource` (the source path plus the
              /// revision "timestamp" of the operation that recorded it).
              type InternalPathCopies = OrdMap<PathToken, CopySource>;
              /// Holds parent 1, parent 2 and the relevant file actions of a revision.
              pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
              /// Represents the files affected by a changeset.
              ///
              /// This holds a subset of mercurial.metadata.ChangingFiles as we do not need
              /// all the data categories tracked by it.
              pub struct ChangedFiles<'a> {
                  /// Number of entries, as read from the 4-byte header.
                  nb_items: u32,
                  /// Fixed-size entries (flags, filename end offset, copy source index).
                  index: &'a [u8],
                  /// Bytes following the index; filenames are sliced out of it.
                  data: &'a [u8],
              }
              /// Represent active changes that affect the copy tracing.
              enum Action<'a> {
                  /// The parent → children edge is removing a file
                  ///
                  /// (actually, this could be the edge from the other parent, but it does
                  /// not matter)
                  Removed(&'a HgPath),
                  /// The parent → children edge introduces copy information between (dest,
                  /// source), relative to the first parent
                  CopiedFromP1(&'a HgPath, &'a HgPath),
                  /// Same as `CopiedFromP1`, but relative to the second parent
                  CopiedFromP2(&'a HgPath, &'a HgPath),
              }
              /// This expresses the possible "special" cases we can get in a merge
              ///
              /// See mercurial/metadata.py for details on these values.
              #[derive(PartialEq)]
              enum MergeCase {
                  /// Merged: file had history on both sides that needed to be merged
                  Merged,
                  /// Salvaged: file was candidate for deletion, but survived the merge
                  Salvaged,
                  /// Normal: Not one of the two cases above
                  Normal,
              }
              /// One decoded entry: (flags, filename, copy source filename).
              type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
              const EMPTY: &[u8] = b"";
              // The two low bits of the flag byte describe copy information.
              const COPY_MASK: u8 = 3;
              const P1_COPY: u8 = 2;
              const P2_COPY: u8 = 3;
              // Bits 2..=4 of the flag byte describe the action taken on the file.
              const ACTION_MASK: u8 = 28;
              const REMOVED: u8 = 12;
              const MERGED: u8 = 8;
              const SALVAGED: u8 = 16;
              impl<'a> ChangedFiles<'a> {
                  /// Size of the header (big-endian u32 entry count) preceding the index.
                  const INDEX_START: usize = 4;
                  /// Size of one index entry: one flag byte and two big-endian u32.
                  const ENTRY_SIZE: u32 = 9;
                  /// Offset, within an entry, of the "end of filename" field.
                  const FILENAME_START: u32 = 1;
                  /// Offset, within an entry, of the "copy source index" field.
                  const COPY_SOURCE_START: u32 = 5;
                  /// Parse `data` into a `ChangedFiles` borrowing from it.
                  ///
                  /// A header announcing zero entries is accepted and yields an empty
                  /// instance.
                  ///
                  /// # Panics
                  ///
                  /// Panics if `data` is too short for the header, for the index
                  /// announced by the header, or for the filenames referenced by the last
                  /// index entry.
                  pub fn new(data: &'a [u8]) -> Self {
                      assert!(
                          data.len() >= 4,
                          "data size ({}) is too small to contain the header (4)",
                          data.len()
                      );
                      let nb_items_raw: [u8; 4] = (&data[0..=3])
                          .try_into()
                          .expect("failed to turn 4 bytes into 4 bytes");
                      let nb_items = u32::from_be_bytes(nb_items_raw);
                      // Widen before multiplying: `nb_items * ENTRY_SIZE` does not
                      // always fit in a u32.
                      let index_size = (nb_items as usize)
                          .checked_mul(Self::ENTRY_SIZE as usize)
                          .expect("index size does not fit in usize");
                      let index_end = Self::INDEX_START
                          .checked_add(index_size)
                          .expect("index end does not fit in usize");
                      assert!(
                          data.len() >= index_end,
                          "data size ({}) is too small to fit the index_data ({})",
                          data.len(),
                          index_end
                      );
                      let ret = ChangedFiles {
                          nb_items,
                          index: &data[Self::INDEX_START..index_end],
                          data: &data[index_end..],
                      };
                      // The last entry tells how many filename bytes are needed; with no
                      // entry at all, none are.
                      let max_data = match nb_items.checked_sub(1) {
                          Some(last) => ret.filename_end(last) as usize,
                          None => 0,
                      };
                      assert!(
                          ret.data.len() >= max_data,
                          "data size ({}) is too small to fit all data ({})",
                          data.len(),
                          index_end + max_data
                      );
                      ret
                  }
                  /// Create an instance holding no entry at all.
                  pub fn new_empty() -> Self {
                      ChangedFiles {
                          nb_items: 0,
                          index: EMPTY,
                          data: EMPTY,
                      }
                  }
                  /// internal function to return the byte offset, within the index, of
                  /// the entry at a given index
                  fn entry_offset(idx: u32) -> usize {
                      (idx as usize) * (Self::ENTRY_SIZE as usize)
                  }
                  /// internal function to return an individual entry at a given index
                  ///
                  /// Panics if `idx` is not smaller than the number of entries.
                  fn entry(&'a self, idx: u32) -> FileChange<'a> {
                      if idx >= self.nb_items {
                          panic!(
                              "index for entry is higher that the number of file {} >= {}",
                              idx, self.nb_items
                          )
                      }
                      let flags = self.flags(idx);
                      let filename = self.filename(idx);
                      // The copy source is stored as the index of another entry.
                      let copy_idx = self.copy_idx(idx);
                      let copy_source = self.filename(copy_idx);
                      (flags, filename, copy_source)
                  }
                  /// internal function to return the filename of the entry at a given index
                  fn filename(&self, idx: u32) -> &HgPath {
                      // A filename starts where the previous one ends (0 for the first).
                      let filename_start = match idx.checked_sub(1) {
                          Some(previous) => self.filename_end(previous) as usize,
                          None => 0,
                      };
                      let filename_end = self.filename_end(idx) as usize;
                      HgPath::new(&self.data[filename_start..filename_end])
                  }
                  /// internal function to return the flag field of the entry at a given
                  /// index
                  fn flags(&self, idx: u32) -> u8 {
                      self.index[Self::entry_offset(idx)]
                  }
                  /// internal function to return the end of a filename part at a given index
                  fn filename_end(&self, idx: u32) -> u32 {
                      let offset = Self::entry_offset(idx);
                      let start = offset + Self::FILENAME_START as usize;
                      let end = offset + Self::COPY_SOURCE_START as usize;
                      let raw: [u8; 4] = (&self.index[start..end])
                          .try_into()
                          .expect("failed to turn 4 bytes into 4 bytes");
                      u32::from_be_bytes(raw)
                  }
                  /// internal function to return index of the copy source of the entry at a
                  /// given index
                  fn copy_idx(&self, idx: u32) -> u32 {
                      let offset = Self::entry_offset(idx);
                      let start = offset + Self::COPY_SOURCE_START as usize;
                      let end = offset + Self::ENTRY_SIZE as usize;
                      let raw: [u8; 4] = (&self.index[start..end])
                          .try_into()
                          .expect("failed to turn 4 bytes into 4 bytes");
                      u32::from_be_bytes(raw)
                  }
                  /// Return an iterator over all the `Action` in this instance.
                  fn iter_actions(&self) -> ActionsIterator {
                      ActionsIterator {
                          changes: self,
                          current: 0,
                      }
                  }
                  /// return the MergeCase value associated with a filename
                  ///
                  /// The entries are binary-searched by filename; a path that is not
                  /// found is reported as `MergeCase::Normal`.
                  fn get_merge_case(&self, path: &HgPath) -> MergeCase {
                      if self.nb_items == 0 {
                          return MergeCase::Normal;
                      }
                      let mut low_part = 0;
                      let mut high_part = self.nb_items;
                      while low_part < high_part {
                          let cursor = (low_part + high_part - 1) / 2;
                          let (flags, filename, _source) = self.entry(cursor);
                          // NOTE(review): the search moves toward higher indices when
                          // `path` sorts before the probed filename, which is only
                          // correct if entries are stored in descending order. Confirm
                          // against the code producing this data.
                          match path.cmp(filename) {
                              Ordering::Less => low_part = cursor + 1,
                              Ordering::Greater => high_part = cursor,
                              Ordering::Equal => {
                                  return match flags & ACTION_MASK {
                                      MERGED => MergeCase::Merged,
                                      SALVAGED => MergeCase::Salvaged,
                                      _ => MergeCase::Normal,
                                  };
                              }
                          }
                      }
                      MergeCase::Normal
                  }
              }
              /// Iterator over the `Action`s encoded in a `ChangedFiles`.
              struct ActionsIterator<'a> {
                  /// The entries being walked.
                  changes: &'a ChangedFiles<'a>,
                  /// Index of the next entry to examine.
                  current: u32,
              }
              impl<'a> Iterator for ActionsIterator<'a> {
                  type Item = Action<'a>;
                  /// Advance to the next entry that is a removal or a copy, skipping
                  /// every other entry.
                  fn next(&mut self) -> Option<Action<'a>> {
                      loop {
                          if self.current >= self.changes.nb_items {
                              break None;
                          }
                          let (flags, file, source) = self.changes.entry(self.current);
                          self.current += 1;
                          // A removal takes precedence over any copy bits.
                          if flags & ACTION_MASK == REMOVED {
                              break Some(Action::Removed(file));
                          }
                          match flags & COPY_MASK {
                              P1_COPY => break Some(Action::CopiedFromP1(file, source)),
                              P2_COPY => break Some(Action::CopiedFromP2(file, source)),
                              _ => {}
                          }
                      }
                  }
              }
              /// A small struct whose purpose is to ensure the lifetime of the bytes
              /// referenced in ChangedFiles
              ///
              /// It is passed to the RevInfoMaker callback, which can assign any necessary
              /// content to the `data` attribute. The copy tracing code is responsible for
              /// keeping the DataHolder alive at least as long as the ChangedFiles object.
              pub struct DataHolder<D> {
                  /// The RevInfoMaker callback should assign the data referenced by the
                  /// ChangedFiles struct it returns to this attribute. The DataHolder
                  /// lifetime will be at least as long as the ChangedFiles one.
                  pub data: Option<D>,
              }
              /// Callback producing the `RevInfo` of a revision; the returned value may
              /// borrow from the `DataHolder` it is given.
              pub type RevInfoMaker<'a, D> =
                  Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
              /// A small "tokenizer" responsible for turning full HgPath into lighter
              /// PathToken
              ///
              /// Dealing with small objects, like integers, is much faster, so HgPath
              /// inputs are turned into integer "PathToken" and converted back in the end.
              #[derive(Clone, Debug, Default)]
              struct TwoWayPathMap {
                  /// path → token lookup
                  token: HashMap<HgPathBuf, PathToken>,
                  /// token → path lookup (a token is an index into this vector)
                  path: Vec<HgPathBuf>,
              }
              impl TwoWayPathMap {
                  /// Return the token associated with `path`, allocating a new one the
                  /// first time a path is seen.
                  fn tokenize(&mut self, path: &HgPath) -> PathToken {
                      match self.token.get(path) {
                          Some(a) => *a,
                          None => {
                              // Tokens are handed out sequentially, so the next token is
                              // also the index the path gets in `self.path`.
                              let a = self.token.len();
                              let buf = path.to_owned();
                              self.path.push(buf.clone());
                              self.token.insert(buf, a);
                              a
                          }
                      }
                  }
                  /// Return the path associated with `token`.
                  ///
                  /// # Panics
                  ///
                  /// Panics if `token` was not produced by `tokenize` on this map.
                  fn untokenize(&self, token: PathToken) -> &HgPathBuf {
                      assert!(token < self.path.len(), "Unknown token: {}", token);
                      &self.path[token]
                  }
              }
              /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
              ///
              /// Arguments are:
              ///
              /// revs: all revisions to be considered
              /// children_count: a {parent → number of children} mapping; the copies
              ///   computed for a parent are dropped once that many children have
              ///   consumed them
              /// target_rev: the final revision we are combining copies to
              /// rev_info(rev, data_holder): callback to get revision information:
              ///   * first parent
              ///   * second parent
              ///   * ChangedFiles
              ///
              /// Panics if `target_rev` ends up with no copy information recorded (e.g. it
              /// is not part of `revs`).
              pub fn combine_changeset_copies<D>(
                  revs: Vec<Revision>,
                  mut children_count: HashMap<Revision, usize>,
                  target_rev: Revision,
                  rev_info: RevInfoMaker<D>,
              ) -> PathCopies {
                  let mut all_copies = HashMap::new();
                  let mut path_map = TwoWayPathMap::default();
                  for rev in revs {
                      let mut d: DataHolder<D> = DataHolder { data: None };
                      let (p1, p2, changes) = rev_info(rev, &mut d);
                      // We will chain the copies information accumulated for the parents
                      // with the individual copies information of the current revision,
                      // creating a new InternalPathCopies for each parent → `rev` edge.
                      //
                      // Retrieve data computed in a previous iteration; it will be None
                      // if the edge is not to be traversed.
                      let p1_copies = match p1 {
                          NULL_REVISION => None,
                          _ => get_and_clean_parent_copies(
                              &mut all_copies,
                              &mut children_count,
                              p1,
                          ),
                      };
                      let p2_copies = match p2 {
                          NULL_REVISION => None,
                          _ => get_and_clean_parent_copies(
                              &mut all_copies,
                              &mut children_count,
                              p2,
                          ),
                      };
                      // combine it with data for that revision
                      let (p1_copies, p2_copies) =
                          chain_changes(&mut path_map, p1_copies, p2_copies, &changes, rev);
                      let copies = match (p1_copies, p2_copies) {
                          // Both edges carry data: p2 is the "minor" side and p1 the
                          // "major" one.
                          (Some(p1_copies), Some(p2_copies)) => Some(merge_copies_dict(
                              &path_map, rev, p2_copies, p1_copies, &changes,
                          )),
                          // At most one edge carries data: use it as is.
                          (c, None) | (None, c) => c,
                      };
                      if let Some(c) = copies {
                          all_copies.insert(rev, c);
                      }
                  }
                  // Drop internal information (like the timestamp) and return the final
                  // mapping. Entries recording a deletion (no source path) are skipped.
                  let tt_result = all_copies
                      .remove(&target_rev)
                      .expect("target revision was not processed");
                  tt_result
                      .into_iter()
                      .filter_map(|(dest, tt_source)| {
                          tt_source.path.map(|path| {
                              (
                                  path_map.untokenize(dest).to_owned(),
                                  path_map.untokenize(path).to_owned(),
                              )
                          })
                      })
                      .collect()
              }
              /// fetch previous computed information
              ///
              /// If no other children are expected to need this information, we drop it from
              /// the cache.
              ///
              /// If parent is not part of the set we are expected to walk, return None.
              fn get_and_clean_parent_copies(
                  all_copies: &mut HashMap<Revision, InternalPathCopies>,
                  children_count: &mut HashMap<Revision, usize>,
                  parent_rev: Revision,
              ) -> Option<InternalPathCopies> {
                  let count = children_count.get_mut(&parent_rev)?;
                  *count -= 1;
                  if *count == 0 {
                      match all_copies.remove(&parent_rev) {
                          Some(c) => Some(c),
                          None => Some(InternalPathCopies::default()),
                      }
                  } else {
                      match all_copies.get(&parent_rev) {
                          Some(c) => Some(c.clone()),
                          None => Some(InternalPathCopies::default()),
                      }
                  }
              }
              /// Combine ChangedFiles with some existing PathCopies information and return
              /// the result
              ///
              /// `base_p1_copies` / `base_p2_copies` are the copies accumulated along the
              /// edge from each parent (None when that edge is not traversed). The
              /// returned pair holds the same information updated with the copies and
              /// removals that `changes` records for `current_rev`.
              fn chain_changes(
                  path_map: &mut TwoWayPathMap,
                  base_p1_copies: Option<InternalPathCopies>,
                  base_p2_copies: Option<InternalPathCopies>,
                  changes: &ChangedFiles,
                  current_rev: Revision,
              ) -> (Option<InternalPathCopies>, Option<InternalPathCopies>) {
                  // Fast path the "nothing to do" case.
                  if let (None, None) = (&base_p1_copies, &base_p2_copies) {
                      return (None, None);
                  }
                  // Work on copies so that the untouched base mappings stay available
                  // for chaining lookups in `add_one_copy`.
                  let mut p1_copies = base_p1_copies.clone();
                  let mut p2_copies = base_p2_copies.clone();
                  for action in changes.iter_actions() {
                      match action {
                          Action::CopiedFromP1(path_dest, path_source) => {
                              match &mut p1_copies {
                                  None => (), // This is not an edge we should process.
                                  Some(copies) => add_one_copy(
                                      current_rev,
                                      path_map,
                                      copies,
                                      base_p1_copies.as_ref().unwrap(),
                                      path_dest,
                                      path_source,
                                  ),
                              }
                          }
                          Action::CopiedFromP2(path_dest, path_source) => {
                              match &mut p2_copies {
                                  None => (), // This is not an edge we should process.
                                  Some(copies) => add_one_copy(
                                      current_rev,
                                      path_map,
                                      copies,
                                      base_p2_copies.as_ref().unwrap(),
                                      path_dest,
                                      path_source,
                                  ),
                              }
                          }
                          Action::Removed(deleted_path) => {
                              // We must drop copy information for removed files.
                              //
                              // We need to explicitly record them as dropped to
                              // propagate this information when merging two
                              // InternalPathCopies objects.
                              let deleted = path_map.tokenize(deleted_path);
                              // Only pre-existing entries matter: a removal never creates
                              // a new entry.
                              let p1_entry = match &mut p1_copies {
                                  None => None,
                                  Some(copies) => match copies.entry(deleted) {
                                      Entry::Occupied(e) => Some(e),
                                      Entry::Vacant(_) => None,
                                  },
                              };
                              let p2_entry = match &mut p2_copies {
                                  None => None,
                                  Some(copies) => match copies.entry(deleted) {
                                      Entry::Occupied(e) => Some(e),
                                      Entry::Vacant(_) => None,
                                  },
                              };
                              match (p1_entry, p2_entry) {
                                  (None, None) => (),
                                  (Some(mut e), None) => {
                                      e.get_mut().mark_delete(current_rev)
                                  }
                                  (None, Some(mut e)) => {
                                      e.get_mut().mark_delete(current_rev)
                                  }
                                  (Some(mut e1), Some(mut e2)) => {
                                      // Both sides know about the file: update the p1
                                      // entry (absorbing the p2 history when the two
                                      // entries differ), then store the same value on
                                      // the p2 side.
                                      let cs1 = e1.get_mut();
                                      let cs2 = e2.get();
                                      if cs1 == cs2 {
                                          cs1.mark_delete(current_rev);
                                      } else {
                                          cs1.mark_delete_with_pair(current_rev, &cs2);
                                      }
                                      e2.insert(cs1.clone());
                                  }
                              }
                          }
                      }
                  }
                  (p1_copies, p2_copies)
              }
              // insert one new copy information in an InternalPathCopies
              //
              // This deal with chaining and overwrite.
              fn add_one_copy(
                  current_rev: Revision,
                  path_map: &mut TwoWayPathMap,
                  copies: &mut InternalPathCopies,
                  base_copies: &InternalPathCopies,
                  path_dest: &HgPath,
                  path_source: &HgPath,
              ) {
                  let dest = path_map.tokenize(path_dest);
                  let source = path_map.tokenize(path_source);
                  let entry;
                  if let Some(v) = base_copies.get(&source) {
                      entry = match &v.path {
                          Some(path) => Some((*(path)).to_owned()),
                          None => Some(source.to_owned()),
                      }
                  } else {
                      entry = Some(source.to_owned());
                  }
                  // Each new entry is introduced by the children, we
                  // record this information as we will need it to take
                  // the right decision when merging conflicting copy
                  // information. See merge_copies_dict for details.
                  match copies.entry(dest) {
                      Entry::Vacant(slot) => {
                          let ttpc = CopySource::new(current_rev, entry);
                          slot.insert(ttpc);
                      }
                      Entry::Occupied(mut slot) => {
                          let ttpc = slot.get_mut();
                          ttpc.overwrite(current_rev, entry);
                      }
                  }
              }
              /// merge two copies-mapping together, minor and major
              ///
              /// In case of conflict, value from "major" will be picked, unless in some
              /// cases. See inline documentation for details.
              fn merge_copies_dict(
                  path_map: &TwoWayPathMap,
                  current_merge: Revision,
-                 mut minor: InternalPathCopies,
-                 mut major: InternalPathCopies,
+                 minor: InternalPathCopies,
+                 major: InternalPathCopies,
                  changes: &ChangedFiles,
              ) -> InternalPathCopies {
-                 // This closure exist as temporary help while multiple developper are
-                 // actively working on this code. Feel free to re-inline it once this
-                 // code is more settled.
-                 let cmp_value =
-                     |dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| {
-                         compare_value(
+                 use crate::utils::{ordmap_union_with_merge, MergeResult};
+                 ordmap_union_with_merge(minor, major, |dest, src_minor, src_major| {
+                     let (pick, overwrite) = compare_value(
-                             path_map,
-                             current_merge,
-                             changes,
-                             dest,
-                             src_minor,
-                             src_major,
+                         )
+                     );
+                     if overwrite {
+                         let (winner, loser) = match pick {
+                             MergePick::Major | MergePick::Any => (src_major, src_minor),
+                             MergePick::Minor => (src_minor, src_major),
                      };
-                 if minor.is_empty() {
-                     major
-                 } else if major.is_empty() {
-                     minor
-                 } else if minor.len() * 2 < major.len() {
-                     // Lets says we are merging two InternalPathCopies instance A and B.
-                     //
-                     // If A contains N items, the merge result will never contains more
-                     // than N values differents than the one in A
-                     //
-                     // If B contains M items, with M > N, the merge result will always
-                     // result in a minimum of M - N value differents than the on in
-                     // A
-                     //
-                     // As a result, if N < (M-N), we know that simply iterating over A will
-                     // yield less difference than iterating over the difference
-                     // between A and B.
-                     //
-                     // This help performance a lot in case were a tiny
-                     // InternalPathCopies is merged with a much larger one.
-                     for (dest, src_minor) in minor {
-                         let src_major = major.get(&dest);
-                         match src_major {
-                             None => {
-                                 major.insert(dest, src_minor);
+                             }
-                             Some(src_major) => {
-                                 let (pick, overwrite) =
-                                     cmp_value(&dest, &src_minor, src_major);
-                                 if overwrite {
-                                     let src = match pick {
-                                         MergePick::Major => CopySource::new_from_merge(
-                                             current_merge,
-                                             src_major,
-                                             &src_minor,
-                                         ),
-                                         MergePick::Minor => CopySource::new_from_merge(
-                                             current_merge,
-                                             &src_minor,
-                                             src_major,
-                                         ),
-                                         MergePick::Any => CopySource::new_from_merge(
+                         MergeResult::UseNewValue(CopySource::new_from_merge(
-                                             current_merge,
-                                             src_major,
-                                             &src_minor,
-                                         ),
-                                     };
-                                     major.insert(dest, src);
-                                 } else {
-                                     match pick {
-                                         MergePick::Any | MergePick::Major => None,
-                                         MergePick::Minor => major.insert(dest, src_minor),
-                                     };
+                                 }
+                             }
-                         };
+                     }
-                     major
-                 } else if major.len() * 2 < minor.len() {
-                     // This use the same rational than the previous block.
-                     // (Check previous block documentation for details.)
-                     for (dest, src_major) in major {
-                         let src_minor = minor.get(&dest);
-                         match src_minor {
-                             None => {
-                                 minor.insert(dest, src_major);
+                             }
-                             Some(src_minor) => {
-                                 let (pick, overwrite) =
-                                     cmp_value(&dest, src_minor, &src_major);
-                                 if overwrite {
-                                     let src = match pick {
-                                         MergePick::Major => CopySource::new_from_merge(
-                                             current_merge,
-                                             &src_major,
-                                             src_minor,
-                                         ),
-                                         MergePick::Minor => CopySource::new_from_merge(
-                                             current_merge,
-                                             src_minor,
-                                             &src_major,
-                                         ),
-                                         MergePick::Any => CopySource::new_from_merge(
-                                             current_merge,
-                                             &src_major,
-                                             src_minor,
-                                         ),
-                                     };
-                                     minor.insert(dest, src);
+                             winner,
+                             loser,
+                         ))
-                                 } else {
-                                     match pick {
-                                         MergePick::Any | MergePick::Minor => None,
-                                         MergePick::Major => minor.insert(dest, src_major),
-                                     };
+                             MergePick::Any | MergePick::Major => {
+                                 MergeResult::UseRightValue
+                             }
+                             MergePick::Minor => MergeResult::UseLeftValue,
-                                 }
-                             }
-                         };
+                     }
-                     minor
-                 } else {
-                     let mut override_minor = Vec::new();
-                     let mut override_major = Vec::new();
-                     let mut to_major = |k: &PathToken, v: &CopySource| {
-                         override_major.push((k.clone(), v.clone()))
-                     };
-                     let mut to_minor = |k: &PathToken, v: &CopySource| {
-                         override_minor.push((k.clone(), v.clone()))
-                     };
-                     // The diff function leverage detection of the identical subpart if
-                     // minor and major has some common ancestors. This make it very
-                     // fast is most case.
-                     //
-                     // In case where the two map are vastly different in size, the current
-                     // approach is still slowish because the iteration will iterate over
-                     // all the "exclusive" content of the larger on. This situation can be
-                     // frequent when the subgraph of revision we are processing has a lot
-                     // of roots. Each roots adding they own fully new map to the mix (and
-                     // likely a small map, if the path from the root to the "main path" is
-                     // small.
-                     //
-                     // We could do better by detecting such situation and processing them
-                     // differently.
-                     for d in minor.diff(&major) {
-                         match d {
-                             DiffItem::Add(k, v) => to_minor(k, v),
-                             DiffItem::Remove(k, v) => to_major(k, v),
-                             DiffItem::Update { old, new } => {
-                                 let (dest, src_major) = new;
-                                 let (_, src_minor) = old;
-                                 let (pick, overwrite) =
-                                     cmp_value(dest, src_minor, src_major);
-                                 if overwrite {
-                                     let src = match pick {
-                                         MergePick::Major => CopySource::new_from_merge(
-                                             current_merge,
-                                             src_major,
-                                             src_minor,
-                                         ),
-                                         MergePick::Minor => CopySource::new_from_merge(
-                                             current_merge,
-                                             src_minor,
-                                             src_major,
-                                         ),
-                                         MergePick::Any => CopySource::new_from_merge(
-                                             current_merge,
-                                             src_major,
-                                             src_minor,
-                                         ),
-                                     };
-                                     to_minor(dest, &src);
-                                     to_major(dest, &src);
-                                 } else {
-                                     match pick {
-                                         MergePick::Major => to_minor(dest, src_major),
-                                         MergePick::Minor => to_major(dest, src_minor),
-                                         // If the two entry are identical, no need to do
-                                         // anything (but diff should not have yield them)
-                                         MergePick::Any => unreachable!(),
+                                     }
+                                 }
+                             }
-                         };
+                     }
-                     let updates;
-                     let mut result;
-                     if override_major.is_empty() {
-                         result = major
-                     } else if override_minor.is_empty() {
-                         result = minor
-                     } else {
-                         if override_minor.len() < override_major.len() {
-                             updates = override_minor;
-                             result = minor;
-                         } else {
-                             updates = override_major;
-                             result = major;
+                         }
-                         for (k, v) in updates {
-                             result.insert(k, v);
+                         }
+                     }
-                     result
+                 }
+                 })
              }
              /// Identifies the side that should prevail when merging two
              /// `InternalPathCopies`.
              ///
              /// `compare_value` returns one of these together with a boolean flag.
              enum MergePick {
                  /// The "major" (p1) side prevails
                  Major,
                  /// The "minor" (p2) side prevails
                  Minor,
                  /// Any side could be used (because they are the same)
                  Any,
              }
              /// Decide which side prevails in case of conflicting values for `dest`.
              ///
              /// `src_minor` and `src_major` are the copy sources recorded for `dest`
              /// on the minor and major side, and `current_merge` is the revision of
              /// the merge being processed. `path_map` and `changes` are only used in
              /// the last branch (both sides disagree on the source path), to look up
              /// the merge case recorded for `dest`.
              ///
              /// Returns the side to pick and an "overwrite" flag. When the flag is
              /// `true` the caller builds a new value from the winner and the loser
              /// with `CopySource::new_from_merge`; when it is `false` the value of
              /// the picked side is reused unchanged.
              // Several branches deliberately yield the same result for different
              // reasons; keeping them separate keeps each reason documented.
              #[allow(clippy::if_same_then_else)]
              fn compare_value(
                  path_map: &TwoWayPathMap,
                  current_merge: Revision,
                  changes: &ChangedFiles,
                  dest: &PathToken,
                  src_minor: &CopySource,
                  src_major: &CopySource,
              ) -> (MergePick, bool) {
                  if src_major == src_minor {
                      (MergePick::Any, false)
                  } else if src_major.rev == current_merge {
                      // minor is different according to the minor == major check
                      // earlier
                      debug_assert!(src_minor.rev != current_merge);
                      // The last value comes from the current merge, this value -will-
                      // win eventually.
                      (MergePick::Major, true)
                  } else if src_minor.rev == current_merge {
                      // The last value comes from the current merge, this value -will-
                      // win eventually.
                      (MergePick::Minor, true)
                  } else if src_major.path == src_minor.path {
                      // The two entries differ but share a path: they are expected to
                      // come from different revisions. (This used to compare
                      // `src_major.rev` with itself, which can never hold.)
                      debug_assert!(src_major.rev != src_minor.rev);
                      // we have the same value, but from other source;
                      if src_major.is_overwritten_by(src_minor) {
                          (MergePick::Minor, false)
                      } else if src_minor.is_overwritten_by(src_major) {
                          (MergePick::Major, false)
                      } else {
                          (MergePick::Any, true)
                      }
                  } else {
                      // Different paths: the entries are expected to come from
                      // different revisions. (This used to compare `src_major.rev`
                      // with itself, which can never hold.)
                      debug_assert!(src_major.rev != src_minor.rev);
                      let dest_path = path_map.untokenize(*dest);
                      let action = changes.get_merge_case(dest_path);
                      if src_minor.path.is_some()
                          && src_major.path.is_none()
                          && action == MergeCase::Salvaged
                      {
                          // If the file is "deleted" in the major side but was
                          // salvaged by the merge, we keep the minor side alive
                          (MergePick::Minor, true)
                      } else if src_major.path.is_some()
                          && src_minor.path.is_none()
                          && action == MergeCase::Salvaged
                      {
                          // If the file is "deleted" in the minor side but was
                          // salvaged by the merge, unconditionally preserve the
                          // major side.
                          (MergePick::Major, true)
                      } else if src_minor.is_overwritten_by(src_major) {
                          // The information from the minor version is strictly older
                          // than the major version
                          if action == MergeCase::Merged {
                              // If the file was actively merged, it means some
                              // non-copy activity happened on the other branch. It
                              // means the older copy information is still relevant.
                              //
                              // The major side wins such conflict.
                              (MergePick::Major, true)
                          } else {
                              // No activity on the minor branch, pick the newer one.
                              (MergePick::Major, false)
                          }
                      } else if src_major.is_overwritten_by(src_minor) {
                          // The information from the major version is strictly older
                          // than the minor version
                          if action == MergeCase::Merged {
                              // If the file was actively merged, it means some
                              // non-copy activity happened on the other branch. It
                              // means the older copy information is still relevant.
                              //
                              // The major side wins such conflict.
                              (MergePick::Major, true)
                          } else {
                              // Not actively merged: pick the newer (minor) value as
                              // is.
                              (MergePick::Minor, false)
                          }
                      } else if src_minor.path.is_none() {
                          // the minor side has no relevant information, pick the alive
                          // one
                          (MergePick::Major, true)
                      } else if src_major.path.is_none() {
                          // the major side has no relevant information, pick the alive
                          // one
                          (MergePick::Minor, true)
                      } else {
                          // by default the major side wins
                          (MergePick::Major, true)
                      }
                  }
              }

rust/hg-core/src/utils.rs

0 +150 0

              // utils module
              //
              // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
              //
              // This software may be used and distributed according to the terms of the
              // GNU General Public License version 2 or any later version.
              //! Contains useful functions, traits, structs, etc. for use in core.
              use crate::errors::{HgError, IoErrorContext};
              use crate::utils::hg_path::HgPath;
+             use im_rc::ordmap::DiffItem;
+             use im_rc::ordmap::OrdMap;
              use std::{io::Write, ops::Deref};
              pub mod files;
              pub mod hg_path;
              pub mod path_auditor;
              /// Useful until rust/issues/56345 is stable
              ///
              /// Returns the index of the first occurrence of `needle` inside `slice`,
              /// or `None` if it does not occur. An empty `needle` matches at index 0.
              ///
              /// # Examples
              ///
              /// ```
              /// use crate::hg::utils::find_slice_in_slice;
              ///
              /// let haystack = b"This is the haystack".to_vec();
              /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
              /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
              /// assert_eq!(find_slice_in_slice(&haystack, b""), Some(0));
              /// ```
              pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
              where
                  for<'a> &'a [T]: PartialEq,
              {
                  if needle.is_empty() {
                      // `windows(0)` panics; an empty needle trivially matches at the
                      // very start.
                      return Some(0);
                  }
                  slice
                      .windows(needle.len())
                      .position(|window| window == needle)
              }
              /// Overwrites, in place, every occurrence of `from` found in `buf` with
              /// `to`.
              ///
              /// Nothing happens when `from` and `to` have different lengths or when
              /// `from` is longer than `buf`. Every start position is examined in
              /// turn, including positions inside a span that was just rewritten.
              ///
              /// # Examples
              ///
              /// ```
              /// use crate::hg::utils::replace_slice;
              /// let mut line = b"I hate writing tests!".to_vec();
              /// replace_slice(&mut line, b"hate", b"love");
              /// assert_eq!(
              ///     line,
              ///     b"I love writing tests!".to_vec()
              /// );
              /// ```
              pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
              where
                  T: Clone + PartialEq,
              {
                  let width = from.len();
                  if width != to.len() || width > buf.len() {
                      return;
                  }
                  // Last index at which a window of `width` items still fits.
                  let last_start = buf.len() - width;
                  let mut start = 0;
                  while start <= last_start {
                      let end = start + width;
                      if buf[start..end] == *from {
                          buf[start..end].clone_from_slice(to);
                      }
                      start += 1;
                  }
              }
              /// Extra helpers for slices; implemented for `[u8]` in this module.
              pub trait SliceExt {
                  /// Returns the slice without its trailing whitespace.
                  fn trim_end(&self) -> &Self;
                  /// Returns the slice without its leading whitespace.
                  fn trim_start(&self) -> &Self;
                  /// Returns the slice without leading and trailing whitespace.
                  fn trim(&self) -> &Self;
                  /// Returns what follows `needle` when the slice starts with it,
                  /// `None` otherwise.
                  fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
                  /// Splits around the first `separator` byte, returning the parts
                  /// before and after it, or `None` when `separator` is absent.
                  fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
              }
              /// Returns `true` unless `c` is an ASCII whitespace byte (space, tab,
              /// line feed, vertical tab, form feed or carriage return).
              ///
              /// Only ASCII is considered: casting the byte to `char` would also
              /// classify 0x85 and 0xA0 as whitespace, which are ordinary
              /// continuation bytes in UTF-8 text and must not be trimmed away.
              // Takes `&u8` because it is passed to `position`/`rposition` on a
              // slice iterator, which hands out references.
              #[allow(clippy::trivially_copy_pass_by_ref)]
              fn is_not_whitespace(c: &u8) -> bool {
                  !(*c == b' ' || (b'\t'..=b'\r').contains(c))
              }
              impl SliceExt for [u8] {
                  /// Cuts the slice right after its last non-whitespace byte; yields an
                  /// empty slice when there is no such byte.
                  fn trim_end(&self) -> &[u8] {
                      match self.iter().rposition(is_not_whitespace) {
                          Some(last) => &self[..=last],
                          None => &[],
                      }
                  }
                  /// Starts the slice at its first non-whitespace byte; yields an empty
                  /// slice when there is no such byte.
                  fn trim_start(&self) -> &[u8] {
                      match self.iter().position(is_not_whitespace) {
                          Some(first) => &self[first..],
                          None => &[],
                      }
                  }
                  /// ```
                  /// use hg::utils::SliceExt;
                  /// assert_eq!(
                  ///     b"  to trim  ".trim(),
                  ///     b"to trim"
                  /// );
                  /// assert_eq!(
                  ///     b"to trim  ".trim(),
                  ///     b"to trim"
                  /// );
                  /// assert_eq!(
                  ///     b"  to trim".trim(),
                  ///     b"to trim"
                  /// );
                  /// ```
                  fn trim(&self) -> &[u8] {
                      let without_leading = self.trim_start();
                      without_leading.trim_end()
                  }
                  /// Returns the bytes following `needle` when the slice begins with
                  /// it.
                  fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
                      if !self.starts_with(needle) {
                          return None;
                      }
                      Some(&self[needle.len()..])
                  }
                  /// Splits at the first `separator`; `None` when it never occurs.
                  fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
                      let mut parts = self.splitn(2, |byte| *byte == separator);
                      match (parts.next(), parts.next()) {
                          (Some(before), Some(after)) => Some((before, after)),
                          _ => None,
                      }
                  }
              }
              /// Values that can be rendered as escaped bytes.
              ///
              /// Implemented in this module for `u8`, slices and vectors of `Escaped`
              /// items, and `&HgPath`.
              pub trait Escaped {
                  /// Return bytes escaped for display to the user
                  fn escaped_bytes(&self) -> Vec<u8>;
              }
              impl Escaped for u8 {
                  /// Escapes a single byte: quotes and backslashes get a backslash
                  /// prefix, tab/newline/carriage return get a symbolic escape, other
                  /// bytes below space or at/above 127 get a `\x` hexadecimal escape,
                  /// and everything else is returned as is.
                  fn escaped_bytes(&self) -> Vec<u8> {
                      let byte = *self;
                      match byte {
                          b'\'' | b'\\' => vec![b'\\', byte],
                          // NOTE(review): these raw literals contain TWO backslashes
                          // before the letter; confirm the doubling is intended.
                          b'\t' => br"\\t".to_vec(),
                          b'\n' => br"\\n".to_vec(),
                          b'\r' => br"\\r".to_vec(),
                          _ if byte < b' ' || byte >= 127 => {
                              // NOTE(review): the hex value is not zero-padded, so
                              // bytes below 0x10 print a single digit; verify.
                              let mut hex = Vec::new();
                              write!(hex, "\\x{:x}", byte).unwrap();
                              hex
                          }
                          _ => vec![byte],
                      }
                  }
              }
              impl<'a, T: Escaped> Escaped for &'a [T] {
                  fn escaped_bytes(&self) -> Vec<u8> {
                      self.iter().flat_map(Escaped::escaped_bytes).collect()
                  }
              }
              impl<T: Escaped> Escaped for Vec<T> {
                  /// Escapes the vector's contents through the slice implementation.
                  fn escaped_bytes(&self) -> Vec<u8> {
                      let items: &[T] = self.deref();
                      items.escaped_bytes()
                  }
              }
              impl<'a> Escaped for &'a HgPath {
                  /// Escapes the bytes returned by `as_bytes()` for this path.
                  fn escaped_bytes(&self) -> Vec<u8> {
                      let raw = self.as_bytes();
                      raw.escaped_bytes()
                  }
              }
              // TODO: use the str method when we require Rust 1.45
              /// Returns `s` without its trailing `suffix`, or `None` when `s` does not
              /// end with `suffix`.
              pub(crate) fn strip_suffix<'a>(s: &'a str, suffix: &str) -> Option<&'a str> {
                  if !s.ends_with(suffix) {
                      return None;
                  }
                  let kept = s.len() - suffix.len();
                  Some(&s[..kept])
              }
              pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
                  std::env::current_dir().map_err(|error| HgError::IoError {
                      error,
                      context: IoErrorContext::CurrentDir,
                  })
              }
              pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
                  std::env::current_exe().map_err(|error| HgError::IoError {
                      error,
                      context: IoErrorContext::CurrentExe,
                  })
              }
+             pub(crate) enum MergeResult<V> { // verdict of a `merge` callback for a key found in both maps
+                 UseLeftValue, // keep the value stored in the left map
+                 UseRightValue, // keep the value stored in the right map
+                 UseNewValue(V), // store this new value instead of either input
+             }
+             /// Return the union of the two given maps,
+             /// calling `merge(key, left_value, right_value)` to resolve keys that exist in
+             /// both. Entries whose key exists in only one map are kept as they are.
+             ///
+             /// CC https://github.com/bodil/im-rs/issues/166
+             pub(crate) fn ordmap_union_with_merge<K, V>(
+                 left: OrdMap<K, V>,
+                 right: OrdMap<K, V>,
+                 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
+             ) -> OrdMap<K, V>
+             where
+                 K: Clone + Ord,
+                 V: Clone + PartialEq,
+             {
+                 if left.ptr_eq(&right) {
+                     // One of the two maps is an unmodified clone of the other,
+                     // so either of them already is the union.
+                     left
+                 } else if left.len() / 2 > right.len() {
+                     // When two maps have different sizes,
+                     // their size difference is a lower bound on
+                     // how many keys of the larger map are not also in the smaller map.
+                     // This in turn is a lower bound on the number of differences in
+                     // `OrdMap::diff` and the "amount of work" that would be done
+                     // by `ordmap_union_with_merge_by_diff`.
+                     //
+                     // Here `left` is more than twice the size of `right`,
+                     // so the number of differences is more than the total size of
+                     // `right`. Therefore an algorithm based on iterating `right`
+                     // is more efficient.
+                     //
+                     // This helps a lot when a tiny (or empty) map is merged
+                     // with a large one.
+                     ordmap_union_with_merge_by_iter(left, right, merge)
+                 } else if left.len() < right.len() / 2 {
+                     // Same as above but with `left` and `right` swapped:
+                     // the small `left` is iterated and inserted into `right`.
+                     ordmap_union_with_merge_by_iter(right, left, |key, a, b| {
+                         // Also swapped in `merge` arguments, so that the callback
+                         // still receives (key, left value, right value):
+                         match merge(key, b, a) {
+                             MergeResult::UseNewValue(v) => MergeResult::UseNewValue(v),
+                             // … and swap back in `merge` result:
+                             MergeResult::UseLeftValue => MergeResult::UseRightValue,
+                             MergeResult::UseRightValue => MergeResult::UseLeftValue,
+                         }
+                     })
+                 } else {
+                     // For maps of similar size, use the algorithm based on `OrdMap::diff`
+                     ordmap_union_with_merge_by_diff(left, right, merge)
+                 }
+             }
+             /// Union computed by iterating `right` and inserting into `left`, which is returned. Efficient if `right` is much smaller than `left`
+             fn ordmap_union_with_merge_by_iter<K, V>(
+                 mut left: OrdMap<K, V>,
+                 right: OrdMap<K, V>,
+                 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
+             ) -> OrdMap<K, V>
+             where
+                 K: Clone + Ord,
+                 V: Clone,
+             {
+                 for (key, right_value) in right {
+                     match left.get(&key) {
+                         None => { // key only in `right`: copy the entry over
+                             left.insert(key, right_value);
+                         }
+                         Some(left_value) => match merge(&key, left_value, &right_value) { // key in both maps: ask the callback
+                             MergeResult::UseLeftValue => {} // `left` already holds the chosen value
+                             MergeResult::UseRightValue => {
+                                 left.insert(key, right_value);
+                             }
+                             MergeResult::UseNewValue(new_value) => {
+                                 left.insert(key, new_value);
+                             }
+                         },
+                     }
+                 }
+                 left
+             }
+             /// Union computed from `left.diff(&right)`: collects the insertions each map would need, then applies the shorter list. Fallback when both maps are of similar size
+             fn ordmap_union_with_merge_by_diff<K, V>(
+                 mut left: OrdMap<K, V>,
+                 mut right: OrdMap<K, V>,
+                 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
+             ) -> OrdMap<K, V>
+             where
+                 K: Clone + Ord,
+                 V: Clone + PartialEq,
+             {
+                 // (key, value) pairs that would need to be inserted in either map
+                 // in order to turn it into the union.
+                 //
+                 // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted,
+                 // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>`
+                 // with `left_updates` only borrowing from `right` and `right_updates` from
+                 // `left`, and with `Cow::Owned` used for `MergeResult::UseNewValue`.
+                 //
+                 // This would allow moving all `.clone()` calls to after we’ve decided
+                 // which of `right_updates` or `left_updates` to use
+                 // (value ones becoming `Cow::into_owned`),
+                 // and avoid making clones we don’t end up using.
+                 let mut left_updates = Vec::new();
+                 let mut right_updates = Vec::new();
+                 for difference in left.diff(&right) {
+                     match difference {
+                         DiffItem::Add(key, value) => { // entry missing from `left`: queue it for `left`
+                             left_updates.push((key.clone(), value.clone()))
+                         }
+                         DiffItem::Remove(key, value) => { // entry missing from `right`: queue it for `right`
+                             right_updates.push((key.clone(), value.clone()))
+                         }
+                         DiffItem::Update { // key in both maps: ask the callback
+                             old: (key, left_value),
+                             new: (_, right_value),
+                         } => match merge(key, left_value, right_value) {
+                             MergeResult::UseLeftValue => {
+                                 right_updates.push((key.clone(), left_value.clone()))
+                             }
+                             MergeResult::UseRightValue => {
+                                 left_updates.push((key.clone(), right_value.clone()))
+                             }
+                             MergeResult::UseNewValue(new_value) => { // neither map holds it yet: queue it for both
+                                 left_updates.push((key.clone(), new_value.clone()));
+                                 right_updates.push((key.clone(), new_value))
+                             }
+                         },
+                     }
+                 }
+                 if left_updates.len() < right_updates.len() { // apply the shorter list to its own map and return that map
+                     for (key, value) in left_updates {
+                         left.insert(key, value);
+                     }
+                     left
+                 } else {
+                     for (key, value) in right_updates {
+                         right.insert(key, value);
+                     }
+                     right
+                 }
+             }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages