upstream/mercurial-mirror Commit - r46676:dacb771f

copies-rust: extract the processing of a ChangedFiles in its own function...

marmoute -

r46676:dacb771f default

parent child

rust/hg-core/src/copy_tracing.rs

0 +52 -40

              use crate::utils::hg_path::HgPath;
              use crate::utils::hg_path::HgPathBuf;
              use crate::Revision;
              use im_rc::ordmap::DiffItem;
              use im_rc::ordmap::OrdMap;
              use std::cmp::Ordering;
              use std::collections::HashMap;
              use std::convert::TryInto;
              pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
              /// A copy source annotated with the revision that recorded it.
              ///
              /// This is the value type of `TimeStampedPathCopies`, whose keys are the
              /// copy destinations.
              #[derive(Clone, Debug, PartialEq)]
              struct TimeStampedPathCopy {
                  /// revision at which the copy information was added
                  rev: Revision,
                  /// the copy source (set to `None` when the associated key was deleted)
                  path: Option<HgPathBuf>,
              }
              /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
              type TimeStampedPathCopies = OrdMap<HgPathBuf, TimeStampedPathCopy>;
              /// hold parent 1, parent 2 and relevant files actions.
              pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
              /// Represents the files affected by a changeset.
              ///
              /// This holds a subset of mercurial.metadata.ChangingFiles as we do not need
              /// all the data categories tracked by it.
              pub struct ChangedFiles<'a> {
                  /// number of entries stored in `index`
                  nb_items: u32,
                  /// fixed-size entries (flags, filename end offset, copy source index)
                  index: &'a [u8],
                  /// filename bytes, sliced using the end offsets stored in `index`
                  data: &'a [u8],
              }
              /// Represents the active changes that affect the copy tracing.
              enum Action<'a> {
                  /// The parent → children edge is removing a file
                  ///
                  /// (actually, this could be the edge from the other parent, but it does
                  /// not matter)
                  Removed(&'a HgPath),
                  /// The parent → children edge introduces copy information between (dest,
                  /// source)
                  Copied(&'a HgPath, &'a HgPath),
              }
              /// This expresses the possible "special" cases we can get in a merge
              ///
              /// See mercurial/metadata.py for details on these values.
              #[derive(PartialEq)]
              enum MergeCase {
                  /// Merged: file had history on both sides that needed to be merged
                  Merged,
                  /// Salvaged: file was a candidate for deletion, but survived the merge
                  Salvaged,
                  /// Normal: Not one of the two cases above
                  Normal,
              }
              /// One decoded index entry: (flags, filename, copy source filename).
              type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
              /// Backing bytes used by `ChangedFiles::new_empty`.
              const EMPTY: &[u8] = b"";
              /// Mask selecting the copy bits of an entry's flags.
              const COPY_MASK: u8 = 3;
              /// Copy bits value reported when walking the first parent edge.
              const P1_COPY: u8 = 2;
              /// Copy bits value reported when walking the second parent edge.
              const P2_COPY: u8 = 3;
              /// Mask selecting the action bits of an entry's flags.
              const ACTION_MASK: u8 = 28;
              /// Action bits value reported as `Action::Removed`.
              const REMOVED: u8 = 12;
              /// Action bits value reported as `MergeCase::Merged`.
              const MERGED: u8 = 8;
              /// Action bits value reported as `MergeCase::Salvaged`.
              const SALVAGED: u8 = 16;
              impl<'a> ChangedFiles<'a> {
                  /// Offset of the first index entry, right after the 4-byte item count.
                  const INDEX_START: usize = 4;
                  /// Size of one index entry: flags (1 byte), filename end (4 bytes) and
                  /// copy source index (4 bytes).
                  const ENTRY_SIZE: u32 = 9;
                  /// Offset, within an entry, of the big-endian filename end.
                  const FILENAME_START: u32 = 1;
                  /// Offset, within an entry, of the big-endian copy source index.
                  const COPY_SOURCE_START: u32 = 5;
                  /// Build a `ChangedFiles` view over `data` without copying it.
                  ///
                  /// `data` starts with a big-endian `u32` item count, followed by that
                  /// many index entries, followed by the filename bytes. A count of zero
                  /// is accepted and yields an empty instance.
                  ///
                  /// # Panics
                  ///
                  /// Panics if `data` is too short for its header, for the index it
                  /// declares, or for the filename bytes the last entry points to, and if
                  /// the index size cannot be represented.
                  pub fn new(data: &'a [u8]) -> Self {
                      assert!(
                          data.len() >= 4,
                          "data size ({}) is too small to contain the header (4)",
                          data.len()
                      );
                      let nb_items_raw: [u8; 4] = (&data[0..=3])
                          .try_into()
                          .expect("failed to turn 4 bytes into 4 bytes");
                      let nb_items = u32::from_be_bytes(nb_items_raw);
                      // A bogus item count must not wrap around silently into a small,
                      // seemingly valid, index size.
                      let index_end = nb_items
                          .checked_mul(Self::ENTRY_SIZE)
                          .and_then(|size| Self::INDEX_START.checked_add(size as usize))
                          .expect("index size overflows");
                      assert!(
                          data.len() >= index_end,
                          "data size ({}) is too small to fit the index_data ({})",
                          data.len(),
                          index_end
                      );
                      let ret = ChangedFiles {
                          nb_items,
                          index: &data[Self::INDEX_START..index_end],
                          data: &data[index_end..],
                      };
                      // The end offset of the last entry is the amount of filename data
                      // in use; without any entry, nothing is referenced at all.
                      let max_data = match nb_items.checked_sub(1) {
                          Some(last) => ret.filename_end(last) as usize,
                          None => 0,
                      };
                      assert!(
                          ret.data.len() >= max_data,
                          "data size ({}) is too small to fit all data ({})",
                          data.len(),
                          index_end + max_data
                      );
                      ret
                  }
                  /// Build an instance that holds no entry at all.
                  pub fn new_empty() -> Self {
                      ChangedFiles {
                          nb_items: 0,
                          index: EMPTY,
                          data: EMPTY,
                      }
                  }
                  /// internal function to return an individual entry at a given index
                  ///
                  /// Panics if `idx` is not lower than the number of entries.
                  fn entry(&'a self, idx: u32) -> FileChange<'a> {
                      assert!(
                          idx < self.nb_items,
                          "index for entry is higher that the number of file {} >= {}",
                          idx,
                          self.nb_items
                      );
                      let flags = self.flags(idx);
                      let filename = self.filename(idx);
                      // the copy source is stored as the index of another entry
                      let copy_idx = self.copy_idx(idx);
                      let copy_source = self.filename(copy_idx);
                      (flags, filename, copy_source)
                  }
                  /// internal function to return the filename of the entry at a given index
                  fn filename(&self, idx: u32) -> &HgPath {
                      // A filename starts where the previous one ends; the first one
                      // starts at the beginning of the data.
                      let filename_start = match idx.checked_sub(1) {
                          Some(previous) => self.filename_end(previous) as usize,
                          None => 0,
                      };
                      let filename_end = self.filename_end(idx) as usize;
                      HgPath::new(&self.data[filename_start..filename_end])
                  }
                  /// internal function to return the flag field of the entry at a given
                  /// index
                  fn flags(&self, idx: u32) -> u8 {
                      let idx = idx as usize;
                      self.index[idx * (Self::ENTRY_SIZE as usize)]
                  }
                  /// internal function to return the end of a filename part at a given index
                  fn filename_end(&self, idx: u32) -> u32 {
                      let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
                      let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
                      let start = start as usize;
                      let end = end as usize;
                      let raw = (&self.index[start..end])
                          .try_into()
                          .expect("failed to turn 4 bytes into 4 bytes");
                      u32::from_be_bytes(raw)
                  }
                  /// internal function to return index of the copy source of the entry at a
                  /// given index
                  fn copy_idx(&self, idx: u32) -> u32 {
                      let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
                      let end = (idx + 1) * Self::ENTRY_SIZE;
                      let start = start as usize;
                      let end = end as usize;
                      let raw = (&self.index[start..end])
                          .try_into()
                          .expect("failed to turn 4 bytes into 4 bytes");
                      u32::from_be_bytes(raw)
                  }
                  /// Return an iterator over all the `Action` in this instance.
                  ///
                  /// `parent` selects which parent edge the copy records are read for.
                  fn iter_actions(&self, parent: Parent) -> ActionsIterator {
                      ActionsIterator {
                          changes: self,
                          parent,
                          current: 0,
                      }
                  }
                  /// return the MergeCase value associated with a filename
                  ///
                  /// Files that are not found are reported as `MergeCase::Normal`.
                  fn get_merge_case(&self, path: &HgPath) -> MergeCase {
                      if self.nb_items == 0 {
                          return MergeCase::Normal;
                      }
                      let mut low_part = 0;
                      let mut high_part = self.nb_items;
                      // NOTE(review): this binary search moves towards the higher
                      // indexes when `path` sorts *before* the probed filename, which is
                      // only correct if entries are stored in descending filename order.
                      // Confirm against the code writing the index.
                      while low_part < high_part {
                          let cursor = (low_part + high_part - 1) / 2;
                          let (flags, filename, _source) = self.entry(cursor);
                          match path.cmp(filename) {
                              Ordering::Less => low_part = cursor + 1,
                              Ordering::Greater => high_part = cursor,
                              Ordering::Equal => {
                                  return match flags & ACTION_MASK {
                                      MERGED => MergeCase::Merged,
                                      SALVAGED => MergeCase::Salvaged,
                                      _ => MergeCase::Normal,
                                  };
                              }
                          }
                      }
                      MergeCase::Normal
                  }
              }
              /// Answers "is X an ancestor of Y" questions quickly.
              ///
              /// Questions are forwarded to a callback, and every answer obtained that
              /// way is remembered so the callback runs at most once per pair.
              #[derive(Debug)]
              struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
                  /// callback doing the actual ancestry check
                  inner: &'a A,
                  /// answers already obtained from `inner`, keyed by (ancestor, descendant)
                  pairs: HashMap<(Revision, Revision), bool>,
              }
              impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
                  /// Wrap `func` with an initially empty cache.
                  fn new(func: &'a A) -> Self {
                      Self {
                          inner: func,
                          pairs: HashMap::default(),
                      }
                  }
                  /// returns `true` if `anc` is an ancestor of `desc`, `false` otherwise
                  fn is_ancestor(&mut self, anc: Revision, desc: Revision) -> bool {
                      // The two trivial cases never reach the callback nor the cache.
                      if anc > desc {
                          return false;
                      }
                      if anc == desc {
                          return true;
                      }
                      // Copy the callback reference out so the closure below does not
                      // borrow `self` while the cache is mutably borrowed.
                      let ask = self.inner;
                      *self
                          .pairs
                          .entry((anc, desc))
                          .or_insert_with(|| ask(anc, desc))
                  }
              }
              /// Iterator over the `Action`s a `ChangedFiles` holds for one parent edge.
              struct ActionsIterator<'a> {
                  /// entries being walked
                  changes: &'a ChangedFiles<'a>,
                  /// parent edge the reported copies must belong to
                  parent: Parent,
                  /// index of the next entry to inspect
                  current: u32,
              }
              impl<'a> Iterator for ActionsIterator<'a> {
                  type Item = Action<'a>;
                  /// Advance to the next entry that is either a removal or a copy
                  /// recorded for the selected parent; other entries are skipped.
                  fn next(&mut self) -> Option<Action<'a>> {
                      let wanted_copy = match self.parent {
                          Parent::FirstParent => P1_COPY,
                          Parent::SecondParent => P2_COPY,
                      };
                      loop {
                          if self.current >= self.changes.nb_items {
                              break None;
                          }
                          let (flags, file, source) = self.changes.entry(self.current);
                          self.current += 1;
                          // removals are reported whatever the parent is
                          if flags & ACTION_MASK == REMOVED {
                              break Some(Action::Removed(file));
                          }
                          if flags & COPY_MASK == wanted_copy {
                              break Some(Action::Copied(file, source));
                          }
                      }
                  }
              }
              /// A small struct whose purpose is to ensure the lifetime of the bytes
              /// referenced in ChangedFiles
              ///
              /// It is passed to the RevInfoMaker callback, which can assign any necessary
              /// content to the `data` attribute. The copy tracing code is responsible for
              /// keeping the DataHolder alive at least as long as the ChangedFiles object.
              pub struct DataHolder<D> {
                  /// The RevInfoMaker callback should assign the data referenced by the
                  /// ChangedFiles struct it returns to this attribute. The DataHolder
                  /// lifetime will be at least as long as the ChangedFiles one.
                  pub data: Option<D>,
              }
              /// Callback returning the `RevInfo` of a revision.
              ///
              /// The returned `RevInfo` borrows from the `DataHolder` it is given, for
              /// the lifetime `'r` of that borrow.
              pub type RevInfoMaker<'a, D> =
                  Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
              /// Enum identifying which parent → child edge is currently being processed
              #[derive(Copy, Clone, Debug)]
              enum Parent {
                  /// The `p1(x) → x` edge
                  FirstParent,
                  /// The `p2(x) → x` edge
                  SecondParent,
              }
              /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
              ///
              /// Arguments are:
              ///
              /// revs: all revisions to be considered
              /// children: a {parent → [children]} mapping
              /// target_rev: the final revision we are combining copies to
              /// rev_info(rev): callback to get revision information:
              ///   * first parent
              ///   * second parent
              ///   * ChangedFiles
              /// is_ancestor(low_rev, high_rev): callback to check if a revision is an
              ///                                 ancestor of another
              ///
              /// Returns the destination → source mapping accumulated for `target_rev`,
              /// without the entries recorded as deleted.
              ///
              /// # Panics
              ///
              /// Panics if a revision of `revs` has no entry in `children`, if a child
              /// reports the processed revision as neither of its parents, or if no
              /// copy information was produced for `target_rev`.
              pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
                  revs: Vec<Revision>,
                  children: HashMap<Revision, Vec<Revision>>,
                  target_rev: Revision,
                  rev_info: RevInfoMaker<D>,
                  is_ancestor: &A,
              ) -> PathCopies {
                  let mut all_copies = HashMap::new();
                  let mut oracle = AncestorOracle::new(is_ancestor);
                  for rev in revs {
                      // Retrieve data computed in a previous iteration
                      let copies = all_copies.remove(&rev);
                      let copies = match copies {
                          Some(c) => c,
                          None => TimeStampedPathCopies::default(), // root of the walked set
                      };
                      let current_children = match children.get(&rev) {
                          Some(c) => c,
                          None => panic!("inconsistent `revs` and `children`"),
                      };
                      for child in current_children {
                          // We will chain the copies information accumulated for `rev` with
                          // the individual copies information for each of its children.
                          // Creating a new PathCopies for each `rev` → `children` vertex.
                          let mut d: DataHolder<D> = DataHolder { data: None };
                          let (p1, p2, changes) = rev_info(*child, &mut d);
                          let parent = if rev == p1 {
                              Parent::FirstParent
                          } else {
                              assert_eq!(rev, p2);
                              Parent::SecondParent
                          };
-                         let mut new_copies = copies.clone();
-                         for action in changes.iter_actions(parent) {
-                             match action {
-                                 Action::Copied(dest, source) => {
-                                     let entry;
-                                     if let Some(v) = copies.get(source) {
-                                         entry = match &v.path {
-                                             Some(path) => Some((*(path)).to_owned()),
-                                             None => Some(source.to_owned()),
+                                         }
-                                     } else {
-                                         entry = Some(source.to_owned());
+                                     }
-                                     // Each new entry is introduced by the children, we
-                                     // record this information as we will need it to take
-                                     // the right decision when merging conflicting copy
-                                     // information. See merge_copies_dict for details.
-                                     let ttpc = TimeStampedPathCopy {
-                                         rev: *child,
-                                         path: entry,
-                                     };
-                                     new_copies.insert(dest.to_owned(), ttpc);
+                                 }
-                                 Action::Removed(f) => {
-                                     // We must drop copy information for removed file.
-                                     //
-                                     // We need to explicitly record them as dropped to
-                                     // propagate this information when merging two
-                                     // TimeStampedPathCopies object.
-                                     if new_copies.contains_key(f.as_ref()) {
-                                         let ttpc = TimeStampedPathCopy {
-                                             rev: *child,
-                                             path: None,
-                                         };
-                                         new_copies.insert(f.to_owned(), ttpc);
+                                     }
+                                 }
+                             }
+                         }
+                         let new_copies =
+                             add_from_changes(&copies, &changes, parent, *child);
                          // Merge has two parents needs to combines their copy information.
                          //
                          // If the vertex from the other parent was already processed, we
                          // will have a value for the child ready to be used. We need to
                          // grab it and combine it with the one we already
                          // computed. If not we can simply store the newly
                          // computed data. The processing happening at
                          // the time of the second parent will take care of combining the
                          // two TimeStampedPathCopies instance.
                          match all_copies.remove(child) {
                              None => {
                                  all_copies.insert(child, new_copies);
                              }
                              Some(other_copies) => {
                                  let (minor, major) = match parent {
                                      Parent::FirstParent => (other_copies, new_copies),
                                      Parent::SecondParent => (new_copies, other_copies),
                                  };
                                  let merged_copies =
                                      merge_copies_dict(minor, major, &changes, &mut oracle);
                                  all_copies.insert(child, merged_copies);
                              }
                          };
                      }
                  }
                  // Drop internal information (like the timestamp) and return the final
                  // mapping.
                  let tt_result = all_copies
                      .remove(&target_rev)
                      .expect("target revision was not processed");
                  let mut result = PathCopies::default();
                  for (dest, tt_source) in tt_result {
                      if let Some(path) = tt_source.path {
                          result.insert(dest, path);
                      }
                  }
                  result
              }
+             /// Apply the actions `changes` holds for the `parent` edge on top of a clone of
+             /// `base_copies`, stamp each touched entry with `current_rev`, and return it.
+             fn add_from_changes(
+                 base_copies: &TimeStampedPathCopies,
+                 changes: &ChangedFiles,
+                 parent: Parent,
+                 current_rev: Revision,
+             ) -> TimeStampedPathCopies {
+                 let mut copies = base_copies.clone();
+                 for action in changes.iter_actions(parent) {
+                     match action {
+                         Action::Copied(dest, source) => {
+                             let entry;
+                             if let Some(v) = base_copies.get(source) {
+                                 entry = match &v.path {
+                                     Some(path) => Some((*(path)).to_owned()),
+                                     None => Some(source.to_owned()),
+                                 }
+                             } else {
+                                 entry = Some(source.to_owned());
+                             }
+                             // Each new entry is introduced by the child revision; we
+                             // record this information as we will need it to take
+                             // the right decision when merging conflicting copy
+                             // information. See merge_copies_dict for details.
+                             let ttpc = TimeStampedPathCopy {
+                                 rev: current_rev,
+                                 path: entry,
+                             };
+                             copies.insert(dest.to_owned(), ttpc);
+                         }
+                         Action::Removed(f) => {
+                             // We must drop copy information for removed files.
+                             //
+                             // We need to explicitly record them as dropped to
+                             // propagate this information when merging two
+                             // TimeStampedPathCopies objects.
+                             if copies.contains_key(f.as_ref()) {
+                                 let ttpc = TimeStampedPathCopy {
+                                     rev: current_rev,
+                                     path: None,
+                                 };
+                                 copies.insert(f.to_owned(), ttpc);
+                             }
+                         }
+                     }
+                 }
+                 copies
+             }
              /// Merge two copies-mappings together, minor and major.
              ///
              /// In case of conflict, the value from "major" will be picked, except in
              /// some cases. See inline documentation for details.
              ///
              /// `changes` is queried (through `get_merge_case`) for destinations on
              /// which the two sides disagree, and `oracle` answers the ancestry
              /// questions between the revisions that recorded each side.
              #[allow(clippy::if_same_then_else)]
              fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
                  minor: TimeStampedPathCopies,
                  major: TimeStampedPathCopies,
                  changes: &ChangedFiles,
                  oracle: &mut AncestorOracle<A>,
              ) -> TimeStampedPathCopies {
                  // Nothing to reconcile when one side holds no information at all.
                  if minor.is_empty() {
                      return major;
                  } else if major.is_empty() {
                      return minor;
                  }
                  // Entries to write into `minor` (resp. `major`) to turn it into the
                  // merged result. Only one of the two lists ends up being applied.
                  let mut override_minor = Vec::new();
                  let mut override_major = Vec::new();
                  let mut to_major = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
                      override_major.push((k.clone(), v.clone()))
                  };
                  let mut to_minor = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
                      override_minor.push((k.clone(), v.clone()))
                  };
                  // The diff function leverages detection of the identical subparts if
                  // minor and major have some common ancestors. This makes it very fast
                  // in most cases.
                  //
                  // In cases where the two maps are vastly different in size, the
                  // current approach is still slowish because the iteration will iterate
                  // over all the "exclusive" content of the larger one. This situation
                  // can be frequent when the subgraph of revisions we are processing has
                  // a lot of roots. Each root adds its own fully new map to the mix (and
                  // likely a small map, if the path from the root to the "main path" is
                  // small).
                  //
                  // We could do better by detecting such situations and processing them
                  // differently.
                  for d in minor.diff(&major) {
                      match d {
                          DiffItem::Add(k, v) => to_minor(k, v),
                          DiffItem::Remove(k, v) => to_major(k, v),
                          DiffItem::Update { old, new } => {
                              let (dest, src_major) = new;
                              let (_, src_minor) = old;
                              // Picking a side means the *other* map has to receive that
                              // side's value.
                              let mut pick_minor = || (to_major(dest, src_minor));
                              let mut pick_major = || (to_minor(dest, src_major));
                              if src_major.path == src_minor.path {
                                  // we have the same value, but from other source;
                                  if src_major.rev == src_minor.rev {
                                      // If the two entries are identical, no need to do
                                      // anything (but diff should not have yielded them)
                                      unreachable!();
                                  } else if oracle.is_ancestor(src_major.rev, src_minor.rev)
                                  {
                                      pick_minor();
                                  } else {
                                      pick_major();
                                  }
                              } else if src_major.rev == src_minor.rev {
                                  // We cannot get copy information for both p1 and p2 in the
                                  // same rev. So this is the same value.
                                  unreachable!();
                              } else {
                                  let action = changes.get_merge_case(&dest);
                                  if src_major.path.is_none()
                                      && action == MergeCase::Salvaged
                                  {
                                      // If the file is "deleted" in the major side but was
                                      // salvaged by the merge, we keep the minor side alive
                                      pick_minor();
                                  } else if src_minor.path.is_none()
                                      && action == MergeCase::Salvaged
                                  {
                                      // If the file is "deleted" in the minor side but was
                                      // salvaged by the merge, unconditionally preserve the
                                      // major side.
                                      pick_major();
                                  } else if action == MergeCase::Merged {
                                      // If the file was actively merged, copy information
                                      // from each side might conflict.  The major side will
                                      // win such conflict.
                                      pick_major();
                                  } else if oracle.is_ancestor(src_major.rev, src_minor.rev)
                                  {
                                      // If the minor side is strictly newer than the major
                                      // side, it should be kept.
                                      pick_minor();
                                  } else if src_major.path.is_some() {
                                      // without any special case, the "major" value wins
                                      // over the "minor" one.
                                      pick_major();
                                  } else if oracle.is_ancestor(src_minor.rev, src_major.rev)
                                  {
                                      // the "minor" rev is an ancestor of the "major" one,
                                      // so the value from "major" should overwrite it
                                      pick_major();
                                  } else {
                                      // major version is None (so the file was deleted on
                                      // that branch) and that branch is independent (neither
                                      // minor nor major is an ancestor of the other one.)
                                      // We preserve the new
                                      // information about the new file.
                                      pick_minor();
                                  }
                              }
                          }
                      };
                  }
                  let updates;
                  let mut result;
                  if override_major.is_empty() {
                      // `major` already is the merged result
                      result = major
                  } else if override_minor.is_empty() {
                      // `minor` already is the merged result
                      result = minor
                  } else {
                      // Both maps need changes: patch the one needing the fewest.
                      if override_minor.len() < override_major.len() {
                          updates = override_minor;
                          result = minor;
                      } else {
                          updates = override_major;
                          result = major;
                      }
                      for (k, v) in updates {
                          result.insert(k, v);
                      }
                  }
                  result
              }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages