upstream/mercurial-mirror Commit - r46556:595979dc

copies: introduce a basic Rust function for `combine_changeset_copies`...

marmoute -

r46556:595979dc default

parent child

rust/hg-core/src/copy_tracing.rs

0 created 644 +262 0

			@@ -0,0 +1,262
		1	use crate::utils::hg_path::HgPathBuf;
		2	use crate::Revision;
		3
		4	use std::collections::HashMap;
		5	use std::collections::HashSet;
		6
		7	pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
		8
		9	#[derive(Clone, Debug)]
		10	struct TimeStampedPathCopy {
		11	/// revision at which the copy information was added
		12	rev: Revision,
		13	/// the copy source, (Set to None in case of deletion of the associated
		14	/// key)
		15	path: Option<HgPathBuf>,
		16	}
		17
		18	/// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
		19	type TimeStampedPathCopies = HashMap<HgPathBuf, TimeStampedPathCopy>;
		20
		21	/// Holds parent 1, parent 2 and the relevant file actions of a revision.
		22	pub type RevInfo = (Revision, Revision, ChangedFiles);
		23
		24	/// represent the files affected by a changesets
		25	///
		26	/// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
		27	/// all the data categories tracked by it.
		28	pub struct ChangedFiles {
		29	removed: HashSet<HgPathBuf>,
		30	merged: HashSet<HgPathBuf>,
		31	salvaged: HashSet<HgPathBuf>,
		32	copied_from_p1: PathCopies,
		33	copied_from_p2: PathCopies,
		34	}
		35
		36	impl ChangedFiles {
		37	pub fn new(
		38	removed: HashSet<HgPathBuf>,
		39	merged: HashSet<HgPathBuf>,
		40	salvaged: HashSet<HgPathBuf>,
		41	copied_from_p1: PathCopies,
		42	copied_from_p2: PathCopies,
		43	) -> Self {
		44	ChangedFiles {
		45	removed,
		46	merged,
		47	salvaged,
		48	copied_from_p1,
		49	copied_from_p2,
		50	}
		51	}
		52
		53	pub fn new_empty() -> Self {
		54	ChangedFiles {
		55	removed: HashSet::new(),
		56	merged: HashSet::new(),
		57	salvaged: HashSet::new(),
		58	copied_from_p1: PathCopies::new(),
		59	copied_from_p2: PathCopies::new(),
		60	}
		61	}
		62	}
		63
		64	/// Same as mercurial.copies._combine_changeset_copies, but in Rust.
		65	///
		66	/// Arguments are:
		67	///
		68	/// revs: all revisions to be considered
		69	/// children: a {parent ? [childrens]} mapping
		70	/// target_rev: the final revision we are combining copies to
		71	/// rev_info(rev): callback to get revision information:
		72	/// * first parent
		73	/// * second parent
		74	/// * ChangedFiles
		75	/// isancestors(low_rev, high_rev): callback to check if a revision is an
		76	/// ancestor of another
		77	pub fn combine_changeset_copies(
		78	revs: Vec<Revision>,
		79	children: HashMap<Revision, Vec<Revision>>,
		80	target_rev: Revision,
		81	rev_info: &impl Fn(Revision) -> RevInfo,
		82	is_ancestor: &impl Fn(Revision, Revision) -> bool,
		83	) -> PathCopies {
		84	let mut all_copies = HashMap::new();
		85
		86	for rev in revs {
		87	// Retrieve data computed in a previous iteration
		88	let copies = all_copies.remove(&rev);
		89	let copies = match copies {
		90	Some(c) => c,
		91	None => TimeStampedPathCopies::default(), // root of the walked set
		92	};
		93
		94	let current_children = match children.get(&rev) {
		95	Some(c) => c,
		96	None => panic!("inconsistent `revs` and `children`"),
		97	};
		98
		99	for child in current_children {
		100	// We will chain the copies information accumulated for `rev` with
		101	// the individual copies information for each of its children.
		102	// Creating a new PathCopies for each `rev` ? `children` vertex.
		103	let (p1, p2, changes) = rev_info(*child);
		104
		105	let (parent, child_copies) = if rev == p1 {
		106	(1, &changes.copied_from_p1)
		107	} else {
		108	assert_eq!(rev, p2);
		109	(2, &changes.copied_from_p2)
		110	};
		111	let mut new_copies = copies.clone();
		112
		113	for (dest, source) in child_copies {
		114	let entry;
		115	if let Some(v) = copies.get(source) {
		116	entry = match &v.path {
		117	Some(path) => Some((*(path)).to_owned()),
		118	None => Some(source.to_owned()),
		119	}
		120	} else {
		121	entry = Some(source.to_owned());
		122	}
		123	// Each new entry is introduced by the children, we record this
		124	// information as we will need it to take the right decision
		125	// when merging conflicting copy information. See
		126	// merge_copies_dict for details.
		127	let ttpc = TimeStampedPathCopy {
		128	rev: *child,
		129	path: entry,
		130	};
		131	new_copies.insert(dest.to_owned(), ttpc);
		132	}
		133
		134	// We must drop copy information for removed file.
		135	//
		136	// We need to explicitly record them as dropped to propagate this
		137	// information when merging two TimeStampedPathCopies object.
		138	for f in changes.removed.iter() {
		139	if new_copies.contains_key(f.as_ref()) {
		140	let ttpc = TimeStampedPathCopy {
		141	rev: *child,
		142	path: None,
		143	};
		144	new_copies.insert(f.to_owned(), ttpc);
		145	}
		146	}
		147
		148	// Merge has two parents needs to combines their copy information.
		149	//
		150	// If the vertex from the other parent was already processed, we
		151	// will have a value for the child ready to be used. We need to
		152	// grab it and combine it with the one we already
		153	// computed. If not we can simply store the newly
		154	// computed data. The processing happening at
		155	// the time of the second parent will take care of combining the
		156	// two TimeStampedPathCopies instance.
		157	match all_copies.remove(child) {
		158	None => {
		159	all_copies.insert(child, new_copies);
		160	}
		161	Some(other_copies) => {
		162	let (minor, major) = match parent {
		163	1 => (other_copies, new_copies),
		164	2 => (new_copies, other_copies),
		165	_ => unreachable!(),
		166	};
		167	let merged_copies =
		168	merge_copies_dict(minor, major, &changes, is_ancestor);
		169	all_copies.insert(child, merged_copies);
		170	}
		171	};
		172	}
		173	}
		174
		175	// Drop internal information (like the timestamp) and return the final
		176	// mapping.
		177	let tt_result = all_copies
		178	.remove(&target_rev)
		179	.expect("target revision was not processed");
		180	let mut result = PathCopies::default();
		181	for (dest, tt_source) in tt_result {
		182	if let Some(path) = tt_source.path {
		183	result.insert(dest, path);
		184	}
		185	}
		186	result
		187	}
		188
		189	/// merge two copies-mapping together, minor and major
		190	///
		191	/// In case of conflict, value from "major" will be picked, unless in some
		192	/// cases. See inline documentation for details.
		193	#[allow(clippy::if_same_then_else)]
		194	fn merge_copies_dict(
		195	minor: TimeStampedPathCopies,
		196	major: TimeStampedPathCopies,
		197	changes: &ChangedFiles,
		198	is_ancestor: &impl Fn(Revision, Revision) -> bool,
		199	) -> TimeStampedPathCopies {
		200	let mut result = minor.clone();
		201	for (dest, src_major) in major {
		202	let overwrite;
		203	if let Some(src_minor) = minor.get(&dest) {
		204	if src_major.path == src_minor.path {
		205	// we have the same value, but from other source;
		206	if src_major.rev == src_minor.rev {
		207	// If the two entry are identical, no need to do anything
		208	overwrite = false;
		209	} else if is_ancestor(src_major.rev, src_minor.rev) {
		210	overwrite = false;
		211	} else {
		212	overwrite = true;
		213	}
		214	} else if src_major.rev == src_minor.rev {
		215	// We cannot get copy information for both p1 and p2 in the
		216	// same rev. So this is the same value.
		217	overwrite = false;
		218	} else if src_major.path.is_none()
		219	&& changes.salvaged.contains(&dest)
		220	{
		221	// If the file is "deleted" in the major side but was salvaged
		222	// by the merge, we keep the minor side alive
		223	overwrite = false;
		224	} else if src_minor.path.is_none()
		225	&& changes.salvaged.contains(&dest)
		226	{
		227	// If the file is "deleted" in the minor side but was salvaged
		228	// by the merge, unconditionnaly preserve the major side.
		229	overwrite = true;
		230	} else if changes.merged.contains(&dest) {
		231	// If the file was actively merged, copy information from each
		232	// side might conflict. The major side will win such conflict.
		233	overwrite = true;
		234	} else if is_ancestor(src_major.rev, src_minor.rev) {
		235	// If the minor side is strictly newer than the major side, it
		236	// should be kept.
		237	overwrite = false;
		238	} else if src_major.path.is_some() {
		239	// without any special case, the "major" value win other the
		240	// "minor" one.
		241	overwrite = true;
		242	} else if is_ancestor(src_minor.rev, src_major.rev) {
		243	// the "major" rev is a direct ancestors of "minor", any
		244	// different value should overwrite
		245	overwrite = true;
		246	} else {
		247	// major version is None (so the file was deleted on that
		248	// branch) annd that branch is independant (neither minor nor
		249	// major is an ancestors of the other one.) We preserve the new
		250	// information about the new file.
		251	overwrite = false;
		252	}
		253	} else {
		254	// minor had no value
		255	overwrite = true;
		256	}
		257	if overwrite {
		258	result.insert(dest, src_major);
		259	}
		260	}
		261	result
		262	}

rust/hg-core/src/lib.rs

0 +1 0

              // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
              //           and Mercurial contributors
              //
              // This software may be used and distributed according to the terms of the
              // GNU General Public License version 2 or any later version.
              mod ancestors;
              pub mod dagops;
              pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
              mod dirstate;
              pub mod discovery;
              pub mod requirements;
              pub mod testing; // unconditionally built, for use from integration tests
              pub use dirstate::{
                  dirs_multiset::{DirsMultiset, DirsMultisetIter},
                  dirstate_map::DirstateMap,
                  parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
                  status::{
                      status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
                  },
                  CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
                  StateMap, StateMapIter,
              };
+             pub mod copy_tracing;
              mod filepatterns;
              pub mod matchers;
              pub mod revlog;
              pub use revlog::*;
              pub mod operations;
              pub mod utils;
              // Remove this to see (potential) non-artificial compile failures. MacOS
              // *should* compile, but fail to compile tests for example as of 2020-03-06
              #[cfg(not(target_os = "linux"))]
              compile_error!(
                  "`hg-core` has only been tested on Linux and will most \
                   likely not behave correctly on other platforms."
              );
              use crate::utils::hg_path::{HgPathBuf, HgPathError};
              pub use filepatterns::{
                  parse_pattern_syntax, read_pattern_file, IgnorePattern,
                  PatternFileWarning, PatternSyntax,
              };
              use std::collections::HashMap;
              use twox_hash::RandomXxHashBuilder64;
              /// This is a contract between the `micro-timer` crate and us, to expose
              /// the `log` crate as `crate::log`.
              use log;
              pub type LineNumber = usize;
              /// Rust's default hasher is too slow because it tries to prevent collision
              /// attacks. We are not concerned about those: if an ill-minded person has
              /// write access to your repository, you have other issues.
              pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
              /// Errors raised while parsing a dirstate.
              #[derive(Clone, Debug, PartialEq)]
              pub enum DirstateParseError {
                  TooLittleData,
                  Overflow,
                  // TODO refactor to use bytes instead of String
                  CorruptedEntry(String),
                  Damaged,
              }
              /// An I/O failure is reported as a corrupted entry carrying the
              /// I/O error's message.
              impl From<std::io::Error> for DirstateParseError {
                  fn from(e: std::io::Error) -> Self {
                      DirstateParseError::CorruptedEntry(e.to_string())
                  }
              }
              /// Human-readable message for each variant.
              ///
              /// `Display` is implemented rather than `ToString`: `to_string()`
              /// remains available through the standard blanket implementation,
              /// and the type additionally becomes usable with `{}` formatting.
              impl std::fmt::Display for DirstateParseError {
                  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                      match self {
                          DirstateParseError::TooLittleData => {
                              f.write_str("Too little data for dirstate.")
                          }
                          DirstateParseError::Overflow => {
                              f.write_str("Overflow in dirstate.")
                          }
                          DirstateParseError::CorruptedEntry(e) => {
                              write!(f, "Corrupted entry: {:?}.", e)
                          }
                          DirstateParseError::Damaged => {
                              f.write_str("Dirstate appears to be damaged.")
                          }
                      }
                  }
              }
              /// Errors raised while packing a dirstate.
              #[derive(Debug, PartialEq)]
              pub enum DirstatePackError {
                  CorruptedEntry(String),
                  CorruptedParent,
                  // NOTE(review): the meaning of the two sizes is not visible
                  // here (presumably expected vs. actual) - confirm.
                  BadSize(usize, usize),
              }
              /// An I/O failure is reported as a corrupted entry carrying the
              /// I/O error's message.
              impl From<std::io::Error> for DirstatePackError {
                  fn from(io_error: std::io::Error) -> Self {
                      let message = io_error.to_string();
                      Self::CorruptedEntry(message)
                  }
              }
              /// Errors raised by dirstate map operations.
              #[derive(Debug, PartialEq)]
              pub enum DirstateMapError {
                  PathNotFound(HgPathBuf),
                  EmptyPath,
                  InvalidPath(HgPathError),
              }
              /// Human-readable message for each variant.
              ///
              /// `Display` is implemented rather than `ToString`: `to_string()`
              /// remains available through the standard blanket implementation.
              impl std::fmt::Display for DirstateMapError {
                  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                      match self {
                          DirstateMapError::PathNotFound(_) => {
                              f.write_str("expected a value, found none")
                          }
                          // This used to read "Overflow in dirstate.", a message
                          // copied from `DirstateParseError::Overflow` that does
                          // not describe an empty path.
                          DirstateMapError::EmptyPath => {
                              f.write_str("unexpected empty path")
                          }
                          // Go through `to_string()` so that this only relies on
                          // what the previous implementation already required
                          // from `HgPathError`.
                          DirstateMapError::InvalidPath(e) => {
                              f.write_str(&e.to_string())
                          }
                      }
                  }
              }
              /// Umbrella error wrapping the parse, pack, map and I/O failures
              /// of dirstate handling.
              #[derive(Debug)]
              pub enum DirstateError {
                  Parse(DirstateParseError),
                  Pack(DirstatePackError),
                  Map(DirstateMapError),
                  IO(std::io::Error),
              }
              /// Wraps a parse error in `DirstateError::Parse`.
              impl From<DirstateParseError> for DirstateError {
                  fn from(parse_error: DirstateParseError) -> Self {
                      Self::Parse(parse_error)
                  }
              }
              /// Wraps a pack error in `DirstateError::Pack`.
              impl From<DirstatePackError> for DirstateError {
                  fn from(pack_error: DirstatePackError) -> Self {
                      Self::Pack(pack_error)
                  }
              }
              /// Errors raised while handling file patterns.
              #[derive(Debug)]
              pub enum PatternError {
                  Path(HgPathError),
                  UnsupportedSyntax(String),
                  /// Payload is (syntax, file path, line), in that order.
                  UnsupportedSyntaxInFile(String, String, usize),
                  /// Payload is a size in bytes.
                  TooLong(usize),
                  IO(std::io::Error),
                  /// Needed a pattern that can be turned into a regex but got one that
                  /// can't. This should only happen through programmer error.
                  NonRegexPattern(IgnorePattern),
              }
              /// Human-readable message for each variant.
              ///
              /// `Display` is implemented rather than `ToString`: `to_string()`
              /// remains available through the standard blanket implementation.
              impl std::fmt::Display for PatternError {
                  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                      match self {
                          PatternError::UnsupportedSyntax(syntax) => {
                              write!(f, "Unsupported syntax {}", syntax)
                          }
                          PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
                              write!(
                                  f,
                                  "{}:{}: unsupported syntax {}",
                                  file_path, line, syntax
                              )
                          }
                          PatternError::TooLong(size) => {
                              write!(f, "matcher pattern is too long ({} bytes)", size)
                          }
                          // Both go through `to_string()` so that nothing more
                          // is required from the wrapped error types than before.
                          PatternError::IO(e) => f.write_str(&e.to_string()),
                          PatternError::Path(e) => f.write_str(&e.to_string()),
                          PatternError::NonRegexPattern(pattern) => {
                              write!(f, "'{:?}' cannot be turned into a regex", pattern)
                          }
                      }
                  }
              }
              impl From<DirstateMapError> for DirstateError {
                  fn from(e: DirstateMapError) -> Self {
                      DirstateError::Map(e)
                  }
              }
              impl From<std::io::Error> for DirstateError {
                  fn from(e: std::io::Error) -> Self {
                      DirstateError::IO(e)
                  }
              }
              impl From<std::io::Error> for PatternError {
                  fn from(e: std::io::Error) -> Self {
                      PatternError::IO(e)
                  }
              }
              impl From<HgPathError> for PatternError {
                  fn from(e: HgPathError) -> Self {
                      PatternError::Path(e)
                  }
              }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages