##// END OF EJS Templates
copies: introduce a basic Rust function for `combine_changeset_copies`...
marmoute -
r46556:595979dc default
parent child Browse files
Show More
@@ -0,0 +1,262
1 use crate::utils::hg_path::HgPathBuf;
2 use crate::Revision;
3
4 use std::collections::HashMap;
5 use std::collections::HashSet;
6
7 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
8
9 #[derive(Clone, Debug)]
10 struct TimeStampedPathCopy {
11 /// revision at which the copy information was added
12 rev: Revision,
13 /// the copy source, (Set to None in case of deletion of the associated
14 /// key)
15 path: Option<HgPathBuf>,
16 }
17
18 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
19 type TimeStampedPathCopies = HashMap<HgPathBuf, TimeStampedPathCopy>;
20
21 /// hold parent 1, parent 2 and relevant files actions.
22 pub type RevInfo = (Revision, Revision, ChangedFiles);
23
24 /// represent the files affected by a changesets
25 ///
26 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
27 /// all the data categories tracked by it.
28 pub struct ChangedFiles {
29 removed: HashSet<HgPathBuf>,
30 merged: HashSet<HgPathBuf>,
31 salvaged: HashSet<HgPathBuf>,
32 copied_from_p1: PathCopies,
33 copied_from_p2: PathCopies,
34 }
35
36 impl ChangedFiles {
37 pub fn new(
38 removed: HashSet<HgPathBuf>,
39 merged: HashSet<HgPathBuf>,
40 salvaged: HashSet<HgPathBuf>,
41 copied_from_p1: PathCopies,
42 copied_from_p2: PathCopies,
43 ) -> Self {
44 ChangedFiles {
45 removed,
46 merged,
47 salvaged,
48 copied_from_p1,
49 copied_from_p2,
50 }
51 }
52
53 pub fn new_empty() -> Self {
54 ChangedFiles {
55 removed: HashSet::new(),
56 merged: HashSet::new(),
57 salvaged: HashSet::new(),
58 copied_from_p1: PathCopies::new(),
59 copied_from_p2: PathCopies::new(),
60 }
61 }
62 }
63
64 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
65 ///
66 /// Arguments are:
67 ///
68 /// revs: all revisions to be considered
69 /// children: a {parent ? [childrens]} mapping
70 /// target_rev: the final revision we are combining copies to
71 /// rev_info(rev): callback to get revision information:
72 /// * first parent
73 /// * second parent
74 /// * ChangedFiles
75 /// isancestors(low_rev, high_rev): callback to check if a revision is an
76 /// ancestor of another
77 pub fn combine_changeset_copies(
78 revs: Vec<Revision>,
79 children: HashMap<Revision, Vec<Revision>>,
80 target_rev: Revision,
81 rev_info: &impl Fn(Revision) -> RevInfo,
82 is_ancestor: &impl Fn(Revision, Revision) -> bool,
83 ) -> PathCopies {
84 let mut all_copies = HashMap::new();
85
86 for rev in revs {
87 // Retrieve data computed in a previous iteration
88 let copies = all_copies.remove(&rev);
89 let copies = match copies {
90 Some(c) => c,
91 None => TimeStampedPathCopies::default(), // root of the walked set
92 };
93
94 let current_children = match children.get(&rev) {
95 Some(c) => c,
96 None => panic!("inconsistent `revs` and `children`"),
97 };
98
99 for child in current_children {
100 // We will chain the copies information accumulated for `rev` with
101 // the individual copies information for each of its children.
102 // Creating a new PathCopies for each `rev` ? `children` vertex.
103 let (p1, p2, changes) = rev_info(*child);
104
105 let (parent, child_copies) = if rev == p1 {
106 (1, &changes.copied_from_p1)
107 } else {
108 assert_eq!(rev, p2);
109 (2, &changes.copied_from_p2)
110 };
111 let mut new_copies = copies.clone();
112
113 for (dest, source) in child_copies {
114 let entry;
115 if let Some(v) = copies.get(source) {
116 entry = match &v.path {
117 Some(path) => Some((*(path)).to_owned()),
118 None => Some(source.to_owned()),
119 }
120 } else {
121 entry = Some(source.to_owned());
122 }
123 // Each new entry is introduced by the children, we record this
124 // information as we will need it to take the right decision
125 // when merging conflicting copy information. See
126 // merge_copies_dict for details.
127 let ttpc = TimeStampedPathCopy {
128 rev: *child,
129 path: entry,
130 };
131 new_copies.insert(dest.to_owned(), ttpc);
132 }
133
134 // We must drop copy information for removed file.
135 //
136 // We need to explicitly record them as dropped to propagate this
137 // information when merging two TimeStampedPathCopies object.
138 for f in changes.removed.iter() {
139 if new_copies.contains_key(f.as_ref()) {
140 let ttpc = TimeStampedPathCopy {
141 rev: *child,
142 path: None,
143 };
144 new_copies.insert(f.to_owned(), ttpc);
145 }
146 }
147
148 // Merge has two parents needs to combines their copy information.
149 //
150 // If the vertex from the other parent was already processed, we
151 // will have a value for the child ready to be used. We need to
152 // grab it and combine it with the one we already
153 // computed. If not we can simply store the newly
154 // computed data. The processing happening at
155 // the time of the second parent will take care of combining the
156 // two TimeStampedPathCopies instance.
157 match all_copies.remove(child) {
158 None => {
159 all_copies.insert(child, new_copies);
160 }
161 Some(other_copies) => {
162 let (minor, major) = match parent {
163 1 => (other_copies, new_copies),
164 2 => (new_copies, other_copies),
165 _ => unreachable!(),
166 };
167 let merged_copies =
168 merge_copies_dict(minor, major, &changes, is_ancestor);
169 all_copies.insert(child, merged_copies);
170 }
171 };
172 }
173 }
174
175 // Drop internal information (like the timestamp) and return the final
176 // mapping.
177 let tt_result = all_copies
178 .remove(&target_rev)
179 .expect("target revision was not processed");
180 let mut result = PathCopies::default();
181 for (dest, tt_source) in tt_result {
182 if let Some(path) = tt_source.path {
183 result.insert(dest, path);
184 }
185 }
186 result
187 }
188
189 /// merge two copies-mapping together, minor and major
190 ///
191 /// In case of conflict, value from "major" will be picked, unless in some
192 /// cases. See inline documentation for details.
193 #[allow(clippy::if_same_then_else)]
194 fn merge_copies_dict(
195 minor: TimeStampedPathCopies,
196 major: TimeStampedPathCopies,
197 changes: &ChangedFiles,
198 is_ancestor: &impl Fn(Revision, Revision) -> bool,
199 ) -> TimeStampedPathCopies {
200 let mut result = minor.clone();
201 for (dest, src_major) in major {
202 let overwrite;
203 if let Some(src_minor) = minor.get(&dest) {
204 if src_major.path == src_minor.path {
205 // we have the same value, but from other source;
206 if src_major.rev == src_minor.rev {
207 // If the two entry are identical, no need to do anything
208 overwrite = false;
209 } else if is_ancestor(src_major.rev, src_minor.rev) {
210 overwrite = false;
211 } else {
212 overwrite = true;
213 }
214 } else if src_major.rev == src_minor.rev {
215 // We cannot get copy information for both p1 and p2 in the
216 // same rev. So this is the same value.
217 overwrite = false;
218 } else if src_major.path.is_none()
219 && changes.salvaged.contains(&dest)
220 {
221 // If the file is "deleted" in the major side but was salvaged
222 // by the merge, we keep the minor side alive
223 overwrite = false;
224 } else if src_minor.path.is_none()
225 && changes.salvaged.contains(&dest)
226 {
227 // If the file is "deleted" in the minor side but was salvaged
228 // by the merge, unconditionnaly preserve the major side.
229 overwrite = true;
230 } else if changes.merged.contains(&dest) {
231 // If the file was actively merged, copy information from each
232 // side might conflict. The major side will win such conflict.
233 overwrite = true;
234 } else if is_ancestor(src_major.rev, src_minor.rev) {
235 // If the minor side is strictly newer than the major side, it
236 // should be kept.
237 overwrite = false;
238 } else if src_major.path.is_some() {
239 // without any special case, the "major" value win other the
240 // "minor" one.
241 overwrite = true;
242 } else if is_ancestor(src_minor.rev, src_major.rev) {
243 // the "major" rev is a direct ancestors of "minor", any
244 // different value should overwrite
245 overwrite = true;
246 } else {
247 // major version is None (so the file was deleted on that
248 // branch) annd that branch is independant (neither minor nor
249 // major is an ancestors of the other one.) We preserve the new
250 // information about the new file.
251 overwrite = false;
252 }
253 } else {
254 // minor had no value
255 overwrite = true;
256 }
257 if overwrite {
258 result.insert(dest, src_major);
259 }
260 }
261 result
262 }
@@ -1,193 +1,194
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 mod ancestors;
6 mod ancestors;
7 pub mod dagops;
7 pub mod dagops;
8 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
8 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
9 mod dirstate;
9 mod dirstate;
10 pub mod discovery;
10 pub mod discovery;
11 pub mod requirements;
11 pub mod requirements;
12 pub mod testing; // unconditionally built, for use from integration tests
12 pub mod testing; // unconditionally built, for use from integration tests
13 pub use dirstate::{
13 pub use dirstate::{
14 dirs_multiset::{DirsMultiset, DirsMultisetIter},
14 dirs_multiset::{DirsMultiset, DirsMultisetIter},
15 dirstate_map::DirstateMap,
15 dirstate_map::DirstateMap,
16 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
16 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
17 status::{
17 status::{
18 status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
18 status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
19 },
19 },
20 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
20 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
21 StateMap, StateMapIter,
21 StateMap, StateMapIter,
22 };
22 };
23 pub mod copy_tracing;
23 mod filepatterns;
24 mod filepatterns;
24 pub mod matchers;
25 pub mod matchers;
25 pub mod revlog;
26 pub mod revlog;
26 pub use revlog::*;
27 pub use revlog::*;
27 pub mod operations;
28 pub mod operations;
28 pub mod utils;
29 pub mod utils;
29
30
// Remove this to see (potential) non-artificial compile failures. MacOS
// *should* compile, but fail to compile tests for example as of 2020-03-06
#[cfg(not(target_os = "linux"))]
compile_error!(
    "`hg-core` has only been tested on Linux and will most \
     likely not behave correctly on other platforms."
);
37
38
38 use crate::utils::hg_path::{HgPathBuf, HgPathError};
39 use crate::utils::hg_path::{HgPathBuf, HgPathError};
39 pub use filepatterns::{
40 pub use filepatterns::{
40 parse_pattern_syntax, read_pattern_file, IgnorePattern,
41 parse_pattern_syntax, read_pattern_file, IgnorePattern,
41 PatternFileWarning, PatternSyntax,
42 PatternFileWarning, PatternSyntax,
42 };
43 };
43 use std::collections::HashMap;
44 use std::collections::HashMap;
44 use twox_hash::RandomXxHashBuilder64;
45 use twox_hash::RandomXxHashBuilder64;
45
46
46 /// This is a contract between the `micro-timer` crate and us, to expose
47 /// This is a contract between the `micro-timer` crate and us, to expose
47 /// the `log` crate as `crate::log`.
48 /// the `log` crate as `crate::log`.
48 use log;
49 use log;
49
50
50 pub type LineNumber = usize;
51 pub type LineNumber = usize;
51
52
52 /// Rust's default hasher is too slow because it tries to prevent collision
53 /// Rust's default hasher is too slow because it tries to prevent collision
53 /// attacks. We are not concerned about those: if an ill-minded person has
54 /// attacks. We are not concerned about those: if an ill-minded person has
54 /// write access to your repository, you have other issues.
55 /// write access to your repository, you have other issues.
55 pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
56 pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
56
57
/// Errors encountered while parsing the dirstate file.
#[derive(Clone, Debug, PartialEq)]
pub enum DirstateParseError {
    TooLittleData,
    Overflow,
    // TODO refactor to use bytes instead of String
    CorruptedEntry(String),
    Damaged,
}

impl From<std::io::Error> for DirstateParseError {
    /// Any I/O failure during parsing is reported as a corrupted entry
    /// carrying the error's message.
    fn from(e: std::io::Error) -> Self {
        DirstateParseError::CorruptedEntry(e.to_string())
    }
}

// Implemented as `Display` rather than a hand-written `ToString`: the
// blanket `impl<T: Display> ToString for T` keeps `to_string()` available
// to every existing caller, while also making the type usable with `{}`
// formatting.
impl std::fmt::Display for DirstateParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            DirstateParseError::TooLittleData => {
                write!(f, "Too little data for dirstate.")
            }
            DirstateParseError::Overflow => {
                write!(f, "Overflow in dirstate.")
            }
            DirstateParseError::CorruptedEntry(e) => {
                write!(f, "Corrupted entry: {:?}.", e)
            }
            DirstateParseError::Damaged => {
                write!(f, "Dirstate appears to be damaged.")
            }
        }
    }
}
83
84
/// Errors encountered while packing (serializing) the dirstate.
#[derive(Debug, PartialEq)]
pub enum DirstatePackError {
    CorruptedEntry(String),
    CorruptedParent,
    BadSize(usize, usize),
}

impl From<std::io::Error> for DirstatePackError {
    /// Any I/O failure while packing is reported as a corrupted entry
    /// carrying the error's message.
    fn from(err: std::io::Error) -> Self {
        DirstatePackError::CorruptedEntry(err.to_string())
    }
}
96 #[derive(Debug, PartialEq)]
97 #[derive(Debug, PartialEq)]
97 pub enum DirstateMapError {
98 pub enum DirstateMapError {
98 PathNotFound(HgPathBuf),
99 PathNotFound(HgPathBuf),
99 EmptyPath,
100 EmptyPath,
100 InvalidPath(HgPathError),
101 InvalidPath(HgPathError),
101 }
102 }
102
103
103 impl ToString for DirstateMapError {
104 impl ToString for DirstateMapError {
104 fn to_string(&self) -> String {
105 fn to_string(&self) -> String {
105 match self {
106 match self {
106 DirstateMapError::PathNotFound(_) => {
107 DirstateMapError::PathNotFound(_) => {
107 "expected a value, found none".to_string()
108 "expected a value, found none".to_string()
108 }
109 }
109 DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(),
110 DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(),
110 DirstateMapError::InvalidPath(e) => e.to_string(),
111 DirstateMapError::InvalidPath(e) => e.to_string(),
111 }
112 }
112 }
113 }
113 }
114 }
114
115
115 #[derive(Debug)]
116 #[derive(Debug)]
116 pub enum DirstateError {
117 pub enum DirstateError {
117 Parse(DirstateParseError),
118 Parse(DirstateParseError),
118 Pack(DirstatePackError),
119 Pack(DirstatePackError),
119 Map(DirstateMapError),
120 Map(DirstateMapError),
120 IO(std::io::Error),
121 IO(std::io::Error),
121 }
122 }
122
123
123 impl From<DirstateParseError> for DirstateError {
124 impl From<DirstateParseError> for DirstateError {
124 fn from(e: DirstateParseError) -> Self {
125 fn from(e: DirstateParseError) -> Self {
125 DirstateError::Parse(e)
126 DirstateError::Parse(e)
126 }
127 }
127 }
128 }
128
129
129 impl From<DirstatePackError> for DirstateError {
130 impl From<DirstatePackError> for DirstateError {
130 fn from(e: DirstatePackError) -> Self {
131 fn from(e: DirstatePackError) -> Self {
131 DirstateError::Pack(e)
132 DirstateError::Pack(e)
132 }
133 }
133 }
134 }
134
135
135 #[derive(Debug)]
136 #[derive(Debug)]
136 pub enum PatternError {
137 pub enum PatternError {
137 Path(HgPathError),
138 Path(HgPathError),
138 UnsupportedSyntax(String),
139 UnsupportedSyntax(String),
139 UnsupportedSyntaxInFile(String, String, usize),
140 UnsupportedSyntaxInFile(String, String, usize),
140 TooLong(usize),
141 TooLong(usize),
141 IO(std::io::Error),
142 IO(std::io::Error),
142 /// Needed a pattern that can be turned into a regex but got one that
143 /// Needed a pattern that can be turned into a regex but got one that
143 /// can't. This should only happen through programmer error.
144 /// can't. This should only happen through programmer error.
144 NonRegexPattern(IgnorePattern),
145 NonRegexPattern(IgnorePattern),
145 }
146 }
146
147
147 impl ToString for PatternError {
148 impl ToString for PatternError {
148 fn to_string(&self) -> String {
149 fn to_string(&self) -> String {
149 match self {
150 match self {
150 PatternError::UnsupportedSyntax(syntax) => {
151 PatternError::UnsupportedSyntax(syntax) => {
151 format!("Unsupported syntax {}", syntax)
152 format!("Unsupported syntax {}", syntax)
152 }
153 }
153 PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
154 PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
154 format!(
155 format!(
155 "{}:{}: unsupported syntax {}",
156 "{}:{}: unsupported syntax {}",
156 file_path, line, syntax
157 file_path, line, syntax
157 )
158 )
158 }
159 }
159 PatternError::TooLong(size) => {
160 PatternError::TooLong(size) => {
160 format!("matcher pattern is too long ({} bytes)", size)
161 format!("matcher pattern is too long ({} bytes)", size)
161 }
162 }
162 PatternError::IO(e) => e.to_string(),
163 PatternError::IO(e) => e.to_string(),
163 PatternError::Path(e) => e.to_string(),
164 PatternError::Path(e) => e.to_string(),
164 PatternError::NonRegexPattern(pattern) => {
165 PatternError::NonRegexPattern(pattern) => {
165 format!("'{:?}' cannot be turned into a regex", pattern)
166 format!("'{:?}' cannot be turned into a regex", pattern)
166 }
167 }
167 }
168 }
168 }
169 }
169 }
170 }
170
171
171 impl From<DirstateMapError> for DirstateError {
172 impl From<DirstateMapError> for DirstateError {
172 fn from(e: DirstateMapError) -> Self {
173 fn from(e: DirstateMapError) -> Self {
173 DirstateError::Map(e)
174 DirstateError::Map(e)
174 }
175 }
175 }
176 }
176
177
177 impl From<std::io::Error> for DirstateError {
178 impl From<std::io::Error> for DirstateError {
178 fn from(e: std::io::Error) -> Self {
179 fn from(e: std::io::Error) -> Self {
179 DirstateError::IO(e)
180 DirstateError::IO(e)
180 }
181 }
181 }
182 }
182
183
183 impl From<std::io::Error> for PatternError {
184 impl From<std::io::Error> for PatternError {
184 fn from(e: std::io::Error) -> Self {
185 fn from(e: std::io::Error) -> Self {
185 PatternError::IO(e)
186 PatternError::IO(e)
186 }
187 }
187 }
188 }
188
189
189 impl From<HgPathError> for PatternError {
190 impl From<HgPathError> for PatternError {
190 fn from(e: HgPathError) -> Self {
191 fn from(e: HgPathError) -> Self {
191 PatternError::Path(e)
192 PatternError::Path(e)
192 }
193 }
193 }
194 }
General Comments 0
You need to be logged in to leave comments. Login now