##// END OF EJS Templates
copies: introduce a basic Rust function for `combine_changeset_copies`...
marmoute -
r46556:595979dc default
parent child Browse files
Show More
@@ -0,0 +1,262
1 use crate::utils::hg_path::HgPathBuf;
2 use crate::Revision;
3
4 use std::collections::HashMap;
5 use std::collections::HashSet;
6
/// Maps a copy destination to its copy source.
pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
8
/// A copy source annotated with the revision that recorded it.
#[derive(Clone, Debug)]
struct TimeStampedPathCopy {
    /// revision at which the copy information was added
    rev: Revision,
    /// the copy source (set to `None` in case of deletion of the associated
    /// key)
    path: Option<HgPathBuf>,
}
17
/// Maps a copy destination to its copy source, together with a "timestamp"
/// (the revision) for the operation that recorded it.
type TimeStampedPathCopies = HashMap<HgPathBuf, TimeStampedPathCopy>;
20
/// Information about a revision: its first parent, its second parent and the
/// relevant file actions, in that order.
pub type RevInfo = (Revision, Revision, ChangedFiles);
23
24 /// represent the files affected by a changesets
25 ///
26 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
27 /// all the data categories tracked by it.
28 pub struct ChangedFiles {
29 removed: HashSet<HgPathBuf>,
30 merged: HashSet<HgPathBuf>,
31 salvaged: HashSet<HgPathBuf>,
32 copied_from_p1: PathCopies,
33 copied_from_p2: PathCopies,
34 }
35
36 impl ChangedFiles {
37 pub fn new(
38 removed: HashSet<HgPathBuf>,
39 merged: HashSet<HgPathBuf>,
40 salvaged: HashSet<HgPathBuf>,
41 copied_from_p1: PathCopies,
42 copied_from_p2: PathCopies,
43 ) -> Self {
44 ChangedFiles {
45 removed,
46 merged,
47 salvaged,
48 copied_from_p1,
49 copied_from_p2,
50 }
51 }
52
53 pub fn new_empty() -> Self {
54 ChangedFiles {
55 removed: HashSet::new(),
56 merged: HashSet::new(),
57 salvaged: HashSet::new(),
58 copied_from_p1: PathCopies::new(),
59 copied_from_p2: PathCopies::new(),
60 }
61 }
62 }
63
64 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
65 ///
66 /// Arguments are:
67 ///
68 /// revs: all revisions to be considered
69 /// children: a {parent ? [childrens]} mapping
70 /// target_rev: the final revision we are combining copies to
71 /// rev_info(rev): callback to get revision information:
72 /// * first parent
73 /// * second parent
74 /// * ChangedFiles
75 /// isancestors(low_rev, high_rev): callback to check if a revision is an
76 /// ancestor of another
77 pub fn combine_changeset_copies(
78 revs: Vec<Revision>,
79 children: HashMap<Revision, Vec<Revision>>,
80 target_rev: Revision,
81 rev_info: &impl Fn(Revision) -> RevInfo,
82 is_ancestor: &impl Fn(Revision, Revision) -> bool,
83 ) -> PathCopies {
84 let mut all_copies = HashMap::new();
85
86 for rev in revs {
87 // Retrieve data computed in a previous iteration
88 let copies = all_copies.remove(&rev);
89 let copies = match copies {
90 Some(c) => c,
91 None => TimeStampedPathCopies::default(), // root of the walked set
92 };
93
94 let current_children = match children.get(&rev) {
95 Some(c) => c,
96 None => panic!("inconsistent `revs` and `children`"),
97 };
98
99 for child in current_children {
100 // We will chain the copies information accumulated for `rev` with
101 // the individual copies information for each of its children.
102 // Creating a new PathCopies for each `rev` ? `children` vertex.
103 let (p1, p2, changes) = rev_info(*child);
104
105 let (parent, child_copies) = if rev == p1 {
106 (1, &changes.copied_from_p1)
107 } else {
108 assert_eq!(rev, p2);
109 (2, &changes.copied_from_p2)
110 };
111 let mut new_copies = copies.clone();
112
113 for (dest, source) in child_copies {
114 let entry;
115 if let Some(v) = copies.get(source) {
116 entry = match &v.path {
117 Some(path) => Some((*(path)).to_owned()),
118 None => Some(source.to_owned()),
119 }
120 } else {
121 entry = Some(source.to_owned());
122 }
123 // Each new entry is introduced by the children, we record this
124 // information as we will need it to take the right decision
125 // when merging conflicting copy information. See
126 // merge_copies_dict for details.
127 let ttpc = TimeStampedPathCopy {
128 rev: *child,
129 path: entry,
130 };
131 new_copies.insert(dest.to_owned(), ttpc);
132 }
133
134 // We must drop copy information for removed file.
135 //
136 // We need to explicitly record them as dropped to propagate this
137 // information when merging two TimeStampedPathCopies object.
138 for f in changes.removed.iter() {
139 if new_copies.contains_key(f.as_ref()) {
140 let ttpc = TimeStampedPathCopy {
141 rev: *child,
142 path: None,
143 };
144 new_copies.insert(f.to_owned(), ttpc);
145 }
146 }
147
148 // Merge has two parents needs to combines their copy information.
149 //
150 // If the vertex from the other parent was already processed, we
151 // will have a value for the child ready to be used. We need to
152 // grab it and combine it with the one we already
153 // computed. If not we can simply store the newly
154 // computed data. The processing happening at
155 // the time of the second parent will take care of combining the
156 // two TimeStampedPathCopies instance.
157 match all_copies.remove(child) {
158 None => {
159 all_copies.insert(child, new_copies);
160 }
161 Some(other_copies) => {
162 let (minor, major) = match parent {
163 1 => (other_copies, new_copies),
164 2 => (new_copies, other_copies),
165 _ => unreachable!(),
166 };
167 let merged_copies =
168 merge_copies_dict(minor, major, &changes, is_ancestor);
169 all_copies.insert(child, merged_copies);
170 }
171 };
172 }
173 }
174
175 // Drop internal information (like the timestamp) and return the final
176 // mapping.
177 let tt_result = all_copies
178 .remove(&target_rev)
179 .expect("target revision was not processed");
180 let mut result = PathCopies::default();
181 for (dest, tt_source) in tt_result {
182 if let Some(path) = tt_source.path {
183 result.insert(dest, path);
184 }
185 }
186 result
187 }
188
189 /// merge two copies-mapping together, minor and major
190 ///
191 /// In case of conflict, value from "major" will be picked, unless in some
192 /// cases. See inline documentation for details.
193 #[allow(clippy::if_same_then_else)]
194 fn merge_copies_dict(
195 minor: TimeStampedPathCopies,
196 major: TimeStampedPathCopies,
197 changes: &ChangedFiles,
198 is_ancestor: &impl Fn(Revision, Revision) -> bool,
199 ) -> TimeStampedPathCopies {
200 let mut result = minor.clone();
201 for (dest, src_major) in major {
202 let overwrite;
203 if let Some(src_minor) = minor.get(&dest) {
204 if src_major.path == src_minor.path {
205 // we have the same value, but from other source;
206 if src_major.rev == src_minor.rev {
207 // If the two entry are identical, no need to do anything
208 overwrite = false;
209 } else if is_ancestor(src_major.rev, src_minor.rev) {
210 overwrite = false;
211 } else {
212 overwrite = true;
213 }
214 } else if src_major.rev == src_minor.rev {
215 // We cannot get copy information for both p1 and p2 in the
216 // same rev. So this is the same value.
217 overwrite = false;
218 } else if src_major.path.is_none()
219 && changes.salvaged.contains(&dest)
220 {
221 // If the file is "deleted" in the major side but was salvaged
222 // by the merge, we keep the minor side alive
223 overwrite = false;
224 } else if src_minor.path.is_none()
225 && changes.salvaged.contains(&dest)
226 {
227 // If the file is "deleted" in the minor side but was salvaged
228 // by the merge, unconditionnaly preserve the major side.
229 overwrite = true;
230 } else if changes.merged.contains(&dest) {
231 // If the file was actively merged, copy information from each
232 // side might conflict. The major side will win such conflict.
233 overwrite = true;
234 } else if is_ancestor(src_major.rev, src_minor.rev) {
235 // If the minor side is strictly newer than the major side, it
236 // should be kept.
237 overwrite = false;
238 } else if src_major.path.is_some() {
239 // without any special case, the "major" value win other the
240 // "minor" one.
241 overwrite = true;
242 } else if is_ancestor(src_minor.rev, src_major.rev) {
243 // the "major" rev is a direct ancestors of "minor", any
244 // different value should overwrite
245 overwrite = true;
246 } else {
247 // major version is None (so the file was deleted on that
248 // branch) annd that branch is independant (neither minor nor
249 // major is an ancestors of the other one.) We preserve the new
250 // information about the new file.
251 overwrite = false;
252 }
253 } else {
254 // minor had no value
255 overwrite = true;
256 }
257 if overwrite {
258 result.insert(dest, src_major);
259 }
260 }
261 result
262 }
@@ -1,193 +1,194
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 mod ancestors;
7 7 pub mod dagops;
8 8 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
9 9 mod dirstate;
10 10 pub mod discovery;
11 11 pub mod requirements;
12 12 pub mod testing; // unconditionally built, for use from integration tests
13 13 pub use dirstate::{
14 14 dirs_multiset::{DirsMultiset, DirsMultisetIter},
15 15 dirstate_map::DirstateMap,
16 16 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
17 17 status::{
18 18 status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
19 19 },
20 20 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
21 21 StateMap, StateMapIter,
22 22 };
23 pub mod copy_tracing;
23 24 mod filepatterns;
24 25 pub mod matchers;
25 26 pub mod revlog;
26 27 pub use revlog::*;
27 28 pub mod operations;
28 29 pub mod utils;
29 30
30 31 // Remove this to see (potential) non-artificial compile failures. MacOS
31 32 // *should* compile, but fail to compile tests for example as of 2020-03-06
32 33 #[cfg(not(target_os = "linux"))]
33 34 compile_error!(
34 35 "`hg-core` has only been tested on Linux and will most \
35 36 likely not behave correctly on other platforms."
36 37 );
37 38
38 39 use crate::utils::hg_path::{HgPathBuf, HgPathError};
39 40 pub use filepatterns::{
40 41 parse_pattern_syntax, read_pattern_file, IgnorePattern,
41 42 PatternFileWarning, PatternSyntax,
42 43 };
43 44 use std::collections::HashMap;
44 45 use twox_hash::RandomXxHashBuilder64;
45 46
46 47 /// This is a contract between the `micro-timer` crate and us, to expose
47 48 /// the `log` crate as `crate::log`.
48 49 use log;
49 50
/// A line number, stored as a `usize`.
// NOTE(review): presumably a line position inside a parsed file; the users of
// this alias are not visible here, confirm before relying on it.
pub type LineNumber = usize;
51 52
/// A `HashMap` using the `RandomXxHashBuilder64` hasher.
///
/// Rust's default hasher is too slow because it tries to prevent collision
/// attacks. We are not concerned about those: if an ill-minded person has
/// write access to your repository, you have other issues.
pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
56 57
/// Errors reported while parsing a dirstate.
#[derive(Clone, Debug, PartialEq)]
pub enum DirstateParseError {
    TooLittleData,
    Overflow,
    // TODO refactor to use bytes instead of String
    /// Carries a textual description of the problem.
    CorruptedEntry(String),
    Damaged,
}

impl From<std::io::Error> for DirstateParseError {
    /// Wraps the textual form of an I/O error in `CorruptedEntry`.
    fn from(e: std::io::Error) -> Self {
        DirstateParseError::CorruptedEntry(e.to_string())
    }
}

// Implementing `Display` (rather than `ToString` directly) keeps
// `.to_string()` available through the standard blanket implementation and
// additionally makes the type usable with `{}` formatting.
impl std::fmt::Display for DirstateParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            DirstateParseError::TooLittleData => {
                f.write_str("Too little data for dirstate.")
            }
            DirstateParseError::Overflow => {
                f.write_str("Overflow in dirstate.")
            }
            DirstateParseError::CorruptedEntry(e) => {
                // `{:?}` is deliberate: the description is shown quoted.
                write!(f, "Corrupted entry: {:?}.", e)
            }
            DirstateParseError::Damaged => {
                f.write_str("Dirstate appears to be damaged.")
            }
        }
    }
}
83 84
/// Errors reported while packing a dirstate.
#[derive(Debug, PartialEq)]
pub enum DirstatePackError {
    /// Carries a textual description of the problem.
    CorruptedEntry(String),
    CorruptedParent,
    BadSize(usize, usize),
}

impl From<std::io::Error> for DirstatePackError {
    /// Wraps the textual form of an I/O error in `CorruptedEntry`.
    fn from(io_err: std::io::Error) -> Self {
        let description = io_err.to_string();
        Self::CorruptedEntry(description)
    }
}
/// Error cases wrapped by `DirstateError::Map`.
// NOTE(review): presumably raised by dirstate map operations; the code
// producing these values is not visible here.
#[derive(Debug, PartialEq)]
pub enum DirstateMapError {
    /// Carries the path involved.
    PathNotFound(HgPathBuf),
    EmptyPath,
    /// Carries the underlying path error.
    InvalidPath(HgPathError),
}
102 103
103 104 impl ToString for DirstateMapError {
104 105 fn to_string(&self) -> String {
105 106 match self {
106 107 DirstateMapError::PathNotFound(_) => {
107 108 "expected a value, found none".to_string()
108 109 }
109 110 DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(),
110 111 DirstateMapError::InvalidPath(e) => e.to_string(),
111 112 }
112 113 }
113 114 }
114 115
/// Umbrella error type wrapping the dirstate parse, pack and map errors, as
/// well as I/O errors.
#[derive(Debug)]
pub enum DirstateError {
    Parse(DirstateParseError),
    Pack(DirstatePackError),
    Map(DirstateMapError),
    IO(std::io::Error),
}
122 123
123 124 impl From<DirstateParseError> for DirstateError {
124 125 fn from(e: DirstateParseError) -> Self {
125 126 DirstateError::Parse(e)
126 127 }
127 128 }
128 129
129 130 impl From<DirstatePackError> for DirstateError {
130 131 fn from(e: DirstatePackError) -> Self {
131 132 DirstateError::Pack(e)
132 133 }
133 134 }
134 135
/// Error cases related to patterns.
#[derive(Debug)]
pub enum PatternError {
    /// Carries the underlying path error.
    Path(HgPathError),
    /// Carries the name of the unsupported syntax.
    UnsupportedSyntax(String),
    /// Carries, in order: the unsupported syntax, the file path and the line.
    UnsupportedSyntaxInFile(String, String, usize),
    /// Carries the size of the offending matcher pattern, in bytes.
    TooLong(usize),
    /// Carries the underlying I/O error.
    IO(std::io::Error),
    /// Needed a pattern that can be turned into a regex but got one that
    /// can't. This should only happen through programmer error.
    NonRegexPattern(IgnorePattern),
}
146 147
147 148 impl ToString for PatternError {
148 149 fn to_string(&self) -> String {
149 150 match self {
150 151 PatternError::UnsupportedSyntax(syntax) => {
151 152 format!("Unsupported syntax {}", syntax)
152 153 }
153 154 PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
154 155 format!(
155 156 "{}:{}: unsupported syntax {}",
156 157 file_path, line, syntax
157 158 )
158 159 }
159 160 PatternError::TooLong(size) => {
160 161 format!("matcher pattern is too long ({} bytes)", size)
161 162 }
162 163 PatternError::IO(e) => e.to_string(),
163 164 PatternError::Path(e) => e.to_string(),
164 165 PatternError::NonRegexPattern(pattern) => {
165 166 format!("'{:?}' cannot be turned into a regex", pattern)
166 167 }
167 168 }
168 169 }
169 170 }
170 171
171 172 impl From<DirstateMapError> for DirstateError {
172 173 fn from(e: DirstateMapError) -> Self {
173 174 DirstateError::Map(e)
174 175 }
175 176 }
176 177
177 178 impl From<std::io::Error> for DirstateError {
178 179 fn from(e: std::io::Error) -> Self {
179 180 DirstateError::IO(e)
180 181 }
181 182 }
182 183
183 184 impl From<std::io::Error> for PatternError {
184 185 fn from(e: std::io::Error) -> Self {
185 186 PatternError::IO(e)
186 187 }
187 188 }
188 189
189 190 impl From<HgPathError> for PatternError {
190 191 fn from(e: HgPathError) -> Self {
191 192 PatternError::Path(e)
192 193 }
193 194 }
General Comments 0
You need to be logged in to leave comments. Login now