##// END OF EJS Templates
copies-rust: encapsulate internal sets on `changes`...
marmoute -
r46589:23679379 default
parent child Browse files
Show More
@@ -1,390 +1,415 b''
1 1 use crate::utils::hg_path::HgPath;
2 2 use crate::utils::hg_path::HgPathBuf;
3 3 use crate::Revision;
4 4
5 5 use im_rc::ordmap::DiffItem;
6 6 use im_rc::ordmap::OrdMap;
7 7
8 8 use std::collections::HashMap;
9 9 use std::collections::HashSet;
10 10
/// Maps a copy destination path to its copy source path.
pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;

/// A copy-source entry tagged with the revision that introduced it.
#[derive(Clone, Debug, PartialEq)]
struct TimeStampedPathCopy {
    /// revision at which the copy information was added
    rev: Revision,
    /// the copy source, (Set to None in case of deletion of the associated
    /// key)
    path: Option<HgPathBuf>,
}

/// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
type TimeStampedPathCopies = OrdMap<HgPathBuf, TimeStampedPathCopy>;
24 24
/// hold parent 1, parent 2 and relevant files actions.
///
/// Tuple layout: (p1, p2, changed files of the revision).
pub type RevInfo = (Revision, Revision, ChangedFiles);
27 27
/// represent the files affected by a changesets
///
/// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
/// all the data categories tracked by it.
pub struct ChangedFiles {
    /// files removed by this changeset
    removed: HashSet<HgPathBuf>,
    /// files whose history on both sides needed to be merged
    /// (see `MergeCase::Merged`)
    merged: HashSet<HgPathBuf>,
    /// files that were candidate for deletion but survived the merge
    /// (see `MergeCase::Salvaged`)
    salvaged: HashSet<HgPathBuf>,
    /// copies (destination key, source value) recorded against parent 1
    copied_from_p1: PathCopies,
    /// copies (destination key, source value) recorded against parent 2
    copied_from_p2: PathCopies,
}
39 39
/// Represent active changes that affect the copy tracing.
enum Action<'a> {
    /// The parent → children edge is removing a file
    ///
    /// (actually, this could be the edge from the other parent, but it does
    /// not matters)
    Removed(&'a HgPath),
    /// The parent → children edge introduce copy information between (dest,
    /// source)
    Copied(&'a HgPath, &'a HgPath),
}
51 51
/// This express the possible "special" case we can get in a merge
///
/// See mercurial/metadata.py for details on these values.
// `PartialEq` is required: merge_copies_dict compares values with `==`.
#[derive(PartialEq)]
enum MergeCase {
    /// Merged: file had history on both side that needed to be merged
    Merged,
    /// Salvaged: file was candidate for deletion, but survived the merge
    Salvaged,
    /// Normal: Not one of the two cases above
    Normal,
}
64
52 65 impl ChangedFiles {
53 66 pub fn new(
54 67 removed: HashSet<HgPathBuf>,
55 68 merged: HashSet<HgPathBuf>,
56 69 salvaged: HashSet<HgPathBuf>,
57 70 copied_from_p1: PathCopies,
58 71 copied_from_p2: PathCopies,
59 72 ) -> Self {
60 73 ChangedFiles {
61 74 removed,
62 75 merged,
63 76 salvaged,
64 77 copied_from_p1,
65 78 copied_from_p2,
66 79 }
67 80 }
68 81
69 82 pub fn new_empty() -> Self {
70 83 ChangedFiles {
71 84 removed: HashSet::new(),
72 85 merged: HashSet::new(),
73 86 salvaged: HashSet::new(),
74 87 copied_from_p1: PathCopies::new(),
75 88 copied_from_p2: PathCopies::new(),
76 89 }
77 90 }
78 91
79 92 /// Return an iterator over all the `Action` in this instance.
80 93 fn iter_actions(&self, parent: usize) -> impl Iterator<Item = Action> {
81 94 let copies_iter = match parent {
82 95 1 => self.copied_from_p1.iter(),
83 96 2 => self.copied_from_p2.iter(),
84 97 _ => unreachable!(),
85 98 };
86 99 let remove_iter = self.removed.iter();
87 100 let copies_iter = copies_iter.map(|(x, y)| Action::Copied(x, y));
88 101 let remove_iter = remove_iter.map(|x| Action::Removed(x));
89 102 copies_iter.chain(remove_iter)
90 103 }
104
105 /// return the MergeCase value associated with a filename
106 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
107 if self.salvaged.contains(path) {
108 return MergeCase::Salvaged;
109 } else if self.merged.contains(path) {
110 return MergeCase::Merged;
111 } else {
112 return MergeCase::Normal;
113 }
114 }
91 115 }
92 116
/// A struct responsible for answering "is X ancestors of Y" quickly
///
/// The structure will delegate ancestors call to a callback, and cache the
/// result.
#[derive(Debug)]
struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
    /// the callback performing the actual (expensive) ancestry check
    inner: &'a A,
    /// cache of already computed (ancestor, descendant) → answer pairs
    pairs: HashMap<(Revision, Revision), bool>,
}
102 126
103 127 impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
104 128 fn new(func: &'a A) -> Self {
105 129 Self {
106 130 inner: func,
107 131 pairs: HashMap::default(),
108 132 }
109 133 }
110 134
111 135 /// returns `true` if `anc` is an ancestors of `desc`, `false` otherwise
112 136 fn is_ancestor(&mut self, anc: Revision, desc: Revision) -> bool {
113 137 if anc > desc {
114 138 false
115 139 } else if anc == desc {
116 140 true
117 141 } else {
118 142 if let Some(b) = self.pairs.get(&(anc, desc)) {
119 143 *b
120 144 } else {
121 145 let b = (self.inner)(anc, desc);
122 146 self.pairs.insert((anc, desc), b);
123 147 b
124 148 }
125 149 }
126 150 }
127 151 }
128 152
/// Same as mercurial.copies._combine_changeset_copies, but in Rust.
///
/// Arguments are:
///
/// revs: all revisions to be considered
/// children: a {parent → [children]} mapping
/// target_rev: the final revision we are combining copies to
/// rev_info(rev): callback to get revision information:
///   * first parent
///   * second parent
///   * ChangedFiles
/// isancestors(low_rev, high_rev): callback to check if a revision is an
///                                 ancestor of another
///
/// NOTE(review): every revision in `revs` must have an entry in `children`
/// (even if empty) or this panics; `revs` is presumably iterated in
/// topological order so parents are processed before children — confirm
/// with callers.
pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool>(
    revs: Vec<Revision>,
    children: HashMap<Revision, Vec<Revision>>,
    target_rev: Revision,
    rev_info: &impl Fn(Revision) -> RevInfo,
    is_ancestor: &A,
) -> PathCopies {
    // copy information accumulated so far, keyed by (child) revision
    let mut all_copies = HashMap::new();
    // caching layer over the `is_ancestor` callback
    let mut oracle = AncestorOracle::new(is_ancestor);

    for rev in revs {
        // Retrieve data computed in a previous iteration
        let copies = all_copies.remove(&rev);
        let copies = match copies {
            Some(c) => c,
            None => TimeStampedPathCopies::default(), // root of the walked set
        };

        let current_children = match children.get(&rev) {
            Some(c) => c,
            None => panic!("inconsistent `revs` and `children`"),
        };

        for child in current_children {
            // We will chain the copies information accumulated for `rev` with
            // the individual copies information for each of its children.
            // Creating a new PathCopies for each `rev` → `children` vertex.
            let (p1, p2, changes) = rev_info(*child);

            // which parent of `child` is `rev`? (1 or 2)
            let parent = if rev == p1 {
                1
            } else {
                assert_eq!(rev, p2);
                2
            };
            let mut new_copies = copies.clone();

            for action in changes.iter_actions(parent) {
                match action {
                    Action::Copied(dest, source) => {
                        let entry;
                        if let Some(v) = copies.get(source) {
                            // chained copy: resolve through the existing
                            // entry unless that entry records a deletion
                            entry = match &v.path {
                                Some(path) => Some((*(path)).to_owned()),
                                None => Some(source.to_owned()),
                            }
                        } else {
                            entry = Some(source.to_owned());
                        }
                        // Each new entry is introduced by the children, we
                        // record this information as we will need it to take
                        // the right decision when merging conflicting copy
                        // information. See merge_copies_dict for details.
                        let ttpc = TimeStampedPathCopy {
                            rev: *child,
                            path: entry,
                        };
                        new_copies.insert(dest.to_owned(), ttpc);
                    }
                    Action::Removed(f) => {
                        // We must drop copy information for removed file.
                        //
                        // We need to explicitly record them as dropped to
                        // propagate this information when merging two
                        // TimeStampedPathCopies object.
                        if new_copies.contains_key(f.as_ref()) {
                            let ttpc = TimeStampedPathCopy {
                                rev: *child,
                                path: None, // None marks the deletion
                            };
                            new_copies.insert(f.to_owned(), ttpc);
                        }
                    }
                }
            }

            // Merge has two parents needs to combines their copy information.
            //
            // If the vertex from the other parent was already processed, we
            // will have a value for the child ready to be used. We need to
            // grab it and combine it with the one we already
            // computed. If not we can simply store the newly
            // computed data. The processing happening at
            // the time of the second parent will take care of combining the
            // two TimeStampedPathCopies instance.
            match all_copies.remove(child) {
                None => {
                    all_copies.insert(child, new_copies);
                }
                Some(other_copies) => {
                    // "minor" is always the p1-derived map, "major" the
                    // p2-derived one; see merge_copies_dict for why.
                    let (minor, major) = match parent {
                        1 => (other_copies, new_copies),
                        2 => (new_copies, other_copies),
                        _ => unreachable!(),
                    };
                    let merged_copies =
                        merge_copies_dict(minor, major, &changes, &mut oracle);
                    all_copies.insert(child, merged_copies);
                }
            };
        }
    }

    // Drop internal information (like the timestamp) and return the final
    // mapping.
    let tt_result = all_copies
        .remove(&target_rev)
        .expect("target revision was not processed");
    let mut result = PathCopies::default();
    for (dest, tt_source) in tt_result {
        // entries whose path is None record deletions: skip them
        if let Some(path) = tt_source.path {
            result.insert(dest, path);
        }
    }
    result
}
258 282
/// merge two copies-mapping together, minor and major
///
/// In case of conflict, value from "major" will be picked, unless in some
/// cases. See inline documentation for details.
#[allow(clippy::if_same_then_else)]
fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
    minor: TimeStampedPathCopies,
    major: TimeStampedPathCopies,
    changes: &ChangedFiles,
    oracle: &mut AncestorOracle<A>,
) -> TimeStampedPathCopies {
    // trivial cases: one side has nothing, keep the other unchanged
    if minor.is_empty() {
        return major;
    } else if major.is_empty() {
        return minor;
    }
    let mut override_minor = Vec::new();
    let mut override_major = Vec::new();

    // `to_major(k, v)` records that `v` must be inserted into the `major`
    // map to turn it into the merged result (and symmetrically for
    // `to_minor`). Only one of the two override lists is applied at the
    // end — whichever base map needs fewer insertions.
    let mut to_major = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
        override_major.push((k.clone(), v.clone()))
    };
    let mut to_minor = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
        override_minor.push((k.clone(), v.clone()))
    };

    // The diff function leverage detection of the identical subpart if minor
    // and major has some common ancestors. This make it very fast is most
    // case.
    //
    // In case where the two map are vastly different in size, the current
    // approach is still slowish because the iteration will iterate over
    // all the "exclusive" content of the larger on. This situation can be
    // frequent when the subgraph of revision we are processing has a lot
    // of roots. Each roots adding they own fully new map to the mix (and
    // likely a small map, if the path from the root to the "main path" is
    // small.
    //
    // We could do better by detecting such situation and processing them
    // differently.
    for d in minor.diff(&major) {
        match d {
            // `Add`: key present only in `major` → must be added to `minor`
            DiffItem::Add(k, v) => to_minor(k, v),
            // `Remove`: key present only in `minor` → must be added to `major`
            DiffItem::Remove(k, v) => to_major(k, v),
            // key present in both with different values: resolve conflict
            DiffItem::Update { old, new } => {
                let (dest, src_major) = new;
                let (_, src_minor) = old;
                // "picking" one side means overriding the entry on the
                // *other* map, hence the crossed to_major/to_minor calls.
                let mut pick_minor = || (to_major(dest, src_minor));
                let mut pick_major = || (to_minor(dest, src_major));
                if src_major.path == src_minor.path {
                    // we have the same value, but from other source;
                    if src_major.rev == src_minor.rev {
                        // If the two entry are identical, no need to do
                        // anything (but diff should not have yield them)
                        unreachable!();
                    } else if oracle.is_ancestor(src_major.rev, src_minor.rev)
                    {
                        // same value, minor entry is newer: keep it
                        pick_minor();
                    } else {
                        pick_major();
                    }
                } else if src_major.rev == src_minor.rev {
                    // We cannot get copy information for both p1 and p2 in the
                    // same rev. So this is the same value.
                    unreachable!();
                } else {
                    // genuine conflict: use the merge-case information of
                    // the destination file to decide
                    let action = changes.get_merge_case(&dest);
                    if src_major.path.is_none()
                        && action == MergeCase::Salvaged
                    {
                        // If the file is "deleted" in the major side but was
                        // salvaged by the merge, we keep the minor side alive
                        pick_minor();
                    } else if src_minor.path.is_none()
                        && action == MergeCase::Salvaged
                    {
                        // If the file is "deleted" in the minor side but was
                        // salvaged by the merge, unconditionnaly preserve the
                        // major side.
                        pick_major();
                    } else if action == MergeCase::Merged {
                        // If the file was actively merged, copy information
                        // from each side might conflict. The major side will
                        // win such conflict.
                        pick_major();
                    } else if oracle.is_ancestor(src_major.rev, src_minor.rev)
                    {
                        // If the minor side is strictly newer than the major
                        // side, it should be kept.
                        pick_minor();
                    } else if src_major.path.is_some() {
                        // without any special case, the "major" value win
                        // other the "minor" one.
                        pick_major();
                    } else if oracle.is_ancestor(src_minor.rev, src_major.rev)
                    {
                        // the "major" rev is a direct ancestors of "minor",
                        // any different value should
                        // overwrite
                        pick_major();
                    } else {
                        // major version is None (so the file was deleted on
                        // that branch) and that branch is independant (neither
                        // minor nor major is an ancestors of the other one.)
                        // We preserve the new
                        // information about the new file.
                        pick_minor();
                    }
                }
            }
        };
    }

    // Apply the smaller override list to its base map, reusing the base
    // map's allocation; if one list is empty the base map is already the
    // merged result.
    let updates;
    let mut result;
    if override_major.is_empty() {
        result = major
    } else if override_minor.is_empty() {
        result = minor
    } else {
        if override_minor.len() < override_major.len() {
            updates = override_minor;
            result = minor;
        } else {
            updates = override_major;
            result = major;
        }
        for (k, v) in updates {
            result.insert(k, v);
        }
    }
    result
}
General Comments 0
You need to be logged in to leave comments. Login now