##// END OF EJS Templates
copies-rust: track "overwrites" directly within CopySource...
marmoute -
r47315:0d840b9d default
parent child Browse files
Show More
@@ -1,924 +1,949 b''
1 1 use crate::utils::hg_path::HgPath;
2 2 use crate::utils::hg_path::HgPathBuf;
3 3 use crate::Revision;
4 4 use crate::NULL_REVISION;
5 5
6 6 use im_rc::ordmap::DiffItem;
7 7 use im_rc::ordmap::Entry;
8 8 use im_rc::ordmap::OrdMap;
9 9
10 10 use std::cmp::Ordering;
11 11 use std::collections::HashMap;
12 use std::collections::HashSet;
12 13 use std::convert::TryInto;
13 14
14 15 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
15 16
16 17 type PathToken = usize;
17 18
18 19 #[derive(Clone, Debug, PartialEq)]
19 20 struct CopySource {
20 21 /// revision at which the copy information was added
21 22 rev: Revision,
22 23 /// the copy source, (Set to None in case of deletion of the associated
23 24 /// key)
24 25 path: Option<PathToken>,
26 /// a set of previous `CopySource.rev` value directly or indirectly
27 /// overwritten by this one.
28 overwritten: HashSet<Revision>,
25 29 }
26 30
27 31 impl CopySource {
28 32 /// create a new CopySource
29 33 ///
30 34 /// Use this when no previous copy source existed.
31 35 fn new(rev: Revision, path: Option<PathToken>) -> Self {
32 Self { rev, path }
36 Self {
37 rev,
38 path,
39 overwritten: HashSet::new(),
40 }
33 41 }
34 42
35 43 /// create a new CopySource from merging two others
36 44 ///
37 45 /// Use this when merging two InternalPathCopies requires active merging of
38 46 /// some entries.
39 47 fn new_from_merge(rev: Revision, winner: &Self, loser: &Self) -> Self {
48 let mut overwritten = HashSet::new();
49 overwritten.extend(winner.overwritten.iter().copied());
50 overwritten.extend(loser.overwritten.iter().copied());
51 overwritten.insert(winner.rev);
52 overwritten.insert(loser.rev);
40 53 Self {
41 54 rev,
42 55 path: winner.path,
56 overwritten: overwritten,
43 57 }
44 58 }
45 59
46 60 /// Update the value of a pre-existing CopySource
47 61 ///
48 62 /// Use this when recording copy information from parent → child edges
49 63 fn overwrite(&mut self, rev: Revision, path: Option<PathToken>) {
64 self.overwritten.insert(self.rev);
50 65 self.rev = rev;
51 66 self.path = path;
52 67 }
53 68
54 69 /// Mark pre-existing copy information as "dropped" by a file deletion
55 70 ///
56 71 /// Use this when recording copy information from parent → child edges
57 72 fn mark_delete(&mut self, rev: Revision) {
73 self.overwritten.insert(self.rev);
58 74 self.rev = rev;
59 75 self.path = None;
60 76 }
77
78 fn is_overwritten_by(&self, other: &Self) -> bool {
79 other.overwritten.contains(&self.rev)
80 }
61 81 }
62 82
63 83 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
64 84 type InternalPathCopies = OrdMap<PathToken, CopySource>;
65 85
66 86 /// hold parent 1, parent 2 and relevant files actions.
67 87 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
68 88
69 89 /// represent the files affected by a changesets
70 90 ///
71 91 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
72 92 /// all the data categories tracked by it.
73 93 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
74 94 /// all the data categories tracked by it.
75 95 pub struct ChangedFiles<'a> {
76 96 nb_items: u32,
77 97 index: &'a [u8],
78 98 data: &'a [u8],
79 99 }
80 100
81 101 /// Represent active changes that affect the copy tracing.
82 102 enum Action<'a> {
83 103 /// The parent ? children edge is removing a file
84 104 ///
85 105 /// (actually, this could be the edge from the other parent, but it does
86 106 /// not matters)
87 107 Removed(&'a HgPath),
88 108 /// The parent ? children edge introduce copy information between (dest,
89 109 /// source)
90 110 Copied(&'a HgPath, &'a HgPath),
91 111 }
92 112
93 113 /// This express the possible "special" case we can get in a merge
94 114 ///
95 115 /// See mercurial/metadata.py for details on these values.
96 116 #[derive(PartialEq)]
97 117 enum MergeCase {
98 118 /// Merged: file had history on both side that needed to be merged
99 119 Merged,
100 120 /// Salvaged: file was candidate for deletion, but survived the merge
101 121 Salvaged,
102 122 /// Normal: Not one of the two cases above
103 123 Normal,
104 124 }
105 125
106 126 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
107 127
108 128 const EMPTY: &[u8] = b"";
109 129 const COPY_MASK: u8 = 3;
110 130 const P1_COPY: u8 = 2;
111 131 const P2_COPY: u8 = 3;
112 132 const ACTION_MASK: u8 = 28;
113 133 const REMOVED: u8 = 12;
114 134 const MERGED: u8 = 8;
115 135 const SALVAGED: u8 = 16;
116 136
117 137 impl<'a> ChangedFiles<'a> {
118 138 const INDEX_START: usize = 4;
119 139 const ENTRY_SIZE: u32 = 9;
120 140 const FILENAME_START: u32 = 1;
121 141 const COPY_SOURCE_START: u32 = 5;
122 142
123 143 pub fn new(data: &'a [u8]) -> Self {
124 144 assert!(
125 145 data.len() >= 4,
126 146 "data size ({}) is too small to contain the header (4)",
127 147 data.len()
128 148 );
129 149 let nb_items_raw: [u8; 4] = (&data[0..=3])
130 150 .try_into()
131 151 .expect("failed to turn 4 bytes into 4 bytes");
132 152 let nb_items = u32::from_be_bytes(nb_items_raw);
133 153
134 154 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
135 155 let index_end = Self::INDEX_START + index_size;
136 156
137 157 assert!(
138 158 data.len() >= index_end,
139 159 "data size ({}) is too small to fit the index_data ({})",
140 160 data.len(),
141 161 index_end
142 162 );
143 163
144 164 let ret = ChangedFiles {
145 165 nb_items,
146 166 index: &data[Self::INDEX_START..index_end],
147 167 data: &data[index_end..],
148 168 };
149 169 let max_data = ret.filename_end(nb_items - 1) as usize;
150 170 assert!(
151 171 ret.data.len() >= max_data,
152 172 "data size ({}) is too small to fit all data ({})",
153 173 data.len(),
154 174 index_end + max_data
155 175 );
156 176 ret
157 177 }
158 178
159 179 pub fn new_empty() -> Self {
160 180 ChangedFiles {
161 181 nb_items: 0,
162 182 index: EMPTY,
163 183 data: EMPTY,
164 184 }
165 185 }
166 186
167 187 /// internal function to return an individual entry at a given index
168 188 fn entry(&'a self, idx: u32) -> FileChange<'a> {
169 189 if idx >= self.nb_items {
170 190 panic!(
171 191 "index for entry is higher that the number of file {} >= {}",
172 192 idx, self.nb_items
173 193 )
174 194 }
175 195 let flags = self.flags(idx);
176 196 let filename = self.filename(idx);
177 197 let copy_idx = self.copy_idx(idx);
178 198 let copy_source = self.filename(copy_idx);
179 199 (flags, filename, copy_source)
180 200 }
181 201
182 202 /// internal function to return the filename of the entry at a given index
183 203 fn filename(&self, idx: u32) -> &HgPath {
184 204 let filename_start;
185 205 if idx == 0 {
186 206 filename_start = 0;
187 207 } else {
188 208 filename_start = self.filename_end(idx - 1)
189 209 }
190 210 let filename_end = self.filename_end(idx);
191 211 let filename_start = filename_start as usize;
192 212 let filename_end = filename_end as usize;
193 213 HgPath::new(&self.data[filename_start..filename_end])
194 214 }
195 215
196 216 /// internal function to return the flag field of the entry at a given
197 217 /// index
198 218 fn flags(&self, idx: u32) -> u8 {
199 219 let idx = idx as usize;
200 220 self.index[idx * (Self::ENTRY_SIZE as usize)]
201 221 }
202 222
203 223 /// internal function to return the end of a filename part at a given index
204 224 fn filename_end(&self, idx: u32) -> u32 {
205 225 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
206 226 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
207 227 let start = start as usize;
208 228 let end = end as usize;
209 229 let raw = (&self.index[start..end])
210 230 .try_into()
211 231 .expect("failed to turn 4 bytes into 4 bytes");
212 232 u32::from_be_bytes(raw)
213 233 }
214 234
215 235 /// internal function to return index of the copy source of the entry at a
216 236 /// given index
217 237 fn copy_idx(&self, idx: u32) -> u32 {
218 238 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
219 239 let end = (idx + 1) * Self::ENTRY_SIZE;
220 240 let start = start as usize;
221 241 let end = end as usize;
222 242 let raw = (&self.index[start..end])
223 243 .try_into()
224 244 .expect("failed to turn 4 bytes into 4 bytes");
225 245 u32::from_be_bytes(raw)
226 246 }
227 247
228 248 /// Return an iterator over all the `Action` in this instance.
229 249 fn iter_actions(&self, parent: Parent) -> ActionsIterator {
230 250 ActionsIterator {
231 251 changes: &self,
232 252 parent: parent,
233 253 current: 0,
234 254 }
235 255 }
236 256
237 257 /// return the MergeCase value associated with a filename
238 258 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
239 259 if self.nb_items == 0 {
240 260 return MergeCase::Normal;
241 261 }
242 262 let mut low_part = 0;
243 263 let mut high_part = self.nb_items;
244 264
245 265 while low_part < high_part {
246 266 let cursor = (low_part + high_part - 1) / 2;
247 267 let (flags, filename, _source) = self.entry(cursor);
248 268 match path.cmp(filename) {
249 269 Ordering::Less => low_part = cursor + 1,
250 270 Ordering::Greater => high_part = cursor,
251 271 Ordering::Equal => {
252 272 return match flags & ACTION_MASK {
253 273 MERGED => MergeCase::Merged,
254 274 SALVAGED => MergeCase::Salvaged,
255 275 _ => MergeCase::Normal,
256 276 };
257 277 }
258 278 }
259 279 }
260 280 MergeCase::Normal
261 281 }
262 282 }
263 283
264 284 /// A struct responsible for answering "is X ancestors of Y" quickly
265 285 ///
266 286 /// The structure will delegate ancestors call to a callback, and cache the
267 287 /// result.
268 288 #[derive(Debug)]
269 289 struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
270 290 inner: &'a A,
271 291 pairs: HashMap<(Revision, Revision), bool>,
272 292 }
273 293
274 294 impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
275 295 fn new(func: &'a A) -> Self {
276 296 Self {
277 297 inner: func,
278 298 pairs: HashMap::default(),
279 299 }
280 300 }
281 301
282 302 fn record_overwrite(&mut self, anc: Revision, desc: Revision) {
283 303 self.pairs.insert((anc, desc), true);
284 304 }
285 305
286 306 /// returns `true` if `anc` is an ancestors of `desc`, `false` otherwise
287 307 fn is_overwrite(&mut self, anc: Revision, desc: Revision) -> bool {
288 308 if anc > desc {
289 309 false
290 310 } else if anc == desc {
291 311 true
292 312 } else {
293 313 if let Some(b) = self.pairs.get(&(anc, desc)) {
294 314 *b
295 315 } else {
296 316 let b = (self.inner)(anc, desc);
297 317 self.pairs.insert((anc, desc), b);
298 318 b
299 319 }
300 320 }
301 321 }
302 322 }
303 323
304 324 struct ActionsIterator<'a> {
305 325 changes: &'a ChangedFiles<'a>,
306 326 parent: Parent,
307 327 current: u32,
308 328 }
309 329
310 330 impl<'a> Iterator for ActionsIterator<'a> {
311 331 type Item = Action<'a>;
312 332
313 333 fn next(&mut self) -> Option<Action<'a>> {
314 334 let copy_flag = match self.parent {
315 335 Parent::FirstParent => P1_COPY,
316 336 Parent::SecondParent => P2_COPY,
317 337 };
318 338 while self.current < self.changes.nb_items {
319 339 let (flags, file, source) = self.changes.entry(self.current);
320 340 self.current += 1;
321 341 if (flags & ACTION_MASK) == REMOVED {
322 342 return Some(Action::Removed(file));
323 343 }
324 344 let copy = flags & COPY_MASK;
325 345 if copy == copy_flag {
326 346 return Some(Action::Copied(file, source));
327 347 }
328 348 }
329 349 return None;
330 350 }
331 351 }
332 352
333 353 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
334 354 /// ChangedFiles
335 355 ///
336 356 /// It is passed to the RevInfoMaker callback who can assign any necessary
337 357 /// content to the `data` attribute. The copy tracing code is responsible for
338 358 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
339 359 pub struct DataHolder<D> {
340 360 /// RevInfoMaker callback should assign data referenced by the
341 361 /// ChangedFiles struct it return to this attribute. The DataHolder
342 362 /// lifetime will be at least as long as the ChangedFiles one.
343 363 pub data: Option<D>,
344 364 }
345 365
346 366 pub type RevInfoMaker<'a, D> =
347 367 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
348 368
349 369 /// enum used to carry information about the parent → child currently processed
350 370 #[derive(Copy, Clone, Debug)]
351 371 enum Parent {
352 372 /// The `p1(x) → x` edge
353 373 FirstParent,
354 374 /// The `p2(x) → x` edge
355 375 SecondParent,
356 376 }
357 377
358 378 /// A small "tokenizer" responsible of turning full HgPath into lighter
359 379 /// PathToken
360 380 ///
361 381 /// Dealing with small object, like integer is much faster, so HgPath input are
362 382 /// turned into integer "PathToken" and converted back in the end.
363 383 #[derive(Clone, Debug, Default)]
364 384 struct TwoWayPathMap {
365 385 token: HashMap<HgPathBuf, PathToken>,
366 386 path: Vec<HgPathBuf>,
367 387 }
368 388
369 389 impl TwoWayPathMap {
370 390 fn tokenize(&mut self, path: &HgPath) -> PathToken {
371 391 match self.token.get(path) {
372 392 Some(a) => *a,
373 393 None => {
374 394 let a = self.token.len();
375 395 let buf = path.to_owned();
376 396 self.path.push(buf.clone());
377 397 self.token.insert(buf, a);
378 398 a
379 399 }
380 400 }
381 401 }
382 402
383 403 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
384 404 assert!(token < self.path.len(), format!("Unknown token: {}", token));
385 405 &self.path[token]
386 406 }
387 407 }
388 408
389 409 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
390 410 ///
391 411 /// Arguments are:
392 412 ///
393 413 /// revs: all revisions to be considered
394 414 /// children: a {parent ? [childrens]} mapping
395 415 /// target_rev: the final revision we are combining copies to
396 416 /// rev_info(rev): callback to get revision information:
397 417 /// * first parent
398 418 /// * second parent
399 419 /// * ChangedFiles
400 420 /// isancestors(low_rev, high_rev): callback to check if a revision is an
401 421 /// ancestor of another
402 422 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
403 423 revs: Vec<Revision>,
404 424 mut children_count: HashMap<Revision, usize>,
405 425 target_rev: Revision,
406 426 rev_info: RevInfoMaker<D>,
407 427 is_ancestor: &A,
408 428 ) -> PathCopies {
409 429 let mut all_copies = HashMap::new();
410 430 let mut oracle = AncestorOracle::new(is_ancestor);
411 431
412 432 let mut path_map = TwoWayPathMap::default();
413 433
414 434 for rev in revs {
415 435 let mut d: DataHolder<D> = DataHolder { data: None };
416 436 let (p1, p2, changes) = rev_info(rev, &mut d);
417 437
418 438 // We will chain the copies information accumulated for the parent with
419 439 // the individual copies information the curent revision. Creating a
420 440 // new TimeStampedPath for each `rev` → `children` vertex.
421 441 let mut copies: Option<InternalPathCopies> = None;
422 442 if p1 != NULL_REVISION {
423 443 // Retrieve data computed in a previous iteration
424 444 let parent_copies = get_and_clean_parent_copies(
425 445 &mut all_copies,
426 446 &mut children_count,
427 447 p1,
428 448 );
429 449 if let Some(parent_copies) = parent_copies {
430 450 // combine it with data for that revision
431 451 let vertex_copies = add_from_changes(
432 452 &mut path_map,
433 453 &mut oracle,
434 454 &parent_copies,
435 455 &changes,
436 456 Parent::FirstParent,
437 457 rev,
438 458 );
439 459 // keep that data around for potential later combination
440 460 copies = Some(vertex_copies);
441 461 }
442 462 }
443 463 if p2 != NULL_REVISION {
444 464 // Retrieve data computed in a previous iteration
445 465 let parent_copies = get_and_clean_parent_copies(
446 466 &mut all_copies,
447 467 &mut children_count,
448 468 p2,
449 469 );
450 470 if let Some(parent_copies) = parent_copies {
451 471 // combine it with data for that revision
452 472 let vertex_copies = add_from_changes(
453 473 &mut path_map,
454 474 &mut oracle,
455 475 &parent_copies,
456 476 &changes,
457 477 Parent::SecondParent,
458 478 rev,
459 479 );
460 480
461 481 copies = match copies {
462 482 None => Some(vertex_copies),
463 483 // Merge has two parents needs to combines their copy
464 484 // information.
465 485 //
466 486 // If we got data from both parents, We need to combine
467 487 // them.
468 488 Some(copies) => Some(merge_copies_dict(
469 489 &path_map,
470 490 rev,
471 491 vertex_copies,
472 492 copies,
473 493 &changes,
474 494 &mut oracle,
475 495 )),
476 496 };
477 497 }
478 498 }
479 499 match copies {
480 500 Some(copies) => {
481 501 all_copies.insert(rev, copies);
482 502 }
483 503 _ => {}
484 504 }
485 505 }
486 506
487 507 // Drop internal information (like the timestamp) and return the final
488 508 // mapping.
489 509 let tt_result = all_copies
490 510 .remove(&target_rev)
491 511 .expect("target revision was not processed");
492 512 let mut result = PathCopies::default();
493 513 for (dest, tt_source) in tt_result {
494 514 if let Some(path) = tt_source.path {
495 515 let path_dest = path_map.untokenize(dest).to_owned();
496 516 let path_path = path_map.untokenize(path).to_owned();
497 517 result.insert(path_dest, path_path);
498 518 }
499 519 }
500 520 result
501 521 }
502 522
503 523 /// fetch previous computed information
504 524 ///
505 525 /// If no other children are expected to need this information, we drop it from
506 526 /// the cache.
507 527 ///
508 528 /// If parent is not part of the set we are expected to walk, return None.
509 529 fn get_and_clean_parent_copies(
510 530 all_copies: &mut HashMap<Revision, InternalPathCopies>,
511 531 children_count: &mut HashMap<Revision, usize>,
512 532 parent_rev: Revision,
513 533 ) -> Option<InternalPathCopies> {
514 534 let count = children_count.get_mut(&parent_rev)?;
515 535 *count -= 1;
516 536 if *count == 0 {
517 537 match all_copies.remove(&parent_rev) {
518 538 Some(c) => Some(c),
519 539 None => Some(InternalPathCopies::default()),
520 540 }
521 541 } else {
522 542 match all_copies.get(&parent_rev) {
523 543 Some(c) => Some(c.clone()),
524 544 None => Some(InternalPathCopies::default()),
525 545 }
526 546 }
527 547 }
528 548
529 549 /// Combine ChangedFiles with some existing PathCopies information and return
530 550 /// the result
531 551 fn add_from_changes<A: Fn(Revision, Revision) -> bool>(
532 552 path_map: &mut TwoWayPathMap,
533 553 oracle: &mut AncestorOracle<A>,
534 554 base_copies: &InternalPathCopies,
535 555 changes: &ChangedFiles,
536 556 parent: Parent,
537 557 current_rev: Revision,
538 558 ) -> InternalPathCopies {
539 559 let mut copies = base_copies.clone();
540 560 for action in changes.iter_actions(parent) {
541 561 match action {
542 562 Action::Copied(path_dest, path_source) => {
543 563 let dest = path_map.tokenize(path_dest);
544 564 let source = path_map.tokenize(path_source);
545 565 let entry;
546 566 if let Some(v) = base_copies.get(&source) {
547 567 entry = match &v.path {
548 568 Some(path) => Some((*(path)).to_owned()),
549 569 None => Some(source.to_owned()),
550 570 }
551 571 } else {
552 572 entry = Some(source.to_owned());
553 573 }
554 574 // Each new entry is introduced by the children, we
555 575 // record this information as we will need it to take
556 576 // the right decision when merging conflicting copy
557 577 // information. See merge_copies_dict for details.
558 578 match copies.entry(dest) {
559 579 Entry::Vacant(slot) => {
560 580 let ttpc = CopySource::new(current_rev, entry);
561 581 slot.insert(ttpc);
562 582 }
563 583 Entry::Occupied(mut slot) => {
564 584 let ttpc = slot.get_mut();
565 585 oracle.record_overwrite(ttpc.rev, current_rev);
566 586 ttpc.overwrite(current_rev, entry);
567 587 }
568 588 }
569 589 }
570 590 Action::Removed(deleted_path) => {
571 591 // We must drop copy information for removed file.
572 592 //
573 593 // We need to explicitly record them as dropped to
574 594 // propagate this information when merging two
575 595 // InternalPathCopies object.
576 596 let deleted = path_map.tokenize(deleted_path);
577 597 copies.entry(deleted).and_modify(|old| {
578 598 oracle.record_overwrite(old.rev, current_rev);
579 599 old.mark_delete(current_rev);
580 600 });
581 601 }
582 602 }
583 603 }
584 604 copies
585 605 }
586 606
587 607 /// merge two copies-mapping together, minor and major
588 608 ///
589 609 /// In case of conflict, value from "major" will be picked, unless in some
590 610 /// cases. See inline documentation for details.
591 611 fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
592 612 path_map: &TwoWayPathMap,
593 613 current_merge: Revision,
594 614 mut minor: InternalPathCopies,
595 615 mut major: InternalPathCopies,
596 616 changes: &ChangedFiles,
597 617 oracle: &mut AncestorOracle<A>,
598 618 ) -> InternalPathCopies {
599 619 // This closure exist as temporary help while multiple developper are
600 620 // actively working on this code. Feel free to re-inline it once this
601 621 // code is more settled.
602 622 let cmp_value = |oracle: &mut AncestorOracle<A>,
603 623 dest: &PathToken,
604 624 src_minor: &CopySource,
605 625 src_major: &CopySource| {
606 626 compare_value(
607 627 path_map,
608 628 current_merge,
609 629 changes,
610 630 oracle,
611 631 dest,
612 632 src_minor,
613 633 src_major,
614 634 )
615 635 };
616 636 if minor.is_empty() {
617 637 major
618 638 } else if major.is_empty() {
619 639 minor
620 640 } else if minor.len() * 2 < major.len() {
621 641 // Lets says we are merging two InternalPathCopies instance A and B.
622 642 //
623 643 // If A contains N items, the merge result will never contains more
624 644 // than N values differents than the one in A
625 645 //
626 646 // If B contains M items, with M > N, the merge result will always
627 647 // result in a minimum of M - N value differents than the on in
628 648 // A
629 649 //
630 650 // As a result, if N < (M-N), we know that simply iterating over A will
631 651 // yield less difference than iterating over the difference
632 652 // between A and B.
633 653 //
634 654 // This help performance a lot in case were a tiny
635 655 // InternalPathCopies is merged with a much larger one.
636 656 for (dest, src_minor) in minor {
637 657 let src_major = major.get(&dest);
638 658 match src_major {
639 659 None => {
640 660 major.insert(dest, src_minor);
641 661 }
642 662 Some(src_major) => {
643 663 let (pick, overwrite) =
644 664 cmp_value(oracle, &dest, &src_minor, src_major);
645 665 if overwrite {
646 666 oracle.record_overwrite(src_minor.rev, current_merge);
647 667 oracle.record_overwrite(src_major.rev, current_merge);
648 668 let src = match pick {
649 669 MergePick::Major => CopySource::new_from_merge(
650 670 current_merge,
651 671 src_major,
652 672 &src_minor,
653 673 ),
654 674 MergePick::Minor => CopySource::new_from_merge(
655 675 current_merge,
656 676 &src_minor,
657 677 src_major,
658 678 ),
659 679 MergePick::Any => CopySource::new_from_merge(
660 680 current_merge,
661 681 src_major,
662 682 &src_minor,
663 683 ),
664 684 };
665 685 major.insert(dest, src);
666 686 } else {
667 687 match pick {
668 688 MergePick::Any | MergePick::Major => None,
669 689 MergePick::Minor => major.insert(dest, src_minor),
670 690 };
671 691 }
672 692 }
673 693 };
674 694 }
675 695 major
676 696 } else if major.len() * 2 < minor.len() {
677 697 // This use the same rational than the previous block.
678 698 // (Check previous block documentation for details.)
679 699 for (dest, src_major) in major {
680 700 let src_minor = minor.get(&dest);
681 701 match src_minor {
682 702 None => {
683 703 minor.insert(dest, src_major);
684 704 }
685 705 Some(src_minor) => {
686 706 let (pick, overwrite) =
687 707 cmp_value(oracle, &dest, src_minor, &src_major);
688 708 if overwrite {
689 709 oracle.record_overwrite(src_minor.rev, current_merge);
690 710 oracle.record_overwrite(src_major.rev, current_merge);
691 711 let src = match pick {
692 712 MergePick::Major => CopySource::new_from_merge(
693 713 current_merge,
694 714 &src_major,
695 715 src_minor,
696 716 ),
697 717 MergePick::Minor => CopySource::new_from_merge(
698 718 current_merge,
699 719 src_minor,
700 720 &src_major,
701 721 ),
702 722 MergePick::Any => CopySource::new_from_merge(
703 723 current_merge,
704 724 &src_major,
705 725 src_minor,
706 726 ),
707 727 };
708 728 minor.insert(dest, src);
709 729 } else {
710 730 match pick {
711 731 MergePick::Any | MergePick::Minor => None,
712 732 MergePick::Major => minor.insert(dest, src_major),
713 733 };
714 734 }
715 735 }
716 736 };
717 737 }
718 738 minor
719 739 } else {
720 740 let mut override_minor = Vec::new();
721 741 let mut override_major = Vec::new();
722 742
723 743 let mut to_major = |k: &PathToken, v: &CopySource| {
724 744 override_major.push((k.clone(), v.clone()))
725 745 };
726 746 let mut to_minor = |k: &PathToken, v: &CopySource| {
727 747 override_minor.push((k.clone(), v.clone()))
728 748 };
729 749
730 750 // The diff function leverage detection of the identical subpart if
731 751 // minor and major has some common ancestors. This make it very
732 752 // fast is most case.
733 753 //
734 754 // In case where the two map are vastly different in size, the current
735 755 // approach is still slowish because the iteration will iterate over
736 756 // all the "exclusive" content of the larger on. This situation can be
737 757 // frequent when the subgraph of revision we are processing has a lot
738 758 // of roots. Each roots adding they own fully new map to the mix (and
739 759 // likely a small map, if the path from the root to the "main path" is
740 760 // small.
741 761 //
742 762 // We could do better by detecting such situation and processing them
743 763 // differently.
744 764 for d in minor.diff(&major) {
745 765 match d {
746 766 DiffItem::Add(k, v) => to_minor(k, v),
747 767 DiffItem::Remove(k, v) => to_major(k, v),
748 768 DiffItem::Update { old, new } => {
749 769 let (dest, src_major) = new;
750 770 let (_, src_minor) = old;
751 771 let (pick, overwrite) =
752 772 cmp_value(oracle, dest, src_minor, src_major);
753 773 if overwrite {
754 774 oracle.record_overwrite(src_minor.rev, current_merge);
755 775 oracle.record_overwrite(src_major.rev, current_merge);
756 776 let src = match pick {
757 777 MergePick::Major => CopySource::new_from_merge(
758 778 current_merge,
759 779 src_major,
760 780 src_minor,
761 781 ),
762 782 MergePick::Minor => CopySource::new_from_merge(
763 783 current_merge,
764 784 src_minor,
765 785 src_major,
766 786 ),
767 787 MergePick::Any => CopySource::new_from_merge(
768 788 current_merge,
769 789 src_major,
770 790 src_minor,
771 791 ),
772 792 };
773 793 to_minor(dest, &src);
774 794 to_major(dest, &src);
775 795 } else {
776 796 match pick {
777 797 MergePick::Major => to_minor(dest, src_major),
778 798 MergePick::Minor => to_major(dest, src_minor),
779 799 // If the two entry are identical, no need to do
780 800 // anything (but diff should not have yield them)
781 801 MergePick::Any => unreachable!(),
782 802 }
783 803 }
784 804 }
785 805 };
786 806 }
787 807
788 808 let updates;
789 809 let mut result;
790 810 if override_major.is_empty() {
791 811 result = major
792 812 } else if override_minor.is_empty() {
793 813 result = minor
794 814 } else {
795 815 if override_minor.len() < override_major.len() {
796 816 updates = override_minor;
797 817 result = minor;
798 818 } else {
799 819 updates = override_major;
800 820 result = major;
801 821 }
802 822 for (k, v) in updates {
803 823 result.insert(k, v);
804 824 }
805 825 }
806 826 result
807 827 }
808 828 }
809 829
810 830 /// represent the side that should prevail when merging two
811 831 /// InternalPathCopies
812 832 enum MergePick {
813 833 /// The "major" (p1) side prevails
814 834 Major,
815 835 /// The "minor" (p2) side prevails
816 836 Minor,
817 837 /// Any side could be used (because they are the same)
818 838 Any,
819 839 }
820 840
821 841 /// decide which side prevails in case of conflicting values
822 842 #[allow(clippy::if_same_then_else)]
823 843 fn compare_value<A: Fn(Revision, Revision) -> bool>(
824 844 path_map: &TwoWayPathMap,
825 845 current_merge: Revision,
826 846 changes: &ChangedFiles,
827 847 oracle: &mut AncestorOracle<A>,
828 848 dest: &PathToken,
829 849 src_minor: &CopySource,
830 850 src_major: &CopySource,
831 851 ) -> (MergePick, bool) {
832 852 if src_major.rev == current_merge {
833 853 if src_minor.rev == current_merge {
834 854 if src_major.path.is_none() {
835 855 // We cannot get different copy information for both p1 and p2
836 856 // from the same revision. Unless this was a
837 // deletion
838 (MergePick::Any, false)
857 // deletion.
858 //
859 // However the deletion might come over different data on each
860 // branch.
861 let need_over = src_major.overwritten != src_minor.overwritten;
862 (MergePick::Any, need_over)
839 863 } else {
840 864 unreachable!();
841 865 }
842 866 } else {
843 867 // The last value comes the current merge, this value -will- win
844 868 // eventually.
845 869 (MergePick::Major, true)
846 870 }
847 871 } else if src_minor.rev == current_merge {
848 872 // The last value comes the current merge, this value -will- win
849 873 // eventually.
850 874 (MergePick::Minor, true)
851 875 } else if src_major.path == src_minor.path {
852 876 // we have the same value, but from other source;
853 877 if src_major.rev == src_minor.rev {
854 878 // If the two entry are identical, they are both valid
879 debug_assert!(src_minor.overwritten == src_minor.overwritten);
855 880 (MergePick::Any, false)
856 } else if oracle.is_overwrite(src_major.rev, src_minor.rev) {
881 } else if src_major.is_overwritten_by(src_minor) {
857 882 (MergePick::Minor, false)
858 } else if oracle.is_overwrite(src_minor.rev, src_major.rev) {
883 } else if src_minor.is_overwritten_by(src_major) {
859 884 (MergePick::Major, false)
860 885 } else {
861 886 (MergePick::Any, true)
862 887 }
863 888 } else if src_major.rev == src_minor.rev {
864 889 // We cannot get copy information for both p1 and p2 in the
865 890 // same rev. So this is the same value.
866 891 unreachable!(
867 892 "conflicting information from p1 and p2 in the same revision"
868 893 );
869 894 } else {
870 895 let dest_path = path_map.untokenize(*dest);
871 896 let action = changes.get_merge_case(dest_path);
872 897 if src_minor.path.is_some()
873 898 && src_major.path.is_none()
874 899 && action == MergeCase::Salvaged
875 900 {
876 901 // If the file is "deleted" in the major side but was
877 902 // salvaged by the merge, we keep the minor side alive
878 903 (MergePick::Minor, true)
879 904 } else if src_major.path.is_some()
880 905 && src_minor.path.is_none()
881 906 && action == MergeCase::Salvaged
882 907 {
883 908 // If the file is "deleted" in the minor side but was
884 909 // salvaged by the merge, unconditionnaly preserve the
885 910 // major side.
886 911 (MergePick::Major, true)
887 } else if oracle.is_overwrite(src_minor.rev, src_major.rev) {
912 } else if src_minor.is_overwritten_by(src_major) {
888 913 // The information from the minor version are strictly older than
889 914 // the major version
890 915 if action == MergeCase::Merged {
891 916 // If the file was actively merged, its means some non-copy
892 917 // activity happened on the other branch. It
893 918 // mean the older copy information are still relevant.
894 919 //
895 920 // The major side wins such conflict.
896 921 (MergePick::Major, true)
897 922 } else {
898 923 // No activity on the minor branch, pick the newer one.
899 924 (MergePick::Major, false)
900 925 }
901 } else if oracle.is_overwrite(src_major.rev, src_minor.rev) {
926 } else if src_major.is_overwritten_by(src_minor) {
902 927 if action == MergeCase::Merged {
903 928 // If the file was actively merged, its means some non-copy
904 929 // activity happened on the other branch. It
905 930 // mean the older copy information are still relevant.
906 931 //
907 932 // The major side wins such conflict.
908 933 (MergePick::Major, true)
909 934 } else {
910 935 // No activity on the minor branch, pick the newer one.
911 936 (MergePick::Minor, false)
912 937 }
913 938 } else if src_minor.path.is_none() {
914 939 // the minor side has no relevant information, pick the alive one
915 940 (MergePick::Major, true)
916 941 } else if src_major.path.is_none() {
917 942 // the major side has no relevant information, pick the alive one
918 943 (MergePick::Minor, true)
919 944 } else {
920 945 // by default the major side wins
921 946 (MergePick::Major, true)
922 947 }
923 948 }
924 949 }
General Comments 0
You need to be logged in to leave comments. Login now