copies-rust: yield both p1 and p2 copies in `ChangedFiles.actions()`...
marmoute -
r47320:a34cd9aa default
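In short, this change makes `ChangedFiles::iter_actions()` parent-agnostic: the old `Action::Copied` variant, which the iterator only yielded for the parent passed to `iter_actions(parent)`, is replaced by explicit `CopiedFromP1` and `CopiedFromP2` variants, and the per-parent filtering moves into the caller, `add_from_changes`. A condensed view of the API change (paraphrased from the diff below):

// Before: fn iter_actions(&self, parent: Parent) -> ActionsIterator
//         yields Action::Copied(dest, source), pre-filtered for `parent`.
// After:  fn iter_actions(&self) -> ActionsIterator
//         yields Action::CopiedFromP1(dest, source) and
//         Action::CopiedFromP2(dest, source); each caller keeps only the
//         edge it is processing (see add_from_changes).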
@@ -1,900 +1,913 @@
1 1 use crate::utils::hg_path::HgPath;
2 2 use crate::utils::hg_path::HgPathBuf;
3 3 use crate::Revision;
4 4 use crate::NULL_REVISION;
5 5
6 6 use im_rc::ordmap::DiffItem;
7 7 use im_rc::ordmap::Entry;
8 8 use im_rc::ordmap::OrdMap;
9 9
10 10 use std::cmp::Ordering;
11 11 use std::collections::HashMap;
12 12 use std::collections::HashSet;
13 13 use std::convert::TryInto;
14 14
15 15 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
16 16
17 17 type PathToken = usize;
18 18
19 19 #[derive(Clone, Debug, PartialEq)]
20 20 struct CopySource {
21 21 /// revision at which the copy information was added
22 22 rev: Revision,
23 23 /// the copy source, (Set to None in case of deletion of the associated
24 24 /// key)
25 25 path: Option<PathToken>,
26 26 /// a set of previous `CopySource.rev` value directly or indirectly
27 27 /// overwritten by this one.
28 28 overwritten: HashSet<Revision>,
29 29 }
30 30
31 31 impl CopySource {
32 32 /// create a new CopySource
33 33 ///
34 34 /// Use this when no previous copy source existed.
35 35 fn new(rev: Revision, path: Option<PathToken>) -> Self {
36 36 Self {
37 37 rev,
38 38 path,
39 39 overwritten: HashSet::new(),
40 40 }
41 41 }
42 42
43 43 /// create a new CopySource from merging two others
44 44 ///
45 45 /// Use this when merging two InternalPathCopies requires active merging of
46 46 /// some entries.
47 47 fn new_from_merge(rev: Revision, winner: &Self, loser: &Self) -> Self {
48 48 let mut overwritten = HashSet::new();
49 49 overwritten.extend(winner.overwritten.iter().copied());
50 50 overwritten.extend(loser.overwritten.iter().copied());
51 51 overwritten.insert(winner.rev);
52 52 overwritten.insert(loser.rev);
53 53 Self {
54 54 rev,
55 55 path: winner.path,
56 56 overwritten: overwritten,
57 57 }
58 58 }
59 59
60 60 /// Update the value of a pre-existing CopySource
61 61 ///
62 62 /// Use this when recording copy information from parent → child edges
63 63 fn overwrite(&mut self, rev: Revision, path: Option<PathToken>) {
64 64 self.overwritten.insert(self.rev);
65 65 self.rev = rev;
66 66 self.path = path;
67 67 }
68 68
69 69 /// Mark pre-existing copy information as "dropped" by a file deletion
70 70 ///
71 71 /// Use this when recording copy information from parent → child edges
72 72 fn mark_delete(&mut self, rev: Revision) {
73 73 self.overwritten.insert(self.rev);
74 74 self.rev = rev;
75 75 self.path = None;
76 76 }
77 77
78 78 fn is_overwritten_by(&self, other: &Self) -> bool {
79 79 other.overwritten.contains(&self.rev)
80 80 }
81 81 }
82 82
83 83 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
84 84 type InternalPathCopies = OrdMap<PathToken, CopySource>;
85 85
86 86 /// holds parent 1, parent 2 and the relevant file actions.
87 87 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
88 88
89 89 /// represents the files affected by a changeset
90 90 ///
91 91 /// This holds a subset of mercurial.metadata.ChangingFiles as we do not need
92 92 /// all the data categories tracked by it.
93 93 /// This holds a subset of mercurial.metadata.ChangingFiles as we do not need
94 94 /// all the data categories tracked by it.
95 95 pub struct ChangedFiles<'a> {
96 96 nb_items: u32,
97 97 index: &'a [u8],
98 98 data: &'a [u8],
99 99 }
100 100
101 101 /// Represent active changes that affect the copy tracing.
102 102 enum Action<'a> {
103 103 /// The parent → child edge is removing a file
104 104 ///
105 105 /// (actually, this could be the edge from the other parent, but it does
106 106 /// not matter)
107 107 Removed(&'a HgPath),
108 108 /// The parent → child edge introduces copy information between (dest,
109 109 /// source)
110 Copied(&'a HgPath, &'a HgPath),
110 CopiedFromP1(&'a HgPath, &'a HgPath),
111 CopiedFromP2(&'a HgPath, &'a HgPath),
111 112 }
112 113
113 114 /// This expresses the possible "special" cases we can get in a merge
114 115 ///
115 116 /// See mercurial/metadata.py for details on these values.
116 117 #[derive(PartialEq)]
117 118 enum MergeCase {
118 119 /// Merged: the file had history on both sides that needed to be merged
119 120 Merged,
120 121 /// Salvaged: file was candidate for deletion, but survived the merge
121 122 Salvaged,
122 123 /// Normal: Not one of the two cases above
123 124 Normal,
124 125 }
125 126
126 127 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
127 128
128 129 const EMPTY: &[u8] = b"";
129 130 const COPY_MASK: u8 = 3;
130 131 const P1_COPY: u8 = 2;
131 132 const P2_COPY: u8 = 3;
132 133 const ACTION_MASK: u8 = 28;
133 134 const REMOVED: u8 = 12;
134 135 const MERGED: u8 = 8;
135 136 const SALVAGED: u8 = 16;
136 137
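Reading aid for the constants above: each index entry begins with one flag byte packing a copy field (masked with COPY_MASK) and an action field (masked with ACTION_MASK). A hypothetical decoding helper, for illustration only and not part of this patch:

fn describe_flags(flags: u8) -> (&'static str, &'static str) {
    // Decode the copy field the same way iter_actions does below.
    let copy = match flags & COPY_MASK {
        P1_COPY => "copied from p1",
        P2_COPY => "copied from p2",
        _ => "no copy information",
    };
    // Decode the action field the same way get_merge_case does below.
    let action = match flags & ACTION_MASK {
        REMOVED => "removed",
        MERGED => "merged",
        SALVAGED => "salvaged",
        _ => "normal",
    };
    (copy, action)
}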
137 138 impl<'a> ChangedFiles<'a> {
138 139 const INDEX_START: usize = 4;
139 140 const ENTRY_SIZE: u32 = 9;
140 141 const FILENAME_START: u32 = 1;
141 142 const COPY_SOURCE_START: u32 = 5;
142 143
143 144 pub fn new(data: &'a [u8]) -> Self {
144 145 assert!(
145 146 data.len() >= 4,
146 147 "data size ({}) is too small to contain the header (4)",
147 148 data.len()
148 149 );
149 150 let nb_items_raw: [u8; 4] = (&data[0..=3])
150 151 .try_into()
151 152 .expect("failed to turn 4 bytes into 4 bytes");
152 153 let nb_items = u32::from_be_bytes(nb_items_raw);
153 154
154 155 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
155 156 let index_end = Self::INDEX_START + index_size;
156 157
157 158 assert!(
158 159 data.len() >= index_end,
159 160 "data size ({}) is too small to fit the index_data ({})",
160 161 data.len(),
161 162 index_end
162 163 );
163 164
164 165 let ret = ChangedFiles {
165 166 nb_items,
166 167 index: &data[Self::INDEX_START..index_end],
167 168 data: &data[index_end..],
168 169 };
169 170 let max_data = ret.filename_end(nb_items - 1) as usize;
170 171 assert!(
171 172 ret.data.len() >= max_data,
172 173 "data size ({}) is too small to fit all data ({})",
173 174 data.len(),
174 175 index_end + max_data
175 176 );
176 177 ret
177 178 }
178 179
179 180 pub fn new_empty() -> Self {
180 181 ChangedFiles {
181 182 nb_items: 0,
182 183 index: EMPTY,
183 184 data: EMPTY,
184 185 }
185 186 }
186 187
187 188 /// internal function to return an individual entry at a given index
188 189 fn entry(&'a self, idx: u32) -> FileChange<'a> {
189 190 if idx >= self.nb_items {
190 191 panic!(
191 192 "index for entry is higher than the number of files {} >= {}",
192 193 idx, self.nb_items
193 194 )
194 195 }
195 196 let flags = self.flags(idx);
196 197 let filename = self.filename(idx);
197 198 let copy_idx = self.copy_idx(idx);
198 199 let copy_source = self.filename(copy_idx);
199 200 (flags, filename, copy_source)
200 201 }
201 202
202 203 /// internal function to return the filename of the entry at a given index
203 204 fn filename(&self, idx: u32) -> &HgPath {
204 205 let filename_start;
205 206 if idx == 0 {
206 207 filename_start = 0;
207 208 } else {
208 209 filename_start = self.filename_end(idx - 1)
209 210 }
210 211 let filename_end = self.filename_end(idx);
211 212 let filename_start = filename_start as usize;
212 213 let filename_end = filename_end as usize;
213 214 HgPath::new(&self.data[filename_start..filename_end])
214 215 }
215 216
216 217 /// internal function to return the flag field of the entry at a given
217 218 /// index
218 219 fn flags(&self, idx: u32) -> u8 {
219 220 let idx = idx as usize;
220 221 self.index[idx * (Self::ENTRY_SIZE as usize)]
221 222 }
222 223
223 224 /// internal function to return the end of a filename part at a given index
224 225 fn filename_end(&self, idx: u32) -> u32 {
225 226 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
226 227 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
227 228 let start = start as usize;
228 229 let end = end as usize;
229 230 let raw = (&self.index[start..end])
230 231 .try_into()
231 232 .expect("failed to turn 4 bytes into 4 bytes");
232 233 u32::from_be_bytes(raw)
233 234 }
234 235
235 236 /// internal function to return index of the copy source of the entry at a
236 237 /// given index
237 238 fn copy_idx(&self, idx: u32) -> u32 {
238 239 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
239 240 let end = (idx + 1) * Self::ENTRY_SIZE;
240 241 let start = start as usize;
241 242 let end = end as usize;
242 243 let raw = (&self.index[start..end])
243 244 .try_into()
244 245 .expect("failed to turn 4 bytes into 4 bytes");
245 246 u32::from_be_bytes(raw)
246 247 }
247 248
248 249 /// Return an iterator over all the `Action` in this instance.
249 fn iter_actions(&self, parent: Parent) -> ActionsIterator {
250 fn iter_actions(&self) -> ActionsIterator {
250 251 ActionsIterator {
251 252 changes: &self,
252 parent: parent,
253 253 current: 0,
254 254 }
255 255 }
256 256
257 257 /// return the MergeCase value associated with a filename
258 258 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
259 259 if self.nb_items == 0 {
260 260 return MergeCase::Normal;
261 261 }
262 262 let mut low_part = 0;
263 263 let mut high_part = self.nb_items;
264 264
265 265 while low_part < high_part {
266 266 let cursor = (low_part + high_part - 1) / 2;
267 267 let (flags, filename, _source) = self.entry(cursor);
268 268 match path.cmp(filename) {
269 269 Ordering::Less => low_part = cursor + 1,
270 270 Ordering::Greater => high_part = cursor,
271 271 Ordering::Equal => {
272 272 return match flags & ACTION_MASK {
273 273 MERGED => MergeCase::Merged,
274 274 SALVAGED => MergeCase::Salvaged,
275 275 _ => MergeCase::Normal,
276 276 };
277 277 }
278 278 }
279 279 }
280 280 MergeCase::Normal
281 281 }
282 282 }
283 283
284 284 struct ActionsIterator<'a> {
285 285 changes: &'a ChangedFiles<'a>,
286 parent: Parent,
287 286 current: u32,
288 287 }
289 288
290 289 impl<'a> Iterator for ActionsIterator<'a> {
291 290 type Item = Action<'a>;
292 291
293 292 fn next(&mut self) -> Option<Action<'a>> {
294 let copy_flag = match self.parent {
295 Parent::FirstParent => P1_COPY,
296 Parent::SecondParent => P2_COPY,
297 };
298 293 while self.current < self.changes.nb_items {
299 294 let (flags, file, source) = self.changes.entry(self.current);
300 295 self.current += 1;
301 296 if (flags & ACTION_MASK) == REMOVED {
302 297 return Some(Action::Removed(file));
303 298 }
304 299 let copy = flags & COPY_MASK;
305 if copy == copy_flag {
306 return Some(Action::Copied(file, source));
300 if copy == P1_COPY {
301 return Some(Action::CopiedFromP1(file, source));
302 } else if copy == P2_COPY {
303 return Some(Action::CopiedFromP2(file, source));
307 304 }
308 305 }
309 306 return None;
310 307 }
311 308 }
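Because the iterator now yields both copy variants unconditionally, each consumer selects the edge it cares about; the real filtering lives in `add_from_changes` below. A self-contained illustration of that pattern (sketch only, these types are private to this module):

fn copies_for_edge<'a>(
    changes: &'a ChangedFiles<'a>,
    parent: Parent,
) -> Vec<(&'a HgPath, &'a HgPath)> {
    let mut copies = Vec::new();
    for action in changes.iter_actions() {
        match (action, parent) {
            // keep only the copies recorded against the edge being processed
            (Action::CopiedFromP1(dest, src), Parent::FirstParent) => {
                copies.push((dest, src))
            }
            (Action::CopiedFromP2(dest, src), Parent::SecondParent) => {
                copies.push((dest, src))
            }
            // removals and copies from the other parent are ignored here
            _ => {}
        }
    }
    copies
}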
312 309
313 310 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
314 311 /// ChangedFiles
315 312 ///
316 313 /// It is passed to the RevInfoMaker callback, which can assign any necessary
317 314 /// content to the `data` attribute. The copy tracing code is responsible for
318 315 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
319 316 pub struct DataHolder<D> {
320 317 /// RevInfoMaker callback should assign data referenced by the
321 318 /// ChangedFiles struct it returns to this attribute. The DataHolder
322 319 /// lifetime will be at least as long as the ChangedFiles one.
323 320 pub data: Option<D>,
324 321 }
325 322
326 323 pub type RevInfoMaker<'a, D> =
327 324 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
328 325
329 326 /// enum used to carry information about the parent → child currently processed
330 327 #[derive(Copy, Clone, Debug)]
331 328 enum Parent {
332 329 /// The `p1(x) → x` edge
333 330 FirstParent,
334 331 /// The `p2(x) → x` edge
335 332 SecondParent,
336 333 }
337 334
338 335 /// A small "tokenizer" responsible for turning full HgPaths into lighter
339 336 /// PathTokens
340 337 ///
341 338 /// Dealing with small objects like integers is much faster, so HgPath inputs
342 339 /// are turned into integer "PathTokens" and converted back at the end.
343 340 #[derive(Clone, Debug, Default)]
344 341 struct TwoWayPathMap {
345 342 token: HashMap<HgPathBuf, PathToken>,
346 343 path: Vec<HgPathBuf>,
347 344 }
348 345
349 346 impl TwoWayPathMap {
350 347 fn tokenize(&mut self, path: &HgPath) -> PathToken {
351 348 match self.token.get(path) {
352 349 Some(a) => *a,
353 350 None => {
354 351 let a = self.token.len();
355 352 let buf = path.to_owned();
356 353 self.path.push(buf.clone());
357 354 self.token.insert(buf, a);
358 355 a
359 356 }
360 357 }
361 358 }
362 359
363 360 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
364 361 assert!(token < self.path.len(), format!("Unknown token: {}", token));
365 362 &self.path[token]
366 363 }
367 364 }
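A short usage sketch of the tokenizer (illustration only, `TwoWayPathMap` is private to this module):

fn tokenizer_sketch() {
    let mut map = TwoWayPathMap::default();
    let t1 = map.tokenize(HgPath::new(b"dir/file.txt"));
    let t2 = map.tokenize(HgPath::new(b"dir/file.txt"));
    assert_eq!(t1, t2); // same path, same usize token: cheap to compare and hash
    let _original: &HgPathBuf = map.untokenize(t1); // borrows "dir/file.txt" back
}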
368 365
369 366 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
370 367 ///
371 368 /// Arguments are:
372 369 ///
373 370 /// revs: all revisions to be considered
374 371 /// children: a {parent → [children]} mapping
375 372 /// target_rev: the final revision we are combining copies to
376 373 /// rev_info(rev): callback to get revision information:
377 374 /// * first parent
378 375 /// * second parent
379 376 /// * ChangedFiles
380 377 /// isancestors(low_rev, high_rev): callback to check if a revision is an
381 378 /// ancestor of another
382 379 pub fn combine_changeset_copies<D>(
383 380 revs: Vec<Revision>,
384 381 mut children_count: HashMap<Revision, usize>,
385 382 target_rev: Revision,
386 383 rev_info: RevInfoMaker<D>,
387 384 ) -> PathCopies {
388 385 let mut all_copies = HashMap::new();
389 386
390 387 let mut path_map = TwoWayPathMap::default();
391 388
392 389 for rev in revs {
393 390 let mut d: DataHolder<D> = DataHolder { data: None };
394 391 let (p1, p2, changes) = rev_info(rev, &mut d);
395 392
396 393 // We will chain the copy information accumulated for the parent with
397 394 // the individual copy information of the current revision, creating a
398 395 // new TimeStampedPath for each `rev` → `children` vertex.
399 396 let mut copies: Option<InternalPathCopies> = None;
400 397 // Retrieve data computed in a previous iteration
401 398 let p1_copies = match p1 {
402 399 NULL_REVISION => None,
403 400 _ => get_and_clean_parent_copies(
404 401 &mut all_copies,
405 402 &mut children_count,
406 403 p1,
407 404 ), // will be None if the vertex is not to be traversed
408 405 };
409 406 let p2_copies = match p2 {
410 407 NULL_REVISION => None,
411 408 _ => get_and_clean_parent_copies(
412 409 &mut all_copies,
413 410 &mut children_count,
414 411 p2,
415 412 ), // will be None if the vertex is not to be traversed
416 413 };
417 414 // combine it with data for that revision
418 415 let p1_copies = match p1_copies {
419 416 None => None,
420 417 Some(parent_copies) => Some(add_from_changes(
421 418 &mut path_map,
422 419 &parent_copies,
423 420 &changes,
424 421 Parent::FirstParent,
425 422 rev,
426 423 )),
427 424 };
428 425 let p2_copies = match p2_copies {
429 426 None => None,
430 427 Some(parent_copies) => Some(add_from_changes(
431 428 &mut path_map,
432 429 &parent_copies,
433 430 &changes,
434 431 Parent::SecondParent,
435 432 rev,
436 433 )),
437 434 };
438 435 let copies = match (p1_copies, p2_copies) {
439 436 (None, None) => None,
440 437 (c, None) => c,
441 438 (None, c) => c,
442 439 (Some(p1_copies), Some(p2_copies)) => Some(merge_copies_dict(
443 440 &path_map, rev, p2_copies, p1_copies, &changes,
444 441 )),
445 442 };
446 443 if let Some(c) = copies {
447 444 all_copies.insert(rev, c);
448 445 }
449 446 }
450 447
451 448 // Drop internal information (like the timestamp) and return the final
452 449 // mapping.
453 450 let tt_result = all_copies
454 451 .remove(&target_rev)
455 452 .expect("target revision was not processed");
456 453 let mut result = PathCopies::default();
457 454 for (dest, tt_source) in tt_result {
458 455 if let Some(path) = tt_source.path {
459 456 let path_dest = path_map.untokenize(dest).to_owned();
460 457 let path_path = path_map.untokenize(path).to_owned();
461 458 result.insert(path_dest, path_path);
462 459 }
463 460 }
464 461 result
465 462 }
466 463
467 464 /// fetch previously computed information
468 465 ///
469 466 /// If no other children are expected to need this information, we drop it from
470 467 /// the cache.
471 468 ///
472 469 /// If parent is not part of the set we are expected to walk, return None.
473 470 fn get_and_clean_parent_copies(
474 471 all_copies: &mut HashMap<Revision, InternalPathCopies>,
475 472 children_count: &mut HashMap<Revision, usize>,
476 473 parent_rev: Revision,
477 474 ) -> Option<InternalPathCopies> {
478 475 let count = children_count.get_mut(&parent_rev)?;
479 476 *count -= 1;
480 477 if *count == 0 {
481 478 match all_copies.remove(&parent_rev) {
482 479 Some(c) => Some(c),
483 480 None => Some(InternalPathCopies::default()),
484 481 }
485 482 } else {
486 483 match all_copies.get(&parent_rev) {
487 484 Some(c) => Some(c.clone()),
488 485 None => Some(InternalPathCopies::default()),
489 486 }
490 487 }
491 488 }
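The `children_count` map effectively acts as a reference count on the cached per-revision copy data; a worked example of the eviction rule above:

// Worked example (values illustrative):
//   children_count = {10: 2}  -> revision 10 still has two children to process.
//   First child of 10 processed: the count drops to 1 and the cached
//     InternalPathCopies for 10 is cloned out of `all_copies`.
//   Second child processed: the count drops to 0, the cached value is
//     removed from `all_copies` and moved out, saving the final clone.
//   A parent absent from `children_count` is outside the traversed set,
//     so the function returns None.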
492 489
493 490 /// Combine ChangedFiles with some existing PathCopies information and return
494 491 /// the result
495 492 fn add_from_changes(
496 493 path_map: &mut TwoWayPathMap,
497 494 base_copies: &InternalPathCopies,
498 495 changes: &ChangedFiles,
499 496 parent: Parent,
500 497 current_rev: Revision,
501 498 ) -> InternalPathCopies {
502 499 let mut copies = base_copies.clone();
503 for action in changes.iter_actions(parent) {
500 for action in changes.iter_actions() {
504 501 match action {
505 Action::Copied(path_dest, path_source) => {
506 add_one_copy(
507 current_rev,
508 &mut path_map,
509 &mut copies,
510 &base_copies,
511 path_dest,
512 path_source,
513 );
502 Action::CopiedFromP1(path_dest, path_source) => {
503 match parent {
504 Parent::FirstParent => add_one_copy(
505 current_rev,
506 path_map,
507 &mut copies,
508 &base_copies,
509 path_dest,
510 path_source,
511 ),
512 _ => (), // not the parent we are looking for
513 };
514 }
515 Action::CopiedFromP2(path_dest, path_source) => {
516 match parent {
517 Parent::SecondParent => add_one_copy(
518 current_rev,
519 path_map,
520 &mut copies,
521 &base_copies,
522 path_dest,
523 path_source,
524 ),
525 _ => (), // not the parent we are looking for
526 };
514 527 }
515 528 Action::Removed(deleted_path) => {
516 529 // We must drop copy information for removed file.
517 530 //
518 531 // We need to explicitly record them as dropped to
519 532 // propagate this information when merging two
520 533 // InternalPathCopies object.
521 534 let deleted = path_map.tokenize(deleted_path);
522 535 copies.entry(deleted).and_modify(|old| {
523 536 old.mark_delete(current_rev);
524 537 });
525 538 }
526 539 }
527 540 }
528 541 copies
529 542 }
530 543
531 544 // insert one new piece of copy information into an InternalPathCopies
532 545 //
533 546 // This deals with chaining and overwrites.
534 547 fn add_one_copy(
535 548 current_rev: Revision,
536 549 path_map: &mut TwoWayPathMap,
537 550 copies: &mut InternalPathCopies,
538 551 base_copies: &InternalPathCopies,
539 552 path_dest: &HgPath,
540 553 path_source: &HgPath,
541 554 ) {
542 555 let dest = path_map.tokenize(path_dest);
543 556 let source = path_map.tokenize(path_source);
544 557 let entry;
545 558 if let Some(v) = base_copies.get(&source) {
546 559 entry = match &v.path {
547 560 Some(path) => Some((*(path)).to_owned()),
548 561 None => Some(source.to_owned()),
549 562 }
550 563 } else {
551 564 entry = Some(source.to_owned());
552 565 }
553 566 // Each new entry is introduced by the children, we
554 567 // record this information as we will need it to take
555 568 // the right decision when merging conflicting copy
556 569 // information. See merge_copies_dict for details.
557 570 match copies.entry(dest) {
558 571 Entry::Vacant(slot) => {
559 572 let ttpc = CopySource::new(current_rev, entry);
560 573 slot.insert(ttpc);
561 574 }
562 575 Entry::Occupied(mut slot) => {
563 576 let ttpc = slot.get_mut();
564 577 ttpc.overwrite(current_rev, entry);
565 578 }
566 579 }
567 580 }
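A worked example of the chaining performed by `add_one_copy` (paths shown instead of tokens for readability):

// base_copies (from the parent): { "b" -> CopySource { path: Some("a"), .. } }
// the child records "c" copied from "b":
//   the source "b" is found in base_copies with original source "a",
//   so the new entry maps "c" -> "a" rather than "c" -> "b".
// If "b" had no entry, or its entry marked a deletion (path: None),
// "c" would simply map to "b" itself.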
568 581
569 582 /// merge two copy mappings together, minor and major
570 583 ///
571 584 /// In case of conflict, the value from "major" will be picked, except in
572 585 /// some cases. See inline documentation for details.
573 586 fn merge_copies_dict(
574 587 path_map: &TwoWayPathMap,
575 588 current_merge: Revision,
576 589 mut minor: InternalPathCopies,
577 590 mut major: InternalPathCopies,
578 591 changes: &ChangedFiles,
579 592 ) -> InternalPathCopies {
580 593 // This closure exists as a temporary helper while multiple developers are
581 594 // actively working on this code. Feel free to re-inline it once this
582 595 // code is more settled.
583 596 let cmp_value =
584 597 |dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| {
585 598 compare_value(
586 599 path_map,
587 600 current_merge,
588 601 changes,
589 602 dest,
590 603 src_minor,
591 604 src_major,
592 605 )
593 606 };
594 607 if minor.is_empty() {
595 608 major
596 609 } else if major.is_empty() {
597 610 minor
598 611 } else if minor.len() * 2 < major.len() {
599 612 // Let's say we are merging two InternalPathCopies instances A and B.
600 613 //
601 614 // If A contains N items, the merge result will never contain more
602 615 // than N values different from the ones in A.
603 616 //
604 617 // If B contains M items, with M > N, the merge result will always
605 618 // contain at least M - N values different from the ones in
606 619 // A.
607 620 //
608 621 // As a result, if N < (M-N), we know that simply iterating over A will
609 622 // yield fewer differences than iterating over the difference
610 623 // between A and B.
611 624 //
612 625 // This helps performance a lot in the case where a tiny
613 626 // InternalPathCopies is merged with a much larger one.
614 627 for (dest, src_minor) in minor {
615 628 let src_major = major.get(&dest);
616 629 match src_major {
617 630 None => {
618 631 major.insert(dest, src_minor);
619 632 }
620 633 Some(src_major) => {
621 634 let (pick, overwrite) =
622 635 cmp_value(&dest, &src_minor, src_major);
623 636 if overwrite {
624 637 let src = match pick {
625 638 MergePick::Major => CopySource::new_from_merge(
626 639 current_merge,
627 640 src_major,
628 641 &src_minor,
629 642 ),
630 643 MergePick::Minor => CopySource::new_from_merge(
631 644 current_merge,
632 645 &src_minor,
633 646 src_major,
634 647 ),
635 648 MergePick::Any => CopySource::new_from_merge(
636 649 current_merge,
637 650 src_major,
638 651 &src_minor,
639 652 ),
640 653 };
641 654 major.insert(dest, src);
642 655 } else {
643 656 match pick {
644 657 MergePick::Any | MergePick::Major => None,
645 658 MergePick::Minor => major.insert(dest, src_minor),
646 659 };
647 660 }
648 661 }
649 662 };
650 663 }
651 664 major
652 665 } else if major.len() * 2 < minor.len() {
653 666 // This uses the same rationale as the previous block.
654 667 // (Check the previous block's documentation for details.)
655 668 for (dest, src_major) in major {
656 669 let src_minor = minor.get(&dest);
657 670 match src_minor {
658 671 None => {
659 672 minor.insert(dest, src_major);
660 673 }
661 674 Some(src_minor) => {
662 675 let (pick, overwrite) =
663 676 cmp_value(&dest, src_minor, &src_major);
664 677 if overwrite {
665 678 let src = match pick {
666 679 MergePick::Major => CopySource::new_from_merge(
667 680 current_merge,
668 681 &src_major,
669 682 src_minor,
670 683 ),
671 684 MergePick::Minor => CopySource::new_from_merge(
672 685 current_merge,
673 686 src_minor,
674 687 &src_major,
675 688 ),
676 689 MergePick::Any => CopySource::new_from_merge(
677 690 current_merge,
678 691 &src_major,
679 692 src_minor,
680 693 ),
681 694 };
682 695 minor.insert(dest, src);
683 696 } else {
684 697 match pick {
685 698 MergePick::Any | MergePick::Minor => None,
686 699 MergePick::Major => minor.insert(dest, src_major),
687 700 };
688 701 }
689 702 }
690 703 };
691 704 }
692 705 minor
693 706 } else {
694 707 let mut override_minor = Vec::new();
695 708 let mut override_major = Vec::new();
696 709
697 710 let mut to_major = |k: &PathToken, v: &CopySource| {
698 711 override_major.push((k.clone(), v.clone()))
699 712 };
700 713 let mut to_minor = |k: &PathToken, v: &CopySource| {
701 714 override_minor.push((k.clone(), v.clone()))
702 715 };
703 716
704 717 // The diff function leverages detection of identical subparts when
705 718 // minor and major have some common ancestors. This makes it very
706 719 // fast in most cases.
707 720 //
708 721 // In cases where the two maps are vastly different in size, the current
709 722 // approach is still slowish because the iteration will iterate over
710 723 // all the "exclusive" content of the larger one. This situation can be
711 724 // frequent when the subgraph of revisions we are processing has a lot
712 725 // of roots. Each root adds its own fully new map to the mix (and
713 726 // likely a small map, if the path from the root to the "main path" is
714 727 // short).
715 728 //
716 729 // We could do better by detecting such situations and processing them
717 730 // differently.
718 731 for d in minor.diff(&major) {
719 732 match d {
720 733 DiffItem::Add(k, v) => to_minor(k, v),
721 734 DiffItem::Remove(k, v) => to_major(k, v),
722 735 DiffItem::Update { old, new } => {
723 736 let (dest, src_major) = new;
724 737 let (_, src_minor) = old;
725 738 let (pick, overwrite) =
726 739 cmp_value(dest, src_minor, src_major);
727 740 if overwrite {
728 741 let src = match pick {
729 742 MergePick::Major => CopySource::new_from_merge(
730 743 current_merge,
731 744 src_major,
732 745 src_minor,
733 746 ),
734 747 MergePick::Minor => CopySource::new_from_merge(
735 748 current_merge,
736 749 src_minor,
737 750 src_major,
738 751 ),
739 752 MergePick::Any => CopySource::new_from_merge(
740 753 current_merge,
741 754 src_major,
742 755 src_minor,
743 756 ),
744 757 };
745 758 to_minor(dest, &src);
746 759 to_major(dest, &src);
747 760 } else {
748 761 match pick {
749 762 MergePick::Major => to_minor(dest, src_major),
750 763 MergePick::Minor => to_major(dest, src_minor),
751 764 // If the two entries are identical, there is no need to do
752 765 // anything (but diff should not have yielded them)
753 766 MergePick::Any => unreachable!(),
754 767 }
755 768 }
756 769 }
757 770 };
758 771 }
759 772
760 773 let updates;
761 774 let mut result;
762 775 if override_major.is_empty() {
763 776 result = major
764 777 } else if override_minor.is_empty() {
765 778 result = minor
766 779 } else {
767 780 if override_minor.len() < override_major.len() {
768 781 updates = override_minor;
769 782 result = minor;
770 783 } else {
771 784 updates = override_major;
772 785 result = major;
773 786 }
774 787 for (k, v) in updates {
775 788 result.insert(k, v);
776 789 }
777 790 }
778 791 result
779 792 }
780 793 }
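A worked size example for the two asymmetric fast paths above:

// Numbers illustrative:
//   minor has N = 10 entries, major has M = 100 entries.
//   The merged result can differ from major in at most N = 10 places,
//   while the generic OrdMap::diff path still has to visit the roughly
//   M - N = 90 entries that exist only in major.
//   Since N < M - N, iterating over the small side and patching the large
//   one in place is cheaper; the second branch handles the mirrored case.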
781 794
782 795 /// represents the side that should prevail when merging two
783 796 /// InternalPathCopies
784 797 enum MergePick {
785 798 /// The "major" (p1) side prevails
786 799 Major,
787 800 /// The "minor" (p2) side prevails
788 801 Minor,
789 802 /// Any side could be used (because they are the same)
790 803 Any,
791 804 }
792 805
793 806 /// decide which side prevails in case of conflicting values
794 807 #[allow(clippy::if_same_then_else)]
795 808 fn compare_value(
796 809 path_map: &TwoWayPathMap,
797 810 current_merge: Revision,
798 811 changes: &ChangedFiles,
799 812 dest: &PathToken,
800 813 src_minor: &CopySource,
801 814 src_major: &CopySource,
802 815 ) -> (MergePick, bool) {
803 816 if src_major.rev == current_merge {
804 817 if src_minor.rev == current_merge {
805 818 if src_major.path.is_none() {
806 819 // We cannot get different copy information for both p1 and p2
807 820 // from the same revision. Unless this was a
808 821 // deletion.
809 822 //
810 823 // However the deletion might come over different data on each
811 824 // branch.
812 825 let need_over = src_major.overwritten != src_minor.overwritten;
813 826 (MergePick::Any, need_over)
814 827 } else {
815 828 unreachable!();
816 829 }
817 830 } else {
818 831 // The last value comes from the current merge, this value -will- win
819 832 // eventually.
820 833 (MergePick::Major, true)
821 834 }
822 835 } else if src_minor.rev == current_merge {
823 836 // The last value comes from the current merge, this value -will- win
824 837 // eventually.
825 838 (MergePick::Minor, true)
826 839 } else if src_major.path == src_minor.path {
827 840 // we have the same value, but from another source;
828 841 if src_major.rev == src_minor.rev {
829 842 // If the two entries are identical, they are both valid
830 843 debug_assert!(src_major.overwritten == src_minor.overwritten);
831 844 (MergePick::Any, false)
832 845 } else if src_major.is_overwritten_by(src_minor) {
833 846 (MergePick::Minor, false)
834 847 } else if src_minor.is_overwritten_by(src_major) {
835 848 (MergePick::Major, false)
836 849 } else {
837 850 (MergePick::Any, true)
838 851 }
839 852 } else if src_major.rev == src_minor.rev {
840 853 // We cannot get copy information for both p1 and p2 in the
841 854 // same rev. So this is the same value.
842 855 unreachable!(
843 856 "conflicting information from p1 and p2 in the same revision"
844 857 );
845 858 } else {
846 859 let dest_path = path_map.untokenize(*dest);
847 860 let action = changes.get_merge_case(dest_path);
848 861 if src_minor.path.is_some()
849 862 && src_major.path.is_none()
850 863 && action == MergeCase::Salvaged
851 864 {
852 865 // If the file is "deleted" in the major side but was
853 866 // salvaged by the merge, we keep the minor side alive
854 867 (MergePick::Minor, true)
855 868 } else if src_major.path.is_some()
856 869 && src_minor.path.is_none()
857 870 && action == MergeCase::Salvaged
858 871 {
859 872 // If the file is "deleted" in the minor side but was
860 873 // salvaged by the merge, unconditionally preserve the
861 874 // major side.
862 875 (MergePick::Major, true)
863 876 } else if src_minor.is_overwritten_by(src_major) {
864 877 // The information from the minor version is strictly older than
865 878 // the major version
866 879 if action == MergeCase::Merged {
867 880 // If the file was actively merged, it means some non-copy
868 881 // activity happened on the other branch. It
869 882 // means the older copy information is still relevant.
870 883 //
871 884 // The major side wins such conflict.
872 885 (MergePick::Major, true)
873 886 } else {
874 887 // No activity on the minor branch, pick the newer one.
875 888 (MergePick::Major, false)
876 889 }
877 890 } else if src_major.is_overwritten_by(src_minor) {
878 891 if action == MergeCase::Merged {
879 892 // If the file was actively merged, it means some non-copy
880 893 // activity happened on the other branch. It
881 894 // means the older copy information is still relevant.
882 895 //
883 896 // The major side wins such conflict.
884 897 (MergePick::Major, true)
885 898 } else {
886 899 // No activity on the minor branch, pick the newer one.
887 900 (MergePick::Minor, false)
888 901 }
889 902 } else if src_minor.path.is_none() {
890 903 // the minor side has no relevant information, pick the alive one
891 904 (MergePick::Major, true)
892 905 } else if src_major.path.is_none() {
893 906 // the major side has no relevant information, pick the alive one
894 907 (MergePick::Minor, true)
895 908 } else {
896 909 // by default the major side wins
897 910 (MergePick::Major, true)
898 911 }
899 912 }
900 913 }