##// END OF EJS Templates
copies-rust: make more use of the new comparison property...
marmoute -
r47325:8fcf07e6 default
parent child Browse files
Show More
@@ -1,947 +1,929
1 1 use crate::utils::hg_path::HgPath;
2 2 use crate::utils::hg_path::HgPathBuf;
3 3 use crate::Revision;
4 4 use crate::NULL_REVISION;
5 5
6 6 use im_rc::ordmap::DiffItem;
7 7 use im_rc::ordmap::Entry;
8 8 use im_rc::ordmap::OrdMap;
9 9
10 10 use std::cmp::Ordering;
11 11 use std::collections::HashMap;
12 12 use std::collections::HashSet;
13 13 use std::convert::TryInto;
14 14
15 15 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
16 16
17 17 type PathToken = usize;
18 18
19 19 #[derive(Clone, Debug)]
20 20 struct CopySource {
21 21 /// revision at which the copy information was added
22 22 rev: Revision,
23 23 /// the copy source, (Set to None in case of deletion of the associated
24 24 /// key)
25 25 path: Option<PathToken>,
26 26 /// a set of previous `CopySource.rev` value directly or indirectly
27 27 /// overwritten by this one.
28 28 overwritten: HashSet<Revision>,
29 29 }
30 30
31 31 impl CopySource {
32 32 /// create a new CopySource
33 33 ///
34 34 /// Use this when no previous copy source existed.
35 35 fn new(rev: Revision, path: Option<PathToken>) -> Self {
36 36 Self {
37 37 rev,
38 38 path,
39 39 overwritten: HashSet::new(),
40 40 }
41 41 }
42 42
43 43 /// create a new CopySource from merging two others
44 44 ///
45 45 /// Use this when merging two InternalPathCopies requires active merging of
46 46 /// some entries.
47 47 fn new_from_merge(rev: Revision, winner: &Self, loser: &Self) -> Self {
48 48 let mut overwritten = HashSet::new();
49 49 overwritten.extend(winner.overwritten.iter().copied());
50 50 overwritten.extend(loser.overwritten.iter().copied());
51 51 overwritten.insert(winner.rev);
52 52 overwritten.insert(loser.rev);
53 53 Self {
54 54 rev,
55 55 path: winner.path,
56 56 overwritten: overwritten,
57 57 }
58 58 }
59 59
60 60 /// Update the value of a pre-existing CopySource
61 61 ///
62 62 /// Use this when recording copy information from parent β†’ child edges
63 63 fn overwrite(&mut self, rev: Revision, path: Option<PathToken>) {
64 64 self.overwritten.insert(self.rev);
65 65 self.rev = rev;
66 66 self.path = path;
67 67 }
68 68
69 69 /// Mark pre-existing copy information as "dropped" by a file deletion
70 70 ///
71 71 /// Use this when recording copy information from parent β†’ child edges
72 72 fn mark_delete(&mut self, rev: Revision) {
73 73 self.overwritten.insert(self.rev);
74 74 self.rev = rev;
75 75 self.path = None;
76 76 }
77 77
78 78 /// Mark pre-existing copy information as "dropped" by a file deletion
79 79 ///
80 80 /// Use this when recording copy information from parent β†’ child edges
81 81 fn mark_delete_with_pair(&mut self, rev: Revision, other: &Self) {
82 82 self.overwritten.insert(self.rev);
83 83 if other.rev != rev {
84 84 self.overwritten.insert(other.rev);
85 85 }
86 86 self.overwritten.extend(other.overwritten.iter().copied());
87 87 self.rev = rev;
88 88 self.path = None;
89 89 }
90 90
91 91 fn is_overwritten_by(&self, other: &Self) -> bool {
92 92 other.overwritten.contains(&self.rev)
93 93 }
94 94 }
95 95
96 96 // For the same "dest", content generated for a given revision will always be
97 97 // the same.
98 98 impl PartialEq for CopySource {
99 99 fn eq(&self, other: &Self) -> bool {
100 100 #[cfg(debug_assertions)]
101 101 {
102 102 if self.rev == other.rev {
103 103 debug_assert!(self.path == other.path);
104 104 debug_assert!(self.overwritten == other.overwritten);
105 105 }
106 106 }
107 107 self.rev == other.rev
108 108 }
109 109 }
110 110
111 111 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
112 112 type InternalPathCopies = OrdMap<PathToken, CopySource>;
113 113
114 114 /// hold parent 1, parent 2 and relevant files actions.
115 115 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
116 116
117 117 /// represent the files affected by a changesets
118 118 ///
119 119 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
120 120 /// all the data categories tracked by it.
121 121 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
122 122 /// all the data categories tracked by it.
123 123 pub struct ChangedFiles<'a> {
124 124 nb_items: u32,
125 125 index: &'a [u8],
126 126 data: &'a [u8],
127 127 }
128 128
129 129 /// Represent active changes that affect the copy tracing.
130 130 enum Action<'a> {
131 131 /// The parent ? children edge is removing a file
132 132 ///
133 133 /// (actually, this could be the edge from the other parent, but it does
134 134 /// not matters)
135 135 Removed(&'a HgPath),
136 136 /// The parent ? children edge introduce copy information between (dest,
137 137 /// source)
138 138 CopiedFromP1(&'a HgPath, &'a HgPath),
139 139 CopiedFromP2(&'a HgPath, &'a HgPath),
140 140 }
141 141
142 142 /// This express the possible "special" case we can get in a merge
143 143 ///
144 144 /// See mercurial/metadata.py for details on these values.
145 145 #[derive(PartialEq)]
146 146 enum MergeCase {
147 147 /// Merged: file had history on both side that needed to be merged
148 148 Merged,
149 149 /// Salvaged: file was candidate for deletion, but survived the merge
150 150 Salvaged,
151 151 /// Normal: Not one of the two cases above
152 152 Normal,
153 153 }
154 154
155 155 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
156 156
157 157 const EMPTY: &[u8] = b"";
158 158 const COPY_MASK: u8 = 3;
159 159 const P1_COPY: u8 = 2;
160 160 const P2_COPY: u8 = 3;
161 161 const ACTION_MASK: u8 = 28;
162 162 const REMOVED: u8 = 12;
163 163 const MERGED: u8 = 8;
164 164 const SALVAGED: u8 = 16;
165 165
166 166 impl<'a> ChangedFiles<'a> {
167 167 const INDEX_START: usize = 4;
168 168 const ENTRY_SIZE: u32 = 9;
169 169 const FILENAME_START: u32 = 1;
170 170 const COPY_SOURCE_START: u32 = 5;
171 171
172 172 pub fn new(data: &'a [u8]) -> Self {
173 173 assert!(
174 174 data.len() >= 4,
175 175 "data size ({}) is too small to contain the header (4)",
176 176 data.len()
177 177 );
178 178 let nb_items_raw: [u8; 4] = (&data[0..=3])
179 179 .try_into()
180 180 .expect("failed to turn 4 bytes into 4 bytes");
181 181 let nb_items = u32::from_be_bytes(nb_items_raw);
182 182
183 183 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
184 184 let index_end = Self::INDEX_START + index_size;
185 185
186 186 assert!(
187 187 data.len() >= index_end,
188 188 "data size ({}) is too small to fit the index_data ({})",
189 189 data.len(),
190 190 index_end
191 191 );
192 192
193 193 let ret = ChangedFiles {
194 194 nb_items,
195 195 index: &data[Self::INDEX_START..index_end],
196 196 data: &data[index_end..],
197 197 };
198 198 let max_data = ret.filename_end(nb_items - 1) as usize;
199 199 assert!(
200 200 ret.data.len() >= max_data,
201 201 "data size ({}) is too small to fit all data ({})",
202 202 data.len(),
203 203 index_end + max_data
204 204 );
205 205 ret
206 206 }
207 207
208 208 pub fn new_empty() -> Self {
209 209 ChangedFiles {
210 210 nb_items: 0,
211 211 index: EMPTY,
212 212 data: EMPTY,
213 213 }
214 214 }
215 215
216 216 /// internal function to return an individual entry at a given index
217 217 fn entry(&'a self, idx: u32) -> FileChange<'a> {
218 218 if idx >= self.nb_items {
219 219 panic!(
220 220 "index for entry is higher that the number of file {} >= {}",
221 221 idx, self.nb_items
222 222 )
223 223 }
224 224 let flags = self.flags(idx);
225 225 let filename = self.filename(idx);
226 226 let copy_idx = self.copy_idx(idx);
227 227 let copy_source = self.filename(copy_idx);
228 228 (flags, filename, copy_source)
229 229 }
230 230
231 231 /// internal function to return the filename of the entry at a given index
232 232 fn filename(&self, idx: u32) -> &HgPath {
233 233 let filename_start;
234 234 if idx == 0 {
235 235 filename_start = 0;
236 236 } else {
237 237 filename_start = self.filename_end(idx - 1)
238 238 }
239 239 let filename_end = self.filename_end(idx);
240 240 let filename_start = filename_start as usize;
241 241 let filename_end = filename_end as usize;
242 242 HgPath::new(&self.data[filename_start..filename_end])
243 243 }
244 244
245 245 /// internal function to return the flag field of the entry at a given
246 246 /// index
247 247 fn flags(&self, idx: u32) -> u8 {
248 248 let idx = idx as usize;
249 249 self.index[idx * (Self::ENTRY_SIZE as usize)]
250 250 }
251 251
252 252 /// internal function to return the end of a filename part at a given index
253 253 fn filename_end(&self, idx: u32) -> u32 {
254 254 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
255 255 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
256 256 let start = start as usize;
257 257 let end = end as usize;
258 258 let raw = (&self.index[start..end])
259 259 .try_into()
260 260 .expect("failed to turn 4 bytes into 4 bytes");
261 261 u32::from_be_bytes(raw)
262 262 }
263 263
264 264 /// internal function to return index of the copy source of the entry at a
265 265 /// given index
266 266 fn copy_idx(&self, idx: u32) -> u32 {
267 267 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
268 268 let end = (idx + 1) * Self::ENTRY_SIZE;
269 269 let start = start as usize;
270 270 let end = end as usize;
271 271 let raw = (&self.index[start..end])
272 272 .try_into()
273 273 .expect("failed to turn 4 bytes into 4 bytes");
274 274 u32::from_be_bytes(raw)
275 275 }
276 276
277 277 /// Return an iterator over all the `Action` in this instance.
278 278 fn iter_actions(&self) -> ActionsIterator {
279 279 ActionsIterator {
280 280 changes: &self,
281 281 current: 0,
282 282 }
283 283 }
284 284
285 285 /// return the MergeCase value associated with a filename
286 286 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
287 287 if self.nb_items == 0 {
288 288 return MergeCase::Normal;
289 289 }
290 290 let mut low_part = 0;
291 291 let mut high_part = self.nb_items;
292 292
293 293 while low_part < high_part {
294 294 let cursor = (low_part + high_part - 1) / 2;
295 295 let (flags, filename, _source) = self.entry(cursor);
296 296 match path.cmp(filename) {
297 297 Ordering::Less => low_part = cursor + 1,
298 298 Ordering::Greater => high_part = cursor,
299 299 Ordering::Equal => {
300 300 return match flags & ACTION_MASK {
301 301 MERGED => MergeCase::Merged,
302 302 SALVAGED => MergeCase::Salvaged,
303 303 _ => MergeCase::Normal,
304 304 };
305 305 }
306 306 }
307 307 }
308 308 MergeCase::Normal
309 309 }
310 310 }
311 311
312 312 struct ActionsIterator<'a> {
313 313 changes: &'a ChangedFiles<'a>,
314 314 current: u32,
315 315 }
316 316
317 317 impl<'a> Iterator for ActionsIterator<'a> {
318 318 type Item = Action<'a>;
319 319
320 320 fn next(&mut self) -> Option<Action<'a>> {
321 321 while self.current < self.changes.nb_items {
322 322 let (flags, file, source) = self.changes.entry(self.current);
323 323 self.current += 1;
324 324 if (flags & ACTION_MASK) == REMOVED {
325 325 return Some(Action::Removed(file));
326 326 }
327 327 let copy = flags & COPY_MASK;
328 328 if copy == P1_COPY {
329 329 return Some(Action::CopiedFromP1(file, source));
330 330 } else if copy == P2_COPY {
331 331 return Some(Action::CopiedFromP2(file, source));
332 332 }
333 333 }
334 334 return None;
335 335 }
336 336 }
337 337
338 338 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
339 339 /// ChangedFiles
340 340 ///
341 341 /// It is passed to the RevInfoMaker callback who can assign any necessary
342 342 /// content to the `data` attribute. The copy tracing code is responsible for
343 343 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
344 344 pub struct DataHolder<D> {
345 345 /// RevInfoMaker callback should assign data referenced by the
346 346 /// ChangedFiles struct it return to this attribute. The DataHolder
347 347 /// lifetime will be at least as long as the ChangedFiles one.
348 348 pub data: Option<D>,
349 349 }
350 350
351 351 pub type RevInfoMaker<'a, D> =
352 352 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
353 353
354 354 /// A small "tokenizer" responsible of turning full HgPath into lighter
355 355 /// PathToken
356 356 ///
357 357 /// Dealing with small object, like integer is much faster, so HgPath input are
358 358 /// turned into integer "PathToken" and converted back in the end.
359 359 #[derive(Clone, Debug, Default)]
360 360 struct TwoWayPathMap {
361 361 token: HashMap<HgPathBuf, PathToken>,
362 362 path: Vec<HgPathBuf>,
363 363 }
364 364
365 365 impl TwoWayPathMap {
366 366 fn tokenize(&mut self, path: &HgPath) -> PathToken {
367 367 match self.token.get(path) {
368 368 Some(a) => *a,
369 369 None => {
370 370 let a = self.token.len();
371 371 let buf = path.to_owned();
372 372 self.path.push(buf.clone());
373 373 self.token.insert(buf, a);
374 374 a
375 375 }
376 376 }
377 377 }
378 378
379 379 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
380 380 assert!(token < self.path.len(), format!("Unknown token: {}", token));
381 381 &self.path[token]
382 382 }
383 383 }
384 384
385 385 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
386 386 ///
387 387 /// Arguments are:
388 388 ///
389 389 /// revs: all revisions to be considered
390 390 /// children: a {parent ? [childrens]} mapping
391 391 /// target_rev: the final revision we are combining copies to
392 392 /// rev_info(rev): callback to get revision information:
393 393 /// * first parent
394 394 /// * second parent
395 395 /// * ChangedFiles
396 396 /// isancestors(low_rev, high_rev): callback to check if a revision is an
397 397 /// ancestor of another
398 398 pub fn combine_changeset_copies<D>(
399 399 revs: Vec<Revision>,
400 400 mut children_count: HashMap<Revision, usize>,
401 401 target_rev: Revision,
402 402 rev_info: RevInfoMaker<D>,
403 403 ) -> PathCopies {
404 404 let mut all_copies = HashMap::new();
405 405
406 406 let mut path_map = TwoWayPathMap::default();
407 407
408 408 for rev in revs {
409 409 let mut d: DataHolder<D> = DataHolder { data: None };
410 410 let (p1, p2, changes) = rev_info(rev, &mut d);
411 411
412 412 // We will chain the copies information accumulated for the parent with
413 413 // the individual copies information the curent revision. Creating a
414 414 // new TimeStampedPath for each `rev` β†’ `children` vertex.
415 415 // Retrieve data computed in a previous iteration
416 416 let p1_copies = match p1 {
417 417 NULL_REVISION => None,
418 418 _ => get_and_clean_parent_copies(
419 419 &mut all_copies,
420 420 &mut children_count,
421 421 p1,
422 422 ), // will be None if the vertex is not to be traversed
423 423 };
424 424 let p2_copies = match p2 {
425 425 NULL_REVISION => None,
426 426 _ => get_and_clean_parent_copies(
427 427 &mut all_copies,
428 428 &mut children_count,
429 429 p2,
430 430 ), // will be None if the vertex is not to be traversed
431 431 };
432 432 // combine it with data for that revision
433 433 let (p1_copies, p2_copies) =
434 434 chain_changes(&mut path_map, p1_copies, p2_copies, &changes, rev);
435 435 let copies = match (p1_copies, p2_copies) {
436 436 (None, None) => None,
437 437 (c, None) => c,
438 438 (None, c) => c,
439 439 (Some(p1_copies), Some(p2_copies)) => Some(merge_copies_dict(
440 440 &path_map, rev, p2_copies, p1_copies, &changes,
441 441 )),
442 442 };
443 443 if let Some(c) = copies {
444 444 all_copies.insert(rev, c);
445 445 }
446 446 }
447 447
448 448 // Drop internal information (like the timestamp) and return the final
449 449 // mapping.
450 450 let tt_result = all_copies
451 451 .remove(&target_rev)
452 452 .expect("target revision was not processed");
453 453 let mut result = PathCopies::default();
454 454 for (dest, tt_source) in tt_result {
455 455 if let Some(path) = tt_source.path {
456 456 let path_dest = path_map.untokenize(dest).to_owned();
457 457 let path_path = path_map.untokenize(path).to_owned();
458 458 result.insert(path_dest, path_path);
459 459 }
460 460 }
461 461 result
462 462 }
463 463
464 464 /// fetch previous computed information
465 465 ///
466 466 /// If no other children are expected to need this information, we drop it from
467 467 /// the cache.
468 468 ///
469 469 /// If parent is not part of the set we are expected to walk, return None.
470 470 fn get_and_clean_parent_copies(
471 471 all_copies: &mut HashMap<Revision, InternalPathCopies>,
472 472 children_count: &mut HashMap<Revision, usize>,
473 473 parent_rev: Revision,
474 474 ) -> Option<InternalPathCopies> {
475 475 let count = children_count.get_mut(&parent_rev)?;
476 476 *count -= 1;
477 477 if *count == 0 {
478 478 match all_copies.remove(&parent_rev) {
479 479 Some(c) => Some(c),
480 480 None => Some(InternalPathCopies::default()),
481 481 }
482 482 } else {
483 483 match all_copies.get(&parent_rev) {
484 484 Some(c) => Some(c.clone()),
485 485 None => Some(InternalPathCopies::default()),
486 486 }
487 487 }
488 488 }
489 489
490 490 /// Combine ChangedFiles with some existing PathCopies information and return
491 491 /// the result
492 492 fn chain_changes(
493 493 path_map: &mut TwoWayPathMap,
494 494 base_p1_copies: Option<InternalPathCopies>,
495 495 base_p2_copies: Option<InternalPathCopies>,
496 496 changes: &ChangedFiles,
497 497 current_rev: Revision,
498 498 ) -> (Option<InternalPathCopies>, Option<InternalPathCopies>) {
499 499 // Fast path the "nothing to do" case.
500 500 if let (None, None) = (&base_p1_copies, &base_p2_copies) {
501 501 return (None, None);
502 502 }
503 503
504 504 let mut p1_copies = base_p1_copies.clone();
505 505 let mut p2_copies = base_p2_copies.clone();
506 506 for action in changes.iter_actions() {
507 507 match action {
508 508 Action::CopiedFromP1(path_dest, path_source) => {
509 509 match &mut p1_copies {
510 510 None => (), // This is not a vertex we should proceed.
511 511 Some(copies) => add_one_copy(
512 512 current_rev,
513 513 path_map,
514 514 copies,
515 515 base_p1_copies.as_ref().unwrap(),
516 516 path_dest,
517 517 path_source,
518 518 ),
519 519 }
520 520 }
521 521 Action::CopiedFromP2(path_dest, path_source) => {
522 522 match &mut p2_copies {
523 523 None => (), // This is not a vertex we should proceed.
524 524 Some(copies) => add_one_copy(
525 525 current_rev,
526 526 path_map,
527 527 copies,
528 528 base_p2_copies.as_ref().unwrap(),
529 529 path_dest,
530 530 path_source,
531 531 ),
532 532 }
533 533 }
534 534 Action::Removed(deleted_path) => {
535 535 // We must drop copy information for removed file.
536 536 //
537 537 // We need to explicitly record them as dropped to
538 538 // propagate this information when merging two
539 539 // InternalPathCopies object.
540 540 let deleted = path_map.tokenize(deleted_path);
541 541
542 542 let p1_entry = match &mut p1_copies {
543 543 None => None,
544 544 Some(copies) => match copies.entry(deleted) {
545 545 Entry::Occupied(e) => Some(e),
546 546 Entry::Vacant(_) => None,
547 547 },
548 548 };
549 549 let p2_entry = match &mut p2_copies {
550 550 None => None,
551 551 Some(copies) => match copies.entry(deleted) {
552 552 Entry::Occupied(e) => Some(e),
553 553 Entry::Vacant(_) => None,
554 554 },
555 555 };
556 556
557 557 match (p1_entry, p2_entry) {
558 558 (None, None) => (),
559 559 (Some(mut e), None) => {
560 560 e.get_mut().mark_delete(current_rev)
561 561 }
562 562 (None, Some(mut e)) => {
563 563 e.get_mut().mark_delete(current_rev)
564 564 }
565 565 (Some(mut e1), Some(mut e2)) => {
566 566 let cs1 = e1.get_mut();
567 567 let cs2 = e2.get();
568 568 cs1.mark_delete_with_pair(current_rev, &cs2);
569 569 e2.insert(cs1.clone());
570 570 }
571 571 }
572 572 }
573 573 }
574 574 }
575 575 (p1_copies, p2_copies)
576 576 }
577 577
578 578 // insert one new copy information in an InternalPathCopies
579 579 //
580 580 // This deal with chaining and overwrite.
581 581 fn add_one_copy(
582 582 current_rev: Revision,
583 583 path_map: &mut TwoWayPathMap,
584 584 copies: &mut InternalPathCopies,
585 585 base_copies: &InternalPathCopies,
586 586 path_dest: &HgPath,
587 587 path_source: &HgPath,
588 588 ) {
589 589 let dest = path_map.tokenize(path_dest);
590 590 let source = path_map.tokenize(path_source);
591 591 let entry;
592 592 if let Some(v) = base_copies.get(&source) {
593 593 entry = match &v.path {
594 594 Some(path) => Some((*(path)).to_owned()),
595 595 None => Some(source.to_owned()),
596 596 }
597 597 } else {
598 598 entry = Some(source.to_owned());
599 599 }
600 600 // Each new entry is introduced by the children, we
601 601 // record this information as we will need it to take
602 602 // the right decision when merging conflicting copy
603 603 // information. See merge_copies_dict for details.
604 604 match copies.entry(dest) {
605 605 Entry::Vacant(slot) => {
606 606 let ttpc = CopySource::new(current_rev, entry);
607 607 slot.insert(ttpc);
608 608 }
609 609 Entry::Occupied(mut slot) => {
610 610 let ttpc = slot.get_mut();
611 611 ttpc.overwrite(current_rev, entry);
612 612 }
613 613 }
614 614 }
615 615
616 616 /// merge two copies-mapping together, minor and major
617 617 ///
618 618 /// In case of conflict, value from "major" will be picked, unless in some
619 619 /// cases. See inline documentation for details.
620 620 fn merge_copies_dict(
621 621 path_map: &TwoWayPathMap,
622 622 current_merge: Revision,
623 623 mut minor: InternalPathCopies,
624 624 mut major: InternalPathCopies,
625 625 changes: &ChangedFiles,
626 626 ) -> InternalPathCopies {
627 627 // This closure exist as temporary help while multiple developper are
628 628 // actively working on this code. Feel free to re-inline it once this
629 629 // code is more settled.
630 630 let cmp_value =
631 631 |dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| {
632 632 compare_value(
633 633 path_map,
634 634 current_merge,
635 635 changes,
636 636 dest,
637 637 src_minor,
638 638 src_major,
639 639 )
640 640 };
641 641 if minor.is_empty() {
642 642 major
643 643 } else if major.is_empty() {
644 644 minor
645 645 } else if minor.len() * 2 < major.len() {
646 646 // Lets says we are merging two InternalPathCopies instance A and B.
647 647 //
648 648 // If A contains N items, the merge result will never contains more
649 649 // than N values differents than the one in A
650 650 //
651 651 // If B contains M items, with M > N, the merge result will always
652 652 // result in a minimum of M - N value differents than the on in
653 653 // A
654 654 //
655 655 // As a result, if N < (M-N), we know that simply iterating over A will
656 656 // yield less difference than iterating over the difference
657 657 // between A and B.
658 658 //
659 659 // This help performance a lot in case were a tiny
660 660 // InternalPathCopies is merged with a much larger one.
661 661 for (dest, src_minor) in minor {
662 662 let src_major = major.get(&dest);
663 663 match src_major {
664 664 None => {
665 665 major.insert(dest, src_minor);
666 666 }
667 667 Some(src_major) => {
668 668 let (pick, overwrite) =
669 669 cmp_value(&dest, &src_minor, src_major);
670 670 if overwrite {
671 671 let src = match pick {
672 672 MergePick::Major => CopySource::new_from_merge(
673 673 current_merge,
674 674 src_major,
675 675 &src_minor,
676 676 ),
677 677 MergePick::Minor => CopySource::new_from_merge(
678 678 current_merge,
679 679 &src_minor,
680 680 src_major,
681 681 ),
682 682 MergePick::Any => CopySource::new_from_merge(
683 683 current_merge,
684 684 src_major,
685 685 &src_minor,
686 686 ),
687 687 };
688 688 major.insert(dest, src);
689 689 } else {
690 690 match pick {
691 691 MergePick::Any | MergePick::Major => None,
692 692 MergePick::Minor => major.insert(dest, src_minor),
693 693 };
694 694 }
695 695 }
696 696 };
697 697 }
698 698 major
699 699 } else if major.len() * 2 < minor.len() {
700 700 // This use the same rational than the previous block.
701 701 // (Check previous block documentation for details.)
702 702 for (dest, src_major) in major {
703 703 let src_minor = minor.get(&dest);
704 704 match src_minor {
705 705 None => {
706 706 minor.insert(dest, src_major);
707 707 }
708 708 Some(src_minor) => {
709 709 let (pick, overwrite) =
710 710 cmp_value(&dest, src_minor, &src_major);
711 711 if overwrite {
712 712 let src = match pick {
713 713 MergePick::Major => CopySource::new_from_merge(
714 714 current_merge,
715 715 &src_major,
716 716 src_minor,
717 717 ),
718 718 MergePick::Minor => CopySource::new_from_merge(
719 719 current_merge,
720 720 src_minor,
721 721 &src_major,
722 722 ),
723 723 MergePick::Any => CopySource::new_from_merge(
724 724 current_merge,
725 725 &src_major,
726 726 src_minor,
727 727 ),
728 728 };
729 729 minor.insert(dest, src);
730 730 } else {
731 731 match pick {
732 732 MergePick::Any | MergePick::Minor => None,
733 733 MergePick::Major => minor.insert(dest, src_major),
734 734 };
735 735 }
736 736 }
737 737 };
738 738 }
739 739 minor
740 740 } else {
741 741 let mut override_minor = Vec::new();
742 742 let mut override_major = Vec::new();
743 743
744 744 let mut to_major = |k: &PathToken, v: &CopySource| {
745 745 override_major.push((k.clone(), v.clone()))
746 746 };
747 747 let mut to_minor = |k: &PathToken, v: &CopySource| {
748 748 override_minor.push((k.clone(), v.clone()))
749 749 };
750 750
751 751 // The diff function leverage detection of the identical subpart if
752 752 // minor and major has some common ancestors. This make it very
753 753 // fast is most case.
754 754 //
755 755 // In case where the two map are vastly different in size, the current
756 756 // approach is still slowish because the iteration will iterate over
757 757 // all the "exclusive" content of the larger on. This situation can be
758 758 // frequent when the subgraph of revision we are processing has a lot
759 759 // of roots. Each roots adding they own fully new map to the mix (and
760 760 // likely a small map, if the path from the root to the "main path" is
761 761 // small.
762 762 //
763 763 // We could do better by detecting such situation and processing them
764 764 // differently.
765 765 for d in minor.diff(&major) {
766 766 match d {
767 767 DiffItem::Add(k, v) => to_minor(k, v),
768 768 DiffItem::Remove(k, v) => to_major(k, v),
769 769 DiffItem::Update { old, new } => {
770 770 let (dest, src_major) = new;
771 771 let (_, src_minor) = old;
772 772 let (pick, overwrite) =
773 773 cmp_value(dest, src_minor, src_major);
774 774 if overwrite {
775 775 let src = match pick {
776 776 MergePick::Major => CopySource::new_from_merge(
777 777 current_merge,
778 778 src_major,
779 779 src_minor,
780 780 ),
781 781 MergePick::Minor => CopySource::new_from_merge(
782 782 current_merge,
783 783 src_minor,
784 784 src_major,
785 785 ),
786 786 MergePick::Any => CopySource::new_from_merge(
787 787 current_merge,
788 788 src_major,
789 789 src_minor,
790 790 ),
791 791 };
792 792 to_minor(dest, &src);
793 793 to_major(dest, &src);
794 794 } else {
795 795 match pick {
796 796 MergePick::Major => to_minor(dest, src_major),
797 797 MergePick::Minor => to_major(dest, src_minor),
798 798 // If the two entry are identical, no need to do
799 799 // anything (but diff should not have yield them)
800 800 MergePick::Any => unreachable!(),
801 801 }
802 802 }
803 803 }
804 804 };
805 805 }
806 806
807 807 let updates;
808 808 let mut result;
809 809 if override_major.is_empty() {
810 810 result = major
811 811 } else if override_minor.is_empty() {
812 812 result = minor
813 813 } else {
814 814 if override_minor.len() < override_major.len() {
815 815 updates = override_minor;
816 816 result = minor;
817 817 } else {
818 818 updates = override_major;
819 819 result = major;
820 820 }
821 821 for (k, v) in updates {
822 822 result.insert(k, v);
823 823 }
824 824 }
825 825 result
826 826 }
827 827 }
828 828
829 829 /// represent the side that should prevail when merging two
830 830 /// InternalPathCopies
831 831 enum MergePick {
832 832 /// The "major" (p1) side prevails
833 833 Major,
834 834 /// The "minor" (p2) side prevails
835 835 Minor,
836 836 /// Any side could be used (because they are the same)
837 837 Any,
838 838 }
839 839
840 840 /// decide which side prevails in case of conflicting values
841 841 #[allow(clippy::if_same_then_else)]
842 842 fn compare_value(
843 843 path_map: &TwoWayPathMap,
844 844 current_merge: Revision,
845 845 changes: &ChangedFiles,
846 846 dest: &PathToken,
847 847 src_minor: &CopySource,
848 848 src_major: &CopySource,
849 849 ) -> (MergePick, bool) {
850 if src_major.rev == current_merge {
851 if src_minor.rev == current_merge {
852 if src_major.path.is_none() {
853 // We cannot get different copy information for both p1 and p2
854 // from the same revision. Unless this was a
855 // deletion.
856 //
857 // However the deletion might come over different data on each
858 // branch.
859 let need_over = src_major.overwritten != src_minor.overwritten;
860 (MergePick::Any, need_over)
861 } else {
862 unreachable!();
863 }
864 } else {
865 // The last value comes the current merge, this value -will- win
866 // eventually.
867 (MergePick::Major, true)
868 }
850 if src_major == src_minor {
851 (MergePick::Any, false)
852 } else if src_major.rev == current_merge {
853 // minor is different according to per minor == major check earlier
854 debug_assert!(src_minor.rev != current_merge);
855
856 // The last value comes the current merge, this value -will- win
857 // eventually.
858 (MergePick::Major, true)
869 859 } else if src_minor.rev == current_merge {
870 860 // The last value comes the current merge, this value -will- win
871 861 // eventually.
872 862 (MergePick::Minor, true)
873 863 } else if src_major.path == src_minor.path {
864 debug_assert!(src_major.rev != src_major.rev);
874 865 // we have the same value, but from other source;
875 if src_major.rev == src_minor.rev {
876 // If the two entry are identical, they are both valid
877 debug_assert!(src_minor.overwritten == src_minor.overwritten);
878 (MergePick::Any, false)
879 } else if src_major.is_overwritten_by(src_minor) {
866 if src_major.is_overwritten_by(src_minor) {
880 867 (MergePick::Minor, false)
881 868 } else if src_minor.is_overwritten_by(src_major) {
882 869 (MergePick::Major, false)
883 870 } else {
884 871 (MergePick::Any, true)
885 872 }
886 } else if src_major.rev == src_minor.rev {
887 // We cannot get copy information for both p1 and p2 in the
888 // same rev. So this is the same value.
889 unreachable!(
890 "conflicting information from p1 and p2 in the same revision"
891 );
892 873 } else {
874 debug_assert!(src_major.rev != src_major.rev);
893 875 let dest_path = path_map.untokenize(*dest);
894 876 let action = changes.get_merge_case(dest_path);
895 877 if src_minor.path.is_some()
896 878 && src_major.path.is_none()
897 879 && action == MergeCase::Salvaged
898 880 {
899 881 // If the file is "deleted" in the major side but was
900 882 // salvaged by the merge, we keep the minor side alive
901 883 (MergePick::Minor, true)
902 884 } else if src_major.path.is_some()
903 885 && src_minor.path.is_none()
904 886 && action == MergeCase::Salvaged
905 887 {
906 888 // If the file is "deleted" in the minor side but was
907 889 // salvaged by the merge, unconditionnaly preserve the
908 890 // major side.
909 891 (MergePick::Major, true)
910 892 } else if src_minor.is_overwritten_by(src_major) {
911 893 // The information from the minor version are strictly older than
912 894 // the major version
913 895 if action == MergeCase::Merged {
914 896 // If the file was actively merged, its means some non-copy
915 897 // activity happened on the other branch. It
916 898 // mean the older copy information are still relevant.
917 899 //
918 900 // The major side wins such conflict.
919 901 (MergePick::Major, true)
920 902 } else {
921 903 // No activity on the minor branch, pick the newer one.
922 904 (MergePick::Major, false)
923 905 }
924 906 } else if src_major.is_overwritten_by(src_minor) {
925 907 if action == MergeCase::Merged {
926 908 // If the file was actively merged, its means some non-copy
927 909 // activity happened on the other branch. It
928 910 // mean the older copy information are still relevant.
929 911 //
930 912 // The major side wins such conflict.
931 913 (MergePick::Major, true)
932 914 } else {
933 915 // No activity on the minor branch, pick the newer one.
934 916 (MergePick::Minor, false)
935 917 }
936 918 } else if src_minor.path.is_none() {
937 919 // the minor side has no relevant information, pick the alive one
938 920 (MergePick::Major, true)
939 921 } else if src_major.path.is_none() {
940 922 // the major side has no relevant information, pick the alive one
941 923 (MergePick::Minor, true)
942 924 } else {
943 925 // by default the major side wins
944 926 (MergePick::Major, true)
945 927 }
946 928 }
947 929 }
General Comments 0
You need to be logged in to leave comments. Login now