copies-rust: use simpler overwrite when value on both side are identical...
marmoute -
r47326:aa19d60a default
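
The change below is in the `Action::Removed` handling of `chain_changes`: when the removed file has a copy entry on both sides and the two `CopySource` values compare equal, the new code uses the plain `mark_delete` instead of `mark_delete_with_pair`. A minimal sketch of why the two calls agree in that case, using a simplified stand-in for `CopySource` (no `path` field; the revision numbers are arbitrary):

    use std::collections::HashSet;

    type Revision = i32;

    #[derive(Clone, Debug, PartialEq)]
    struct Source {
        rev: Revision,
        // revisions directly or indirectly overwritten by this value
        overwritten: HashSet<Revision>,
    }

    impl Source {
        // Same logic as `CopySource::mark_delete` below.
        fn mark_delete(&mut self, rev: Revision) {
            self.overwritten.insert(self.rev);
            self.rev = rev;
        }

        // Same logic as `CopySource::mark_delete_with_pair` below.
        fn mark_delete_with_pair(&mut self, rev: Revision, other: &Self) {
            self.overwritten.insert(self.rev);
            if other.rev != rev {
                self.overwritten.insert(other.rev);
            }
            self.overwritten.extend(other.overwritten.iter().copied());
            self.rev = rev;
        }
    }

    fn main() {
        // Identical values on both sides (same rev, same overwritten set).
        let cs1 = Source { rev: 5, overwritten: HashSet::from([1, 2]) };
        let cs2 = cs1.clone();

        let mut simple = cs1.clone();
        simple.mark_delete(10);

        let mut paired = cs1;
        paired.mark_delete_with_pair(10, &cs2);

        // The pair variant only re-inserts data that is already there, so
        // the simpler call gives the same result.
        assert_eq!(simple, paired);
    }

When the two sides differ, the pair variant is still needed so that the other parent's `rev` and `overwritten` history also end up in the surviving entry.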
@@ -1,929 +1,933 @@
1 1 use crate::utils::hg_path::HgPath;
2 2 use crate::utils::hg_path::HgPathBuf;
3 3 use crate::Revision;
4 4 use crate::NULL_REVISION;
5 5
6 6 use im_rc::ordmap::DiffItem;
7 7 use im_rc::ordmap::Entry;
8 8 use im_rc::ordmap::OrdMap;
9 9
10 10 use std::cmp::Ordering;
11 11 use std::collections::HashMap;
12 12 use std::collections::HashSet;
13 13 use std::convert::TryInto;
14 14
15 15 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
16 16
17 17 type PathToken = usize;
18 18
19 19 #[derive(Clone, Debug)]
20 20 struct CopySource {
21 21 /// revision at which the copy information was added
22 22 rev: Revision,
23 23 /// the copy source (set to None in case of deletion of the associated
24 24 /// key)
25 25 path: Option<PathToken>,
26 26 /// a set of previous `CopySource.rev` values directly or indirectly
27 27 /// overwritten by this one.
28 28 overwritten: HashSet<Revision>,
29 29 }
30 30
31 31 impl CopySource {
32 32 /// create a new CopySource
33 33 ///
34 34 /// Use this when no previous copy source existed.
35 35 fn new(rev: Revision, path: Option<PathToken>) -> Self {
36 36 Self {
37 37 rev,
38 38 path,
39 39 overwritten: HashSet::new(),
40 40 }
41 41 }
42 42
43 43 /// create a new CopySource from merging two others
44 44 ///
45 45 /// Use this when merging two InternalPathCopies requires active merging of
46 46 /// some entries.
47 47 fn new_from_merge(rev: Revision, winner: &Self, loser: &Self) -> Self {
48 48 let mut overwritten = HashSet::new();
49 49 overwritten.extend(winner.overwritten.iter().copied());
50 50 overwritten.extend(loser.overwritten.iter().copied());
51 51 overwritten.insert(winner.rev);
52 52 overwritten.insert(loser.rev);
53 53 Self {
54 54 rev,
55 55 path: winner.path,
56 56 overwritten,
57 57 }
58 58 }
59 59
60 60 /// Update the value of a pre-existing CopySource
61 61 ///
62 62 /// Use this when recording copy information from parent → child edges
63 63 fn overwrite(&mut self, rev: Revision, path: Option<PathToken>) {
64 64 self.overwritten.insert(self.rev);
65 65 self.rev = rev;
66 66 self.path = path;
67 67 }
68 68
69 69 /// Mark pre-existing copy information as "dropped" by a file deletion
70 70 ///
71 71 /// Use this when recording copy information from parent → child edges
72 72 fn mark_delete(&mut self, rev: Revision) {
73 73 self.overwritten.insert(self.rev);
74 74 self.rev = rev;
75 75 self.path = None;
76 76 }
77 77
78 78 /// Mark pre-existing copy information as "dropped" by a file deletion,
79 79 /// also recording the other parent's entry as overwritten.
80 80 /// Use this when recording copy information from parent → child edges
81 81 fn mark_delete_with_pair(&mut self, rev: Revision, other: &Self) {
82 82 self.overwritten.insert(self.rev);
83 83 if other.rev != rev {
84 84 self.overwritten.insert(other.rev);
85 85 }
86 86 self.overwritten.extend(other.overwritten.iter().copied());
87 87 self.rev = rev;
88 88 self.path = None;
89 89 }
90 90
91 91 fn is_overwritten_by(&self, other: &Self) -> bool {
92 92 other.overwritten.contains(&self.rev)
93 93 }
94 94 }
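// A hypothetical sequence (with `a` and `b` standing for arbitrary
// PathTokens) to make the overwrite chain concrete:
//
//     let mut cs = CopySource::new(1, Some(a));
//     cs.overwrite(3, Some(b));
//     cs.overwrite(7, None);
//
// leaves `cs.rev == 7`, `cs.path == None` and `cs.overwritten == {1, 3}`,
// so any older CopySource whose `rev` is 1 or 3 is_overwritten_by(&cs).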
95 95
96 96 // For the same "dest", content generated for a given revision will always be
97 97 // the same.
98 98 impl PartialEq for CopySource {
99 99 fn eq(&self, other: &Self) -> bool {
100 100 #[cfg(debug_assertions)]
101 101 {
102 102 if self.rev == other.rev {
103 103 debug_assert!(self.path == other.path);
104 104 debug_assert!(self.overwritten == other.overwritten);
105 105 }
106 106 }
107 107 self.rev == other.rev
108 108 }
109 109 }
110 110
111 111 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
112 112 type InternalPathCopies = OrdMap<PathToken, CopySource>;
113 113
114 114 /// Holds parent 1, parent 2 and the relevant file actions.
115 115 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
116 116
117 117 /// Represents the files affected by a changeset.
118 118 ///
119 119 /// This holds a subset of mercurial.metadata.ChangingFiles as we do not need
120 120 /// all the data categories tracked by it.
123 123 pub struct ChangedFiles<'a> {
124 124 nb_items: u32,
125 125 index: &'a [u8],
126 126 data: &'a [u8],
127 127 }
128 128
129 129 /// Represent active changes that affect the copy tracing.
130 130 enum Action<'a> {
131 131 /// The parent → children edge is removing a file
132 132 ///
133 133 /// (actually, this could be the edge from the other parent, but it does
134 134 /// not matter)
135 135 Removed(&'a HgPath),
136 136 /// The parent → children edge introduces copy information between (dest,
137 137 /// source)
138 138 CopiedFromP1(&'a HgPath, &'a HgPath),
139 139 CopiedFromP2(&'a HgPath, &'a HgPath),
140 140 }
141 141
142 142 /// This expresses the possible "special" cases we can get in a merge
143 143 ///
144 144 /// See mercurial/metadata.py for details on these values.
145 145 #[derive(PartialEq)]
146 146 enum MergeCase {
147 147 /// Merged: file had history on both sides that needed to be merged
148 148 Merged,
149 149 /// Salvaged: file was candidate for deletion, but survived the merge
150 150 Salvaged,
151 151 /// Normal: Not one of the two cases above
152 152 Normal,
153 153 }
154 154
155 155 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
156 156
157 157 const EMPTY: &[u8] = b"";
158 158 const COPY_MASK: u8 = 3;
159 159 const P1_COPY: u8 = 2;
160 160 const P2_COPY: u8 = 3;
161 161 const ACTION_MASK: u8 = 28;
162 162 const REMOVED: u8 = 12;
163 163 const MERGED: u8 = 8;
164 164 const SALVAGED: u8 = 16;
165 165
166 166 impl<'a> ChangedFiles<'a> {
167 167 const INDEX_START: usize = 4;
168 168 const ENTRY_SIZE: u32 = 9;
169 169 const FILENAME_START: u32 = 1;
170 170 const COPY_SOURCE_START: u32 = 5;
171 171
172 172 pub fn new(data: &'a [u8]) -> Self {
173 173 assert!(
174 174 data.len() >= 4,
175 175 "data size ({}) is too small to contain the header (4)",
176 176 data.len()
177 177 );
178 178 let nb_items_raw: [u8; 4] = (&data[0..=3])
179 179 .try_into()
180 180 .expect("failed to turn 4 bytes into 4 bytes");
181 181 let nb_items = u32::from_be_bytes(nb_items_raw);
182 182
183 183 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
184 184 let index_end = Self::INDEX_START + index_size;
185 185
186 186 assert!(
187 187 data.len() >= index_end,
188 188 "data size ({}) is too small to fit the index_data ({})",
189 189 data.len(),
190 190 index_end
191 191 );
192 192
193 193 let ret = ChangedFiles {
194 194 nb_items,
195 195 index: &data[Self::INDEX_START..index_end],
196 196 data: &data[index_end..],
197 197 };
198 198 let max_data = ret.filename_end(nb_items - 1) as usize;
199 199 assert!(
200 200 ret.data.len() >= max_data,
201 201 "data size ({}) is too small to fit all data ({})",
202 202 data.len(),
203 203 index_end + max_data
204 204 );
205 205 ret
206 206 }
207 207
208 208 pub fn new_empty() -> Self {
209 209 ChangedFiles {
210 210 nb_items: 0,
211 211 index: EMPTY,
212 212 data: EMPTY,
213 213 }
214 214 }
215 215
216 216 /// internal function to return an individual entry at a given index
217 217 fn entry(&'a self, idx: u32) -> FileChange<'a> {
218 218 if idx >= self.nb_items {
219 219 panic!(
220 220 "index for entry is higher that the number of file {} >= {}",
221 221 idx, self.nb_items
222 222 )
223 223 }
224 224 let flags = self.flags(idx);
225 225 let filename = self.filename(idx);
226 226 let copy_idx = self.copy_idx(idx);
227 227 let copy_source = self.filename(copy_idx);
228 228 (flags, filename, copy_source)
229 229 }
230 230
231 231 /// internal function to return the filename of the entry at a given index
232 232 fn filename(&self, idx: u32) -> &HgPath {
233 233 let filename_start;
234 234 if idx == 0 {
235 235 filename_start = 0;
236 236 } else {
237 237 filename_start = self.filename_end(idx - 1)
238 238 }
239 239 let filename_end = self.filename_end(idx);
240 240 let filename_start = filename_start as usize;
241 241 let filename_end = filename_end as usize;
242 242 HgPath::new(&self.data[filename_start..filename_end])
243 243 }
244 244
245 245 /// internal function to return the flag field of the entry at a given
246 246 /// index
247 247 fn flags(&self, idx: u32) -> u8 {
248 248 let idx = idx as usize;
249 249 self.index[idx * (Self::ENTRY_SIZE as usize)]
250 250 }
251 251
252 252 /// internal function to return the end of a filename part at a given index
253 253 fn filename_end(&self, idx: u32) -> u32 {
254 254 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
255 255 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
256 256 let start = start as usize;
257 257 let end = end as usize;
258 258 let raw = (&self.index[start..end])
259 259 .try_into()
260 260 .expect("failed to turn 4 bytes into 4 bytes");
261 261 u32::from_be_bytes(raw)
262 262 }
263 263
264 264 /// internal function to return index of the copy source of the entry at a
265 265 /// given index
266 266 fn copy_idx(&self, idx: u32) -> u32 {
267 267 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
268 268 let end = (idx + 1) * Self::ENTRY_SIZE;
269 269 let start = start as usize;
270 270 let end = end as usize;
271 271 let raw = (&self.index[start..end])
272 272 .try_into()
273 273 .expect("failed to turn 4 bytes into 4 bytes");
274 274 u32::from_be_bytes(raw)
275 275 }
276 276
277 277 /// Return an iterator over all the `Action` in this instance.
278 278 fn iter_actions(&self) -> ActionsIterator {
279 279 ActionsIterator {
280 280 changes: &self,
281 281 current: 0,
282 282 }
283 283 }
284 284
285 285 /// return the MergeCase value associated with a filename
286 286 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
287 287 if self.nb_items == 0 {
288 288 return MergeCase::Normal;
289 289 }
290 290 let mut low_part = 0;
291 291 let mut high_part = self.nb_items;
292 292
293 293 while low_part < high_part {
294 294 let cursor = (low_part + high_part - 1) / 2;
295 295 let (flags, filename, _source) = self.entry(cursor);
296 296 match path.cmp(filename) {
297 297 Ordering::Less => low_part = cursor + 1,
298 298 Ordering::Greater => high_part = cursor,
299 299 Ordering::Equal => {
300 300 return match flags & ACTION_MASK {
301 301 MERGED => MergeCase::Merged,
302 302 SALVAGED => MergeCase::Salvaged,
303 303 _ => MergeCase::Normal,
304 304 };
305 305 }
306 306 }
307 307 }
308 308 MergeCase::Normal
309 309 }
310 310 }
311 311
312 312 struct ActionsIterator<'a> {
313 313 changes: &'a ChangedFiles<'a>,
314 314 current: u32,
315 315 }
316 316
317 317 impl<'a> Iterator for ActionsIterator<'a> {
318 318 type Item = Action<'a>;
319 319
320 320 fn next(&mut self) -> Option<Action<'a>> {
321 321 while self.current < self.changes.nb_items {
322 322 let (flags, file, source) = self.changes.entry(self.current);
323 323 self.current += 1;
324 324 if (flags & ACTION_MASK) == REMOVED {
325 325 return Some(Action::Removed(file));
326 326 }
327 327 let copy = flags & COPY_MASK;
328 328 if copy == P1_COPY {
329 329 return Some(Action::CopiedFromP1(file, source));
330 330 } else if copy == P2_COPY {
331 331 return Some(Action::CopiedFromP2(file, source));
332 332 }
333 333 }
334 334 return None;
335 335 }
336 336 }
337 337
338 338 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
339 339 /// ChangedFiles
340 340 ///
341 341 /// It is passed to the RevInfoMaker callback which can assign any necessary
342 342 /// content to the `data` attribute. The copy tracing code is responsible for
343 343 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
344 344 pub struct DataHolder<D> {
345 345 /// RevInfoMaker callback should assign data referenced by the
346 346 /// ChangedFiles struct it returns to this attribute. The DataHolder
347 347 /// lifetime will be at least as long as the ChangedFiles one.
348 348 pub data: Option<D>,
349 349 }
350 350
351 351 pub type RevInfoMaker<'a, D> =
352 352 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
353 353
354 354 /// A small "tokenizer" responsible for turning full HgPath into lighter
355 355 /// PathToken
356 356 ///
357 357 /// Dealing with small objects like integers is much faster, so HgPath inputs are
358 358 /// turned into integer "PathToken"s and converted back at the end.
359 359 #[derive(Clone, Debug, Default)]
360 360 struct TwoWayPathMap {
361 361 token: HashMap<HgPathBuf, PathToken>,
362 362 path: Vec<HgPathBuf>,
363 363 }
364 364
365 365 impl TwoWayPathMap {
366 366 fn tokenize(&mut self, path: &HgPath) -> PathToken {
367 367 match self.token.get(path) {
368 368 Some(a) => *a,
369 369 None => {
370 370 let a = self.token.len();
371 371 let buf = path.to_owned();
372 372 self.path.push(buf.clone());
373 373 self.token.insert(buf, a);
374 374 a
375 375 }
376 376 }
377 377 }
378 378
379 379 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
380 380 assert!(token < self.path.len(), "Unknown token: {}", token);
381 381 &self.path[token]
382 382 }
383 383 }
384 384
385 385 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
386 386 ///
387 387 /// Arguments are:
388 388 ///
389 389 /// revs: all revisions to be considered
390 390 /// children_count: a {parent → number of children} mapping
391 391 /// target_rev: the final revision we are combining copies to
392 392 /// rev_info(rev): callback to get revision information:
393 393 /// * first parent
394 394 /// * second parent
395 395 /// * ChangedFiles
398 398 pub fn combine_changeset_copies<D>(
399 399 revs: Vec<Revision>,
400 400 mut children_count: HashMap<Revision, usize>,
401 401 target_rev: Revision,
402 402 rev_info: RevInfoMaker<D>,
403 403 ) -> PathCopies {
404 404 let mut all_copies = HashMap::new();
405 405
406 406 let mut path_map = TwoWayPathMap::default();
407 407
408 408 for rev in revs {
409 409 let mut d: DataHolder<D> = DataHolder { data: None };
410 410 let (p1, p2, changes) = rev_info(rev, &mut d);
411 411
412 412 // We will chain the copies information accumulated for the parent with
413 413 // the individual copies information of the current revision, creating a
414 414 // new TimeStampedPath for each `rev` → `children` vertex.
415 415 // Retrieve data computed in a previous iteration
416 416 let p1_copies = match p1 {
417 417 NULL_REVISION => None,
418 418 _ => get_and_clean_parent_copies(
419 419 &mut all_copies,
420 420 &mut children_count,
421 421 p1,
422 422 ), // will be None if the vertex is not to be traversed
423 423 };
424 424 let p2_copies = match p2 {
425 425 NULL_REVISION => None,
426 426 _ => get_and_clean_parent_copies(
427 427 &mut all_copies,
428 428 &mut children_count,
429 429 p2,
430 430 ), // will be None if the vertex is not to be traversed
431 431 };
432 432 // combine it with data for that revision
433 433 let (p1_copies, p2_copies) =
434 434 chain_changes(&mut path_map, p1_copies, p2_copies, &changes, rev);
435 435 let copies = match (p1_copies, p2_copies) {
436 436 (None, None) => None,
437 437 (c, None) => c,
438 438 (None, c) => c,
439 439 (Some(p1_copies), Some(p2_copies)) => Some(merge_copies_dict(
440 440 &path_map, rev, p2_copies, p1_copies, &changes,
441 441 )),
442 442 };
443 443 if let Some(c) = copies {
444 444 all_copies.insert(rev, c);
445 445 }
446 446 }
447 447
448 448 // Drop internal information (like the timestamp) and return the final
449 449 // mapping.
450 450 let tt_result = all_copies
451 451 .remove(&target_rev)
452 452 .expect("target revision was not processed");
453 453 let mut result = PathCopies::default();
454 454 for (dest, tt_source) in tt_result {
455 455 if let Some(path) = tt_source.path {
456 456 let path_dest = path_map.untokenize(dest).to_owned();
457 457 let path_path = path_map.untokenize(path).to_owned();
458 458 result.insert(path_dest, path_path);
459 459 }
460 460 }
461 461 result
462 462 }
463 463
464 464 /// Fetch previously computed information
465 465 ///
466 466 /// If no other children are expected to need this information, we drop it from
467 467 /// the cache.
468 468 ///
469 469 /// If parent is not part of the set we are expected to walk, return None.
470 470 fn get_and_clean_parent_copies(
471 471 all_copies: &mut HashMap<Revision, InternalPathCopies>,
472 472 children_count: &mut HashMap<Revision, usize>,
473 473 parent_rev: Revision,
474 474 ) -> Option<InternalPathCopies> {
475 475 let count = children_count.get_mut(&parent_rev)?;
476 476 *count -= 1;
477 477 if *count == 0 {
478 478 match all_copies.remove(&parent_rev) {
479 479 Some(c) => Some(c),
480 480 None => Some(InternalPathCopies::default()),
481 481 }
482 482 } else {
483 483 match all_copies.get(&parent_rev) {
484 484 Some(c) => Some(c.clone()),
485 485 None => Some(InternalPathCopies::default()),
486 486 }
487 487 }
488 488 }
489 489
490 490 /// Combine ChangedFiles with some existing PathCopies information and return
491 491 /// the result
492 492 fn chain_changes(
493 493 path_map: &mut TwoWayPathMap,
494 494 base_p1_copies: Option<InternalPathCopies>,
495 495 base_p2_copies: Option<InternalPathCopies>,
496 496 changes: &ChangedFiles,
497 497 current_rev: Revision,
498 498 ) -> (Option<InternalPathCopies>, Option<InternalPathCopies>) {
499 499 // Fast path the "nothing to do" case.
500 500 if let (None, None) = (&base_p1_copies, &base_p2_copies) {
501 501 return (None, None);
502 502 }
503 503
504 504 let mut p1_copies = base_p1_copies.clone();
505 505 let mut p2_copies = base_p2_copies.clone();
506 506 for action in changes.iter_actions() {
507 507 match action {
508 508 Action::CopiedFromP1(path_dest, path_source) => {
509 509 match &mut p1_copies {
510 510 None => (), // This is not a vertex we should process.
511 511 Some(copies) => add_one_copy(
512 512 current_rev,
513 513 path_map,
514 514 copies,
515 515 base_p1_copies.as_ref().unwrap(),
516 516 path_dest,
517 517 path_source,
518 518 ),
519 519 }
520 520 }
521 521 Action::CopiedFromP2(path_dest, path_source) => {
522 522 match &mut p2_copies {
523 523 None => (), // This is not a vertex we should process.
524 524 Some(copies) => add_one_copy(
525 525 current_rev,
526 526 path_map,
527 527 copies,
528 528 base_p2_copies.as_ref().unwrap(),
529 529 path_dest,
530 530 path_source,
531 531 ),
532 532 }
533 533 }
534 534 Action::Removed(deleted_path) => {
535 535 // We must drop copy information for removed files.
536 536 //
537 537 // We need to explicitly record them as dropped to
538 538 // propagate this information when merging two
539 539 // InternalPathCopies objects.
540 540 let deleted = path_map.tokenize(deleted_path);
541 541
542 542 let p1_entry = match &mut p1_copies {
543 543 None => None,
544 544 Some(copies) => match copies.entry(deleted) {
545 545 Entry::Occupied(e) => Some(e),
546 546 Entry::Vacant(_) => None,
547 547 },
548 548 };
549 549 let p2_entry = match &mut p2_copies {
550 550 None => None,
551 551 Some(copies) => match copies.entry(deleted) {
552 552 Entry::Occupied(e) => Some(e),
553 553 Entry::Vacant(_) => None,
554 554 },
555 555 };
556 556
557 557 match (p1_entry, p2_entry) {
558 558 (None, None) => (),
559 559 (Some(mut e), None) => {
560 560 e.get_mut().mark_delete(current_rev)
561 561 }
562 562 (None, Some(mut e)) => {
563 563 e.get_mut().mark_delete(current_rev)
564 564 }
565 565 (Some(mut e1), Some(mut e2)) => {
566 566 let cs1 = e1.get_mut();
567 567 let cs2 = e2.get();
568 cs1.mark_delete_with_pair(current_rev, &cs2);
568 if cs1 == cs2 {
569 cs1.mark_delete(current_rev);
570 } else {
571 cs1.mark_delete_with_pair(current_rev, &cs2);
572 }
569 573 e2.insert(cs1.clone());
570 574 }
571 575 }
572 576 }
573 577 }
574 578 }
575 579 (p1_copies, p2_copies)
576 580 }
577 581
578 582 // Insert one new copy information entry in an InternalPathCopies
579 583 //
580 584 // This deals with chaining and overwrites.
581 585 fn add_one_copy(
582 586 current_rev: Revision,
583 587 path_map: &mut TwoWayPathMap,
584 588 copies: &mut InternalPathCopies,
585 589 base_copies: &InternalPathCopies,
586 590 path_dest: &HgPath,
587 591 path_source: &HgPath,
588 592 ) {
589 593 let dest = path_map.tokenize(path_dest);
590 594 let source = path_map.tokenize(path_source);
591 595 let entry;
592 596 if let Some(v) = base_copies.get(&source) {
593 597 entry = match &v.path {
594 598 Some(path) => Some((*(path)).to_owned()),
595 599 None => Some(source.to_owned()),
596 600 }
597 601 } else {
598 602 entry = Some(source.to_owned());
599 603 }
600 604 // Each new entry is introduced by the child; we
601 605 // record this information as we will need it to make
602 606 // the right decision when merging conflicting copy
603 607 // information. See merge_copies_dict for details.
604 608 match copies.entry(dest) {
605 609 Entry::Vacant(slot) => {
606 610 let ttpc = CopySource::new(current_rev, entry);
607 611 slot.insert(ttpc);
608 612 }
609 613 Entry::Occupied(mut slot) => {
610 614 let ttpc = slot.get_mut();
611 615 ttpc.overwrite(current_rev, entry);
612 616 }
613 617 }
614 618 }
615 619
616 620 /// merge two copies-mapping together, minor and major
617 621 ///
618 622 /// In case of conflict, value from "major" will be picked, unless in some
619 623 /// cases. See inline documentation for details.
620 624 fn merge_copies_dict(
621 625 path_map: &TwoWayPathMap,
622 626 current_merge: Revision,
623 627 mut minor: InternalPathCopies,
624 628 mut major: InternalPathCopies,
625 629 changes: &ChangedFiles,
626 630 ) -> InternalPathCopies {
627 631 // This closure exists as a temporary helper while multiple developers are
628 632 // actively working on this code. Feel free to re-inline it once this
629 633 // code is more settled.
630 634 let cmp_value =
631 635 |dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| {
632 636 compare_value(
633 637 path_map,
634 638 current_merge,
635 639 changes,
636 640 dest,
637 641 src_minor,
638 642 src_major,
639 643 )
640 644 };
641 645 if minor.is_empty() {
642 646 major
643 647 } else if major.is_empty() {
644 648 minor
645 649 } else if minor.len() * 2 < major.len() {
646 650 // Let's say we are merging two InternalPathCopies instances A and B.
647 651 //
648 652 // If A contains N items, the merge result will never contain more
649 653 // than N values different from the ones in A.
650 654 //
651 655 // If B contains M items, with M > N, the merge result will always
652 656 // contain a minimum of M - N values different from the ones in
653 657 // A.
654 658 //
655 659 // As a result, if N < (M - N), we know that simply iterating over A will
656 660 // yield fewer differences than iterating over the difference
657 661 // between A and B.
658 662 //
659 663 // This helps performance a lot in cases where a tiny
660 664 // InternalPathCopies is merged with a much larger one.
661 665 for (dest, src_minor) in minor {
662 666 let src_major = major.get(&dest);
663 667 match src_major {
664 668 None => {
665 669 major.insert(dest, src_minor);
666 670 }
667 671 Some(src_major) => {
668 672 let (pick, overwrite) =
669 673 cmp_value(&dest, &src_minor, src_major);
670 674 if overwrite {
671 675 let src = match pick {
672 676 MergePick::Major => CopySource::new_from_merge(
673 677 current_merge,
674 678 src_major,
675 679 &src_minor,
676 680 ),
677 681 MergePick::Minor => CopySource::new_from_merge(
678 682 current_merge,
679 683 &src_minor,
680 684 src_major,
681 685 ),
682 686 MergePick::Any => CopySource::new_from_merge(
683 687 current_merge,
684 688 src_major,
685 689 &src_minor,
686 690 ),
687 691 };
688 692 major.insert(dest, src);
689 693 } else {
690 694 match pick {
691 695 MergePick::Any | MergePick::Major => None,
692 696 MergePick::Minor => major.insert(dest, src_minor),
693 697 };
694 698 }
695 699 }
696 700 };
697 701 }
698 702 major
699 703 } else if major.len() * 2 < minor.len() {
700 704 // This uses the same rationale as the previous block.
701 705 // (Check previous block documentation for details.)
702 706 for (dest, src_major) in major {
703 707 let src_minor = minor.get(&dest);
704 708 match src_minor {
705 709 None => {
706 710 minor.insert(dest, src_major);
707 711 }
708 712 Some(src_minor) => {
709 713 let (pick, overwrite) =
710 714 cmp_value(&dest, src_minor, &src_major);
711 715 if overwrite {
712 716 let src = match pick {
713 717 MergePick::Major => CopySource::new_from_merge(
714 718 current_merge,
715 719 &src_major,
716 720 src_minor,
717 721 ),
718 722 MergePick::Minor => CopySource::new_from_merge(
719 723 current_merge,
720 724 src_minor,
721 725 &src_major,
722 726 ),
723 727 MergePick::Any => CopySource::new_from_merge(
724 728 current_merge,
725 729 &src_major,
726 730 src_minor,
727 731 ),
728 732 };
729 733 minor.insert(dest, src);
730 734 } else {
731 735 match pick {
732 736 MergePick::Any | MergePick::Minor => None,
733 737 MergePick::Major => minor.insert(dest, src_major),
734 738 };
735 739 }
736 740 }
737 741 };
738 742 }
739 743 minor
740 744 } else {
741 745 let mut override_minor = Vec::new();
742 746 let mut override_major = Vec::new();
743 747
744 748 let mut to_major = |k: &PathToken, v: &CopySource| {
745 749 override_major.push((k.clone(), v.clone()))
746 750 };
747 751 let mut to_minor = |k: &PathToken, v: &CopySource| {
748 752 override_minor.push((k.clone(), v.clone()))
749 753 };
750 754
751 755 // The diff function leverages detection of the identical subparts if
752 756 // minor and major have some common ancestors. This makes it very
753 757 // fast in most cases.
754 758 //
755 759 // In cases where the two maps are vastly different in size, the current
756 760 // approach is still slowish because the iteration will iterate over
757 761 // all the "exclusive" content of the larger one. This situation can be
758 762 // frequent when the subgraph of revisions we are processing has a lot
759 763 // of roots. Each root adds its own fully new map to the mix (and
760 764 // likely a small map, if the path from the root to the "main path" is
761 765 // small).
762 766 //
763 767 // We could do better by detecting such situations and processing them
764 768 // differently.
765 769 for d in minor.diff(&major) {
766 770 match d {
767 771 DiffItem::Add(k, v) => to_minor(k, v),
768 772 DiffItem::Remove(k, v) => to_major(k, v),
769 773 DiffItem::Update { old, new } => {
770 774 let (dest, src_major) = new;
771 775 let (_, src_minor) = old;
772 776 let (pick, overwrite) =
773 777 cmp_value(dest, src_minor, src_major);
774 778 if overwrite {
775 779 let src = match pick {
776 780 MergePick::Major => CopySource::new_from_merge(
777 781 current_merge,
778 782 src_major,
779 783 src_minor,
780 784 ),
781 785 MergePick::Minor => CopySource::new_from_merge(
782 786 current_merge,
783 787 src_minor,
784 788 src_major,
785 789 ),
786 790 MergePick::Any => CopySource::new_from_merge(
787 791 current_merge,
788 792 src_major,
789 793 src_minor,
790 794 ),
791 795 };
792 796 to_minor(dest, &src);
793 797 to_major(dest, &src);
794 798 } else {
795 799 match pick {
796 800 MergePick::Major => to_minor(dest, src_major),
797 801 MergePick::Minor => to_major(dest, src_minor),
798 802 // If the two entries are identical, there is nothing to do
799 803 // (but diff should not have yielded them)
800 804 MergePick::Any => unreachable!(),
801 805 }
802 806 }
803 807 }
804 808 };
805 809 }
806 810
807 811 let updates;
808 812 let mut result;
809 813 if override_major.is_empty() {
810 814 result = major
811 815 } else if override_minor.is_empty() {
812 816 result = minor
813 817 } else {
814 818 if override_minor.len() < override_major.len() {
815 819 updates = override_minor;
816 820 result = minor;
817 821 } else {
818 822 updates = override_major;
819 823 result = major;
820 824 }
821 825 for (k, v) in updates {
822 826 result.insert(k, v);
823 827 }
824 828 }
825 829 result
826 830 }
827 831 }
828 832
829 833 /// Represents the side that should prevail when merging two
830 834 /// InternalPathCopies
831 835 enum MergePick {
832 836 /// The "major" (p1) side prevails
833 837 Major,
834 838 /// The "minor" (p2) side prevails
835 839 Minor,
836 840 /// Any side could be used (because they are the same)
837 841 Any,
838 842 }
839 843
840 844 /// decide which side prevails in case of conflicting values
841 845 #[allow(clippy::if_same_then_else)]
842 846 fn compare_value(
843 847 path_map: &TwoWayPathMap,
844 848 current_merge: Revision,
845 849 changes: &ChangedFiles,
846 850 dest: &PathToken,
847 851 src_minor: &CopySource,
848 852 src_major: &CopySource,
849 853 ) -> (MergePick, bool) {
850 854 if src_major == src_minor {
851 855 (MergePick::Any, false)
852 856 } else if src_major.rev == current_merge {
853 857 // minor is different, per the minor == major check earlier
854 858 debug_assert!(src_minor.rev != current_merge);
855 859
856 860 // The last value comes from the current merge; this value -will- win
857 861 // eventually.
858 862 (MergePick::Major, true)
859 863 } else if src_minor.rev == current_merge {
860 864 // The last value comes from the current merge; this value -will- win
861 865 // eventually.
862 866 (MergePick::Minor, true)
863 867 } else if src_major.path == src_minor.path {
864 868 debug_assert!(src_major.rev != src_minor.rev);
865 869 // we have the same value, but from another source;
866 870 if src_major.is_overwritten_by(src_minor) {
867 871 (MergePick::Minor, false)
868 872 } else if src_minor.is_overwritten_by(src_major) {
869 873 (MergePick::Major, false)
870 874 } else {
871 875 (MergePick::Any, true)
872 876 }
873 877 } else {
874 878 debug_assert!(src_major.rev != src_minor.rev);
875 879 let dest_path = path_map.untokenize(*dest);
876 880 let action = changes.get_merge_case(dest_path);
877 881 if src_minor.path.is_some()
878 882 && src_major.path.is_none()
879 883 && action == MergeCase::Salvaged
880 884 {
881 885 // If the file is "deleted" in the major side but was
882 886 // salvaged by the merge, we keep the minor side alive
883 887 (MergePick::Minor, true)
884 888 } else if src_major.path.is_some()
885 889 && src_minor.path.is_none()
886 890 && action == MergeCase::Salvaged
887 891 {
888 892 // If the file is "deleted" in the minor side but was
889 893 // salvaged by the merge, unconditionally preserve the
890 894 // major side.
891 895 (MergePick::Major, true)
892 896 } else if src_minor.is_overwritten_by(src_major) {
893 897 // The information from the minor version is strictly older than
894 898 // the major version
895 899 if action == MergeCase::Merged {
896 900 // If the file was actively merged, it means some non-copy
897 901 // activity happened on the other branch. It
898 902 // means the older copy information is still relevant.
899 903 //
900 904 // The major side wins such conflict.
901 905 (MergePick::Major, true)
902 906 } else {
903 907 // No activity on the minor branch, pick the newer one.
904 908 (MergePick::Major, false)
905 909 }
906 910 } else if src_major.is_overwritten_by(src_minor) {
907 911 if action == MergeCase::Merged {
908 912 // If the file was actively merged, it means some non-copy
909 913 // activity happened on the other branch. It
910 914 // means the older copy information is still relevant.
911 915 //
912 916 // The major side wins such conflict.
913 917 (MergePick::Major, true)
914 918 } else {
915 919 // No activity on the minor branch, pick the newer one.
916 920 (MergePick::Minor, false)
917 921 }
918 922 } else if src_minor.path.is_none() {
919 923 // the minor side has no relevant information, pick the alive one
920 924 (MergePick::Major, true)
921 925 } else if src_major.path.is_none() {
922 926 // the major side has no relevant information, pick the alive one
923 927 (MergePick::Minor, true)
924 928 } else {
925 929 // by default the major side wins
926 930 (MergePick::Major, true)
927 931 }
928 932 }
929 933 }
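
For reference, the byte layout consumed by `ChangedFiles::new` above is: a 4-byte big-endian entry count, then one 9-byte index record per entry (1 flag byte, a 4-byte big-endian end offset of the filename, a 4-byte big-endian index of the copy-source entry), then the concatenated filenames. The standalone sketch below hand-encodes two entries and decodes them the same way `flags`, `filename_end` and `copy_idx` do; the encoding helpers are hypothetical, and the copy index stored for the non-copy entry is arbitrary.

    use std::convert::TryInto;

    const ENTRY_SIZE: usize = 9;
    const P1_COPY: u8 = 2;

    // End offset of the filename of entry `idx` (bytes 1..5 of its record).
    fn filename_end(index: &[u8], idx: usize) -> usize {
        let start = idx * ENTRY_SIZE + 1;
        u32::from_be_bytes(index[start..start + 4].try_into().unwrap()) as usize
    }

    // Filename of entry `idx`, sliced out of the concatenated data section.
    fn filename<'a>(index: &[u8], payload: &'a [u8], idx: usize) -> &'a [u8] {
        let start = if idx == 0 { 0 } else { filename_end(index, idx - 1) };
        &payload[start..filename_end(index, idx)]
    }

    fn main() {
        // Two entries: "dst" (flagged as copied from entry #1) and "src".
        let names: [&[u8]; 2] = [b"dst", b"src"];
        let mut blob = Vec::new();

        // 4-byte big-endian header: number of entries.
        blob.extend_from_slice(&(names.len() as u32).to_be_bytes());

        // Index: per entry, 1 flag byte, 4-byte filename end, 4-byte copy index.
        let mut end = 0u32;
        for (idx, name) in names.iter().enumerate() {
            end += name.len() as u32;
            blob.push(if idx == 0 { P1_COPY } else { 0 });
            blob.extend_from_slice(&end.to_be_bytes());
            blob.extend_from_slice(&1u32.to_be_bytes()); // copy source: entry #1
        }

        // Data section: the concatenated filenames.
        for name in &names {
            blob.extend_from_slice(name);
        }

        // Decode entry 0 again, mirroring `flags`, `filename` and `copy_idx`.
        let index = &blob[4..4 + names.len() * ENTRY_SIZE];
        let payload = &blob[4 + names.len() * ENTRY_SIZE..];
        let flags = index[0];
        let copy_idx = u32::from_be_bytes(index[5..9].try_into().unwrap()) as usize;
        assert_eq!(flags & 3, P1_COPY); // COPY_MASK selects the copy bits
        assert_eq!(filename(index, payload, 0), &b"dst"[..]);
        assert_eq!(filename(index, payload, copy_idx), &b"src"[..]);
    }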