##// END OF EJS Templates
copies-rust: extract the processing of a single copy information...
marmoute -
r47319:d2ad44b8 default
parent child Browse files
Show More
@@ -1,879 +1,900
1 1 use crate::utils::hg_path::HgPath;
2 2 use crate::utils::hg_path::HgPathBuf;
3 3 use crate::Revision;
4 4 use crate::NULL_REVISION;
5 5
6 6 use im_rc::ordmap::DiffItem;
7 7 use im_rc::ordmap::Entry;
8 8 use im_rc::ordmap::OrdMap;
9 9
10 10 use std::cmp::Ordering;
11 11 use std::collections::HashMap;
12 12 use std::collections::HashSet;
13 13 use std::convert::TryInto;
14 14
15 15 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
16 16
17 17 type PathToken = usize;
18 18
19 19 #[derive(Clone, Debug, PartialEq)]
20 20 struct CopySource {
21 21 /// revision at which the copy information was added
22 22 rev: Revision,
23 23 /// the copy source, (Set to None in case of deletion of the associated
24 24 /// key)
25 25 path: Option<PathToken>,
26 26 /// a set of previous `CopySource.rev` value directly or indirectly
27 27 /// overwritten by this one.
28 28 overwritten: HashSet<Revision>,
29 29 }
30 30
31 31 impl CopySource {
32 32 /// create a new CopySource
33 33 ///
34 34 /// Use this when no previous copy source existed.
35 35 fn new(rev: Revision, path: Option<PathToken>) -> Self {
36 36 Self {
37 37 rev,
38 38 path,
39 39 overwritten: HashSet::new(),
40 40 }
41 41 }
42 42
43 43 /// create a new CopySource from merging two others
44 44 ///
45 45 /// Use this when merging two InternalPathCopies requires active merging of
46 46 /// some entries.
47 47 fn new_from_merge(rev: Revision, winner: &Self, loser: &Self) -> Self {
48 48 let mut overwritten = HashSet::new();
49 49 overwritten.extend(winner.overwritten.iter().copied());
50 50 overwritten.extend(loser.overwritten.iter().copied());
51 51 overwritten.insert(winner.rev);
52 52 overwritten.insert(loser.rev);
53 53 Self {
54 54 rev,
55 55 path: winner.path,
56 56 overwritten: overwritten,
57 57 }
58 58 }
59 59
60 60 /// Update the value of a pre-existing CopySource
61 61 ///
62 62 /// Use this when recording copy information from parent β†’ child edges
63 63 fn overwrite(&mut self, rev: Revision, path: Option<PathToken>) {
64 64 self.overwritten.insert(self.rev);
65 65 self.rev = rev;
66 66 self.path = path;
67 67 }
68 68
69 69 /// Mark pre-existing copy information as "dropped" by a file deletion
70 70 ///
71 71 /// Use this when recording copy information from parent β†’ child edges
72 72 fn mark_delete(&mut self, rev: Revision) {
73 73 self.overwritten.insert(self.rev);
74 74 self.rev = rev;
75 75 self.path = None;
76 76 }
77 77
78 78 fn is_overwritten_by(&self, other: &Self) -> bool {
79 79 other.overwritten.contains(&self.rev)
80 80 }
81 81 }
82 82
83 83 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
84 84 type InternalPathCopies = OrdMap<PathToken, CopySource>;
85 85
86 86 /// hold parent 1, parent 2 and relevant files actions.
87 87 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
88 88
89 89 /// represent the files affected by a changesets
90 90 ///
91 91 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
92 92 /// all the data categories tracked by it.
93 93 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
94 94 /// all the data categories tracked by it.
95 95 pub struct ChangedFiles<'a> {
96 96 nb_items: u32,
97 97 index: &'a [u8],
98 98 data: &'a [u8],
99 99 }
100 100
101 101 /// Represent active changes that affect the copy tracing.
102 102 enum Action<'a> {
103 103 /// The parent ? children edge is removing a file
104 104 ///
105 105 /// (actually, this could be the edge from the other parent, but it does
106 106 /// not matters)
107 107 Removed(&'a HgPath),
108 108 /// The parent ? children edge introduce copy information between (dest,
109 109 /// source)
110 110 Copied(&'a HgPath, &'a HgPath),
111 111 }
112 112
113 113 /// This express the possible "special" case we can get in a merge
114 114 ///
115 115 /// See mercurial/metadata.py for details on these values.
116 116 #[derive(PartialEq)]
117 117 enum MergeCase {
118 118 /// Merged: file had history on both side that needed to be merged
119 119 Merged,
120 120 /// Salvaged: file was candidate for deletion, but survived the merge
121 121 Salvaged,
122 122 /// Normal: Not one of the two cases above
123 123 Normal,
124 124 }
125 125
126 126 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
127 127
128 128 const EMPTY: &[u8] = b"";
129 129 const COPY_MASK: u8 = 3;
130 130 const P1_COPY: u8 = 2;
131 131 const P2_COPY: u8 = 3;
132 132 const ACTION_MASK: u8 = 28;
133 133 const REMOVED: u8 = 12;
134 134 const MERGED: u8 = 8;
135 135 const SALVAGED: u8 = 16;
136 136
137 137 impl<'a> ChangedFiles<'a> {
138 138 const INDEX_START: usize = 4;
139 139 const ENTRY_SIZE: u32 = 9;
140 140 const FILENAME_START: u32 = 1;
141 141 const COPY_SOURCE_START: u32 = 5;
142 142
143 143 pub fn new(data: &'a [u8]) -> Self {
144 144 assert!(
145 145 data.len() >= 4,
146 146 "data size ({}) is too small to contain the header (4)",
147 147 data.len()
148 148 );
149 149 let nb_items_raw: [u8; 4] = (&data[0..=3])
150 150 .try_into()
151 151 .expect("failed to turn 4 bytes into 4 bytes");
152 152 let nb_items = u32::from_be_bytes(nb_items_raw);
153 153
154 154 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
155 155 let index_end = Self::INDEX_START + index_size;
156 156
157 157 assert!(
158 158 data.len() >= index_end,
159 159 "data size ({}) is too small to fit the index_data ({})",
160 160 data.len(),
161 161 index_end
162 162 );
163 163
164 164 let ret = ChangedFiles {
165 165 nb_items,
166 166 index: &data[Self::INDEX_START..index_end],
167 167 data: &data[index_end..],
168 168 };
169 169 let max_data = ret.filename_end(nb_items - 1) as usize;
170 170 assert!(
171 171 ret.data.len() >= max_data,
172 172 "data size ({}) is too small to fit all data ({})",
173 173 data.len(),
174 174 index_end + max_data
175 175 );
176 176 ret
177 177 }
178 178
179 179 pub fn new_empty() -> Self {
180 180 ChangedFiles {
181 181 nb_items: 0,
182 182 index: EMPTY,
183 183 data: EMPTY,
184 184 }
185 185 }
186 186
187 187 /// internal function to return an individual entry at a given index
188 188 fn entry(&'a self, idx: u32) -> FileChange<'a> {
189 189 if idx >= self.nb_items {
190 190 panic!(
191 191 "index for entry is higher that the number of file {} >= {}",
192 192 idx, self.nb_items
193 193 )
194 194 }
195 195 let flags = self.flags(idx);
196 196 let filename = self.filename(idx);
197 197 let copy_idx = self.copy_idx(idx);
198 198 let copy_source = self.filename(copy_idx);
199 199 (flags, filename, copy_source)
200 200 }
201 201
202 202 /// internal function to return the filename of the entry at a given index
203 203 fn filename(&self, idx: u32) -> &HgPath {
204 204 let filename_start;
205 205 if idx == 0 {
206 206 filename_start = 0;
207 207 } else {
208 208 filename_start = self.filename_end(idx - 1)
209 209 }
210 210 let filename_end = self.filename_end(idx);
211 211 let filename_start = filename_start as usize;
212 212 let filename_end = filename_end as usize;
213 213 HgPath::new(&self.data[filename_start..filename_end])
214 214 }
215 215
216 216 /// internal function to return the flag field of the entry at a given
217 217 /// index
218 218 fn flags(&self, idx: u32) -> u8 {
219 219 let idx = idx as usize;
220 220 self.index[idx * (Self::ENTRY_SIZE as usize)]
221 221 }
222 222
223 223 /// internal function to return the end of a filename part at a given index
224 224 fn filename_end(&self, idx: u32) -> u32 {
225 225 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
226 226 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
227 227 let start = start as usize;
228 228 let end = end as usize;
229 229 let raw = (&self.index[start..end])
230 230 .try_into()
231 231 .expect("failed to turn 4 bytes into 4 bytes");
232 232 u32::from_be_bytes(raw)
233 233 }
234 234
235 235 /// internal function to return index of the copy source of the entry at a
236 236 /// given index
237 237 fn copy_idx(&self, idx: u32) -> u32 {
238 238 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
239 239 let end = (idx + 1) * Self::ENTRY_SIZE;
240 240 let start = start as usize;
241 241 let end = end as usize;
242 242 let raw = (&self.index[start..end])
243 243 .try_into()
244 244 .expect("failed to turn 4 bytes into 4 bytes");
245 245 u32::from_be_bytes(raw)
246 246 }
247 247
248 248 /// Return an iterator over all the `Action` in this instance.
249 249 fn iter_actions(&self, parent: Parent) -> ActionsIterator {
250 250 ActionsIterator {
251 251 changes: &self,
252 252 parent: parent,
253 253 current: 0,
254 254 }
255 255 }
256 256
257 257 /// return the MergeCase value associated with a filename
258 258 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
259 259 if self.nb_items == 0 {
260 260 return MergeCase::Normal;
261 261 }
262 262 let mut low_part = 0;
263 263 let mut high_part = self.nb_items;
264 264
265 265 while low_part < high_part {
266 266 let cursor = (low_part + high_part - 1) / 2;
267 267 let (flags, filename, _source) = self.entry(cursor);
268 268 match path.cmp(filename) {
269 269 Ordering::Less => low_part = cursor + 1,
270 270 Ordering::Greater => high_part = cursor,
271 271 Ordering::Equal => {
272 272 return match flags & ACTION_MASK {
273 273 MERGED => MergeCase::Merged,
274 274 SALVAGED => MergeCase::Salvaged,
275 275 _ => MergeCase::Normal,
276 276 };
277 277 }
278 278 }
279 279 }
280 280 MergeCase::Normal
281 281 }
282 282 }
283 283
284 284 struct ActionsIterator<'a> {
285 285 changes: &'a ChangedFiles<'a>,
286 286 parent: Parent,
287 287 current: u32,
288 288 }
289 289
290 290 impl<'a> Iterator for ActionsIterator<'a> {
291 291 type Item = Action<'a>;
292 292
293 293 fn next(&mut self) -> Option<Action<'a>> {
294 294 let copy_flag = match self.parent {
295 295 Parent::FirstParent => P1_COPY,
296 296 Parent::SecondParent => P2_COPY,
297 297 };
298 298 while self.current < self.changes.nb_items {
299 299 let (flags, file, source) = self.changes.entry(self.current);
300 300 self.current += 1;
301 301 if (flags & ACTION_MASK) == REMOVED {
302 302 return Some(Action::Removed(file));
303 303 }
304 304 let copy = flags & COPY_MASK;
305 305 if copy == copy_flag {
306 306 return Some(Action::Copied(file, source));
307 307 }
308 308 }
309 309 return None;
310 310 }
311 311 }
312 312
313 313 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
314 314 /// ChangedFiles
315 315 ///
316 316 /// It is passed to the RevInfoMaker callback who can assign any necessary
317 317 /// content to the `data` attribute. The copy tracing code is responsible for
318 318 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
319 319 pub struct DataHolder<D> {
320 320 /// RevInfoMaker callback should assign data referenced by the
321 321 /// ChangedFiles struct it return to this attribute. The DataHolder
322 322 /// lifetime will be at least as long as the ChangedFiles one.
323 323 pub data: Option<D>,
324 324 }
325 325
326 326 pub type RevInfoMaker<'a, D> =
327 327 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
328 328
329 329 /// enum used to carry information about the parent β†’ child currently processed
330 330 #[derive(Copy, Clone, Debug)]
331 331 enum Parent {
332 332 /// The `p1(x) β†’ x` edge
333 333 FirstParent,
334 334 /// The `p2(x) β†’ x` edge
335 335 SecondParent,
336 336 }
337 337
338 338 /// A small "tokenizer" responsible of turning full HgPath into lighter
339 339 /// PathToken
340 340 ///
341 341 /// Dealing with small object, like integer is much faster, so HgPath input are
342 342 /// turned into integer "PathToken" and converted back in the end.
343 343 #[derive(Clone, Debug, Default)]
344 344 struct TwoWayPathMap {
345 345 token: HashMap<HgPathBuf, PathToken>,
346 346 path: Vec<HgPathBuf>,
347 347 }
348 348
349 349 impl TwoWayPathMap {
350 350 fn tokenize(&mut self, path: &HgPath) -> PathToken {
351 351 match self.token.get(path) {
352 352 Some(a) => *a,
353 353 None => {
354 354 let a = self.token.len();
355 355 let buf = path.to_owned();
356 356 self.path.push(buf.clone());
357 357 self.token.insert(buf, a);
358 358 a
359 359 }
360 360 }
361 361 }
362 362
363 363 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
364 364 assert!(token < self.path.len(), format!("Unknown token: {}", token));
365 365 &self.path[token]
366 366 }
367 367 }
368 368
369 369 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
370 370 ///
371 371 /// Arguments are:
372 372 ///
373 373 /// revs: all revisions to be considered
374 374 /// children: a {parent ? [childrens]} mapping
375 375 /// target_rev: the final revision we are combining copies to
376 376 /// rev_info(rev): callback to get revision information:
377 377 /// * first parent
378 378 /// * second parent
379 379 /// * ChangedFiles
380 380 /// isancestors(low_rev, high_rev): callback to check if a revision is an
381 381 /// ancestor of another
382 382 pub fn combine_changeset_copies<D>(
383 383 revs: Vec<Revision>,
384 384 mut children_count: HashMap<Revision, usize>,
385 385 target_rev: Revision,
386 386 rev_info: RevInfoMaker<D>,
387 387 ) -> PathCopies {
388 388 let mut all_copies = HashMap::new();
389 389
390 390 let mut path_map = TwoWayPathMap::default();
391 391
392 392 for rev in revs {
393 393 let mut d: DataHolder<D> = DataHolder { data: None };
394 394 let (p1, p2, changes) = rev_info(rev, &mut d);
395 395
396 396 // We will chain the copies information accumulated for the parent with
397 397 // the individual copies information the curent revision. Creating a
398 398 // new TimeStampedPath for each `rev` β†’ `children` vertex.
399 399 let mut copies: Option<InternalPathCopies> = None;
400 400 // Retrieve data computed in a previous iteration
401 401 let p1_copies = match p1 {
402 402 NULL_REVISION => None,
403 403 _ => get_and_clean_parent_copies(
404 404 &mut all_copies,
405 405 &mut children_count,
406 406 p1,
407 407 ), // will be None if the vertex is not to be traversed
408 408 };
409 409 let p2_copies = match p2 {
410 410 NULL_REVISION => None,
411 411 _ => get_and_clean_parent_copies(
412 412 &mut all_copies,
413 413 &mut children_count,
414 414 p2,
415 415 ), // will be None if the vertex is not to be traversed
416 416 };
417 417 // combine it with data for that revision
418 418 let p1_copies = match p1_copies {
419 419 None => None,
420 420 Some(parent_copies) => Some(add_from_changes(
421 421 &mut path_map,
422 422 &parent_copies,
423 423 &changes,
424 424 Parent::FirstParent,
425 425 rev,
426 426 )),
427 427 };
428 428 let p2_copies = match p2_copies {
429 429 None => None,
430 430 Some(parent_copies) => Some(add_from_changes(
431 431 &mut path_map,
432 432 &parent_copies,
433 433 &changes,
434 434 Parent::SecondParent,
435 435 rev,
436 436 )),
437 437 };
438 438 let copies = match (p1_copies, p2_copies) {
439 439 (None, None) => None,
440 440 (c, None) => c,
441 441 (None, c) => c,
442 442 (Some(p1_copies), Some(p2_copies)) => Some(merge_copies_dict(
443 443 &path_map, rev, p2_copies, p1_copies, &changes,
444 444 )),
445 445 };
446 446 if let Some(c) = copies {
447 447 all_copies.insert(rev, c);
448 448 }
449 449 }
450 450
451 451 // Drop internal information (like the timestamp) and return the final
452 452 // mapping.
453 453 let tt_result = all_copies
454 454 .remove(&target_rev)
455 455 .expect("target revision was not processed");
456 456 let mut result = PathCopies::default();
457 457 for (dest, tt_source) in tt_result {
458 458 if let Some(path) = tt_source.path {
459 459 let path_dest = path_map.untokenize(dest).to_owned();
460 460 let path_path = path_map.untokenize(path).to_owned();
461 461 result.insert(path_dest, path_path);
462 462 }
463 463 }
464 464 result
465 465 }
466 466
467 467 /// fetch previous computed information
468 468 ///
469 469 /// If no other children are expected to need this information, we drop it from
470 470 /// the cache.
471 471 ///
472 472 /// If parent is not part of the set we are expected to walk, return None.
473 473 fn get_and_clean_parent_copies(
474 474 all_copies: &mut HashMap<Revision, InternalPathCopies>,
475 475 children_count: &mut HashMap<Revision, usize>,
476 476 parent_rev: Revision,
477 477 ) -> Option<InternalPathCopies> {
478 478 let count = children_count.get_mut(&parent_rev)?;
479 479 *count -= 1;
480 480 if *count == 0 {
481 481 match all_copies.remove(&parent_rev) {
482 482 Some(c) => Some(c),
483 483 None => Some(InternalPathCopies::default()),
484 484 }
485 485 } else {
486 486 match all_copies.get(&parent_rev) {
487 487 Some(c) => Some(c.clone()),
488 488 None => Some(InternalPathCopies::default()),
489 489 }
490 490 }
491 491 }
492 492
493 493 /// Combine ChangedFiles with some existing PathCopies information and return
494 494 /// the result
495 495 fn add_from_changes(
496 496 path_map: &mut TwoWayPathMap,
497 497 base_copies: &InternalPathCopies,
498 498 changes: &ChangedFiles,
499 499 parent: Parent,
500 500 current_rev: Revision,
501 501 ) -> InternalPathCopies {
502 502 let mut copies = base_copies.clone();
503 503 for action in changes.iter_actions(parent) {
504 504 match action {
505 505 Action::Copied(path_dest, path_source) => {
506 let dest = path_map.tokenize(path_dest);
507 let source = path_map.tokenize(path_source);
508 let entry;
509 if let Some(v) = base_copies.get(&source) {
510 entry = match &v.path {
511 Some(path) => Some((*(path)).to_owned()),
512 None => Some(source.to_owned()),
513 }
514 } else {
515 entry = Some(source.to_owned());
516 }
517 // Each new entry is introduced by the children, we
518 // record this information as we will need it to take
519 // the right decision when merging conflicting copy
520 // information. See merge_copies_dict for details.
521 match copies.entry(dest) {
522 Entry::Vacant(slot) => {
523 let ttpc = CopySource::new(current_rev, entry);
524 slot.insert(ttpc);
525 }
526 Entry::Occupied(mut slot) => {
527 let ttpc = slot.get_mut();
528 ttpc.overwrite(current_rev, entry);
529 }
530 }
506 add_one_copy(
507 current_rev,
508 &mut path_map,
509 &mut copies,
510 &base_copies,
511 path_dest,
512 path_source,
513 );
531 514 }
532 515 Action::Removed(deleted_path) => {
533 516 // We must drop copy information for removed file.
534 517 //
535 518 // We need to explicitly record them as dropped to
536 519 // propagate this information when merging two
537 520 // InternalPathCopies object.
538 521 let deleted = path_map.tokenize(deleted_path);
539 522 copies.entry(deleted).and_modify(|old| {
540 523 old.mark_delete(current_rev);
541 524 });
542 525 }
543 526 }
544 527 }
545 528 copies
546 529 }
547 530
531 // insert one new copy information in an InternalPathCopies
532 //
533 // This deal with chaining and overwrite.
534 fn add_one_copy(
535 current_rev: Revision,
536 path_map: &mut TwoWayPathMap,
537 copies: &mut InternalPathCopies,
538 base_copies: &InternalPathCopies,
539 path_dest: &HgPath,
540 path_source: &HgPath,
541 ) {
542 let dest = path_map.tokenize(path_dest);
543 let source = path_map.tokenize(path_source);
544 let entry;
545 if let Some(v) = base_copies.get(&source) {
546 entry = match &v.path {
547 Some(path) => Some((*(path)).to_owned()),
548 None => Some(source.to_owned()),
549 }
550 } else {
551 entry = Some(source.to_owned());
552 }
553 // Each new entry is introduced by the children, we
554 // record this information as we will need it to take
555 // the right decision when merging conflicting copy
556 // information. See merge_copies_dict for details.
557 match copies.entry(dest) {
558 Entry::Vacant(slot) => {
559 let ttpc = CopySource::new(current_rev, entry);
560 slot.insert(ttpc);
561 }
562 Entry::Occupied(mut slot) => {
563 let ttpc = slot.get_mut();
564 ttpc.overwrite(current_rev, entry);
565 }
566 }
567 }
568
548 569 /// merge two copies-mapping together, minor and major
549 570 ///
550 571 /// In case of conflict, value from "major" will be picked, unless in some
551 572 /// cases. See inline documentation for details.
552 573 fn merge_copies_dict(
553 574 path_map: &TwoWayPathMap,
554 575 current_merge: Revision,
555 576 mut minor: InternalPathCopies,
556 577 mut major: InternalPathCopies,
557 578 changes: &ChangedFiles,
558 579 ) -> InternalPathCopies {
559 580 // This closure exist as temporary help while multiple developper are
560 581 // actively working on this code. Feel free to re-inline it once this
561 582 // code is more settled.
562 583 let cmp_value =
563 584 |dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| {
564 585 compare_value(
565 586 path_map,
566 587 current_merge,
567 588 changes,
568 589 dest,
569 590 src_minor,
570 591 src_major,
571 592 )
572 593 };
573 594 if minor.is_empty() {
574 595 major
575 596 } else if major.is_empty() {
576 597 minor
577 598 } else if minor.len() * 2 < major.len() {
578 599 // Lets says we are merging two InternalPathCopies instance A and B.
579 600 //
580 601 // If A contains N items, the merge result will never contains more
581 602 // than N values differents than the one in A
582 603 //
583 604 // If B contains M items, with M > N, the merge result will always
584 605 // result in a minimum of M - N value differents than the on in
585 606 // A
586 607 //
587 608 // As a result, if N < (M-N), we know that simply iterating over A will
588 609 // yield less difference than iterating over the difference
589 610 // between A and B.
590 611 //
591 612 // This help performance a lot in case were a tiny
592 613 // InternalPathCopies is merged with a much larger one.
593 614 for (dest, src_minor) in minor {
594 615 let src_major = major.get(&dest);
595 616 match src_major {
596 617 None => {
597 618 major.insert(dest, src_minor);
598 619 }
599 620 Some(src_major) => {
600 621 let (pick, overwrite) =
601 622 cmp_value(&dest, &src_minor, src_major);
602 623 if overwrite {
603 624 let src = match pick {
604 625 MergePick::Major => CopySource::new_from_merge(
605 626 current_merge,
606 627 src_major,
607 628 &src_minor,
608 629 ),
609 630 MergePick::Minor => CopySource::new_from_merge(
610 631 current_merge,
611 632 &src_minor,
612 633 src_major,
613 634 ),
614 635 MergePick::Any => CopySource::new_from_merge(
615 636 current_merge,
616 637 src_major,
617 638 &src_minor,
618 639 ),
619 640 };
620 641 major.insert(dest, src);
621 642 } else {
622 643 match pick {
623 644 MergePick::Any | MergePick::Major => None,
624 645 MergePick::Minor => major.insert(dest, src_minor),
625 646 };
626 647 }
627 648 }
628 649 };
629 650 }
630 651 major
631 652 } else if major.len() * 2 < minor.len() {
632 653 // This use the same rational than the previous block.
633 654 // (Check previous block documentation for details.)
634 655 for (dest, src_major) in major {
635 656 let src_minor = minor.get(&dest);
636 657 match src_minor {
637 658 None => {
638 659 minor.insert(dest, src_major);
639 660 }
640 661 Some(src_minor) => {
641 662 let (pick, overwrite) =
642 663 cmp_value(&dest, src_minor, &src_major);
643 664 if overwrite {
644 665 let src = match pick {
645 666 MergePick::Major => CopySource::new_from_merge(
646 667 current_merge,
647 668 &src_major,
648 669 src_minor,
649 670 ),
650 671 MergePick::Minor => CopySource::new_from_merge(
651 672 current_merge,
652 673 src_minor,
653 674 &src_major,
654 675 ),
655 676 MergePick::Any => CopySource::new_from_merge(
656 677 current_merge,
657 678 &src_major,
658 679 src_minor,
659 680 ),
660 681 };
661 682 minor.insert(dest, src);
662 683 } else {
663 684 match pick {
664 685 MergePick::Any | MergePick::Minor => None,
665 686 MergePick::Major => minor.insert(dest, src_major),
666 687 };
667 688 }
668 689 }
669 690 };
670 691 }
671 692 minor
672 693 } else {
673 694 let mut override_minor = Vec::new();
674 695 let mut override_major = Vec::new();
675 696
676 697 let mut to_major = |k: &PathToken, v: &CopySource| {
677 698 override_major.push((k.clone(), v.clone()))
678 699 };
679 700 let mut to_minor = |k: &PathToken, v: &CopySource| {
680 701 override_minor.push((k.clone(), v.clone()))
681 702 };
682 703
683 704 // The diff function leverage detection of the identical subpart if
684 705 // minor and major has some common ancestors. This make it very
685 706 // fast is most case.
686 707 //
687 708 // In case where the two map are vastly different in size, the current
688 709 // approach is still slowish because the iteration will iterate over
689 710 // all the "exclusive" content of the larger on. This situation can be
690 711 // frequent when the subgraph of revision we are processing has a lot
691 712 // of roots. Each roots adding they own fully new map to the mix (and
692 713 // likely a small map, if the path from the root to the "main path" is
693 714 // small.
694 715 //
695 716 // We could do better by detecting such situation and processing them
696 717 // differently.
697 718 for d in minor.diff(&major) {
698 719 match d {
699 720 DiffItem::Add(k, v) => to_minor(k, v),
700 721 DiffItem::Remove(k, v) => to_major(k, v),
701 722 DiffItem::Update { old, new } => {
702 723 let (dest, src_major) = new;
703 724 let (_, src_minor) = old;
704 725 let (pick, overwrite) =
705 726 cmp_value(dest, src_minor, src_major);
706 727 if overwrite {
707 728 let src = match pick {
708 729 MergePick::Major => CopySource::new_from_merge(
709 730 current_merge,
710 731 src_major,
711 732 src_minor,
712 733 ),
713 734 MergePick::Minor => CopySource::new_from_merge(
714 735 current_merge,
715 736 src_minor,
716 737 src_major,
717 738 ),
718 739 MergePick::Any => CopySource::new_from_merge(
719 740 current_merge,
720 741 src_major,
721 742 src_minor,
722 743 ),
723 744 };
724 745 to_minor(dest, &src);
725 746 to_major(dest, &src);
726 747 } else {
727 748 match pick {
728 749 MergePick::Major => to_minor(dest, src_major),
729 750 MergePick::Minor => to_major(dest, src_minor),
730 751 // If the two entry are identical, no need to do
731 752 // anything (but diff should not have yield them)
732 753 MergePick::Any => unreachable!(),
733 754 }
734 755 }
735 756 }
736 757 };
737 758 }
738 759
739 760 let updates;
740 761 let mut result;
741 762 if override_major.is_empty() {
742 763 result = major
743 764 } else if override_minor.is_empty() {
744 765 result = minor
745 766 } else {
746 767 if override_minor.len() < override_major.len() {
747 768 updates = override_minor;
748 769 result = minor;
749 770 } else {
750 771 updates = override_major;
751 772 result = major;
752 773 }
753 774 for (k, v) in updates {
754 775 result.insert(k, v);
755 776 }
756 777 }
757 778 result
758 779 }
759 780 }
760 781
761 782 /// represent the side that should prevail when merging two
762 783 /// InternalPathCopies
763 784 enum MergePick {
764 785 /// The "major" (p1) side prevails
765 786 Major,
766 787 /// The "minor" (p2) side prevails
767 788 Minor,
768 789 /// Any side could be used (because they are the same)
769 790 Any,
770 791 }
771 792
772 793 /// decide which side prevails in case of conflicting values
773 794 #[allow(clippy::if_same_then_else)]
774 795 fn compare_value(
775 796 path_map: &TwoWayPathMap,
776 797 current_merge: Revision,
777 798 changes: &ChangedFiles,
778 799 dest: &PathToken,
779 800 src_minor: &CopySource,
780 801 src_major: &CopySource,
781 802 ) -> (MergePick, bool) {
782 803 if src_major.rev == current_merge {
783 804 if src_minor.rev == current_merge {
784 805 if src_major.path.is_none() {
785 806 // We cannot get different copy information for both p1 and p2
786 807 // from the same revision. Unless this was a
787 808 // deletion.
788 809 //
789 810 // However the deletion might come over different data on each
790 811 // branch.
791 812 let need_over = src_major.overwritten != src_minor.overwritten;
792 813 (MergePick::Any, need_over)
793 814 } else {
794 815 unreachable!();
795 816 }
796 817 } else {
797 818 // The last value comes the current merge, this value -will- win
798 819 // eventually.
799 820 (MergePick::Major, true)
800 821 }
801 822 } else if src_minor.rev == current_merge {
802 823 // The last value comes the current merge, this value -will- win
803 824 // eventually.
804 825 (MergePick::Minor, true)
805 826 } else if src_major.path == src_minor.path {
806 827 // we have the same value, but from other source;
807 828 if src_major.rev == src_minor.rev {
808 829 // If the two entry are identical, they are both valid
809 830 debug_assert!(src_minor.overwritten == src_minor.overwritten);
810 831 (MergePick::Any, false)
811 832 } else if src_major.is_overwritten_by(src_minor) {
812 833 (MergePick::Minor, false)
813 834 } else if src_minor.is_overwritten_by(src_major) {
814 835 (MergePick::Major, false)
815 836 } else {
816 837 (MergePick::Any, true)
817 838 }
818 839 } else if src_major.rev == src_minor.rev {
819 840 // We cannot get copy information for both p1 and p2 in the
820 841 // same rev. So this is the same value.
821 842 unreachable!(
822 843 "conflicting information from p1 and p2 in the same revision"
823 844 );
824 845 } else {
825 846 let dest_path = path_map.untokenize(*dest);
826 847 let action = changes.get_merge_case(dest_path);
827 848 if src_minor.path.is_some()
828 849 && src_major.path.is_none()
829 850 && action == MergeCase::Salvaged
830 851 {
831 852 // If the file is "deleted" in the major side but was
832 853 // salvaged by the merge, we keep the minor side alive
833 854 (MergePick::Minor, true)
834 855 } else if src_major.path.is_some()
835 856 && src_minor.path.is_none()
836 857 && action == MergeCase::Salvaged
837 858 {
838 859 // If the file is "deleted" in the minor side but was
839 860 // salvaged by the merge, unconditionnaly preserve the
840 861 // major side.
841 862 (MergePick::Major, true)
842 863 } else if src_minor.is_overwritten_by(src_major) {
843 864 // The information from the minor version are strictly older than
844 865 // the major version
845 866 if action == MergeCase::Merged {
846 867 // If the file was actively merged, its means some non-copy
847 868 // activity happened on the other branch. It
848 869 // mean the older copy information are still relevant.
849 870 //
850 871 // The major side wins such conflict.
851 872 (MergePick::Major, true)
852 873 } else {
853 874 // No activity on the minor branch, pick the newer one.
854 875 (MergePick::Major, false)
855 876 }
856 877 } else if src_major.is_overwritten_by(src_minor) {
857 878 if action == MergeCase::Merged {
858 879 // If the file was actively merged, its means some non-copy
859 880 // activity happened on the other branch. It
860 881 // mean the older copy information are still relevant.
861 882 //
862 883 // The major side wins such conflict.
863 884 (MergePick::Major, true)
864 885 } else {
865 886 // No activity on the minor branch, pick the newer one.
866 887 (MergePick::Minor, false)
867 888 }
868 889 } else if src_minor.path.is_none() {
869 890 // the minor side has no relevant information, pick the alive one
870 891 (MergePick::Major, true)
871 892 } else if src_major.path.is_none() {
872 893 // the major side has no relevant information, pick the alive one
873 894 (MergePick::Minor, true)
874 895 } else {
875 896 // by default the major side wins
876 897 (MergePick::Major, true)
877 898 }
878 899 }
879 900 }
General Comments 0
You need to be logged in to leave comments. Login now