##// END OF EJS Templates
copies-rust: hide most of the comparison details inside a closure...
marmoute -
r46743:c58c8f1d default
parent child Browse files
Show More
@@ -1,611 +1,618 b''
1 1 use crate::utils::hg_path::HgPath;
2 2 use crate::utils::hg_path::HgPathBuf;
3 3 use crate::Revision;
4 4
5 5 use im_rc::ordmap::DiffItem;
6 6 use im_rc::ordmap::OrdMap;
7 7
8 8 use std::cmp::Ordering;
9 9 use std::collections::HashMap;
10 10 use std::convert::TryInto;
11 11
12 12 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
13 13
14 14 #[derive(Clone, Debug, PartialEq)]
15 15 struct TimeStampedPathCopy {
16 16 /// revision at which the copy information was added
17 17 rev: Revision,
18 18 /// the copy source, (Set to None in case of deletion of the associated
19 19 /// key)
20 20 path: Option<HgPathBuf>,
21 21 }
22 22
23 23 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
24 24 type TimeStampedPathCopies = OrdMap<HgPathBuf, TimeStampedPathCopy>;
25 25
26 26 /// hold parent 1, parent 2 and relevant files actions.
27 27 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
28 28
/// Represents the files affected by a changeset.
///
/// This holds a subset of mercurial.metadata.ChangingFiles as we do not need
/// all the data categories tracked by it.
pub struct ChangedFiles<'a> {
    /// Number of entries stored in `index`.
    nb_items: u32,
    /// Fixed-size entries, one per file.
    index: &'a [u8],
    /// Concatenated filenames referenced by the index entries.
    data: &'a [u8],
}
40 40
41 41 /// Represent active changes that affect the copy tracing.
42 42 enum Action<'a> {
43 43 /// The parent ? children edge is removing a file
44 44 ///
45 45 /// (actually, this could be the edge from the other parent, but it does
46 46 /// not matters)
47 47 Removed(&'a HgPath),
48 48 /// The parent ? children edge introduce copy information between (dest,
49 49 /// source)
50 50 Copied(&'a HgPath, &'a HgPath),
51 51 }
52 52
/// Expresses the possible "special" cases we can encounter in a merge.
///
/// See mercurial/metadata.py for details on these values.
#[derive(PartialEq)]
enum MergeCase {
    /// The file had history on both sides that needed to be merged.
    Merged,
    /// The file was a candidate for deletion, but survived the merge.
    Salvaged,
    /// Neither of the two cases above.
    Normal,
}
65 65
66 66 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
67 67
68 68 const EMPTY: &[u8] = b"";
69 69 const COPY_MASK: u8 = 3;
70 70 const P1_COPY: u8 = 2;
71 71 const P2_COPY: u8 = 3;
72 72 const ACTION_MASK: u8 = 28;
73 73 const REMOVED: u8 = 12;
74 74 const MERGED: u8 = 8;
75 75 const SALVAGED: u8 = 16;
76 76
77 77 impl<'a> ChangedFiles<'a> {
78 78 const INDEX_START: usize = 4;
79 79 const ENTRY_SIZE: u32 = 9;
80 80 const FILENAME_START: u32 = 1;
81 81 const COPY_SOURCE_START: u32 = 5;
82 82
83 83 pub fn new(data: &'a [u8]) -> Self {
84 84 assert!(
85 85 data.len() >= 4,
86 86 "data size ({}) is too small to contain the header (4)",
87 87 data.len()
88 88 );
89 89 let nb_items_raw: [u8; 4] = (&data[0..=3])
90 90 .try_into()
91 91 .expect("failed to turn 4 bytes into 4 bytes");
92 92 let nb_items = u32::from_be_bytes(nb_items_raw);
93 93
94 94 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
95 95 let index_end = Self::INDEX_START + index_size;
96 96
97 97 assert!(
98 98 data.len() >= index_end,
99 99 "data size ({}) is too small to fit the index_data ({})",
100 100 data.len(),
101 101 index_end
102 102 );
103 103
104 104 let ret = ChangedFiles {
105 105 nb_items,
106 106 index: &data[Self::INDEX_START..index_end],
107 107 data: &data[index_end..],
108 108 };
109 109 let max_data = ret.filename_end(nb_items - 1) as usize;
110 110 assert!(
111 111 ret.data.len() >= max_data,
112 112 "data size ({}) is too small to fit all data ({})",
113 113 data.len(),
114 114 index_end + max_data
115 115 );
116 116 ret
117 117 }
118 118
119 119 pub fn new_empty() -> Self {
120 120 ChangedFiles {
121 121 nb_items: 0,
122 122 index: EMPTY,
123 123 data: EMPTY,
124 124 }
125 125 }
126 126
127 127 /// internal function to return an individual entry at a given index
128 128 fn entry(&'a self, idx: u32) -> FileChange<'a> {
129 129 if idx >= self.nb_items {
130 130 panic!(
131 131 "index for entry is higher that the number of file {} >= {}",
132 132 idx, self.nb_items
133 133 )
134 134 }
135 135 let flags = self.flags(idx);
136 136 let filename = self.filename(idx);
137 137 let copy_idx = self.copy_idx(idx);
138 138 let copy_source = self.filename(copy_idx);
139 139 (flags, filename, copy_source)
140 140 }
141 141
142 142 /// internal function to return the filename of the entry at a given index
143 143 fn filename(&self, idx: u32) -> &HgPath {
144 144 let filename_start;
145 145 if idx == 0 {
146 146 filename_start = 0;
147 147 } else {
148 148 filename_start = self.filename_end(idx - 1)
149 149 }
150 150 let filename_end = self.filename_end(idx);
151 151 let filename_start = filename_start as usize;
152 152 let filename_end = filename_end as usize;
153 153 HgPath::new(&self.data[filename_start..filename_end])
154 154 }
155 155
156 156 /// internal function to return the flag field of the entry at a given
157 157 /// index
158 158 fn flags(&self, idx: u32) -> u8 {
159 159 let idx = idx as usize;
160 160 self.index[idx * (Self::ENTRY_SIZE as usize)]
161 161 }
162 162
163 163 /// internal function to return the end of a filename part at a given index
164 164 fn filename_end(&self, idx: u32) -> u32 {
165 165 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
166 166 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
167 167 let start = start as usize;
168 168 let end = end as usize;
169 169 let raw = (&self.index[start..end])
170 170 .try_into()
171 171 .expect("failed to turn 4 bytes into 4 bytes");
172 172 u32::from_be_bytes(raw)
173 173 }
174 174
175 175 /// internal function to return index of the copy source of the entry at a
176 176 /// given index
177 177 fn copy_idx(&self, idx: u32) -> u32 {
178 178 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
179 179 let end = (idx + 1) * Self::ENTRY_SIZE;
180 180 let start = start as usize;
181 181 let end = end as usize;
182 182 let raw = (&self.index[start..end])
183 183 .try_into()
184 184 .expect("failed to turn 4 bytes into 4 bytes");
185 185 u32::from_be_bytes(raw)
186 186 }
187 187
188 188 /// Return an iterator over all the `Action` in this instance.
189 189 fn iter_actions(&self, parent: Parent) -> ActionsIterator {
190 190 ActionsIterator {
191 191 changes: &self,
192 192 parent: parent,
193 193 current: 0,
194 194 }
195 195 }
196 196
197 197 /// return the MergeCase value associated with a filename
198 198 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
199 199 if self.nb_items == 0 {
200 200 return MergeCase::Normal;
201 201 }
202 202 let mut low_part = 0;
203 203 let mut high_part = self.nb_items;
204 204
205 205 while low_part < high_part {
206 206 let cursor = (low_part + high_part - 1) / 2;
207 207 let (flags, filename, _source) = self.entry(cursor);
208 208 match path.cmp(filename) {
209 209 Ordering::Less => low_part = cursor + 1,
210 210 Ordering::Greater => high_part = cursor,
211 211 Ordering::Equal => {
212 212 return match flags & ACTION_MASK {
213 213 MERGED => MergeCase::Merged,
214 214 SALVAGED => MergeCase::Salvaged,
215 215 _ => MergeCase::Normal,
216 216 };
217 217 }
218 218 }
219 219 }
220 220 MergeCase::Normal
221 221 }
222 222 }
223 223
224 224 /// A struct responsible for answering "is X ancestors of Y" quickly
225 225 ///
226 226 /// The structure will delegate ancestors call to a callback, and cache the
227 227 /// result.
228 228 #[derive(Debug)]
229 229 struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
230 230 inner: &'a A,
231 231 pairs: HashMap<(Revision, Revision), bool>,
232 232 }
233 233
234 234 impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
235 235 fn new(func: &'a A) -> Self {
236 236 Self {
237 237 inner: func,
238 238 pairs: HashMap::default(),
239 239 }
240 240 }
241 241
242 242 /// returns `true` if `anc` is an ancestors of `desc`, `false` otherwise
243 243 fn is_ancestor(&mut self, anc: Revision, desc: Revision) -> bool {
244 244 if anc > desc {
245 245 false
246 246 } else if anc == desc {
247 247 true
248 248 } else {
249 249 if let Some(b) = self.pairs.get(&(anc, desc)) {
250 250 *b
251 251 } else {
252 252 let b = (self.inner)(anc, desc);
253 253 self.pairs.insert((anc, desc), b);
254 254 b
255 255 }
256 256 }
257 257 }
258 258 }
259 259
260 260 struct ActionsIterator<'a> {
261 261 changes: &'a ChangedFiles<'a>,
262 262 parent: Parent,
263 263 current: u32,
264 264 }
265 265
266 266 impl<'a> Iterator for ActionsIterator<'a> {
267 267 type Item = Action<'a>;
268 268
269 269 fn next(&mut self) -> Option<Action<'a>> {
270 270 let copy_flag = match self.parent {
271 271 Parent::FirstParent => P1_COPY,
272 272 Parent::SecondParent => P2_COPY,
273 273 };
274 274 while self.current < self.changes.nb_items {
275 275 let (flags, file, source) = self.changes.entry(self.current);
276 276 self.current += 1;
277 277 if (flags & ACTION_MASK) == REMOVED {
278 278 return Some(Action::Removed(file));
279 279 }
280 280 let copy = flags & COPY_MASK;
281 281 if copy == copy_flag {
282 282 return Some(Action::Copied(file, source));
283 283 }
284 284 }
285 285 return None;
286 286 }
287 287 }
288 288
/// A small struct whose purpose is to keep alive the bytes referenced by a
/// `ChangedFiles`.
///
/// It is handed to the `RevInfoMaker` callback, which can store any necessary
/// content in the `data` attribute. The copy tracing code is responsible for
/// keeping the `DataHolder` alive at least as long as the `ChangedFiles`
/// object.
pub struct DataHolder<D> {
    /// The `RevInfoMaker` callback should store here the data referenced by
    /// the `ChangedFiles` it returns. The `DataHolder` lifetime will be at
    /// least as long as the `ChangedFiles` one.
    pub data: Option<D>,
}
301 301
302 302 pub type RevInfoMaker<'a, D> =
303 303 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
304 304
/// Enum carrying which parent -> child edge is currently being processed.
#[derive(Copy, Clone, Debug)]
enum Parent {
    /// The `p1(x) -> x` edge
    FirstParent,
    /// The `p2(x) -> x` edge
    SecondParent,
}
313 313
314 314 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
315 315 ///
316 316 /// Arguments are:
317 317 ///
318 318 /// revs: all revisions to be considered
319 319 /// children: a {parent ? [childrens]} mapping
320 320 /// target_rev: the final revision we are combining copies to
321 321 /// rev_info(rev): callback to get revision information:
322 322 /// * first parent
323 323 /// * second parent
324 324 /// * ChangedFiles
325 325 /// isancestors(low_rev, high_rev): callback to check if a revision is an
326 326 /// ancestor of another
327 327 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
328 328 revs: Vec<Revision>,
329 329 children: HashMap<Revision, Vec<Revision>>,
330 330 target_rev: Revision,
331 331 rev_info: RevInfoMaker<D>,
332 332 is_ancestor: &A,
333 333 ) -> PathCopies {
334 334 let mut all_copies = HashMap::new();
335 335 let mut oracle = AncestorOracle::new(is_ancestor);
336 336
337 337 for rev in revs {
338 338 // Retrieve data computed in a previous iteration
339 339 let copies = all_copies.remove(&rev);
340 340 let copies = match copies {
341 341 Some(c) => c,
342 342 None => TimeStampedPathCopies::default(), // root of the walked set
343 343 };
344 344
345 345 let current_children = match children.get(&rev) {
346 346 Some(c) => c,
347 347 None => panic!("inconsistent `revs` and `children`"),
348 348 };
349 349
350 350 for child in current_children {
351 351 // We will chain the copies information accumulated for `rev` with
352 352 // the individual copies information for each of its children.
353 353 // Creating a new PathCopies for each `rev` β†’ `children` vertex.
354 354 let mut d: DataHolder<D> = DataHolder { data: None };
355 355 let (p1, p2, changes) = rev_info(*child, &mut d);
356 356
357 357 let parent = if rev == p1 {
358 358 Parent::FirstParent
359 359 } else {
360 360 assert_eq!(rev, p2);
361 361 Parent::SecondParent
362 362 };
363 363 let new_copies =
364 364 add_from_changes(&copies, &changes, parent, *child);
365 365
366 366 // Merge has two parents needs to combines their copy information.
367 367 //
368 368 // If the vertex from the other parent was already processed, we
369 369 // will have a value for the child ready to be used. We need to
370 370 // grab it and combine it with the one we already
371 371 // computed. If not we can simply store the newly
372 372 // computed data. The processing happening at
373 373 // the time of the second parent will take care of combining the
374 374 // two TimeStampedPathCopies instance.
375 375 match all_copies.remove(child) {
376 376 None => {
377 377 all_copies.insert(child, new_copies);
378 378 }
379 379 Some(other_copies) => {
380 380 let (minor, major) = match parent {
381 381 Parent::FirstParent => (other_copies, new_copies),
382 382 Parent::SecondParent => (new_copies, other_copies),
383 383 };
384 384 let merged_copies =
385 385 merge_copies_dict(minor, major, &changes, &mut oracle);
386 386 all_copies.insert(child, merged_copies);
387 387 }
388 388 };
389 389 }
390 390 }
391 391
392 392 // Drop internal information (like the timestamp) and return the final
393 393 // mapping.
394 394 let tt_result = all_copies
395 395 .remove(&target_rev)
396 396 .expect("target revision was not processed");
397 397 let mut result = PathCopies::default();
398 398 for (dest, tt_source) in tt_result {
399 399 if let Some(path) = tt_source.path {
400 400 result.insert(dest, path);
401 401 }
402 402 }
403 403 result
404 404 }
405 405
406 406 /// Combine ChangedFiles with some existing PathCopies information and return
407 407 /// the result
408 408 fn add_from_changes(
409 409 base_copies: &TimeStampedPathCopies,
410 410 changes: &ChangedFiles,
411 411 parent: Parent,
412 412 current_rev: Revision,
413 413 ) -> TimeStampedPathCopies {
414 414 let mut copies = base_copies.clone();
415 415 for action in changes.iter_actions(parent) {
416 416 match action {
417 417 Action::Copied(dest, source) => {
418 418 let entry;
419 419 if let Some(v) = base_copies.get(source) {
420 420 entry = match &v.path {
421 421 Some(path) => Some((*(path)).to_owned()),
422 422 None => Some(source.to_owned()),
423 423 }
424 424 } else {
425 425 entry = Some(source.to_owned());
426 426 }
427 427 // Each new entry is introduced by the children, we
428 428 // record this information as we will need it to take
429 429 // the right decision when merging conflicting copy
430 430 // information. See merge_copies_dict for details.
431 431 let ttpc = TimeStampedPathCopy {
432 432 rev: current_rev,
433 433 path: entry,
434 434 };
435 435 copies.insert(dest.to_owned(), ttpc);
436 436 }
437 437 Action::Removed(f) => {
438 438 // We must drop copy information for removed file.
439 439 //
440 440 // We need to explicitly record them as dropped to
441 441 // propagate this information when merging two
442 442 // TimeStampedPathCopies object.
443 443 if copies.contains_key(f.as_ref()) {
444 444 let ttpc = TimeStampedPathCopy {
445 445 rev: current_rev,
446 446 path: None,
447 447 };
448 448 copies.insert(f.to_owned(), ttpc);
449 449 }
450 450 }
451 451 }
452 452 }
453 453 copies
454 454 }
455 455
456 456 /// merge two copies-mapping together, minor and major
457 457 ///
458 458 /// In case of conflict, value from "major" will be picked, unless in some
459 459 /// cases. See inline documentation for details.
460 460 fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
461 461 minor: TimeStampedPathCopies,
462 462 major: TimeStampedPathCopies,
463 463 changes: &ChangedFiles,
464 464 oracle: &mut AncestorOracle<A>,
465 465 ) -> TimeStampedPathCopies {
466 // This closure exist as temporary help while multiple developper are
467 // actively working on this code. Feel free to re-inline it once this
468 // code is more settled.
469 let mut cmp_value =
470 |dest: &HgPathBuf,
471 src_minor: &TimeStampedPathCopy,
472 src_major: &TimeStampedPathCopy| {
473 compare_value(changes, oracle, dest, src_minor, src_major)
474 };
466 475 if minor.is_empty() {
467 476 major
468 477 } else if major.is_empty() {
469 478 minor
470 479 } else {
471 480 let mut override_minor = Vec::new();
472 481 let mut override_major = Vec::new();
473 482
474 483 let mut to_major = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
475 484 override_major.push((k.clone(), v.clone()))
476 485 };
477 486 let mut to_minor = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
478 487 override_minor.push((k.clone(), v.clone()))
479 488 };
480 489
481 490 // The diff function leverage detection of the identical subpart if
482 491 // minor and major has some common ancestors. This make it very
483 492 // fast is most case.
484 493 //
485 494 // In case where the two map are vastly different in size, the current
486 495 // approach is still slowish because the iteration will iterate over
487 496 // all the "exclusive" content of the larger on. This situation can be
488 497 // frequent when the subgraph of revision we are processing has a lot
489 498 // of roots. Each roots adding they own fully new map to the mix (and
490 499 // likely a small map, if the path from the root to the "main path" is
491 500 // small.
492 501 //
493 502 // We could do better by detecting such situation and processing them
494 503 // differently.
495 504 for d in minor.diff(&major) {
496 505 match d {
497 506 DiffItem::Add(k, v) => to_minor(k, v),
498 507 DiffItem::Remove(k, v) => to_major(k, v),
499 508 DiffItem::Update { old, new } => {
500 509 let (dest, src_major) = new;
501 510 let (_, src_minor) = old;
502 match compare_value(
503 changes, oracle, dest, src_minor, src_major,
504 ) {
511 match cmp_value(dest, src_minor, src_major) {
505 512 MergePick::Major => to_minor(dest, src_major),
506 513 MergePick::Minor => to_major(dest, src_minor),
507 514 // If the two entry are identical, no need to do
508 515 // anything (but diff should not have yield them)
509 516 MergePick::Any => unreachable!(),
510 517 }
511 518 }
512 519 };
513 520 }
514 521
515 522 let updates;
516 523 let mut result;
517 524 if override_major.is_empty() {
518 525 result = major
519 526 } else if override_minor.is_empty() {
520 527 result = minor
521 528 } else {
522 529 if override_minor.len() < override_major.len() {
523 530 updates = override_minor;
524 531 result = minor;
525 532 } else {
526 533 updates = override_major;
527 534 result = major;
528 535 }
529 536 for (k, v) in updates {
530 537 result.insert(k, v);
531 538 }
532 539 }
533 540 result
534 541 }
535 542 }
536 543
/// Represents the side that should prevail when merging two
/// TimeStampedPathCopies.
enum MergePick {
    /// The "major" (p1) side prevails
    Major,
    /// The "minor" (p2) side prevails
    Minor,
    /// Either side can be used (because they are the same)
    Any,
}
547 554
548 555 /// decide which side prevails in case of conflicting values
549 556 #[allow(clippy::if_same_then_else)]
550 557 fn compare_value<A: Fn(Revision, Revision) -> bool>(
551 558 changes: &ChangedFiles,
552 559 oracle: &mut AncestorOracle<A>,
553 560 dest: &HgPathBuf,
554 561 src_minor: &TimeStampedPathCopy,
555 562 src_major: &TimeStampedPathCopy,
556 563 ) -> MergePick {
557 564 if src_major.path == src_minor.path {
558 565 // we have the same value, but from other source;
559 566 if src_major.rev == src_minor.rev {
560 567 // If the two entry are identical, they are both valid
561 568 MergePick::Any
562 569 } else if oracle.is_ancestor(src_major.rev, src_minor.rev) {
563 570 MergePick::Minor
564 571 } else {
565 572 MergePick::Major
566 573 }
567 574 } else if src_major.rev == src_minor.rev {
568 575 // We cannot get copy information for both p1 and p2 in the
569 576 // same rev. So this is the same value.
570 577 unreachable!(
571 578 "conflict information from p1 and p2 in the same revision"
572 579 );
573 580 } else {
574 581 let action = changes.get_merge_case(&dest);
575 582 if src_major.path.is_none() && action == MergeCase::Salvaged {
576 583 // If the file is "deleted" in the major side but was
577 584 // salvaged by the merge, we keep the minor side alive
578 585 MergePick::Minor
579 586 } else if src_minor.path.is_none() && action == MergeCase::Salvaged {
580 587 // If the file is "deleted" in the minor side but was
581 588 // salvaged by the merge, unconditionnaly preserve the
582 589 // major side.
583 590 MergePick::Major
584 591 } else if action == MergeCase::Merged {
585 592 // If the file was actively merged, copy information
586 593 // from each side might conflict. The major side will
587 594 // win such conflict.
588 595 MergePick::Major
589 596 } else if oracle.is_ancestor(src_major.rev, src_minor.rev) {
590 597 // If the minor side is strictly newer than the major
591 598 // side, it should be kept.
592 599 MergePick::Minor
593 600 } else if src_major.path.is_some() {
594 601 // without any special case, the "major" value win
595 602 // other the "minor" one.
596 603 MergePick::Major
597 604 } else if oracle.is_ancestor(src_minor.rev, src_major.rev) {
598 605 // the "major" rev is a direct ancestors of "minor",
599 606 // any different value should
600 607 // overwrite
601 608 MergePick::Major
602 609 } else {
603 610 // major version is None (so the file was deleted on
604 611 // that branch) and that branch is independant (neither
605 612 // minor nor major is an ancestors of the other one.)
606 613 // We preserve the new
607 614 // information about the new file.
608 615 MergePick::Minor
609 616 }
610 617 }
611 618 }
General Comments 0
You need to be logged in to leave comments. Login now