##// END OF EJS Templates
copies-rust: extract generic map merge logic from merge_copies_dict...
Simon Sapin -
r47328:435d9fc7 default
parent child Browse files
Show More
@@ -1,933 +1,762 b''
1 1 use crate::utils::hg_path::HgPath;
2 2 use crate::utils::hg_path::HgPathBuf;
3 3 use crate::Revision;
4 4 use crate::NULL_REVISION;
5 5
6 use im_rc::ordmap::DiffItem;
7 6 use im_rc::ordmap::Entry;
8 7 use im_rc::ordmap::OrdMap;
9 8 use im_rc::OrdSet;
10 9
11 10 use std::cmp::Ordering;
12 11 use std::collections::HashMap;
13 12 use std::convert::TryInto;
14 13
15 14 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
16 15
17 16 type PathToken = usize;
18 17
19 18 #[derive(Clone, Debug)]
20 19 struct CopySource {
21 20 /// revision at which the copy information was added
22 21 rev: Revision,
23 22 /// the copy source, (Set to None in case of deletion of the associated
24 23 /// key)
25 24 path: Option<PathToken>,
26 25 /// a set of previous `CopySource.rev` value directly or indirectly
27 26 /// overwritten by this one.
28 27 overwritten: OrdSet<Revision>,
29 28 }
30 29
31 30 impl CopySource {
32 31 /// create a new CopySource
33 32 ///
34 33 /// Use this when no previous copy source existed.
35 34 fn new(rev: Revision, path: Option<PathToken>) -> Self {
36 35 Self {
37 36 rev,
38 37 path,
39 38 overwritten: OrdSet::new(),
40 39 }
41 40 }
42 41
43 42 /// create a new CopySource from merging two others
44 43 ///
45 44 /// Use this when merging two InternalPathCopies requires active merging of
46 45 /// some entries.
47 46 fn new_from_merge(rev: Revision, winner: &Self, loser: &Self) -> Self {
48 47 let mut overwritten = OrdSet::new();
49 48 overwritten.extend(winner.overwritten.iter().copied());
50 49 overwritten.extend(loser.overwritten.iter().copied());
51 50 overwritten.insert(winner.rev);
52 51 overwritten.insert(loser.rev);
53 52 Self {
54 53 rev,
55 54 path: winner.path,
56 55 overwritten: overwritten,
57 56 }
58 57 }
59 58
60 59 /// Update the value of a pre-existing CopySource
61 60 ///
62 61 /// Use this when recording copy information from parent → child edges
63 62 fn overwrite(&mut self, rev: Revision, path: Option<PathToken>) {
64 63 self.overwritten.insert(self.rev);
65 64 self.rev = rev;
66 65 self.path = path;
67 66 }
68 67
69 68 /// Mark pre-existing copy information as "dropped" by a file deletion
70 69 ///
71 70 /// Use this when recording copy information from parent → child edges
72 71 fn mark_delete(&mut self, rev: Revision) {
73 72 self.overwritten.insert(self.rev);
74 73 self.rev = rev;
75 74 self.path = None;
76 75 }
77 76
78 77 /// Mark pre-existing copy information as "dropped" by a file deletion
79 78 ///
80 79 /// Use this when recording copy information from parent → child edges
81 80 fn mark_delete_with_pair(&mut self, rev: Revision, other: &Self) {
82 81 self.overwritten.insert(self.rev);
83 82 if other.rev != rev {
84 83 self.overwritten.insert(other.rev);
85 84 }
86 85 self.overwritten.extend(other.overwritten.iter().copied());
87 86 self.rev = rev;
88 87 self.path = None;
89 88 }
90 89
91 90 fn is_overwritten_by(&self, other: &Self) -> bool {
92 91 other.overwritten.contains(&self.rev)
93 92 }
94 93 }
95 94
96 95 // For the same "dest", content generated for a given revision will always be
97 96 // the same.
98 97 impl PartialEq for CopySource {
99 98 fn eq(&self, other: &Self) -> bool {
100 99 #[cfg(debug_assertions)]
101 100 {
102 101 if self.rev == other.rev {
103 102 debug_assert!(self.path == other.path);
104 103 debug_assert!(self.overwritten == other.overwritten);
105 104 }
106 105 }
107 106 self.rev == other.rev
108 107 }
109 108 }
110 109
111 110 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
112 111 type InternalPathCopies = OrdMap<PathToken, CopySource>;
113 112
114 113 /// hold parent 1, parent 2 and relevant files actions.
115 114 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
116 115
117 116 /// represent the files affected by a changesets
118 117 ///
119 118 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
120 119 /// all the data categories tracked by it.
121 120 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
122 121 /// all the data categories tracked by it.
123 122 pub struct ChangedFiles<'a> {
124 123 nb_items: u32,
125 124 index: &'a [u8],
126 125 data: &'a [u8],
127 126 }
128 127
129 128 /// Represent active changes that affect the copy tracing.
130 129 enum Action<'a> {
131 130 /// The parent ? children edge is removing a file
132 131 ///
133 132 /// (actually, this could be the edge from the other parent, but it does
134 133 /// not matters)
135 134 Removed(&'a HgPath),
136 135 /// The parent ? children edge introduce copy information between (dest,
137 136 /// source)
138 137 CopiedFromP1(&'a HgPath, &'a HgPath),
139 138 CopiedFromP2(&'a HgPath, &'a HgPath),
140 139 }
141 140
142 141 /// This express the possible "special" case we can get in a merge
143 142 ///
144 143 /// See mercurial/metadata.py for details on these values.
145 144 #[derive(PartialEq)]
146 145 enum MergeCase {
147 146 /// Merged: file had history on both side that needed to be merged
148 147 Merged,
149 148 /// Salvaged: file was candidate for deletion, but survived the merge
150 149 Salvaged,
151 150 /// Normal: Not one of the two cases above
152 151 Normal,
153 152 }
154 153
155 154 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
156 155
157 156 const EMPTY: &[u8] = b"";
158 157 const COPY_MASK: u8 = 3;
159 158 const P1_COPY: u8 = 2;
160 159 const P2_COPY: u8 = 3;
161 160 const ACTION_MASK: u8 = 28;
162 161 const REMOVED: u8 = 12;
163 162 const MERGED: u8 = 8;
164 163 const SALVAGED: u8 = 16;
165 164
166 165 impl<'a> ChangedFiles<'a> {
167 166 const INDEX_START: usize = 4;
168 167 const ENTRY_SIZE: u32 = 9;
169 168 const FILENAME_START: u32 = 1;
170 169 const COPY_SOURCE_START: u32 = 5;
171 170
172 171 pub fn new(data: &'a [u8]) -> Self {
173 172 assert!(
174 173 data.len() >= 4,
175 174 "data size ({}) is too small to contain the header (4)",
176 175 data.len()
177 176 );
178 177 let nb_items_raw: [u8; 4] = (&data[0..=3])
179 178 .try_into()
180 179 .expect("failed to turn 4 bytes into 4 bytes");
181 180 let nb_items = u32::from_be_bytes(nb_items_raw);
182 181
183 182 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
184 183 let index_end = Self::INDEX_START + index_size;
185 184
186 185 assert!(
187 186 data.len() >= index_end,
188 187 "data size ({}) is too small to fit the index_data ({})",
189 188 data.len(),
190 189 index_end
191 190 );
192 191
193 192 let ret = ChangedFiles {
194 193 nb_items,
195 194 index: &data[Self::INDEX_START..index_end],
196 195 data: &data[index_end..],
197 196 };
198 197 let max_data = ret.filename_end(nb_items - 1) as usize;
199 198 assert!(
200 199 ret.data.len() >= max_data,
201 200 "data size ({}) is too small to fit all data ({})",
202 201 data.len(),
203 202 index_end + max_data
204 203 );
205 204 ret
206 205 }
207 206
208 207 pub fn new_empty() -> Self {
209 208 ChangedFiles {
210 209 nb_items: 0,
211 210 index: EMPTY,
212 211 data: EMPTY,
213 212 }
214 213 }
215 214
216 215 /// internal function to return an individual entry at a given index
217 216 fn entry(&'a self, idx: u32) -> FileChange<'a> {
218 217 if idx >= self.nb_items {
219 218 panic!(
220 219 "index for entry is higher that the number of file {} >= {}",
221 220 idx, self.nb_items
222 221 )
223 222 }
224 223 let flags = self.flags(idx);
225 224 let filename = self.filename(idx);
226 225 let copy_idx = self.copy_idx(idx);
227 226 let copy_source = self.filename(copy_idx);
228 227 (flags, filename, copy_source)
229 228 }
230 229
231 230 /// internal function to return the filename of the entry at a given index
232 231 fn filename(&self, idx: u32) -> &HgPath {
233 232 let filename_start;
234 233 if idx == 0 {
235 234 filename_start = 0;
236 235 } else {
237 236 filename_start = self.filename_end(idx - 1)
238 237 }
239 238 let filename_end = self.filename_end(idx);
240 239 let filename_start = filename_start as usize;
241 240 let filename_end = filename_end as usize;
242 241 HgPath::new(&self.data[filename_start..filename_end])
243 242 }
244 243
245 244 /// internal function to return the flag field of the entry at a given
246 245 /// index
247 246 fn flags(&self, idx: u32) -> u8 {
248 247 let idx = idx as usize;
249 248 self.index[idx * (Self::ENTRY_SIZE as usize)]
250 249 }
251 250
252 251 /// internal function to return the end of a filename part at a given index
253 252 fn filename_end(&self, idx: u32) -> u32 {
254 253 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
255 254 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
256 255 let start = start as usize;
257 256 let end = end as usize;
258 257 let raw = (&self.index[start..end])
259 258 .try_into()
260 259 .expect("failed to turn 4 bytes into 4 bytes");
261 260 u32::from_be_bytes(raw)
262 261 }
263 262
264 263 /// internal function to return index of the copy source of the entry at a
265 264 /// given index
266 265 fn copy_idx(&self, idx: u32) -> u32 {
267 266 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
268 267 let end = (idx + 1) * Self::ENTRY_SIZE;
269 268 let start = start as usize;
270 269 let end = end as usize;
271 270 let raw = (&self.index[start..end])
272 271 .try_into()
273 272 .expect("failed to turn 4 bytes into 4 bytes");
274 273 u32::from_be_bytes(raw)
275 274 }
276 275
277 276 /// Return an iterator over all the `Action` in this instance.
278 277 fn iter_actions(&self) -> ActionsIterator {
279 278 ActionsIterator {
280 279 changes: &self,
281 280 current: 0,
282 281 }
283 282 }
284 283
285 284 /// return the MergeCase value associated with a filename
286 285 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
287 286 if self.nb_items == 0 {
288 287 return MergeCase::Normal;
289 288 }
290 289 let mut low_part = 0;
291 290 let mut high_part = self.nb_items;
292 291
293 292 while low_part < high_part {
294 293 let cursor = (low_part + high_part - 1) / 2;
295 294 let (flags, filename, _source) = self.entry(cursor);
296 295 match path.cmp(filename) {
297 296 Ordering::Less => low_part = cursor + 1,
298 297 Ordering::Greater => high_part = cursor,
299 298 Ordering::Equal => {
300 299 return match flags & ACTION_MASK {
301 300 MERGED => MergeCase::Merged,
302 301 SALVAGED => MergeCase::Salvaged,
303 302 _ => MergeCase::Normal,
304 303 };
305 304 }
306 305 }
307 306 }
308 307 MergeCase::Normal
309 308 }
310 309 }
311 310
312 311 struct ActionsIterator<'a> {
313 312 changes: &'a ChangedFiles<'a>,
314 313 current: u32,
315 314 }
316 315
317 316 impl<'a> Iterator for ActionsIterator<'a> {
318 317 type Item = Action<'a>;
319 318
320 319 fn next(&mut self) -> Option<Action<'a>> {
321 320 while self.current < self.changes.nb_items {
322 321 let (flags, file, source) = self.changes.entry(self.current);
323 322 self.current += 1;
324 323 if (flags & ACTION_MASK) == REMOVED {
325 324 return Some(Action::Removed(file));
326 325 }
327 326 let copy = flags & COPY_MASK;
328 327 if copy == P1_COPY {
329 328 return Some(Action::CopiedFromP1(file, source));
330 329 } else if copy == P2_COPY {
331 330 return Some(Action::CopiedFromP2(file, source));
332 331 }
333 332 }
334 333 return None;
335 334 }
336 335 }
337 336
338 337 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
339 338 /// ChangedFiles
340 339 ///
341 340 /// It is passed to the RevInfoMaker callback who can assign any necessary
342 341 /// content to the `data` attribute. The copy tracing code is responsible for
343 342 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
344 343 pub struct DataHolder<D> {
345 344 /// RevInfoMaker callback should assign data referenced by the
346 345 /// ChangedFiles struct it return to this attribute. The DataHolder
347 346 /// lifetime will be at least as long as the ChangedFiles one.
348 347 pub data: Option<D>,
349 348 }
350 349
351 350 pub type RevInfoMaker<'a, D> =
352 351 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
353 352
354 353 /// A small "tokenizer" responsible of turning full HgPath into lighter
355 354 /// PathToken
356 355 ///
357 356 /// Dealing with small object, like integer is much faster, so HgPath input are
358 357 /// turned into integer "PathToken" and converted back in the end.
359 358 #[derive(Clone, Debug, Default)]
360 359 struct TwoWayPathMap {
361 360 token: HashMap<HgPathBuf, PathToken>,
362 361 path: Vec<HgPathBuf>,
363 362 }
364 363
365 364 impl TwoWayPathMap {
366 365 fn tokenize(&mut self, path: &HgPath) -> PathToken {
367 366 match self.token.get(path) {
368 367 Some(a) => *a,
369 368 None => {
370 369 let a = self.token.len();
371 370 let buf = path.to_owned();
372 371 self.path.push(buf.clone());
373 372 self.token.insert(buf, a);
374 373 a
375 374 }
376 375 }
377 376 }
378 377
379 378 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
380 379 assert!(token < self.path.len(), format!("Unknown token: {}", token));
381 380 &self.path[token]
382 381 }
383 382 }
384 383
385 384 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
386 385 ///
387 386 /// Arguments are:
388 387 ///
389 388 /// revs: all revisions to be considered
390 389 /// children: a {parent ? [childrens]} mapping
391 390 /// target_rev: the final revision we are combining copies to
392 391 /// rev_info(rev): callback to get revision information:
393 392 /// * first parent
394 393 /// * second parent
395 394 /// * ChangedFiles
396 395 /// isancestors(low_rev, high_rev): callback to check if a revision is an
397 396 /// ancestor of another
398 397 pub fn combine_changeset_copies<D>(
399 398 revs: Vec<Revision>,
400 399 mut children_count: HashMap<Revision, usize>,
401 400 target_rev: Revision,
402 401 rev_info: RevInfoMaker<D>,
403 402 ) -> PathCopies {
404 403 let mut all_copies = HashMap::new();
405 404
406 405 let mut path_map = TwoWayPathMap::default();
407 406
408 407 for rev in revs {
409 408 let mut d: DataHolder<D> = DataHolder { data: None };
410 409 let (p1, p2, changes) = rev_info(rev, &mut d);
411 410
412 411 // We will chain the copies information accumulated for the parent with
413 412 // the individual copies information the curent revision. Creating a
414 413 // new TimeStampedPath for each `rev` → `children` vertex.
415 414 // Retrieve data computed in a previous iteration
416 415 let p1_copies = match p1 {
417 416 NULL_REVISION => None,
418 417 _ => get_and_clean_parent_copies(
419 418 &mut all_copies,
420 419 &mut children_count,
421 420 p1,
422 421 ), // will be None if the vertex is not to be traversed
423 422 };
424 423 let p2_copies = match p2 {
425 424 NULL_REVISION => None,
426 425 _ => get_and_clean_parent_copies(
427 426 &mut all_copies,
428 427 &mut children_count,
429 428 p2,
430 429 ), // will be None if the vertex is not to be traversed
431 430 };
432 431 // combine it with data for that revision
433 432 let (p1_copies, p2_copies) =
434 433 chain_changes(&mut path_map, p1_copies, p2_copies, &changes, rev);
435 434 let copies = match (p1_copies, p2_copies) {
436 435 (None, None) => None,
437 436 (c, None) => c,
438 437 (None, c) => c,
439 438 (Some(p1_copies), Some(p2_copies)) => Some(merge_copies_dict(
440 439 &path_map, rev, p2_copies, p1_copies, &changes,
441 440 )),
442 441 };
443 442 if let Some(c) = copies {
444 443 all_copies.insert(rev, c);
445 444 }
446 445 }
447 446
448 447 // Drop internal information (like the timestamp) and return the final
449 448 // mapping.
450 449 let tt_result = all_copies
451 450 .remove(&target_rev)
452 451 .expect("target revision was not processed");
453 452 let mut result = PathCopies::default();
454 453 for (dest, tt_source) in tt_result {
455 454 if let Some(path) = tt_source.path {
456 455 let path_dest = path_map.untokenize(dest).to_owned();
457 456 let path_path = path_map.untokenize(path).to_owned();
458 457 result.insert(path_dest, path_path);
459 458 }
460 459 }
461 460 result
462 461 }
463 462
464 463 /// fetch previous computed information
465 464 ///
466 465 /// If no other children are expected to need this information, we drop it from
467 466 /// the cache.
468 467 ///
469 468 /// If parent is not part of the set we are expected to walk, return None.
470 469 fn get_and_clean_parent_copies(
471 470 all_copies: &mut HashMap<Revision, InternalPathCopies>,
472 471 children_count: &mut HashMap<Revision, usize>,
473 472 parent_rev: Revision,
474 473 ) -> Option<InternalPathCopies> {
475 474 let count = children_count.get_mut(&parent_rev)?;
476 475 *count -= 1;
477 476 if *count == 0 {
478 477 match all_copies.remove(&parent_rev) {
479 478 Some(c) => Some(c),
480 479 None => Some(InternalPathCopies::default()),
481 480 }
482 481 } else {
483 482 match all_copies.get(&parent_rev) {
484 483 Some(c) => Some(c.clone()),
485 484 None => Some(InternalPathCopies::default()),
486 485 }
487 486 }
488 487 }
489 488
490 489 /// Combine ChangedFiles with some existing PathCopies information and return
491 490 /// the result
492 491 fn chain_changes(
493 492 path_map: &mut TwoWayPathMap,
494 493 base_p1_copies: Option<InternalPathCopies>,
495 494 base_p2_copies: Option<InternalPathCopies>,
496 495 changes: &ChangedFiles,
497 496 current_rev: Revision,
498 497 ) -> (Option<InternalPathCopies>, Option<InternalPathCopies>) {
499 498 // Fast path the "nothing to do" case.
500 499 if let (None, None) = (&base_p1_copies, &base_p2_copies) {
501 500 return (None, None);
502 501 }
503 502
504 503 let mut p1_copies = base_p1_copies.clone();
505 504 let mut p2_copies = base_p2_copies.clone();
506 505 for action in changes.iter_actions() {
507 506 match action {
508 507 Action::CopiedFromP1(path_dest, path_source) => {
509 508 match &mut p1_copies {
510 509 None => (), // This is not a vertex we should proceed.
511 510 Some(copies) => add_one_copy(
512 511 current_rev,
513 512 path_map,
514 513 copies,
515 514 base_p1_copies.as_ref().unwrap(),
516 515 path_dest,
517 516 path_source,
518 517 ),
519 518 }
520 519 }
521 520 Action::CopiedFromP2(path_dest, path_source) => {
522 521 match &mut p2_copies {
523 522 None => (), // This is not a vertex we should proceed.
524 523 Some(copies) => add_one_copy(
525 524 current_rev,
526 525 path_map,
527 526 copies,
528 527 base_p2_copies.as_ref().unwrap(),
529 528 path_dest,
530 529 path_source,
531 530 ),
532 531 }
533 532 }
534 533 Action::Removed(deleted_path) => {
535 534 // We must drop copy information for removed file.
536 535 //
537 536 // We need to explicitly record them as dropped to
538 537 // propagate this information when merging two
539 538 // InternalPathCopies object.
540 539 let deleted = path_map.tokenize(deleted_path);
541 540
542 541 let p1_entry = match &mut p1_copies {
543 542 None => None,
544 543 Some(copies) => match copies.entry(deleted) {
545 544 Entry::Occupied(e) => Some(e),
546 545 Entry::Vacant(_) => None,
547 546 },
548 547 };
549 548 let p2_entry = match &mut p2_copies {
550 549 None => None,
551 550 Some(copies) => match copies.entry(deleted) {
552 551 Entry::Occupied(e) => Some(e),
553 552 Entry::Vacant(_) => None,
554 553 },
555 554 };
556 555
557 556 match (p1_entry, p2_entry) {
558 557 (None, None) => (),
559 558 (Some(mut e), None) => {
560 559 e.get_mut().mark_delete(current_rev)
561 560 }
562 561 (None, Some(mut e)) => {
563 562 e.get_mut().mark_delete(current_rev)
564 563 }
565 564 (Some(mut e1), Some(mut e2)) => {
566 565 let cs1 = e1.get_mut();
567 566 let cs2 = e2.get();
568 567 if cs1 == cs2 {
569 568 cs1.mark_delete(current_rev);
570 569 } else {
571 570 cs1.mark_delete_with_pair(current_rev, &cs2);
572 571 }
573 572 e2.insert(cs1.clone());
574 573 }
575 574 }
576 575 }
577 576 }
578 577 }
579 578 (p1_copies, p2_copies)
580 579 }
581 580
582 581 // insert one new copy information in an InternalPathCopies
583 582 //
584 583 // This deal with chaining and overwrite.
585 584 fn add_one_copy(
586 585 current_rev: Revision,
587 586 path_map: &mut TwoWayPathMap,
588 587 copies: &mut InternalPathCopies,
589 588 base_copies: &InternalPathCopies,
590 589 path_dest: &HgPath,
591 590 path_source: &HgPath,
592 591 ) {
593 592 let dest = path_map.tokenize(path_dest);
594 593 let source = path_map.tokenize(path_source);
595 594 let entry;
596 595 if let Some(v) = base_copies.get(&source) {
597 596 entry = match &v.path {
598 597 Some(path) => Some((*(path)).to_owned()),
599 598 None => Some(source.to_owned()),
600 599 }
601 600 } else {
602 601 entry = Some(source.to_owned());
603 602 }
604 603 // Each new entry is introduced by the children, we
605 604 // record this information as we will need it to take
606 605 // the right decision when merging conflicting copy
607 606 // information. See merge_copies_dict for details.
608 607 match copies.entry(dest) {
609 608 Entry::Vacant(slot) => {
610 609 let ttpc = CopySource::new(current_rev, entry);
611 610 slot.insert(ttpc);
612 611 }
613 612 Entry::Occupied(mut slot) => {
614 613 let ttpc = slot.get_mut();
615 614 ttpc.overwrite(current_rev, entry);
616 615 }
617 616 }
618 617 }
619 618
620 619 /// merge two copies-mapping together, minor and major
621 620 ///
622 621 /// In case of conflict, value from "major" will be picked, unless in some
623 622 /// cases. See inline documentation for details.
624 623 fn merge_copies_dict(
625 624 path_map: &TwoWayPathMap,
626 625 current_merge: Revision,
627 mut minor: InternalPathCopies,
628 mut major: InternalPathCopies,
626 minor: InternalPathCopies,
627 major: InternalPathCopies,
629 628 changes: &ChangedFiles,
630 629 ) -> InternalPathCopies {
631 // This closure exist as temporary help while multiple developper are
632 // actively working on this code. Feel free to re-inline it once this
633 // code is more settled.
634 let cmp_value =
635 |dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| {
636 compare_value(
637 path_map,
630 use crate::utils::{ordmap_union_with_merge, MergeResult};
631
632 ordmap_union_with_merge(minor, major, |dest, src_minor, src_major| {
633 let (pick, overwrite) = compare_value(
634 path_map,
635 current_merge,
636 changes,
637 dest,
638 src_minor,
639 src_major,
640 );
641 if overwrite {
642 let (winner, loser) = match pick {
643 MergePick::Major | MergePick::Any => (src_major, src_minor),
644 MergePick::Minor => (src_minor, src_major),
645 };
646 MergeResult::UseNewValue(CopySource::new_from_merge(
638 647 current_merge,
639 changes,
640 dest,
641 src_minor,
642 src_major,
643 )
644 };
645 if minor.is_empty() {
646 major
647 } else if major.is_empty() {
648 minor
649 } else if minor.len() * 2 < major.len() {
650 // Lets says we are merging two InternalPathCopies instance A and B.
651 //
652 // If A contains N items, the merge result will never contains more
653 // than N values differents than the one in A
654 //
655 // If B contains M items, with M > N, the merge result will always
656 // result in a minimum of M - N value differents than the on in
657 // A
658 //
659 // As a result, if N < (M-N), we know that simply iterating over A will
660 // yield less difference than iterating over the difference
661 // between A and B.
662 //
663 // This help performance a lot in case were a tiny
664 // InternalPathCopies is merged with a much larger one.
665 for (dest, src_minor) in minor {
666 let src_major = major.get(&dest);
667 match src_major {
668 None => {
669 major.insert(dest, src_minor);
670 }
671 Some(src_major) => {
672 let (pick, overwrite) =
673 cmp_value(&dest, &src_minor, src_major);
674 if overwrite {
675 let src = match pick {
676 MergePick::Major => CopySource::new_from_merge(
677 current_merge,
678 src_major,
679 &src_minor,
680 ),
681 MergePick::Minor => CopySource::new_from_merge(
682 current_merge,
683 &src_minor,
684 src_major,
685 ),
686 MergePick::Any => CopySource::new_from_merge(
687 current_merge,
688 src_major,
689 &src_minor,
690 ),
691 };
692 major.insert(dest, src);
693 } else {
694 match pick {
695 MergePick::Any | MergePick::Major => None,
696 MergePick::Minor => major.insert(dest, src_minor),
697 };
698 }
699 }
700 };
701 }
702 major
703 } else if major.len() * 2 < minor.len() {
704 // This use the same rational than the previous block.
705 // (Check previous block documentation for details.)
706 for (dest, src_major) in major {
707 let src_minor = minor.get(&dest);
708 match src_minor {
709 None => {
710 minor.insert(dest, src_major);
648 winner,
649 loser,
650 ))
651 } else {
652 match pick {
653 MergePick::Any | MergePick::Major => {
654 MergeResult::UseRightValue
711 655 }
712 Some(src_minor) => {
713 let (pick, overwrite) =
714 cmp_value(&dest, src_minor, &src_major);
715 if overwrite {
716 let src = match pick {
717 MergePick::Major => CopySource::new_from_merge(
718 current_merge,
719 &src_major,
720 src_minor,
721 ),
722 MergePick::Minor => CopySource::new_from_merge(
723 current_merge,
724 src_minor,
725 &src_major,
726 ),
727 MergePick::Any => CopySource::new_from_merge(
728 current_merge,
729 &src_major,
730 src_minor,
731 ),
732 };
733 minor.insert(dest, src);
734 } else {
735 match pick {
736 MergePick::Any | MergePick::Minor => None,
737 MergePick::Major => minor.insert(dest, src_major),
738 };
739 }
740 }
741 };
742 }
743 minor
744 } else {
745 let mut override_minor = Vec::new();
746 let mut override_major = Vec::new();
747
748 let mut to_major = |k: &PathToken, v: &CopySource| {
749 override_major.push((k.clone(), v.clone()))
750 };
751 let mut to_minor = |k: &PathToken, v: &CopySource| {
752 override_minor.push((k.clone(), v.clone()))
753 };
754
755 // The diff function leverage detection of the identical subpart if
756 // minor and major has some common ancestors. This make it very
757 // fast is most case.
758 //
759 // In case where the two map are vastly different in size, the current
760 // approach is still slowish because the iteration will iterate over
761 // all the "exclusive" content of the larger on. This situation can be
762 // frequent when the subgraph of revision we are processing has a lot
763 // of roots. Each roots adding they own fully new map to the mix (and
764 // likely a small map, if the path from the root to the "main path" is
765 // small.
766 //
767 // We could do better by detecting such situation and processing them
768 // differently.
769 for d in minor.diff(&major) {
770 match d {
771 DiffItem::Add(k, v) => to_minor(k, v),
772 DiffItem::Remove(k, v) => to_major(k, v),
773 DiffItem::Update { old, new } => {
774 let (dest, src_major) = new;
775 let (_, src_minor) = old;
776 let (pick, overwrite) =
777 cmp_value(dest, src_minor, src_major);
778 if overwrite {
779 let src = match pick {
780 MergePick::Major => CopySource::new_from_merge(
781 current_merge,
782 src_major,
783 src_minor,
784 ),
785 MergePick::Minor => CopySource::new_from_merge(
786 current_merge,
787 src_minor,
788 src_major,
789 ),
790 MergePick::Any => CopySource::new_from_merge(
791 current_merge,
792 src_major,
793 src_minor,
794 ),
795 };
796 to_minor(dest, &src);
797 to_major(dest, &src);
798 } else {
799 match pick {
800 MergePick::Major => to_minor(dest, src_major),
801 MergePick::Minor => to_major(dest, src_minor),
802 // If the two entry are identical, no need to do
803 // anything (but diff should not have yield them)
804 MergePick::Any => unreachable!(),
805 }
806 }
807 }
808 };
809 }
810
811 let updates;
812 let mut result;
813 if override_major.is_empty() {
814 result = major
815 } else if override_minor.is_empty() {
816 result = minor
817 } else {
818 if override_minor.len() < override_major.len() {
819 updates = override_minor;
820 result = minor;
821 } else {
822 updates = override_major;
823 result = major;
824 }
825 for (k, v) in updates {
826 result.insert(k, v);
656 MergePick::Minor => MergeResult::UseLeftValue,
827 657 }
828 658 }
829 result
830 }
659 })
831 660 }
832 661
833 662 /// represent the side that should prevail when merging two
834 663 /// InternalPathCopies
835 664 enum MergePick {
836 665 /// The "major" (p1) side prevails
837 666 Major,
838 667 /// The "minor" (p2) side prevails
839 668 Minor,
840 669 /// Any side could be used (because they are the same)
841 670 Any,
842 671 }
843 672
844 673 /// decide which side prevails in case of conflicting values
845 674 #[allow(clippy::if_same_then_else)]
846 675 fn compare_value(
847 676 path_map: &TwoWayPathMap,
848 677 current_merge: Revision,
849 678 changes: &ChangedFiles,
850 679 dest: &PathToken,
851 680 src_minor: &CopySource,
852 681 src_major: &CopySource,
853 682 ) -> (MergePick, bool) {
854 683 if src_major == src_minor {
855 684 (MergePick::Any, false)
856 685 } else if src_major.rev == current_merge {
857 686 // minor is different according to per minor == major check earlier
858 687 debug_assert!(src_minor.rev != current_merge);
859 688
860 689 // The last value comes the current merge, this value -will- win
861 690 // eventually.
862 691 (MergePick::Major, true)
863 692 } else if src_minor.rev == current_merge {
864 693 // The last value comes the current merge, this value -will- win
865 694 // eventually.
866 695 (MergePick::Minor, true)
867 696 } else if src_major.path == src_minor.path {
868 697 debug_assert!(src_major.rev != src_major.rev);
869 698 // we have the same value, but from other source;
870 699 if src_major.is_overwritten_by(src_minor) {
871 700 (MergePick::Minor, false)
872 701 } else if src_minor.is_overwritten_by(src_major) {
873 702 (MergePick::Major, false)
874 703 } else {
875 704 (MergePick::Any, true)
876 705 }
877 706 } else {
878 707 debug_assert!(src_major.rev != src_major.rev);
879 708 let dest_path = path_map.untokenize(*dest);
880 709 let action = changes.get_merge_case(dest_path);
881 710 if src_minor.path.is_some()
882 711 && src_major.path.is_none()
883 712 && action == MergeCase::Salvaged
884 713 {
885 714 // If the file is "deleted" in the major side but was
886 715 // salvaged by the merge, we keep the minor side alive
887 716 (MergePick::Minor, true)
888 717 } else if src_major.path.is_some()
889 718 && src_minor.path.is_none()
890 719 && action == MergeCase::Salvaged
891 720 {
892 721 // If the file is "deleted" in the minor side but was
893 722 // salvaged by the merge, unconditionnaly preserve the
894 723 // major side.
895 724 (MergePick::Major, true)
896 725 } else if src_minor.is_overwritten_by(src_major) {
897 726 // The information from the minor version are strictly older than
898 727 // the major version
899 728 if action == MergeCase::Merged {
900 729 // If the file was actively merged, its means some non-copy
901 730 // activity happened on the other branch. It
902 731 // mean the older copy information are still relevant.
903 732 //
904 733 // The major side wins such conflict.
905 734 (MergePick::Major, true)
906 735 } else {
907 736 // No activity on the minor branch, pick the newer one.
908 737 (MergePick::Major, false)
909 738 }
910 739 } else if src_major.is_overwritten_by(src_minor) {
911 740 if action == MergeCase::Merged {
912 741 // If the file was actively merged, its means some non-copy
913 742 // activity happened on the other branch. It
914 743 // mean the older copy information are still relevant.
915 744 //
916 745 // The major side wins such conflict.
917 746 (MergePick::Major, true)
918 747 } else {
919 748 // No activity on the minor branch, pick the newer one.
920 749 (MergePick::Minor, false)
921 750 }
922 751 } else if src_minor.path.is_none() {
923 752 // the minor side has no relevant information, pick the alive one
924 753 (MergePick::Major, true)
925 754 } else if src_major.path.is_none() {
926 755 // the major side has no relevant information, pick the alive one
927 756 (MergePick::Minor, true)
928 757 } else {
929 758 // by default the major side wins
930 759 (MergePick::Major, true)
931 760 }
932 761 }
933 762 }
@@ -1,201 +1,351 b''
1 1 // utils module
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Contains useful functions, traits, structs, etc. for use in core.
9 9
10 10 use crate::errors::{HgError, IoErrorContext};
11 11 use crate::utils::hg_path::HgPath;
12 use im_rc::ordmap::DiffItem;
13 use im_rc::ordmap::OrdMap;
12 14 use std::{io::Write, ops::Deref};
13 15
14 16 pub mod files;
15 17 pub mod hg_path;
16 18 pub mod path_auditor;
17 19
18 20 /// Useful until rust/issues/56345 is stable
19 21 ///
20 22 /// # Examples
21 23 ///
22 24 /// ```
23 25 /// use crate::hg::utils::find_slice_in_slice;
24 26 ///
25 27 /// let haystack = b"This is the haystack".to_vec();
26 28 /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
27 29 /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
28 30 /// ```
29 31 pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
30 32 where
31 33 for<'a> &'a [T]: PartialEq,
32 34 {
33 35 slice
34 36 .windows(needle.len())
35 37 .position(|window| window == needle)
36 38 }
37 39
38 40 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
39 41 ///
40 42 /// # Examples
41 43 ///
42 44 /// ```
43 45 /// use crate::hg::utils::replace_slice;
44 46 /// let mut line = b"I hate writing tests!".to_vec();
45 47 /// replace_slice(&mut line, b"hate", b"love");
46 48 /// assert_eq!(
47 49 /// line,
48 50 /// b"I love writing tests!".to_vec()
49 51 /// );
50 52 /// ```
51 53 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
52 54 where
53 55 T: Clone + PartialEq,
54 56 {
55 57 if buf.len() < from.len() || from.len() != to.len() {
56 58 return;
57 59 }
58 60 for i in 0..=buf.len() - from.len() {
59 61 if buf[i..].starts_with(from) {
60 62 buf[i..(i + from.len())].clone_from_slice(to);
61 63 }
62 64 }
63 65 }
64 66
65 67 pub trait SliceExt {
66 68 fn trim_end(&self) -> &Self;
67 69 fn trim_start(&self) -> &Self;
68 70 fn trim(&self) -> &Self;
69 71 fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
70 72 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
71 73 }
72 74
73 75 #[allow(clippy::trivially_copy_pass_by_ref)]
74 76 fn is_not_whitespace(c: &u8) -> bool {
75 77 !(*c as char).is_whitespace()
76 78 }
77 79
78 80 impl SliceExt for [u8] {
79 81 fn trim_end(&self) -> &[u8] {
80 82 if let Some(last) = self.iter().rposition(is_not_whitespace) {
81 83 &self[..=last]
82 84 } else {
83 85 &[]
84 86 }
85 87 }
86 88 fn trim_start(&self) -> &[u8] {
87 89 if let Some(first) = self.iter().position(is_not_whitespace) {
88 90 &self[first..]
89 91 } else {
90 92 &[]
91 93 }
92 94 }
93 95
94 96 /// ```
95 97 /// use hg::utils::SliceExt;
96 98 /// assert_eq!(
97 99 /// b" to trim ".trim(),
98 100 /// b"to trim"
99 101 /// );
100 102 /// assert_eq!(
101 103 /// b"to trim ".trim(),
102 104 /// b"to trim"
103 105 /// );
104 106 /// assert_eq!(
105 107 /// b" to trim".trim(),
106 108 /// b"to trim"
107 109 /// );
108 110 /// ```
109 111 fn trim(&self) -> &[u8] {
110 112 self.trim_start().trim_end()
111 113 }
112 114
113 115 fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
114 116 if self.starts_with(needle) {
115 117 Some(&self[needle.len()..])
116 118 } else {
117 119 None
118 120 }
119 121 }
120 122
121 123 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
122 124 let mut iter = self.splitn(2, |&byte| byte == separator);
123 125 let a = iter.next()?;
124 126 let b = iter.next()?;
125 127 Some((a, b))
126 128 }
127 129 }
128 130
129 131 pub trait Escaped {
130 132 /// Return bytes escaped for display to the user
131 133 fn escaped_bytes(&self) -> Vec<u8>;
132 134 }
133 135
134 136 impl Escaped for u8 {
135 137 fn escaped_bytes(&self) -> Vec<u8> {
136 138 let mut acc = vec![];
137 139 match self {
138 140 c @ b'\'' | c @ b'\\' => {
139 141 acc.push(b'\\');
140 142 acc.push(*c);
141 143 }
142 144 b'\t' => {
143 145 acc.extend(br"\\t");
144 146 }
145 147 b'\n' => {
146 148 acc.extend(br"\\n");
147 149 }
148 150 b'\r' => {
149 151 acc.extend(br"\\r");
150 152 }
151 153 c if (*c < b' ' || *c >= 127) => {
152 154 write!(acc, "\\x{:x}", self).unwrap();
153 155 }
154 156 c => {
155 157 acc.push(*c);
156 158 }
157 159 }
158 160 acc
159 161 }
160 162 }
161 163
162 164 impl<'a, T: Escaped> Escaped for &'a [T] {
163 165 fn escaped_bytes(&self) -> Vec<u8> {
164 166 self.iter().flat_map(Escaped::escaped_bytes).collect()
165 167 }
166 168 }
167 169
168 170 impl<T: Escaped> Escaped for Vec<T> {
169 171 fn escaped_bytes(&self) -> Vec<u8> {
170 172 self.deref().escaped_bytes()
171 173 }
172 174 }
173 175
174 176 impl<'a> Escaped for &'a HgPath {
175 177 fn escaped_bytes(&self) -> Vec<u8> {
176 178 self.as_bytes().escaped_bytes()
177 179 }
178 180 }
179 181
180 182 // TODO: use the str method when we require Rust 1.45
181 183 pub(crate) fn strip_suffix<'a>(s: &'a str, suffix: &str) -> Option<&'a str> {
182 184 if s.ends_with(suffix) {
183 185 Some(&s[..s.len() - suffix.len()])
184 186 } else {
185 187 None
186 188 }
187 189 }
188 190
189 191 pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
190 192 std::env::current_dir().map_err(|error| HgError::IoError {
191 193 error,
192 194 context: IoErrorContext::CurrentDir,
193 195 })
194 196 }
195 197
196 198 pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
197 199 std::env::current_exe().map_err(|error| HgError::IoError {
198 200 error,
199 201 context: IoErrorContext::CurrentExe,
200 202 })
201 203 }
204
205 pub(crate) enum MergeResult<V> {
206 UseLeftValue,
207 UseRightValue,
208 UseNewValue(V),
209 }
210
211 /// Return the union of the two given maps,
212 /// calling `merge(key, left_value, right_value)` to resolve keys that exist in
213 /// both.
214 ///
215 /// CC https://github.com/bodil/im-rs/issues/166
216 pub(crate) fn ordmap_union_with_merge<K, V>(
217 left: OrdMap<K, V>,
218 right: OrdMap<K, V>,
219 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
220 ) -> OrdMap<K, V>
221 where
222 K: Clone + Ord,
223 V: Clone + PartialEq,
224 {
225 if left.ptr_eq(&right) {
226 // One of the two maps is an unmodified clone of the other
227 left
228 } else if left.len() / 2 > right.len() {
229 // When two maps have different sizes,
230 // their size difference is a lower bound on
231 // how many keys of the larger map are not also in the smaller map.
232 // This in turn is a lower bound on the number of differences in
233 // `OrdMap::diff` and the "amount of work" that would be done
234 // by `ordmap_union_with_merge_by_diff`.
235 //
236 // Here `left` is more than twice the size of `right`,
237 // so the number of differences is more than the total size of
238 // `right`. Therefore an algorithm based on iterating `right`
239 // is more efficient.
240 //
241 // This helps a lot when a tiny (or empty) map is merged
242 // with a large one.
243 ordmap_union_with_merge_by_iter(left, right, merge)
244 } else if left.len() < right.len() / 2 {
245 // Same as above but with `left` and `right` swapped
246 ordmap_union_with_merge_by_iter(right, left, |key, a, b| {
247 // Also swapped in `merge` arguments:
248 match merge(key, b, a) {
249 MergeResult::UseNewValue(v) => MergeResult::UseNewValue(v),
250 // … and swap back in `merge` result:
251 MergeResult::UseLeftValue => MergeResult::UseRightValue,
252 MergeResult::UseRightValue => MergeResult::UseLeftValue,
253 }
254 })
255 } else {
256 // For maps of similar size, use the algorithm based on `OrdMap::diff`
257 ordmap_union_with_merge_by_diff(left, right, merge)
258 }
259 }
260
261 /// Efficient if `right` is much smaller than `left`
262 fn ordmap_union_with_merge_by_iter<K, V>(
263 mut left: OrdMap<K, V>,
264 right: OrdMap<K, V>,
265 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
266 ) -> OrdMap<K, V>
267 where
268 K: Clone + Ord,
269 V: Clone,
270 {
271 for (key, right_value) in right {
272 match left.get(&key) {
273 None => {
274 left.insert(key, right_value);
275 }
276 Some(left_value) => match merge(&key, left_value, &right_value) {
277 MergeResult::UseLeftValue => {}
278 MergeResult::UseRightValue => {
279 left.insert(key, right_value);
280 }
281 MergeResult::UseNewValue(new_value) => {
282 left.insert(key, new_value);
283 }
284 },
285 }
286 }
287 left
288 }
289
290 /// Fallback when both maps are of similar size
291 fn ordmap_union_with_merge_by_diff<K, V>(
292 mut left: OrdMap<K, V>,
293 mut right: OrdMap<K, V>,
294 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
295 ) -> OrdMap<K, V>
296 where
297 K: Clone + Ord,
298 V: Clone + PartialEq,
299 {
300 // (key, value) pairs that would need to be inserted in either map
301 // in order to turn it into the union.
302 //
303 // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted,
304 // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>`
305 // with `left_updates` only borrowing from `right` and `right_updates` from
306 // `left`, and with `Cow::Owned` used for `MergeResult::UseNewValue`.
307 //
308 // This would allow moving all `.clone()` calls to after we’ve decided
309 // which of `right_updates` or `left_updates` to use
310 // (value ones becoming `Cow::into_owned`),
311 // and avoid making clones we don’t end up using.
312 let mut left_updates = Vec::new();
313 let mut right_updates = Vec::new();
314
315 for difference in left.diff(&right) {
316 match difference {
317 DiffItem::Add(key, value) => {
318 left_updates.push((key.clone(), value.clone()))
319 }
320 DiffItem::Remove(key, value) => {
321 right_updates.push((key.clone(), value.clone()))
322 }
323 DiffItem::Update {
324 old: (key, left_value),
325 new: (_, right_value),
326 } => match merge(key, left_value, right_value) {
327 MergeResult::UseLeftValue => {
328 right_updates.push((key.clone(), left_value.clone()))
329 }
330 MergeResult::UseRightValue => {
331 left_updates.push((key.clone(), right_value.clone()))
332 }
333 MergeResult::UseNewValue(new_value) => {
334 left_updates.push((key.clone(), new_value.clone()));
335 right_updates.push((key.clone(), new_value))
336 }
337 },
338 }
339 }
340 if left_updates.len() < right_updates.len() {
341 for (key, value) in left_updates {
342 left.insert(key, value);
343 }
344 left
345 } else {
346 for (key, value) in right_updates {
347 right.insert(key, value);
348 }
349 right
350 }
351 }
General Comments 0
You need to be logged in to leave comments. Login now