##// END OF EJS Templates
copies-rust: combine the iteration over remove and copies into one...
marmoute -
r46587:ed0e1339 default
parent child Browse files
Show More
@@ -1,354 +1,384 b''
1 use crate::utils::hg_path::HgPath;
1 use crate::utils::hg_path::HgPathBuf;
2 use crate::utils::hg_path::HgPathBuf;
2 use crate::Revision;
3 use crate::Revision;
3
4
4 use im_rc::ordmap::DiffItem;
5 use im_rc::ordmap::DiffItem;
5 use im_rc::ordmap::OrdMap;
6 use im_rc::ordmap::OrdMap;
6
7
7 use std::collections::HashMap;
8 use std::collections::HashMap;
8 use std::collections::HashSet;
9 use std::collections::HashSet;
9
10
10 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
11 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
11
12
12 #[derive(Clone, Debug, PartialEq)]
13 #[derive(Clone, Debug, PartialEq)]
13 struct TimeStampedPathCopy {
14 struct TimeStampedPathCopy {
14 /// revision at which the copy information was added
15 /// revision at which the copy information was added
15 rev: Revision,
16 rev: Revision,
16 /// the copy source, (Set to None in case of deletion of the associated
17 /// the copy source, (Set to None in case of deletion of the associated
17 /// key)
18 /// key)
18 path: Option<HgPathBuf>,
19 path: Option<HgPathBuf>,
19 }
20 }
20
21
21 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
22 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
22 type TimeStampedPathCopies = OrdMap<HgPathBuf, TimeStampedPathCopy>;
23 type TimeStampedPathCopies = OrdMap<HgPathBuf, TimeStampedPathCopy>;
23
24
24 /// hold parent 1, parent 2 and relevant files actions.
25 /// hold parent 1, parent 2 and relevant files actions.
25 pub type RevInfo = (Revision, Revision, ChangedFiles);
26 pub type RevInfo = (Revision, Revision, ChangedFiles);
26
27
27 /// represent the files affected by a changesets
28 /// represent the files affected by a changesets
28 ///
29 ///
29 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
30 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
30 /// all the data categories tracked by it.
31 /// all the data categories tracked by it.
31 pub struct ChangedFiles {
32 pub struct ChangedFiles {
32 removed: HashSet<HgPathBuf>,
33 removed: HashSet<HgPathBuf>,
33 merged: HashSet<HgPathBuf>,
34 merged: HashSet<HgPathBuf>,
34 salvaged: HashSet<HgPathBuf>,
35 salvaged: HashSet<HgPathBuf>,
35 copied_from_p1: PathCopies,
36 copied_from_p1: PathCopies,
36 copied_from_p2: PathCopies,
37 copied_from_p2: PathCopies,
37 }
38 }
38
39
40 /// Represent active changes that affect the copy tracing.
41 enum Action<'a> {
42 /// The parent ? children edge is removing a file
43 ///
44 /// (actually, this could be the edge from the other parent, but it does
45 /// not matters)
46 Removed(&'a HgPath),
47 /// The parent ? children edge introduce copy information between (dest,
48 /// source)
49 Copied(&'a HgPath, &'a HgPath),
50 }
51
39 impl ChangedFiles {
52 impl ChangedFiles {
40 pub fn new(
53 pub fn new(
41 removed: HashSet<HgPathBuf>,
54 removed: HashSet<HgPathBuf>,
42 merged: HashSet<HgPathBuf>,
55 merged: HashSet<HgPathBuf>,
43 salvaged: HashSet<HgPathBuf>,
56 salvaged: HashSet<HgPathBuf>,
44 copied_from_p1: PathCopies,
57 copied_from_p1: PathCopies,
45 copied_from_p2: PathCopies,
58 copied_from_p2: PathCopies,
46 ) -> Self {
59 ) -> Self {
47 ChangedFiles {
60 ChangedFiles {
48 removed,
61 removed,
49 merged,
62 merged,
50 salvaged,
63 salvaged,
51 copied_from_p1,
64 copied_from_p1,
52 copied_from_p2,
65 copied_from_p2,
53 }
66 }
54 }
67 }
55
68
56 pub fn new_empty() -> Self {
69 pub fn new_empty() -> Self {
57 ChangedFiles {
70 ChangedFiles {
58 removed: HashSet::new(),
71 removed: HashSet::new(),
59 merged: HashSet::new(),
72 merged: HashSet::new(),
60 salvaged: HashSet::new(),
73 salvaged: HashSet::new(),
61 copied_from_p1: PathCopies::new(),
74 copied_from_p1: PathCopies::new(),
62 copied_from_p2: PathCopies::new(),
75 copied_from_p2: PathCopies::new(),
63 }
76 }
64 }
77 }
78
79 /// Return an iterator over all the `Action` in this instance.
80 fn iter_actions(&self, parent: usize) -> impl Iterator<Item = Action> {
81 let copies_iter = match parent {
82 1 => self.copied_from_p1.iter(),
83 2 => self.copied_from_p2.iter(),
84 _ => unreachable!(),
85 };
86 let remove_iter = self.removed.iter();
87 let copies_iter = copies_iter.map(|(x, y)| Action::Copied(x, y));
88 let remove_iter = remove_iter.map(|x| Action::Removed(x));
89 copies_iter.chain(remove_iter)
90 }
65 }
91 }
66
92
67 /// A struct responsible for answering "is X ancestors of Y" quickly
93 /// A struct responsible for answering "is X ancestors of Y" quickly
68 ///
94 ///
69 /// The structure will delegate ancestors call to a callback, and cache the
95 /// The structure will delegate ancestors call to a callback, and cache the
70 /// result.
96 /// result.
71 #[derive(Debug)]
97 #[derive(Debug)]
72 struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
98 struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
73 inner: &'a A,
99 inner: &'a A,
74 pairs: HashMap<(Revision, Revision), bool>,
100 pairs: HashMap<(Revision, Revision), bool>,
75 }
101 }
76
102
77 impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
103 impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
78 fn new(func: &'a A) -> Self {
104 fn new(func: &'a A) -> Self {
79 Self {
105 Self {
80 inner: func,
106 inner: func,
81 pairs: HashMap::default(),
107 pairs: HashMap::default(),
82 }
108 }
83 }
109 }
84
110
85 /// returns `true` if `anc` is an ancestors of `desc`, `false` otherwise
111 /// returns `true` if `anc` is an ancestors of `desc`, `false` otherwise
86 fn is_ancestor(&mut self, anc: Revision, desc: Revision) -> bool {
112 fn is_ancestor(&mut self, anc: Revision, desc: Revision) -> bool {
87 if anc > desc {
113 if anc > desc {
88 false
114 false
89 } else if anc == desc {
115 } else if anc == desc {
90 true
116 true
91 } else {
117 } else {
92 if let Some(b) = self.pairs.get(&(anc, desc)) {
118 if let Some(b) = self.pairs.get(&(anc, desc)) {
93 *b
119 *b
94 } else {
120 } else {
95 let b = (self.inner)(anc, desc);
121 let b = (self.inner)(anc, desc);
96 self.pairs.insert((anc, desc), b);
122 self.pairs.insert((anc, desc), b);
97 b
123 b
98 }
124 }
99 }
125 }
100 }
126 }
101 }
127 }
102
128
103 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
129 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
104 ///
130 ///
105 /// Arguments are:
131 /// Arguments are:
106 ///
132 ///
107 /// revs: all revisions to be considered
133 /// revs: all revisions to be considered
108 /// children: a {parent ? [childrens]} mapping
134 /// children: a {parent ? [childrens]} mapping
109 /// target_rev: the final revision we are combining copies to
135 /// target_rev: the final revision we are combining copies to
110 /// rev_info(rev): callback to get revision information:
136 /// rev_info(rev): callback to get revision information:
111 /// * first parent
137 /// * first parent
112 /// * second parent
138 /// * second parent
113 /// * ChangedFiles
139 /// * ChangedFiles
114 /// isancestors(low_rev, high_rev): callback to check if a revision is an
140 /// isancestors(low_rev, high_rev): callback to check if a revision is an
115 /// ancestor of another
141 /// ancestor of another
116 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool>(
142 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool>(
117 revs: Vec<Revision>,
143 revs: Vec<Revision>,
118 children: HashMap<Revision, Vec<Revision>>,
144 children: HashMap<Revision, Vec<Revision>>,
119 target_rev: Revision,
145 target_rev: Revision,
120 rev_info: &impl Fn(Revision) -> RevInfo,
146 rev_info: &impl Fn(Revision) -> RevInfo,
121 is_ancestor: &A,
147 is_ancestor: &A,
122 ) -> PathCopies {
148 ) -> PathCopies {
123 let mut all_copies = HashMap::new();
149 let mut all_copies = HashMap::new();
124 let mut oracle = AncestorOracle::new(is_ancestor);
150 let mut oracle = AncestorOracle::new(is_ancestor);
125
151
126 for rev in revs {
152 for rev in revs {
127 // Retrieve data computed in a previous iteration
153 // Retrieve data computed in a previous iteration
128 let copies = all_copies.remove(&rev);
154 let copies = all_copies.remove(&rev);
129 let copies = match copies {
155 let copies = match copies {
130 Some(c) => c,
156 Some(c) => c,
131 None => TimeStampedPathCopies::default(), // root of the walked set
157 None => TimeStampedPathCopies::default(), // root of the walked set
132 };
158 };
133
159
134 let current_children = match children.get(&rev) {
160 let current_children = match children.get(&rev) {
135 Some(c) => c,
161 Some(c) => c,
136 None => panic!("inconsistent `revs` and `children`"),
162 None => panic!("inconsistent `revs` and `children`"),
137 };
163 };
138
164
139 for child in current_children {
165 for child in current_children {
140 // We will chain the copies information accumulated for `rev` with
166 // We will chain the copies information accumulated for `rev` with
141 // the individual copies information for each of its children.
167 // the individual copies information for each of its children.
142 // Creating a new PathCopies for each `rev` ? `children` vertex.
168 // Creating a new PathCopies for each `rev` ? `children` vertex.
143 let (p1, p2, changes) = rev_info(*child);
169 let (p1, p2, changes) = rev_info(*child);
144
170
145 let (parent, child_copies) = if rev == p1 {
171 let parent = if rev == p1 {
146 (1, &changes.copied_from_p1)
172 1
147 } else {
173 } else {
148 assert_eq!(rev, p2);
174 assert_eq!(rev, p2);
149 (2, &changes.copied_from_p2)
175 2
150 };
176 };
151 let mut new_copies = copies.clone();
177 let mut new_copies = copies.clone();
152
178
153 for (dest, source) in child_copies {
179 for action in changes.iter_actions(parent) {
154 let entry;
180 match action {
155 if let Some(v) = copies.get(source) {
181 Action::Copied(dest, source) => {
156 entry = match &v.path {
182 let entry;
157 Some(path) => Some((*(path)).to_owned()),
183 if let Some(v) = copies.get(source) {
158 None => Some(source.to_owned()),
184 entry = match &v.path {
185 Some(path) => Some((*(path)).to_owned()),
186 None => Some(source.to_owned()),
187 }
188 } else {
189 entry = Some(source.to_owned());
190 }
191 // Each new entry is introduced by the children, we
192 // record this information as we will need it to take
193 // the right decision when merging conflicting copy
194 // information. See merge_copies_dict for details.
195 let ttpc = TimeStampedPathCopy {
196 rev: *child,
197 path: entry,
198 };
199 new_copies.insert(dest.to_owned(), ttpc);
159 }
200 }
160 } else {
201 Action::Removed(f) => {
161 entry = Some(source.to_owned());
202 // We must drop copy information for removed file.
162 }
203 //
163 // Each new entry is introduced by the children, we record this
204 // We need to explicitly record them as dropped to
164 // information as we will need it to take the right decision
205 // propagate this information when merging two
165 // when merging conflicting copy information. See
206 // TimeStampedPathCopies object.
166 // merge_copies_dict for details.
207 if new_copies.contains_key(f.as_ref()) {
167 let ttpc = TimeStampedPathCopy {
208 let ttpc = TimeStampedPathCopy {
168 rev: *child,
209 rev: *child,
169 path: entry,
210 path: None,
170 };
211 };
171 new_copies.insert(dest.to_owned(), ttpc);
212 new_copies.insert(f.to_owned(), ttpc);
172 }
213 }
173
214 }
174 // We must drop copy information for removed file.
175 //
176 // We need to explicitly record them as dropped to propagate this
177 // information when merging two TimeStampedPathCopies object.
178 for f in changes.removed.iter() {
179 if new_copies.contains_key(f.as_ref()) {
180 let ttpc = TimeStampedPathCopy {
181 rev: *child,
182 path: None,
183 };
184 new_copies.insert(f.to_owned(), ttpc);
185 }
215 }
186 }
216 }
187
217
188 // Merge has two parents needs to combines their copy information.
218 // Merge has two parents needs to combines their copy information.
189 //
219 //
190 // If the vertex from the other parent was already processed, we
220 // If the vertex from the other parent was already processed, we
191 // will have a value for the child ready to be used. We need to
221 // will have a value for the child ready to be used. We need to
192 // grab it and combine it with the one we already
222 // grab it and combine it with the one we already
193 // computed. If not we can simply store the newly
223 // computed. If not we can simply store the newly
194 // computed data. The processing happening at
224 // computed data. The processing happening at
195 // the time of the second parent will take care of combining the
225 // the time of the second parent will take care of combining the
196 // two TimeStampedPathCopies instance.
226 // two TimeStampedPathCopies instance.
197 match all_copies.remove(child) {
227 match all_copies.remove(child) {
198 None => {
228 None => {
199 all_copies.insert(child, new_copies);
229 all_copies.insert(child, new_copies);
200 }
230 }
201 Some(other_copies) => {
231 Some(other_copies) => {
202 let (minor, major) = match parent {
232 let (minor, major) = match parent {
203 1 => (other_copies, new_copies),
233 1 => (other_copies, new_copies),
204 2 => (new_copies, other_copies),
234 2 => (new_copies, other_copies),
205 _ => unreachable!(),
235 _ => unreachable!(),
206 };
236 };
207 let merged_copies =
237 let merged_copies =
208 merge_copies_dict(minor, major, &changes, &mut oracle);
238 merge_copies_dict(minor, major, &changes, &mut oracle);
209 all_copies.insert(child, merged_copies);
239 all_copies.insert(child, merged_copies);
210 }
240 }
211 };
241 };
212 }
242 }
213 }
243 }
214
244
215 // Drop internal information (like the timestamp) and return the final
245 // Drop internal information (like the timestamp) and return the final
216 // mapping.
246 // mapping.
217 let tt_result = all_copies
247 let tt_result = all_copies
218 .remove(&target_rev)
248 .remove(&target_rev)
219 .expect("target revision was not processed");
249 .expect("target revision was not processed");
220 let mut result = PathCopies::default();
250 let mut result = PathCopies::default();
221 for (dest, tt_source) in tt_result {
251 for (dest, tt_source) in tt_result {
222 if let Some(path) = tt_source.path {
252 if let Some(path) = tt_source.path {
223 result.insert(dest, path);
253 result.insert(dest, path);
224 }
254 }
225 }
255 }
226 result
256 result
227 }
257 }
228
258
229 /// merge two copies-mapping together, minor and major
259 /// merge two copies-mapping together, minor and major
230 ///
260 ///
231 /// In case of conflict, value from "major" will be picked, unless in some
261 /// In case of conflict, value from "major" will be picked, unless in some
232 /// cases. See inline documentation for details.
262 /// cases. See inline documentation for details.
233 #[allow(clippy::if_same_then_else)]
263 #[allow(clippy::if_same_then_else)]
234 fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
264 fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
235 minor: TimeStampedPathCopies,
265 minor: TimeStampedPathCopies,
236 major: TimeStampedPathCopies,
266 major: TimeStampedPathCopies,
237 changes: &ChangedFiles,
267 changes: &ChangedFiles,
238 oracle: &mut AncestorOracle<A>,
268 oracle: &mut AncestorOracle<A>,
239 ) -> TimeStampedPathCopies {
269 ) -> TimeStampedPathCopies {
240 if minor.is_empty() {
270 if minor.is_empty() {
241 return major;
271 return major;
242 } else if major.is_empty() {
272 } else if major.is_empty() {
243 return minor;
273 return minor;
244 }
274 }
245 let mut override_minor = Vec::new();
275 let mut override_minor = Vec::new();
246 let mut override_major = Vec::new();
276 let mut override_major = Vec::new();
247
277
248 let mut to_major = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
278 let mut to_major = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
249 override_major.push((k.clone(), v.clone()))
279 override_major.push((k.clone(), v.clone()))
250 };
280 };
251 let mut to_minor = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
281 let mut to_minor = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
252 override_minor.push((k.clone(), v.clone()))
282 override_minor.push((k.clone(), v.clone()))
253 };
283 };
254
284
255 // The diff function leverage detection of the identical subpart if minor
285 // The diff function leverage detection of the identical subpart if minor
256 // and major has some common ancestors. This make it very fast is most
286 // and major has some common ancestors. This make it very fast is most
257 // case.
287 // case.
258 //
288 //
259 // In case where the two map are vastly different in size, the current
289 // In case where the two map are vastly different in size, the current
260 // approach is still slowish because the iteration will iterate over
290 // approach is still slowish because the iteration will iterate over
261 // all the "exclusive" content of the larger on. This situation can be
291 // all the "exclusive" content of the larger on. This situation can be
262 // frequent when the subgraph of revision we are processing has a lot
292 // frequent when the subgraph of revision we are processing has a lot
263 // of roots. Each roots adding they own fully new map to the mix (and
293 // of roots. Each roots adding they own fully new map to the mix (and
264 // likely a small map, if the path from the root to the "main path" is
294 // likely a small map, if the path from the root to the "main path" is
265 // small.
295 // small.
266 //
296 //
267 // We could do better by detecting such situation and processing them
297 // We could do better by detecting such situation and processing them
268 // differently.
298 // differently.
269 for d in minor.diff(&major) {
299 for d in minor.diff(&major) {
270 match d {
300 match d {
271 DiffItem::Add(k, v) => to_minor(k, v),
301 DiffItem::Add(k, v) => to_minor(k, v),
272 DiffItem::Remove(k, v) => to_major(k, v),
302 DiffItem::Remove(k, v) => to_major(k, v),
273 DiffItem::Update { old, new } => {
303 DiffItem::Update { old, new } => {
274 let (dest, src_major) = new;
304 let (dest, src_major) = new;
275 let (_, src_minor) = old;
305 let (_, src_minor) = old;
276 let mut pick_minor = || (to_major(dest, src_minor));
306 let mut pick_minor = || (to_major(dest, src_minor));
277 let mut pick_major = || (to_minor(dest, src_major));
307 let mut pick_major = || (to_minor(dest, src_major));
278 if src_major.path == src_minor.path {
308 if src_major.path == src_minor.path {
279 // we have the same value, but from other source;
309 // we have the same value, but from other source;
280 if src_major.rev == src_minor.rev {
310 if src_major.rev == src_minor.rev {
281 // If the two entry are identical, no need to do
311 // If the two entry are identical, no need to do
282 // anything (but diff should not have yield them)
312 // anything (but diff should not have yield them)
283 unreachable!();
313 unreachable!();
284 } else if oracle.is_ancestor(src_major.rev, src_minor.rev)
314 } else if oracle.is_ancestor(src_major.rev, src_minor.rev)
285 {
315 {
286 pick_minor();
316 pick_minor();
287 } else {
317 } else {
288 pick_major();
318 pick_major();
289 }
319 }
290 } else if src_major.rev == src_minor.rev {
320 } else if src_major.rev == src_minor.rev {
291 // We cannot get copy information for both p1 and p2 in the
321 // We cannot get copy information for both p1 and p2 in the
292 // same rev. So this is the same value.
322 // same rev. So this is the same value.
293 unreachable!();
323 unreachable!();
294 } else if src_major.path.is_none()
324 } else if src_major.path.is_none()
295 && changes.salvaged.contains(dest)
325 && changes.salvaged.contains(dest)
296 {
326 {
297 // If the file is "deleted" in the major side but was
327 // If the file is "deleted" in the major side but was
298 // salvaged by the merge, we keep the minor side alive
328 // salvaged by the merge, we keep the minor side alive
299 pick_minor();
329 pick_minor();
300 } else if src_minor.path.is_none()
330 } else if src_minor.path.is_none()
301 && changes.salvaged.contains(dest)
331 && changes.salvaged.contains(dest)
302 {
332 {
303 // If the file is "deleted" in the minor side but was
333 // If the file is "deleted" in the minor side but was
304 // salvaged by the merge, unconditionnaly preserve the
334 // salvaged by the merge, unconditionnaly preserve the
305 // major side.
335 // major side.
306 pick_major();
336 pick_major();
307 } else if changes.merged.contains(dest) {
337 } else if changes.merged.contains(dest) {
308 // If the file was actively merged, copy information from
338 // If the file was actively merged, copy information from
309 // each side might conflict. The major side will win such
339 // each side might conflict. The major side will win such
310 // conflict.
340 // conflict.
311 pick_major();
341 pick_major();
312 } else if oracle.is_ancestor(src_major.rev, src_minor.rev) {
342 } else if oracle.is_ancestor(src_major.rev, src_minor.rev) {
313 // If the minor side is strictly newer than the major side,
343 // If the minor side is strictly newer than the major side,
314 // it should be kept.
344 // it should be kept.
315 pick_minor();
345 pick_minor();
316 } else if src_major.path.is_some() {
346 } else if src_major.path.is_some() {
317 // without any special case, the "major" value win other
347 // without any special case, the "major" value win other
318 // the "minor" one.
348 // the "minor" one.
319 pick_major();
349 pick_major();
320 } else if oracle.is_ancestor(src_minor.rev, src_major.rev) {
350 } else if oracle.is_ancestor(src_minor.rev, src_major.rev) {
321 // the "major" rev is a direct ancestors of "minor", any
351 // the "major" rev is a direct ancestors of "minor", any
322 // different value should overwrite
352 // different value should overwrite
323 pick_major();
353 pick_major();
324 } else {
354 } else {
325 // major version is None (so the file was deleted on that
355 // major version is None (so the file was deleted on that
326 // branch) and that branch is independant (neither minor
356 // branch) and that branch is independant (neither minor
327 // nor major is an ancestors of the other one.) We preserve
357 // nor major is an ancestors of the other one.) We preserve
328 // the new information about the new file.
358 // the new information about the new file.
329 pick_minor();
359 pick_minor();
330 }
360 }
331 }
361 }
332 };
362 };
333 }
363 }
334
364
335 let updates;
365 let updates;
336 let mut result;
366 let mut result;
337 if override_major.is_empty() {
367 if override_major.is_empty() {
338 result = major
368 result = major
339 } else if override_minor.is_empty() {
369 } else if override_minor.is_empty() {
340 result = minor
370 result = minor
341 } else {
371 } else {
342 if override_minor.len() < override_major.len() {
372 if override_minor.len() < override_major.len() {
343 updates = override_minor;
373 updates = override_minor;
344 result = minor;
374 result = minor;
345 } else {
375 } else {
346 updates = override_major;
376 updates = override_major;
347 result = major;
377 result = major;
348 }
378 }
349 for (k, v) in updates {
379 for (k, v) in updates {
350 result.insert(k, v);
380 result.insert(k, v);
351 }
381 }
352 }
382 }
353 result
383 result
354 }
384 }
General Comments 0
You need to be logged in to leave comments. Login now