##// END OF EJS Templates
copies-rust: leverage the immutability for efficient update...
marmoute -
r46585:cc759d3d default
parent child Browse files
Show More
@@ -1,269 +1,316
1 use crate::utils::hg_path::HgPathBuf;
1 use crate::utils::hg_path::HgPathBuf;
2 use crate::Revision;
2 use crate::Revision;
3
3
4 use im_rc::ordmap::DiffItem;
4 use im_rc::ordmap::OrdMap;
5 use im_rc::ordmap::OrdMap;
5
6
6 use std::collections::HashMap;
7 use std::collections::HashMap;
7 use std::collections::HashSet;
8 use std::collections::HashSet;
8
9
9 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
10 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
10
11
11 #[derive(Clone, Debug)]
12 #[derive(Clone, Debug, PartialEq)]
12 struct TimeStampedPathCopy {
13 struct TimeStampedPathCopy {
13 /// revision at which the copy information was added
14 /// revision at which the copy information was added
14 rev: Revision,
15 rev: Revision,
15 /// the copy source, (Set to None in case of deletion of the associated
16 /// the copy source, (Set to None in case of deletion of the associated
16 /// key)
17 /// key)
17 path: Option<HgPathBuf>,
18 path: Option<HgPathBuf>,
18 }
19 }
19
20
20 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
21 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
21 type TimeStampedPathCopies = OrdMap<HgPathBuf, TimeStampedPathCopy>;
22 type TimeStampedPathCopies = OrdMap<HgPathBuf, TimeStampedPathCopy>;
22
23
23 /// hold parent 1, parent 2 and relevant files actions.
24 /// hold parent 1, parent 2 and relevant files actions.
24 pub type RevInfo = (Revision, Revision, ChangedFiles);
25 pub type RevInfo = (Revision, Revision, ChangedFiles);
25
26
26 /// represent the files affected by a changesets
27 /// represent the files affected by a changesets
27 ///
28 ///
28 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
29 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
29 /// all the data categories tracked by it.
30 /// all the data categories tracked by it.
30 pub struct ChangedFiles {
31 pub struct ChangedFiles {
31 removed: HashSet<HgPathBuf>,
32 removed: HashSet<HgPathBuf>,
32 merged: HashSet<HgPathBuf>,
33 merged: HashSet<HgPathBuf>,
33 salvaged: HashSet<HgPathBuf>,
34 salvaged: HashSet<HgPathBuf>,
34 copied_from_p1: PathCopies,
35 copied_from_p1: PathCopies,
35 copied_from_p2: PathCopies,
36 copied_from_p2: PathCopies,
36 }
37 }
37
38
38 impl ChangedFiles {
39 impl ChangedFiles {
39 pub fn new(
40 pub fn new(
40 removed: HashSet<HgPathBuf>,
41 removed: HashSet<HgPathBuf>,
41 merged: HashSet<HgPathBuf>,
42 merged: HashSet<HgPathBuf>,
42 salvaged: HashSet<HgPathBuf>,
43 salvaged: HashSet<HgPathBuf>,
43 copied_from_p1: PathCopies,
44 copied_from_p1: PathCopies,
44 copied_from_p2: PathCopies,
45 copied_from_p2: PathCopies,
45 ) -> Self {
46 ) -> Self {
46 ChangedFiles {
47 ChangedFiles {
47 removed,
48 removed,
48 merged,
49 merged,
49 salvaged,
50 salvaged,
50 copied_from_p1,
51 copied_from_p1,
51 copied_from_p2,
52 copied_from_p2,
52 }
53 }
53 }
54 }
54
55
55 pub fn new_empty() -> Self {
56 pub fn new_empty() -> Self {
56 ChangedFiles {
57 ChangedFiles {
57 removed: HashSet::new(),
58 removed: HashSet::new(),
58 merged: HashSet::new(),
59 merged: HashSet::new(),
59 salvaged: HashSet::new(),
60 salvaged: HashSet::new(),
60 copied_from_p1: PathCopies::new(),
61 copied_from_p1: PathCopies::new(),
61 copied_from_p2: PathCopies::new(),
62 copied_from_p2: PathCopies::new(),
62 }
63 }
63 }
64 }
64 }
65 }
65
66
66 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
67 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
67 ///
68 ///
68 /// Arguments are:
69 /// Arguments are:
69 ///
70 ///
70 /// revs: all revisions to be considered
71 /// revs: all revisions to be considered
71 /// children: a {parent → [childrens]} mapping
72 /// children: a {parent → [childrens]} mapping
72 /// target_rev: the final revision we are combining copies to
73 /// target_rev: the final revision we are combining copies to
73 /// rev_info(rev): callback to get revision information:
74 /// rev_info(rev): callback to get revision information:
74 /// * first parent
75 /// * first parent
75 /// * second parent
76 /// * second parent
76 /// * ChangedFiles
77 /// * ChangedFiles
77 /// isancestors(low_rev, high_rev): callback to check if a revision is an
78 /// isancestors(low_rev, high_rev): callback to check if a revision is an
78 /// ancestor of another
79 /// ancestor of another
79 pub fn combine_changeset_copies(
80 pub fn combine_changeset_copies(
80 revs: Vec<Revision>,
81 revs: Vec<Revision>,
81 children: HashMap<Revision, Vec<Revision>>,
82 children: HashMap<Revision, Vec<Revision>>,
82 target_rev: Revision,
83 target_rev: Revision,
83 rev_info: &impl Fn(Revision) -> RevInfo,
84 rev_info: &impl Fn(Revision) -> RevInfo,
84 is_ancestor: &impl Fn(Revision, Revision) -> bool,
85 is_ancestor: &impl Fn(Revision, Revision) -> bool,
85 ) -> PathCopies {
86 ) -> PathCopies {
86 let mut all_copies = HashMap::new();
87 let mut all_copies = HashMap::new();
87
88
88 for rev in revs {
89 for rev in revs {
89 // Retrieve data computed in a previous iteration
90 // Retrieve data computed in a previous iteration
90 let copies = all_copies.remove(&rev);
91 let copies = all_copies.remove(&rev);
91 let copies = match copies {
92 let copies = match copies {
92 Some(c) => c,
93 Some(c) => c,
93 None => TimeStampedPathCopies::default(), // root of the walked set
94 None => TimeStampedPathCopies::default(), // root of the walked set
94 };
95 };
95
96
96 let current_children = match children.get(&rev) {
97 let current_children = match children.get(&rev) {
97 Some(c) => c,
98 Some(c) => c,
98 None => panic!("inconsistent `revs` and `children`"),
99 None => panic!("inconsistent `revs` and `children`"),
99 };
100 };
100
101
101 for child in current_children {
102 for child in current_children {
102 // We will chain the copies information accumulated for `rev` with
103 // We will chain the copies information accumulated for `rev` with
103 // the individual copies information for each of its children.
104 // the individual copies information for each of its children.
104 // Creating a new PathCopies for each `rev` → `children` vertex.
105 // Creating a new PathCopies for each `rev` → `children` vertex.
105 let (p1, p2, changes) = rev_info(*child);
106 let (p1, p2, changes) = rev_info(*child);
106
107
107 let (parent, child_copies) = if rev == p1 {
108 let (parent, child_copies) = if rev == p1 {
108 (1, &changes.copied_from_p1)
109 (1, &changes.copied_from_p1)
109 } else {
110 } else {
110 assert_eq!(rev, p2);
111 assert_eq!(rev, p2);
111 (2, &changes.copied_from_p2)
112 (2, &changes.copied_from_p2)
112 };
113 };
113 let mut new_copies = copies.clone();
114 let mut new_copies = copies.clone();
114
115
115 for (dest, source) in child_copies {
116 for (dest, source) in child_copies {
116 let entry;
117 let entry;
117 if let Some(v) = copies.get(source) {
118 if let Some(v) = copies.get(source) {
118 entry = match &v.path {
119 entry = match &v.path {
119 Some(path) => Some((*(path)).to_owned()),
120 Some(path) => Some((*(path)).to_owned()),
120 None => Some(source.to_owned()),
121 None => Some(source.to_owned()),
121 }
122 }
122 } else {
123 } else {
123 entry = Some(source.to_owned());
124 entry = Some(source.to_owned());
124 }
125 }
125 // Each new entry is introduced by the children, we record this
126 // Each new entry is introduced by the children, we record this
126 // information as we will need it to take the right decision
127 // information as we will need it to take the right decision
127 // when merging conflicting copy information. See
128 // when merging conflicting copy information. See
128 // merge_copies_dict for details.
129 // merge_copies_dict for details.
129 let ttpc = TimeStampedPathCopy {
130 let ttpc = TimeStampedPathCopy {
130 rev: *child,
131 rev: *child,
131 path: entry,
132 path: entry,
132 };
133 };
133 new_copies.insert(dest.to_owned(), ttpc);
134 new_copies.insert(dest.to_owned(), ttpc);
134 }
135 }
135
136
136 // We must drop copy information for removed file.
137 // We must drop copy information for removed file.
137 //
138 //
138 // We need to explicitly record them as dropped to propagate this
139 // We need to explicitly record them as dropped to propagate this
139 // information when merging two TimeStampedPathCopies object.
140 // information when merging two TimeStampedPathCopies object.
140 for f in changes.removed.iter() {
141 for f in changes.removed.iter() {
141 if new_copies.contains_key(f.as_ref()) {
142 if new_copies.contains_key(f.as_ref()) {
142 let ttpc = TimeStampedPathCopy {
143 let ttpc = TimeStampedPathCopy {
143 rev: *child,
144 rev: *child,
144 path: None,
145 path: None,
145 };
146 };
146 new_copies.insert(f.to_owned(), ttpc);
147 new_copies.insert(f.to_owned(), ttpc);
147 }
148 }
148 }
149 }
149
150
150 // Merge has two parents needs to combines their copy information.
151 // Merge has two parents needs to combines their copy information.
151 //
152 //
152 // If the vertex from the other parent was already processed, we
153 // If the vertex from the other parent was already processed, we
153 // will have a value for the child ready to be used. We need to
154 // will have a value for the child ready to be used. We need to
154 // grab it and combine it with the one we already
155 // grab it and combine it with the one we already
155 // computed. If not we can simply store the newly
156 // computed. If not we can simply store the newly
156 // computed data. The processing happening at
157 // computed data. The processing happening at
157 // the time of the second parent will take care of combining the
158 // the time of the second parent will take care of combining the
158 // two TimeStampedPathCopies instance.
159 // two TimeStampedPathCopies instance.
159 match all_copies.remove(child) {
160 match all_copies.remove(child) {
160 None => {
161 None => {
161 all_copies.insert(child, new_copies);
162 all_copies.insert(child, new_copies);
162 }
163 }
163 Some(other_copies) => {
164 Some(other_copies) => {
164 let (minor, major) = match parent {
165 let (minor, major) = match parent {
165 1 => (other_copies, new_copies),
166 1 => (other_copies, new_copies),
166 2 => (new_copies, other_copies),
167 2 => (new_copies, other_copies),
167 _ => unreachable!(),
168 _ => unreachable!(),
168 };
169 };
169 let merged_copies =
170 let merged_copies =
170 merge_copies_dict(minor, major, &changes, is_ancestor);
171 merge_copies_dict(minor, major, &changes, is_ancestor);
171 all_copies.insert(child, merged_copies);
172 all_copies.insert(child, merged_copies);
172 }
173 }
173 };
174 };
174 }
175 }
175 }
176 }
176
177
177 // Drop internal information (like the timestamp) and return the final
178 // Drop internal information (like the timestamp) and return the final
178 // mapping.
179 // mapping.
179 let tt_result = all_copies
180 let tt_result = all_copies
180 .remove(&target_rev)
181 .remove(&target_rev)
181 .expect("target revision was not processed");
182 .expect("target revision was not processed");
182 let mut result = PathCopies::default();
183 let mut result = PathCopies::default();
183 for (dest, tt_source) in tt_result {
184 for (dest, tt_source) in tt_result {
184 if let Some(path) = tt_source.path {
185 if let Some(path) = tt_source.path {
185 result.insert(dest, path);
186 result.insert(dest, path);
186 }
187 }
187 }
188 }
188 result
189 result
189 }
190 }
190
191
191 /// merge two copies-mapping together, minor and major
192 /// merge two copies-mapping together, minor and major
192 ///
193 ///
193 /// In case of conflict, value from "major" will be picked, unless in some
194 /// In case of conflict, value from "major" will be picked, unless in some
194 /// cases. See inline documentation for details.
195 /// cases. See inline documentation for details.
195 #[allow(clippy::if_same_then_else)]
196 #[allow(clippy::if_same_then_else)]
196 fn merge_copies_dict(
197 fn merge_copies_dict(
197 minor: TimeStampedPathCopies,
198 minor: TimeStampedPathCopies,
198 major: TimeStampedPathCopies,
199 major: TimeStampedPathCopies,
199 changes: &ChangedFiles,
200 changes: &ChangedFiles,
200 is_ancestor: &impl Fn(Revision, Revision) -> bool,
201 is_ancestor: &impl Fn(Revision, Revision) -> bool,
201 ) -> TimeStampedPathCopies {
202 ) -> TimeStampedPathCopies {
202 let mut result = minor.clone();
203 if minor.is_empty() {
203 for (dest, src_major) in major {
204 return major;
204 let overwrite;
205 } else if major.is_empty() {
205 if let Some(src_minor) = minor.get(&dest) {
206 return minor;
206 {
207 }
208 let mut override_minor = Vec::new();
209 let mut override_major = Vec::new();
210
211 let mut to_major = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
212 override_major.push((k.clone(), v.clone()))
213 };
214 let mut to_minor = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
215 override_minor.push((k.clone(), v.clone()))
216 };
217
218 // The diff function leverage detection of the identical subpart if minor
219 // and major has some common ancestors. This make it very fast is most
220 // case.
221 //
222 // In case where the two map are vastly different in size, the current
223 // approach is still slowish because the iteration will iterate over
224 // all the "exclusive" content of the larger on. This situation can be
225 // frequent when the subgraph of revision we are processing has a lot
226 // of roots. Each roots adding they own fully new map to the mix (and
227 // likely a small map, if the path from the root to the "main path" is
228 // small.
229 //
230 // We could do better by detecting such situation and processing them
231 // differently.
232 for d in minor.diff(&major) {
233 match d {
234 DiffItem::Add(k, v) => to_minor(k, v),
235 DiffItem::Remove(k, v) => to_major(k, v),
236 DiffItem::Update { old, new } => {
237 let (dest, src_major) = new;
238 let (_, src_minor) = old;
239 let mut pick_minor = || (to_major(dest, src_minor));
240 let mut pick_major = || (to_minor(dest, src_major));
207 if src_major.path == src_minor.path {
241 if src_major.path == src_minor.path {
208 // we have the same value, no need to battle;
242 // we have the same value, but from other source;
209 if src_major.rev == src_minor.rev {
243 if src_major.rev == src_minor.rev {
210 // If the two entry are identical, no need to do
244 // If the two entry are identical, no need to do
211 // anything
245 // anything (but diff should not have yield them)
212 overwrite = false;
246 unreachable!();
213 } else if is_ancestor(src_major.rev, src_minor.rev) {
247 } else if is_ancestor(src_major.rev, src_minor.rev) {
214 overwrite = false;
248 pick_minor();
215 } else {
249 } else {
216 overwrite = true;
250 pick_major();
217 }
251 }
218 } else if src_major.rev == src_minor.rev {
252 } else if src_major.rev == src_minor.rev {
219 // We cannot get copy information for both p1 and p2 in the
253 // We cannot get copy information for both p1 and p2 in the
220 // same rev. So this is the same value.
254 // same rev. So this is the same value.
221 overwrite = false;
255 unreachable!();
222 } else if src_major.path.is_none()
256 } else if src_major.path.is_none()
223 && changes.salvaged.contains(&dest)
257 && changes.salvaged.contains(dest)
224 {
258 {
225 // If the file is "deleted" in the major side but was
259 // If the file is "deleted" in the major side but was
226 // salvaged by the merge, we keep the minor side alive
260 // salvaged by the merge, we keep the minor side alive
227 overwrite = false;
261 pick_minor();
228 } else if src_minor.path.is_none()
262 } else if src_minor.path.is_none()
229 && changes.salvaged.contains(&dest)
263 && changes.salvaged.contains(dest)
230 {
264 {
231 // If the file is "deleted" in the minor side but was
265 // If the file is "deleted" in the minor side but was
232 // salvaged by the merge, unconditionnaly preserve the
266 // salvaged by the merge, unconditionnaly preserve the
233 // major side.
267 // major side.
234 overwrite = true;
268 pick_major();
235 } else if changes.merged.contains(&dest) {
269 } else if changes.merged.contains(dest) {
236 // If the file was actively merged, copy information from
270 // If the file was actively merged, copy information from
237 // each side might conflict. The major side will win such
271 // each side might conflict. The major side will win such
238 // conflict.
272 // conflict.
239 overwrite = true;
273 pick_major();
240 } else if is_ancestor(src_major.rev, src_minor.rev) {
274 } else if is_ancestor(src_major.rev, src_minor.rev) {
241 // If the minor side is strictly newer than the major side,
275 // If the minor side is strictly newer than the major side,
242 // it should be kept.
276 // it should be kept.
243 overwrite = false;
277 pick_minor();
244 } else if src_major.path.is_some() {
278 } else if src_major.path.is_some() {
245 // without any special case, the "major" value win other
279 // without any special case, the "major" value win other
246 // the "minor" one.
280 // the "minor" one.
247 overwrite = true;
281 pick_major();
248 } else if is_ancestor(src_minor.rev, src_major.rev) {
282 } else if is_ancestor(src_minor.rev, src_major.rev) {
249 // the "major" rev is a direct ancestors of "minor", any
283 // the "major" rev is a direct ancestors of "minor", any
250 // different value should overwrite
284 // different value should overwrite
251 overwrite = true;
285 pick_major();
252 } else {
286 } else {
253 // major version is None (so the file was deleted on that
287 // major version is None (so the file was deleted on that
254 // branch) and that branch is independant (neither minor
288 // branch) and that branch is independant (neither minor
255 // nor major is an ancestors of the other one.) We preserve
289 // nor major is an ancestors of the other one.) We preserve
256 // the new information about the new file.
290 // the new information about the new file.
257 overwrite = false;
291 pick_minor();
258 }
292 }
259 }
293 }
294 };
295 }
296
297 let updates;
298 let mut result;
299 if override_major.is_empty() {
300 result = major
301 } else if override_minor.is_empty() {
302 result = minor
303 } else {
304 if override_minor.len() < override_major.len() {
305 updates = override_minor;
306 result = minor;
260 } else {
307 } else {
261 // minor had no value
308 updates = override_major;
262 overwrite = true;
309 result = major;
263 }
310 }
264 if overwrite {
311 for (k, v) in updates {
265 result.insert(dest, src_major);
312 result.insert(k, v);
266 }
313 }
267 }
314 }
268 result
315 result
269 }
316 }
General Comments 0
You need to be logged in to leave comments. Login now