##// END OF EJS Templates
merge-lists: make it possible to specify pattern to match...
Martin von Zweigbergk -
r49875:b999edb1 default
parent child Browse files
Show More
@@ -1,280 +1,300 b''
1 use clap::Parser;
1 use clap::{ArgGroup, Parser};
2 2 use itertools::Itertools;
3 3 use regex::bytes::Regex;
4 4 use similar::ChangeTag;
5 5 use std::cmp::{max, min, Ordering};
6 6 use std::collections::HashSet;
7 7 use std::ffi::OsString;
8 8 use std::ops::Range;
9 9 use std::path::PathBuf;
10 10
11 11 fn find_unchanged_ranges(
12 12 old_bytes: &[u8],
13 13 new_bytes: &[u8],
14 14 ) -> Vec<(Range<usize>, Range<usize>)> {
15 15 let diff = similar::TextDiff::configure()
16 16 .algorithm(similar::Algorithm::Patience)
17 17 .diff_lines(old_bytes, new_bytes);
18 18 let mut new_unchanged_ranges = vec![];
19 19 let mut old_index = 0;
20 20 let mut new_index = 0;
21 21 for diff in diff.iter_all_changes() {
22 22 match diff.tag() {
23 23 ChangeTag::Equal => {
24 24 new_unchanged_ranges.push((
25 25 old_index..old_index + diff.value().len(),
26 26 new_index..new_index + diff.value().len(),
27 27 ));
28 28 old_index += diff.value().len();
29 29 new_index += diff.value().len();
30 30 }
31 31 ChangeTag::Delete => {
32 32 old_index += diff.value().len();
33 33 }
34 34 ChangeTag::Insert => {
35 35 new_index += diff.value().len();
36 36 }
37 37 }
38 38 }
39 39 new_unchanged_ranges
40 40 }
41 41
42 42 /// Returns a list of all the lines in the input (including trailing newlines),
43 43 /// but only if they all match the regex and they are sorted.
44 44 fn get_lines<'input>(
45 45 input: &'input [u8],
46 46 regex: &Regex,
47 47 ) -> Option<Vec<&'input [u8]>> {
48 48 let lines = input.split_inclusive(|x| *x == b'\n').collect_vec();
49 49 let mut previous_line = "".as_bytes();
50 50 for line in &lines {
51 51 if *line < previous_line {
52 52 return None;
53 53 }
54 54 if !regex.is_match(line) {
55 55 return None;
56 56 }
57 57 previous_line = line;
58 58 }
59 59 Some(lines)
60 60 }
61 61
62 62 fn resolve_conflict(
63 63 base_slice: &[u8],
64 64 local_slice: &[u8],
65 65 other_slice: &[u8],
66 66 regex: &Regex,
67 67 ) -> Option<Vec<u8>> {
68 68 let base_lines = get_lines(base_slice, regex)?;
69 69 let local_lines = get_lines(local_slice, regex)?;
70 70 let other_lines = get_lines(other_slice, regex)?;
71 71 let base_lines_set: HashSet<_> = base_lines.iter().copied().collect();
72 72 let local_lines_set: HashSet<_> = local_lines.iter().copied().collect();
73 73 let other_lines_set: HashSet<_> = other_lines.iter().copied().collect();
74 74 let mut result = local_lines_set;
75 75 for to_add in other_lines_set.difference(&base_lines_set) {
76 76 result.insert(to_add);
77 77 }
78 78 for to_remove in base_lines_set.difference(&other_lines_set) {
79 79 result.remove(to_remove);
80 80 }
81 81 Some(result.into_iter().sorted().collect_vec().concat())
82 82 }
83 83
84 84 fn resolve(
85 85 base_bytes: &[u8],
86 86 local_bytes: &[u8],
87 87 other_bytes: &[u8],
88 88 regex: &Regex,
89 89 ) -> (Vec<u8>, Vec<u8>, Vec<u8>) {
90 90 // Find unchanged ranges between the base and the two sides. We do that by
91 91 // initially considering the whole base unchanged. Then we compare each
92 92 // side with the base and intersect the unchanged ranges we find with
93 93 // what we had before.
94 94 let unchanged_ranges = vec![UnchangedRange {
95 95 base_range: 0..base_bytes.len(),
96 96 offsets: vec![],
97 97 }];
98 98 let unchanged_ranges = intersect_regions(
99 99 unchanged_ranges,
100 100 &find_unchanged_ranges(base_bytes, local_bytes),
101 101 );
102 102 let mut unchanged_ranges = intersect_regions(
103 103 unchanged_ranges,
104 104 &find_unchanged_ranges(base_bytes, other_bytes),
105 105 );
106 106 // Add an empty UnchangedRange at the end to make it easier to find change
107 107 // ranges. That way there's a changed range before each UnchangedRange.
108 108 unchanged_ranges.push(UnchangedRange {
109 109 base_range: base_bytes.len()..base_bytes.len(),
110 110 offsets: vec![
111 111 local_bytes.len().wrapping_sub(base_bytes.len()) as isize,
112 112 other_bytes.len().wrapping_sub(base_bytes.len()) as isize,
113 113 ],
114 114 });
115 115
116 116 let mut new_base_bytes: Vec<u8> = vec![];
117 117 let mut new_local_bytes: Vec<u8> = vec![];
118 118 let mut new_other_bytes: Vec<u8> = vec![];
119 119 let mut previous = UnchangedRange {
120 120 base_range: 0..0,
121 121 offsets: vec![0, 0],
122 122 };
123 123 for current in unchanged_ranges {
124 124 let base_slice =
125 125 &base_bytes[previous.base_range.end..current.base_range.start];
126 126 let local_slice = &local_bytes[previous.end(0)..current.start(0)];
127 127 let other_slice = &other_bytes[previous.end(1)..current.start(1)];
128 128 if let Some(resolution) =
129 129 resolve_conflict(base_slice, local_slice, other_slice, regex)
130 130 {
131 131 new_base_bytes.extend(&resolution);
132 132 new_local_bytes.extend(&resolution);
133 133 new_other_bytes.extend(&resolution);
134 134 } else {
135 135 new_base_bytes.extend(base_slice);
136 136 new_local_bytes.extend(local_slice);
137 137 new_other_bytes.extend(other_slice);
138 138 }
139 139 new_base_bytes.extend(&base_bytes[current.base_range.clone()]);
140 140 new_local_bytes.extend(&local_bytes[current.start(0)..current.end(0)]);
141 141 new_other_bytes.extend(&other_bytes[current.start(1)..current.end(1)]);
142 142 previous = current;
143 143 }
144 144
145 145 (new_base_bytes, new_local_bytes, new_other_bytes)
146 146 }
147 147
148 148 /// A tool that performs a 3-way merge, resolving conflicts in sorted lists and
149 149 /// leaving other conflicts unchanged. This is useful with Mercurial's support
150 150 /// for partial merge tools (configured in `[partial-merge-tools]`).
151 151 #[derive(Parser, Debug)]
152 152 #[clap(version, about, long_about = None)]
153 #[clap(group(ArgGroup::new("match").required(true).args(&["pattern", "python-imports"])))]
153 154 struct Args {
154 155 /// Path to the file's content in the "local" side
155 156 local: OsString,
156 157
157 158 /// Path to the file's content in the base
158 159 base: OsString,
159 160
160 161 /// Path to the file's content in the "other" side
161 162 other: OsString,
163
164 /// Regular expression to use
165 #[clap(long, short)]
166 pattern: Option<String>,
167
168 /// Use built-in regular expression for Python imports
169 #[clap(long)]
170 python_imports: bool,
171 }
172
173 fn get_regex(args: &Args) -> Regex {
174 let pattern = if args.python_imports {
175 r"import \w+(\.\w+)*( +#.*)?\n|from (\w+(\.\w+)* import \w+( as \w+)?(, \w+( as \w+)?)*( +#.*)?)"
176 } else if let Some(pattern) = &args.pattern {
177 pattern
178 } else {
179 ".*"
180 };
181 let pattern = format!(r"{}\r?\n?", pattern);
182 regex::bytes::Regex::new(&pattern).unwrap()
162 183 }
163 184
164 185 fn main() {
165 186 let args: Args = Args::parse();
166 187
167 188 let base_path = PathBuf::from(&args.base);
168 189 let local_path = PathBuf::from(&args.local);
169 190 let other_path = PathBuf::from(&args.other);
170 191
171 192 let base_bytes = std::fs::read(&base_path).unwrap();
172 193 let local_bytes = std::fs::read(&local_path).unwrap();
173 194 let other_bytes = std::fs::read(&other_path).unwrap();
174 195
175 let regex =
176 regex::bytes::Regex::new(r"import \w+(\.\w+)*( +#.*)?\n|from (\w+(\.\w+)* import \w+( as \w+)?(, \w+( as \w+)?)*( +#.*)?)\r?\n?").unwrap();
196 let regex = get_regex(&args);
177 197 let (new_base_bytes, new_local_bytes, new_other_bytes) =
178 198 resolve(&base_bytes, &local_bytes, &other_bytes, &regex);
179 199
180 200 // Write out the result if anything changed
181 201 if new_base_bytes != base_bytes {
182 202 std::fs::write(&base_path, new_base_bytes).unwrap();
183 203 }
184 204 if new_local_bytes != local_bytes {
185 205 std::fs::write(&local_path, new_local_bytes).unwrap();
186 206 }
187 207 if new_other_bytes != other_bytes {
188 208 std::fs::write(&other_path, new_other_bytes).unwrap();
189 209 }
190 210 }
191 211
/// Adds the signed `offset` to `base`.
///
/// # Panics
///
/// Panics if the result does not fit in a `usize`, i.e. if it would be
/// negative or would overflow.
fn checked_add(base: usize, offset: isize) -> usize {
    // `unsigned_abs` is defined for every isize, including isize::MIN, whose
    // absolute value does not fit in an isize.
    let magnitude = offset.unsigned_abs();
    let sum = if offset < 0 {
        base.checked_sub(magnitude)
    } else {
        base.checked_add(magnitude)
    };
    sum.expect("base plus offset must fit in a usize")
}
200 220
201 221 // The remainder of the file is copied from
202 222 // https://github.com/martinvonz/jj/blob/main/lib/src/diff.rs
203 223
204 224 #[derive(Clone, PartialEq, Eq, Debug)]
205 225 struct UnchangedRange {
206 226 base_range: Range<usize>,
207 227 offsets: Vec<isize>,
208 228 }
209 229
210 230 impl UnchangedRange {
211 231 fn start(&self, side: usize) -> usize {
212 232 checked_add(self.base_range.start, self.offsets[side])
213 233 }
214 234
215 235 fn end(&self, side: usize) -> usize {
216 236 checked_add(self.base_range.end, self.offsets[side])
217 237 }
218 238 }
219 239
220 240 impl PartialOrd for UnchangedRange {
221 241 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
222 242 Some(self.cmp(other))
223 243 }
224 244 }
225 245
226 246 impl Ord for UnchangedRange {
227 247 fn cmp(&self, other: &Self) -> Ordering {
228 248 self.base_range
229 249 .start
230 250 .cmp(&other.base_range.start)
231 251 .then_with(|| self.base_range.end.cmp(&other.base_range.end))
232 252 }
233 253 }
234 254
235 255 /// Takes the current regions and intersects it with the new unchanged ranges
236 256 /// from a 2-way diff. The result is a map of unchanged regions with one more
237 257 /// offset in the map's values.
238 258 fn intersect_regions(
239 259 current_ranges: Vec<UnchangedRange>,
240 260 new_unchanged_ranges: &[(Range<usize>, Range<usize>)],
241 261 ) -> Vec<UnchangedRange> {
242 262 let mut result = vec![];
243 263 let mut current_ranges_iter = current_ranges.into_iter().peekable();
244 264 for (new_base_range, other_range) in new_unchanged_ranges.iter() {
245 265 assert_eq!(new_base_range.len(), other_range.len());
246 266 while let Some(UnchangedRange {
247 267 base_range,
248 268 offsets,
249 269 }) = current_ranges_iter.peek()
250 270 {
251 271 // No need to look further if we're past the new range.
252 272 if base_range.start >= new_base_range.end {
253 273 break;
254 274 }
255 275 // Discard any current unchanged regions that don't match between
256 276 // the base and the new input.
257 277 if base_range.end <= new_base_range.start {
258 278 current_ranges_iter.next();
259 279 continue;
260 280 }
261 281 let new_start = max(base_range.start, new_base_range.start);
262 282 let new_end = min(base_range.end, new_base_range.end);
263 283 let mut new_offsets = offsets.clone();
264 284 new_offsets
265 285 .push(other_range.start.wrapping_sub(new_base_range.start)
266 286 as isize);
267 287 result.push(UnchangedRange {
268 288 base_range: new_start..new_end,
269 289 offsets: new_offsets,
270 290 });
271 291 if base_range.end >= new_base_range.end {
272 292 // Break without consuming the item; there may be other new
273 293 // ranges that overlap with it.
274 294 break;
275 295 }
276 296 current_ranges_iter.next();
277 297 }
278 298 }
279 299 result
280 300 }
@@ -1,156 +1,204 b''
1 1 use similar::DiffableStr;
2 use std::ffi::OsStr;
2 3 use tempdir::TempDir;
3 4
4 fn run_test(input: &str) -> String {
5 fn run_test(arg: &str, input: &str) -> String {
5 6 let mut cmd = assert_cmd::Command::cargo_bin("merge-lists").unwrap();
6 7 let temp_dir = TempDir::new("test").unwrap();
7 8 let base_path = temp_dir.path().join("base");
8 9 let local_path = temp_dir.path().join("local");
9 10 let other_path = temp_dir.path().join("other");
10 11
11 12 let rest = input.strip_prefix("\nbase:\n").unwrap();
12 13 let mut split = rest.split("\nlocal:\n");
13 14 std::fs::write(&base_path, split.next().unwrap()).unwrap();
14 15 let rest = split.next().unwrap();
15 16 let mut split = rest.split("\nother:\n");
16 17 std::fs::write(&local_path, split.next().unwrap()).unwrap();
17 18 std::fs::write(&other_path, split.next().unwrap()).unwrap();
18 19 cmd.args(&[
20 OsStr::new(arg),
19 21 local_path.as_os_str(),
20 22 base_path.as_os_str(),
21 23 other_path.as_os_str(),
22 24 ])
23 25 .assert()
24 26 .success();
25 27
26 28 let new_base_bytes = std::fs::read(&base_path).unwrap();
27 29 let new_local_bytes = std::fs::read(&local_path).unwrap();
28 30 let new_other_bytes = std::fs::read(&other_path).unwrap();
29 31 // No newline before "base:" because of https://github.com/mitsuhiko/insta/issues/117
30 32 format!(
31 33 "base:\n{}\nlocal:\n{}\nother:\n{}",
32 34 new_base_bytes.as_str().unwrap(),
33 35 new_local_bytes.as_str().unwrap(),
34 36 new_other_bytes.as_str().unwrap()
35 37 )
36 38 }
37 39
#[test]
fn test_merge_lists_basic() {
    // Every side changes the import list differently; all three files should
    // end up with the same merged list.
    let input = r"
base:
import lib1
import lib2

local:
import lib2
import lib3

other:
import lib3
import lib4
";
    let output = run_test("--python-imports", input);
    insta::assert_snapshot!(output, @r###"
    base:
    import lib3
    import lib4

    local:
    import lib3
    import lib4

    other:
    import lib3
    import lib4
    "###);
}
69 72
#[test]
fn test_merge_lists_from() {
    // Test some "from x import y" statements and some non-import conflicts
    // (unresolvable)
    let input = r"
base:
from . import x

1+1

local:
from . import x
from a import b

2+2

other:
from a import c

3+3
";
    let output = run_test("--python-imports", input);
    insta::assert_snapshot!(output, @r###"
    base:
    from a import b
    from a import c

    1+1

    local:
    from a import b
    from a import c

    2+2

    other:
    from a import b
    from a import c

    3+3
    "###);
}
113 117
#[test]
fn test_merge_lists_not_sorted() {
    // Test that nothing is done if the elements in the conflicting hunks are
    // not sorted
    let input = r"
base:
import x

1+1

local:
import a
import x

2+2

other:
import z
import y

3+3
";
    let output = run_test("--python-imports", input);
    insta::assert_snapshot!(output, @r###"
    base:
    import x

    1+1

    local:
    import a
    import x

    2+2

    other:
    import z
    import y

    3+3
    "###);
}
162
#[test]
fn test_custom_regex() {
    // Test merging of all lines (by matching anything)
    let input = r"
base:
aardvark
baboon
camel

local:
aardvark
camel
eagle

other:
aardvark
camel
deer
";
    let output = run_test("--pattern=.*", input);
    insta::assert_snapshot!(output, @r###"
    base:
    aardvark
    camel
    deer
    eagle

    local:
    aardvark
    camel
    deer
    eagle

    other:
    aardvark
    camel
    deer
    eagle
    "###);
}
General Comments 0
You need to be logged in to leave comments. Login now