main.rs
300 lines
| 10.0 KiB
| application/rls-services+xml
|
RustLexer
Martin von Zweigbergk
|
r49875 | use clap::{ArgGroup, Parser}; | ||
Martin von Zweigbergk
|
r49874 | use itertools::Itertools; | ||
use regex::bytes::Regex; | ||||
use similar::ChangeTag; | ||||
use std::cmp::{max, min, Ordering}; | ||||
use std::collections::HashSet; | ||||
use std::ffi::OsString; | ||||
use std::ops::Range; | ||||
use std::path::PathBuf; | ||||
fn find_unchanged_ranges( | ||||
old_bytes: &[u8], | ||||
new_bytes: &[u8], | ||||
) -> Vec<(Range<usize>, Range<usize>)> { | ||||
let diff = similar::TextDiff::configure() | ||||
.algorithm(similar::Algorithm::Patience) | ||||
.diff_lines(old_bytes, new_bytes); | ||||
let mut new_unchanged_ranges = vec![]; | ||||
let mut old_index = 0; | ||||
let mut new_index = 0; | ||||
for diff in diff.iter_all_changes() { | ||||
match diff.tag() { | ||||
ChangeTag::Equal => { | ||||
new_unchanged_ranges.push(( | ||||
old_index..old_index + diff.value().len(), | ||||
new_index..new_index + diff.value().len(), | ||||
)); | ||||
old_index += diff.value().len(); | ||||
new_index += diff.value().len(); | ||||
} | ||||
ChangeTag::Delete => { | ||||
old_index += diff.value().len(); | ||||
} | ||||
ChangeTag::Insert => { | ||||
new_index += diff.value().len(); | ||||
} | ||||
} | ||||
} | ||||
new_unchanged_ranges | ||||
} | ||||
/// Returns a list of all the lines in the input (including trailing newlines), | ||||
/// but only if they all match the regex and they are sorted. | ||||
fn get_lines<'input>( | ||||
input: &'input [u8], | ||||
regex: &Regex, | ||||
) -> Option<Vec<&'input [u8]>> { | ||||
let lines = input.split_inclusive(|x| *x == b'\n').collect_vec(); | ||||
let mut previous_line = "".as_bytes(); | ||||
for line in &lines { | ||||
if *line < previous_line { | ||||
return None; | ||||
} | ||||
if !regex.is_match(line) { | ||||
return None; | ||||
} | ||||
previous_line = line; | ||||
} | ||||
Some(lines) | ||||
} | ||||
fn resolve_conflict( | ||||
base_slice: &[u8], | ||||
local_slice: &[u8], | ||||
other_slice: &[u8], | ||||
regex: &Regex, | ||||
) -> Option<Vec<u8>> { | ||||
let base_lines = get_lines(base_slice, regex)?; | ||||
let local_lines = get_lines(local_slice, regex)?; | ||||
let other_lines = get_lines(other_slice, regex)?; | ||||
let base_lines_set: HashSet<_> = base_lines.iter().copied().collect(); | ||||
let local_lines_set: HashSet<_> = local_lines.iter().copied().collect(); | ||||
let other_lines_set: HashSet<_> = other_lines.iter().copied().collect(); | ||||
let mut result = local_lines_set; | ||||
for to_add in other_lines_set.difference(&base_lines_set) { | ||||
result.insert(to_add); | ||||
} | ||||
for to_remove in base_lines_set.difference(&other_lines_set) { | ||||
result.remove(to_remove); | ||||
} | ||||
Some(result.into_iter().sorted().collect_vec().concat()) | ||||
} | ||||
fn resolve( | ||||
base_bytes: &[u8], | ||||
local_bytes: &[u8], | ||||
other_bytes: &[u8], | ||||
regex: &Regex, | ||||
) -> (Vec<u8>, Vec<u8>, Vec<u8>) { | ||||
// Find unchanged ranges between the base and the two sides. We do that by | ||||
// initially considering the whole base unchanged. Then we compare each | ||||
// side with the base and intersect the unchanged ranges we find with | ||||
// what we had before. | ||||
let unchanged_ranges = vec![UnchangedRange { | ||||
base_range: 0..base_bytes.len(), | ||||
offsets: vec![], | ||||
}]; | ||||
let unchanged_ranges = intersect_regions( | ||||
unchanged_ranges, | ||||
&find_unchanged_ranges(base_bytes, local_bytes), | ||||
); | ||||
let mut unchanged_ranges = intersect_regions( | ||||
unchanged_ranges, | ||||
&find_unchanged_ranges(base_bytes, other_bytes), | ||||
); | ||||
// Add an empty UnchangedRange at the end to make it easier to find change | ||||
// ranges. That way there's a changed range before each UnchangedRange. | ||||
unchanged_ranges.push(UnchangedRange { | ||||
base_range: base_bytes.len()..base_bytes.len(), | ||||
offsets: vec![ | ||||
local_bytes.len().wrapping_sub(base_bytes.len()) as isize, | ||||
other_bytes.len().wrapping_sub(base_bytes.len()) as isize, | ||||
], | ||||
}); | ||||
let mut new_base_bytes: Vec<u8> = vec![]; | ||||
let mut new_local_bytes: Vec<u8> = vec![]; | ||||
let mut new_other_bytes: Vec<u8> = vec![]; | ||||
let mut previous = UnchangedRange { | ||||
base_range: 0..0, | ||||
offsets: vec![0, 0], | ||||
}; | ||||
for current in unchanged_ranges { | ||||
let base_slice = | ||||
&base_bytes[previous.base_range.end..current.base_range.start]; | ||||
let local_slice = &local_bytes[previous.end(0)..current.start(0)]; | ||||
let other_slice = &other_bytes[previous.end(1)..current.start(1)]; | ||||
if let Some(resolution) = | ||||
resolve_conflict(base_slice, local_slice, other_slice, regex) | ||||
{ | ||||
new_base_bytes.extend(&resolution); | ||||
new_local_bytes.extend(&resolution); | ||||
new_other_bytes.extend(&resolution); | ||||
} else { | ||||
new_base_bytes.extend(base_slice); | ||||
new_local_bytes.extend(local_slice); | ||||
new_other_bytes.extend(other_slice); | ||||
} | ||||
new_base_bytes.extend(&base_bytes[current.base_range.clone()]); | ||||
new_local_bytes.extend(&local_bytes[current.start(0)..current.end(0)]); | ||||
new_other_bytes.extend(&other_bytes[current.start(1)..current.end(1)]); | ||||
previous = current; | ||||
} | ||||
(new_base_bytes, new_local_bytes, new_other_bytes) | ||||
} | ||||
/// A tool that performs a 3-way merge, resolving conflicts in sorted lists and | ||||
/// leaving other conflicts unchanged. This is useful with Mercurial's support | ||||
/// for partial merge tools (configured in `[partial-merge-tools]`). | ||||
#[derive(Parser, Debug)] | ||||
#[clap(version, about, long_about = None)] | ||||
Martin von Zweigbergk
|
r49875 | #[clap(group(ArgGroup::new("match").required(true).args(&["pattern", "python-imports"])))] | ||
Martin von Zweigbergk
|
r49874 | struct Args { | ||
/// Path to the file's content in the "local" side | ||||
local: OsString, | ||||
/// Path to the file's content in the base | ||||
base: OsString, | ||||
/// Path to the file's content in the "other" side | ||||
other: OsString, | ||||
Martin von Zweigbergk
|
r49875 | |||
/// Regular expression to use | ||||
#[clap(long, short)] | ||||
pattern: Option<String>, | ||||
/// Use built-in regular expression for Python imports | ||||
#[clap(long)] | ||||
python_imports: bool, | ||||
} | ||||
fn get_regex(args: &Args) -> Regex { | ||||
let pattern = if args.python_imports { | ||||
r"import \w+(\.\w+)*( +#.*)?\n|from (\w+(\.\w+)* import \w+( as \w+)?(, \w+( as \w+)?)*( +#.*)?)" | ||||
} else if let Some(pattern) = &args.pattern { | ||||
pattern | ||||
} else { | ||||
".*" | ||||
}; | ||||
let pattern = format!(r"{}\r?\n?", pattern); | ||||
regex::bytes::Regex::new(&pattern).unwrap() | ||||
Martin von Zweigbergk
|
r49874 | } | ||
fn main() { | ||||
let args: Args = Args::parse(); | ||||
let base_path = PathBuf::from(&args.base); | ||||
let local_path = PathBuf::from(&args.local); | ||||
let other_path = PathBuf::from(&args.other); | ||||
let base_bytes = std::fs::read(&base_path).unwrap(); | ||||
let local_bytes = std::fs::read(&local_path).unwrap(); | ||||
let other_bytes = std::fs::read(&other_path).unwrap(); | ||||
Martin von Zweigbergk
|
r49875 | let regex = get_regex(&args); | ||
Martin von Zweigbergk
|
r49874 | let (new_base_bytes, new_local_bytes, new_other_bytes) = | ||
resolve(&base_bytes, &local_bytes, &other_bytes, ®ex); | ||||
// Write out the result if anything changed | ||||
if new_base_bytes != base_bytes { | ||||
std::fs::write(&base_path, new_base_bytes).unwrap(); | ||||
} | ||||
if new_local_bytes != local_bytes { | ||||
std::fs::write(&local_path, new_local_bytes).unwrap(); | ||||
} | ||||
if new_other_bytes != other_bytes { | ||||
std::fs::write(&other_path, new_other_bytes).unwrap(); | ||||
} | ||||
} | ||||
fn checked_add(base: usize, offset: isize) -> usize { | ||||
if offset < 0 { | ||||
base.checked_sub(offset.checked_abs().unwrap() as usize) | ||||
.unwrap() | ||||
} else { | ||||
base.checked_add(offset as usize).unwrap() | ||||
} | ||||
} | ||||
// The remainder of the file is copied from | ||||
// https://github.com/martinvonz/jj/blob/main/lib/src/diff.rs | ||||
#[derive(Clone, PartialEq, Eq, Debug)] | ||||
struct UnchangedRange { | ||||
base_range: Range<usize>, | ||||
offsets: Vec<isize>, | ||||
} | ||||
impl UnchangedRange { | ||||
fn start(&self, side: usize) -> usize { | ||||
checked_add(self.base_range.start, self.offsets[side]) | ||||
} | ||||
fn end(&self, side: usize) -> usize { | ||||
checked_add(self.base_range.end, self.offsets[side]) | ||||
} | ||||
} | ||||
impl PartialOrd for UnchangedRange { | ||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> { | ||||
Some(self.cmp(other)) | ||||
} | ||||
} | ||||
impl Ord for UnchangedRange { | ||||
fn cmp(&self, other: &Self) -> Ordering { | ||||
self.base_range | ||||
.start | ||||
.cmp(&other.base_range.start) | ||||
.then_with(|| self.base_range.end.cmp(&other.base_range.end)) | ||||
} | ||||
} | ||||
/// Takes the current regions and intersects it with the new unchanged ranges | ||||
/// from a 2-way diff. The result is a map of unchanged regions with one more | ||||
/// offset in the map's values. | ||||
fn intersect_regions( | ||||
current_ranges: Vec<UnchangedRange>, | ||||
new_unchanged_ranges: &[(Range<usize>, Range<usize>)], | ||||
) -> Vec<UnchangedRange> { | ||||
let mut result = vec![]; | ||||
let mut current_ranges_iter = current_ranges.into_iter().peekable(); | ||||
for (new_base_range, other_range) in new_unchanged_ranges.iter() { | ||||
assert_eq!(new_base_range.len(), other_range.len()); | ||||
while let Some(UnchangedRange { | ||||
base_range, | ||||
offsets, | ||||
}) = current_ranges_iter.peek() | ||||
{ | ||||
// No need to look further if we're past the new range. | ||||
if base_range.start >= new_base_range.end { | ||||
break; | ||||
} | ||||
// Discard any current unchanged regions that don't match between | ||||
// the base and the new input. | ||||
if base_range.end <= new_base_range.start { | ||||
current_ranges_iter.next(); | ||||
continue; | ||||
} | ||||
let new_start = max(base_range.start, new_base_range.start); | ||||
let new_end = min(base_range.end, new_base_range.end); | ||||
let mut new_offsets = offsets.clone(); | ||||
new_offsets | ||||
.push(other_range.start.wrapping_sub(new_base_range.start) | ||||
as isize); | ||||
result.push(UnchangedRange { | ||||
base_range: new_start..new_end, | ||||
offsets: new_offsets, | ||||
}); | ||||
if base_range.end >= new_base_range.end { | ||||
// Break without consuming the item; there may be other new | ||||
// ranges that overlap with it. | ||||
break; | ||||
} | ||||
current_ranges_iter.next(); | ||||
} | ||||
} | ||||
result | ||||
} | ||||